//
// Migrate Session Layer
//
// Alex C. Snoeren <snoeren@lcs.mit.edu>
//
// Copyright (c) 2002 Massachusetts Institute of Technology.
//
// This software is being provided by the copyright holders under the GNU
// General Public License, either version 2 or, at your discretion, any later
// version. For more information, see the `COPYING' file in the source
// distribution.
//
// $Id: migrate_handler_tcp.cc,v 1.24 2002/10/08 19:02:59 snoeren Exp $
//
// Migrate API implementation.
//

#include <hash_map>

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <ring.hh>

#include "migrate_handler_tcp.hh"

// Send the migrate daemon a CONNECTION_MSG describing the current
// connection.  Does nothing when the daemon is not present or when no
// connection object exists yet.
void
migrate_handler_tcp::updateConns() {

  // Nothing to report without both a daemon and a connection
  if (!migrate_daemon->is_present() || !conn)
    return;

  // The record produced by getOldConn(0, conn) is shipped as raw bytes,
  // tagged with our process id
  const char *record = (const char *)getOldConn(0, conn);
  migrate_daemon->sendto(CONNECTION_MSG, (int)getpid(), record,
                         sizeof(migrate_connection));
}


// Destructor: releases the ring buffer, then, if a connection exists,
// tells the daemon (when present) about the close and releases the
// connection object.
migrate_handler_tcp::~migrate_handler_tcp() {

  // Deleting a null pointer is a no-op, so no explicit guard is needed
  delete ring;

  // No connection means nothing further to clean up
  if (!conn)
    return;

  // Inform daemon of connection close (getOldConn is called with -1 here,
  // versus 0 in updateConns)
  if (migrate_daemon->is_present()) {
    const char *record = (const char *)getOldConn(-1, conn);
    migrate_daemon->sendto(CONNECTION_MSG, (int)getpid(), record,
                           sizeof(migrate_connection));
  }

  delete conn;
}


// Returns the local address to report to callers: the source address
// recorded in the connection object when one exists, otherwise whatever
// the downstream handler reports.
address
migrate_handler_tcp::getsockname()
{
  // No connection object yet: defer to the downstream handler
  if (!conn)
    return downstream[0]->getsockname();

  // Report the original source address
  return conn->saddr();
}

// Returns the peer address to report to callers: the destination address
// recorded in the connection object when one exists, otherwise whatever
// the downstream handler reports.
address
migrate_handler_tcp::getpeername()
{
  // No connection object yet: defer to the downstream handler
  if (!conn)
    return downstream[0]->getpeername();

  // Report the original destination, NOT the current one
  return conn->daddr();
}

// Makes sure this handler has a session, creating an M_AUTOCLOSE one if
// needed, and initialises it if it is still MIGRATE_NOTCONNECTED.
//
//   a      address handed to session->init()
//   state  state handed to session->init()
void
migrate_handler_tcp::initSession(address a, migrate_state state)
{
  // Reuse an existing session, otherwise create one
  if (session) {
    ts_debug_1("Session already exists");
  } else {
    ts_debug_1("Creating session to house connection");
    session = new MigrateSession(M_AUTOCLOSE);
  }

  // A session that is not yet connected still needs to be initialised
  if (session->state() == MIGRATE_NOTCONNECTED) {
    session->init(a, state);
  }
}


// Starts a connect on the downstream handler.  On success (downstream
// returned 0) the destination is remembered and the session is
// initialised in state MIGRATE_CONNECTING.
//
// Returns the downstream connect() result unchanged.
int
migrate_handler_tcp::connect(address a)
{
  const int rc = downstream[0]->connect(a);

  // Failure: log and hand the result straight back
  if (rc != 0) {
    ts_debug_1("- unable to connect: %s", strerror(errno));
    return rc;
  }

  // Success: remember the target and set up the session that may
  // house this connection
  dest = a;
  initSession(a, MIGRATE_CONNECTING);

  return rc;
}

// Allocates the ring buffer used to retain outgoing bytes.
//
// The size is the sum of:
//   - the downstream socket's SO_SNDBUF value,
//   - twice session->pbufsize() (or twice 87380 when that is zero),
//   - the current _slush allowance.
//
// Calls ts_fatal() if the send buffer size cannot be read.
void
migrate_handler_tcp::initRing()
{
  // Set up ring buffer
  string sbufstr = downstream[0]->getsockopt(SOL_SOCKET, SO_SNDBUF,
                                             sizeof(int));

  // The result is reinterpreted as an int below, so anything shorter
  // than sizeof(int) (including an empty string) is unusable.
  if (sbufstr.length() < sizeof(int))
    ts_fatal("Unable to get snd buffer size");

  int sndbufsize = *((int *)sbufstr.data());
  int rcvbufsize = session->pbufsize();

  // XXX:If we don't yet know how big to make the buffer, go large
  if(!rcvbufsize)
    rcvbufsize = 87380;

  // XXX: We don't yet understand Linux RCVBUFFER, be cautious for now
  rcvbufsize *= 2;

  ts_debug_1("Creating new ring buffer of size %d+%d+%d", sndbufsize,
             rcvbufsize, _slush);

  // NOTE(review): a standard-conforming `new` throws instead of returning
  // null, which would make this check dead code; kept in case the build
  // uses a non-throwing allocator -- confirm.
  if(!(ring = new ring_buffer<char>(sndbufsize + rcvbufsize + _slush)))
    ts_fatal("Unable to create ring of size %d",
             (sndbufsize + rcvbufsize + _slush));
}

void
migrate_handler_tcp::initConn(int passive)
{
    ts_debug_1("Adding connection to session %d", session->id());

    // Get current addresses
    address csaddr = downstream[0]->getsockname();
    address cdaddr = downstream[0]->getpeername();
    assert(cdaddr.addrlen());

    // Update connection bindings
    conn = new MigrateConnection(csaddr, (passive ? cdaddr : dest),
				 csaddr, cdaddr, session, downstream[0]);

    // Notify remote end point of new connection
    session->addConn(this);
    updateConns();
}


// Downstream callback reporting the outcome of a connect.
//
//   from     the handler reporting the result
//   success  whether the connect succeeded
//
// On success the connection object is created.  The result is forwarded
// upstream immediately on failure, or when the session is already
// MIGRATE_ESTABLISHED or MIGRATE_NOTSUPPORTED; otherwise the notification
// is held back (_connPending) and input from `from` is paused.
void
migrate_handler_tcp::connected(flow_handler *from, bool success)
{
  if (success) {
    initConn(0);
    ts_debug_1("Migrated connected from %s:", conn->csaddr().c_str());
    ts_debug_1("to %s", conn->cdaddr().c_str());
  }

  // Hold the notification only for a successful connect whose session is
  // neither established nor known to be unsupported.  `success` is tested
  // first so the session is not consulted on failure.
  const bool hold = success &&
    (session->state() != MIGRATE_ESTABLISHED) &&
    (session->state() != MIGRATE_NOTSUPPORTED);

  if (hold) {
    ts_debug_1("Holding connection pending session");
    from->may_avail(false);
    _connPending = true;
    return;
  }

  // The ring buffer is only set up for sessions that support migration
  if (success && (session->state() != MIGRATE_NOTSUPPORTED)) {
    initRing();
  }

  get_upstream().connected(this, success);
}


// Accepts a connection via flow_handler::accept() and prepares the
// accepted handler: session (MIGRATE_NOTCONNECTED), connection object
// (passive side) and ring buffer.
//
// Returns an empty acceptret if the underlying accept failed, otherwise
// the accept result unchanged.
acceptret
migrate_handler_tcp::accept()
{
  const acceptret ar = flow_handler::accept();

  // If it failed, just bail
  if (!ar) {
    return acceptret();
  }

  // The accepted handler is treated as one of ours
  migrate_handler_tcp *const accepted = (migrate_handler_tcp *)ar.h;

  // Session first, then the connection, then the ring buffer
  accepted->initSession(ar.addr, MIGRATE_NOTCONNECTED);
  accepted->initConn(1);
  accepted->initRing();

  ts_debug_1("Migrate accepted connection from %s",ar.addr.c_str());

  return ar;
}

// Closes the downstream handler if there is one.
// Returns the downstream result, or 0 when there is nothing to close.
int
migrate_handler_tcp::close() {
  // No downstream handler: nothing to do
  if (downstream.empty() || !downstream[0])
    return 0;

  return downstream[0]->close();
}

// Forwards a shutdown request to the downstream handler if there is one.
//
//   r, w  passed through unchanged to the downstream shutdown()
//
// Returns the downstream result, or 0 when there is no downstream handler.
int
migrate_handler_tcp::shutdown(bool r, bool w) {
  // No downstream handler: nothing to do
  if (downstream.empty() || !downstream[0])
    return 0;

  return downstream[0]->shutdown(r, w);
}

// Applies the base-class may_avail() handling, then, if a session exists
// and is MIGRATE_NOTCONNECTED, delivers a default-constructed data object
// upstream.
void
migrate_handler_tcp::may_avail(bool may) {
  flow_handler::may_avail(may);

  // Only a session that exists and is not connected needs the extra step
  if (!session)
    return;
  if (session->state() != MIGRATE_NOTCONNECTED)
    return;

  /* If we're closed, tell it */
  upstream->avail(this, data());
}

// Downstream callback delivering received data (or an error).
//
//   from  must be downstream[0] (asserted)
//   d     the received data; d.error() non-zero signals a read error
//
// Behaviour:
//   - On a read error the session is set to MIGRATE_FROZEN and the daemon
//     is sent a SESSION_MSG; returns true.
//   - When synced, the receive sequence number is advanced and the data is
//     passed upstream; returns the upstream result.
//   - Otherwise the first sizeof(unsigned int) bytes are read as the
//     peer's received-sequence number (network byte order).  The ring
//     buffer is trimmed by what the peer has newly acknowledged, the
//     remaining unacknowledged bytes are re-written downstream, and the
//     handler becomes MIGRATE_SYNCED.  Any payload after the sequence
//     number is passed upstream.
//
// Returns true when there was nothing to pass upstream.
bool
migrate_handler_tcp::avail(flow_handler *from, data d)
{
  int res = 0;
  // Nobody should call us but our downstream
  assert(from == downstream[0]);

  if(d.error()) {
    ts_debug_1("Migrate read got %s", strerror(d.error()));
    session->set_state(MIGRATE_FROZEN, true);
    migrate_handler::migrate_daemon->sendto(SESSION_MSG, (int)getpid(),
                                            (const char *)session->getOldSession(),
                                            sizeof(migrate_session));
    return true;
  }

  // Common case, we're synced--just pass the data up
  if (sync == MIGRATE_SYNCED) {
      rcvseq += d.length();
      ts_debug_1("Read %d (%u)", d.length(), rcvseq);

      // Ensure we're supposed to be availing
      assert(may_avail_now());

      return upstream->avail(this, d);
  }

  // Otherwise, we're expecting sync info

  unsigned int _rcvseq, sent, tosend, n;

  // Make sure the other side didn't immediately close this connection
  if(d.length() == 0) {
    ts_debug_1("Other side closed connection before SYNC");
    downstream[0]->may_avail(false);
    downstream[0]->shutdown(true, false);    
    return upstream->avail(this, data(NULL, 0, EPIPE));
  }
    
  // IP will never fragment less than 8 bytes, so this is safe
  // NOTE(review): this is only an assert; a build with NDEBUG would read
  // past a shorter-than-4-byte delivery -- confirm a short read cannot
  // happen here.
  assert(d.length() >= sizeof(unsigned int));

  _rcvseq = *((unsigned int *)d.bits());
  _rcvseq = ntohl(_rcvseq);
  ts_debug_1("We're syncing to %u %u...", sndseq, _rcvseq);
  
  // Figure out how much we have to send
  sent = _rcvseq - ackseq;
  ackseq = _rcvseq;
  ts_debug_1("Consumed %u", sent);
  ring->consumed(sent);
  tosend = sndseq - _rcvseq;
  
  // Sanity check the size of our ring buffer
  if (tosend > ring->size())
    ts_fatal("Lost more (%u) than ring size (%u)?", tosend, ring->size());
  if((n = ring->consecutive()) > tosend)
    ts_fatal("Ring contains more (%u) than we lost (%u)?", n, tosend);

  // Turn on writing--syncing may block and turn int back off
  upstream->may_write(this, true);
  
  // Replay the buffered data
  ts_debug_1("First writing %u of %u", n, tosend);
  if((res = downstream[0]->write(data(ring->head(),n))))
    ts_fatal("TESLA write failed: %s", strerror(-res));
  if (n < tosend) {
    ts_debug_1("Ring wrap-around, now writing %u", tosend - n);
    // The wrapped part of the replay is just as essential as the first
    // part, so a failure here is treated the same way instead of being
    // silently dropped.
    if((res = downstream[0]->write(data(ring->buff(), (tosend - n)))))
      ts_fatal("TESLA write failed: %s", strerror(-res));
  }

  // Update sync state
  ts_debug_1("Synced");
  sync = MIGRATE_SYNCED;

  // Check to see if we had data to pass up as well
  if(d.length() > sizeof(unsigned int)) {
    rcvseq += (d.length() - sizeof(unsigned int));
    // The subtraction yields a size_t; cast so it matches the %d format
    ts_debug_1("Read after SYNC %d (%u)",
               (int)(d.length()-sizeof(unsigned int)), rcvseq);    

    // Ensure we're supposed to be availing
    assert(may_avail_now());

    return upstream->avail(this, data(d.bits() + sizeof(unsigned int),
                                      (d.length() - sizeof(unsigned int))));
  }

  // Otherwise, there is nothing here, really
  return true;
}


// Writes data downstream and, for sessions that support migration,
// keeps a copy in the ring buffer so it can be replayed later.
//
//   d  the data to write
//
// The downstream write() result is a negated errno (0 on success):
//   - 0: the ring is grown if d is larger than the current slush
//     allowance, the bytes are stored in the ring and sndseq advances.
//   - EPIPE / ENETDOWN / EHOSTDOWN / EHOSTUNREACH / ETIMEDOUT: the session
//     is set to MIGRATE_FROZEN and the daemon is sent a SESSION_MSG.
//   - anything else: logged only.
// For MIGRATE_NOTSUPPORTED sessions the downstream result is ignored.
//
// Always returns 0.
// NOTE(review): the original default-case comment said "Pass error code
// back up", but no error is ever returned to the caller -- confirm whether
// that is intended before changing the return value.
int
migrate_handler_tcp::write(data d) {

#if 0
  // First check to make sure we're connected
  if(session->state() == MIGRATE_NOTCONNECTED) {
    ts_debug_1("We're not connected, generating EPIPE");
    return -EPIPE;
  }
#endif

  // We shouldn't be asked to write unless we're synced
  assert(sync == MIGRATE_SYNCED);
  assert(session);

  int res = downstream[0]->write(d);

  if(session->state() != MIGRATE_NOTSUPPORTED) {
    switch(-res) {
      
    case 0:
      // Check to make sure ring is appropriate size
      if(d.length() > _slush) {
        ts_debug_1("Growing ring slush from %u to %u", _slush, d.length());
        ring->resize((ring->size() - _slush) + d.length());
        _slush = d.length();
      }
      // Buffer all outgoing bytes
      ring->supply(d.bits(), d.length());
      sndseq += d.length();
      ts_debug_3("Wrote %d (%u)", d.length(), sndseq);
      break;
      
    case EPIPE:
    case ENETDOWN:
    case EHOSTDOWN:
    case EHOSTUNREACH:
    case ETIMEDOUT:

      // XXX: Suspend the session
      ts_debug_1("Migrate preventing disconnect: %s", strerror(-res));
      session->set_state(MIGRATE_FROZEN, true);
      migrate_handler::migrate_daemon->sendto(SESSION_MSG, (int)getpid(),
                                              (const char *)session->getOldSession(),
                                              sizeof(migrate_session));
      break;
      
    default:
      // Unhandled error: log it.  res is a negated errno (see the switch
      // expression), so it must be negated again before strerror().
      ts_debug_2("Migrate got an error on write: %s", strerror(-res));
      break;
    }
  }
  return 0;
}

// Serialises this handler to `out`.
//
// Layout written: _magic, then a flag (1 or 0).  When the flag is 1 it is
// followed by session, conn, sndseq, rcvseq, ackseq and ring, in that
// order.  A session, if present, must be MIGRATE_FROZEN (asserted).
//
// Returns `out` converted to bool.
bool
migrate_handler_tcp::save_state(oserial& out) const
{
  ts_debug_1("In migrate_handler_tcp::save_state");

  // Our internal identifier always comes first
  out << _magic;

  // Without a session there is nothing more to record
  if (!session) {
    out << 0;
    return out;
  }

  out << 1;

  // We only know how to save suspended sessions
  assert(session->state() == MIGRATE_FROZEN);

  out << session;      // parent session
  out << conn;         // current connection
  out << sndseq;       // send sequence number
  out << rcvseq;       // receive sequence number
  out << ackseq;       // last successfully migrated byte
  out << ring;         // ring buffer

  return out;
}

// Handler declaration via the project's DEFINE_HANDLER macro: name
// `migrate`, class migrate_handler_tcp, for AF_INET / SOCK_STREAM.
// NOTE(review): presumably this registers the handler with the framework
// for that domain/type -- confirm against the macro definition.
DEFINE_HANDLER(migrate, migrate_handler_tcp, AF_INET, SOCK_STREAM);
// Usage text for the handler: the adjacent string literals and the
// VERSION macro concatenate into "Migrate TCP handler v<VERSION>\n".
HANDLER_USAGE(migrate_handler_tcp,

"Migrate TCP handler v"
VERSION
"\n"
	      );

// Reacts to a session state change.
//
// If a connect notification was being held (_connPending) and the session
// is now MIGRATE_ESTABLISHED or MIGRATE_NOTSUPPORTED, the hold is
// released: the ring buffer is created (established sessions only),
// downstream input is re-enabled and upstream is told the connect
// succeeded.  The generic migrate_handler handling always runs afterwards.
//
//   oldstate  forwarded to migrate_handler::stateChange()
void
migrate_handler_tcp::stateChange(migrate_state oldstate)
{
  // First, do TCP-specific stuff
  const bool established = (session->state() == MIGRATE_ESTABLISHED);
  const bool unsupported =
    !established && (session->state() == MIGRATE_NOTSUPPORTED);

  if ((established || unsupported) && _connPending) {
    _connPending = false;

    // Only sessions that support migration get a ring buffer
    if (established) {
      initRing();
    }

    downstream[0]->may_avail(true);
    get_upstream().connected(this, true);
  }

  // Then do generic stuff
  migrate_handler::stateChange(oldstate);
}

// Re-parents this handler, and its connection object if any, onto
// newSession.  The second argument is not used.
void
migrate_handler_tcp::changeSession(MigrateSession *newSession,
                                   const migrate_connection *)
{
  session = newSession;

  // Without a connection object there is nothing else to update
  if (!conn)
    return;

  conn->change_session(newSession);
}

// Moves this connection onto a new socket.
//
//   fd       descriptor of the replacement socket
//   session  not referenced in this body
//
// The handler must currently be MIGRATE_SYNCED (otherwise ts_fatal is
// called).  The old downstream handler is silenced, shut down for reading
// and deleted; a new one is plumbed on `fd`; upstream writes are suspended
// and the sync handshake is started via connSync().
void
migrate_handler_tcp::migrateConn(int fd, MigrateSession *session)
{

  if (sync != MIGRATE_SYNCED) {
    ts_fatal("Migrating a not synced connection: %d", sync);
  } else if (!downstream.empty()) {

    // XXX: If the current connection is synced, force a read

    // We never want to hear from this connection again
    if (downstream[0]) {
      downstream[0]->may_avail(false);
      downstream[0]->shutdown(true, false);
    }
    delete downstream[0];
    downstream.clear();
  }

  // Hold off upstream writes until the new connection has synchronized
  upstream->may_write(this, false);
  sync = MIGRATE_NOTSYNCED;

  // Plumb a handler of our own domain and type onto the new descriptor
  downstream.push_back(flow_handler::plumb(flow_handler::get_domain(),
                                           flow_handler::get_type(), fd));

  // Carry our current avail setting over to the new handler
  downstream[0]->may_avail(may_avail_now());

  // Record the new socket's addresses as the current bindings
  conn->update_current(downstream[0]->getsockname(),
                       downstream[0]->getpeername());

  // Kick off the sequence-number exchange
  connSync();
}


void
migrate_handler_tcp::connSync()
{
  unsigned int _rcvseq;
  int res;

  assert((session->state() == MIGRATE_PMIGRATING) ||
	 (session->state() == MIGRATE_LMIGRATING) ||
	 (session->state() == MIGRATE_ESTABLISHED) ||
	 (session->state() == MIGRATE_LOST));

  // The first thing we do is exchange sequence numbers
  if (sync == MIGRATE_NOTSYNCED) {
    ts_debug_1("Telling other side we received %u", rcvseq);
    _rcvseq = htonl(rcvseq);
    if((res = downstream[0]->write(data((const char *)&_rcvseq,
					sizeof(_rcvseq)))))
      ts_fatal("Mirate got write error on sequence number: %s", strerror(res));
    sync = MIGRATE_HALFSYNCED;
  }

}

// Freezes the connection: runs the generic migrate_handler freeze, then
// closes the downstream socket with SO_LINGER enabled and a zero timeout,
// silences it, and deletes the downstream handler, leaving `downstream`
// empty.
//
// Failures from setsockopt()/close() are reported with ts_error() and do
// not stop the teardown.  If there is no downstream handler (for example
// after an earlier freeze) only the generic freeze is performed.
void
migrate_handler_tcp::freezeConn(void)
{
  struct linger li; 
  li.l_onoff = 1; 
  li.l_linger = 0; 
  int ret = 0;

  ts_debug_1("Freezing connection");

  // Tell upstream to chill out
  migrate_handler::freezeConn();

  // This function leaves `downstream` empty, and close()/shutdown() guard
  // against that state; do the same here so a repeated freeze does not
  // index an empty vector or dereference a null handler.
  if(downstream.empty() || !downstream[0])
    return;

  // Close socket with authority
  if((ret = downstream[0]->setsockopt(SOL_SOCKET, SO_LINGER,
                                      string((const char *)&li, sizeof(li)))))
    ts_error("Unable to set linger on socket: %s", strerror(ret));
  if((ret = downstream[0]->close()))
    ts_error("Unable to close socket: %s", strerror(ret));

  // We never want to hear from this socket again
  downstream[0]->may_avail(false);
  downstream[0]->shutdown(true, false);
  delete downstream[0];
  downstream[0] = NULL;
  downstream.clear();
}
