
#include <assert.h>
#include <stdlib.h>
#include "nexus.h"
#include "globus_gram_myjob.h"
#include "globus_common.h"

#include "duroc-common.h"

#include "globus_duroc_runtime.h"

#include "utils.h"

/*
 * Module descriptor for the DUROC runtime.  It carries the module name
 * followed by the activate, deactivate and atexit entry points, all of
 * which are defined later in this file.
 */
globus_module_descriptor_t globus_duroc_runtime_module = 
{
  "globus_duroc_runtime",
  globus_duroc_runtime_activate,
  globus_duroc_runtime_deactivate,
  globus_duroc_runtime_atexit,
  NULL /* get_pointer_func */
};

/* Handler for the DIE command message; defined at the end of this file. */
static void 
s_die_msg_handler (nexus_endpoint_t * endpointp,
		   nexus_buffer_t   * bufferp,
		   nexus_bool_t       is_non_threaded_handler);

/* Handler for the RUN command message; defined at the end of this file. */
static void 
s_run_msg_handler (nexus_endpoint_t * endpointp,
		   nexus_buffer_t   * bufferp,
		   nexus_bool_t       is_non_threaded_handler);

/*
 * Handler table installed on the barrier command port.  The position of
 * each entry is its message id, so entry 0 handles RUN and entry 1
 * handles DIE (see the note that follows the table).
 */
static
nexus_handler_t s_command_handlert[] =
{
  {
    NEXUS_HANDLER_TYPE_NON_THREADED,
    (nexus_handler_func_t) s_run_msg_handler
  },
  {
    NEXUS_HANDLER_TYPE_NON_THREADED,
    (nexus_handler_func_t) s_die_msg_handler
  }
};
/* KEEP THIS TABLE CONSISTENT WITH control.c and job_monitor.c :
 * RUN_MSG_ID = 0 
 * DIE_MSG_ID = 1 */

/* Number of entries in s_command_handlert. */
#define COMMAND_HANDLERT_SIZE 2


/* KEEP THIS DEFINITION CONSISTENT WITH control.c */

/* Handler id used when the check-in message is sent from the barrier. */
#define CHECKIN_MSG_ID 0

/* Maps a tag string to the globus_fifo_t that queues intra-subjob
 * messages received for that tag. */
static globus_hashtable_t s_tagged_gram_myjob_hasht;

/* Non-zero once s_intra_subjob_init has been entered. */
static int s_intra_subjob_initialized = 0;

/*
 * One-time setup of the intra-subjob messaging state.  Repeated calls are
 * no-ops; the flag is raised before the table is created, exactly as the
 * rest of the file expects.
 */
static void 
s_intra_subjob_init ()
{
  if ( ! s_intra_subjob_initialized ) {
    int rc;

    s_intra_subjob_initialized = 1;

    rc = globus_hashtable_init (&s_tagged_gram_myjob_hasht,
                                16 /* zero info default */,
                                globus_hashtable_string_hash,
                                globus_hashtable_string_keyeq);
    assert (rc == 0);
  }
}

/*
 * Tear down the intra-subjob tag table created by s_intra_subjob_init.
 */
static void
s_intra_subjob_done ()
{
  int rc = globus_hashtable_destroy (&s_tagged_gram_myjob_hasht);

  assert (rc == 0);
}

/* Maps a tag string to the globus_fifo_t that queues inter-subjob
 * messages received for that tag (only set up on rank 0). */
static globus_hashtable_t s_inter_subjob_tagged_duct_hasht;

static int
s_inter_subjob_duct_init ();

/* Non-zero once rank 0 has created the inter-subjob tag table. */
static int s_inter_subjob_initialized = 0;
/* Non-zero until the barrier has completed (or is known to be a no-op). */
static int s_barrier_pending = 1;

/*
 * Deferred initialisation of inter-subjob messaging.
 *
 * Returns 0 when already initialised, when this process is not rank 0, or
 * when rank 0 initialises successfully; returns 1 while the barrier is
 * still pending; otherwise returns whatever s_inter_subjob_duct_init
 * reports.
 */
static int
s_inter_subjob_init ()
{
  int rc;
  int my_rank;

  rc = gram_myjob_rank (&my_rank);
  assert (rc == 0);

  if ( s_inter_subjob_initialized )
    return 0;

  if ( s_barrier_pending )
    return 1;

  /* only the subjob master owns the inter-subjob state */
  if ( my_rank != 0 )
    return 0;

  rc = globus_hashtable_init (&s_inter_subjob_tagged_duct_hasht,
                              16 /* zero info default */,
                              globus_hashtable_string_hash,
                              globus_hashtable_string_keyeq);
  assert (rc == 0);

  s_inter_subjob_initialized = 1;

  return s_inter_subjob_duct_init ();
}

static void 
s_inter_subjob_done ()
{
  int err;
  int gram_rank;

  err = gram_myjob_rank (&gram_rank); assert (!err);

  if ( gram_rank == 0 ) {
    globus_hashtable_destroy (&s_inter_subjob_tagged_duct_hasht);
  }
}


/* Non-zero once s_duroc_runtime_activate has succeeded. */
static int s_duroc_runtime_module_enabled = 0;

/*
 * Activate every module the DUROC runtime depends on, then initialise the
 * local messaging state.
 *
 * Returns GLOBUS_SUCCESS, or GLOBUS_FAILURE after deactivating (in reverse
 * order) whichever dependencies had already been activated.
 */
static int
s_duroc_runtime_activate (void)
{
  if ( globus_module_activate (GLOBUS_COMMON_MODULE) != GLOBUS_SUCCESS ) 
    goto fail_common;

  if ( globus_module_activate (GLOBUS_THREAD_MODULE) != GLOBUS_SUCCESS )
    goto fail_thread;

  if ( globus_module_activate (GLOBUS_NEXUS_MODULE) != GLOBUS_SUCCESS )
    goto fail_nexus;

  if ( globus_module_activate (GLOBUS_DUCT_RUNTIME_MODULE) != GLOBUS_SUCCESS )
    goto fail_duct_runtime;

  if ( globus_module_activate (GLOBUS_GRAM_MYJOB_MODULE) != GLOBUS_SUCCESS )
    goto fail_gram_myjob;

  /* without both serial numbers in the environment there is no barrier
   * contact info, so the barrier is a no-op w/o sequencing restrictions */
  if ( (getenv ("GLOBUS_DUROC_JOB_SERIALNO") == NULL)
       || (getenv ("GLOBUS_DUROC_SUBJOB_SERIALNO") == NULL) ) {
    s_barrier_pending = 0;
  }

  s_intra_subjob_init ();

  /* short-circuits while the barrier is pending; the barrier calls it
   * again once it is safe */
  s_inter_subjob_init ();

  return GLOBUS_SUCCESS;

  /* unwind: each label releases the module activated just before the
   * one that failed, then falls through to the earlier ones */
fail_gram_myjob:
  globus_module_deactivate (GLOBUS_DUCT_RUNTIME_MODULE);

fail_duct_runtime:
  globus_module_deactivate (GLOBUS_NEXUS_MODULE);

fail_nexus:
  globus_module_deactivate (GLOBUS_THREAD_MODULE);

fail_thread:
  globus_module_deactivate (GLOBUS_COMMON_MODULE);

fail_common:
  return GLOBUS_FAILURE;
}

/*
 * Module activation entry point.  The real activation runs at most once;
 * later calls return GLOBUS_SUCCESS immediately.  Returns GLOBUS_FAILURE
 * if the underlying activation fails (a later call will retry it).
 */
int 
globus_duroc_runtime_activate (void)
{
  if ( s_duroc_runtime_module_enabled != 0 )
    return GLOBUS_SUCCESS;

  if ( s_duroc_runtime_activate () != GLOBUS_SUCCESS )
    return GLOBUS_FAILURE;

  s_duroc_runtime_module_enabled = 1;
  return GLOBUS_SUCCESS;
}

static int
s_duroc_runtime_deactivate (void)
{
  int rc;

  rc = GLOBUS_SUCCESS;

  s_inter_subjob_done ();

  s_intra_subjob_done ();

  if ( globus_module_deactivate (GLOBUS_GRAM_MYJOB_MODULE) != GLOBUS_SUCCESS )
    rc = GLOBUS_FAILURE;

  if ( globus_module_deactivate (GLOBUS_DUCT_RUNTIME_MODULE) != GLOBUS_SUCCESS)
    rc = GLOBUS_FAILURE;

  if ( globus_module_deactivate (GLOBUS_NEXUS_MODULE) != GLOBUS_SUCCESS )
    rc = GLOBUS_FAILURE;

  if ( globus_module_deactivate (GLOBUS_THREAD_MODULE) != GLOBUS_SUCCESS )
    rc = GLOBUS_FAILURE;

  if ( globus_module_deactivate (GLOBUS_COMMON_MODULE) != GLOBUS_SUCCESS )
    rc = GLOBUS_FAILURE;

  return rc;
}

/*
 * Module deactivation entry point.  It performs no work and always
 * returns GLOBUS_SUCCESS; the actual teardown in this file is done by
 * s_duroc_runtime_deactivate, which is invoked from
 * globus_duroc_runtime_atexit.
 */
int
globus_duroc_runtime_deactivate (void)
{
  return GLOBUS_SUCCESS;
}

/*
 * Module atexit hook: runs the real teardown.  The teardown's status is
 * deliberately discarded because this hook returns nothing.
 */
void 
globus_duroc_runtime_atexit ()
{
  (void) s_duroc_runtime_deactivate ();
}

/*
 * State of the command port the subjob master creates inside the barrier.
 * The RUN/DIE handlers update the flags under `mutex` and broadcast on
 * `cond`; the barrier waits on `cond` until one of the flags is set.
 */
typedef struct globus_duroc_runtime_command_port_s {
  nexus_mutex_t        mutex;      /* protects die, die_reason and run */
  nexus_cond_t         cond;       /* broadcast when die or run is set */
  globus_bool_t        die;        /* set by the DIE handler */
  int                  die_reason; /* value read from the DIE message */
  globus_bool_t        run;        /* set by the RUN handler */
  nexus_endpointattr_t epattr;     /* carries the command handler table */
  nexus_endpoint_t     ep;         /* endpoint; user pointer is this struct */
  nexus_startpoint_t   sp;         /* bound to ep, sent in the check-in */
} globus_duroc_runtime_command_port_t;

/*
 * Build a Nexus startpoint from a textual contact string.
 *
 * Only the linearized form is supported:
 *   >L S P hd hd ... hd<
 * where the "LSP" prefix is followed by an even number of hex digits that
 * decode to:
 *   >d d ... d \0 user-sp<
 *   >d d ... d< is the user-buffer format tag in decimal digits
 *   >user-sp< is the startpoint in user-buffer format
 *
 * Returns 0 on success and -1 for NULL arguments, for an attachment URL
 * ("URL" prefix, not handled here), for any other prefix, and for a
 * malformed linearized contact: odd digit count, payload larger than the
 * local decode buffer, missing '\0' after the format tag, or a format tag
 * that does not parse as an integer.
 */
static int 
s_make_startpoint (nexus_startpoint_t * spp, 
                   const char         * contact)
{
  int err;

  if ( (spp==NULL)
       || (contact==NULL) ) return -1;

  if ( (contact[0] == 'U')
       && (contact[1] == 'R')
       && (contact[2] == 'L') ) {
    /* an attachment URL */
    return -1;
  }
  else if ( (contact[0] == 'L')
            && (contact[1] == 'S')
            && (contact[2] == 'P') ) {
    /* a linearized startpoint */
    globus_byte_t   bbuff[GRAM_MYJOB_MAX_BUFFER_LENGTH];
    globus_byte_t * ptr;
    globus_byte_t * end;
    int             hex_len;
    int             byte_len;
    int             scanned;
    int             format = 0;

    hex_len = (int) utils_strlen (contact+3);
    if ( (hex_len % 2) != 0 ) return -1;

    /* refuse payloads that would overrun the decode buffer */
    byte_len = hex_len / 2;
    if ( byte_len > (int) sizeof (bbuff) ) return -1;

    globus_l_duroc_hex_decode_byte_array (contact+3, 
                                          byte_len,
                                          bbuff);

    /* find the '\0' that ends the format tag before handing the buffer
     * to sscanf, so neither the scan nor the parse leaves the decoded
     * bytes */
    end = bbuff + byte_len;
    ptr = bbuff;
    while ( (ptr < end) && ((*ptr) != '\0') ) ptr++;
    if ( ptr == end ) return -1;

    err = nexus_stdio_lock(); assert (!err);
    scanned = sscanf ((char *) bbuff, "%d", &format);
    err = nexus_stdio_unlock(); assert (!err);
    if ( scanned != 1 ) return -1;

    ptr++; /* step over the '\0': d d ... d \0 user-sp */
    nexus_user_get_startpoint (&(ptr), spp, 1, format);

    return 0;
  }
  else return -1;
}

/*
 * Translate a GRAM myjob status code into the corresponding DUROC code.
 * GLOBUS_SUCCESS maps to itself; any code not listed here becomes
 * GLOBUS_DUROC_ERROR_INTERNAL_FAILURE.
 */
static int
s_map_myjob_error_to_duroc_error (int err)
{
  if (err == GLOBUS_GRAM_MYJOB_ERROR_NOT_INITIALIZED)
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  if (err == GLOBUS_GRAM_MYJOB_ERROR_BAD_PARAM)
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  if (err == GLOBUS_GRAM_MYJOB_ERROR_COMM_FAILURE)
    return GLOBUS_DUROC_ERROR_GRAM_FAILED;

  if (err == GLOBUS_GRAM_MYJOB_ERROR_BAD_RANK)
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  if (err == GLOBUS_GRAM_MYJOB_ERROR_BAD_SIZE)
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  if (err == GLOBUS_SUCCESS)
    return GLOBUS_SUCCESS;

  return GLOBUS_DUROC_ERROR_INTERNAL_FAILURE;
}

/*
 * Store this process's rank within its subjob in *rankp.
 * Returns GLOBUS_DUROC_ERROR_NOT_INITIALIZED before the runtime has been
 * activated, otherwise the GRAM myjob result mapped to a DUROC code.
 */
int
globus_duroc_runtime_intra_subjob_rank (int * rankp)
{
  int myjob_err;

  if ( s_intra_subjob_initialized == 0 )
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  myjob_err = gram_myjob_rank (rankp);

  return s_map_myjob_error_to_duroc_error (myjob_err);
}

/*
 * Store the number of processes in this subjob in *sizep.
 * Returns GLOBUS_DUROC_ERROR_NOT_INITIALIZED before the runtime has been
 * activated, otherwise the GRAM myjob result mapped to a DUROC code.
 */
int
globus_duroc_runtime_intra_subjob_size (int * sizep)
{
  int myjob_err;

  if ( s_intra_subjob_initialized == 0 )
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  myjob_err = gram_myjob_size (sizep);

  return s_map_myjob_error_to_duroc_error (myjob_err);
}

/*
 * Send a tagged message to another process of the same subjob.
 *
 * Wire layout handed to gram_myjob_send:
 *   8 hex digits of protocol version, '\0', tag bytes, '\0', payload
 *
 * dest_addr  rank of the receiving process
 * tag        NUL-terminated tag selecting the receiver's queue
 * msg_len    payload length in bytes (may be 0)
 * msg        payload bytes
 *
 * Returns GLOBUS_DUROC_ERROR_NOT_INITIALIZED before activation,
 * GLOBUS_DUROC_ERROR_INVALID_PARAMETER for a NULL tag, a negative length,
 * a NULL payload with non-zero length, or a message whose framed size
 * exceeds GRAM_MYJOB_MAX_BUFFER_LENGTH; otherwise the GRAM myjob send
 * result mapped to a DUROC code.
 */
int
globus_duroc_runtime_intra_subjob_send (int             dest_addr,
                                        const char    * tag,
                                        int             msg_len,
                                        globus_byte_t * msg)
{
  int err;
  int i,j;
  int tag_len;
  globus_byte_t gram_msg [GRAM_MYJOB_MAX_BUFFER_LENGTH];

  if (! s_intra_subjob_initialized)
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  if ( (tag == NULL)
       || (msg_len < 0)
       || ((msg_len > 0) && (msg == NULL)) )
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  tag_len = (int) utils_strlen (tag);

  /* framing costs 8 version nibbles + '\0' + tag + '\0'; this is checked
   * at run time (not only by assert) so an oversized message can never
   * overrun gram_msg, and written as subtractions so the sum cannot
   * overflow */
  if ( (tag_len > (GRAM_MYJOB_MAX_BUFFER_LENGTH - 10))
       || (msg_len > (GRAM_MYJOB_MAX_BUFFER_LENGTH - 10 - tag_len)) )
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  {
    int version = GLOBUS_DUROC_RUNTIME_INTRA_SEND_PROTOCOL_VERSION;
    utils_sprintf ((char *) gram_msg, "%.8x", version);
    assert ( gram_msg[8]==((globus_byte_t) '\0') );
  }

  for (j=0, i=9; j<tag_len; j++, i++) {
    gram_msg[i] = (globus_byte_t) tag[j];
  }
  gram_msg[i] = (globus_byte_t) '\0'; i++;

  for (j=0; j<msg_len; j++, i++) {
    gram_msg[i] = (globus_byte_t) msg[j];
  }

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "tagged_gram_myjob_send: version >>%s<<, tag >>%s<<\n\n",
               (char *) gram_msg,
               (char *) (gram_msg+9));
  
  err = gram_myjob_send (dest_addr, 
                         gram_msg,
                         8 + 1 + tag_len + 1 + msg_len);

  return s_map_myjob_error_to_duroc_error (err);
}

/* One queued message: a payload length and a pointer to the payload
 * bytes.  Used as the element type of the per-tag and duct fifos. */
typedef struct globus_duroc_fifo_msg_s {
  int len;              /* payload length in bytes */
  globus_byte_t *msg;   /* payload bytes */
} globus_duroc_fifo_msg_t;

/* NOT THREAD-SAFE!! */
int
globus_duroc_runtime_intra_subjob_receive (const char    * tag,
					   int           * msg_lenp,
					   globus_byte_t * msgp)
{
  int err;
  globus_fifo_t *fifo;

  if (! s_intra_subjob_initialized)
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  fifo = ((globus_fifo_t *)
	  globus_hashtable_lookup (&s_tagged_gram_myjob_hasht,
				   (void *) tag));
  if ( fifo == NULL ) {
    char * tag_copy;

    tag_copy = utils_strdup (tag);

    fifo = (globus_fifo_t *) globus_malloc (sizeof (globus_fifo_t));
    err = globus_fifo_init (fifo);
    assert (!err);
    err = globus_hashtable_insert (&s_tagged_gram_myjob_hasht,
				   (void *) tag_copy,
				   (void *) fifo);
    assert (!err);

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		 "tagged_gram_myjob_receive: new queue >>%s<<\n\n",
		 tag);
  }

  while ( globus_fifo_empty (fifo) ) {
    int i,j;
    int gram_msg_len;
    globus_byte_t gram_msg[GRAM_MYJOB_MAX_BUFFER_LENGTH];
    char        gram_tag[GRAM_MYJOB_MAX_BUFFER_LENGTH];
    globus_fifo_t * fifo;
    globus_duroc_fifo_msg_t *msgp;

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		 "tagged_gram_myjob_receive: blocking on receive "
		 "(looking for >>%s<<)\n\n",
		 tag);

    err = gram_myjob_receive (gram_msg, &gram_msg_len);
    if (err) {
      err = s_map_myjob_error_to_duroc_error (err);
      return err;
    }

    {
      int version;

      nexus_stdio_lock ();
      err = sscanf ((char *) gram_msg, "%x", &version); assert (err==1);
      nexus_stdio_unlock ();
      assert ( gram_msg[8]==((globus_byte_t) '\0') );

      if ( version != GLOBUS_DUROC_RUNTIME_INTRA_SEND_PROTOCOL_VERSION ) {
	return GLOBUS_DUROC_ERROR_PROTOCOL_VERSION_MISMATCH;
      }
    }

    for (j=0, i=9; 
	  (i<gram_msg_len) && (((char) gram_msg[i])!='\0'); 
	 j++, i++) {
      gram_tag[j] = (char) gram_msg[i];
    }
    gram_tag[j] = '\0'; j++, i++;

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
	 "tagged_gram_myjob_receive: queuing msg for >>%s<<\n\n",
	 gram_tag);

    msgp = ((globus_duroc_fifo_msg_t *)
	    globus_malloc (sizeof (globus_duroc_fifo_msg_t)));
    assert (msgp!=NULL);

    if ( (gram_msg_len - i) > 0 ) {
      msgp->msg = ((globus_byte_t *) globus_malloc (sizeof (globus_byte_t)
						    * (gram_msg_len - i)));
      assert ( msgp->msg!=NULL);
    }
    else {
      msgp->msg = NULL;
    }

    for ( j=i; j<gram_msg_len; j++) {
      msgp->msg[j-i] = (globus_byte_t) gram_msg[j];
    }

    msgp->len = gram_msg_len - i;

    fifo = ((globus_fifo_t *)
	    globus_hashtable_lookup (&s_tagged_gram_myjob_hasht,
				     (void *) gram_tag));
    if ( fifo == NULL ) {
      char * tag_copy;

      tag_copy = utils_strdup (gram_tag);

      utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		   "tagged_gram_myjob_receive: new queue >>%s<<\n\n",
		   gram_tag);

      fifo = (globus_fifo_t *) globus_malloc (sizeof (globus_fifo_t));
      err = globus_fifo_init (fifo);
      assert (!err);
      err = globus_hashtable_insert (&s_tagged_gram_myjob_hasht,
				     (void *) tag_copy,
				     (void *) fifo);
      assert (!err);
    }

    err = globus_fifo_enqueue (fifo, (void *) msgp);
    assert (!err);
  }

  {
    int i;
    globus_duroc_fifo_msg_t *msg;

    msg = ((globus_duroc_fifo_msg_t *)
	   globus_fifo_dequeue (fifo));
    
    for (i=0; i<msg->len; i++) {
      msgp[i] = (msg->msg)[i];
    }
    globus_free (msg->msg);

    (*msg_lenp) = msg->len;

    globus_free (msg);
  }

  return GLOBUS_SUCCESS;
}


/*
 * create ports:
 * -- in master:
 * -- -- barrier command port
 *
 * subjob checkin:
 * -- in master:
 * -- -- send check-in to DUROC server
 * -- -- block waiting for job barrier command
 * 
 * barrier:
 * -- gram_myjob_init
 * -- in master:
 * -- -- send subjob barrier command to slaves (via gram_myjob)
 * -- in slaves:
 * -- -- block waiting for subjob barrier command (via gram_myjob)
 */
void
globus_duroc_runtime_barrier ()
{
  int    err;
  int    job_serialno;
  int    subjob_serialno;
  char * job_serialno_string;
  char * subjob_serialno_string;
  int    gram_rank;
  int    gram_size;

  globus_module_activate (GLOBUS_DUROC_RUNTIME_MODULE);
  

  err = globus_gram_myjob_rank (&gram_rank); assert (!err);
  err = globus_gram_myjob_size (&gram_size); assert (!err);

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
	       "barrier: gram rank %d, gram size %d\n",
	       gram_rank, gram_size);

  assert ( gram_rank >= 0 ); assert ( gram_rank < gram_size );

  job_serialno_string = getenv ("GLOBUS_DUROC_JOB_SERIALNO");
  subjob_serialno_string = getenv ("GLOBUS_DUROC_SUBJOB_SERIALNO");
  if ( (job_serialno_string == NULL)
       || (subjob_serialno_string == NULL) ) {
    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		 "barrier: no duroc info, running standalone.\n");
    goto fn_exit;
  }

  job_serialno = -1;
  subjob_serialno = -1;

  nexus_stdio_lock();
  err = sscanf (job_serialno_string, "%x", &job_serialno);
  assert (err==1);
  err = sscanf (subjob_serialno_string, "%x", &subjob_serialno);
  assert (err==1);
  nexus_stdio_unlock();

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
	       "barrier: job %d  subjob %d  entering barrier wait phase\n",
	       job_serialno, subjob_serialno);
  
  assert (job_serialno > 0);
  assert (subjob_serialno > 0);


  if ( gram_rank == 0 ) {
    /* MASTER PERFORMS CHECKIN */
    int                            i;
    nexus_buffer_t                 buffer;
    globus_duroc_runtime_command_port_t   port;
    char                         * checkin_contact;
    nexus_startpoint_t             checkin_sp;

    checkin_contact = getenv ("GLOBUS_DUROC_CHECKIN_CONTACT");
    assert (checkin_contact!=NULL);

    err = s_make_startpoint (&checkin_sp,
			     checkin_contact);
    assert (!err);

    port.die = GLOBUS_FALSE;
    port.run = GLOBUS_FALSE;

    err = nexus_mutex_init (&(port.mutex), NULL); assert (!err);
    err = nexus_cond_init (&(port.cond), NULL); assert (!err);

    err = nexus_endpointattr_init (&(port.epattr)); assert (!err);
    
    err = nexus_endpointattr_set_handler_table (&(port.epattr),
						s_command_handlert,
						COMMAND_HANDLERT_SIZE);
    assert (!err);
    
    err = nexus_endpoint_init (&(port.ep), &(port.epattr)); assert (!err);
    
    nexus_endpoint_set_user_pointer (&(port.ep),
				     (void *) &port);
  
    err = nexus_startpoint_bind (&(port.sp),
				 &(port.ep));
    assert (!err);

    
    err = nexus_buffer_init (&buffer, 0, 0); assert (!err);
    err = nxbuff_put_int (&buffer, 
			  GLOBUS_DUROC_CHECKIN_PROTOCOL_VERSION);
    assert (!err);
    err = nxbuff_put_startpoint_transfer (&buffer,
					  &(port.sp));
    err = nxbuff_put_int (&buffer, job_serialno); assert (!err);
    err = nxbuff_put_int (&buffer, subjob_serialno); assert (!err);
    assert (!err);
    

    err = nexus_send_rsr (&buffer, &checkin_sp,
			  CHECKIN_MSG_ID,
			  NEXUS_TRUE /* destroy buffer */,
			  NEXUS_TRUE /* always safe */);
    assert (!err);

    nexus_startpoint_flush (&checkin_sp);

    err = nexus_mutex_lock (&(port.mutex)); assert (!err);
    while ( (port.die == GLOBUS_FALSE)
	    && (port.run == GLOBUS_FALSE) ) {
      nexus_cond_wait (&(port.cond), &(port.mutex));
    }

    if ( port.die == GLOBUS_TRUE ) {
      int die_reason;

      die_reason = port.die_reason;

      err = nexus_mutex_unlock (&(port.mutex)); assert (!err);

      /* destroy command port */
      nexus_endpoint_destroy (&port.ep);
      nexus_endpointattr_destroy (&port.epattr);
		      
      utils_fprintf (stderr, 
		     "\nglobus_duroc_barrier: aborting job!\n"
		     "globus_duroc_barrier: reason: %s\n\n",
		     ((die_reason 
		       == GLOBUS_DUROC_ERROR_INVALID_CHECKIN)
		      ? "our checkin was invalid!"
		      : ((die_reason 
			  == GLOBUS_DUROC_ERROR_PROTOCOL_VERSION_MISMATCH)
			 ? "incompatible protocol versions!"
			 : "unknown failure!")));

      /* tell slaves to die */
      for (i=1; i<gram_size; i++) {
	globus_byte_t msg[GRAM_MYJOB_MAX_BUFFER_LENGTH];

	utils_sprintf ((char *) msg, "%.8x", die_reason);
	assert (msg[8]==((globus_byte_t)'\0'));

	globus_duroc_runtime_intra_subjob_send (i, 
						"globus_duroc_runtime run status", 
						1 + utils_strlen ((char *)msg), 
						msg);
      }

      exit (1);
    }
    else {
      err = nexus_mutex_unlock (&(port.mutex)); assert (!err);

      /* destroy command port */
      nexus_endpoint_destroy (&port.ep);
      nexus_endpointattr_destroy (&port.epattr);

      /* tell slaves to run */
      for (i=1; i<gram_size; i++) {
	globus_byte_t msg[GRAM_MYJOB_MAX_BUFFER_LENGTH];

	utils_sprintf ((char *) msg, "%.8x", 0 /* SUCCESS */);
	assert (msg[8]==((globus_byte_t)'\0'));

	globus_duroc_runtime_intra_subjob_send (i, 
						"globus_duroc_runtime run status", 
						1 + utils_strlen ((char *)msg), 
						msg);
      }
    }
  }
  else {
    /* SLAVES WAIT ON MASTER */
    int msg_len;
    int run_error;

    globus_byte_t msg[GRAM_MYJOB_MAX_BUFFER_LENGTH];

    globus_duroc_runtime_intra_subjob_receive ("globus_duroc_runtime run status", 
					       &msg_len, msg);

    assert (msg[8]==((globus_byte_t)'\0'));
    globus_nexus_stdio_lock ();
    err = sscanf ((char *) msg, "%x", &run_error);
    globus_nexus_stdio_unlock ();
    assert (err==1);

    if ( run_error ) {
      utils_fprintf (stderr, 
		     "\nglobus_duroc_barrier: aborting job!\n"
		     "globus_duroc_barrier: reason: %s\n\n",
		     ((run_error
		       == GLOBUS_DUROC_ERROR_INVALID_CHECKIN)
		      ? "our checkin was invalid!"
		      : ((run_error
			  == GLOBUS_DUROC_ERROR_PROTOCOL_VERSION_MISMATCH)
			 ? "incompatible protocol versions!"
			 : "unknown failure!")));
      
      exit (1);
    }
  }

  s_barrier_pending = 0;
  err = s_inter_subjob_init (); /* recall for "deferred" initialization */

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
	       "barrier: job %d  subjob %d  leaving barrier.\n",
	       job_serialno, subjob_serialno);

  if (err) {
    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		 "barrier: job %d  subjob %d  inter_subjob_init failure %d\n",
		 job_serialno, subjob_serialno, err);
  }

 fn_exit:
  globus_module_deactivate (GLOBUS_DUROC_RUNTIME_MODULE);
}

/* Inter-subjob DUCT state: the runtime handle, the fifo of raw messages
 * delivered by the data callback, and the mutex/condition guarding that
 * fifo. */
static globus_duct_runtime_t s_inter_subjob_duct_runtime;
static globus_fifo_t       s_inter_subjob_duct_fifo;
static nexus_mutex_t       s_inter_subjob_duct_mutex;
static nexus_cond_t        s_inter_subjob_duct_cond;


/*
 * DUCT data callback: wrap the delivered buffer in a fifo node, append it
 * to the raw-message fifo under the mutex, and wake any receiver waiting
 * on the condition.  The `msg` pointer itself is stored (not copied), so
 * the buffer must stay valid after this callback returns.
 */
static void
s_inter_subjob_duct_data_callback (globus_duct_runtime_t *runtimep,
                                   int msg_size,
                                   globus_byte_t *msg,
                                   void * userdata)
{
  int err;
  globus_duroc_fifo_msg_t *fifo_msg;

  GLOBUS_IGNORE runtimep;
  GLOBUS_IGNORE userdata;

  fifo_msg = ((globus_duroc_fifo_msg_t *)
              globus_malloc (sizeof (globus_duroc_fifo_msg_t)));
  assert (fifo_msg!=NULL);

  fifo_msg->len = msg_size;
  fifo_msg->msg = msg;
  
  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "inter subjob duct callback: %d byte mesg received\n",
               msg_size);

  err = nexus_mutex_lock (&s_inter_subjob_duct_mutex); assert (!err);

  /* check the enqueue result like every other enqueue in this file,
   * instead of letting the next call overwrite it */
  err = globus_fifo_enqueue (&s_inter_subjob_duct_fifo,
                             (void *) fifo_msg);
  assert (!err);

  err = nexus_cond_broadcast (&s_inter_subjob_duct_cond); assert (!err);

  err = nexus_mutex_unlock (&s_inter_subjob_duct_mutex); assert (!err);

  return;
}

/* Non-zero once globus_duct_runtime_init has succeeded. */
static int s_inter_subjob_duct_initialized = 0;

/*
 * Set up inter-subjob messaging on the subjob master:
 * -- read the DUCT contact (GLOBUS_DUROC_DUCT_CONTACT) and hex id
 *    (GLOBUS_DUROC_DUCT_ID) from the environment
 * -- create the raw-message fifo and its mutex/condition
 * -- if a contact is present, join the DUCT group
 *
 * Returns 0 when there is no contact or the join succeeds, otherwise the
 * error reported by globus_duct_runtime_init.
 */
static int
s_inter_subjob_duct_init ()
{
  int    rc;
  int    my_rank;
  int    duct_id;
  char * duct_contact   = getenv ("GLOBUS_DUROC_DUCT_CONTACT");
  char * duct_id_string = getenv ("GLOBUS_DUROC_DUCT_ID");

  if ( (duct_contact != NULL) && (duct_id_string != NULL) ) {
    nexus_stdio_lock ();
    rc = sscanf (duct_id_string, "%x", &duct_id);
    nexus_stdio_unlock ();
    assert (rc==1);
  }
  else {
    duct_id = -1;
  }

  rc = globus_fifo_init (&s_inter_subjob_duct_fifo);
  assert (rc == 0);
  rc = nexus_mutex_init (&s_inter_subjob_duct_mutex, NULL);
  assert (rc == 0);
  rc = nexus_cond_init (&s_inter_subjob_duct_cond, NULL);
  assert (rc == 0);

  rc = gram_myjob_rank (&my_rank);
  assert (rc == 0);

  /* only the subjob master may get here */
  assert (my_rank == 0);

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "\n"
               "inter subjob duct init: contact >>%s<<  id %x  gram rank %d\n"
               "\n",
               (duct_contact ? duct_contact : "(null)"), 
               duct_id, my_rank);

  if ( duct_contact == NULL )
    return 0;

  rc = globus_duct_runtime_init (&s_inter_subjob_duct_runtime,
                                 duct_contact,
                                 duct_id,
                                 s_inter_subjob_duct_data_callback,
                                 NULL /* data userdata */,
                                 NULL /* config callback */,
                                 NULL /* config userdata */);
  if ( rc )
    return rc;

  s_inter_subjob_duct_initialized = 1;
  return 0;
}

/*
 * Hand a raw message to the DUCT runtime for delivery to subjob
 * `dst_addr`.  Must be called on the subjob master.  Returns the DUCT
 * send result, or GLOBUS_DUROC_ERROR_INVALID_OPERATION when the DUCT
 * group was never joined.
 */
static int 
s_inter_subjob_duct_send (int             dst_addr,
                          int             msg_size,
                          globus_byte_t * msg)
{
  int rc;
  int my_rank;

  rc = gram_myjob_rank (&my_rank);
  assert (rc == 0);
  assert ( my_rank == 0 );

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "inter subjob duct send: sending %d byte mesg to subjob %d\n",
               msg_size,
               dst_addr);

  if ( ! s_inter_subjob_duct_initialized )
    return GLOBUS_DUROC_ERROR_INVALID_OPERATION;

  return globus_duct_runtime_send (&s_inter_subjob_duct_runtime,
                                   dst_addr,
                                   msg_size,
                                   msg);
}

/*
 * Block until the DUCT data callback has queued a raw message, then hand
 * its length and buffer pointer to the caller.  The fifo node is freed
 * here; the buffer itself is passed on through *msgp.
 *
 * Returns 0, or GLOBUS_DUROC_ERROR_INVALID_OPERATION when the DUCT group
 * was never joined.
 */
static int
s_inter_subjob_duct_receive (int            * msg_sizep,
                             globus_byte_t ** msgp)
{
  int rc;
  int my_rank;
  globus_duroc_fifo_msg_t * node;

  if ( s_inter_subjob_duct_initialized == 0 )
    return GLOBUS_DUROC_ERROR_INVALID_OPERATION;

  rc = gram_myjob_rank (&my_rank);
  assert (rc == 0);
  assert ( my_rank == 0 );

  rc = nexus_mutex_lock (&s_inter_subjob_duct_mutex);
  assert (rc == 0);

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "inter subjob duct receive: waiting for a mesg\n");

  /* the callback broadcasts on the condition after each enqueue */
  while ( globus_fifo_empty (&s_inter_subjob_duct_fifo) ) {
    rc = nexus_cond_wait (&s_inter_subjob_duct_cond,
                          &s_inter_subjob_duct_mutex);
  }
  
  node = globus_fifo_dequeue (&s_inter_subjob_duct_fifo);
  assert (node!=NULL);

  rc = nexus_mutex_unlock (&s_inter_subjob_duct_mutex);
  assert (rc == 0);

  (*msg_sizep) = node->len;
  (*msgp) = node->msg;

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "inter subjob duct receive: %d byte message received\n",
               (*msg_sizep));

  globus_free (node);

  return 0;
}

static int 
s_inter_subjob_duct_structure (int            * local_address,
			       int            * remote_count,
			       int           ** remote_addresses)
{
  int err;
  int gram_rank;

  err = gram_myjob_rank (&gram_rank); assert (!err);
  assert ( gram_rank == 0 );

  return globus_duct_runtime_structure (&s_inter_subjob_duct_runtime,
				      local_address,
				      remote_count,
				      remote_addresses);
}

/*
 * Disabled teardown for the inter-subjob DUCT state.  The whole function
 * is compiled out by the #if 0 below, so nothing calls it.  As written it
 * would destroy the duct mutex, condition and fifo; the call that would
 * destroy the DUCT runtime itself is additionally commented out.
 */
#if 0
static void 
s_inter_subjob_duct_done ()
{
  int err;
  int gram_rank;  
  char * checkin_contact;

  checkin_contact = getenv ("GLOBUS_DUROC_DUCT_CONTACT");

  err = gram_myjob_rank (&gram_rank); assert (!err);
  assert ( gram_rank == 0 );

  nexus_mutex_destroy (&s_inter_subjob_duct_mutex);
  nexus_cond_destroy (&s_inter_subjob_duct_cond);
  globus_fifo_destroy (&s_inter_subjob_duct_fifo);

  /* 
  if ( checkin_contact!=NULL )
    globus_duct_runtime_destroy (&s_inter_subjob_duct_runtime);
    */
}
#endif 

/*
 * Send a tagged message from this subjob's master to another subjob.
 *
 * Wire layout handed to the DUCT layer:
 *   8 hex digits of protocol version, '\0', tag bytes, '\0', payload
 *
 * dest_addr  DUCT address of the destination subjob
 * tag        NUL-terminated tag selecting the receiver's queue
 * msg_len    payload length in bytes (may be 0)
 * msg        payload bytes
 *
 * Returns GLOBUS_DUROC_ERROR_NOT_INITIALIZED before inter-subjob
 * initialisation, GLOBUS_DUROC_ERROR_INVALID_OPERATION when called off
 * rank 0 or when no DUCT group was joined,
 * GLOBUS_DUROC_ERROR_INVALID_PARAMETER for a NULL tag, a negative length,
 * a NULL payload with non-zero length, or a message whose framed size
 * exceeds GRAM_MYJOB_MAX_BUFFER_LENGTH; otherwise the DUCT send result.
 */
int 
globus_duroc_runtime_inter_subjob_send (int             dest_addr,
                                        const char    * tag,
                                        int             msg_len,
                                        globus_byte_t * msg)
{
  int err;
  int i,j;
  int tag_len;
  globus_byte_t duct_msg [GRAM_MYJOB_MAX_BUFFER_LENGTH];
  int gram_rank;

  if ( ! s_inter_subjob_initialized ) 
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  err = gram_myjob_rank (&gram_rank); assert (!err);
  if ( gram_rank != 0 )
    return GLOBUS_DUROC_ERROR_INVALID_OPERATION;

  if ( (tag == NULL)
       || (msg_len < 0)
       || ((msg_len > 0) && (msg == NULL)) )
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  tag_len = (int) utils_strlen (tag);

  /* framing costs 8 version nibbles + '\0' + tag + '\0'; this is checked
   * at run time (not only by assert) so an oversized message can never
   * overrun duct_msg, and written as subtractions so the sum cannot
   * overflow */
  if ( (tag_len > (GRAM_MYJOB_MAX_BUFFER_LENGTH - 10))
       || (msg_len > (GRAM_MYJOB_MAX_BUFFER_LENGTH - 10 - tag_len)) )
    return GLOBUS_DUROC_ERROR_INVALID_PARAMETER;

  {
    int version = GLOBUS_DUROC_RUNTIME_INTER_SEND_PROTOCOL_VERSION;
    utils_sprintf ((char *) duct_msg, "%.8x", version);
    assert ( duct_msg[8]==((globus_byte_t) '\0') );
  }

  for (j=0, i=9; j < tag_len; j++, i++) {
    duct_msg[i] = (globus_byte_t) tag[j];
  }
  duct_msg[i] = (globus_byte_t) '\0'; i++;

  for (j=0; j<msg_len; j++, i++) {
    duct_msg[i] = (globus_byte_t) msg[j];
  }

  utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
               "inter_subjob_send: version >>%s<<, tag >>%s<<(%d)\n\n",
               (char *) duct_msg,
               (char *) (duct_msg+9),
               tag_len);
  
  err = s_inter_subjob_duct_send (dest_addr, 
                                  8 + 1 + tag_len + 1 + msg_len,
                                  duct_msg);

  return err;
}

int
globus_duroc_runtime_inter_subjob_receive (const char     * tag,
						int            * msg_lenp,
						globus_byte_t ** msgp)
{
  int err;
  globus_fifo_t *fifo;
  int gram_rank;

  /* the user must enable the runtime module first */
  if ( ! s_inter_subjob_initialized ) 
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;
  
  /* only the subjob master (rank 0) can send/receive */
  err = gram_myjob_rank (&gram_rank); assert (!err);
  if ( gram_rank != 0 )
    return GLOBUS_DUROC_ERROR_INVALID_OPERATION;

  /* there are no duct peers, so send/receive is nonsensical */
  if ( ! s_inter_subjob_duct_initialized )
    return GLOBUS_DUROC_ERROR_INVALID_OPERATION;

  fifo = ((globus_fifo_t *)
	  globus_hashtable_lookup (&s_inter_subjob_tagged_duct_hasht,
				   (void *) tag));
  if ( fifo == NULL ) {
    char * tag_copy;

    tag_copy = utils_strdup (tag);

    fifo = (globus_fifo_t *) globus_malloc (sizeof (globus_fifo_t));
    err = globus_fifo_init (fifo);
    assert (!err);
    err = globus_hashtable_insert (&s_inter_subjob_tagged_duct_hasht,
				   (void *) tag_copy,
				   (void *) fifo);
    assert (!err);

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		 "inter_subjob_tagged_duct_receive: new queue >>%s<<\n\n",
		 tag);
  }

  while ( globus_fifo_empty (fifo) ) {
    int i,j;
    int duct_msg_len;
    globus_byte_t * duct_msg;
    char          duct_tag[GRAM_MYJOB_MAX_BUFFER_LENGTH];
    globus_fifo_t * fifo;
    globus_duroc_fifo_msg_t *msgp;

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		 "inter_subjob_tagged_duct_receive: blocking on receive "
		 "(looking for >>%s<<)\n\n",
		 tag);

    err = s_inter_subjob_duct_receive (&duct_msg_len, &duct_msg);
    assert (!err);

    {
      int version;

      nexus_stdio_lock ();
      err = sscanf ((char *) duct_msg, "%x", &version); assert (err==1);
      nexus_stdio_unlock ();
      assert ( duct_msg[8]==((globus_byte_t) '\0') );

      if ( version != GLOBUS_DUROC_RUNTIME_INTER_SEND_PROTOCOL_VERSION ) {
	return GLOBUS_DUROC_ERROR_PROTOCOL_VERSION_MISMATCH;
      }
    }

    for (j=0, i=9; 
	 (i<duct_msg_len) && (((char) duct_msg[i])!='\0'); 
	 j++, i++) {
      duct_tag[j] = (char) duct_msg[i];
    }
    duct_tag[j] = '\0'; j++, i++;

    utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
	 "inter_subjob_tagged_duct_receive: queuing msg for >>%s<<\n\n",
	 duct_tag);

    msgp = ((globus_duroc_fifo_msg_t *)
	    globus_malloc (sizeof (globus_duroc_fifo_msg_t)));
    assert (msgp!=NULL);

    if ( (duct_msg_len - i) > 0 ) {
      msgp->msg = ((globus_byte_t *) globus_malloc (sizeof (globus_byte_t)
						    * (duct_msg_len - i)));
      assert (msgp->msg!=NULL);
    }
    else {
      msgp->msg = NULL;
    }

    for ( j=i; j<duct_msg_len; j++) {
      msgp->msg[j-i] = (globus_byte_t) duct_msg[j];
    }

    msgp->len = duct_msg_len - i;

    fifo = ((globus_fifo_t *)
	    globus_hashtable_lookup (&s_inter_subjob_tagged_duct_hasht,
				     (void *) duct_tag));
    if ( fifo == NULL ) {
      char * tag_copy;

      tag_copy = utils_strdup (duct_tag);

      utils_debug (GLOBUS_DUROC_DEBUG_FLAG,
		   "inter_subjob_tagged_duct_receive: new queue >>%s<<\n\n",
		   duct_tag);

      fifo = (globus_fifo_t *) globus_malloc (sizeof (globus_fifo_t));
      err = globus_fifo_init (fifo);
      assert (!err);
      err = globus_hashtable_insert (&s_inter_subjob_tagged_duct_hasht,
				     (void *) tag_copy,
				     (void *) fifo);
      assert (!err);
    }

    err = globus_fifo_enqueue (fifo, (void *) msgp);
    assert (!err);
  }

  {
    globus_duroc_fifo_msg_t *msg;

    msg = ((globus_duroc_fifo_msg_t *)
	   globus_fifo_dequeue (fifo));
    
    (*msgp) = (msg->msg);

    (*msg_lenp) = msg->len;

    globus_free (msg);
  }

  return GLOBUS_DUROC_SUCCESS;
}

/*
 * Report this subjob's DUCT address together with the count and
 * addresses of its peers.
 *
 * Returns GLOBUS_DUROC_ERROR_NOT_INITIALIZED before inter-subjob
 * initialisation, GLOBUS_DUROC_ERROR_INVALID_OPERATION when called off
 * rank 0, otherwise whatever s_inter_subjob_duct_structure returns.
 */
int 
globus_duroc_runtime_inter_subjob_structure (int  * local_address,
                                             int  * remote_count,
                                             int ** remote_addresses)
{
  int rc;
  int my_rank;

  if ( s_inter_subjob_initialized == 0 ) 
    return GLOBUS_DUROC_ERROR_NOT_INITIALIZED;

  rc = gram_myjob_rank (&my_rank);
  assert (rc == 0);

  /* only the subjob master holds the duct state */
  if ( my_rank != 0 )
    return GLOBUS_DUROC_ERROR_INVALID_OPERATION;

  rc = s_inter_subjob_duct_structure (local_address,
                                      remote_count,
                                      remote_addresses);
  return rc;
}


/*
 * DIE command handler.  Reads the reason code from the message, records
 * it on the command port attached to the endpoint, raises the port's die
 * flag and wakes whoever is waiting on the port's condition.
 */
static void 
s_die_msg_handler (nexus_endpoint_t * endpointp,
                   nexus_buffer_t   * bufferp,
                   nexus_bool_t       is_non_threaded_handler)
{
  int rc;
  int reason;
  globus_duroc_runtime_command_port_t * port;

  GLOBUS_IGNORE is_non_threaded_handler;

  port = ((globus_duroc_runtime_command_port_t *)
          nexus_endpoint_get_user_pointer (endpointp));
  assert (port!=NULL);

  rc = nxbuff_get_int (bufferp, &reason);
  assert (rc == 0);

  rc = nexus_mutex_lock (&(port->mutex));
  assert (rc == 0);

  port->die = GLOBUS_TRUE;
  port->die_reason = reason;

  rc = nexus_cond_broadcast (&(port->cond));
  assert (rc == 0);

  rc = nexus_mutex_unlock (&(port->mutex));
  assert (rc == 0);
}

/*
 * RUN command handler.  Raises the run flag on the command port attached
 * to the endpoint and wakes whoever is waiting on the port's condition.
 * The message body is not read.
 */
static void 
s_run_msg_handler (nexus_endpoint_t * endpointp,
                   nexus_buffer_t   * bufferp,
                   nexus_bool_t       is_non_threaded_handler)
{
  int rc;
  globus_duroc_runtime_command_port_t * port;

  GLOBUS_IGNORE bufferp;
  GLOBUS_IGNORE is_non_threaded_handler;

  port = ((globus_duroc_runtime_command_port_t *)
          nexus_endpoint_get_user_pointer (endpointp));
  assert (port!=NULL);

  rc = nexus_mutex_lock (&(port->mutex));
  assert (rc == 0);

  port->run = GLOBUS_TRUE;

  rc = nexus_cond_broadcast (&(port->cond));
  assert (rc == 0);

  rc = nexus_mutex_unlock (&(port->mutex));
  assert (rc == 0);
}


