/*
 * Nexus
 *
 * pr_totem.c		- TOTEM Reliable Ordered Multicast 
 *                        Message Delivery protocol module
 * 
 * ISSUES:
 *
 *   Need a way to pass the header info up from the protocol to the
 *    application handler function
 *
 *   Need to be able to pass up membership messages and other special 
 *       info messages like flow control and QoS
 *
 * Organization
 *
 *   A communication channel (totem_comm_t) exists for each connection we have
 *   with a totemd.  The communication channel is bidirectional, and therefore
 *   the associated totem_comm_t contains both the incoming and outgoing state.
 *
 *   A single communication channel may have multiple process groups
 *   (totem_process_group_t) associated with it.  Each totem_process_group_t
 *   structure contains both the incoming and outgoing message state for
 *   the process group.  Messages for a particular process group are identified
 *   by process group ID (long) which can be used to obtain the associated
 *   totem_process_group_t out of the process group hash table.
 *
 *   Multiple startpoints and endpoints may exist for a single process group.
 *   When a message is received for a process group, the message is delivered
 *   to each endpoint.  If a process group has no endpoints bound to it, the
 *   message is simply read in and ignored (this happens when the process group
 *   has one or more startpoints bound to it, so the process must remain a
 *   member of the group for the purposes of sending even though it no longer
 *   cares about the data being received).
 */

static char *rcsid = "$Header: /home/globdev/CVS/globus-current/Globus/Communication/nexus/libraries/nexus/pr_totem.c,v 1.20 1999/06/01 17:06:11 bresnaha Exp $";

#include "internal.h"
#include "globus_common.h"

#include <sys/types.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

#if  defined(HAVE_TOTEM_PROTO)
/*
 * totem_debug_printf()
 *
 * Debug output helper.  'printf_params' is a fully parenthesized printf
 * argument list, e.g. totem_debug_printf(1, ("x = %d\n", x)).
 *
 * In the currently enabled variant the message goes straight to
 * nexus_printf() for levels below 2 or above 4; levels 2 through 4 are
 * suppressed.  The disabled variant forwards to nexus_debug_printf().
 *
 * The callee name and the parenthesized argument list are simply written next
 * to each other.  They must not be joined with the ## operator: pasting an
 * identifier onto '(' does not form a valid preprocessing token and is
 * rejected by current compilers.
 *
 * The expansion is deliberately kept as a plain brace block so that existing
 * call sites (with or without a trailing semicolon) keep compiling.  Note
 * that 'lvl' is evaluated twice.
 */
#if 1
#    define totem_debug_printf(lvl, printf_params)	\
    {                                                   \
      if( (lvl) < 2 || (lvl) > 4 )                      \
      {                                                 \
        nexus_printf printf_params;			\
      }                                                 \
    }
#else
#   define totem_debug_printf(lbl, printf_params) \
        nexus_debug_printf(lbl, printf_params)
#endif


/*
 * GLOBUS_L_NEXUS_TOTEM_PROTOCOL_VERSION
 *
 * The version of this protocol module's wire protocol.  If you change
 * this module wire protocol, bump this version number.
 */
#define GLOBUS_L_NEXUS_TOTEM_PROTOCOL_VERSION (0+GLOBUS_I_NEXUS_BUFFER_VERSION)

/*
 * GLOBUS_L_NEXUS_TOTEM_MI_PROTO_VERSION
 *
 * The version of this protocol module's mi_proto.  If you change
 * the contents of this module's mi_proto, bump this version number.
 */
#define GLOBUS_L_NEXUS_TOTEM_MI_PROTO_VERSION 0

/*
 * NEXUS_TOTEM_DIRECT_CUSTOM_MIN_SIZE
 *
 * The direct custom size determines at what message size nexus should switch
 * to fragmented buffers instead of inlining the direct components into the
 * base message.  (This is not implemented yet; use NEXUS_DC_MAX_U_LONG to
 * turn it off.)
 */
#if !defined(NEXUS_TOTEM_DIRECT_CUSTOM_MIN_SIZE)
#    define NEXUS_TOTEM_DIRECT_CUSTOM_MIN_SIZE NEXUS_DC_MAX_U_LONG
#endif

/*
 * Only one thread is allowed to be in the totem code (and thus
 * mucking with data structures) at a time.
 *
 * totem_enter() / totem_exit() bracket every critical section.  With
 * BUILD_LITE defined they expand to nothing; otherwise they lock and unlock
 * the file-scope totem_mutex.
 *
 * NOTE(review): the non-LITE expansions already end in ';', so the usual
 * "totem_enter();" call leaves an extra empty statement behind.  That is
 * harmless inside a block but would break an unbraced if/else.
 */

#ifdef BUILD_LITE
#   define totem_enter()
#   define totem_exit()
#else  /* BUILD_LITE */
    static nexus_mutex_t		totem_mutex;
#   define totem_enter() nexus_mutex_lock(&totem_mutex);
#   define totem_exit()	nexus_mutex_unlock(&totem_mutex);
#endif /* BUILD_LITE */

/* set to NEXUS_FALSE by totem_init() and to NEXUS_TRUE by totem_shutdown() */
static nexus_bool_t		totem_done;
/* host and port of the totem daemon, as resolved by totem_init() */
static char			totem_daemon_host[MAXHOSTNAMELEN];
static unsigned short		totem_daemon_port;
/* list of totem_comm_t pointers; totem_shutdown() polls until it is empty */
static globus_list_t *		totem_comm_list;
/* lookup tables, initialized in totem_init() with the hash/keyeq helpers */
static globus_hashtable_t	totem_comm_hash_table;
static globus_hashtable_t	totem_process_group_hash_table;

/* sizes handed to globus_hashtable_init() for the two tables above */
#define TOTEM_COMM_HASH_TABLE_SIZE 13
#define TOTEM_PROCESS_GROUP_HASH_TABLE_SIZE 13

/*
 * totem_fatal
 *
 * Releases the totem lock (totem_exit()) and then continues as a call to
 * globus_fatal with whatever argument list follows the macro name.
 *
 * NOTE(review): this expands to two statements, so it is only safe inside a
 * braced block; under an unbraced "if" only the totem_exit() part would be
 * conditional.
 */
#define totem_fatal totem_exit(); globus_fatal

/*
 * Other useful defines
 */

/* define the default incoming storage size */
#define TOTEM_INCOMING_DEFAULT_SIZE 4096

/* port number totem daemon listens on by default */
#define TOTEM_DEFAULT_PORT 4589 

/*
 * Some useful queue macros
 *
 * The queues are singly linked through each item's 'next' field and are
 * described by a head and a tail pointer.  A NULL head means "empty"; the
 * tail is only consulted while the head is non-NULL.
 */

/*
 * Enqueue: append Item.  The empty case is detected by testing Qhead only, in
 * which case both head and tail are pointed at Item.
 * NOTE(review): Item->next is not cleared here -- presumably the caller
 * guarantees it is NULL; confirm.
 */
#define Enqueue(Qhead, Qtail, Item) \
{ \
    if (Qhead) \
    { \
	(Qtail)->next = (Item); \
	(Qtail) = (Item); \
    } \
    else \
    { \
	(Qhead) = (Qtail) = (Item); \
    } \
}

/*
 * Dequeue: detach the head into Item and advance the head.  Qtail is accepted
 * but never touched; a stale tail is tolerated because Enqueue resets it
 * whenever the head is NULL.  Dereferences Qhead, so the queue must not be
 * empty.
 */
#define Dequeue(Qhead, Qtail, Item) \
{ \
    (Item) = (Qhead); \
    (Qhead) = (Qhead)->next; \
}

/* Evaluates to the head pointer itself, i.e. non-zero when items are queued */
#define QueueNotEmpty(Qhead)	(Qhead)


/*
 * Define some totem specific variables and constants
 */

/* message type values; totem_send_rsr() stamps outgoing data TOTEM_AGREED */
#define	TOTEM_AGREED	0x0030
#define	TOTEM_SAFE	0x0040

/*
 * Further message type codes.
 * NOTE(review): the 10xx / 11xx numbering suggests request / answer pairs
 * exchanged with totemd -- confirm against the daemon's own definitions.
 */
#define	TOTEM_JOIN_SET	1002
#define	TOTEM_JOIN_ANS	1102
#define	TOTEM_QUERY_SET	1003
#define	TOTEM_QUERY_ANS	1103
#define	TOTEM_LEAVE_SET	1004
#define	TOTEM_LEAVE_ANS	1104
#define TOTEM_REQ_CLOSE  	1005
#define TOTEM_DISCONNECT_OK	1105
#define	TOTEM_CLOSE_MBOX	1006
#define	TOTEM_SEND_ANS	        1107
#define TOTEM_MEMBERSHIP	1108


/*
 * totem_pg_id_t
 *
 * Process group identifier for Totem.  This object consists of a processor id
 * (currently inet address) and the process id returned from Totemd.
 *
 * The structure is embedded in totem_inf_header_t, which totem_send_rsr()
 * writes into the outgoing buffer as-is, so its field types and order are
 * part of the wire layout.
 */
typedef struct totem_pg_id_s totem_pg_id_t;
struct totem_pg_id_s
{
    /* processor id */
    unsigned long			proc;
    /* process (group) id */
    unsigned long			pid;
};

/*
 * totem_pg_header_t
 *
 * This struct defines the header passed to the application from the
 * Process_Receive routine.
 *
 * Max_PG is the fixed capacity of every process group id array in this file
 * (pg_pgid here, inf_pgid in totem_inf_header_t, and membership in
 * totem_process_group_t).
 */
#define Max_PG       10

typedef struct totem_pg_header_s totem_pg_header_t;
struct totem_pg_header_s
{
    /* header length only */
    int					pg_header_len;

    /* length: message payload (hm_pad) */
    int					pg_payld_len;

    /* type of message: agreed, safe, join, leave, etc */
    int					pg_type;

    /* number of process groups being sent to */
    int					pg_num_sets;

    /* processor and process that sent message */
    totem_pg_id_t			pg_sender;

    /* process group id's of receiving groups */
    totem_pg_id_t			pg_pgid[Max_PG];

    /* tag for filtering on WAP */
    unsigned long			pg_tag;
};

#define TOTEM_PG_TAG_SIZE sizeof(unsigned long)
#define TOTEM_pg_snd_proc pg_sender.proc
#define TOTEM_pg_snd_pid pg_sender.pid

/*
 * totem_inf_header_t
 *
 * This structure defines the header on messages passed to and from the
 * totem daemon.  Note, it is slightly different from the header passed to
 * the application.
 *
 * totem_send_rsr() overlays this structure directly on the outgoing buffer,
 * so the field types, order and padding are the wire layout (see the DEBTODO
 * in that function about word size and packing assumptions).
 */
typedef struct totem_inf_header_s totem_inf_header_t;
struct totem_inf_header_s
{
    /* entire length: message and header */
    long				inf_len;

    /* header length only */
    int					inf_header_len;

    /* agreed, safe, join, leave */
    int					inf_type;

    /* number of process groups being sent to */
    int	inf_num_sets;

    /* process group id's of receiving groups */
    totem_pg_id_t			inf_pgid[Max_PG];
};


/* INF header size */
#define TOTEM_INF_HDR_SIZE sizeof(totem_inf_header_t)

/*
 * Header size = inf header + one leading 'long' holding the message length.
 *
 * The sum is parenthesized so that the macro behaves as a single value in any
 * expression (e.g. "2 * TOTEM_HDR_SIZE" or "ptr - TOTEM_HDR_SIZE").
 */
#define TOTEM_HDR_SIZE         (TOTEM_INF_HDR_SIZE + sizeof(long))


/*
 * Some forward references...
 */
typedef struct totem_comm_s totem_comm_t;
typedef struct totem_comm_outgoing_s totem_comm_outgoing_t;
typedef struct totem_comm_incoming_s totem_comm_incoming_t;
typedef struct totem_comm_hashkey_s totem_comm_hashkey_t;
typedef struct totem_process_group_hashkey_s totem_process_group_hashkey_t;
typedef struct totem_process_group_s totem_process_group_t;

/*
 * totem_comm_incoming_t
 *
 * One of these structures is associated with each totemd communication
 * channel.  It stores partial incoming messages until they are complete and
 * can be queued.  It also contains a queue of messages waiting to be
 * dispatched.
 */
struct totem_comm_incoming_s
{
    /* current message state including the format of message header and data
       (see nexus_dc), the number of bytes read that have been processed, and
       the number of bytes read that have not been processed */
    /* NOTE(review): presumably MSG_SIZE = waiting for the length word and
       BODY = reading the message itself -- confirm in the read callback */
    enum
    {
	INCOMING_STATE_MSG_SIZE,
	INCOMING_STATE_BODY
    }					state;
    int					format;
    unsigned long			msg_size;

    /* current storage buffer information including a pointer to the next
       unused byte (current) in the buffer */
    unsigned long			storage_size;
    nexus_byte_t *			storage;
    nexus_byte_t *			current;

    /* message dispatch queue information */
    nexus_bool_t			dispatch_in_progress;
    struct globus_nexus_buffer_s *		dispatch_q_head;
    struct globus_nexus_buffer_s *		dispatch_q_tail;
};


/*
 * totem_comm_outgoing_t
 *
 * This structure will be used to hold the outgoing message queue for all
 * process groups associated with a particular totemd communication channel.
 *
 * totem_send_rsr() appends buffers with Enqueue() and kicks off a write via
 * outgoing_register_next_write() only when write_in_progress is false.
 */
struct totem_comm_outgoing_s
{
    /* send queue */
    nexus_bool_t		write_in_progress;
    struct globus_nexus_buffer_s *	write_q_head;
    struct globus_nexus_buffer_s *	write_q_tail;
};


/*
 * totem_comm_hashkey_t
 *
 * Simple structure to keep the host and port in a single object for hashing
 * purposes.
 *
 * NOTE(review): 'host' is a pointer, not a copy -- presumably it aliases the
 * host string owned by the enclosing totem_comm_t; confirm.
 */
struct totem_comm_hashkey_s
{
    char *				host;
    unsigned short			port;
};


/*
 * totem_comm_t
 *
 * This structure contains the state associated with the communication channel
 * between Nexus and totemd.
 */
struct totem_comm_s
{
    /* communication channel state */
    enum
    {
	COMM_STATE_OPEN,
	COMM_STATE_CLOSED
    }					state;

    /* file descriptor connected to totemd (asserted >= 0 before sending) */
    int					fd;

    /* this process's unique identifier */
    totem_pg_id_t			pgid;

    /* incoming and outgoing structures associated with this communication
       channel */
    totem_comm_incoming_t		incoming;
    totem_comm_outgoing_t		outgoing;

    /* Data structures to track process groups associated with this
       communication channel (list of totem_process_group_t pointers, walked
       by totem_shutdown()) */
    globus_list_t *			pg_list;

    /* connection information used to eliminate redundant channels */
    char *				host;
    unsigned short			port;
    totem_comm_hashkey_t		hashkey;
};


/*
 * totem_process_group_hashkey_t
 *
 * Simple structure to keep the host, port, and process group ID in a single
 * object for hashing purposes.  These are the same three values passed to
 * totem_process_group_find().
 */
struct totem_process_group_hashkey_s
{
    /* XXX: it might make more sense if this were an inet addr */
    char *				host;
    unsigned short			port;
    long				pid;
};


/*
 * totem_process_group_t
 *
 * totem_process_group_t is an overload of nexus_proto_t.  Every Nexus
 * startpoint has a pointer to a nexus_proto_t and therefore each startpoint
 * using the totem protocol has a pointer to a totem_process_group_t.
 * The functions in this file cast freely between the two pointer types, so
 * the leading fields must stay layout-compatible with nexus_proto_t.
 *
 * In Totem we have one totem_pg_outgoing_t per process group.  A process group
 * may have multiple startpoints and endpoints associated with it. So, in
 * addition to required nexus_proto_t information, totem_process_group_t
 * contains totem specific information needed to manage these startpoints and
 * endpoints.
 * NOTE(review): no totem_pg_outgoing_t type is declared in this part of the
 * file; the sentence above may be stale.
 *
 * In particular, reference counting is needed to keep track of the number of
 * startpoints associated with a process group.  Additionally, a list of
 * associated endpoints must be maintained so we know where to deliver incoming
 * messages.
 */
struct totem_process_group_s
{
    /*
     * required nexus_proto_t fields
     */
    nexus_proto_type_t			type;
    nexus_proto_funcs_t *		funcs;
    int					version;
    unsigned long			direct_custom_min_size;
    unsigned long			direct_custom_max_size;
    unsigned long			direct_pointer_min_size;
    unsigned long			direct_pointer_max_size;
    nexus_bool_t			can_use_iovec;
    unsigned long			reserved_header_size;

    /*
     * additional Totem specific fields
     */

    /* totem_send_rsr() accepts sends only in the JOINING and ACTIVE states;
       in the FAULT state it reports fault_code instead */
    enum
    {
	PROCESS_GROUP_STATE_JOINING,
	PROCESS_GROUP_STATE_ACTIVE,
	PROCESS_GROUP_STATE_LEAVING,
	PROCESS_GROUP_STATE_INACTIVE,
	PROCESS_GROUP_STATE_FAULT
    }					state;
    int					fault_code;

    /* this process group's unique identifier */
    long				pid;

    /* machine independent protocol byte array and its length; these are the
       values totem_get_my_mi_proto() hands back through 'array' and 'size' */
    nexus_byte_t *                      proto_array;
    int                                 proto_array_size;

    /* startpoint reference count */
    int					sp_cnt;

    /* endpoint list (nexus_endpoint_t pointers) */
    globus_list_t *			ep_list;

    /* totemd communication channel used by this process group; may be NULL
       (totem_test_proto() and totem_send_rsr() both check for that) */
    totem_comm_t *			comm;

    /* hash key used to eliminate redundant process group structures */
    totem_process_group_hashkey_t	hashkey;

    /* current membership of the process group */
    int                                 numb_members;
    totem_pg_id_t                       membership[Max_PG];

};


/*
 * Various forward declarations of procedures
 */

static void
totem_init(
    nexus_bool_t *			add_to_my_mi_proto);

static void
totem_shutdown(void);

static int
totem_send_rsr(
    struct globus_nexus_buffer_s *	buffer);

static globus_bool_t
totem_send_rsr_outstanding(
    globus_nexus_proto_t *		nproto);

static void
totem_increment_reference_count(
    nexus_proto_t *			nproto);

static nexus_bool_t
totem_decrement_reference_count(
    nexus_proto_t *			nproto);

static int
totem_get_my_mi_proto(
    nexus_byte_t **			array,
    int *				size,
    void *				proto_info,
    nexus_endpoint_t *			endpoint);

static int
totem_destroy_my_mi_proto(
    nexus_endpoint_t *			endpoint,
    nexus_byte_t *			proto_array,
    int					size);



static nexus_bool_t
totem_construct_from_mi_proto(
    nexus_proto_t **			proto,
    nexus_mi_proto_t *			mi_proto,
    nexus_byte_t *			proto_array,
    int					size);

static int
totem_test_proto(
    nexus_proto_t *			proto);

static int
totem_direct_info_size(void);

static void
outgoing_register_next_write(
    totem_comm_t *			comm);

static void
outgoing_write_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				nbytes);

static void
outgoing_write_error_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				n_bytes,
    int					error);

static void
incoming_read_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				nbytes,
    char **      		        new_buf,
    size_t *				new_max_nbytes,
    size_t *				new_wait_for_nbytes);

static void
incoming_read_error_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				nbytes,
    int					error);

static int 
totem_Update_Membership(
    totem_process_group_t *            process_group,
    totem_inf_header_t *               infh,
    totem_pg_id_t *                    sender,
    nexus_byte_t *                     data );

static int
totem_Is_Member(
    totem_process_group_t *             process_group,
    totem_pg_id_t *                     pgid );

static void
totem_Print_Membership( 
    totem_process_group_t *             process_group );

static int
totem_Process_Join_Or_Leave(
    totem_process_group_t *		process_group,
    int					type);

static totem_comm_t *
totem_comm_open(
    char *				host,
    unsigned short				port);

static void
totem_comm_close(
    totem_comm_t *			comm,
    int					fault_code);

static totem_process_group_t *
totem_process_group_join_initiate(
    char *				host,
    unsigned short			port,
    long				pid);

static void
totem_process_group_join_finalize(
    totem_process_group_t *		process_group);

static void
totem_process_group_leave_initiate(
    totem_process_group_t *		process_group);

static void
totem_process_group_leave_finalize(
    totem_process_group_t *		process_group);

static int
totem_comm_hash_value( void *key, int table_size );

static int
totem_comm_hash_keyeq( void *key1, void *key2 );

static int
totem_process_group_hash_value( void *key, int table_size );

static int
totem_process_group_hash_keyeq( void *key1, void *key2 );

totem_process_group_t *
totem_process_group_find( char *host, unsigned short port, long pid );

void
totem_Infheader_htonl( totem_inf_header_t *infh );

void
totem_Infheader_ntohl( totem_inf_header_t *infh );

unsigned long
totem_Get_Hostlong( char *hostname );

#endif /* HAVE_TOTEM_PROTO */

#define GLOBUS_L_TOTEM_PROTO_COUNT            1

static nexus_proto_type_t
totem_proto_type(void);

static globus_bool_t     totem_startpoint_proto_match(
					    globus_nexus_mi_proto_t * mi_proto0,
					    int                       offset0,
					    nexus_byte_t *            subarray0,
					    int                       sub_length0,
					    globus_nexus_mi_proto_t * mi_proto1,
					    int                       offset1,
					    nexus_byte_t *            subarray1,
					    int                       sub_length1);

static int      totem_proto_count(void);

#if  defined(HAVE_TOTEM_PROTO)
/*
 * Function table handed out by _nx_pr_totem_info() when the Totem protocol is
 * compiled in.  The initializer is positional, so the order of the entries
 * must match the field order of nexus_proto_funcs_t and must be kept in step
 * with the stub table in the #else branch below.
 */
static nexus_proto_funcs_t totem_proto_funcs =
{
    totem_proto_type,
    totem_init,
    totem_shutdown,
    totem_increment_reference_count,
    totem_decrement_reference_count,
    totem_get_my_mi_proto,
    totem_construct_from_mi_proto,
    totem_destroy_my_mi_proto,
    totem_test_proto,
    totem_send_rsr,
    totem_send_rsr_outstanding,
    totem_direct_info_size,
    NULL /* totem_direct_get */,
    totem_startpoint_proto_match,
    totem_proto_count,
};

#else

/*
 * Stub function table used when the Totem protocol is not compiled in.  Only
 * the three functions that are defined unconditionally in this file are
 * supplied; every other slot is GLOBUS_NULL.  The slot comments name the
 * function that occupies the same position in the full table above.
 */
static nexus_proto_funcs_t totem_proto_funcs =
{
    totem_proto_type,
    GLOBUS_NULL, /* totem_init */
    GLOBUS_NULL, /* totem_shutdown */
    GLOBUS_NULL, /* totem_increment_reference_count */
    GLOBUS_NULL, /* totem_decrement_reference_count */
    GLOBUS_NULL, /* totem_get_my_mi_proto */
    GLOBUS_NULL, /* totem_construct_from_mi_proto */
    GLOBUS_NULL, /* totem_destroy_my_mi_proto */
    GLOBUS_NULL, /* totem_test_proto */
    GLOBUS_NULL, /* totem_send_rsr */
    GLOBUS_NULL, /* totem_send_rsr_outstanding */
    GLOBUS_NULL, /* totem_direct_info_size */
    GLOBUS_NULL, /* totem_direct_get */
    totem_startpoint_proto_match,
    totem_proto_count,
};

#endif /* HAVE_TOTEM_PROTO */

/*
 * _nx_pr_totem_info()
 *
 * Bootstrap entry point of this protocol module.  The higher level Nexus
 * code calls it to obtain the module's nexus_proto_funcs_t table.
 *
 * Returns the address of totem_proto_funcs as an untyped pointer.
 */
void *_nx_pr_totem_info(void)
{
    void *				funcs_table;

    funcs_table = (void *) &totem_proto_funcs;

    return(funcs_table);
} /* _nx_pr_totem_info() */

/*
 * totem_startpoint_proto_match()
 *
 * (added by bresnaha)
 *
 * Reports whether two totem protocol descriptors refer to the same target.
 * None of the arguments is inspected: every pair is reported as a match.
 *
 * Returns GLOBUS_TRUE unconditionally.
 */
static globus_bool_t
totem_startpoint_proto_match(
    globus_nexus_mi_proto_t *		mi_proto0,
    int					offset0,
    nexus_byte_t *			subarray0,
    int					sub_length0,
    globus_nexus_mi_proto_t *		mi_proto1,
    int					offset1,
    nexus_byte_t *			subarray1,
    int					sub_length1)
{
    return(GLOBUS_TRUE);
}



/*
 * totem_proto_count()
 *
 * Returns GLOBUS_L_TOTEM_PROTO_COUNT.
 */
static int
totem_proto_count(void)
{
    int					count;

    count = GLOBUS_L_TOTEM_PROTO_COUNT;

    return(count);
}

/*
 * totem_proto_type()
 *
 * Identifies this protocol module to the rest of Nexus.
 *
 * Returns NEXUS_PROTO_TYPE_TOTEM.
 */
static nexus_proto_type_t totem_proto_type(void)
{
    nexus_proto_type_t			my_type;

    my_type = NEXUS_PROTO_TYPE_TOTEM;

    return(my_type);
} /* totem_proto_type() */


#if  defined(HAVE_TOTEM_PROTO)

/*
 * totem_init()
 *
 * Initialize the TOTEM protocol.
 */
static void totem_init(nexus_bool_t * add_to_my_mi_proto)
{

    int					i;
    int					rc;
    char *				arg;

#   if !defined(BUILD_LITE)
    {
	nexus_mutex_init(&totem_mutex, (nexus_mutexattr_t *) NULL);
    }
#   endif
    
    totem_done = NEXUS_FALSE;

    /*
     * Initialize data structures to track communication channels
     */
    totem_comm_list = NULL;

    globus_hashtable_init( &totem_comm_hash_table,
		      TOTEM_COMM_HASH_TABLE_SIZE,
		      totem_comm_hash_value,
		      totem_comm_hash_keyeq);

    /*
     * Initialize data structures to track process groups
     */
    globus_hashtable_init( &totem_process_group_hash_table,
		      TOTEM_PROCESS_GROUP_HASH_TABLE_SIZE,
		      totem_process_group_hash_value,
		      totem_process_group_hash_keyeq);

    /* 
     * Now to find out the host that the totem daemon is running on
     * the host has either been supplied on the command line that
     * started nexus or it is the host we are running the application on
     */
    if ((arg = globus_nexus_option_find("totem_host")) != GLOBUS_NULL)
      {
	strcpy( totem_daemon_host, arg);
      }
    else
    {
	char *				host;

	host = nexus_rdb_lookup(_nx_my_hostname, "totem_daemon_host");
	if (host)
	{
	    strcpy( totem_daemon_host, host);
	    nexus_rdb_free(host);
	}
	else
	{
	    /* the daemon is running on the local machine */
	    globus_libc_gethostname( totem_daemon_host, MAXHOSTNAMELEN);
	}
    }

    /* 
     * Now to set the communication port.  If it is not specified on the
     * command line then set it to the default value.
     */
    if ((arg = globus_nexus_option_find("totem_daemon_port")) != GLOBUS_NULL)
    {
	totem_daemon_port = atoi(arg);
    }
    else
    {
	char *				port;

	port = nexus_rdb_lookup(_nx_my_hostname, "totem_port");
	if (port)
	{
	    totem_daemon_port = atoi(port);
	    nexus_rdb_free(port);
	}
	else
	{
	    /* use the default totem port */
	    totem_daemon_port = TOTEM_DEFAULT_PORT;
	}
    }

    /*
     * Do not put totem in default mi_proto list, and get_my_mi_proto() should
     * not be called during configuration
     */
    *add_to_my_mi_proto = NEXUS_FALSE;
}
/* totem_init() */


/*
 * totem_shutdown()
 *
 * Called during normal shutdown of a process.
 *
 * Under the totem lock, marks the module as done and initiates a leave for
 * every process group on every communication channel.  Then, without holding
 * the lock, polls the fd event handler until the channel list has drained,
 * and finally releases the list and both hash tables.
 */
static void totem_shutdown(void)
{
    globus_list_t *			channel_node;
    globus_list_t *			group_node;
    totem_comm_t *			channel;
    totem_process_group_t *		group;

    totem_enter();
    {
	totem_done = NEXUS_TRUE;

	/*
	 * Leave each of the process groups
	 */
	for (channel_node = totem_comm_list;
	     channel_node;
	     channel_node = globus_list_rest(channel_node))
	{
	    channel = globus_list_first(channel_node);

	    for (group_node = channel->pg_list;
		 group_node;
		 group_node = globus_list_rest(group_node))
	    {
		group = globus_list_first(group_node);

		totem_process_group_leave_initiate(group);
	    }
	}
    }
    totem_exit();

    /*
     * Close communication channel to totemd once all of the process groups
     * have been left
     */
    for (;;)
    {
	if (globus_list_empty(totem_comm_list))
	{
	    break;
	}

	nexus_fd_handle_events(GLOBUS_NEXUS_FD_POLL_NONBLOCKING_ALL, NULL);
	nexus_thread_yield();
    }

    /*
     * Release resources used to track communication channels
     */
    globus_list_free(totem_comm_list);
    globus_hashtable_destroy( &totem_comm_hash_table);

    /*
     * Release resources used to track process groups
     */
    globus_hashtable_destroy( &totem_process_group_hash_table);
}
/* totem_shutdown() */


/*
 * totem_increment_reference_count()
 *
 * Adds one startpoint reference to the process group behind 'nproto'.  This
 * routine is called when nexus_startpoint_copy() creates a duplicate
 * startpoint within the current context.
 *
 * The counter is updated under the totem lock; a trace line is printed
 * afterwards.
 */
static void
totem_increment_reference_count(
    nexus_proto_t *			nproto)
{
    totem_process_group_t *		group = (totem_process_group_t *) nproto;

    totem_enter();
    {
	group->sp_cnt += 1;
    }
    totem_exit();

    nexus_printf( "totem_increment_reference_count(): called\n");
}
/* totem_increment_reference_count() */


/*
 * totem_decrement_reference_count()
 *
 * Drops one startpoint reference from the process group behind 'nproto'.
 * When the last reference goes away, leaving the process group is initiated.
 *
 * This is called by nexus_startpoint_destroy()
 *
 * Returns NEXUS_TRUE if the count reached zero (and the leave was started),
 * NEXUS_FALSE otherwise.
 */
static nexus_bool_t
totem_decrement_reference_count(
    nexus_proto_t *			nproto)
{
    totem_process_group_t *		group = (totem_process_group_t *) nproto;
    nexus_bool_t			released = NEXUS_FALSE;

    nexus_printf( "totem_decrement_reference_count(): called\n");

    totem_enter();
    {
	group->sp_cnt -= 1;

	NexusAssert2(
	    (group->sp_cnt >= 0),
	    ("totem_decrement_reference_count(): Internal error: "
	     "Reference count < 0\n"));

	/* last startpoint gone: start leaving the group */
	if (group->sp_cnt == 0)
	{
	    totem_process_group_leave_initiate(group);
	    released = NEXUS_TRUE;
	}
    }
    totem_exit();

    return(released);
} /* totem_decrement_reference_count() */


/*
 * totem_get_my_mi_proto()
 *
 * This is called by nexus_endpoint_init()
 *
 * Return in 'array' and 'size' a byte array containing
 * enough information to enable another process to connect
 * to this one.  This byte array will be assembled with those
 * of the other protocol modules and placed in a nexus_mi_proto_t.
 *
 * 'proto_info' must point to a nexus_proto_info_totem_t naming the process
 * group to join.  On success the endpoint is added to that process group's
 * endpoint list.
 *
 * Returns 0 on success, or NEXUS_ERROR_CONNECT_FAILED if the process group
 * could not be joined (in which case 'array' and 'size' are left untouched).
 * The totem lock is released on every path.
 */
static int
totem_get_my_mi_proto(
    nexus_byte_t **			array,
    int *				size,
    void *				proto_info,
    nexus_endpoint_t *			endpoint)
{
    int					rc;
    nexus_proto_info_totem_t *		totem_info;
    long				pid;
    totem_process_group_t *		process_group;

    rc = 0;
    totem_info = (nexus_proto_info_totem_t *) proto_info;
    pid = totem_info->process_group;

    totem_enter();
    {
	/*
	 * Find/Create the process group data structures
	 */
	process_group = totem_process_group_join_initiate(
	    totem_daemon_host,
	    totem_daemon_port,
	    pid);
	if (process_group == NULL)
	{
	    /*
	     * Do not return from here: that would leave the totem lock held
	     * and block every later totem_enter().
	     */
	    rc = NEXUS_ERROR_CONNECT_FAILED;
	}
	else
	{
	    /* remember the endpoint so incoming messages can be delivered */
	    globus_list_insert( &process_group->ep_list, endpoint );

	    /*
	     * Return the machine independent protocol array
	     */
	    *array = process_group->proto_array;
	    *size = process_group->proto_array_size;
	}
    }
    totem_exit();

    return(rc);
}


/*
 * totem_destroy_my_mi_proto()
 *
 * called by nexus_endpoint_destroy()
 *
 * Decodes the process group id from 'proto_array' (version byte, format
 * byte, process group id), removes 'endpoint' from that process group's
 * endpoint list and initiates leaving the group.
 *
 * Returns 0 on success, GLOBUS_NEXUS_ERROR_VERSION_MISMATCH if the mi_proto
 * version is not the one this module writes, or NEXUS_ERROR_BAD_PROTOCOL if
 * the process group is unknown (only reachable when assertions are compiled
 * out).
 */
static int
totem_destroy_my_mi_proto(
    nexus_endpoint_t *			endpoint,
    nexus_byte_t *			proto_array,
    int					size)
{
    int					rc;
    int					version;
    int					format;
    long				pid;
    totem_process_group_t *		process_group;
    globus_list_t *                     ep_entry;

    rc = 0;

    /* Check the totem mi_proto version */
    version = (int) *proto_array++;
    if (version != GLOBUS_L_NEXUS_TOTEM_MI_PROTO_VERSION)
    {
	return(GLOBUS_NEXUS_ERROR_VERSION_MISMATCH);
    }

    /*
     * Get the process group ID from the machine independent protocol
     * descriptor
     */
    format = (int) *proto_array++;
    nexus_dc_get_long(&proto_array, &pid, 1, format);

    totem_enter();
    {
	/*
	 * Find the associated process_group_t
	 */
	process_group = totem_process_group_find(totem_daemon_host,
						 totem_daemon_port,
						 pid);

	NexusAssert2(
	    (process_group),
	    ("totem_destroy_my_mi_proto(): "
	     "Internal error - process group %ld not found in hash table\n",
	     pid));

	if (process_group == NULL)
	{
	    /* assertions disabled: report instead of dereferencing NULL */
	    rc = NEXUS_ERROR_BAD_PROTOCOL;
	}
	else
	{
	    /*
	     * Remove the endpoint from list of endpoints associated with this
	     * process group.  The endpoint may not be on the list, so the
	     * search result is checked before it is handed to
	     * globus_list_remove().
	     */
	    ep_entry = globus_list_search(process_group->ep_list, endpoint);
	    if (ep_entry != NULL)
	    {
		globus_list_remove(&process_group->ep_list, ep_entry);
	    }

	    /*
	     * Leave the process group if noone else needs it
	     */
	    totem_process_group_leave_initiate(process_group);
	}
    }
    totem_exit();

    return(rc);
}
/* totem_destroy_my_mi_proto() */


/*
 * totem_construct_from_mi_proto()
 *
 * This is called by nexus_startpoint_bind() or nexus_get_startpoint()
 *
 * From the passed machine independent protocol list ('mi_proto'), plus
 * the totem specific entry from that list ('proto_array' and 'size'),
 * see if I can use the information to create a nexus_proto_t object
 * that can be used to connect to the context:
 *	- If I cannot use this protocol to attach to the context, then
 *		return NEXUS_FALSE.  (This option is useful if two contexts
 *		both speak a particular protocol, but they cannot
 *		talk to each other via that protocol.  For example,
 *		on two MPP's, the contexts within a single MPP can
 *		talk to each other via the native messaging protocol,
 *		but cannot talk to the contexts on the other MPP
 *		using that native protocol.)
 *	- If this totem protocol points to myself, and thus the local
 *		protocol module should be used, then set
 *		*proto=NULL, and return NEXUS_TRUE.
 *	- Otherwise, construct a totem protocol object for this mi_proto
 *		and put it in *proto.  Then return NEXUS_TRUE.
 *
 * The 'proto_array' should contain:
 *	mi_proto version (1 byte)
 *	format ID (1 byte)
 *	process group id (a long encoded in the given format)
 *
 * This implementation returns NEXUS_FALSE (leaving *proto untouched) when
 * the version byte does not match or when the process group cannot be
 * joined; otherwise it takes one startpoint reference on the process group
 * and returns it through *proto.
 */
static nexus_bool_t
totem_construct_from_mi_proto(
    nexus_proto_t **			proto,
    nexus_mi_proto_t *			mi_proto,
    nexus_byte_t *			proto_array,
    int					size)
{
    int					version;
    int					format;
    long				pid;
    totem_process_group_t *		process_group;

    /* Check the totem mi_proto version */
    version = (int) *proto_array++;
    if (version != GLOBUS_L_NEXUS_TOTEM_MI_PROTO_VERSION)
    {
	_nx_fault_detected(GLOBUS_NEXUS_ERROR_VERSION_MISMATCH);
	return(NEXUS_FALSE);
    }
    
    /*
     * Get the process group ID from the machine independent protocol
     * descriptor
     */
    format = (int) *proto_array++;
    nexus_dc_get_long(&proto_array, &pid, 1, format);

    /*
     * Create totem_process_group_t/nexus_proto_t for this process group
     */
    totem_enter();
    {
	process_group = totem_process_group_join_initiate(totem_daemon_host,
							 totem_daemon_port,
							 pid);

	/* the join can fail (see totem_get_my_mi_proto()); only count the
	   startpoint when there is a process group to count it on */
	if (process_group != NULL)
	{
	    process_group->sp_cnt++;
	}
    }
    totem_exit();

    if (process_group == NULL)
    {
	/* this protocol cannot be used to reach the context */
	return(NEXUS_FALSE);
    }

    /*
     * Return process group
     */
    *proto = (nexus_proto_t *) process_group;

    return (NEXUS_TRUE);
} /* totem_construct_from_mi_proto() */


/*
 * totem_test_proto()
 *
 * Health check for the process group behind 'proto'.
 *
 * Returns NEXUS_ERROR_BAD_PROTOCOL when the process group has no
 * communication channel, otherwise the process group's recorded fault code.
 */
static int
totem_test_proto(
    nexus_proto_t *			proto)
{
    totem_process_group_t *		group = (totem_process_group_t *) proto;

    if (group->comm != NULL)
    {
	return(group->fault_code);
    }

    return(NEXUS_ERROR_BAD_PROTOCOL);
}
/* totem_test_proto() */


/*
 * totem_direct_info_size()
 *
 * Placeholder: always reports a direct info size of 0.
 */
static int
totem_direct_info_size(void)
{
    /* TODO: This needs to be filled in */
    const int				direct_info_size = 0;

    return(direct_info_size);
}
/* totem_direct_info_size() */


/*
 * totem_send_rsr()
 *
 * Prepend the totem wire header to 'buffer' and queue it on the outgoing
 * queue of the communication channel used by the buffer's process group
 * (buffer->proto).  If no write is in progress on that channel, the write is
 * registered immediately.
 *
 * Returns 0 when the buffer was queued.  Otherwise returns the process
 * group's fault code (group in the FAULT state) or NEXUS_ERROR_BAD_PROTOCOL
 * (group neither JOINING nor ACTIVE, or no communication channel); in those
 * cases buffer->using_barrier is cleared and nothing is queued.
 *
 * NOTE(review): the header is written and the buffer's current/size_used are
 * adjusted before the state checks, so a rejected buffer is returned to the
 * caller in its modified form.
 */
static int
totem_send_rsr(
    struct globus_nexus_buffer_s *		buffer)
{
    int					rc;
    totem_process_group_t *		process_group;
    totem_inf_header_t *		infh;
    size_t				total_size;	/* NOTE(review): unused */
    long *				total_msg_length;
    totem_comm_t *			comm;
    long				msg_size;


    rc = 0;

    /* level 2 is suppressed by the enabled totem_debug_printf() variant.
       NOTE(review): %x is given a pointer argument here */
    totem_debug_printf(
	2,
	("totem_send_rsr(): invoked with buffer: %x\n",buffer));

    /*
     * If there are any direct components to the message, then fail
     */
    NexusAssert(buffer->n_direct == 0);

    /*
     * Get the outgoing process group information for this message
     */
    process_group = (totem_process_group_t *) buffer->proto;

    /*
     * First, move the current pointer back to the beginning of the totem
     * header + message length field (reserved header size)
     */
    buffer->base_segments->current -= TOTEM_HDR_SIZE;

    /*
     * set the INF header to point to where the beginning of the INF header
     * should be placed in the buffer.  Also set total_msg_length to point to
     * the leading long of the buffer so that the message length can be
     * written there.  Then up the size_used to the new size.
     *
     * msg_size is computed from size_used before it is increased, so it
     * covers the INF header plus the payload but not the leading length word.
     */
    msg_size = TOTEM_INF_HDR_SIZE + buffer->base_segments->size_used;
    infh = (totem_inf_header_t *)
	(buffer->base_segments->current + sizeof(long));
    total_msg_length = (long *) buffer->base_segments->current;
    buffer->base_segments->size_used += TOTEM_HDR_SIZE; 

    /*
     * DEBTODO: unsafe - assumes same word sizes and structure packing on other
     * end.  Needs to be fixed sometime, but requires changes to totemd as
     * well.
     */

    /*
     * Set the fields of the totem header
     */
    infh->inf_len = msg_size;
    infh->inf_header_len = TOTEM_INF_HDR_SIZE;
    infh->inf_type = TOTEM_AGREED;
    infh->inf_num_sets = 1;
    infh->inf_pgid[0].pid = process_group->pid;
    infh->inf_pgid[0].proc = 0;
    totem_Infheader_htonl(infh);

    /* set the message length field */
    /* NOTE(review): htonl() result is stored into a long -- see DEBTODO */
    *total_msg_length = htonl(msg_size);

    totem_enter();
    {
	/*
	 * Make sure we still have a valid process group
	 */
	if (process_group->state != PROCESS_GROUP_STATE_JOINING
	    && process_group->state != PROCESS_GROUP_STATE_ACTIVE)
	{
	    if (process_group->state == PROCESS_GROUP_STATE_FAULT)
	    {
		rc = process_group->fault_code;
	    }
	    else
	    {
		rc = NEXUS_ERROR_BAD_PROTOCOL;
	    }
	    /* NOTE(review): presumably so nobody waits on a send that was
	       never queued -- confirm */
	    buffer->using_barrier = NEXUS_FALSE;
	    goto mutex_abort;
	}

	/*
	 * Get communication channel state.  If it isn't available, then we
	 * must be in the process of shutting down.  This shouldn't ever happen
	 * unless the user attempts to perform a send_rsr during or after
	 * shutdown.
	 */
	comm = process_group->comm;
	if (comm == NULL)
	{
	    rc = NEXUS_ERROR_BAD_PROTOCOL;
	    buffer->using_barrier = NEXUS_FALSE;
	    goto mutex_abort;
	}

	NexusAssert2((process_group->type == NEXUS_PROTO_TYPE_TOTEM),
		     ("totem_send_rsr(): Internal error: "
		      "proto_type is not NEXUS_PROTO_TYPE_TOTEM\n"));
    
	NexusAssert2((comm->fd >= 0),
		     ("totem_send_rsr(): Internal error: totemd communcation "
		      "channel improperly established\n"));

	/*
	 * Enqueue this message on the communication channel's outgoing queue
	 */
	Enqueue(comm->outgoing.write_q_head,
		comm->outgoing.write_q_tail,
		buffer);
    
	/*
	 * If nobody else has registered a write for channel, register a
	 * write for the message we just enqueued.
	 *
	 * If there is already a write in progress, the write callback will
	 * take care of registering this write (and any other ones that are
	 * enqueued) when the current write completes.
	 */
	if (!comm->outgoing.write_in_progress)
	{
	    outgoing_register_next_write(comm);
	}
	/* the label sits inside the locked region so that every abort path
	   still falls through to totem_exit() below */
      mutex_abort:
	  ;
    }
    totem_exit();

    return (rc);
} /* totem_send_rsr() */


/*
 * totem_send_rsr_outstanding()
 *
 * Return true if there are any sends outstanding for this proto,
 * otherwise false.
 *
 * "Outstanding" means the outgoing queue of the process group's
 * communication channel is not empty.  A process group without a
 * communication channel (comm == NULL, a case totem_send_rsr() and
 * totem_test_proto() also allow for) has nothing queued and reports false.
 */
static globus_bool_t
totem_send_rsr_outstanding(globus_nexus_proto_t *nproto)
{
    globus_bool_t		rc;
    totem_process_group_t *	process_group;
    totem_comm_t *		comm;

    rc = GLOBUS_FALSE;
    process_group = (totem_process_group_t *) nproto;

    totem_enter();
    {
	comm = process_group->comm;

	/* no channel, no queue: must not dereference a NULL comm */
	if (comm != NULL && QueueNotEmpty(comm->outgoing.write_q_head))
	{
	    rc = GLOBUS_TRUE;
	}
    }
    totem_exit();

    return(rc);
} /* totem_send_rsr_outstanding() */


/*
 * outgoing_register_next_write()
 *
 * Register a write for the base segment of the buffer at the head of the
 * communication channel's write queue.  If the queue is empty nothing is
 * done.
 *
 * The buffer is left on the queue; it is dequeued and destroyed by
 * outgoing_write_callback() once the write has completed.
 *
 * If the write cannot be registered, the buffer is left untouched at the head
 * of the queue and write_in_progress stays NEXUS_FALSE, so that the next
 * caller of this function retries it instead of the queue stalling forever
 * waiting for a callback that will never arrive.
 *
 * This function assumes that totem_enter() has already been called.
 *
 * This function should not be called if
 * outgoing->write_in_progress==NEXUS_TRUE.
 */
static void
outgoing_register_next_write(
    totem_comm_t *			comm)
{
    struct globus_nexus_buffer_s *	buffer;
    nexus_byte_t *			buf;
    size_t				size;
    int					rc;

    NexusAssert(comm->outgoing.write_in_progress == NEXUS_FALSE);

    buffer = comm->outgoing.write_q_head;
    if (buffer == NULL)
    {
	return;
    }

    /*
     * Register the write on the base segment. We are assuming in
     * this code that there is only ever one base segment.  The base
     * segment contains the actual data including headers that have been
     * stuck onto the message.
     */
    buf = buffer->base_segments->current;
    size = buffer->base_segments->size_used;

    /*
     * inside register for write the code will worry about if the
     * communication channel needs to take the data in smaller chunks
     */
    rc = nexus_fd_register_for_write(
	comm->fd,
	(char *) buf,
	size,
	outgoing_write_callback,
	outgoing_write_error_callback,
	(void *) comm);
    if (rc != 0)
    {
	totem_debug_printf(
	    1,
	    ("outgoing_register_next_write(): "
	     "nexus_fd_register_for_write() failed, rc=%d, fd=%d\n",
	     rc,
	     comm->fd));
	return;
    }

    /*
     * Mark the base segment as written.  This is OK because the write
     * callback will not be called until the above register_for_write is
     * done.
     */
    buffer->current_base_segment = (nexus_base_segment_t *) NULL;
    comm->outgoing.write_in_progress = NEXUS_TRUE;

    totem_debug_printf(
	1,
	("outgoing_register_next_write(): "
	 "buf=0x%lx, size=%ld, fd=%d\n",
	 (unsigned long) buffer,
	 (long) size,		/* size_t does not match %ld without a cast */
	 comm->fd));
} /* outgoing_register_next_write() */


/*
 * outgoing_write_callback()
 *
 * The requested write operation has completed.  If the buffer at the head of
 * the write queue has been marked as written (current_base_segment == NULL),
 * dequeue and destroy it.  Then, if the channel is still open, call
 * outgoing_register_next_write() to start the next queued write; otherwise
 * unregister the channel, close its fd and free it.
 *
 * Parameters:
 *   arg    - the totem_comm_t the write was registered for
 *   fd, buf, nbytes - supplied by the fd layer; not used here
 */
static void
outgoing_write_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				nbytes)
{
    totem_comm_t *			comm;
    struct globus_nexus_buffer_s*             buffer;

    comm = (totem_comm_t *) arg;
    totem_debug_printf(3, ("outgoing_write_callback(): entering\n"));
    totem_enter();
    {
	comm->outgoing.write_in_progress = NEXUS_FALSE;

	/*
	 * If the first message in the queue has been sent, then dequeue it
	 * and free all resources consumed by it
	 */
	buffer = comm->outgoing.write_q_head;
	if (buffer != NULL && buffer->current_base_segment == NULL)
	{
	    Dequeue(comm->outgoing.write_q_head,
		    comm->outgoing.write_q_tail,
		    buffer);

	    /*
	     * Log before destroying: nexus_buffer_destroy() is handed the
	     * address of the pointer, so its value must not be relied upon
	     * afterwards.
	     */
	    totem_debug_printf(
		2,
		("outgoing_write_callback(): "
		 "deleting buffer 0x%lx on fd=%i\n",
		 (unsigned long) buffer,
		 comm->fd));

	    /*
	     * BRIANTODO: may want to check in the future as to whether
	     * this buffer has asked to stick around!!
	     */
	    nexus_buffer_destroy(&buffer);
	}

	if (comm->state == COMM_STATE_OPEN)
	{
	    outgoing_register_next_write(comm);
	}
	else
	{
	    globus_list_t *		comm_entry;

	    /*
	     * The channel has been closed, so unregister it
	     *
	     * NOTE(review): comm->hashkey.host aliases comm->host; make sure
	     * that string is still valid when the hash table entry is removed.
	     */
	    comm_entry = globus_list_search( totem_comm_list, comm );
	    NexusAssert(comm_entry != NULL);
	    globus_list_remove(&totem_comm_list, comm_entry );

	    globus_hashtable_remove( &totem_comm_hash_table,
				     &comm->hashkey);

	    nexus_fd_close(comm->fd);

	    NexusFree(comm);
	}
    }
    totem_exit();
    totem_debug_printf(3, ("outgoing_write_callback(): exiting\n"));
}
/* outgoing_write_callback() */


/*
 * outgoing_write_error_callback()
 *
 * Called by the fd layer when a registered write fails.
 *
 * EPIPE means the fd was closed underneath us (the peer died or closed the
 * connection): the channel is closed with NEXUS_ERROR_PROCESS_DIED and the
 * fault is reported through _nx_fault_detected(); only if that returns
 * non-zero is the error treated as fatal.  Any other errno is fatal
 * immediately.
 *
 * TODO: This should try to re-establish the connection and retry the send.
 */
static void
outgoing_write_error_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				n_bytes,
    int					error)
{
    totem_comm_t *			channel = (totem_comm_t *) arg;

    totem_debug_printf(3, ("outgoing_write_error_callback(): entering\n"));

    if (error != EPIPE)
    {
	/* not a closed pipe: nothing we know how to recover from */
	globus_fatal(
	    "outgoing_write_error_callback(): Write failed (errno=%i): %s\n",
	    error,
	    globus_libc_system_error_string(error));
    }
    else
    {
	/* the channel must be torn down while holding the totem mutex */
	totem_enter();
	totem_comm_close(channel, NEXUS_ERROR_PROCESS_DIED);
	totem_exit();

	if (_nx_fault_detected(NEXUS_ERROR_PROCESS_DIED) != 0)
	{
	    globus_fatal(
		"outgoing_write_error_callback(): fd unexpectedly closed. "
		"Another process probably died: errno=%d: %s\n",
		error,
		globus_libc_system_error_string(error));
	}
    }

    totem_debug_printf(3, ("outgoing_write_error_callback(): exiting\n"));
}
/* outgoing_write_error_callback() */


/*
 * incoming_read_callback()
 *
 * this routine is called by the FD event handling code when a read operation
 * requested through nexus_fd_register_for_read() completes.  This routine then
 * processes the header of the message, requests additional read operations to
 * get the body of the message, and dispatches the message once the data
 * arrives.
 *
 * NOTE: EOF and the CLOSE_* flags that accompany EOF are handled by
 * incoming_read_error_callback().
 */
static void
incoming_read_callback(
    void *				arg,
    int					fd,
    char *				buf,
    size_t				nbytes_read,
    char  **			        new_buf,
    size_t *				new_max_nbytes,
    size_t *				new_wait_for_nbytes)
{
    totem_comm_t *			comm;
    totem_comm_incoming_t *		incoming;
    nexus_bool_t			done;
    int					sizeof_u_long;
    unsigned long			tmp_u_long;
    struct globus_nexus_buffer_s *	buffer;
    nexus_bool_t			message_enqueued;
    totem_inf_header_t *                infh;
    totem_pg_id_t *			sender_pg_id;
    globus_list_t *                     ep_entry;
    long                                pid;
    totem_process_group_t *             process_group;
    totem_pg_id_t *                     sender;
    int					version;

    comm = (totem_comm_t *) arg;
    message_enqueued = NEXUS_FALSE;

    totem_debug_printf(3,
		       ("incoming_read_callback(): entering\n"));

    totem_enter();
    {
	incoming = &comm->incoming;

	NexusAssert2((  (incoming->state == INCOMING_STATE_MSG_SIZE)
			 || (incoming->state == INCOMING_STATE_BODY) ),
		     ("incoming_read_callback(): Internal error: "
		      "Invalid incoming->state = %d\n",
		      incoming->state) );

	totem_debug_printf(
	    4, 
	    ("starting with %d new bytes read\n",
	     nbytes_read));

	switch(incoming->state)
	{
	    
	  case INCOMING_STATE_MSG_SIZE:
	    totem_debug_printf(3, ("getting message size\n"));
	    /* Got the message size so now we need to translate it */
	    incoming->msg_size = ntohl( incoming->msg_size );
	    incoming->state = INCOMING_STATE_BODY;
	    NexusMalloc(incoming_read_callback(),
			incoming->storage,
			nexus_byte_t *,
			incoming->msg_size);

	    infh = (totem_inf_header_t *) incoming->storage;
	    infh->inf_len = incoming->msg_size;


	    incoming->storage_size = incoming->msg_size;
	    incoming->current = incoming->storage + sizeof(long);
	    *new_buf = (char *)incoming->current;
	    *new_max_nbytes = incoming->msg_size - sizeof(long);
	    *new_wait_for_nbytes = incoming->msg_size - sizeof(long);
	    break;

	  case INCOMING_STATE_BODY:
	    totem_debug_printf(3, ("reading body of message\n"));

	    /* DEBTODO: call the 
	     * membership retention routines to cludge remembering
	     * membership.
	     */
	    infh = (totem_inf_header_t *) incoming->storage;
	    totem_Infheader_ntohl(infh);

	    /* Now to get the message sender information which is right
	     * after the header and readjust the data pointer to point to the
	     * rest of the message.  */
	    sender_pg_id = (totem_pg_id_t *)
		(incoming->storage + infh->inf_header_len);
	    sender_pg_id->pid = ntohl( sender_pg_id->pid);

	    /*
	     * We need to set incoming->current to point to the rest of the
	     * message.  Also, change incoming->msg_size to reflect the size
	     * without the totem header.
	     */
	    incoming->current = incoming->storage
		+ infh->inf_header_len
		+ sizeof(totem_pg_id_t);
	    incoming->msg_size -= infh->inf_header_len + sizeof(totem_pg_id_t);

	    /*
	     * pid is the process group this message was sent to since we
	     * are assuming that you can only send a message to a single
	     * process group at a time
	     */
	    pid = infh->inf_pgid[0].pid;
	    process_group = 
		totem_process_group_find(totem_daemon_host, 
					 totem_daemon_port,
					 pid);

	    totem_debug_printf(
		1,
		("sz=%ld, hdr_sz=%d, type=%d, sets=%d, pid=%d, "
		 "sender=%08x:%d\n",
		 infh->inf_len,
		 infh->inf_header_len,
		 infh->inf_type,
		 infh->inf_num_sets,
		 pid,
		 sender_pg_id->proc,
		 sender_pg_id->pid));

	    if (infh->inf_type == TOTEM_SAFE
		|| infh->inf_type == TOTEM_AGREED)
	    {
		/*
		 * Verify that the internal structure of the message is one of
		 * the formats that we understand
		 */
		version = *((globus_byte_t *) incoming->current);
		if (version == GLOBUS_L_NEXUS_TOTEM_PROTOCOL_VERSION)
		{
		    /*
		     * Enqueue the message for dispatch
		     *
		     * This has to be done once for each endpoint associated
		     * with the process group.  Copies of the incoming buffer
		     * must be made because _nx_buffer_dispatch() frees the
		     * messaage memory once the message has been dispatched
		     */
		    ep_entry = process_group->ep_list;
		    while(ep_entry)
		    {
			nexus_endpoint_t *	ep;
			nexus_byte_t*		msgbuf;

			ep = globus_list_first(ep_entry);

			NexusMalloc(incoming_read_callback(),
				    msgbuf,
				    nexus_byte_t *,
				    incoming->msg_size);
			memcpy(msgbuf, incoming->current, incoming->msg_size);

			_nx_buffer_create_from_raw(
			    msgbuf,
			    incoming->msg_size,
			    0, /* offset to nexus header */
			    incoming->msg_size,
			    ep,
			    &buffer);
			Enqueue(
			    incoming->dispatch_q_head,
			    incoming->dispatch_q_tail,
			    buffer);
			message_enqueued = NEXUS_TRUE;
			totem_debug_printf(3, ("message enqueued\n"));

			ep_entry = globus_list_rest(ep_entry);
		    }
		}
		else
		{
		    totem_exit();
		    _nx_fault_detected(GLOBUS_NEXUS_ERROR_VERSION_MISMATCH);
		    totem_enter();
		}
	    }
	    else if(infh->inf_type >= TOTEM_JOIN_ANS && 
		    infh->inf_type <= TOTEM_MEMBERSHIP )
	    {
		totem_Update_Membership(process_group,
					infh,
					sender_pg_id,
					incoming->current );
	    }
	    
	    /*
	     * Free up storage and prepare to receive the next message
	     */
	    NexusFree(incoming->storage);
	    incoming->state = INCOMING_STATE_MSG_SIZE;
	    incoming->storage = (nexus_byte_t *) &incoming->msg_size;
	    incoming->storage_size = sizeof(incoming->msg_size);
	    incoming->current = incoming->storage;
	    *new_buf = (char *)incoming->storage;
	    *new_max_nbytes = incoming->storage_size;
	    *new_wait_for_nbytes = incoming->storage_size;
		    
	    break;
	}

	/* If no other thread is currently dispatching messages for this
	 * process group, then dequeue message and dispatch it.  Otherwise,
	 * leave the message enqueued and exit, assuming it will be dispatched
	 * later by the currently active dispatch thread.
	 */
	if (message_enqueued && !incoming->dispatch_in_progress)
	{
	    incoming->dispatch_in_progress = NEXUS_TRUE;
	    while (QueueNotEmpty(incoming->dispatch_q_head))
	    {
		Dequeue(incoming->dispatch_q_head,
			incoming->dispatch_q_tail,
			buffer);
		totem_exit();
		{
		    totem_debug_printf(3, ("dispatching buffer\n"));
		    _nx_buffer_dispatch(buffer);
		}
		totem_enter();
	    }
	    incoming->dispatch_in_progress = NEXUS_FALSE;
	}
    }
    totem_exit();

    totem_debug_printf(3,
		       ("incoming_read_callback(): exiting\n"));

} /* incoming_read_callback() */


/*
 * incoming_read_error_callback()
 *
 * Called by the fd layer when a read on the channel fails or hits
 * end-of-file (error == 0).  The channel is closed with a fault code that
 * depends on the error, the fault is reported through _nx_fault_detected(),
 * and if that returns non-zero the process is terminated with a diagnostic.
 *
 *   error == 0           NEXUS_ERROR_BAD_PROTOCOL (only if not yet closed)
 *   ECONNRESET / EPIPE   NEXUS_ERROR_PROCESS_DIED
 *   anything else        NEXUS_ERROR_READ_FAILED
 *
 * totem_comm_close() frees both the channel and its host string when it is
 * given a fault code, so everything the diagnostics need is copied out of
 * the channel before it is closed.
 */
static void incoming_read_error_callback(void *arg,
					 int fd,
					 char *buf,
					 size_t nbytes_read,
					 int error)
{
    totem_comm_t *			comm;
    char				host[256];
    size_t				i;
    unsigned short			port;
    int					comm_fd;
    int					n_read;

    comm = (totem_comm_t *) arg;

    totem_enter();
    {
	/*
	 * Snapshot the values reported in the fatal messages.  The host
	 * string is only read while the channel is still open; it is
	 * truncated if it does not fit the local buffer.
	 */
	i = 0;
	if (comm->state != COMM_STATE_CLOSED && comm->host != (char *) NULL)
	{
	    while (i + 1 < sizeof(host) && comm->host[i] != '\0')
	    {
		host[i] = comm->host[i];
		i++;
	    }
	}
	host[i] = '\0';
	port = comm->port;
	comm_fd = comm->fd;
	n_read = 0;
	if (comm->incoming.storage != NULL)
	{
	    n_read = (int) (comm->incoming.current - comm->incoming.storage);
	}

	if (error == 0)
	{
	    if (comm->state != COMM_STATE_CLOSED)
	    {
		totem_comm_close(comm, NEXUS_ERROR_BAD_PROTOCOL);
		totem_exit();
		if (_nx_fault_detected(NEXUS_ERROR_BAD_PROTOCOL) != 0)
		{
		    globus_fatal(
			"incoming_read_error_callback(): Internal Error: "
			"Got an unexpected end-of-file %s:%hu, fd=%d\n",
			host,
			port,
			comm_fd);
		}
		totem_enter();
	    }
	}
	else if ((error == ECONNRESET)
		 || (error == EPIPE) )
	{
	    /*
	     * Got connection reset by peer on the read, so: if fault tolerance
	     * is not enabled then die else, close the incoming and keep on
	     * going
	     */
	    totem_comm_close(comm, NEXUS_ERROR_PROCESS_DIED);
	    totem_exit();
	    if (_nx_fault_detected(NEXUS_ERROR_PROCESS_DIED) != 0)
	    {
		totem_fatal(
		    "incoming_read_error_callback(): connected to %s:%hu "
		    "was unexpectedly severed, fd=%d, n_read=%d\n",
		    host,
		    port,
		    comm_fd,
		    n_read);
	    }
	    totem_enter();

	    /*
	     * XXX: Should we somehow mark the endpoints using this
	     * connection so the user knows a fault occurred on them?  Nexus
	     * endpoints don't seem to currently have a way for us to do this.
	     */

	    /*
	     * DEBTODO: should we try reopening the connection?  if we are
	     * still alive, the user has received a fault notification,
	     * although I'm not sure how useful that is given that our fault
	     * detection code doesn't make it easy to find out what happened
	     * nor re-establish communication.  We should talk about what you
	     * need when I am out visiting so that I can get Nexus FT to a
	     * level that will be useful in the long term.
	     */
	}
	else /* Some other read() error */
	{
	    totem_comm_close(comm, NEXUS_ERROR_READ_FAILED);
	    totem_exit();
	    if (_nx_fault_detected(NEXUS_ERROR_READ_FAILED) != 0)
	    {	
		globus_fatal(
		    "incoming_read_error_callback(): Internal Error: "
		    "Read failed with errno=%i, %s:%hu, fd=%d\n",
		    error,
		    host,
		    port,
		    comm_fd);
	    }
	    totem_enter();
	}
    }
    totem_exit();
} /* incoming_read_error_callback() */

/*
 * totem_Update_Membership()
 *
 *  This routine is called to process membership messages received
 *  from the totem daemon and keeps process_group->membership[] /
 *  process_group->numb_members up to date.
 *
 *    TOTEM_MEMBERSHIP     replace our view with the complete member list
 *                         carried in 'data' (a count followed by proc/pid
 *                         pairs); finalize a pending join if we are listed
 *    TOTEM_DISCONNECT_OK  handled like TOTEM_LEAVE_ANS
 *    TOTEM_LEAVE_ANS      remove 'sender' from our view; if the sender is
 *                         this process, finalize the leave instead
 *    TOTEM_JOIN_ANS       append 'sender' to our view unless it is already
 *                         listed
 *
 *  Always returns 0.
 *
 *  NOTE(review): the member count comes from the wire and is not checked
 *  against the capacity of membership[], which is not visible here.
 */
static int 
totem_Update_Membership(
    totem_process_group_t *            process_group,
    totem_inf_header_t *               infh,
    totem_pg_id_t *                    sender,
    nexus_byte_t *                     data )
{
    long *                             temp;
    long                               i, j;

    totem_debug_printf( 3, ("Processing totem membership message type=%d\n", infh->inf_type));
    switch( infh->inf_type )
    {
      case TOTEM_MEMBERSHIP:
	/* this message gives us the complete group membership */
	totem_debug_printf( 5, ("Received a membership message for group %ld\n", process_group->pid ));
	temp = (long *)data;
	process_group->numb_members = ntohl( *temp );
	for( i = 0; i < process_group->numb_members; i++ )
	{
	    /* each member is a (proc, pid) pair of longs; only the pid is
	     * byte swapped */
	    data += sizeof( long );
	    temp = (long *)data;
	    process_group->membership[i].proc = *temp;

	    data += sizeof( long );
	    temp = (long *)data;
	    process_group->membership[i].pid = ntohl( *temp );
	}
	if (process_group->state == PROCESS_GROUP_STATE_JOINING &&
	    totem_Is_Member( process_group, &process_group->comm->pgid ) != -1 )
	{
	    totem_process_group_join_finalize(process_group);
	}
	break;

      case TOTEM_DISCONNECT_OK:
	/* 
	 * this message is sent in response to a request to disconnect. Check
	 * whether we sent it before disconnecting.  If we did not send it then
	 * treat it as a leave answer message. 
	 */
	totem_debug_printf( 5, ("Received a disconnect OK message for group %ld from %lu, %lu\n",
				process_group->pid, sender->proc, sender->pid ));
	/* fallthrough */

      case TOTEM_LEAVE_ANS:
	/*
	 *  A process has left the process group so we need to
	 *  remove them from our view of the membership.
	 */
	if( infh->inf_type != TOTEM_DISCONNECT_OK )
	{
	    totem_debug_printf( 5, ("Received a leave ans message for group %ld from %lu, %lu\n",
				    process_group->pid, sender->proc, sender->pid ));
	}
	if( sender->proc == process_group->comm->pgid.proc && 
	    sender->pid == process_group->comm->pgid.pid )
	{
	    /* we are the one leaving; the group is not touched afterwards */
	    totem_process_group_leave_finalize(process_group);
	    return 0;
	}
	i = totem_Is_Member( process_group, sender );
	if( i != -1 )
	{
	    /* close the gap left by the departing member */
	    for( j = i+1; j < process_group->numb_members; i++, j++ )
	    {
		process_group->membership[i].proc = process_group->membership[j].proc;
		process_group->membership[i].pid  = process_group->membership[j].pid;
	    }
	    process_group->numb_members--;
	}
	break;

      case TOTEM_JOIN_ANS:
	/*
	 *  A new process has joined the process group so we need to add
	 *  them to our view of the membership.
	 */
	totem_debug_printf( 5, ("Received a join ans message for group %ld from %lu, %lu\n",
				process_group->pid, sender->proc, sender->pid ));
	i = totem_Is_Member( process_group, sender );
	if( i != -1 )
	{
	    /* already listed: adding it again would create a duplicate */
	    totem_debug_printf( 1, ("totem_Update_Membership: Something wrong new member is already member\n"));
	}
	else
	{
	    process_group->membership[process_group->numb_members].proc = sender->proc;
	    process_group->membership[process_group->numb_members].pid = sender->pid;
	    process_group->numb_members++;
	}
	break;

      default:
	/* we should never invoke this case so something is funky if we have */
	totem_debug_printf(1, ("totem_Update_Membership: Something wrong not a recognized membership message\n"));
	break;
    }
    totem_debug_printf( 3, ("Finished processing totem membership message\n"));
    totem_Print_Membership( process_group );
    return 0;
}

/*
 * totem_Is_Member()
 *
 * Look for pgid in the membership list of process_group.  An entry matches
 * when both its proc and its pid are equal to those of pgid.
 *
 * Returns the index of the first matching entry, or -1 if there is none
 * (which is always the case for an empty list).
 */
static int
totem_Is_Member(
    totem_process_group_t *             process_group,
    totem_pg_id_t *                     pgid )
{
    int					slot = 0;

    /* an empty list never enters the loop and falls through to -1 */
    while( slot < process_group->numb_members )
    {
	if( process_group->membership[slot].proc == pgid->proc
	    && process_group->membership[slot].pid == pgid->pid )
	{
	    return slot;
	}
	slot++;
    }

    return -1;
}

/*
 * totem_Print_Membership()
 *
 * Write the current membership of the process group through
 * totem_debug_printf(): one heading line with the group ID followed by one
 * line per member.  All lines are emitted with debug level 1.
 */
static void
totem_Print_Membership( 
    totem_process_group_t *             process_group )
{
    int					member = 0;

    totem_debug_printf( 1, ("Membership of process group %ld\n", process_group->pid ));

    while( member < process_group->numb_members )
    {
	totem_debug_printf( 1, ("\tmemb %d: %lu, %ld\n",
				member,
				process_group->membership[member].proc,
				process_group->membership[member].pid ));
	member++;
    }
}

/*
 * totem_Process_Join_Or_Leave()
 *
 * This routine will be called any time a special message needs to be sent to
 * Totem on behalf of a process group (the message type is passed in 'type').
 * It builds a message that consists of a length word followed by a totem
 * header addressed to the process group, enqueues it on the outgoing queue
 * of the group's communication channel and, if no write is in progress,
 * starts writing it.
 *
 * The length word is sent in network byte order, like the one written by
 * totem_send_rsr().  NOTE(review): confirm against the totemd side that all
 * length prefixes are decoded the same way.
 *
 * Always returns 0.
 */
static int
totem_Process_Join_Or_Leave(
    totem_process_group_t *		process_group,
    int					type)
{
    totem_inf_header_t *		infh;
    nexus_byte_t *			storage;
    nexus_buffer_t			buffer;
    long *				msg_size;

    NexusMalloc(totem_Process_Join_Or_Leave(),
		storage,
		nexus_byte_t *,
		TOTEM_HDR_SIZE);

    /* do not put uninitialized heap contents on the wire */
    memset(storage, 0, TOTEM_HDR_SIZE);

    /*
     * send the size of the message followed by the totem header
     */
    msg_size = (long *) storage;
    *msg_size = htonl(TOTEM_INF_HDR_SIZE);

    /* the header starts right behind the length word (not a pointer size) */
    infh = (totem_inf_header_t *) (storage + sizeof(*msg_size));
    infh->inf_type = type;
    infh->inf_num_sets = 1;
    infh->inf_pgid[0].pid = process_group->pid;
    infh->inf_pgid[0].proc = 0;
    infh->inf_header_len = TOTEM_INF_HDR_SIZE;
    infh->inf_len = infh->inf_header_len;

    /* log while the header is still in host byte order */
    totem_debug_printf(
	3,
	("Process_join_or_leave, msg_type=%ld, pgid=%ld, length=%ld\n", 
	 (long) type,
	 (long) process_group->pid,
	 (long) infh->inf_len));

    totem_Infheader_htonl( infh );
    _nx_buffer_create_from_raw(
	storage, 
	TOTEM_HDR_SIZE,
	0, 
	TOTEM_HDR_SIZE,
	NULL,
	&buffer);

    /*
     * Enqueue this message on the communication channel's outgoing queue
     */
    Enqueue(process_group->comm->outgoing.write_q_head,
	    process_group->comm->outgoing.write_q_tail,
	    buffer);
    
    /*
     * If nobody else has registered a write for channel, register a
     * write for the message we just enqueued.
     *
     * If there is already a write in progress, the write callback will
     * take care of registering this write (and any other ones that are
     * enqueued) when the current write completes.
     */
    if (!process_group->comm->outgoing.write_in_progress)
    {
	outgoing_register_next_write(process_group->comm);
    }
 
    return( 0 );
}

/*
 * totem_Process_Close()
 *
 * This routine will be called to send a close message to the totem
 * daemon.  It builds a message that consists of a length word followed by a
 * TOTEM_CLOSE_MBOX header, enqueues it on the channel's outgoing queue and,
 * if no write is in progress, starts writing it.
 *
 * The length word is sent in network byte order, like the one written by
 * totem_send_rsr().  NOTE(review): confirm against the totemd side that all
 * length prefixes are decoded the same way.
 *
 * Always returns 0.
 */
static int
totem_Process_Close(
    totem_comm_t *			comm)
{
    totem_inf_header_t *		infh;
    nexus_byte_t *			storage;
    nexus_buffer_t			buffer;
    long *				msg_size;

    NexusMalloc(totem_Process_Close(),
		storage,
		nexus_byte_t *,
		TOTEM_HDR_SIZE);

    /* do not put uninitialized heap contents on the wire */
    memset(storage, 0, TOTEM_HDR_SIZE);

    /*
     * send the size of the message followed by the totem header
     */
    msg_size = (long *) storage;
    *msg_size = htonl(TOTEM_INF_HDR_SIZE);

    /* the header starts right behind the length word (not a pointer size) */
    infh = (totem_inf_header_t *) (storage + sizeof(*msg_size));
    infh->inf_type = TOTEM_CLOSE_MBOX;
    infh->inf_num_sets = 1;
    infh->inf_pgid[0].pid = 0;
    infh->inf_pgid[0].proc = 0;
    infh->inf_header_len = TOTEM_INF_HDR_SIZE;
    infh->inf_len = infh->inf_header_len;

    /* log while the header is still in host byte order */
    totem_debug_printf(3,("Process_Close, Length: %ld\n",
			  (long) infh->inf_len));

    totem_Infheader_htonl( infh );
    _nx_buffer_create_from_raw(
	storage, 
	TOTEM_HDR_SIZE,
	0, 
	TOTEM_HDR_SIZE,
	NULL,
	&buffer);

    /*
     * Enqueue this message on the communication channel's outgoing queue
     */
    Enqueue(comm->outgoing.write_q_head,
	    comm->outgoing.write_q_tail,
	    buffer);

    /*
     * If nobody else has registered a write for channel, register a
     * write for the message we just enqueued.
     *
     * If there is already a write in progress, the write callback will
     * take care of registering this write (and any other ones that are
     * enqueued) when the current write completes.
     */
    if (!comm->outgoing.write_in_progress)
    {
	outgoing_register_next_write( comm );
    }

    return( 0 );
}

/*
 * totem_Infheader_htonl()
 *
 * Rewrite, in place, the fields of a message header from host byte order to
 * network byte order: inf_len, inf_header_len, inf_type, inf_num_sets and
 * the pid/proc of the first inf_num_sets entries of inf_pgid[].
 */
void
totem_Infheader_htonl(
    totem_inf_header_t *		infh)
{
    long				set_count;
    int					set;

    /* grab the entry count while it is still in host byte order */
    set_count = infh->inf_num_sets;

    infh->inf_len = htonl( (unsigned long)infh->inf_len );
    infh->inf_header_len = htonl( (unsigned long)infh->inf_header_len );
    infh->inf_type = htonl( (unsigned long)infh->inf_type );
    infh->inf_num_sets = htonl( (unsigned long)infh->inf_num_sets );

    set = 0;
    while( set < set_count )
    {
	infh->inf_pgid[set].pid =
	    htonl( (unsigned long)infh->inf_pgid[set].pid );
	infh->inf_pgid[set].proc =
	    htonl( (unsigned long)infh->inf_pgid[set].proc );
	set++;
    }
}


/*
 * totem_Infheader_ntohl()
 *
 * Convert, in place, the fields of the message header from network byte
 * order to host byte order: inf_len, inf_header_len, inf_type, inf_num_sets
 * and the pid/proc of the first inf_num_sets entries of inf_pgid[].
 */
void
totem_Infheader_ntohl(
    totem_inf_header_t *		infh)
{
    int					i;
    long				num;

    infh->inf_len = ntohl( (unsigned long)infh->inf_len );
    infh->inf_header_len = ntohl( (unsigned long)infh->inf_header_len );
    infh->inf_type = ntohl( (unsigned long)infh->inf_type );
    infh->inf_num_sets = ntohl( (unsigned long)infh->inf_num_sets );

    /*
     * The number of entries must be taken AFTER the conversion: before it
     * the field is still in network byte order and would yield a bogus loop
     * bound on little-endian hosts.
     */
    num = infh->inf_num_sets;
    for( i=0; i < num; i++ )
    {
	infh->inf_pgid[i].pid =
	    ntohl( (unsigned long)infh->inf_pgid[i].pid );
	infh->inf_pgid[i].proc =
	    ntohl( (unsigned long)infh->inf_pgid[i].proc );
    }
    return;
}


/*
 * totem_comm_open()
 *
 * Construct a totem_comm_t and open a connection to the totemd on the given
 * host and port.
 *
 * Note_enqueue: This routine could cause messages to be enqueued.
 */
static totem_comm_t *
totem_comm_open(
    char *				host,
    unsigned short			port)
{
    int					rc;
    long				temp_pgid;
    totem_comm_t *			comm;
    totem_comm_hashkey_t		comm_hashkey;
    globus_list_t *			comm_entry;

    /*
     * Check to see of a communication channel already exists for this totemd
     */
    comm_hashkey.host = host;
    comm_hashkey.port = port;
    comm = globus_hashtable_lookup( &totem_comm_hash_table,
			       &comm_hashkey);
    if (comm != NULL)
    {
	return comm;
    }

    /*
     * Construct a new communication channel
     */
    NexusMalloc(totem_comm_open(),
		comm,
		totem_comm_t *,
		sizeof(totem_comm_t));

    comm->state = COMM_STATE_OPEN;
    comm->host = _nx_copy_string(host);
    comm->port = port;
    comm->hashkey.host = comm->host;
    comm->hashkey.port = comm->port;
    comm->fd = -1;
    comm->pgid.pid = 0;
    comm->pgid.proc = 0;
    comm->pg_list = NULL;
    comm->outgoing.write_in_progress = NEXUS_FALSE;
    comm->outgoing.write_q_head = (struct globus_nexus_buffer_s *) NULL;
    comm->outgoing.write_q_tail = (struct globus_nexus_buffer_s *) NULL;

    comm->incoming.state = INCOMING_STATE_MSG_SIZE;
    comm->incoming.storage_size = 0;
    comm->incoming.storage = NULL;
    comm->incoming.current = comm->incoming.storage;
    comm->incoming.dispatch_in_progress = NEXUS_FALSE;
    comm->incoming.dispatch_q_head = (struct globus_nexus_buffer_s *) NULL;
    comm->incoming.dispatch_q_tail = (struct globus_nexus_buffer_s *) NULL;

    /*
     * Register this new communication channel so we can find it later
     */
    globus_list_insert(&totem_comm_list, comm);

    globus_hashtable_insert(&totem_comm_hash_table,
			&comm->hashkey,
			comm);

    /*
     * Connect to totemd
     */
    rc = nexus_fd_connect(comm->host,
			  comm->port,
			  &comm->fd);
    if (rc != 0)
    {
	goto fn_abort;
    }

    totem_debug_printf(
	2,
	("outgoing_open(): nexus_fd_connect(%s/%hu) returns fd=%d\n",
	 comm->host,
	 comm->port,
	 comm->fd));

    /*
     * We need to contact the totem daemon and find out our identifier the code
     * below does this.
     */
    temp_pgid = htonl( (unsigned long) 0 );

    /*
     * The following write is accepted by SS_Accept_Session of the totem daemon
     */
    rc = nexus_fd_register_for_write(
	comm->fd,
	(char *) &temp_pgid,
	sizeof(long),
	outgoing_write_callback,
	outgoing_write_error_callback,
	(void *) comm );
    if ( rc != 0 )
    {
	nexus_fd_close(comm->fd);
	goto fn_abort;
    }

    /* 
     * Block on a read waiting to complete the handshake with the totem daemon
     */
    rc = _nx_read_blocking(comm->fd, 
			  (char *)&comm->pgid.pid,
			  sizeof(long));
    if (rc != 0 )
    {
	nexus_fd_close(comm->fd);
	goto fn_abort;
    }

    comm->pgid.pid = ntohl( (unsigned long)(comm->pgid.pid));
    comm->pgid.proc = totem_Get_Hostlong( comm->host );

    totem_debug_printf(3, ("totem proc is %lu address is %s\n", comm->pgid.proc,
		       comm->host ));

    totem_debug_printf(
	3,
	("socket %d,private logical name is proc=%lu pid=%lu\n",comm->fd,
	 comm->pgid.proc,
	 comm->pgid.pid ));

    /*
     * Register read so that data coming from totemd will be processed
     */
    rc = nexus_fd_register_for_read(comm->fd,
				    (char *) &comm->incoming.msg_size,
				    sizeof(comm->incoming.msg_size),
				    sizeof(comm->incoming.msg_size),
				    incoming_read_callback,
				    incoming_read_error_callback,
				    (void *) comm);
    if (rc != 0)
    {
	nexus_fd_close(comm->fd);
	goto fn_abort;
    }

    totem_debug_printf(
	1,
	("totem_comm_open(): opened comm %s:%hu, fd=%d\n",
	 comm->host,
	 comm->port,
	 comm->fd));


    return (comm);

  fn_abort:
    totem_debug_printf(
	1,
	("totem_comm_open(): "
	 "Failed to establish communication with %s:%hu (rc=%d)\n",
	 comm->host,
	 comm->port,
	 rc));

    /*
     * Unregister this communication channel
     */
    comm->state = COMM_STATE_CLOSED;
    comm_entry = globus_list_search( totem_comm_list, comm );
    NexusAssert(comm_entry != NULL);
    globus_list_remove(&totem_comm_list, comm_entry );

    globus_hashtable_remove( &totem_comm_hash_table,
			&comm->hashkey);

    return(NULL);
}
/* totem_comm_open() */


/*
 * totem_comm_close()
 *
 * Close the communication channel to totemd.  Calling this on a channel
 * that is already closed has no effect.
 *
 * Every process group attached to the channel is put into
 * PROCESS_GROUP_STATE_FAULT with the given fault_code and detached from the
 * channel (its comm pointer is set to NULL).  Pending reads and writes are
 * unregistered.
 *
 *   fault_code == NEXUS_SUCCESS   a close message is queued for totemd; the
 *                                 channel stays registered and is torn down
 *                                 by outgoing_write_callback()
 *   any other fault_code          the channel is unregistered, its fd is
 *                                 closed and its memory is freed right here;
 *                                 'comm' must not be used after the call
 */
static void
totem_comm_close(
    totem_comm_t *			comm,
    int					fault_code)
{
    globus_list_t *			pg_entry;
    totem_process_group_t *		process_group;
    globus_list_t *                     comm_entry;

    totem_debug_printf(
	1,
	("totem_comm_close(): closing comm %s:%hu, fd=%d, state=%d\n",
	 comm->host,
	 comm->port,
	 comm->fd,
	 comm->state));

    if (comm->state == COMM_STATE_CLOSED)
    {
	return;
    }

    comm->state = COMM_STATE_CLOSED;

    /*
     * Let all of the process groups associated with this communication channel
     * know that it no longer exists
     */
    pg_entry = comm->pg_list;
    while (pg_entry != NULL)
    {
	process_group = globus_list_first(pg_entry);

	/*
	 * Change to the process group's fault state, set the fault code, and
	 * kill the pointer to the failing communication channel.
	 */
	process_group->state = PROCESS_GROUP_STATE_FAULT;
	process_group->fault_code = fault_code;
	process_group->comm = NULL;

	pg_entry = globus_list_rest(pg_entry);
    }

    /*
     * Free up resources held by this communication channel
     */
    globus_list_free(comm->pg_list);

    /*
     * Unregister any pending read or write operations.
     */
    nexus_fd_unregister(comm->fd, NULL);

    if (fault_code == NEXUS_SUCCESS)
    {
	/*
	 * Send a close message; the channel will be closed when the outgoing
	 * callback is received.
	 *
	 * The host string is deliberately NOT released on this path:
	 * comm->hashkey.host points at it and the channel stays in
	 * totem_comm_hash_table until outgoing_write_callback() removes it.
	 * NOTE(review): whoever finally frees comm should also release
	 * comm->host.
	 */
	totem_Process_Close( comm );
    }
    else
    {
	/*
	 * Unregister this communication channel
	 */
	comm_entry = globus_list_search( totem_comm_list, comm );
	NexusAssert(comm_entry != NULL);
	globus_list_remove(&totem_comm_list, comm_entry );

	globus_hashtable_remove( &totem_comm_hash_table,
				 &comm->hashkey);

	/*
	 * Close the connection to totemd.
	 */
	nexus_fd_close(comm->fd);

	/*
	 * The host string may only go away once the hash table no longer
	 * needs the key that points at it.
	 */
	if (comm->host != (char *) NULL)
	{
	    NexusFree(comm->host);
	}

	NexusFree(comm);
    }
}
/* totem_comm_close() */


/*
 * totem_process_group_join_initiate()
 *
 * Join the process group with the specified ID.  Create a new process group
 * structure if one doesn't already exist.
 *
 * NOTE: the totem mutex must be held by any thread calling this routine
 */
static totem_process_group_t *
totem_process_group_join_initiate(
    char *				host,
    unsigned short			port,
    long				pid)
{
    totem_process_group_t *		process_group;
    totem_comm_t *			comm;
    nexus_byte_t *			a;

    /*
     * Find the incoming structure associated with this process group.  If no
     * such structure exists, create a new one
     */
    process_group = totem_process_group_find(host, port, pid);
    if (process_group)
    {
	return(process_group);
    }

    /*
     * Find/open a communication channel to the totemd
     */
    comm = totem_comm_open(host, port);
    if (!comm)
    {
	return NULL;
    }


    NexusMalloc(totem_process_group_join_initiate(),
		process_group,
		totem_process_group_t *,
		sizeof(totem_process_group_t));

    /*
     * size of pg_inf totem header this amount of space will be reserved in
     * the front of any buffer that nexus hands me for the totem header to
     * be placed.  Increased space by 4 bytes to include message size.
     */
    process_group->reserved_header_size = TOTEM_HDR_SIZE;

    process_group->type = NEXUS_PROTO_TYPE_TOTEM;
    process_group->funcs = &totem_proto_funcs;
    process_group->version = GLOBUS_L_NEXUS_TOTEM_PROTOCOL_VERSION;
    process_group->direct_custom_min_size =
	NEXUS_TOTEM_DIRECT_CUSTOM_MIN_SIZE;
    process_group->direct_custom_max_size = NEXUS_DC_MAX_U_LONG;
    process_group->direct_pointer_min_size = NEXUS_DC_MAX_U_LONG;
    process_group->direct_pointer_max_size = NEXUS_DC_MAX_U_LONG;
    process_group->can_use_iovec = NEXUS_FALSE;

    /*
     * Initialize totem specific fields
     */
    process_group->state = PROCESS_GROUP_STATE_INACTIVE;
    process_group->fault_code = 0;
    process_group->comm = comm;
    process_group->pid = pid;
    process_group->sp_cnt = 0;
    process_group->ep_list = NULL;
    process_group->hashkey.host = _nx_copy_string(host);
    process_group->hashkey.port = port;
    process_group->hashkey.pid = pid;
    process_group->numb_members = 0;

    /*
     * Create a machine independent protocol descriptor for this new
     * endpoint
     */
    process_group->proto_array_size = 2 + nexus_dc_sizeof_long(1);
    NexusMalloc(totem_process_group_join_initiate(),
		process_group->proto_array,
		nexus_byte_t *,
		process_group->proto_array_size);
    a = process_group->proto_array;
    *a++ = GLOBUS_L_NEXUS_TOTEM_MI_PROTO_VERSION;
    *a++ = (nexus_byte_t) NEXUS_DC_FORMAT_LOCAL;
    nexus_dc_put_long(&a, &pid, 1);

    /*
     * Add this process group to the communcation channel's process group
     * tracking structures
     */
    globus_list_insert(&process_group->comm->pg_list, process_group);

    globus_hashtable_insert( &totem_process_group_hash_table,
			&process_group->hashkey,
			process_group);

    /*
     * send message to join process group here
     */
    if( totem_Process_Join_Or_Leave( process_group, TOTEM_JOIN_SET ))
      totem_debug_printf(
	  2,
	  ("totem_Process_Join_Or_Leave(): nonzero return from join"));

    process_group->state = PROCESS_GROUP_STATE_JOINING;

    return process_group;
}
/* totem_process_group_join_initiate() */


/*
 * totem_process_group_join_finalize()
 *
 * Complete a join.  This should be called when we receive confirmation that
 * we have joined the process group.  A process group that is not in the
 * JOINING state is left untouched.
 *
 * NOTE: the totem mutex must be held by any thread calling this routine
 */
static void
totem_process_group_join_finalize(
    totem_process_group_t *		process_group)
{
    totem_debug_printf(
        2,
        ("totem_process_group_join_finalize(), state=%d\n",
         process_group->state));

    /*
     * Only a pending join can be completed
     */
    if (process_group->state != PROCESS_GROUP_STATE_JOINING)
    {
        return;
    }

    process_group->state = PROCESS_GROUP_STATE_ACTIVE;

    /*
     * The group may no longer be wanted by the time the join is confirmed;
     * give the leave logic a chance to start leaving right away.
     */
    totem_process_group_leave_initiate(process_group);
}
/* totem_process_group_join_finalize() */


/*
 * totem_process_group_leave_initiate()
 *
 * This should be called anytime we might be ready to leave a process group.
 * Nothing happens unless the process group has no endpoints left or the
 * module is shutting down (totem_done).  When one of those holds:
 *
 *   - an ACTIVE process group is sent a leave request and moves to LEAVING
 *   - a process group in the FAULT state moves to LEAVING and is finalized
 *     immediately by totem_process_group_leave_finalize()
 *
 * Process groups in any other state are left alone.
 *
 * NOTE: the totem mutex must be held by any thread calling this routine
 */
static void
totem_process_group_leave_initiate(
    totem_process_group_t *		process_group)
{
    totem_debug_printf(
        2,
        ("totem_process_group_leave_initiate(), state=%d, empty=%d\n",
         process_group->state,
         globus_list_empty(process_group->ep_list)));

    if (globus_list_empty(process_group->ep_list)
        || totem_done)
    {
        if (process_group->state == PROCESS_GROUP_STATE_ACTIVE)
        {
            /*
             * Send the message to leave this process group.  A nonzero
             * return is only logged; the state still moves to LEAVING.
             */
            if (totem_Process_Join_Or_Leave( process_group, TOTEM_LEAVE_SET ))
            {
                totem_debug_printf(
                    2,
                    ("totem_Process_Join_Or_Leave(): nonzero return from leave\n"));
            }

            process_group->state = PROCESS_GROUP_STATE_LEAVING;
        }
        else if (process_group->state == PROCESS_GROUP_STATE_FAULT)
        {
            /*
             * Finalize the leave right away rather than sending a leave
             * request.
             */
            process_group->state = PROCESS_GROUP_STATE_LEAVING;
            totem_process_group_leave_finalize(process_group);
        }
    }
}
/* totem_process_group_leave_initiate() */


/*
 * totem_process_group_leave_finalize()
 *
 * This should be called when we receive a confirmation that we have left a
 * process group.  This is when totem has delivered up a process group leave.
 *
 * The process group is marked INACTIVE, removed from the process group hash
 * table and from its communication channel's process group list, and the
 * channel is closed if this was its last process group.  The process group
 * structure itself is freed once no startpoints or endpoints refer to it.
 *
 * NOTE: the totem mutex must be held by any thread calling this routine
 */
static void
totem_process_group_leave_finalize(
    totem_process_group_t *		process_group)
{
    totem_debug_printf(
        2,
        ("totem_process_group_leave_finalize(), state=%d\n",
         process_group->state));

    process_group->state = PROCESS_GROUP_STATE_INACTIVE;

    /*
     * Remove this process group from the process group hash table.  This
     * must happen even when process_group->comm is NULL: totem_comm_close()
     * clears the comm pointer of every process group on a failing channel
     * but leaves the groups in the hash table, and the structure (and the
     * host string its hash key points at) may be freed below.
     *
     * The lookup guards against removing a different process group that is
     * registered under an equal key.
     */
    if (globus_hashtable_lookup( &totem_process_group_hash_table,
                                 &process_group->hashkey)
        == (void *) process_group)
    {
        globus_hashtable_remove( &totem_process_group_hash_table,
                                 &process_group->hashkey);
    }

    /*
     * Remove this process group from the communication channel's process
     * group list, and close the channel if nothing else is using it.
     */
    if (process_group->comm != NULL)
    {
        globus_list_t *			pg_entry;

        pg_entry = globus_list_search(process_group->comm->pg_list,
                                      process_group);
        NexusAssert(pg_entry != NULL);
        globus_list_remove(&process_group->comm->pg_list,
                           pg_entry);

        if (globus_list_empty(process_group->comm->pg_list))
        {
            totem_comm_close(process_group->comm, NEXUS_SUCCESS );
        }

        process_group->comm = NULL;
    }

    /*
     * Unless we are shutting down, we should not have any remaining
     * startpoints and endpoints
     */
    NexusAssert(totem_done || ( globus_list_empty( process_group->ep_list) && 
                                process_group->sp_cnt == 0));

    /*
     * If we have no outstanding startpoints and endpoints, then we can
     * free the process group structure.  We will want to keep the data
     * structures even if there are only startpoints referring to it so that
     * we know what process group the startpoint is for.
     */
    if (globus_list_empty(process_group->ep_list)
        && process_group->sp_cnt == 0)
    {
        NexusFree(process_group->hashkey.host);
        NexusFree(process_group->proto_array);
        NexusFree(process_group);
    }
}

/* totem_process_group_leave_finalize() */


/*
 * totem_comm_hash_value()
 *
 * Compute the hash value for communication channels to totem.
 *
 * key points to a totem_comm_hashkey_t.  The hash is the first character of
 * the host name plus the port number, reduced modulo table_size.  The sum is
 * formed in unsigned arithmetic so that a host name starting with a
 * character that is negative as a plain char cannot yield a negative
 * bucket index.
 *
 * Returns a value in the range [0, table_size).  table_size must be
 * positive.
 */
int
totem_comm_hash_value(
    void *				key,
    int					table_size)
{
    totem_comm_hashkey_t *		hashkey;
    unsigned long			sum;

    hashkey = (totem_comm_hashkey_t *) key;

    sum = (unsigned long) (unsigned char) hashkey->host[0]
        + (unsigned long) hashkey->port;

    return (int) (sum % (unsigned long) table_size);
}
/* totem_comm_hash_value() */

/*
 * totem_comm_hash_keyeq()
 *
 * Compare the hash keys for communication channels to totem
 */
int
totem_comm_hash_keyeq(
    void *				key1,
    void *				key2)
{
    totem_comm_hashkey_t *		hashkey1;
    totem_comm_hashkey_t *		hashkey2;

    hashkey1 = (totem_comm_hashkey_t *) key1;
    hashkey2 = (totem_comm_hashkey_t *) key2;

    if (hashkey1->port == hashkey2->port 
	&& strcmp(hashkey1->host, hashkey2->host) == 0)
    {
	return(1);
    }
    else
    {
	return(0);
    }
}
/* totem_comm_hash_keyeq() */

/*
 * totem_process_group_hash_value()
 *
 * Compute the hash value for totem process groups.
 *
 * key points to a totem_process_group_hashkey_t.  The hash is the first
 * character of the host name plus the port number plus the process group
 * ID, reduced modulo table_size.  The sum is formed in unsigned arithmetic
 * so that a negative process group ID (or one that does not fit in an int)
 * cannot yield a negative bucket index.
 *
 * Returns a value in the range [0, table_size).  table_size must be
 * positive.
 */
int
totem_process_group_hash_value( void *key, int table_size )
{
    totem_process_group_hashkey_t *	hashkey;
    unsigned long			sum;

    hashkey = (totem_process_group_hashkey_t *) key;

    sum = (unsigned long) (unsigned char) hashkey->host[0]
        + (unsigned long) hashkey->port
        + (unsigned long) hashkey->pid;

    return (int) (sum % (unsigned long) table_size);
}
/* totem_process_group_hash_value() */

/*
 *  totem_process_group_hash_keyeq()
 *  compare the hash keys for totem process groups
 */
int
totem_process_group_hash_keyeq( void *key1, void *key2)
{
    totem_process_group_hashkey_t *	hashkey1;
    totem_process_group_hashkey_t *	hashkey2;

    hashkey1 = (totem_process_group_hashkey_t *) key1;
    hashkey2 = (totem_process_group_hashkey_t *) key2;

    if (hashkey1->pid == hashkey2->pid
	&& hashkey1->port == hashkey2->port 
	&& strcmp(hashkey1->host, hashkey2->host) == 0)
    {
	return(1);
    }
    else
    {
	return(0);
    }
}
/* totem_process_group_hash_keyeq() */

/*
 * totem_process_group_find()
 *
 * Look up the process group registered under (host, port, pid) in the
 * process group hash table.
 *
 * Returns whatever globus_hashtable_lookup() yields for that key.  The host
 * string is only borrowed for the duration of the lookup; it is not copied.
 */
totem_process_group_t *
totem_process_group_find(
    char *				host,
    unsigned short			port,
    long				pid)
{
    totem_process_group_hashkey_t	lookup_key;
    totem_process_group_t *		found;

    /*
     * Build a temporary key on the stack
     */
    lookup_key.pid = pid;
    lookup_key.port = port;
    lookup_key.host = host;

    found = globus_hashtable_lookup(&totem_process_group_hash_table,
                                    &lookup_key);

    return found;
}
/* totem_process_group_find() */

/*
 * totem_Get_Hostlong()
 *
 * This routine converts a string with dotted notation 
 * that contains a hostname into the Internet Address as
 * network long.
 */
unsigned long 
totem_Get_Hostlong( char *hostname )
{
  struct    hostent   he;
  struct    hostent   *temp;
  char      buffer[500];
  int       err;
  struct    in_addr      *inaddr_p;

  temp = globus_libc_gethostbyname_r( hostname, &he, buffer, 500, &err );
  inaddr_p = (struct in_addr *) temp->h_addr;
  return inaddr_p->s_addr;

}



#endif /* HAVE_TOTEM_PROTO */
