#include "globus_hbm_codes.h"
#include "globus_hbm_datacollector.h"
#include "globus_hbm_defaults.h"

#include "hbm.h"

/*
#define DEBUGGING_HBMDC 1
*/

/*
** #define HBM_EXPERIMENT  is included in hbm_common.h
*/

/*
** Internal Definitions
*/
/*
** Buffer sizes.  Their uses lie outside this section; presumably they
** bound the serialized data collector, local monitor and client records
** (TODO confirm against the checkpoint code).
*/
#define DATACOLLECTOR_BUFFER_SIZE 1536
#define LOCALMONITOR_BUFFER_SIZE   384
#define CLIENT_BUFFER_SIZE        2048

/*
** Pseudo file descriptor value; presumably passed as chkpt_fd to
** globus_l_hbmdc_internal_checkpoint() for internally triggered
** checkpoints (TODO confirm).  The replacement list is parenthesized so
** the negative value always expands as a single operand.
*/
#define HBMDC_INTERNAL_CHKPT_FD     ( -1 )
#define HBMDC_MAX_BLOCK_TIME        60 /* seconds */


/*
** Internal Data Structures
*/

/*
** Decoded contents of one heartbeat message.  This is the type returned
** by globus_l_hbmdc_extract_hb_data() and taken by
** globus_l_hbmdc_free_hb_data().  The decoding code is not in this
** section, so the per-field notes are read off the field names only and
** should be confirmed.
*/
typedef struct globus_l_hbm_heartbeat_s {
    int             version;        /* presumably message format version */
    int             msg_len;        /* presumably total message length */
    struct in_addr  lm_addr;        /* presumably sending local monitor address */
    unsigned int    lm_port;        /* presumably sending local monitor port */
    int             pid;            /* presumably monitored process id */
    char           *procname;       /* presumably monitored process name */
    unsigned int    status;
    int             regtime;
    int             rptinterval;
    unsigned int    rptnum;
    int             blockedtime;
    int             cputime;
    int             unregtime;
    int             unregnum;
    int             msgnum;
    char           *msg;
} globus_l_hbm_heartbeat_t;


/*
** Head/tail anchor of a doubly linked list of client entries
** (hbmdc_cl_fields_t).  Both pointers are GLOBUS_NULL when the list is
** empty.
*/
typedef struct hbmdc_cl_list_head_s {
    hbmdc_cl_fields_t          *head;
    hbmdc_cl_fields_t          *tail;
} hbmdc_cl_list_head_t;

/*
** One local monitor (LM) known to a data collector.  Entries are chained
** through next/prev on the owning data collector's DC_lm_list, and each
** entry carries the list of clients reported by that LM.
*/
struct hbmdc_lm_fields_s {
    struct in_addr              LM_host_addr;
    char                       *LM_hostIPNum;
        /* the two fields above are obtained from recvfrom()'s from_addr */
    char                       *LM_hostName;
    unsigned int                LM_hostPort;
    int                         LM_ClientsCnt;      /* entries on LM_client_list */
    int                         LM_ClientsLiveCnt;  /* presumably clients not shut down */
    unsigned int                LM_SummaryStatus;
    hbmdc_cl_list_head_t        LM_client_list;     /* clients of this LM */
    struct hbmdc_lm_fields_s   *next;
    struct hbmdc_lm_fields_s   *prev;
};
typedef struct hbmdc_lm_fields_s hbmdc_lm_fields_t;


/*
** Head/tail anchor of a doubly linked list of local monitor entries.
** Both pointers are GLOBUS_NULL when the list is empty.
*/
typedef struct hbmdc_lm_list_head_s {
    hbmdc_lm_fields_t          *head;
    hbmdc_lm_fields_t          *tail;
} hbmdc_lm_list_head_t;

/*
** Per-instance state of one data collector (DC).  Instances are chained
** through next/prev on the file-level data collector list and are
** looked up by DC_handle.
*/
struct hbmdc_dc_fields_s {
    globus_hbm_datacollector_handle_t
                                DC_handle;      /* handle naming this instance */
    globus_mutex_t              DC_mutex;       /* per-instance lock */
    int                         DC_mutex_initialized;
                                                /* 0 at creation; presumably set
                                                   once DC_mutex is initialized */
    char                        DC_hostName[MAXHOSTNAMELEN];
                                                /* from globus_libc_gethostname() */
    struct in_addr              DC_host_in_addr;
                                                /* first address found for
                                                   DC_hostName */
    char                       *DC_hostIPNum;   /* heap copy of
                                                   inet_ntoa( DC_host_in_addr ) */
    struct sockaddr_in          DC_addr;        /* bind address of the heartbeat
                                                   socket (AF_INET, INADDR_ANY) */
    u_short                     DC_portNumHBMsg;
                                                /* heartbeat port, host byte order */
    int                         DC_hb_fd;       /* fd for incoming heartbeats */
    int                         DC_req_fd;      /* temporary, tcp fd for http
                                                   requests; -1 when unset */
    FILE                       *DC_log_file;    /* error log; may be GLOBUS_NULL */
    int                         DC_EvalInterval;
                                                /* seconds; defaulted/clamped at
                                                   creation */
    int                         DC_network_variation_allowance_secs;
    int                         DC_heartbeats_missing_overdue;
                                                /* never greater than the
                                                   shutdown threshold below */
    int                         DC_heartbeats_missing_shutdown;
    char                       *DC_ckptFileName;
                                                /* heap copy of the checkpoint
                                                   file name, or GLOBUS_NULL */
    char                       *DC_ckptFileNameWk;
                                                /* DC_ckptFileName with "_wk"
                                                   appended */
    int                         DC_ckptNeeded;  /* presumably: state changed since
                                                   the last checkpoint */
    UTCtime                     DC_ckpt_and_eval_time;
                                                /* initialised to creation time
                                                   plus DC_EvalInterval */
    hbmdc_cbf_t                 DC_reg_callback;
                                                /* client registration callback
                                                   supplied at creation */
    void                       *DC_reg_callback_user_data;
    hbmdc_tcp_fd_cbf_t          DC_tcp_req_callback;
                                                /* temporary, callback for http
                                                   req */
    int                         DC_LocalMonitorsCnt;
                                                /* entries on DC_lm_list */
    int                         DC_ClientsLiveCnt;
    hbmdc_lm_list_head_t        DC_lm_list;     /* local monitors of this DC */
    struct hbmdc_dc_fields_s   *next;
    struct hbmdc_dc_fields_s   *prev;
};
typedef struct hbmdc_dc_fields_s hbmdc_dc_fields_t;
typedef hbmdc_dc_fields_t globus_hbm_datacollector_t;


/*
** Head/tail anchor of the doubly linked list of data collector
** instances.  Both pointers are GLOBUS_NULL when the list is empty.
*/
typedef struct hbmdc_dc_list_s {
    hbmdc_dc_fields_t          *head;
    hbmdc_dc_fields_t          *tail;
} hbmdc_dc_list_t;


/*
** One entry of the callback list: the function to call together with
** the arguments to hand to it.  This is the type returned by
** globus_l_hbmdc_event_callback() and released by
** globus_l_hbmdc_free_callback_entry().
*/
struct hbmdc_dc_callback_entry_s {
    struct hbmdc_dc_callback_entry_s
                               *next;
    struct hbmdc_dc_callback_entry_s
                               *prev;
    hbmdc_cbf_t                 callback_function_ptr;
    int                         handle;     /* presumably the data collector
                                               handle - TODO confirm */
    globus_hbm_client_callbackdata_t
                               *callbackdata_ptr;
    hbmdc_cl_fields_t          *client_copy_ptr;
                                            /* presumably a private copy made by
                                               globus_l_hbmdc_copy_cl_fields() */
    void                       *callback_userdata_ptr;
};
typedef struct hbmdc_dc_callback_entry_s hbmdc_dc_callback_entry_t;


/*
** Head/tail anchor of a doubly linked list of callback entries.
*/
struct hbmdc_callback_list_s {
    hbmdc_dc_callback_entry_t  *head;
    hbmdc_dc_callback_entry_t  *tail;
};
typedef struct hbmdc_callback_list_s hbmdc_callback_list_t;


/*
** Global Variables
*/
/*
** File-local descriptor bookkeeping.  On HP-UX builds read_fds is a
** plain int and max_fds a size_t; everywhere else read_fds is an fd_set
** and max_fds an int.  Their uses are outside this section; presumably
** they are the read set and highest-descriptor bound used by the polling
** code (TODO confirm).
*/
#if  (defined TARGET_ARCH_HPUX )
    static int                  read_fds;
    static size_t               max_fds = 0;
#else
    static fd_set               read_fds;
    static int                  max_fds = 0;
#endif  /*  TARGET_ARCH_HPUX  */

/* added by bresnaha for globus_callback replacement of globus_poll */
/* presumably the handle of the callback registered for globus_l_hbmdc_poll()
   - TODO confirm against the activation code */
static globus_callback_handle_t       globus_l_hbm_callback_handle;

/*
**  The data collector uses two mutual exclusion controls:
**      globus_l_hbm_datacollector_global_mutex
**          for global data collector operations that may affect more
**          than one data collector entry/instance
**      data_collector_ptr->DC_mutex
**          for data collector operations relating to a single
**          data collector entry/instance
**
**  Usage:
**      globus_l_hbm_datacollector_global_mutex
**          Must be held to traverse or modify the data collector list,
**          including adding/deleting data collector instances to/from
**          the list.
**          Must be held when calling any procedure which returns a
**          data collector pointer.
**          May (and should) be released before modifying a data
**          collector entry/instance, but only AFTER the DC_mutex for
**          that entry/instance has been acquired.
**      data_collector_ptr->DC_mutex
**          Must be held to read or modify any data collector field in
**          the hbmdc_dc_fields_s struct EXCEPT:
**              the next and prev pointers
**              fd fields that are written only at data collector creation
**          (It must also be held when deleting the data collector
**          instance.)
**          Must be held when calling any procedure which takes a
**          data collector pointer (as opposed to a handle) as a
**          parameter, as the called procedure will assume that the
**          appropriate locks are held.
**
**  Lock ordering (to prevent deadlocks):
**      globus_l_hbm_datacollector_global_mutex
**          Must be held when locking any data_collector_ptr->DC_mutex.
**      data_collector_ptr->DC_mutex
**          Must NOT be held when locking
**          globus_l_hbm_datacollector_global_mutex.
*/
/* Lock for the data collector list as a whole; taken before any
   per-instance DC_mutex. */
static globus_mutex_t   globus_l_hbm_datacollector_global_mutex;

/* Presumably callback_mutex guards callback_list - TODO confirm.
   NOTE(review): the list below has external linkage despite its
   globus_l_ prefix; check for references from other files before making
   it static. */
static globus_mutex_t   globus_l_hbm_datacollector_callback_mutex;
hbmdc_callback_list_t   globus_l_hbm_datacollector_callback_list;

/* List of all data collector instances; starts out empty. */
static hbmdc_dc_list_t  globus_l_hbm_datacollector_list
                                      = { GLOBUS_NULL,
                                          GLOBUS_NULL };

/* Presumably the number of live instances and the source of the next
   handle value to hand out - TODO confirm against the create/destroy
   code. */
static int              globus_l_hbm_datacollector_count = 0;
static int              globus_l_hbm_datacollector_next_handle = 0;

/* Cached result of globus_i_hbm_get_time_conv_diff(); fetched by
   globus_hbm_datacollector_create() whenever the value is still 0. */
int                     time_conv_diff = 0;

/* State that exists only in HBM_EXPERIMENT builds; its use is outside
   this section. */
#ifdef HBM_EXPERIMENT
struct timeval          heartbeat_file_cutoff_time;
int                     heartbeat_file_counter = 0;
char                    heartbeat_file_name_str[GLOBUS_HBM_BUFF_SIZE_MAX];
FILE*                   heartbeat_file_fd1 = GLOBUS_NULL;
FILE*                   heartbeat_file_fd2 = GLOBUS_NULL;
#endif

/*
** Forward declarations of the file-local (static) utility routines
*/
static hbmdc_cl_fields_t*
globus_l_hbmdc_copy_cl_fields(
                hbmdc_cl_fields_t*      cl_entry );

static int
globus_l_hbmdc_count_clients(
                hbmdc_cl_list_head_t*   client_list_ptr );

static int
globus_l_hbmdc_count_live_clients(
                hbmdc_cl_list_head_t*   client_list_ptr );

static int
globus_l_hbmdc_count_localmonitors(
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static int
globus_l_hbmdc_datacollector_activate(
                void );

static int
globus_l_hbmdc_datacollector_deactivate(
                void );

static void
globus_l_hbmdc_evaluate_datacollector(
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static hbmdc_dc_callback_entry_t*
globus_l_hbmdc_event_callback(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                hbmdc_cl_fields_t*      cl_entry,
                unsigned int            event );

static hbmdc_cl_fields_t*
globus_l_hbmdc_extract_client(
                char*                   buff_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                hbmdc_lm_fields_t*      lm_entry );

static int
globus_l_hbmdc_extract_datacollector(
                char*                   buff_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static globus_l_hbm_heartbeat_t*
globus_l_hbmdc_extract_hb_data(
                char*                   read_buff,
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static hbmdc_lm_fields_t*
globus_l_hbmdc_extract_localmonitor(
                char*                   buff_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static hbmdc_cl_fields_t*
globus_l_hbmdc_find_client_entry(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                struct in_addr          host_ipaddr,
                u_int                   pid,
                char*                   procname );

static globus_hbm_datacollector_t*
globus_l_hbmdc_find_datacollector(
                globus_hbm_datacollector_handle_t
                                        handle );

static globus_hbm_datacollector_t*
globus_l_hbmdc_find_datacollector_fd(
                int                     fd );

static hbmdc_lm_fields_t*
globus_l_hbmdc_find_localmonitor_entry(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                char*                   lm_ipnum,
                unsigned int            lm_port );

static globus_hbm_datacollector_t*
globus_l_hbmdc_find_set_fd(
                globus_bool_t*          is_tcp_fd );

static void
globus_l_hbmdc_free_callback_entry(
                hbmdc_dc_callback_entry_t*
                                        callback_entry_ptr );

static void
globus_l_hbmdc_free_callbackdata(
                globus_hbm_client_callbackdata_t*
                                        callbackdata_ptr );

static void
globus_l_hbmdc_free_client_entry(
                hbmdc_cl_fields_t*      cl_entry );

static void
globus_l_hbmdc_free_datacollector(
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static void
globus_l_hbmdc_free_hb_data(
                globus_l_hbm_heartbeat_t*
                                        hb_data );

static void
globus_l_hbmdc_free_localmonitor_entry(
                hbmdc_lm_fields_t*      lm_entry );

static int
globus_l_hbmdc_internal_checkpoint(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                int                     chkpt_fd );

static globus_bool_t
globus_l_hbmdc_poll(
                globus_abstime_t *      time_stop,
                void*                   user_args );

static int
globus_l_hbmdc_process_heartbeat(
                int                     n_bytes_read,
                char*                   read_buff,
                struct sockaddr_in*     from_addr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static int
globus_l_hbmdc_restore_from_checkpoint(
                char*                   ckpt_filename_str,
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static int
globus_l_hbmdc_set_cl_buffer(
                char*                   buffer,
                hbmdc_cl_fields_t*      cl_fields );

static int
globus_l_hbmdc_set_dc_buffer(
                char*                   buffer,
                globus_hbm_datacollector_t*
                                        datacollector_ptr );

static int
globus_l_hbmdc_set_lm_buffer(
                char*                   buffer,
                hbmdc_lm_fields_t*      lm_fields );

static int
globus_l_hbmdc_validate_client_IPNum(
                char*                   client_IPNum,
                hbmdc_lm_fields_t*      lm );


/***************************************************************************
 *
 *
 *                        Module activation structure
 *
 *
 ***************************************************************************
 */
/*
** Descriptor through which the Globus module system activates and
** deactivates this library.  Only the name and the activate/deactivate
** hooks are supplied; the two remaining slots are left GLOBUS_NULL
** (presumably the atexit and get-pointer hooks - confirm against the
** globus_module_descriptor_t declaration).
*/
globus_module_descriptor_t globus_i_hbm_datacollector_module =
{
    "globus_hbm_datacollector",                 /* module name */
    globus_l_hbmdc_datacollector_activate,      /* activation hook */
    globus_l_hbmdc_datacollector_deactivate,    /* deactivation hook */
    GLOBUS_NULL,
    GLOBUS_NULL
};


/*
** globus_hbm_datacollector_clear_unregistered_clients()
**
** For the data collector identified by dc_handle:
**   - removes and frees every client entry whose CL_procStatus is one of
**     the four GLOBUS_HBM_PROCSTATUS_SHUTDOWN_* values, decrementing the
**     owning local monitor's LM_ClientsCnt for each one;
**   - then removes and frees every local monitor (LM) entry whose client
**     list is empty, decrementing DC_LocalMonitorsCnt for each one.
**
** Locking:  the global data collector mutex is held for the lookup, the
** instance's DC_mutex is acquired before the global mutex is released,
** and DC_mutex is held for the whole traversal.
**
** Returns GLOBUS_SUCCESS, or GLOBUS_FAILURE when no data collector
** matches dc_handle.
*/
int
globus_hbm_datacollector_clear_unregistered_clients(
                globus_hbm_datacollector_handle_t
                                        dc_handle )
{
    globus_hbm_datacollector_t*
                        dc_ptr        = GLOBUS_NULL;
    hbmdc_lm_fields_t*  lm_cur        = GLOBUS_NULL;
    hbmdc_lm_fields_t*  lm_following  = GLOBUS_NULL;
    hbmdc_cl_fields_t*  cl_cur        = GLOBUS_NULL;
    hbmdc_cl_fields_t*  cl_following  = GLOBUS_NULL;
    int                 is_shut_down  = 0;

    /* Look the instance up under the global lock. */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

    dc_ptr = globus_l_hbmdc_find_datacollector( dc_handle );
    if( dc_ptr == GLOBUS_NULL )
    {
        /* There is no log file to report to without an instance. */
#ifdef DEBUGGING_HBMDC
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_hbm_datacollector_clear_unregistered_clients():"
                        "\n"
                        "        globus_l_hbmdc_find_datacollector() "
                        "failed:\n"
                        "            DataCollector not found.\n\n" );
#endif /*  defined DEBUGGING_HBMDC  */

        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }

    /* Hand over from the global lock to the instance lock. */
    globus_mutex_lock( &dc_ptr->DC_mutex );
    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    for( lm_cur = dc_ptr->DC_lm_list.head;
         lm_cur != GLOBUS_NULL;
         lm_cur = lm_following )
    {
        /* Pass 1: drop this LM's clients that are in a shutdown state. */
        for( cl_cur = lm_cur->LM_client_list.head;
             cl_cur != GLOBUS_NULL;
             cl_cur = cl_following )
        {
            /* Remember the successor before the entry can be freed. */
            cl_following = cl_cur->next;

            is_shut_down =
                   ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NORMAL )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_ABNORMAL )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_DIED )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NO_RPT );
            if( !is_shut_down )
            {
                continue;
            }

            ListRemove( &( lm_cur->LM_client_list ),
                        cl_cur );
            globus_l_hbmdc_free_client_entry( cl_cur );
            lm_cur->LM_ClientsCnt--;
        }

        /* Pass 2: an LM with clients left stays on the list. */
        lm_following = lm_cur->next;
        if( lm_cur->LM_client_list.head != GLOBUS_NULL )
        {
            continue;
        }

        /* Consistency check: an empty list must have a zero count. */
        if( lm_cur->LM_ClientsCnt != 0 )
        {
            if( dc_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        dc_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_hbm_datacollector_clear_unregistered_clients():"
                        "\n"
                        "        No clients found for LM, but count > 0.\n"
                        "        Removing LM.\n\n" );
            }
            lm_cur->LM_ClientsCnt = 0;
        }

        /* Unlink and release the now client-less LM. */
        ListRemove( &( dc_ptr->DC_lm_list ),
                    lm_cur );
        globus_l_hbmdc_free_localmonitor_entry( lm_cur );
        dc_ptr->DC_LocalMonitorsCnt--;
    }

    globus_mutex_unlock( &dc_ptr->DC_mutex );

    return GLOBUS_SUCCESS;
}


int
globus_hbm_datacollector_create(
                u_short*                hb_port_ptr,
                int                     eval_interval_secs,
                int                     network_variation_allowance_secs,
                int                     heartbeats_missing_overdue,
                int                     heartbeats_missing_shutdown,
                char*                   ckpt_filename_restore_str,
                char*                   ckpt_filename_str,
                FILE*                   log_file_ptr,
                void ( *proc_client_reg_callback )(
                            globus_hbm_datacollector_handle_t
                                        dc_handle,
                            globus_hbm_client_callbackdata_t*
                                        callbackdata_ptr,
                            globus_hbm_datacollector_client_entry_t*
                                        client_ptr,
                            void*       user_data_ptr ),
                void*                   user_data_ptr,
                globus_hbm_datacollector_handle_t*
                                        dc_handle_ptr )
{
    globus_hbm_datacollector_t*
                        datacollector_ptr =
                                          GLOBUS_NULL;
    hbmdc_cl_fields_t*  cl_entry        = GLOBUS_NULL;
    hbmdc_lm_fields_t*  lm_entry        = GLOBUS_NULL;

    struct hostent*     hostent_ptr     = GLOBUS_NULL;
    struct hostent*     hostent_result  = GLOBUS_NULL;

    hbmdc_dc_callback_entry_t*
                        callback_entry_ptr;

    int                 error;
    int                 one             = 1;
    unsigned int        events        = 0;

    char*               buffer          = GLOBUS_NULL;

    char*               ipnum           = GLOBUS_NULL;

    struct timeval      tm;

    int                 fd;
    FILE*               hbmdc_log       = GLOBUS_NULL;

    /*
    ** TODO: check all args to make sure they're valid
    */

    hbmdc_log = log_file_ptr;

    if( gettimeofday( &tm, GLOBUS_NULL ))
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_hbm_datacollector_create():\n"
                        "        gettimeofday() failed:  errno [%d]: %s.\n\n",
                        errno,
                        strerror( errno ));
        }
        return GLOBUS_FAILURE;
    }
    if( time_conv_diff == 0 )
    {
        time_conv_diff = globus_i_hbm_get_time_conv_diff();
    }

/*
**  Initialize *dc_handle_ptr to zero for failure.
*/
    *dc_handle_ptr = 0;

    datacollector_ptr = globus_malloc( sizeof( globus_hbm_datacollector_t ));
    if( datacollector_ptr == GLOBUS_NULL )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        return GLOBUS_FAILURE;
    }
    memset(             (void *) datacollector_ptr,
                        0,
                        sizeof( globus_hbm_datacollector_t ));
    datacollector_ptr->DC_mutex_initialized       = 0;
    datacollector_ptr->DC_hostIPNum               = GLOBUS_NULL;
    datacollector_ptr->DC_hb_fd                   = -1;
    datacollector_ptr->DC_req_fd                  = -1;
    datacollector_ptr->DC_log_file                = GLOBUS_NULL;
    datacollector_ptr->DC_ckptFileName            = GLOBUS_NULL;
    datacollector_ptr->DC_ckptFileNameWk          = GLOBUS_NULL;
    datacollector_ptr->DC_reg_callback_user_data  = GLOBUS_NULL;
    datacollector_ptr->DC_lm_list.head            = GLOBUS_NULL;
    datacollector_ptr->DC_lm_list.tail            = GLOBUS_NULL;
    datacollector_ptr->next =
            datacollector_ptr->prev               = GLOBUS_NULL;

/*
**  Fill in handle structure.
*/

    if( globus_libc_gethostname(
                        (char *) &datacollector_ptr->DC_hostName,
                        MAXHOSTNAMELEN ) < 0 )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_libc_gethostname() failed:  "
                        "errno [%d]: %s.\n\n",
                        errno,
                        strerror( errno ));
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

    hostent_result = globus_malloc( sizeof( struct hostent ));
    if( hostent_result == GLOBUS_NULL )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

    buffer = (char *) globus_malloc( GLOBUS_HBM_BUFF_SIZE_HOSTENT );
    if( buffer == GLOBUS_NULL )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [05] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_free( hostent_result );
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

    memset(             (void *) hostent_result,
                        0,
                        sizeof( struct hostent ));
    memset(             (void *) buffer,
                        0,
                        GLOBUS_HBM_BUFF_SIZE_HOSTENT );
    hostent_ptr = globus_libc_gethostbyname_r(
                        datacollector_ptr->DC_hostName,
                        hostent_result,
                        buffer,
                        GLOBUS_HBM_BUFF_SIZE_HOSTENT,
                        &error );
    if( hostent_ptr == GLOBUS_NULL )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [06] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_libc_gethostbyname_r() failed:  "
                        "errno [%d]: %s.\n\n",
                        errno,
                        strerror( errno ));
        }
        globus_free( buffer );
        globus_free( hostent_result );
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

    memcpy( (void *) &datacollector_ptr->DC_host_in_addr,
            (void *) *( hostent_ptr->h_addr_list ),
            sizeof( struct in_addr ));

    globus_free( buffer );
    globus_free( hostent_result );

    /*
    ** TODO: inet_ntoa is not thread-safe
    */
    ipnum = inet_ntoa( datacollector_ptr->DC_host_in_addr );
    datacollector_ptr->DC_hostIPNum = globus_malloc( strlen(ipnum ) + 1 );
    if( datacollector_ptr->DC_hostIPNum == GLOBUS_NULL )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [07] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }
    strcpy( datacollector_ptr->DC_hostIPNum, ipnum );

    datacollector_ptr->DC_addr.sin_family = AF_INET;
    datacollector_ptr->DC_addr.sin_port = htons( *hb_port_ptr );
    datacollector_ptr->DC_addr.sin_addr.s_addr = htonl(INADDR_ANY );

/*
**  Open up a udp socket.
*/

    if(( fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP )) < 0 )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [08] in "
                        "globus_hbm_datacollector_create():\n"
                        "        socket() failed:  "
                        "errno [%d]: %s.\n\n",
                        errno,
                        strerror( errno ));
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

    if( setsockopt(     fd,
                        SOL_SOCKET,
                        SO_REUSEADDR,
                        (char *) &one,
                        sizeof( one )) < 0 )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [09] in "
                        "globus_hbm_datacollector_create():\n"
                        "        setsockopt(SO_REUSEADDR) failed:  "
                        "errno [%d]: %s.\n"
                        "        Continuing.\n\n",
                        errno,
                        strerror( errno ));
        }
    }

    if( bind(           fd,
                        (struct sockaddr *) &datacollector_ptr->DC_addr,
                        sizeof( struct sockaddr_in )) < 0 )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [10] in "
                        "globus_hbm_datacollector_create():\n"
                        "        bind() failed:  "
                        "errno [%d]: %s.\n\n",
                        errno,
                        strerror( errno ));
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

/*
**  Fill in the remainder of handle with the parameters.
*/
    datacollector_ptr->DC_portNumHBMsg = *hb_port_ptr =
                                ntohs( datacollector_ptr->DC_addr.sin_port );

    datacollector_ptr->DC_hb_fd = fd;
    datacollector_ptr->DC_req_fd = -1;
    datacollector_ptr->DC_log_file = hbmdc_log;

    if( eval_interval_secs == 0 )
    {
        datacollector_ptr->DC_EvalInterval =
                                HBMDC_DEFAULT_EVAL_INTERVAL_SECS;
    }
    else if( eval_interval_secs <
                            HBMDC_LIM_EVAL_INTERVAL_SECS_MIN )
    {
        datacollector_ptr->DC_EvalInterval =
                                HBMDC_LIM_EVAL_INTERVAL_SECS_MIN;
    }
    else if( eval_interval_secs >
                            HBMDC_LIM_EVAL_INTERVAL_SECS_MAX )
    {
        datacollector_ptr->DC_EvalInterval =
                                HBMDC_LIM_EVAL_INTERVAL_SECS_MAX;
    }
    else
    {
        datacollector_ptr->DC_EvalInterval =
                                eval_interval_secs;
    }

    if( network_variation_allowance_secs == 0 )
    {
        datacollector_ptr->DC_network_variation_allowance_secs =
                                HBMDC_DEFAULT_NETWORK_VAR_ALLOWANCE_SECS;
    }
    else if( network_variation_allowance_secs <
                            HBMDC_LIM_NETWORK_VAR_ALLOWANCE_SECS_MIN )
    {
        datacollector_ptr->DC_network_variation_allowance_secs =
                                HBMDC_LIM_NETWORK_VAR_ALLOWANCE_SECS_MIN;
    }
    else if( network_variation_allowance_secs >
                            HBMDC_LIM_NETWORK_VAR_ALLOWANCE_SECS_MAX )
    {
        datacollector_ptr->DC_network_variation_allowance_secs =
                                HBMDC_LIM_NETWORK_VAR_ALLOWANCE_SECS_MAX;
    }
    else
    {
        datacollector_ptr->DC_network_variation_allowance_secs =
                                network_variation_allowance_secs;
    }

    if( heartbeats_missing_overdue == 0 )
    {
        datacollector_ptr->DC_heartbeats_missing_overdue =
                                HBMDC_DEFAULT_HEARTBEATS_MISSING_OVERDUE;
    }
    else if( heartbeats_missing_overdue <
                            HBMDC_LIM_HEARTBEATS_MISSING_OVERDUE_MIN )
    {
        datacollector_ptr->DC_heartbeats_missing_overdue =
                                HBMDC_LIM_HEARTBEATS_MISSING_OVERDUE_MIN;
    }
    else if( heartbeats_missing_overdue >
                            HBMDC_LIM_HEARTBEATS_MISSING_OVERDUE_MAX )
    {
        datacollector_ptr->DC_heartbeats_missing_overdue =
                                HBMDC_LIM_HEARTBEATS_MISSING_OVERDUE_MAX;
    }
    else
    {
        datacollector_ptr->DC_heartbeats_missing_overdue =
                                heartbeats_missing_overdue;
    }

    if( heartbeats_missing_shutdown == 0 )
    {
        datacollector_ptr->DC_heartbeats_missing_shutdown =
                                HBMDC_DEFAULT_HEARTBEATS_MISSING_SHUTDOWN;
    }
    else if( heartbeats_missing_shutdown <
                            HBMDC_LIM_HEARTBEATS_MISSING_SHUTDOWN_MIN )
    {
        datacollector_ptr->DC_heartbeats_missing_shutdown =
                                HBMDC_LIM_HEARTBEATS_MISSING_SHUTDOWN_MIN;
    }
    else if( heartbeats_missing_shutdown >
                            HBMDC_LIM_HEARTBEATS_MISSING_SHUTDOWN_MAX )
    {
        datacollector_ptr->DC_heartbeats_missing_shutdown =
                                HBMDC_LIM_HEARTBEATS_MISSING_SHUTDOWN_MAX;
    }
    else
    {
        datacollector_ptr->DC_heartbeats_missing_shutdown =
                                heartbeats_missing_shutdown;
    }

    if( datacollector_ptr->DC_heartbeats_missing_overdue >
                            datacollector_ptr->DC_heartbeats_missing_shutdown )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [11] in "
                        "globus_hbm_datacollector_create():\n"
                        "        (heartbeats_missing_overdue > "
                        "heartbeats_missing_shutdown)  Aborting.\n\n" );
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }

    if( ckpt_filename_str != GLOBUS_NULL )
    {
        datacollector_ptr->DC_ckptFileName =
                globus_malloc( strlen( ckpt_filename_str ) + 1 );
        if( datacollector_ptr->DC_ckptFileName == GLOBUS_NULL )
        {
            if( hbmdc_log != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [12] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
            }
            globus_l_hbmdc_free_datacollector( datacollector_ptr );

            return GLOBUS_FAILURE;
        }
        strcpy( datacollector_ptr->DC_ckptFileName, ckpt_filename_str );

        datacollector_ptr->DC_ckptFileNameWk =
            globus_malloc( strlen( ckpt_filename_str ) + 1 + 3 ); /* "_wk" = 3*/
        if( datacollector_ptr->DC_ckptFileNameWk == GLOBUS_NULL )
        {
            if( hbmdc_log != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [13] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
            }
            globus_l_hbmdc_free_datacollector( datacollector_ptr );

            return GLOBUS_FAILURE;
        }
        globus_libc_sprintf(
                        datacollector_ptr->DC_ckptFileNameWk,
                        "%s_wk",
                        ckpt_filename_str );
    }

    datacollector_ptr->DC_ckpt_and_eval_time =
                                tm.tv_sec
                              + datacollector_ptr->DC_EvalInterval;
    datacollector_ptr->DC_reg_callback = proc_client_reg_callback;
    datacollector_ptr->DC_reg_callback_user_data = user_data_ptr;
    datacollector_ptr->DC_LocalMonitorsCnt = 0;
    datacollector_ptr->DC_ClientsLiveCnt = 0;

    /* restore from checkpoint file if one is provided */
    if( ckpt_filename_restore_str != GLOBUS_NULL )
    {
        if( globus_l_hbmdc_restore_from_checkpoint(
                        ckpt_filename_restore_str,
                        datacollector_ptr ) ==
                                        GLOBUS_FAILURE )
        {
            if( hbmdc_log != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [14] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_l_hbmdc_restore_from_checkpoint() "
                        "failed:\n"
                        "            Unable to restore from checkpoint "
                        "file \"%s\".\n"
                        "            Continuing without checkpoint file.\n\n",
                        ckpt_filename_restore_str );
            }
        }
        else
        {
            if( hbmdc_log != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    globus_hbm_datacollector_create():\n"
                        "        globus_l_hbmdc_restore_from_checkpoint() "
                        "succeeded:\n"
                        "            Succesfully restored from checkpoint "
                        "file \"%s\".\n\n",
                        ckpt_filename_restore_str );
            }
        }
    }

    /* initialize mutex for this DC instance */
    if( globus_mutex_init(  &( datacollector_ptr->DC_mutex ),
                            GLOBUS_NULL ) ==
                                        GLOBUS_FAILURE )
    {
        if( hbmdc_log != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        hbmdc_log,
                        "Globus HBM Data Collector library:\n"
                        "    Error [15] in "
                        "globus_hbm_datacollector_create():\n"
                        "        globus_mutex_init() failed:  "
                        "Aborting.\n\n" );
        }
        globus_l_hbmdc_free_datacollector( datacollector_ptr );

        return GLOBUS_FAILURE;
    }
    datacollector_ptr->DC_mutex_initialized = 1;

/*
**  Do the things that require mutual exclusion.
**
**      Don't worry about max_fds -- it is set in the poll routine.
*/
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );
    globus_mutex_lock( &datacollector_ptr->DC_mutex );

/*
**  Add this (new ) datacollector to the list.
*/
    datacollector_ptr->DC_handle = globus_l_hbm_datacollector_next_handle;
    globus_l_hbm_datacollector_next_handle++;
    ListAppend( &( globus_l_hbm_datacollector_list ),
                datacollector_ptr );
    globus_l_hbm_datacollector_count++;

/*
**  Checkpoint state.
*/
    globus_l_hbmdc_internal_checkpoint(
                                datacollector_ptr,
                                HBMDC_INTERNAL_CHKPT_FD );

    *dc_handle_ptr = datacollector_ptr->DC_handle;

/*
**  Execute registration callbacks for all the restored clients.
**
**      First we set up for all the callbacks.
*/
    for( lm_entry = datacollector_ptr->DC_lm_list.head;
         lm_entry != GLOBUS_NULL;
         lm_entry = lm_entry->next )
    {
        for( cl_entry = lm_entry->LM_client_list.head;
             cl_entry != GLOBUS_NULL;
             cl_entry = cl_entry->next )
        {
            events = GLOBUS_HBM_DATACOLLECTOR_EVENT_REGISTRATION;
            if( ( callback_entry_ptr =
                        globus_l_hbmdc_event_callback(
                            datacollector_ptr,
                            cl_entry,
                            events )) != GLOBUS_NULL )
            {
                globus_mutex_lock(
                        &globus_l_hbm_datacollector_callback_mutex );
                ListAppend(
                        &globus_l_hbm_datacollector_callback_list,
                        callback_entry_ptr );
                globus_mutex_unlock(
                        &globus_l_hbm_datacollector_callback_mutex );
            }
            datacollector_ptr->DC_ckptNeeded = 1;
        }
    }

/*
**  Release the mutex locks or the callbacks will deadlock.
*/
    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
    globus_mutex_unlock( &datacollector_ptr->DC_mutex );

/*
**  Now we can execute all the callbacks.
*/
    while( globus_l_hbm_datacollector_callback_list.head != GLOBUS_NULL )
    {
        globus_mutex_lock( &( globus_l_hbm_datacollector_callback_mutex ));
        if( globus_l_hbm_datacollector_callback_list.head != GLOBUS_NULL )
        {
            callback_entry_ptr =
                            globus_l_hbm_datacollector_callback_list.head;
            ListRemove( &globus_l_hbm_datacollector_callback_list,
                        callback_entry_ptr );

            globus_mutex_unlock( &globus_l_hbm_datacollector_callback_mutex );

            (*(callback_entry_ptr->callback_function_ptr ))(
                        callback_entry_ptr->handle,
                        callback_entry_ptr->callbackdata_ptr,
                        callback_entry_ptr->client_copy_ptr,
                        callback_entry_ptr->callback_userdata_ptr );

            globus_l_hbmdc_free_callback_entry(
                        callback_entry_ptr );
        }
        else
        {
            globus_mutex_unlock( &globus_l_hbm_datacollector_callback_mutex );
        }
    }

    return GLOBUS_SUCCESS;
}


/*
**  globus_hbm_datacollector_destroy()
**
**  Remove the data collector identified by dc_handle from the global
**  data collector list and free it.
**
**  Parameters:
**      dc_handle         handle of the data collector to destroy.
**      force_mode        GLOBUS_HBM_DATACOLLECTOR_FORCE_DESTROY_YES or
**                        GLOBUS_HBM_DATACOLLECTOR_FORCE_DESTROY_NO.
**      num_live_clients  (out) receives the collector's DC_ClientsLiveCnt;
**                        must not be GLOBUS_NULL.  It is only written
**                        once the arguments have been validated.
**
**  Returns:
**      GLOBUS_FAILURE    the handle is unknown or an argument is invalid.
**      GLOBUS_SUCCESS    otherwise.  Note that this includes the case in
**                        which the collector still has live clients and
**                        force_mode is FORCE_DESTROY_NO: nothing is
**                        destroyed then, and the caller has to inspect
**                        *num_live_clients to find out.
*/
int
globus_hbm_datacollector_destroy(
        globus_hbm_datacollector_handle_t
                dc_handle,
        int     force_mode,
        int     *num_live_clients )
{
    globus_hbm_datacollector_t*
                        datacollector_ptr = GLOBUS_NULL;

    /* get datacollector for this handle */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

    datacollector_ptr = globus_l_hbmdc_find_datacollector( dc_handle );
    if( datacollector_ptr == GLOBUS_NULL )
    {
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }

    /*
    **  Unlike the other API entry points, the global mutex stays locked
    **  while the collector mutex is held, because the global list may
    **  be modified below.
    */
    globus_mutex_lock( &datacollector_ptr->DC_mutex );

    /* validate args */
    if(   ( num_live_clients == GLOBUS_NULL )
       || (   ( force_mode != GLOBUS_HBM_DATACOLLECTOR_FORCE_DESTROY_YES )
           && ( force_mode != GLOBUS_HBM_DATACOLLECTOR_FORCE_DESTROY_NO )))
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_hbm_datacollector_destroy():\n"
                        "        Invalid arguments, Aborting.\n\n" );
        }
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_unlock( &datacollector_ptr->DC_mutex );

        return GLOBUS_FAILURE;
    }

    *num_live_clients = datacollector_ptr->DC_ClientsLiveCnt;

/*
**  Do not destroy the datacollector if it has live clients and the
**  FORCE parameter is no.
*/
    if(   ( datacollector_ptr->DC_ClientsLiveCnt > 0 )
       && ( force_mode == GLOBUS_HBM_DATACOLLECTOR_FORCE_DESTROY_NO ))
    {
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_unlock( &datacollector_ptr->DC_mutex );

        return GLOBUS_SUCCESS;
    }

/*
**  Destroy this datacollector.
**
**  First remove the data collector from the list (keeping the global
**  collector count in step with the list -- create() increments it
**  when it appends), then destroy it.
*/
    ListRemove( &( globus_l_hbm_datacollector_list ),
                datacollector_ptr );
    globus_l_hbm_datacollector_count--;

    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    /*
    **  NOTE(review): DC_mutex is still locked at this point.  Presumably
    **  globus_l_hbmdc_free_datacollector() copes with (or releases) a
    **  held mutex before destroying it -- confirm against that routine.
    */
    globus_l_hbmdc_free_datacollector( datacollector_ptr );

    return GLOBUS_SUCCESS;
}


/*
**  globus_hbm_datacollector_reg_tcpfd()
**
**  Record fd and request_callback in the data collector identified by
**  handle (fields DC_req_fd and DC_tcp_req_callback).
**
**  Returns GLOBUS_FAILURE when the handle is unknown, when
**  request_callback is GLOBUS_NULL or when fd is not positive;
**  GLOBUS_SUCCESS otherwise.
**
**  NOTE(review): fd 0 is rejected here, whereas
**  globus_hbm_datacollector_unreg_tcpfd() only rejects negative fds --
**  confirm whether the asymmetry is intended.
*/
int
globus_hbm_datacollector_reg_tcpfd(
                int                     fd,
                void (*request_callback ) (
                        int             fd,
                        globus_hbm_datacollector_handle_t
                                        dc_handle ),
                globus_hbm_datacollector_handle_t
                                        handle )
{
    globus_hbm_datacollector_t*         dc      = GLOBUS_NULL;
    int                                 args_ok = 0;

    /* Look the collector up under the global lock ... */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );
    dc = globus_l_hbmdc_find_datacollector( handle );
    if( dc == GLOBUS_NULL )
    {
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }

    /* ... and swap the global lock for the collector's own lock. */
    globus_mutex_lock( &dc->DC_mutex );
    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    args_ok = ( fd > 0 ) && ( request_callback != GLOBUS_NULL );
    if( args_ok )
    {
        dc->DC_req_fd = fd;
        dc->DC_tcp_req_callback = request_callback;
    }
    else if( dc->DC_log_file != GLOBUS_NULL )
    {
        globus_libc_fprintf(
                        dc->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_hbm_datacollector_reg_tcpfd():\n"
                        "        Invalid arguments, Aborting.\n\n" );
    }

    globus_mutex_unlock( &dc->DC_mutex );

    return args_ok ? GLOBUS_SUCCESS : GLOBUS_FAILURE;
}


/*
**  globus_hbm_datacollector_set_clientevent_callback()
**
**  Install (or replace) the event callback of one client known to the
**  data collector identified by dc_handle.  The client is looked up by
**  the cl_host_ipaddr / cl_pid / cl_procname fields of
**  *client_callbackdata_ptr; its event mask is taken from cl_eventmask.
**
**  overdue_secs and shutdown_no_rpt_secs:
**      0       derive the value from the collector defaults:
**                  ( heartbeats_missing + 1 ) * client report interval
**                + network variation allowance
**      other   use the value given, clamped to the corresponding
**              HBMDC_LIM_HEARTBEATS_MISSINGSECS_*_MIN / _MAX limits.
**
**  Returns GLOBUS_FAILURE when the handle is unknown, when a required
**  pointer argument is GLOBUS_NULL or when the client cannot be found;
**  GLOBUS_SUCCESS otherwise.
*/
int
globus_hbm_datacollector_set_clientevent_callback(
        globus_hbm_datacollector_handle_t
                    dc_handle,
        globus_hbm_client_callbackdata_t*
                    client_callbackdata_ptr,
        u_int       overdue_secs,
        u_int       shutdown_no_rpt_secs,
        void*       user_data_ptr,
        void        ( *proc_event_callback_ptr )(
                            globus_hbm_datacollector_handle_t
                                    dc_handle,
                            globus_hbm_client_callbackdata_t*
                                    callbackdata_ptr,
                            globus_hbm_datacollector_client_entry_t*
                                    client_ptr,
                            void*   user_data_ptr ))
{
    globus_hbm_datacollector_t*         dc         = GLOBUS_NULL;
    hbmdc_cl_fields_t*                  client     = GLOBUS_NULL;
    int                                 args_valid = 0;

    /* Locate the collector, then trade the global lock for its own. */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );
    dc = globus_l_hbmdc_find_datacollector( dc_handle );
    if( dc == GLOBUS_NULL )
    {
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }
    globus_mutex_lock( &dc->DC_mutex );
    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    /* Argument check (done under the lock so the log file can be used). */
    args_valid =    ( proc_event_callback_ptr != GLOBUS_NULL )
                 && ( client_callbackdata_ptr != GLOBUS_NULL )
                 && ( client_callbackdata_ptr->cl_procname != GLOBUS_NULL );
    if( !args_valid )
    {
        if( dc->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        dc->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_hbm_datacollector_set_clientevent_callback():\n"
                        "        Invalid arguments, Aborting.\n\n" );
        }
        goto fail_unlock;
    }

    /* Locate the client entry the callback is meant for. */
    client = globus_l_hbmdc_find_client_entry(
                                dc,
                                client_callbackdata_ptr->cl_host_ipaddr,
                                client_callbackdata_ptr->cl_pid,
                                client_callbackdata_ptr->cl_procname );
    if( client == GLOBUS_NULL )
    {
        if( dc->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        dc->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_hbm_datacollector_set_clientevent_callback():\n"
                        "        globus_l_hbmdc_find_client_entry() failed:\n"
                        "            Client not found.\n\n" );
        }
        goto fail_unlock;
    }

    /* Callback, event mask and user data. */
    client->CL_event_callback = proc_event_callback_ptr;
    client->CL_eventmask = client_callbackdata_ptr->cl_eventmask;
    client->CL_event_callback_user_data = user_data_ptr;

    /* Overdue threshold:  explicit (clamped) value, or collector default. */
    if( overdue_secs != 0 )
    {
        if( overdue_secs < HBMDC_LIM_HEARTBEATS_MISSINGSECS_OVERDUE_MIN )
        {
            client->CL_overdue_secs =
                                HBMDC_LIM_HEARTBEATS_MISSINGSECS_OVERDUE_MIN;
        }
        else if( overdue_secs > HBMDC_LIM_HEARTBEATS_MISSINGSECS_OVERDUE_MAX )
        {
            client->CL_overdue_secs =
                                HBMDC_LIM_HEARTBEATS_MISSINGSECS_OVERDUE_MAX;
        }
        else
        {
            client->CL_overdue_secs = overdue_secs;
        }
    }
    else
    {
        client->CL_overdue_secs =
                    (  (  (  dc->DC_heartbeats_missing_overdue
                           + 1 )
                        * client->CL_rptInterval )
                     + dc->DC_network_variation_allowance_secs );
    }

    /* Shutdown threshold:  same scheme as the overdue threshold. */
    if( shutdown_no_rpt_secs != 0 )
    {
        if( shutdown_no_rpt_secs <
                                HBMDC_LIM_HEARTBEATS_MISSINGSECS_SHUTDOWN_MIN )
        {
            client->CL_shutdown_no_rpt_secs =
                                HBMDC_LIM_HEARTBEATS_MISSINGSECS_SHUTDOWN_MIN;
        }
        else if( shutdown_no_rpt_secs >
                                HBMDC_LIM_HEARTBEATS_MISSINGSECS_SHUTDOWN_MAX )
        {
            client->CL_shutdown_no_rpt_secs =
                                HBMDC_LIM_HEARTBEATS_MISSINGSECS_SHUTDOWN_MAX;
        }
        else
        {
            client->CL_shutdown_no_rpt_secs = shutdown_no_rpt_secs;
        }
    }
    else
    {
        client->CL_shutdown_no_rpt_secs =
                    (  (  (  dc->DC_heartbeats_missing_shutdown
                           + 1 )
                        * client->CL_rptInterval )
                     + dc->DC_network_variation_allowance_secs );
    }

    globus_mutex_unlock( &dc->DC_mutex );

    return GLOBUS_SUCCESS;

fail_unlock:
    /* Common exit for every failure detected while DC_mutex is held. */
    globus_mutex_unlock( &dc->DC_mutex );

    return GLOBUS_FAILURE;
}


/*
**  globus_hbm_datacollector_unreg_tcpfd()
**
**  Undo globus_hbm_datacollector_reg_tcpfd():  when fd matches the
**  collector's DC_req_fd, reset DC_req_fd to -1 and clear
**  DC_tcp_req_callback.
**
**  Returns GLOBUS_FAILURE when the handle is unknown, when fd is
**  negative or when fd is not the registered descriptor;
**  GLOBUS_SUCCESS otherwise.
*/
int
globus_hbm_datacollector_unreg_tcpfd(
                int                     fd,
                globus_hbm_datacollector_handle_t
                                        handle )
{
    globus_hbm_datacollector_t*         dc = GLOBUS_NULL;
    int                                 rc = GLOBUS_FAILURE;

    /* Locate the collector, then trade the global lock for its own. */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );
    dc = globus_l_hbmdc_find_datacollector( handle );
    if( dc == GLOBUS_NULL )
    {
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }
    globus_mutex_lock( &dc->DC_mutex );
    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    if( fd < 0 )
    {
        /* Not a usable descriptor at all. */
        if( dc->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        dc->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_hbm_datacollector_unreg_tcpfd():\n"
                        "        Invalid arguments, Aborting.\n\n" );
        }
    }
    else if( dc->DC_req_fd != fd )
    {
        /* A different descriptor is registered with this collector. */
        if( dc->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        dc->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_hbm_datacollector_unreg_tcpfd():\n"
                        "        Invalid fd [%d] expecting [%d].\n\n",
                        fd,
                        dc->DC_req_fd );
        }
    }
    else
    {
        dc->DC_req_fd = -1;
        dc->DC_tcp_req_callback = GLOBUS_NULL;
        rc = GLOBUS_SUCCESS;
    }

    globus_mutex_unlock( &dc->DC_mutex );

    return rc;
}


/*
**  globus_hbm_datacollector_user_checkpoint()
**
**  Checkpoint, on request of the library user, the data collector
**  identified by dc_handle by calling
**  globus_l_hbmdc_internal_checkpoint() with the collector's mutex held.
**
**  If chkpt_fd == -1, use the checkpoint file provided in
**  datacollector_create().
**
**  Returns GLOBUS_FAILURE when the handle is unknown, GLOBUS_SUCCESS
**  otherwise (the outcome of the checkpoint itself is not reported).
*/
int
globus_hbm_datacollector_user_checkpoint(
        globus_hbm_datacollector_handle_t
                    dc_handle,
        int         chkpt_fd )
{
    globus_hbm_datacollector_t*         dc = GLOBUS_NULL;
    int                                 rc = GLOBUS_FAILURE;

    /*  Not called by internal checkpoint, so the locks are taken here.  */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

    dc = globus_l_hbmdc_find_datacollector( dc_handle );
    if( dc != GLOBUS_NULL )
    {
        globus_mutex_lock( &dc->DC_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

        globus_l_hbmdc_internal_checkpoint(
                                dc,
                                chkpt_fd );

        globus_mutex_unlock( &dc->DC_mutex );
        rc = GLOBUS_SUCCESS;
    }
    else
    {
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
    }

    return rc;
}


/*
** Utility Routines
*/

/*
**  globus_l_hbmdc_copy_cl_fields()
**
**  Build a stand-alone copy of a client entry.
**
**  The strings CL_hostIPNum, CL_procName and CL_Msg (when present) are
**  duplicated into freshly allocated storage; the scalar fields, the
**  event callback pointer and its user data pointer are copied by
**  value.  The copy is not linked into any list (prev == next ==
**  GLOBUS_NULL).
**
**  Returns the copy, or GLOBUS_NULL when cl_entry is GLOBUS_NULL or an
**  allocation fails.  A partially built copy is released with
**  globus_l_hbmdc_free_client_entry() before returning GLOBUS_NULL.
**
**  NOTE(review): CL_hostIPNum and CL_procName of the source entry are
**  passed to strlen() unchecked, i.e. they are assumed to be non-NULL --
**  confirm that every client entry satisfies this.
*/
static hbmdc_cl_fields_t*
globus_l_hbmdc_copy_cl_fields(
                hbmdc_cl_fields_t*      cl_entry )
{
    hbmdc_cl_fields_t*  client_copy   = GLOBUS_NULL;

    if( cl_entry == GLOBUS_NULL )
    {
        return GLOBUS_NULL;
    }

    client_copy = globus_malloc( sizeof( hbmdc_cl_fields_t ));
    if( client_copy == GLOBUS_NULL )
    {
        return GLOBUS_NULL;
    }

    /*
    **  Start from a fully cleared entry with explicit null pointers, so
    **  that globus_l_hbmdc_free_client_entry() can be called on a
    **  partially filled copy in the error paths below.
    */
    memset(             (void *) client_copy,
                        0,
                        sizeof( hbmdc_cl_fields_t ));
    client_copy->CL_hostIPNum                = GLOBUS_NULL;
    client_copy->CL_procName                 = GLOBUS_NULL;
    client_copy->CL_event_callback           = GLOBUS_NULL;
    client_copy->CL_event_callback_user_data = GLOBUS_NULL;
    client_copy->CL_Msg                      = GLOBUS_NULL;
    client_copy->prev = client_copy->next    = GLOBUS_NULL;

    /* host address and its printable form */
    client_copy->CL_host_addr.s_addr = cl_entry->CL_host_addr.s_addr;
    client_copy->CL_hostIPNum =
        globus_malloc( strlen( cl_entry->CL_hostIPNum ) + 1 );
    if( client_copy->CL_hostIPNum == GLOBUS_NULL )
    {
        globus_l_hbmdc_free_client_entry( client_copy );

        return GLOBUS_NULL;
    }
    strcpy( client_copy->CL_hostIPNum, cl_entry->CL_hostIPNum );

    /* process name */
    client_copy->CL_procName =
        globus_malloc( strlen( cl_entry->CL_procName ) + 1 );
    if( client_copy->CL_procName == GLOBUS_NULL )
    {
        globus_l_hbmdc_free_client_entry( client_copy );

        return GLOBUS_NULL;
    }
    strcpy( client_copy->CL_procName,
            cl_entry->CL_procName );

    /* scalar fields and callback information */
    client_copy->CL_procPID = cl_entry->CL_procPID;
    client_copy->CL_procStatus = cl_entry->CL_procStatus;
    client_copy->CL_regTime = cl_entry->CL_regTime;
    /* Report interval:  without this the copy would carry interval 0. */
    client_copy->CL_rptInterval = cl_entry->CL_rptInterval;
    client_copy->CL_blockedTime = cl_entry->CL_blockedTime;
    client_copy->CL_cpuSecs = cl_entry->CL_cpuSecs;
    client_copy->CL_unregTime = cl_entry->CL_unregTime;
    client_copy->CL_deleteTime = cl_entry->CL_deleteTime;
    client_copy->CL_rptNumber = cl_entry->CL_rptNumber;
    client_copy->CL_rptTimeNext = cl_entry->CL_rptTimeNext;
    client_copy->CL_overdue_secs = cl_entry->CL_overdue_secs;
    client_copy->CL_shutdown_no_rpt_secs = cl_entry->CL_shutdown_no_rpt_secs;
    client_copy->CL_event_callback = cl_entry->CL_event_callback;
    client_copy->CL_eventmask = cl_entry->CL_eventmask;
    client_copy->CL_event_callback_user_data =
                                cl_entry->CL_event_callback_user_data;
    client_copy->CL_MsgNum = cl_entry->CL_MsgNum;

    /* optional message text */
    if( cl_entry->CL_Msg == GLOBUS_NULL )
    {
        client_copy->CL_Msg = GLOBUS_NULL;
    }
    else
    {
        client_copy->CL_Msg = globus_malloc( strlen( cl_entry->CL_Msg ) + 1 );
        if( client_copy->CL_Msg == GLOBUS_NULL )
        {
            globus_l_hbmdc_free_client_entry( client_copy );

            return GLOBUS_NULL;
        }
        strcpy( client_copy->CL_Msg, cl_entry->CL_Msg );
    }

    return client_copy;
}


/*
**  globus_l_hbmdc_count_clients()
**
**  Return the number of entries on the client list *client_list_ptr,
**  found by walking the list from head via the next pointers.
*/
static int
globus_l_hbmdc_count_clients(
                hbmdc_cl_list_head_t*   client_list_ptr )
{
    hbmdc_cl_fields_t*  entry;
    int                 total = 0;

    for( entry = client_list_ptr->head;
         entry != GLOBUS_NULL;
         entry = entry->next )
    {
        total++;
    }

    return total;
}


/*
**  globus_l_hbmdc_count_live_clients()
**
**  Return the number of entries on the client list *client_list_ptr
**  whose CL_procStatus is one of GLOBUS_HBM_PROCSTATUS_ACTIVE,
**  GLOBUS_HBM_PROCSTATUS_BLOCKED or GLOBUS_HBM_PROCSTATUS_OVERDUE.
*/
static int
globus_l_hbmdc_count_live_clients(
        hbmdc_cl_list_head_t*
                        client_list_ptr )
{
    hbmdc_cl_fields_t*  entry;
    int                 live_total = 0;

    for( entry = client_list_ptr->head;
         entry != GLOBUS_NULL;
         entry = entry->next )
    {
        live_total +=
            (   ( entry->CL_procStatus == GLOBUS_HBM_PROCSTATUS_ACTIVE )
             || ( entry->CL_procStatus == GLOBUS_HBM_PROCSTATUS_BLOCKED )
             || ( entry->CL_procStatus == GLOBUS_HBM_PROCSTATUS_OVERDUE ))
                ? 1
                : 0;
    }

    return live_total;
}


/*
**  globus_l_hbmdc_count_localmonitors()
**
**  Return the number of local monitor entries on the DC_lm_list of
**  *datacollector_ptr.
*/
static int
globus_l_hbmdc_count_localmonitors(
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    hbmdc_lm_fields_t*  lm_entry = datacollector_ptr->DC_lm_list.head;
    int                 total    = 0;

    while( lm_entry != GLOBUS_NULL )
    {
        total++;
        lm_entry = lm_entry->next;
    }

    return total;
}


/***************************************************************************
 *
 *                 globus_hbm_datacollector module activation functions
 *
 ***************************************************************************
 */

/*
**  globus_l_hbmdc_datacollector_activate()
**
**  Activation routine of the data collector module.  In order, it:
**      - initializes the global mutex and (holding it) the global data
**        collector list, count and next handle;
**      - initializes the callback mutex;
**      - activates GLOBUS_COMMON_MODULE and GLOBUS_CALLBACK_MODULE;
**      - registers globus_l_hbmdc_poll() as a periodic callback;
**      - when HBM_EXPERIMENT is defined, opens the heartbeat log file;
**      - clears read_fds.
**
**  Every step is checked against GLOBUS_SUCCESS.  When a step fails, an
**  error is written to stderr, everything set up by the earlier steps
**  is torn down again in reverse order and GLOBUS_FAILURE is returned.
**
**  Returns GLOBUS_SUCCESS when all steps succeeded.
*/
static int
globus_l_hbmdc_datacollector_activate(
                void )
{
    globus_reltime_t           start_time;

    /*
    **  Any non-GLOBUS_SUCCESS return is treated as an error:  failures
    **  are not necessarily reported as GLOBUS_FAILURE.
    */
    if( globus_mutex_init(  &globus_l_hbm_datacollector_global_mutex,
                            GLOBUS_NULL ) !=
                                        GLOBUS_SUCCESS )
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        globus_mutex_init() failed: global mutex "
                        "initialization failed.\n\n" );

        return GLOBUS_FAILURE;
    }

    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

    globus_l_hbm_datacollector_list.head =
            globus_l_hbm_datacollector_list.tail = GLOBUS_NULL;
    globus_l_hbm_datacollector_count = 0;
    globus_l_hbm_datacollector_next_handle = 1;

    if( globus_mutex_init(  &globus_l_hbm_datacollector_callback_mutex,
                                GLOBUS_NULL ) !=
                                        GLOBUS_SUCCESS )
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        globus_mutex_init() failed: callback mutex "
                        "initialization failed.\n\n" );

        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_destroy( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }
    if( globus_module_activate( GLOBUS_COMMON_MODULE ) !=
                                GLOBUS_SUCCESS )
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        globus_module_activate(GLOBUS_COMMON_MODULE) "
                        "failed.\n\n" );

        globus_mutex_destroy( &globus_l_hbm_datacollector_callback_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_destroy( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }

/*
**  Register the poll routine.
**    (We only need to register the poll routine once.
**     The poll does a select on behalf of all of the
**     datacollector instances. )
**
**  Then make sure the heartbeat log file is open if this is
**    for an experiment.
*/
    if( globus_module_activate( GLOBUS_CALLBACK_MODULE ) !=
                                    GLOBUS_SUCCESS )
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        globus_module_activate"
                        "(GLOBUS_CALLBACK_MODULE) "
                        "failed.\n\n" );

        globus_module_deactivate( GLOBUS_COMMON_MODULE );
        globus_mutex_destroy( &globus_l_hbm_datacollector_callback_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_destroy( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }

/*
**  Register globus_l_hbmdc_poll() as a periodic callback.  start_time
**  (0, 0) is passed both as the initial delay and as the period.
*/
    GlobusTimeReltimeSet(start_time, 0, 0);

    if( globus_callback_register_periodic(
                        &globus_l_hbm_callback_handle,
                        &start_time,
                        &start_time,
                        globus_l_hbmdc_poll,
                        GLOBUS_NULL,
                        GLOBUS_NULL,
                        GLOBUS_NULL ) !=
                                        GLOBUS_SUCCESS )
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [05] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        globus_callback_register_periodic() "
                        "failed.\n\n" );

        globus_module_deactivate( GLOBUS_CALLBACK_MODULE );
        globus_module_deactivate( GLOBUS_COMMON_MODULE );
        globus_mutex_destroy( &globus_l_hbm_datacollector_callback_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_destroy( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }

#ifdef HBM_EXPERIMENT
/*
**      Initialize heartbeat file counter, open heartbeat file.
**
**      NOTE(review): datacollector_ptr is not declared in this
**      function, so this branch presumably does not compile when
**      HBM_EXPERIMENT is defined -- confirm and decide where the host
**      name should come from.
*/
    if( gettimeofday( &heartbeat_file_cutoff_time, GLOBUS_NULL ))
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [06] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        gettimeofday() failed:  errno [%d]: %s.\n\n",
                        errno,
                        strerror( errno ));

        globus_callback_unregister( globus_l_hbm_callback_handle );
        globus_module_deactivate( GLOBUS_CALLBACK_MODULE );
        globus_module_deactivate( GLOBUS_COMMON_MODULE );
        globus_mutex_destroy( &globus_l_hbm_datacollector_callback_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_destroy( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }
    heartbeat_file_cutoff_time.tv_sec += GLOBUS_HBM_TIME_SECS_PER_DAY;
    heartbeat_file_cutoff_time.tv_usec = 0;
    heartbeat_file_counter++;
    globus_libc_sprintf(
                        heartbeat_file_name_str,
                        "../var/globus-hbm-datacollector.%s.heartbeat%3.3u",
                        datacollector_ptr->DC_hostName,
                        heartbeat_file_counter );

    if(( heartbeat_file_fd1 = fopen(
                    heartbeat_file_name_str,
                    "w" )) == GLOBUS_NULL )
    {
        globus_libc_fprintf(
                        stderr,
                        "Globus HBM Data Collector library:\n"
                        "    Error [07] in "
                        "globus_l_hbmdc_datacollector_activate():\n"
                        "        fopen() failed:  errno [%d]: %s.\n"
                        "        Error opening heartbeat log file: %s.\n\n",
                        errno,
                        strerror( errno ),
                        heartbeat_file_name_str );

        globus_callback_unregister( globus_l_hbm_callback_handle );
        globus_module_deactivate( GLOBUS_CALLBACK_MODULE );
        globus_module_deactivate( GLOBUS_COMMON_MODULE );
        globus_mutex_destroy( &globus_l_hbm_datacollector_callback_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
        globus_mutex_destroy( &globus_l_hbm_datacollector_global_mutex );

        return GLOBUS_FAILURE;
    }
#endif
    FD_ZERO( &read_fds );

    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    return GLOBUS_SUCCESS;
}


/*
**  globus_l_hbmdc_datacollector_deactivate()
**
**  Tear down the data collector module:  unregister the callback,
**  deactivate the callback and common modules, and destroy the callback
**  and global mutexes.
**
**  Every step is attempted even when an earlier one fails;  each failure
**  is reported on stderr.
**
**  Returns:
**      GLOBUS_SUCCESS  if every checked step succeeded.
**      GLOBUS_FAILURE  if at least one checked step failed.
*/
static int
globus_l_hbmdc_datacollector_deactivate(
                void )
{
    int  overall_status = GLOBUS_SUCCESS;

    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

#ifdef HBM_EXPERIMENT
/*
**  The heartbeat log file is only closed here when the datacollector list
**  is empty.
*/
    if( GLOBUS_NULL == globus_l_hbm_datacollector_list.head )
    {
        fclose( heartbeat_file_fd1 );
    }
#endif

/*
**  Step 1:  unregister the callback.
*/
    if( GLOBUS_SUCCESS !=
            globus_callback_unregister( globus_l_hbm_callback_handle ))
    {
        overall_status = GLOBUS_FAILURE;

        globus_libc_fprintf(
                stderr,
                "Globus HBM Data Collector library:\n"
                "    Error [01] in globus_l_hbmdc_datacollector_deactivate():\n"
                "        globus_callback_unregister() failed.\n\n" );
    }

/*
**  Step 2:  deactivate the callback module.
*/
    if( GLOBUS_SUCCESS !=
            globus_module_deactivate( GLOBUS_CALLBACK_MODULE ))
    {
        overall_status = GLOBUS_FAILURE;

        globus_libc_fprintf(
                stderr,
                "Globus HBM Data Collector library:\n"
                "    Error [02] in globus_l_hbmdc_datacollector_deactivate():\n"
                "        globus_module_deactivate(GLOBUS_CALLBACK_MODULE) "
                "failed.\n\n" );
    }

/*
**  Step 3:  deactivate the common module.
*/
    if( GLOBUS_SUCCESS !=
            globus_module_deactivate( GLOBUS_COMMON_MODULE ))
    {
        overall_status = GLOBUS_FAILURE;

        globus_libc_fprintf(
                stderr,
                "Globus HBM Data Collector library:\n"
                "    Error [03] in globus_l_hbmdc_datacollector_deactivate():\n"
                "        globus_module_deactivate(GLOBUS_COMMON_MODULE) "
                "failed.\n\n" );
    }

/*
**  Step 4:  destroy the callback mutex.
*/
    if( GLOBUS_SUCCESS !=
            globus_mutex_destroy(
                &globus_l_hbm_datacollector_callback_mutex ))
    {
        overall_status = GLOBUS_FAILURE;

        globus_libc_fprintf(
                stderr,
                "Globus HBM Data Collector library:\n"
                "    Error [04] in globus_l_hbmdc_datacollector_deactivate():\n"
                "        globus_mutex_destroy() failed: "
                "globus_l_hbm_datacollector_callback_mutex destruction "
                "failed.\n\n" );
    }

/*
**  Step 5:  release the global mutex (taken at the top), then destroy it.
*/
    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    if( GLOBUS_SUCCESS !=
            globus_mutex_destroy(
                &globus_l_hbm_datacollector_global_mutex ))
    {
        overall_status = GLOBUS_FAILURE;

        globus_libc_fprintf(
                stderr,
                "Globus HBM Data Collector library:\n"
                "    Error [05] in globus_l_hbmdc_datacollector_deactivate():\n"
                "        globus_mutex_destroy() failed: "
                "globus_l_hbm_datacollector_global_mutex destruction "
                "failed.\n\n" );
    }

    return overall_status;
}


/*
**  globus_l_hbmdc_evaluate_datacollector()
**
**  Walk every local monitor of the data collector and every client of
**  each local monitor, and for each client:
**      - remove it when it is in a shutdown state and its delete time
**        has passed;
**      - otherwise, when it is live, mark it overdue and/or shut down
**        for lack of reports, depending on how late its heartbeat is;
**      - queue an event callback entry when an event was raised and
**        globus_l_hbmdc_event_callback() produced one.
**
**  After the clients of a local monitor have been examined, the local
**  monitor is removed when it has no clients left;  otherwise its summary
**  status is refreshed from its client counters.
**
**  Nothing is evaluated when gettimeofday() fails (the failure is logged
**  if a log file is set).
*/
static void
globus_l_hbmdc_evaluate_datacollector(
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    hbmdc_lm_fields_t*  lm_cur         = GLOBUS_NULL;
    hbmdc_lm_fields_t*  lm_following   = GLOBUS_NULL;
    hbmdc_cl_fields_t*  cl_cur         = GLOBUS_NULL;
    hbmdc_cl_fields_t*  cl_following   = GLOBUS_NULL;
    hbmdc_dc_callback_entry_t*
                        queued_entry;
    struct timeval      now;
    unsigned int        raised_events  = 0;
    int                 is_shut_down;
    int                 is_live;

    if( gettimeofday( &now, GLOBUS_NULL ) != 0 )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_evaluate_datacollector():\n"
                        "        gettimeofday() failed:  errno [%d].\n"
                        "        %s\n\n",
                        errno,
                        strerror( errno ));
        }
        return;
    }

/*
**  lm_following is always set inside the loop body before the next
**  iteration, because the current entry may have been freed by then.
*/
    for( lm_cur = datacollector_ptr->DC_lm_list.head;
         lm_cur != GLOBUS_NULL;
         lm_cur = lm_following )
    {
        cl_cur = lm_cur->LM_client_list.head;
        while( cl_cur != GLOBUS_NULL )
        {
            raised_events = 0;

            is_shut_down =
                   ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NORMAL )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_ABNORMAL )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_DIED )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NO_RPT );

/*
**          A shut down client whose delete time has passed is dropped.
*/
            if( is_shut_down && ( now.tv_sec > cl_cur->CL_deleteTime ))
            {
                cl_following = cl_cur->next;
                ListRemove( &( lm_cur->LM_client_list ),
                            cl_cur );
                globus_l_hbmdc_free_client_entry( cl_cur );
                cl_cur = cl_following;
                lm_cur->LM_ClientsCnt--;
                continue;
            }

            is_live =
                   ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_ACTIVE )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_BLOCKED )
                || ( cl_cur->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_OVERDUE );

            if( is_live )
            {
/*
**              Late heartbeat from a client not yet flagged as overdue?
*/
                if(   ( cl_cur->CL_procStatus !=
                                GLOBUS_HBM_PROCSTATUS_OVERDUE )
                   && ( cl_cur->CL_rptTimeNext < now.tv_sec )
                   && (( now.tv_sec - cl_cur->CL_rptTimeNext ) >
                                cl_cur->CL_overdue_secs ))
                {
                    cl_cur->CL_procStatus =
                            GLOBUS_HBM_PROCSTATUS_OVERDUE;
                    raised_events |=
                            GLOBUS_HBM_DATACOLLECTOR_EVENT_HEARTBEAT_OVERDUE;
                    datacollector_ptr->DC_ckptNeeded = 1;
                }

/*
**              Heartbeat late enough for the client to count as dead?
*/
                if(   ( now.tv_sec > cl_cur->CL_rptTimeNext )
                   && (( now.tv_sec - cl_cur->CL_rptTimeNext ) >
                                cl_cur->CL_shutdown_no_rpt_secs ))
                {
                    cl_cur->CL_procStatus =
                            GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NO_RPT;
                    cl_cur->CL_deleteTime =
                            now.tv_sec
                          + HBMDC_CL_SHUTDOWN_RETENTION_SECS;

                    lm_cur->LM_ClientsLiveCnt--;
                    datacollector_ptr->DC_ClientsLiveCnt--;

                    raised_events |=
                            GLOBUS_HBM_DATACOLLECTOR_EVENT_SHUTDOWN_ABNORMAL;
                    datacollector_ptr->DC_ckptNeeded = 1;
                }
            }

/*
**          Queue a callback entry for whatever was raised above.
*/
            if( raised_events != 0 )
            {
                queued_entry = globus_l_hbmdc_event_callback(
                                datacollector_ptr,
                                cl_cur,
                                raised_events );
                if( queued_entry != GLOBUS_NULL )
                {
                    globus_mutex_lock(
                            &globus_l_hbm_datacollector_callback_mutex );
                    ListAppend(
                            &globus_l_hbm_datacollector_callback_list,
                            queued_entry );
                    globus_mutex_unlock(
                            &globus_l_hbm_datacollector_callback_mutex );
                }
            }

            cl_cur = cl_cur->next;
        }

/*
**      A local monitor without clients is deleted.
*/
        if( lm_cur->LM_ClientsCnt <= 0 )
        {
            lm_following = lm_cur->next;
            ListRemove( &( datacollector_ptr->DC_lm_list ),
                        lm_cur );
            globus_l_hbmdc_free_localmonitor_entry( lm_cur );
            datacollector_ptr->DC_LocalMonitorsCnt--;
            continue;
        }

/*
**      Otherwise derive the summary status from the client counters.
*/
        if( lm_cur->LM_ClientsLiveCnt <= 0 )
        {
            lm_cur->LM_SummaryStatus = GLOBUS_HBM_LM_STATUS_DEAD;
        }
        else if( lm_cur->LM_ClientsLiveCnt == lm_cur->LM_ClientsCnt )
        {
            lm_cur->LM_SummaryStatus = GLOBUS_HBM_LM_STATUS_ALIVE;
        }
        else
        {
            lm_cur->LM_SummaryStatus = GLOBUS_HBM_LM_STATUS_DEGRADED;
        }

        lm_following = lm_cur->next;
    }

    return;
}


/*
**  globus_l_hbmdc_event_callback()
**
**  Build a callback entry describing an event for a client.  The entry
**  is not invoked here;  it is returned to the caller.
**
**  Parameters:
**      datacollector_ptr   data collector the client belongs to
**                          (supplies the handle, the registration
**                          callback and the log file).
**      cl_entry            client the event refers to.
**      event               event bits being reported.
**
**  Returns:
**      A newly allocated callback entry that references a newly
**      allocated callbackdata structure and a copy of cl_entry, or
**      GLOBUS_NULL when
**          - the data collector has no registration callback,
**          - the event is not a registration event and the client has no
**            event callback or none of the event bits is in its mask,
**          - an allocation or the client copy failed (logged if a log
**            file is set).
**      On every GLOBUS_NULL return nothing allocated here is left behind.
*/
static hbmdc_dc_callback_entry_t*
globus_l_hbmdc_event_callback(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                hbmdc_cl_fields_t*      cl_entry,
                unsigned int            event )
{
    hbmdc_dc_callback_entry_t*
                        callback_entry_ptr = GLOBUS_NULL;
    globus_hbm_client_callbackdata_t*
                        callbackdata  = GLOBUS_NULL;
    hbmdc_cl_fields_t*  client_copy   = GLOBUS_NULL;
    int                 is_registration;

    is_registration =
        (( event & GLOBUS_HBM_DATACOLLECTOR_EVENT_REGISTRATION ) != 0 );

/*
**  If no registration callback then
**  no other event callbacks could have been set.
**  Return.
*/
    if( datacollector_ptr->DC_reg_callback == GLOBUS_NULL )
    {
        return GLOBUS_NULL;
    }

/*
**  If no event_callback, return.
*/
    if(   ( !is_registration )
       && ( cl_entry->CL_event_callback == GLOBUS_NULL ))
    {
        return GLOBUS_NULL;
    }

/*
**  If event is not set, don't generate callback.
**  (Registration events are automatically called,
**   event mask doesn't need to be set.)
*/
    if(   ( !is_registration )
       && (!( event & cl_entry->CL_eventmask )))
    {
        return GLOBUS_NULL;
    }

/*
**  callbackdata that the library generates will be passed back
**  as an argument to the event_callback.
*/
    callbackdata =
        globus_malloc( sizeof( globus_hbm_client_callbackdata_t ));
    if( callbackdata == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_event_callback():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        return GLOBUS_NULL;
    }

/*
**  Zero the structure first so that cl_procname is a null pointer if
**  globus_l_hbmdc_free_callbackdata() has to be called on a partially
**  filled structure.
*/
    memset(             (void *) callbackdata,
                        0,
                        sizeof( globus_hbm_client_callbackdata_t ));
    callbackdata->cl_procname = GLOBUS_NULL;

    callbackdata->cl_host_ipaddr.s_addr = cl_entry->CL_host_addr.s_addr;
    callbackdata->cl_pid = cl_entry->CL_procPID;
    callbackdata->cl_procname =
        globus_malloc( strlen( cl_entry->CL_procName ) + 1 );
    if( callbackdata->cl_procname == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_event_callback():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_callbackdata( callbackdata );

        return GLOBUS_NULL;
    }
    strcpy( callbackdata->cl_procname, cl_entry->CL_procName );
    callbackdata->cl_eventmask = event;

/*
**  Copy cl_entry.
*/
    client_copy = globus_l_hbmdc_copy_cl_fields( cl_entry );
    if( client_copy == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_event_callback():\n"
                        "        globus_l_hbmdc_copy_cl_fields() failed.\n\n" );
        }
        globus_l_hbmdc_free_callbackdata( callbackdata );

        return GLOBUS_NULL;
    }

/*
**  Allocate, complete callback_entry.
*/
    callback_entry_ptr =
        globus_malloc( sizeof( hbmdc_dc_callback_entry_t ));
    if( callback_entry_ptr == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_event_callback():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_callbackdata( callbackdata );
/*
**      The client copy must be released as well, otherwise it leaks.
**      NOTE(review): assumes globus_l_hbmdc_free_client_entry() is the
**      matching release routine for globus_l_hbmdc_copy_cl_fields()
**      results -- confirm.
*/
        globus_l_hbmdc_free_client_entry( client_copy );

        return GLOBUS_NULL;
    }

    memset(             (void *) callback_entry_ptr,
                        0,
                        sizeof( hbmdc_dc_callback_entry_t ));
    callback_entry_ptr->next =
            callback_entry_ptr->prev = GLOBUS_NULL;

/*
**  Fields that do not depend on the kind of event.
*/
    callback_entry_ptr->handle           = datacollector_ptr->DC_handle;
    callback_entry_ptr->callbackdata_ptr = callbackdata;
    callback_entry_ptr->client_copy_ptr  = client_copy;

    if( is_registration )
    {
#ifdef DEBUGGING_HBMDC
        /* log callback */
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    globus_l_hbmdc_event_callback():\n"
                        "        Calling Registration Callback for "
                        "%s : %s : %d\n\n",
                        cl_entry->CL_hostIPNum,
                        cl_entry->CL_procName,
                        cl_entry->CL_procPID );
        }
#endif
/*
**      Registration events go to the data collector's registration
**      callback with the data collector's user data.
*/
        callback_entry_ptr->callback_function_ptr =
                        datacollector_ptr->DC_reg_callback;
        callback_entry_ptr->callback_userdata_ptr =
                        datacollector_ptr->DC_reg_callback_user_data;
    }
    else
    {
#ifdef DEBUGGING_HBMDC
        /* log callback */
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            /* TODO: write a printevent routine which provides a more
               useful output of which event is occuring here */
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    globus_l_hbmdc_event_callback():\n"
                        "        Calling Event [%d] Callback for "
                        "%s : %s : %d\n\n",
                        event,
                        cl_entry->CL_hostIPNum,
                        cl_entry->CL_procName,
                        cl_entry->CL_procPID );
        }
#endif
/*
**      All other events go to the client's own event callback;  the
**      user data is taken from the client copy.
*/
        callback_entry_ptr->callback_function_ptr =
                        cl_entry->CL_event_callback;
        callback_entry_ptr->callback_userdata_ptr =
                        client_copy->CL_event_callback_user_data;
    }

    return callback_entry_ptr;
}


/*
**  globus_l_hbmdc_extract_client_log_nomem()
**
**  Log a globus_malloc() failure of globus_l_hbmdc_extract_client()
**  under the given error number, if a log file is set.
*/
static void
globus_l_hbmdc_extract_client_log_nomem(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                int                     error_num )
{
    if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
    {
        globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [%2.2d] in "
                        "globus_l_hbmdc_extract_client():\n"
                        "        globus_malloc() failed:  out of memory.\n\n",
                        error_num );
    }
}


/*
**  globus_l_hbmdc_extract_client_token()
**
**  Fetch the next ";"-delimited field with strtok().  scan_ptr is the
**  record buffer for the first field and GLOBUS_NULL for every following
**  field.  When no field is left, the problem is logged under error_num
**  with missing_text (if a log file is set) and GLOBUS_NULL is returned.
*/
static char*
globus_l_hbmdc_extract_client_token(
                char*                   scan_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                int                     error_num,
                const char*             missing_text )
{
    char*               token         = strtok( scan_ptr, ";" );

    if(   ( token == GLOBUS_NULL )
       && ( datacollector_ptr->DC_log_file != GLOBUS_NULL ))
    {
        globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [%2.2d] in "
                        "globus_l_hbmdc_extract_client():\n"
                        "        Bad checkpoint file client record on "
                        "restore:\n"
                        "            %s\n\n",
                        error_num,
                        missing_text );
    }

    return token;
}


/*
**  globus_l_hbmdc_extract_client_strdup()
**
**  Return a globus_malloc()ed copy of token.  On allocation failure the
**  problem is logged under error_num and GLOBUS_NULL is returned.
*/
static char*
globus_l_hbmdc_extract_client_strdup(
                const char*             token,
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                int                     error_num )
{
    char*               copy          = globus_malloc( strlen( token ) + 1 );

    if( copy == GLOBUS_NULL )
    {
        globus_l_hbmdc_extract_client_log_nomem(
                        datacollector_ptr,
                        error_num );
        return GLOBUS_NULL;
    }
    strcpy( copy, token );

    return copy;
}


/*
**  globus_l_hbmdc_extract_client()
**
**  Parse one ";"-delimited checkpoint file client record into a newly
**  allocated client entry.
**
**  Parameters:
**      buff_ptr            record text;  it is modified by strtok().
**      datacollector_ptr   supplies the log file for diagnostics.
**      localmonitor        local monitor the client is validated against;
**                          GLOBUS_NULL makes the function return
**                          GLOBUS_NULL immediately.
**
**  Field order in the record:
**      host IP number, process name, PID, process status, registration
**      time, blocked time, cpu seconds, unregistration time, delete time,
**      report number, report interval, overdue seconds,
**      shutdown-no-report seconds, last report time, next report time,
**      message number, message ("<None>", compared without regard to
**      case, stands for no message).
**
**  Returns:
**      The new client entry, or GLOBUS_NULL when a field is missing,
**      the host IP number fails validation, or memory runs out.  Each
**      failure is logged with its own error number if a log file is set,
**      and the partially built entry is released.
*/
static hbmdc_cl_fields_t*
globus_l_hbmdc_extract_client(
                char*                   buff_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                hbmdc_lm_fields_t*
                                        localmonitor )
{
    char*               token         = GLOBUS_NULL;
    hbmdc_cl_fields_t*  client        = GLOBUS_NULL;
    globus_bool_t       result;

    if( localmonitor == GLOBUS_NULL )
    {
        return GLOBUS_NULL;
    }

    client = globus_malloc( sizeof( hbmdc_cl_fields_t ));
    if( client == GLOBUS_NULL )
    {
        globus_l_hbmdc_extract_client_log_nomem( datacollector_ptr, 1 );
        return GLOBUS_NULL;
    }

/*
**  Start from an all-null entry so that the cleanup path can hand a
**  partially filled entry to globus_l_hbmdc_free_client_entry().
*/
    memset(             (void *) client,
                        0,
                        sizeof( hbmdc_cl_fields_t ));
    client->CL_hostIPNum                = GLOBUS_NULL;
    client->CL_procName                 = GLOBUS_NULL;
    client->CL_event_callback_user_data = GLOBUS_NULL;
    client->CL_Msg                      = GLOBUS_NULL;
    client->prev = client->next         = GLOBUS_NULL;

/*
**  CL_hostIPNum
*/
    token = globus_l_hbmdc_extract_client_token(
                        buff_ptr,
                        datacollector_ptr,
                        2,
                        "No host IP Number." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_hostIPNum = globus_l_hbmdc_extract_client_strdup(
                        token,
                        datacollector_ptr,
                        3 );
    if( client->CL_hostIPNum == GLOBUS_NULL )
    {
        goto bad_record;
    }

    client->CL_host_addr.s_addr = inet_addr( client->CL_hostIPNum );

    result = globus_l_hbmdc_validate_client_IPNum(
                        client->CL_hostIPNum,
                        localmonitor );
    if( result == GLOBUS_FALSE )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_extract_client():\n"
                        "        globus_l_hbmdc_validate_client_IPNum() "
                        "failed.\n\n" );
        }
        goto bad_record;
    }

/*
**  CL_procName
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        5,
                        "No client process name." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_procName = globus_l_hbmdc_extract_client_strdup(
                        token,
                        datacollector_ptr,
                        6 );
    if( client->CL_procName == GLOBUS_NULL )
    {
        goto bad_record;
    }

/*
**  CL_procPID
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        7,
                        "No client PID." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_procPID = (unsigned int)atoi( token );

/*
**  CL_procStatus
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        8,
                        "No client process status." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_procStatus = (unsigned int)atoi( token );

/*
**  CL_regTime
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        9,
                        "No client registration time." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_regTime =
        globus_i_hbm_convert_string_to_UTCtime( token, time_conv_diff );

/*
**  CL_blockedTime
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        10,
                        "No client blocked time." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_blockedTime =
        globus_i_hbm_convert_string_to_UTCtime( token, time_conv_diff );

/*
**  CL_cpuSecs
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        11,
                        "No client cpu time (seconds)." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_cpuSecs = (unsigned int)atoi( token );

/*
**  CL_unregTime
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        12,
                        "No client unregistration time." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_unregTime =
        globus_i_hbm_convert_string_to_UTCtime( token, time_conv_diff );

/*
**  CL_deleteTime
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        13,
                        "No client delete time." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_deleteTime =
        globus_i_hbm_convert_string_to_UTCtime( token, time_conv_diff );

/*
**  CL_rptNumber
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        14,
                        "No client report number." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_rptNumber = (unsigned int)atoi( token );

/*
**  CL_rptInterval
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        15,
                        "No client report interval." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_rptInterval = (unsigned int)atoi( token );

/*
**  CL_overdue_secs
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        16,
                        "No client overdue seconds." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_overdue_secs = (unsigned int)atoi( token );

/*
**  CL_shutdown_no_rpt_secs
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        17,
                        "No client shutdown-no-report seconds." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_shutdown_no_rpt_secs = (unsigned int)atoi( token );

/*
**  CL_rptTimeLast
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        18,
                        "No client report time last." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_rptTimeLast =
        globus_i_hbm_convert_string_to_UTCtime( token, time_conv_diff );

/*
**  CL_rptTimeNext
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        19,
                        "No client report time next." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_rptTimeNext =
        globus_i_hbm_convert_string_to_UTCtime( token, time_conv_diff );

/*
**  CL_MsgNum
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        20,
                        "No client message number." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    client->CL_MsgNum = (unsigned int)atoi( token );

/*
**  CL_Msg
**  ("<None>" leaves CL_Msg as the null pointer set above.)
*/
    token = globus_l_hbmdc_extract_client_token(
                        GLOBUS_NULL,
                        datacollector_ptr,
                        21,
                        "No client message." );
    if( token == GLOBUS_NULL )
    {
        goto bad_record;
    }
    if( strcasecmp( token, "<None>" ) != 0 )
    {
        client->CL_Msg = globus_l_hbmdc_extract_client_strdup(
                        token,
                        datacollector_ptr,
                        22 );
        if( client->CL_Msg == GLOBUS_NULL )
        {
            goto bad_record;
        }
    }

    return client;

/*
**  Common failure exit:  the diagnostic has already been logged.
*/
bad_record:
    globus_l_hbmdc_free_client_entry( client );

    return GLOBUS_NULL;
}


/*
**  globus_l_hbmdc_extract_datacollector()
**
**  Validate the data collector record of a checkpoint file on restore.
**
**  buff_ptr is tokenized in place on ';' with strtok(), so the buffer
**  is modified.  The fields are expected in this order:
**
**      DC_hostIPNum            must equal datacollector_ptr->DC_hostIPNum
**      DC_hostName             must equal datacollector_ptr->DC_hostName
**      DC_portNumHBMsg         presence checked only
**      DC_EvalInterval         presence checked only
**      DC_LocalMonitorsCnt     stored in datacollector_ptr (via atoi())
**      DC_ckptFileName         presence checked only
**      DC_ckpt_and_eval_time   presence checked only
**
**  Returns GLOBUS_SUCCESS when every field is present and both host
**  checks match.  Otherwise a diagnostic is written to DC_log_file
**  (when one is set) and GLOBUS_FAILURE is returned.
*/
static int
globus_l_hbmdc_extract_datacollector(
                char*                   buff_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    char*       field                 = GLOBUS_NULL;

    /* DC_hostIPNum:  required, and must be this host. */
    if(( field = strtok( buff_ptr, ";" )) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector host IP number.\n\n" );
        }
        return GLOBUS_FAILURE;
    }
    if( strcmp( field, datacollector_ptr->DC_hostIPNum ) != 0 )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            Data collector host IP number mismatch.\n"
                        "            This host:  %s.\n"
                        "            From file:  %s.\n\n",
                        datacollector_ptr->DC_hostIPNum,
                        field );
        }
        return GLOBUS_FAILURE;
    }

    /* DC_hostName:  required, and must be this host. */
    if(( field = strtok( GLOBUS_NULL, ";" )) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector host name.\n\n" );
        }
        return GLOBUS_FAILURE;
    }
    if( strcmp( field, datacollector_ptr->DC_hostName ) != 0 )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            Data collector host Name mismatch.\n"
                        "            This host:  %s.\n"
                        "            From file:  %s.\n\n",
                        datacollector_ptr->DC_hostName,
                        field );
        }
        return GLOBUS_FAILURE;
    }

    /* DC_portNumHBMsg:  only its presence is verified. */
    if( strtok( GLOBUS_NULL, ";" ) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [05] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector port number for "
                        "receiving heartbeats.\n\n" );
        }
        return GLOBUS_FAILURE;
    }

    /* DC_EvalInterval:  only its presence is verified. */
    if( strtok( GLOBUS_NULL, ";" ) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [06] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector default client "
                        "evaluation interval.\n\n" );
        }
        return GLOBUS_FAILURE;
    }

    /*
    ** DC_LocalMonitorsCnt:  kept so it can later be verified against
    ** the number of local monitor records found in the file.
    */
    if(( field = strtok( GLOBUS_NULL, ";" )) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [07] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector count of "
                        "local monitors.\n\n" );
        }
        return GLOBUS_FAILURE;
    }
    datacollector_ptr->DC_LocalMonitorsCnt = atoi( field );

    /* DC_ckptFileName:  only its presence is verified. */
    if( strtok( GLOBUS_NULL, ";" ) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [08] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector checkpoint file "
                        "name.\n\n" );
        }
        return GLOBUS_FAILURE;
    }

    /* DC_ckpt_and_eval_time:  only its presence is verified. */
    if( strtok( GLOBUS_NULL, ";" ) == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [09] in "
                        "globus_l_hbmdc_extract_datacollector():\n"
                        "        Bad checkpoint file data collector record on "
                        "restore:\n"
                        "            No data collector checkpoint and "
                        "evaluation time.\n\n" );
        }
        return GLOBUS_FAILURE;
    }

    return GLOBUS_SUCCESS;
}


/*
**  globus_l_hbmdc_extract_hb_data()
**
**  Decode a packed heartbeat message into a newly allocated
**  globus_l_hbm_heartbeat_t.
**
**  Fields are consumed from read_buff in this order:
**      version, msg_len, lm_addr (raw NUM_PACKED_BYTES copy), lm_port,
**      pid, procname (NUL-terminated string), status, regtime,
**      rptinterval, rptnum, blockedtime, cputime, unregtime, unregnum,
**      msgnum, msg (NUL-terminated string; empty means "no message").
**
**  Returns the new structure, or GLOBUS_NULL if an allocation fails
**  (a diagnostic is written to DC_log_file when one is set).  The
**  result is released with globus_l_hbmdc_free_hb_data().
**
**  NOTE(review): no buffer length is passed in, so the strlen() calls
**  rely on the strings in read_buff being NUL-terminated -- confirm the
**  caller guarantees this.
*/
static globus_l_hbm_heartbeat_t*
globus_l_hbmdc_extract_hb_data(
                char*                   read_buff,
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    globus_l_hbm_heartbeat_t*
                        hb            = GLOBUS_NULL;
    char*               cursor        = read_buff;
    size_t              name_len;
    int                 text_len;

    hb = globus_malloc( sizeof( globus_l_hbm_heartbeat_t ));
    if( hb == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_extract_hb_data():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        return GLOBUS_NULL;
    }
    memset( hb, 0, sizeof( *hb ));
    hb->procname = GLOBUS_NULL;
    hb->msg      = GLOBUS_NULL;

    /* Fixed-width header:  version, message length, LM address, LM port. */
    UnpackUInt32( cursor, hb->version );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->msg_len );
    cursor += NUM_PACKED_BYTES;

    /* The local monitor address is copied as raw bytes, not unpacked. */
    memcpy( &( hb->lm_addr ), cursor, NUM_PACKED_BYTES );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->lm_port );
    cursor += NUM_PACKED_BYTES;

    /* Client process id, followed by its NUL-terminated name. */
    UnpackUInt32( cursor, hb->pid );
    cursor += NUM_PACKED_BYTES;

    name_len = strlen( cursor );
    hb->procname = globus_malloc( name_len + 1 );
    if( hb->procname == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_extract_hb_data():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_hb_data( hb );

        return GLOBUS_NULL;
    }
    memcpy( hb->procname, cursor, name_len + 1 );
    cursor += name_len + 1;

    /* Client status and timing counters, all fixed width. */
    UnpackUInt32( cursor, hb->status );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->regtime );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->rptinterval );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->rptnum );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->blockedtime );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->cputime );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->unregtime );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->unregnum );
    cursor += NUM_PACKED_BYTES;

    UnpackUInt32( cursor, hb->msgnum );
    cursor += NUM_PACKED_BYTES;

    /* Trailing client message; an empty string leaves msg as GLOBUS_NULL. */
    text_len = strlen( cursor );
    if( text_len != 0 )
    {
        hb->msg = globus_malloc( text_len + 1 );
        if( hb->msg == GLOBUS_NULL )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_extract_hb_data():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
            }
            globus_l_hbmdc_free_hb_data( hb );

            return GLOBUS_NULL;
        }
        strcpy( hb->msg, cursor );
    }
    else
    {
        hb->msg = GLOBUS_NULL;
    }

    return hb;
}


/*
**  globus_l_hbmdc_extract_localmonitor()
**
**  Build a local monitor entry from a checkpoint file record on restore.
**
**  buff_ptr is tokenized in place on ';' with strtok(), so the buffer
**  is modified.  The fields are expected in this order:
**
**      LM_hostIPNum, LM_hostName           copied into new allocations
**      LM_hostPort, LM_ClientsCnt,
**      LM_ClientsLiveCnt, LM_SummaryStatus converted with atoi()
**
**  The new entry has an empty client list and is not linked into any
**  list (next and prev are GLOBUS_NULL).
**
**  Returns the new entry, or GLOBUS_NULL if a field is missing or an
**  allocation fails.  On failure a diagnostic is written to DC_log_file
**  (when one is set) and the partially built entry is released.
**
**  NOTE(review): LM_host_addr is not filled in here and stays zeroed --
**  confirm whether the caller derives it from LM_hostIPNum.
*/
static hbmdc_lm_fields_t *
globus_l_hbmdc_extract_localmonitor(
                char*                   buff_ptr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    char*               token         = GLOBUS_NULL;
    hbmdc_lm_fields_t*  localmonitor  = GLOBUS_NULL;

    localmonitor = globus_malloc( sizeof(hbmdc_lm_fields_t ));
    if( localmonitor == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        return GLOBUS_NULL;
    }
    memset(             (void *) localmonitor,
                        0,
                        sizeof( hbmdc_lm_fields_t ));
    /*
    ** Pointers are set explicitly so that the free routine can be
    ** called safely on a partially filled entry.
    */
    localmonitor->LM_hostIPNum              = GLOBUS_NULL;
    localmonitor->LM_hostName               = GLOBUS_NULL;
    localmonitor->LM_client_list.head       = GLOBUS_NULL;
    localmonitor->LM_client_list.tail       = GLOBUS_NULL;
    localmonitor->prev = localmonitor->next = GLOBUS_NULL;

/*
**  LM_hostIPNum
*/
    token = strtok( buff_ptr, ";" );
    if( token == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        Bad checkpoint file local monitor record on "
                        "restore:\n"
                        "            No local monitor host IP number.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    localmonitor->LM_hostIPNum = globus_malloc( strlen( token ) + 1 );
    if( localmonitor->LM_hostIPNum == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    strcpy( localmonitor->LM_hostIPNum, token );

/*
**  LM_hostName
*/
    token = strtok( GLOBUS_NULL, ";" );
    if( token == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        Bad checkpoint file local monitor record on "
                        "restore:\n"
                        "            No local monitor host name.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    localmonitor->LM_hostName = globus_malloc( strlen( token ) + 1 );
    if( localmonitor->LM_hostName == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [05] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        globus_malloc() failed:  out of memory.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    strcpy( localmonitor->LM_hostName, token );

/*
**  LM_hostPort
*/
    token = strtok( GLOBUS_NULL, ";" );
    if( token == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [06] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        Bad checkpoint file local monitor record on "
                        "restore:\n"
                        "            No local monitor host port.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    localmonitor->LM_hostPort = (unsigned int)atoi( token );

/*
**  LM_ClientsCnt  (an int field, so no unsigned cast)
*/
    token = strtok( GLOBUS_NULL, ";" );
    if( token == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [07] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        Bad checkpoint file local monitor record on "
                        "restore:\n"
                        "            No local monitor client count.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    localmonitor->LM_ClientsCnt = atoi( token );

/*
**  LM_ClientsLiveCnt  (an int field, so no unsigned cast)
*/
    token = strtok( GLOBUS_NULL, ";" );
    if( token == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [08] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        Bad checkpoint file local monitor record on "
                        "restore:\n"
                        "            No local monitor "
                        "live client count.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    localmonitor->LM_ClientsLiveCnt = atoi( token );

/*
**  LM_SummaryStatus
*/
    token = strtok( GLOBUS_NULL, ";" );
    if( token == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [09] in "
                        "globus_l_hbmdc_extract_localmonitor():\n"
                        "        Bad checkpoint file local monitor record on "
                        "restore:\n"
                        "            No local monitor summary status.\n\n" );
        }
        globus_l_hbmdc_free_localmonitor_entry( localmonitor );

        return GLOBUS_NULL;
    }
    localmonitor->LM_SummaryStatus = (unsigned int )atoi( token );

    return localmonitor;
}


/*
**  globus_l_hbmdc_find_client_entry()
**
**  Search every local monitor's client list in the data collector for
**  the client whose host address, process id and process name all
**  match the arguments.
**
**  Returns the first matching client entry, or GLOBUS_NULL if none.
*/
static hbmdc_cl_fields_t*
globus_l_hbmdc_find_client_entry(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                struct in_addr          host_ipaddr,
                u_int                   pid,
                char*                   procname )
{
    hbmdc_lm_fields_t*  monitor       = GLOBUS_NULL;
    hbmdc_cl_fields_t*  candidate     = GLOBUS_NULL;

    for( monitor = datacollector_ptr->DC_lm_list.head;
         monitor != GLOBUS_NULL;
         monitor = monitor->next )
    {
        for( candidate = monitor->LM_client_list.head;
             candidate != GLOBUS_NULL;
             candidate = candidate->next )
        {
            /* Cheapest comparisons first; the name is compared last. */
            if( host_ipaddr.s_addr != candidate->CL_host_addr.s_addr )
            {
                continue;
            }
            if( pid != candidate->CL_procPID )
            {
                continue;
            }
            if( strcmp( procname, candidate->CL_procName ) != 0 )
            {
                continue;
            }

            return candidate;
        }
    }

    return GLOBUS_NULL;
}


/*
**  globus_l_hbmdc_find_datacollector()
**
**  Walk the file-scope data collector list and return the entry whose
**  DC_handle equals handle, or GLOBUS_NULL if no entry matches.
*/
static globus_hbm_datacollector_t*
globus_l_hbmdc_find_datacollector(
                globus_hbm_datacollector_handle_t
                                        handle )
{
    globus_hbm_datacollector_t*
                        current       = GLOBUS_NULL;

    for( current = globus_l_hbm_datacollector_list.head;
         current != GLOBUS_NULL;
         current = current->next )
    {
        if( current->DC_handle == handle )
        {
            break;
        }
    }

    /* current is GLOBUS_NULL here when the whole list was scanned. */
    return current;
}


/*
**  globus_l_hbmdc_find_datacollector_fd()
**
**  Walk the file-scope data collector list and return the first entry
**  that uses fd as either its DC_hb_fd or its DC_req_fd.
**  Returns GLOBUS_NULL if no data collector uses fd.
*/
static globus_hbm_datacollector_t*
globus_l_hbmdc_find_datacollector_fd(
                int                     fd )
{
    globus_hbm_datacollector_t*
                        current       = GLOBUS_NULL;

    current = globus_l_hbm_datacollector_list.head;
    while( current != GLOBUS_NULL )
    {
        if( fd == current->DC_hb_fd )
        {
            return current;
        }
        if( fd == current->DC_req_fd )
        {
            return current;
        }
        current = current->next;
    }

    return GLOBUS_NULL;
}


/*
**  globus_l_hbmdc_find_localmonitor_entry()
**
**  Search the data collector's local monitor list for the entry whose
**  LM_hostIPNum string equals ipnum and whose LM_hostPort equals port.
**  Returns the first such entry, or GLOBUS_NULL if none matches.
*/
static hbmdc_lm_fields_t*
globus_l_hbmdc_find_localmonitor_entry(
        globus_hbm_datacollector_t*
                        datacollector_ptr,
        char*           ipnum,
        unsigned int    port )
{
    hbmdc_lm_fields_t*  monitor       = GLOBUS_NULL;

    monitor = datacollector_ptr->DC_lm_list.head;
    while( monitor != GLOBUS_NULL )
    {
        if( strcmp( monitor->LM_hostIPNum, ipnum ) == 0 )
        {
            if( monitor->LM_hostPort == port )
            {
                return monitor;
            }
        }
        monitor = monitor->next;
    }

    return GLOBUS_NULL;
}


/*
**  globus_l_hbmdc_find_set_fd()
**
**  Find the lowest-numbered descriptor in 0..max_fds that is set in the
**  file-scope read_fds, and return the data collector that owns it.
**
**  On success *is_tcp_fd is set to GLOBUS_TRUE when the descriptor is
**  the data collector's DC_req_fd, and to GLOBUS_FALSE otherwise.
**
**  Returns GLOBUS_NULL, leaving *is_tcp_fd untouched, when no
**  descriptor is set or when the first set descriptor does not belong
**  to any data collector (later descriptors are not examined in that
**  case).
*/
static globus_hbm_datacollector_t*
globus_l_hbmdc_find_set_fd(
                globus_bool_t*          is_tcp_fd )
{
    int                 fd;
    globus_hbm_datacollector_t*
                        datacollector_ptr = GLOBUS_NULL;

    /*
    ** The loop condition already bounds fd by max_fds, so no further
    ** range check is needed inside the body.
    */
    for( fd = 0; fd <= max_fds; fd++ )
    {
#ifdef  TARGET_ARCH_HPUX
        if( read_fds & ( 1 << fd ))
#else
        if( FD_ISSET( fd, &read_fds ))
#endif  /*  TARGET_ARCH_HPUX defined  */
        {
            datacollector_ptr = globus_l_hbmdc_find_datacollector_fd( fd );
            if( datacollector_ptr == GLOBUS_NULL )
            {
                return GLOBUS_NULL;
            }

            if( fd == datacollector_ptr->DC_req_fd )
            {
                *is_tcp_fd = GLOBUS_TRUE;
            }
            else
            {
                *is_tcp_fd = GLOBUS_FALSE;
            }
            return datacollector_ptr;
        }
    }

    return GLOBUS_NULL;
}


/*
**  globus_l_hbmdc_free_callback_entry()
**
**  Release a callback entry together with the callback data and the
**  client copy it points to.  A GLOBUS_NULL argument is ignored.
*/
static void
globus_l_hbmdc_free_callback_entry(
                hbmdc_dc_callback_entry_t*
                                        callback_entry_ptr )
{
    if( callback_entry_ptr == GLOBUS_NULL )
    {
        return;
    }

    if( callback_entry_ptr->callbackdata_ptr != GLOBUS_NULL )
    {
        globus_l_hbmdc_free_callbackdata(
                        callback_entry_ptr->callbackdata_ptr );
    }

    if( callback_entry_ptr->client_copy_ptr != GLOBUS_NULL )
    {
        globus_l_hbmdc_free_client_entry(
                        callback_entry_ptr->client_copy_ptr );
    }

    globus_free( callback_entry_ptr );
}


/*
**  globus_l_hbmdc_free_callbackdata()
**
**  Release a client callback data structure and its cl_procname
**  string.  A GLOBUS_NULL argument is ignored.
*/
static void
globus_l_hbmdc_free_callbackdata(
                globus_hbm_client_callbackdata_t*
                                        callbackdata_ptr )
{
    if( callbackdata_ptr == GLOBUS_NULL )
    {
        return;
    }

    if( callbackdata_ptr->cl_procname != GLOBUS_NULL )
    {
        globus_free( callbackdata_ptr->cl_procname );
    }

    globus_free( callbackdata_ptr );
}


/*
**  globus_l_hbmdc_free_client_entry()
**
**  Release a client entry and the strings it holds (CL_hostIPNum,
**  CL_procName, CL_Msg).  The entry is not unlinked from any list here.
**  A GLOBUS_NULL argument is ignored.
*/
static void
globus_l_hbmdc_free_client_entry(
                hbmdc_cl_fields_t*      cl_entry )
{
    if( cl_entry == GLOBUS_NULL )
    {
        return;
    }

    if( cl_entry->CL_hostIPNum != GLOBUS_NULL )
    {
        globus_free( cl_entry->CL_hostIPNum );
    }

    if( cl_entry->CL_procName != GLOBUS_NULL )
    {
        globus_free( cl_entry->CL_procName );
    }

    if( cl_entry->CL_Msg != GLOBUS_NULL )
    {
        globus_free( cl_entry->CL_Msg );
    }

    globus_free( cl_entry );
}


/*
**  globus_l_hbmdc_free_datacollector()
**
**  Tear down a data collector:  every local monitor entry is removed
**  from DC_lm_list and freed, the mutex is destroyed if it was
**  initialized, the heartbeat descriptor is closed if it is not -1,
**  and the host IP and checkpoint file name strings are freed before
**  the structure itself.  A GLOBUS_NULL argument is ignored.
*/
static void
globus_l_hbmdc_free_datacollector(
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    hbmdc_lm_fields_t*  monitor       = GLOBUS_NULL;
    hbmdc_lm_fields_t*  following     = GLOBUS_NULL;

    if( datacollector_ptr == GLOBUS_NULL )
    {
        return;
    }

    /* The successor is saved first because the entry is freed below. */
    for( monitor = datacollector_ptr->DC_lm_list.head;
         monitor != GLOBUS_NULL;
         monitor = following )
    {
        following = monitor->next;
        ListRemove( &( datacollector_ptr->DC_lm_list ),
                    monitor );
        globus_l_hbmdc_free_localmonitor_entry( monitor );
    }

    if( datacollector_ptr->DC_mutex_initialized )
    {
        globus_mutex_destroy( &( datacollector_ptr->DC_mutex ));
    }

    if( datacollector_ptr->DC_hostIPNum != GLOBUS_NULL )
    {
        globus_free( datacollector_ptr->DC_hostIPNum );
    }

    if( datacollector_ptr->DC_hb_fd != -1 )
    {
        close( datacollector_ptr->DC_hb_fd );
    }

    /*
    ** DC_req_fd is deliberately left alone:  there is no way to tell
    ** for sure whether the program already closed the request fd.
    ** ( The DC_req_fd field is going away anyway. )
    */

    if( datacollector_ptr->DC_ckptFileName != GLOBUS_NULL )
    {
        globus_free( datacollector_ptr->DC_ckptFileName );
    }

    if( datacollector_ptr->DC_ckptFileNameWk != GLOBUS_NULL )
    {
        globus_free( datacollector_ptr->DC_ckptFileNameWk );
    }

    globus_free( datacollector_ptr );
}


/*
**  globus_l_hbmdc_free_hb_data()
**
**  Release a decoded heartbeat structure and its procname and msg
**  strings.  A GLOBUS_NULL argument is ignored.
*/
static void
globus_l_hbmdc_free_hb_data(
                globus_l_hbm_heartbeat_t*
                                        hb_data )
{
    if( hb_data == GLOBUS_NULL )
    {
        return;
    }

    if( hb_data->procname != GLOBUS_NULL )
    {
        globus_free( hb_data->procname );
    }

    if( hb_data->msg != GLOBUS_NULL )
    {
        globus_free( hb_data->msg );
    }

    globus_free( hb_data );
}


/*
**  globus_l_hbmdc_free_localmonitor_entry()
**
**  Release a local monitor entry:  every client on its LM_client_list
**  is removed from the list and freed, then the host IP and host name
**  strings, then the entry itself.  The entry is not unlinked from the
**  data collector's list here.  A GLOBUS_NULL argument is ignored.
*/
static void
globus_l_hbmdc_free_localmonitor_entry(
                hbmdc_lm_fields_t*      lm_entry_ptr )
{
    hbmdc_cl_fields_t*  client;
    hbmdc_cl_fields_t*  following;

    if( lm_entry_ptr == GLOBUS_NULL )
    {
        return;
    }

    /* The successor is saved first because the client is freed below. */
    for( client = lm_entry_ptr->LM_client_list.head;
         client != GLOBUS_NULL;
         client = following )
    {
        following = client->next;
        ListRemove( &( lm_entry_ptr->LM_client_list ),
                    client );

        globus_l_hbmdc_free_client_entry( client );
    }

    if( lm_entry_ptr->LM_hostIPNum != GLOBUS_NULL )
    {
        globus_free( lm_entry_ptr->LM_hostIPNum );
    }

    if( lm_entry_ptr->LM_hostName != GLOBUS_NULL )
    {
        globus_free( lm_entry_ptr->LM_hostName );
    }

    globus_free( lm_entry_ptr );
}


static int
globus_l_hbmdc_internal_checkpoint(
                globus_hbm_datacollector_t*
                                        datacollector_ptr,
                int                     chkpt_fd )
{
    char                date_time_str[GLOBUS_HBM_DATE_TIME_LEN];
    char                datacollector_buffer[DATACOLLECTOR_BUFFER_SIZE];
    char                localmonitor_buffer[LOCALMONITOR_BUFFER_SIZE];
    char                client_buffer[CLIENT_BUFFER_SIZE];
    int                 n_bytes;
    int                 n_bytes_written;
    globus_bool_t       created_by_me = GLOBUS_FALSE;
    hbmdc_cl_fields_t*  cl_fields     = GLOBUS_NULL;
    hbmdc_lm_fields_t*  lm_fields     = GLOBUS_NULL;
    struct timeval      time;

    if( chkpt_fd == HBMDC_INTERNAL_CHKPT_FD )
    {
        datacollector_ptr->DC_ckptNeeded = 0;
        if( datacollector_ptr->DC_ckptFileName == GLOBUS_NULL )
        {
            return GLOBUS_SUCCESS;
        }
    }

    /*
    ** if internal checkpoint or user passes in -1 for chkpt_fd,
    ** open and write to the checkpoint file
    ** that's specified in datacollector_ptr->DC_ckptFileName
    */
    if( chkpt_fd == HBMDC_INTERNAL_CHKPT_FD )
    {
        created_by_me = GLOBUS_TRUE;
        chkpt_fd = creat( datacollector_ptr->DC_ckptFileNameWk, 0644 );
        if( chkpt_fd == -1 )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        creat() failed: errno [%d]: %s.\n"
                        "        Cannot open checkpoint work file:  %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        datacollector_ptr->DC_ckptFileNameWk );
            }
            return GLOBUS_FAILURE;
        }
    }

    if( gettimeofday( &time, GLOBUS_NULL ))
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        gettimeofday() failed: errno [%d]: %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ));
        }
        if( created_by_me == GLOBUS_TRUE )
        {
            close( chkpt_fd );
        }

        return GLOBUS_FAILURE;
    }

/*
**  Output literal and current date and time
*/
    globus_i_hbm_convert_UTC_to_str( time.tv_sec, date_time_str );

/*
** TODO: find out the appropriate call to check a fd for validity
*/

    n_bytes_written = write(
                        chkpt_fd,
                        "HBMDC CHECKPOINT:",
                        17 );
    if( n_bytes_written != 17 )
    {
        if( created_by_me == GLOBUS_TRUE )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint work file:  %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        datacollector_ptr->DC_ckptFileNameWk );
            }
            close( chkpt_fd );
        }
        else
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint fd [%d].\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        chkpt_fd );
            }
        }

        return GLOBUS_FAILURE;
    }

    n_bytes_written =  write(
                        chkpt_fd,
                        (char *) &date_time_str,
                        strlen( date_time_str ));
    if( n_bytes_written != ((int) strlen( date_time_str )))
    {
        if( created_by_me == GLOBUS_TRUE )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [05] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint work file:  %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        datacollector_ptr->DC_ckptFileNameWk );
            }
            close( chkpt_fd );
        }
        else
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [06] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint fd [%d].\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        chkpt_fd );
            }
        }

        return GLOBUS_FAILURE;
    }

    n_bytes_written = write(
                        chkpt_fd,
                        "\n",
                        1 );
    if( n_bytes_written != 1 )
    {
        if( created_by_me == GLOBUS_TRUE )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [07] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint work file:  %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        datacollector_ptr->DC_ckptFileNameWk );
            }
            close( chkpt_fd );
        }
        else
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [08] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint fd [%d].\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        chkpt_fd );
            }
        }

        return GLOBUS_FAILURE;
    }

/*
**  Write Data Collector data, each field is separated by a ';'
*/

    n_bytes = globus_l_hbmdc_set_dc_buffer(
                        datacollector_buffer,
                        datacollector_ptr );

    n_bytes_written = write(
                        chkpt_fd,
                        (char*) &datacollector_buffer,
                        n_bytes );
    if( n_bytes_written != n_bytes )
    {
        if( created_by_me == GLOBUS_TRUE )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [09] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint work file:  %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        datacollector_ptr->DC_ckptFileNameWk );
            }
            close( chkpt_fd );
        }
        else
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [10] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint fd [%d].\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        chkpt_fd );
            }
        }

        return GLOBUS_FAILURE;
    }

/*
**  Write the Local Monitor and Client information,
**  each field separated by a semicolon ';'.
*/
    lm_fields = datacollector_ptr->DC_lm_list.head;
    while( lm_fields )
    {
        n_bytes = globus_l_hbmdc_set_lm_buffer(
                        localmonitor_buffer,
                        lm_fields );

        n_bytes_written = write(
                        chkpt_fd,
                        (char *) &localmonitor_buffer,
                        n_bytes );
        if( n_bytes_written != n_bytes )
        {
            if( created_by_me == GLOBUS_TRUE )
            {
                if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                {
                    globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [11] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint work file:  %s.\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        datacollector_ptr->DC_ckptFileNameWk );
                }
                close( chkpt_fd );
            }
            else
            {
                if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                {
                    globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [12] in "
                        "globus_l_hbmdc_internal_checkpoint():\n"
                        "        write() failed: errno [%d]: %s.\n"
                        "        Cannot write to checkpoint fd [%d].\n"
                        "        Aborting checkpoint.\n\n",
                        errno,
                        strerror( errno ),
                        chkpt_fd );
                }
            }

            return GLOBUS_FAILURE;
        }

        cl_fields = lm_fields->LM_client_list.head;
        while( cl_fields )
        {
            n_bytes = globus_l_hbmdc_set_cl_buffer(
                        client_buffer,
                        cl_fields );

            n_bytes_written = write(
                        chkpt_fd,
                        (char *) &client_buffer,
                        n_bytes );
            if( n_bytes_written != n_bytes )
            {
                if( created_by_me == GLOBUS_TRUE )
                {
                    if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                    {
                        globus_libc_fprintf(
                            datacollector_ptr->DC_log_file,
                            "Globus HBM Data Collector library:\n"
                            "    Error [11] in "
                            "globus_l_hbmdc_internal_checkpoint():\n"
                            "        write() failed: errno [%d]: %s.\n"
                            "        Cannot write to checkpoint work file:  "
                            "%s.\n"
                            "        Aborting checkpoint.\n\n",
                            errno,
                            strerror( errno ),
                            datacollector_ptr->DC_ckptFileNameWk );
                    }
                    close( chkpt_fd );
                }
                else
                {
                    if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                    {
                        globus_libc_fprintf(
                            datacollector_ptr->DC_log_file,
                            "Globus HBM Data Collector library:\n"
                            "    Error [12] in "
                            "globus_l_hbmdc_internal_checkpoint():\n"
                            "        write() failed: errno [%d]: %s.\n"
                            "        Cannot write to checkpoint fd [%d].\n"
                            "        Aborting checkpoint.\n\n",
                            errno,
                            strerror( errno ),
                            chkpt_fd );
                    }
                }

                return GLOBUS_FAILURE;
            }

            cl_fields = cl_fields->next;
        }

        lm_fields = lm_fields->next;
    }

    if( created_by_me == GLOBUS_TRUE )
    {
        unlink( datacollector_ptr->DC_ckptFileName );
        link(   datacollector_ptr->DC_ckptFileNameWk,
                datacollector_ptr->DC_ckptFileName );
        unlink( datacollector_ptr->DC_ckptFileNameWk );

        close( chkpt_fd );
    }

    return GLOBUS_SUCCESS;
}


/*
**  globus_l_hbmdc_poll()
**
**  One polling pass over all registered data collectors:
**      1) build the select() read set from every data collector's
**         DC_hb_fd (and DC_req_fd when it is not -1) and work out how
**         long select() may block;
**      2) select();
**      3) if globus_l_hbmdc_find_set_fd() reports a ready fd, either
**         invoke the data collector's DC_tcp_req_callback (tcp fd) or
**         read one datagram and pass it to
**         globus_l_hbmdc_process_heartbeat() (udp fd);
**      4) evaluate (and, if needed, checkpoint) every data collector
**         whose DC_ckpt_and_eval_time has passed;
**      5) run and free all queued callback entries.
**
**  Parameters:
**      time_stop   if not "infinity" select() is called with a zero
**                  timeout;  otherwise select() blocks until the next
**                  evaluation time, or HBMDC_MAX_BLOCK_TIME seconds
**                  when no evaluation is pending.
**      user_args   not used.
**
**  Returns GLOBUS_TRUE when the pass completed, select() was
**  interrupted (EINTR), or recvfrom() returned nothing to read.
**  Returns GLOBUS_FALSE when gettimeofday(), select() or recvfrom()
**  failed.
**
**  max_fds and read_fds are not declared here;  presumably they are
**  file-scope variables -- TODO confirm.
**
**  Every return path releases the mutexes it holds.
*/
static globus_bool_t
globus_l_hbmdc_poll(
                globus_abstime_t *      time_stop,
                void *                  user_args )
{
    globus_hbm_datacollector_t*
                        datacollector_ptr =
                                        GLOBUS_NULL;
    hbmdc_dc_callback_entry_t*
                        callback_entry_ptr =
                                        GLOBUS_NULL;
    globus_bool_t       is_tcp_fd     = GLOBUS_FALSE;
    struct timeval      current_time;
    struct timeval      time_to_block;
    struct timeval      next_eval_time;
    int                 dc_handle_wk;
    int                 dc_req_fd_wk;
    int                 live_clients;
    int                 rc;
    int                 saved_errno;
    char                read_buff[GLOBUS_HBM_BUFF_SIZE_MAX];
    struct sockaddr_in  from_addr;
    int                 n_bytes_read;

#if  (   (defined TARGET_ARCH_CRAYT3E ) \
      || (defined TARGET_ARCH_HPUX ) \
      || (defined TARGET_ARCH_IRIX ) \
      || (defined TARGET_ARCH_SOLARIS ))
    int                 from_addr_len;
#else
    size_t              from_addr_len;
#endif

    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

/*
**  Setup the read_fds fields and calculate the time to block (if can ).
**  Need to include all data collectors.
*/
    max_fds = 0;
    FD_ZERO( &read_fds );
/*
**  Does this need to be changed to the following for HPUX?
**  memset((void *) &fd_readset, 0, sizeof(fd_readset));
*/

    next_eval_time.tv_sec = 0;
    next_eval_time.tv_usec = 0;
    live_clients = 0;

    for( datacollector_ptr = globus_l_hbm_datacollector_list.head;
         datacollector_ptr != GLOBUS_NULL;
         datacollector_ptr = datacollector_ptr->next )
    {
        globus_mutex_lock( &datacollector_ptr->DC_mutex );

/*
**      First update the fd fields for this data collector instance.
*/
#ifdef  TARGET_ARCH_HPUX
        read_fds |= (1 << datacollector_ptr->DC_hb_fd );
#else
        FD_SET( datacollector_ptr->DC_hb_fd, HBM_FD_SET_CAST &read_fds );
#endif  /*  TARGET_ARCH_HPUX defined  */

        if( max_fds <= datacollector_ptr->DC_hb_fd )
        {
            max_fds = datacollector_ptr->DC_hb_fd + 1;
        }
        if( datacollector_ptr->DC_req_fd != -1 )
        {
#ifdef  TARGET_ARCH_HPUX
            read_fds |= (1 << datacollector_ptr->DC_req_fd );
#else
            FD_SET( datacollector_ptr->DC_req_fd, HBM_FD_SET_CAST &read_fds );
#endif  /*  TARGET_ARCH_HPUX defined  */
            if( max_fds <= datacollector_ptr->DC_req_fd )
            {
                max_fds = datacollector_ptr->DC_req_fd + 1;
            }
        }

/*
**      Now update the blocking information:
**      track the earliest evaluation time among data collectors
**      that have live clients.
*/
        if( datacollector_ptr->DC_ClientsLiveCnt )
        {
            live_clients = 1;

            if( next_eval_time.tv_sec == 0 )
            {
                next_eval_time.tv_sec =
                        datacollector_ptr->DC_ckpt_and_eval_time;
            }
            else if( datacollector_ptr->DC_ckpt_and_eval_time <
                                next_eval_time.tv_sec )
            {
                next_eval_time.tv_sec =
                        datacollector_ptr->DC_ckpt_and_eval_time;
            }
        }

        globus_mutex_unlock( &datacollector_ptr->DC_mutex );
    }

    if(!globus_time_abstime_is_infinity(time_stop))
    {
        GlobusTimeAbstimeGet(*time_stop,
                             time_to_block.tv_sec,
                             time_to_block.tv_usec);
        /*
        ** set blocking time to zero
        ** (the values fetched above are deliberately overwritten)
        */
        time_to_block.tv_sec = 0;
        time_to_block.tv_usec = 0;
    }
    else
    {
        if( gettimeofday( &current_time, GLOBUS_NULL ))
        {
            /* can't write to log file */
            /* Release the global mutex taken at the top. */
            globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
            return GLOBUS_FALSE;
        }
/*
**      Set default (maximum ) blocking time.
**      If another datacollector gets created there
**      needs to be an opportunity for it to get
**      its sockets listened to also.
*/
        if( next_eval_time.tv_sec == 0 )
        {
            time_to_block.tv_sec = HBMDC_MAX_BLOCK_TIME;
            time_to_block.tv_usec = 0;
        }
        else if( next_eval_time.tv_sec <= current_time.tv_sec )
        {
            time_to_block.tv_sec = 0;
            time_to_block.tv_usec = 0;
#ifdef DEBUGGING_HBMDC
            globus_libc_fprintf(
                        stderr,
                        "0 blocking, datacollector needs to be "
                        "evaluated.\n\n" );
#endif /* DEBUGGING_HBMDC */
        }
        else
        {
            time_to_block.tv_sec =  next_eval_time.tv_sec
                                  - current_time.tv_sec;
            time_to_block.tv_usec = 0;
/*
**          ( time_to_block.tv_sec > 0 )
**          Borrow one second to account for the microseconds
**          already elapsed in the current second.
*/
            if( current_time.tv_usec )
            {
                time_to_block.tv_sec--;
                time_to_block.tv_usec = 1000000
                                  - current_time.tv_usec;
            }
        }
    }

#ifdef DEBUGGING_HBMDC
    globus_libc_fprintf(
                        stderr,
                        "time_to_block: %d.%6d\n"
                        "next_eval_time.tv_sec: %d\n"
                        "live_clients: %d\n\n",
                        time_to_block.tv_sec,
                        time_to_block.tv_usec,
                        next_eval_time.tv_sec,
                        live_clients );
#endif

    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    /* call select, all needed values are now set */
    rc = select(        max_fds,
                        HBM_FD_SET_CAST &read_fds,
                        GLOBUS_NULL,
                        GLOBUS_NULL,
                        &time_to_block );
    saved_errno = errno;

    if( rc < 0 )
    {
        if( saved_errno == EINTR )
        {
            return GLOBUS_TRUE;
        }
        else
        {
            return GLOBUS_FALSE;
        }
    }

/*
**  Figure out which fd is set, 2 options:
**      1) search fds 0 through (maxfds -1)  OR
**      2) search through datacollector_list and
**         test both the DC_hb_fd and DC_req_fd.
**  For the first cut use option 1.
**  If performance is poor try option 2
*/
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

    datacollector_ptr = globus_l_hbmdc_find_set_fd( &is_tcp_fd );

/*
**  If we have an fd that's set,
**      read the information and execute the callbacks.
*/
    if( datacollector_ptr != GLOBUS_NULL )
    {
#ifdef DEBUGGING_HBMDC
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "got a message!\n\n" );
        }
#endif
        globus_mutex_lock( &datacollector_ptr->DC_mutex );
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

/*      If it's the tcp socket,
**          pass fd back to the user with the callback.
**      If it's the udp socket,
**          read and pass the buffer to globus_l_hbmdc_process_heartbeat().
*/
        if( is_tcp_fd == GLOBUS_TRUE )
        {
            dc_handle_wk = datacollector_ptr->DC_handle;
            dc_req_fd_wk = datacollector_ptr->DC_req_fd;
            globus_mutex_unlock( &datacollector_ptr->DC_mutex );

            (*datacollector_ptr->DC_tcp_req_callback )(
                                dc_req_fd_wk,
                                dc_handle_wk );
        }
        else
        {
            memset(     (void *) &from_addr,
                        0,
                        sizeof( from_addr ));
            from_addr.sin_family = AF_INET;
            from_addr_len = sizeof( from_addr );

            n_bytes_read = recvfrom( datacollector_ptr->DC_hb_fd,
                                     read_buff,
                                     GLOBUS_HBM_BUFF_SIZE_MAX,
                                     0,
                                     (struct sockaddr *) &from_addr,
                                     &from_addr_len );
            saved_errno = errno;

/*
**          errno is only meaningful when recvfrom() failed, so it is
**          consulted only in that case;  a stale EAGAIN must not
**          cause a successfully received heartbeat to be dropped.
*/
            if( n_bytes_read < 0 )
            {
                globus_mutex_unlock( &datacollector_ptr->DC_mutex );

                if( saved_errno == EAGAIN )
                {
                    return GLOBUS_TRUE;
                }
                return GLOBUS_FALSE;
            }

            if( n_bytes_read == 0 )
            {
                globus_mutex_unlock( &datacollector_ptr->DC_mutex );
                return GLOBUS_TRUE;
            }

            rc = globus_l_hbmdc_process_heartbeat(
                                n_bytes_read,
                                read_buff,
                                &from_addr,
                                datacollector_ptr );
            if( rc == GLOBUS_FAILURE )
            {
                if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                {
                    globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_poll():\n"
                        "        globus_l_hbmdc_process_heartbeat() "
                        "failed.\n\n" );
                }
                globus_mutex_unlock( &datacollector_ptr->DC_mutex );

                /*
                ** NOTE(review): GLOBUS_FAILURE is returned from a
                ** globus_bool_t function;  the value is kept as is
                ** for existing callers -- confirm what is intended.
                */
                return GLOBUS_FAILURE;
            }
            globus_mutex_unlock( &datacollector_ptr->DC_mutex );
        }
    }
    else
    {
/*
**      No fd was ready (e.g. select() timed out).
**      Release the global mutex;  it is taken again below.
*/
        globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
    }

    if( gettimeofday( &current_time, GLOBUS_NULL ))
    {
        /* can't write to log file */
        return GLOBUS_FALSE;
    }

#ifdef HBM_EXPERIMENT
/*
**  If after the cutoff time then need to switch to a new heartbeat file.
**  The cutoff is tested again once the mutex is held.
**
**  NOTE(review): globus_l_hbm_datacollector_list is accessed with
**  ".head" elsewhere in this function but with "->" here -- confirm
**  this section still compiles when HBM_EXPERIMENT is defined.
*/
    if( current_time.tv_sec >= heartbeat_file_cutoff_time.tv_sec )
    {
        globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );
        if( current_time.tv_sec >= heartbeat_file_cutoff_time.tv_sec )
        {
            heartbeat_file_cutoff_time.tv_sec += GLOBUS_HBM_TIME_SECS_PER_DAY;
            heartbeat_file_cutoff_time.tv_usec = 0;
            heartbeat_file_counter++;
            globus_libc_sprintf(
                        heartbeat_file_name_str,
                        "../var/globus-hbm-datacollector.%s.heartbeat%3.3u",
                        globus_l_hbm_datacollector_list->DC_hostName,
                        heartbeat_file_counter );
            if( ( heartbeat_file_fd2 = fopen(
                        heartbeat_file_name_str,
                        "w" )) == GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        globus_l_hbm_datacollector_list->DC_log_file,
                        "\nError opening heartbeat log file: %s\n"
                        "Will continue using previous file.\n\n",
                        heartbeat_file_name_str );
            }
            else
            {
                fclose( heartbeat_file_fd1 );
                heartbeat_file_fd1 = heartbeat_file_fd2;
            }
        }
       globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );
    }
#endif

    /*
    ** go through all datacollectors to see if they need to be
    ** evaluated at this time
    */
    globus_mutex_lock( &globus_l_hbm_datacollector_global_mutex );

    for( datacollector_ptr = globus_l_hbm_datacollector_list.head;
         datacollector_ptr != GLOBUS_NULL;
         datacollector_ptr = datacollector_ptr->next )
    {
        globus_mutex_lock( &datacollector_ptr->DC_mutex );

        if( datacollector_ptr->DC_ckpt_and_eval_time <= current_time.tv_sec )
        {
            globus_l_hbmdc_evaluate_datacollector( datacollector_ptr );
            if( datacollector_ptr->DC_ckptNeeded )
            {
                globus_l_hbmdc_internal_checkpoint( datacollector_ptr,
                                                    HBMDC_INTERNAL_CHKPT_FD );
            }
            datacollector_ptr->DC_ckpt_and_eval_time =
                current_time.tv_sec + datacollector_ptr->DC_EvalInterval;
        }
        globus_mutex_unlock( &datacollector_ptr->DC_mutex );
    }

    globus_mutex_unlock( &globus_l_hbm_datacollector_global_mutex );

    /*
    **  Now make any callbacks that were generated.
    **  The list head is tested again under the callback mutex, and
    **  the mutex is released before the user callback is invoked.
    */
    while( globus_l_hbm_datacollector_callback_list.head != GLOBUS_NULL )
    {
        globus_mutex_lock( &( globus_l_hbm_datacollector_callback_mutex ));
        if( globus_l_hbm_datacollector_callback_list.head != GLOBUS_NULL )
        {
            callback_entry_ptr =
                            globus_l_hbm_datacollector_callback_list.head;
            ListRemove( &globus_l_hbm_datacollector_callback_list,
                        callback_entry_ptr );

            globus_mutex_unlock( &globus_l_hbm_datacollector_callback_mutex );

            (*(callback_entry_ptr->callback_function_ptr ))(
                        callback_entry_ptr->handle,
                        callback_entry_ptr->callbackdata_ptr,
                        callback_entry_ptr->client_copy_ptr,
                        callback_entry_ptr->callback_userdata_ptr );

            globus_l_hbmdc_free_callback_entry(
                        callback_entry_ptr );
        }
        else
        {
            globus_mutex_unlock( &globus_l_hbm_datacollector_callback_mutex );
        }
    }

    return GLOBUS_TRUE;
}


static int
globus_l_hbmdc_process_heartbeat(
                int                     n_bytes_read,
                char*                   read_buff,
                struct sockaddr_in*     from_addr,
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
#ifdef HBM_EXPERIMENT
    struct timeval      time_curr;
#endif

    char*               buff_ptr      = GLOBUS_NULL;
    int                 len;
    int                 version;
    char                lm_ipnum[16]; /* "123.456.789.012\0" */
    char*               tmp_ipnum     = GLOBUS_NULL;
    globus_l_hbm_heartbeat_t*
                        hb_data;
    hbmdc_cl_fields_t*  cl_entry;
    hbmdc_lm_fields_t*  lm_entry;
    struct timeval      current_time;
    hbmdc_dc_callback_entry_t*
                        callback_entry_ptr;
    unsigned int        events        =
                            GLOBUS_HBM_DATACOLLECTOR_EVENT_HEARTBEAT_RECEIVED;

/*
**  The DC_mutex for this data collector is already locked.
*/

/*
**  First check the length and version of the heartbeat
*/
    buff_ptr = read_buff;
    UnpackUInt32( buff_ptr, version );
    buff_ptr += NUM_PACKED_BYTES;

/*
**  version != n_bytes_read is temporary fix to allow
**  versioning and non-versioning heartbeat monitors
**  to be recognized by this datacollector.
**  This is only temporary and should be removed
*/
    if(   ( version != GLOBUS_HBM_VERSION )
       && ( version != n_bytes_read ))
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        Invalid version for incoming Heartbeat, "
                        "Heartbeat dropped.\n"
                        "            Expected version: %d.\n"
                        "            Received version: %d.\n\n",
                        GLOBUS_HBM_VERSION,
                        version );
        }
        return GLOBUS_FAILURE;
    }

    UnpackUInt32( buff_ptr, len );
    buff_ptr += NUM_PACKED_BYTES;
    if( len != n_bytes_read )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        Corrupt Heartbeat message, invalid length:\n"
                        "            Read:    %d.\n"
                        "            MsgLen:  %d.\n"
                        "        Heartbeat dropped.\n\n",
                        n_bytes_read,
                        len );
        }
        return GLOBUS_FAILURE;
    }
    hb_data = globus_l_hbmdc_extract_hb_data( read_buff, datacollector_ptr );
    if( hb_data == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        globus_l_hbmdc_extract_hb_data() "
                        "failed.\n"
                        "        Heartbeat dropped.\n\n" );
        }
        return GLOBUS_FAILURE;
    }

    tmp_ipnum = inet_ntoa( from_addr->sin_addr );
    strcpy( lm_ipnum, tmp_ipnum );

#ifdef HBM_EXPERIMENT

    time_curr.tv_sec = time_curr.tv_usec = 0;
    if( gettimeofday( &time_curr, GLOBUS_NULL ))
    {
        time_curr.tv_sec = time_curr.tv_usec = 0;
    }

    globus_libc_fprintf(
                        heartbeat_file_fd1,
                        "%15.15s;%6.6u;%12.12u:%6.6u;%6.6u;%1.1u\n",
                        lm_ipnum,
                        hb_data->pid,
                        (unsigned int ) time_curr.tv_sec,
                        (unsigned int ) time_curr.tv_usec,
                        hb_data->rptnum,
                        hb_data->status );
    fflush( heartbeat_file_fd1 );

#endif

    /* find a LM entry if there is one, if not, create one */
    lm_entry = globus_l_hbmdc_find_localmonitor_entry(
                        datacollector_ptr,
                        (char *) &lm_ipnum,
                        hb_data->lm_port );
    if( lm_entry == GLOBUS_NULL )
    {
        lm_entry = globus_malloc( sizeof( hbmdc_lm_fields_t ));
        if( lm_entry == GLOBUS_NULL )
        {
            if( datacollector_ptr->DC_log_file )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        globus_malloc() failed:  out of memory.\n"
                        "        Heartbeat dropped.\n\n" );
            }
            globus_l_hbmdc_free_hb_data( hb_data );

            return GLOBUS_FAILURE;
        }

        memset(         (void *) lm_entry,
                        0,
                        sizeof( hbmdc_lm_fields_t ));
        lm_entry->LM_hostIPNum          = GLOBUS_NULL;
        lm_entry->LM_hostName           = GLOBUS_NULL;
        lm_entry->LM_client_list.head   = GLOBUS_NULL;
        lm_entry->LM_client_list.tail   = GLOBUS_NULL;
        lm_entry->prev = lm_entry->next = GLOBUS_NULL;

        lm_entry->LM_host_addr.s_addr = from_addr->sin_addr.s_addr;
        lm_entry->LM_hostIPNum = globus_malloc( strlen( lm_ipnum ) + 1 );
        if( lm_entry->LM_hostIPNum == GLOBUS_NULL )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [05] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        globus_malloc() failed:  out of memory.\n"
                        "        Heartbeat dropped.\n\n" );
            }
            globus_l_hbmdc_free_localmonitor_entry( lm_entry );
            globus_l_hbmdc_free_hb_data( hb_data );

            return GLOBUS_FAILURE;
        }
        strcpy( lm_entry->LM_hostIPNum, lm_ipnum );

        lm_entry->LM_hostName = globus_i_hbm_gethostname_from_addr(
            &from_addr->sin_addr );

        lm_entry->LM_hostPort = hb_data->lm_port;

        lm_entry->LM_ClientsCnt = 0;
        lm_entry->LM_ClientsLiveCnt = 0;

/*
**      LM_SummaryStatus is temporary until client is added --
**          don't want to consider it alive until
**          the client info validates and adds properly.
*/
        lm_entry->LM_SummaryStatus    = GLOBUS_HBM_LM_STATUS_DEAD;

/*
**      Add lm_entry to list.
*/
        ListAppend( &( datacollector_ptr->DC_lm_list ),
                    lm_entry );
        datacollector_ptr->DC_LocalMonitorsCnt++;
    }
/*
**  No fields to update if lm_entry exists.
*/

/*
**  We'll need the time for some client information.
*/
    if( gettimeofday( &current_time, GLOBUS_NULL ))
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [06] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        gettimeofday() failed:  errno [%d]: %s.\n"
                        "        Heartbeat dropped.\n\n",
                        errno,
                        strerror( errno ));
        }
        globus_l_hbmdc_free_hb_data( hb_data );

        return GLOBUS_FAILURE;
    }

/*
**  Find a client entry if there is one, if not, create one.
*/
    cl_entry = globus_l_hbmdc_find_client_entry(
                        datacollector_ptr,
                        hb_data->lm_addr,
                        hb_data->pid,
                        hb_data->procname );

/*
**  We set datacollector_ptr->DC_mutex in both the if and the else
**  sides of this test after we know we won't return prematurely.
**  We'll unlock after the program flow recombines.
*/
    if( cl_entry == GLOBUS_NULL )
    {
        if(   ( hb_data->status ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NORMAL )
           || ( hb_data->status ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_ABNORMAL )
           || ( hb_data->status ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_DIED ))
        {
/*
**          This process either never got reported while it was alive
**          or has been purged.
**          It's dead -- we don't want to load it.
*/
            globus_l_hbmdc_free_hb_data( hb_data );

            return GLOBUS_SUCCESS;
        }

        cl_entry = globus_malloc( sizeof( hbmdc_cl_fields_t ));
        if( cl_entry == GLOBUS_NULL )
        {
            if( datacollector_ptr->DC_log_file )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [07] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        globus_malloc() failed:  out of memory.\n"
                        "        Heartbeat dropped.\n\n" );
            }
            globus_l_hbmdc_free_hb_data( hb_data );

            return GLOBUS_FAILURE;
        }
        memset(         (void *) cl_entry,
                        0,
                        sizeof( hbmdc_cl_fields_t ));
        cl_entry->CL_hostIPNum                = GLOBUS_NULL;
        cl_entry->CL_procName                 = GLOBUS_NULL;
        cl_entry->CL_event_callback           = GLOBUS_NULL;
        cl_entry->CL_event_callback_user_data = GLOBUS_NULL;
        cl_entry->CL_Msg                      = GLOBUS_NULL;
        cl_entry->prev = cl_entry->next       = GLOBUS_NULL;

        cl_entry->CL_host_addr.s_addr = hb_data->lm_addr.s_addr;

        tmp_ipnum = inet_ntoa( hb_data->lm_addr );
        strcpy( lm_ipnum, tmp_ipnum );
        cl_entry->CL_hostIPNum = globus_malloc( strlen( lm_ipnum ) + 1 );
        if( cl_entry->CL_hostIPNum == GLOBUS_NULL )
        {
            if( datacollector_ptr->DC_log_file )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [08] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        globus_malloc() failed:  out of memory.\n"
                        "        Heartbeat dropped.\n\n" );
            }
            globus_l_hbmdc_free_client_entry( cl_entry );
            globus_l_hbmdc_free_hb_data( hb_data );

            return GLOBUS_FAILURE;
        }
        strcpy( cl_entry->CL_hostIPNum, lm_ipnum );

        cl_entry->CL_procName = hb_data->procname;
        hb_data->procname = GLOBUS_NULL;

        cl_entry->CL_procPID     = hb_data->pid;
        cl_entry->CL_procStatus  = hb_data->status;
        cl_entry->CL_regTime     = hb_data->regtime;
        cl_entry->CL_blockedTime = hb_data->blockedtime;
        cl_entry->CL_cpuSecs     = hb_data->cputime;
        cl_entry->CL_unregTime   = hb_data->unregtime;
        cl_entry->CL_deleteTime  = 0;
        cl_entry->CL_rptNumber   = hb_data->rptnum;
        cl_entry->CL_rptTimeLast = current_time.tv_sec;
        cl_entry->CL_rptInterval = hb_data->rptinterval;
        cl_entry->CL_rptTimeNext = current_time.tv_sec + hb_data->rptinterval;

        cl_entry->CL_overdue_secs =
                    (  (  (  datacollector_ptr->DC_heartbeats_missing_overdue
                           + 1 )
                        * cl_entry->CL_rptInterval )
                     + datacollector_ptr->DC_network_variation_allowance_secs );

        cl_entry->CL_shutdown_no_rpt_secs =
                    (  (  (  datacollector_ptr->DC_heartbeats_missing_shutdown
                           + 1 )
                        * cl_entry->CL_rptInterval )
                     + datacollector_ptr->DC_network_variation_allowance_secs );

        cl_entry->CL_event_callback   = GLOBUS_NULL;
        cl_entry->CL_eventmask        = GLOBUS_HBM_DATACOLLECTOR_EVENT_NONE;
        cl_entry->CL_event_callback_user_data = GLOBUS_NULL;
        cl_entry->CL_MsgNum = hb_data->msgnum;

        cl_entry->CL_Msg = hb_data->msg;
        hb_data->msg = GLOBUS_NULL;

/*
**  Add the client to the list and increment lm client counts.
*/
        ListAppend( &( lm_entry->LM_client_list ),
                    cl_entry );

        lm_entry->LM_ClientsCnt++;
        lm_entry->LM_ClientsLiveCnt++;
        datacollector_ptr->DC_ClientsLiveCnt++;

/*
**      Set events flags for the new client.
*/
        events |= GLOBUS_HBM_DATACOLLECTOR_EVENT_REGISTRATION;
        datacollector_ptr->DC_ckptNeeded = 1;
    }
    else
    {
/*
**      client entry exists.
*/

/*
**      If outdated report, discard.
*/
        if( hb_data->rptnum < cl_entry->CL_rptNumber )
        {
            if( datacollector_ptr->DC_log_file )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [09] in "
                        "globus_l_hbmdc_process_heartbeat():\n"
                        "        Heartbeat is out of date, dropped:\n"
                        "            Latest Heartbeat received:  %d.\n"
                        "            This   Heartbeat:           %d.\n\n",
                        cl_entry-> CL_rptNumber,
                        hb_data->rptnum );
            }
            globus_l_hbmdc_free_hb_data( hb_data );

            return GLOBUS_SUCCESS;
        }

/*
**      Update client fields, setting appropriate event flags.
*/
        if( hb_data->status != cl_entry->CL_procStatus )
        {
            if(   ( hb_data->status == GLOBUS_HBM_PROCSTATUS_ACTIVE )
               || ( hb_data->status == GLOBUS_HBM_PROCSTATUS_BLOCKED )
               || ( hb_data->status == GLOBUS_HBM_PROCSTATUS_OVERDUE ))
            {
                cl_entry->CL_deleteTime = 0;
                if(   ( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NORMAL )
                   || ( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_ABNORMAL )
                   || ( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_DIED ))
                {
                    lm_entry->LM_ClientsLiveCnt++;
                    datacollector_ptr->DC_ClientsLiveCnt++;

                    events |= GLOBUS_HBM_DATACOLLECTOR_EVENT_REGISTRATION;
                }
                else if( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NO_RPT )
                {
                    lm_entry->LM_ClientsLiveCnt++;
                    datacollector_ptr->DC_ClientsLiveCnt++;
                }
            }
            else /* The new status is a SHUTDOWN_* status. */
            {
                cl_entry->CL_deleteTime =
                                current_time.tv_sec
                              + HBMDC_CL_SHUTDOWN_RETENTION_SECS;
                if(   ( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_ACTIVE )
                   || ( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_BLOCKED )
                   || ( cl_entry->CL_procStatus ==
                                GLOBUS_HBM_PROCSTATUS_OVERDUE ))
                {
                    lm_entry->LM_ClientsLiveCnt--;
                    datacollector_ptr->DC_ClientsLiveCnt--;
                }

                if( hb_data->status ==
                                GLOBUS_HBM_PROCSTATUS_SHUTDOWN_NORMAL )
                {
                    events |= GLOBUS_HBM_DATACOLLECTOR_EVENT_SHUTDOWN_NORMAL;
                }
                else /* The new process status is for abnormal termination. */
                {
                    events |= GLOBUS_HBM_DATACOLLECTOR_EVENT_SHUTDOWN_ABNORMAL;
                }
            }

            cl_entry->CL_procStatus = hb_data->status;
            datacollector_ptr->DC_ckptNeeded = 1;
        }

        cl_entry->CL_regTime     = hb_data->regtime;
        cl_entry->CL_blockedTime = hb_data->blockedtime;
        cl_entry->CL_cpuSecs     = hb_data->cputime;
        cl_entry->CL_unregTime   = hb_data->unregtime;
        cl_entry->CL_rptNumber   = hb_data->rptnum;
        cl_entry->CL_rptTimeLast = current_time.tv_sec;
        cl_entry->CL_rptTimeNext = current_time.tv_sec + hb_data->rptinterval;

        if( hb_data->msgnum > cl_entry->CL_MsgNum )
        {
/*
**          We've got a new message.
*/
            cl_entry->CL_MsgNum = hb_data->msgnum;
            if( cl_entry->CL_Msg != GLOBUS_NULL )
                globus_free( cl_entry->CL_Msg );
            cl_entry->CL_Msg = hb_data->msg;
            hb_data->msg = GLOBUS_NULL;
            events |= GLOBUS_HBM_DATACOLLECTOR_EVENT_REGISTRATION;
            datacollector_ptr->DC_ckptNeeded = 1;
        }
    }

/*
**  Update the lm summary status based on the clients status.
*/
    if( lm_entry->LM_ClientsLiveCnt > 0 )
    {
        if( lm_entry->LM_ClientsLiveCnt == lm_entry->LM_ClientsCnt )
        {
            lm_entry->LM_SummaryStatus =
                                GLOBUS_HBM_LM_STATUS_ALIVE;
        }
        else
        {
            lm_entry->LM_SummaryStatus =
                            GLOBUS_HBM_LM_STATUS_DEGRADED;
        }
    }
    else
    {
        lm_entry->LM_SummaryStatus = GLOBUS_HBM_LM_STATUS_DEAD;
    }

/*
**  Free hb_data (the strings may have been taken by the cl_entry).
*/
    globus_l_hbmdc_free_hb_data( hb_data );

/*
**  Execute callbacks if necessary.
*/
    if( events != 0 )
    {
        if( ( callback_entry_ptr =
                        globus_l_hbmdc_event_callback(
                                datacollector_ptr,
                                cl_entry,
                                events )) != GLOBUS_NULL )
        {
            globus_mutex_lock( &globus_l_hbm_datacollector_callback_mutex );
            ListAppend( &globus_l_hbm_datacollector_callback_list,
                        callback_entry_ptr );
            globus_mutex_unlock( &globus_l_hbm_datacollector_callback_mutex );
        }
    }

    return GLOBUS_SUCCESS;
}


/*
** globus_l_hbmdc_restore_from_checkpoint()
**
** Rebuild the local monitor / client lists of a data collector from the
** checkpoint file named by ckpt_filename_str.
**
** File layout expected by this reader:
**     - first line starts with "HBMDC CHECKPOINT:" (compared without
**       regard to case);
**     - every following line starts with one of the record tags
**       "Datacollector:", "LocalMonitor:" or "Client:" (case sensitive).
**       A "Client:" record is attached to the most recently read
**       "LocalMonitor:" record.
**
** After the file has been read, the local monitor count, the per local
** monitor client and live client counts, the data collector live client
** count and each local monitor summary status are recomputed from the
** restored lists.  Count discrepancies are logged and the recomputed
** values are used.
**
** Parameters:
**     ckpt_filename_str    name of the checkpoint file; GLOBUS_NULL is
**                          rejected.
**     datacollector_ptr    data collector to restore into.  Errors are
**                          reported to its DC_log_file when that is
**                          not GLOBUS_NULL.
**
** Returns:
**     GLOBUS_SUCCESS       checkpoint read (an empty file also succeeds).
**     GLOBUS_FAILURE       no filename, open failure or corrupt file.
**                          Entries appended to the lists before the
**                          corruption was detected are left in place.
**
** The checkpoint file is closed on every path once it has been opened.
*/
static int
globus_l_hbmdc_restore_from_checkpoint(
                char*                   ckpt_filename_str,
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    FILE*       fp                    = GLOBUS_NULL;
    char        read_buff[1024];
    char*       buff_ptr              = GLOBUS_NULL;
    int         num_localmonitors;
    int         num_clients;
    int         num_live_clients;
    int         datacollector_recs    = 0;

    hbmdc_lm_fields_t*
                lm_entry              = GLOBUS_NULL;
    hbmdc_cl_fields_t*
                cl_entry              = GLOBUS_NULL;

/*
**  We are restoring this data collector instance from a checkpoint file,
**  so it is not in the data collector list yet,
**  and no other thread can access it.
**  Therefore we do not have to worry about locking the data collector.
*/
    if( ckpt_filename_str == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [01] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        No checkpoint filename.\n\n" );
        }
        return GLOBUS_FAILURE;
    }

    fp = fopen( ckpt_filename_str, "r" );
    if( fp == GLOBUS_NULL )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [02] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        fopen() failed:  errno [%d]: %s.\n"
                        "            Error opening checkpoint file: %s.\n"
                        "        Aborting checkpoint restore.\n\n",
                        errno,
                        strerror( errno ),
                        ckpt_filename_str );
        }
        return GLOBUS_FAILURE;
    }

/*
**  From here on the file is open:  every failure must leave through
**  restore_failed so that it gets closed.
*/
    memset(             (void *) read_buff,
                        0,
                        sizeof( read_buff ));

    if( fgets( read_buff, (int) sizeof( read_buff ), fp ))
    {
/*
**      Header line:  must be at least as long as the tag, and must
**      start with it.
*/
        if( strlen( read_buff ) < 17)
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [03] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        Corrupt Checkpoint File:  %s\n"
                        "        Aborting checkpoint restore.\n\n",
                        ckpt_filename_str );
            }
            goto restore_failed;
        }
        if( strncasecmp(
                        read_buff,
                        "HBMDC CHECKPOINT:",
                        17 ))
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [04] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        Corrupt Checkpoint File:  %s\n"
                        "        Aborting checkpoint restore.\n\n",
                        ckpt_filename_str );
            }
            goto restore_failed;
        }

/*
**      Record lines, one per fgets().
*/
        while( fgets( read_buff, (int) sizeof( read_buff ), fp ))
        {
            buff_ptr = read_buff;

            if( strncmp( buff_ptr, "Datacollector:", 14 ) == 0 )
            {
                buff_ptr += 14;
                if( datacollector_recs == 0 )
                {
                    if( globus_l_hbmdc_extract_datacollector(
                            buff_ptr,
                            datacollector_ptr ) !=
                                    GLOBUS_SUCCESS )
                    {
                        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                        {
                            globus_libc_fprintf(
                                datacollector_ptr->DC_log_file,
                                "Globus HBM Data Collector library:\n"
                                "    Error [05] in "
                                "globus_l_hbmdc_restore_from_checkpoint():\n"
                                "        globus_l_hbmdc_"
                                "extract_datacollector() failed:\n"
                                "            Corrupt Checkpoint File:  %s\n"
                                "            Aborting checkpoint restore.\n\n",
                                ckpt_filename_str );
                        }
                        goto restore_failed;
                    }
/*
**                  Remember that the (single) data collector record has
**                  been consumed, so that a second one is rejected.
*/
                    datacollector_recs++;
                }
                else /* corrupt file:  more than one Datacollector record */
                {
                    if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                    {
                        globus_libc_fprintf(
                            datacollector_ptr->DC_log_file,
                            "Globus HBM Data Collector library:\n"
                            "    Error [06] in "
                            "globus_l_hbmdc_restore_from_checkpoint():\n"
                            "        Corrupt Checkpoint File:  %s\n"
                            "        Aborting checkpoint restore.\n\n",
                            ckpt_filename_str );
                    }
                    goto restore_failed;
                }
            }
            else if( strncmp( buff_ptr, "LocalMonitor:", 13 ) == 0 )
            {
                buff_ptr += 13;
                lm_entry =
                    globus_l_hbmdc_extract_localmonitor(
                        buff_ptr,
                        datacollector_ptr );

                if( lm_entry == GLOBUS_NULL )
                {
                    if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                    {
                        globus_libc_fprintf(
                            datacollector_ptr->DC_log_file,
                            "Globus HBM Data Collector library:\n"
                            "    Error [07] in "
                            "globus_l_hbmdc_restore_from_checkpoint():\n"
                            "        globus_l_hbmdc_"
                            "extract_localmonitor() failed:\n"
                            "            Corrupt Checkpoint File:  %s\n"
                            "            Aborting checkpoint restore.\n\n",
                            ckpt_filename_str );
                    }
                    goto restore_failed;
                }
                /* add lm_entry to list */
                ListAppend(
                        &( datacollector_ptr->DC_lm_list ),
                        lm_entry );
                datacollector_ptr->DC_LocalMonitorsCnt++;
            }
            else if( strncmp( buff_ptr, "Client:", 7 ) == 0 )
            {
                buff_ptr += 7;
/*
**              A client record needs a preceding local monitor record.
**              Test that first:  it avoids handing a null lm_entry to
**              the extractor, and avoids dropping a client entry that
**              could never be attached to a list.
*/
                cl_entry = GLOBUS_NULL;
                if( lm_entry != GLOBUS_NULL )
                {
                    cl_entry =
                        globus_l_hbmdc_extract_client(
                                buff_ptr,
                                datacollector_ptr,
                                lm_entry );
                }
                if( cl_entry == GLOBUS_NULL )
                {
                    if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                    {
                        globus_libc_fprintf(
                            datacollector_ptr->DC_log_file,
                            "Globus HBM Data Collector library:\n"
                            "    Error [08] in "
                            "globus_l_hbmdc_restore_from_checkpoint():\n"
                            "        globus_l_hbmdc_"
                            "extract_client() failed:\n"
                            "            Corrupt Checkpoint File:  %s\n"
                            "            Aborting checkpoint restore.\n\n",
                            ckpt_filename_str );
                    }
                    goto restore_failed;
                }

                ListAppend(
                        &( lm_entry->LM_client_list ),
                        cl_entry );
            }
            else
            {
                if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
                {
                    globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [09] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        Corrupt Checkpoint File:  %s\n"
                        "        (Record type not recognized.)\n"
                        "        Aborting checkpoint restore.\n\n",
                        ckpt_filename_str );
                }
                goto restore_failed;
            }

            /* clear the buffer so no stale text survives a short read */
            memset(     (void *) read_buff,
                        0,
                        sizeof( read_buff ));
        }
    }
    fclose( fp );
    fp = GLOBUS_NULL;

    /* re-validate internal state */
    num_localmonitors = globus_l_hbmdc_count_localmonitors( datacollector_ptr );
    if( num_localmonitors != datacollector_ptr->DC_LocalMonitorsCnt )
    {
        if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
        {
            globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [10] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        globus_l_hbmdc_count_localmonitors() "
                        "discrepancy (from file value differs from restored):\n"
                        "            Checkpoint File:  %s\n"
                        "            Local monitor count from file:  %d\n"
                        "            Local monitors restored:        %d\n"
                        "        Using number restored.\n\n",
                        ckpt_filename_str,
                        datacollector_ptr->DC_LocalMonitorsCnt,
                        num_localmonitors );
        }
        datacollector_ptr->DC_LocalMonitorsCnt = num_localmonitors;
    }

/*
**  Recount clients per local monitor;  the data collector live client
**  total is rebuilt from scratch as the sum of the per monitor counts.
*/
    datacollector_ptr->DC_ClientsLiveCnt = 0;
    for( lm_entry = datacollector_ptr->DC_lm_list.head;
         lm_entry != GLOBUS_NULL;
         lm_entry = lm_entry->next )
    {
        num_clients = globus_l_hbmdc_count_clients(
                                &( lm_entry->LM_client_list ));
        if( num_clients != lm_entry->LM_ClientsCnt )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [11] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        globus_l_hbmdc_count_clients() "
                        "discrepancy (from file value differs from restored):\n"
                        "            Checkpoint File:  %s\n"
                        "            LM host:  %s [%s]\n"
                        "            Client count from file:  %d\n"
                        "            Clients restored:        %d\n"
                        "        Using number restored.\n\n",
                        ckpt_filename_str,
                        lm_entry->LM_hostName,
                        lm_entry->LM_hostIPNum,
                        lm_entry->LM_ClientsCnt,
                        num_clients );
            }
            lm_entry->LM_ClientsCnt = num_clients;
        }

        num_live_clients = globus_l_hbmdc_count_live_clients(
                                &( lm_entry->LM_client_list ));
        datacollector_ptr->DC_ClientsLiveCnt += num_live_clients;
        if( num_live_clients != lm_entry->LM_ClientsLiveCnt )
        {
            if( datacollector_ptr->DC_log_file != GLOBUS_NULL )
            {
                globus_libc_fprintf(
                        datacollector_ptr->DC_log_file,
                        "Globus HBM Data Collector library:\n"
                        "    Error [12] in "
                        "globus_l_hbmdc_restore_from_checkpoint():\n"
                        "        globus_l_hbmdc_count_live_clients() "
                        "discrepancy (from file value differs from restored):\n"
                        "            Checkpoint File:  %s\n"
                        "            LM host:  %s [%s]\n"
                        "            Live Client count from file:  %d\n"
                        "            Live Clients restored:        %d\n"
                        "        Using number restored.\n\n",
                        ckpt_filename_str,
                        lm_entry->LM_hostName,
                        lm_entry->LM_hostIPNum,
                        lm_entry->LM_ClientsLiveCnt,
                        num_live_clients );
            }
            lm_entry->LM_ClientsLiveCnt = num_live_clients;
        }

/*
**      Summary status:  all clients live -> ALIVE,  some -> DEGRADED,
**      none -> DEAD.
*/
        if( lm_entry->LM_ClientsLiveCnt > 0 )
        {
            if( lm_entry->LM_ClientsLiveCnt == lm_entry->LM_ClientsCnt )
            {
                lm_entry->LM_SummaryStatus =
                                GLOBUS_HBM_LM_STATUS_ALIVE;
            }
            else
            {
                lm_entry->LM_SummaryStatus =
                                GLOBUS_HBM_LM_STATUS_DEGRADED;
            }
        }
        else
        {
            lm_entry->LM_SummaryStatus = GLOBUS_HBM_LM_STATUS_DEAD;
        }

    }

    return GLOBUS_SUCCESS;

restore_failed:
/*
**  Common exit for every failure detected while the file is open.
*/
    fclose( fp );

    return GLOBUS_FAILURE;
}

/*
** globus_l_hbmdc_cl_buffer_append()
**
** Copy the characters of text (without its terminating NUL) to the
** position *cursor points at, then advance *cursor past the copied
** characters.
*/
static void
globus_l_hbmdc_cl_buffer_append(
                char**                  cursor,
                const char*             text )
{
    size_t      text_len              = strlen( text );

    memcpy( (void *) *cursor, (const void *) text, text_len );
    *cursor += text_len;
}


/*
** globus_l_hbmdc_set_cl_buffer()
**
** Format one client entry into cl_buffer as a single text record:
**
**     Client:<hostIPNum>;<procName>;<procPID>;<procStatus>;
**     <regTime>;<blockedTime>;<cpuSecs>;<unregTime>;<deleteTime>;
**     <rptNumber>;<rptInterval>;<overdue_secs>;
**     <shutdown_no_rpt_secs>;<rptTimeLast>;<rptTimeNext>;<MsgNum>;
**     <Msg>;\n
**
** (written as one line; it is wrapped here for readability only).
** Time fields are rendered by globus_i_hbm_convert_UTC_to_str().
** A GLOBUS_NULL or empty CL_Msg is written as "<None>".
**
** Parameters:
**     cl_buffer   destination for the record.  No size is passed in and
**                 none is checked, so the caller must supply a buffer
**                 large enough for the whole record.
**     cl_fields   client entry to format.
**
** Returns the number of bytes stored in cl_buffer.  The record is not
** NUL terminated.
*/
static int
globus_l_hbmdc_set_cl_buffer(
                char*                   cl_buffer,
                hbmdc_cl_fields_t*      cl_fields )
{
    char        time_str[GLOBUS_HBM_DATE_TIME_LEN];
    char        number_str[GLOBUS_HBM_DATE_TIME_LEN];
    char*       cursor                = cl_buffer;

    /* record tag, host and process identification */
    globus_l_hbmdc_cl_buffer_append( &cursor, "Client:" );
    globus_l_hbmdc_cl_buffer_append( &cursor, cl_fields->CL_hostIPNum );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );
    globus_l_hbmdc_cl_buffer_append( &cursor, cl_fields->CL_procName );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_procPID );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_procStatus );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    /* registration / blocked times and cpu seconds */
    globus_i_hbm_convert_UTC_to_str( cl_fields->CL_regTime, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    globus_i_hbm_convert_UTC_to_str( cl_fields->CL_blockedTime, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_cpuSecs );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    /* unregistration and deletion times */
    globus_i_hbm_convert_UTC_to_str( cl_fields->CL_unregTime, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    globus_i_hbm_convert_UTC_to_str( cl_fields->CL_deleteTime, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    /* report counters and intervals */
    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_rptNumber );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_rptInterval );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_overdue_secs );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    globus_libc_sprintf(
                        number_str,
                        "%d;",
                        cl_fields->CL_shutdown_no_rpt_secs );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    /* last and next report times */
    globus_i_hbm_convert_UTC_to_str( cl_fields->CL_rptTimeLast, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    globus_i_hbm_convert_UTC_to_str( cl_fields->CL_rptTimeNext, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, time_str );
    globus_l_hbmdc_cl_buffer_append( &cursor, ";" );

    /* message number and message text */
    globus_libc_sprintf( number_str, "%d;", cl_fields->CL_MsgNum );
    globus_l_hbmdc_cl_buffer_append( &cursor, number_str );

    if(   ( cl_fields->CL_Msg != GLOBUS_NULL )
       && ( *( cl_fields->CL_Msg ) != '\0' ))
    {
        globus_l_hbmdc_cl_buffer_append( &cursor, cl_fields->CL_Msg );
        globus_l_hbmdc_cl_buffer_append( &cursor, ";" );
    }
    else
    {
        globus_l_hbmdc_cl_buffer_append( &cursor, "<None>;" );
    }

    globus_l_hbmdc_cl_buffer_append( &cursor, "\n" );

    /* bytes written == distance the cursor travelled */
    return (int) ( cursor - cl_buffer );
}


/*
** globus_l_hbmdc_dc_buffer_append()
**
** Copy the characters of text (without its terminating NUL) to the
** position *cursor points at, then advance *cursor past the copied
** characters.
*/
static void
globus_l_hbmdc_dc_buffer_append(
                char**                  cursor,
                const char*             text )
{
    size_t      text_len              = strlen( text );

    memcpy( (void *) *cursor, (const void *) text, text_len );
    *cursor += text_len;
}


/*
** globus_l_hbmdc_set_dc_buffer()
**
** Format the data collector's own fields into dc_buffer as a single
** text record:
**
**     Datacollector:<hostIPNum>;<hostName>;<portNumHBMsg>;
**     <EvalInterval>;<LocalMonitorsCnt>;<ckptFileName>;
**     <ckpt_and_eval_time>;\n
**
** (written as one line; it is wrapped here for readability only).
** The time field is rendered by globus_i_hbm_convert_UTC_to_str().
**
** Parameters:
**     dc_buffer           destination for the record.  No size is
**                         passed in and none is checked, so the caller
**                         must supply a buffer large enough for the
**                         whole record.
**     datacollector_ptr   data collector whose fields are formatted.
**
** Returns the number of bytes stored in dc_buffer.  The record is not
** NUL terminated.
*/
static int
globus_l_hbmdc_set_dc_buffer(
                char*                   dc_buffer,
                globus_hbm_datacollector_t*
                                        datacollector_ptr )
{
    char   time_str[GLOBUS_HBM_DATE_TIME_LEN];
    char   number_str[GLOBUS_HBM_DATE_TIME_LEN];
    char*  cursor                     = dc_buffer;

    /* record tag and host identification */
    globus_l_hbmdc_dc_buffer_append( &cursor, "Datacollector:" );
    globus_l_hbmdc_dc_buffer_append(
                        &cursor,
                        datacollector_ptr->DC_hostIPNum );
    globus_l_hbmdc_dc_buffer_append( &cursor, ";" );
    globus_l_hbmdc_dc_buffer_append(
                        &cursor,
                        datacollector_ptr->DC_hostName );
    globus_l_hbmdc_dc_buffer_append( &cursor, ";" );

    /* numeric fields; each format already carries its ';' separator */
    globus_libc_sprintf(
                        number_str,
                        "%hu;",
                        datacollector_ptr->DC_portNumHBMsg );
    globus_l_hbmdc_dc_buffer_append( &cursor, number_str );

    globus_libc_sprintf(
                        number_str,
                        "%d;",
                        datacollector_ptr->DC_EvalInterval );
    globus_l_hbmdc_dc_buffer_append( &cursor, number_str );

    globus_libc_sprintf(
                        number_str,
                        "%d;",
                        datacollector_ptr->DC_LocalMonitorsCnt );
    globus_l_hbmdc_dc_buffer_append( &cursor, number_str );

    /* checkpoint file name and checkpoint/evaluation time */
    globus_l_hbmdc_dc_buffer_append(
                        &cursor,
                        datacollector_ptr->DC_ckptFileName );
    globus_l_hbmdc_dc_buffer_append( &cursor, ";" );

    globus_i_hbm_convert_UTC_to_str(
                        datacollector_ptr->DC_ckpt_and_eval_time,
                        time_str );
    globus_l_hbmdc_dc_buffer_append( &cursor, time_str );
    globus_l_hbmdc_dc_buffer_append( &cursor, ";\n" );

    /* bytes written == distance the cursor travelled */
    return (int) ( cursor - dc_buffer );
}


/*
** globus_l_hbmdc_lm_buffer_append()
**
** Copy the characters of text (without its terminating NUL) to the
** position *cursor points at, then advance *cursor past the copied
** characters.
*/
static void
globus_l_hbmdc_lm_buffer_append(
                char**                  cursor,
                const char*             text )
{
    size_t      text_len              = strlen( text );

    memcpy( (void *) *cursor, (const void *) text, text_len );
    *cursor += text_len;
}


/*
** globus_l_hbmdc_set_lm_buffer()
**
** Format one local monitor entry into lm_buffer as a single text
** record:
**
**     LocalMonitor:<hostIPNum>;<hostName>;<hostPort>;<ClientsCnt>;
**     <ClientsLiveCnt>;<SummaryStatus>;\n
**
** (written as one line; it is wrapped here for readability only).
**
** Parameters:
**     lm_buffer   destination for the record.  No size is passed in and
**                 none is checked, so the caller must supply a buffer
**                 large enough for the whole record.
**     lm_fields   local monitor entry to format.
**
** Returns the number of bytes stored in lm_buffer.  The record is not
** NUL terminated.
*/
static int
globus_l_hbmdc_set_lm_buffer(
                char*                   lm_buffer,
                hbmdc_lm_fields_t*      lm_fields )
{
    char        number_str[GLOBUS_HBM_DATE_TIME_LEN];
    char*       cursor                = lm_buffer;

    /* record tag and host identification */
    globus_l_hbmdc_lm_buffer_append( &cursor, "LocalMonitor:" );
    globus_l_hbmdc_lm_buffer_append( &cursor, lm_fields->LM_hostIPNum );
    globus_l_hbmdc_lm_buffer_append( &cursor, ";" );
    globus_l_hbmdc_lm_buffer_append( &cursor, lm_fields->LM_hostName );
    globus_l_hbmdc_lm_buffer_append( &cursor, ";" );

    /* numeric fields; each format already carries its ';' separator */
    globus_libc_sprintf( number_str, "%u;", lm_fields->LM_hostPort );
    globus_l_hbmdc_lm_buffer_append( &cursor, number_str );

    globus_libc_sprintf( number_str, "%d;", lm_fields->LM_ClientsCnt );
    globus_l_hbmdc_lm_buffer_append( &cursor, number_str );

    globus_libc_sprintf( number_str, "%d;", lm_fields->LM_ClientsLiveCnt );
    globus_l_hbmdc_lm_buffer_append( &cursor, number_str );

    /*
    ** LM_SummaryStatus is declared unsigned int, so it is formatted
    ** with %u (it used to be %d).  The text produced is the same for
    ** every value up to INT_MAX.
    */
    globus_libc_sprintf( number_str, "%u;", lm_fields->LM_SummaryStatus );
    globus_l_hbmdc_lm_buffer_append( &cursor, number_str );

    globus_l_hbmdc_lm_buffer_append( &cursor, "\n" );

    /* bytes written == distance the cursor travelled */
    return (int) ( cursor - lm_buffer );
}


/*
** globus_l_hbmdc_validate_client_IPNum()
**
** Check that a client's IP number belongs to the host of the given
** local monitor.
**
** The client's IP number is converted to a hostname, and that hostname
** is compared with the local monitor's LM_hostName.  The IP numbers
** themselves cannot be compared, because heartbeats can be sent using
** different interfaces on the local monitor host.
**
** Parameters:
**     client_IPNum    client address as text, converted with
**                     inet_addr().
**     localmonitor    local monitor entry whose LM_hostName is the
**                     reference hostname.
**
** Returns GLOBUS_TRUE when the hostnames are identical, GLOBUS_FALSE
** when they differ or when no hostname could be obtained for the
** client address.
*/
static globus_bool_t
globus_l_hbmdc_validate_client_IPNum(
                char*                   client_IPNum,
                hbmdc_lm_fields_t*      localmonitor )
{
    struct in_addr      client_addr;
    char*               client_hostname = GLOBUS_NULL;
    globus_bool_t       result        = GLOBUS_FALSE;

    client_addr.s_addr = inet_addr( client_IPNum );

    client_hostname =
        globus_i_hbm_gethostname_from_addr( &client_addr );
    if( client_hostname == GLOBUS_NULL )
    {
        /*
        ** NOTE(review): presumably GLOBUS_NULL is returned when the
        ** lookup fails -- confirm against
        ** globus_i_hbm_gethostname_from_addr().  Without a hostname
        ** there is nothing to compare (and nothing to free), so the
        ** client cannot be validated.
        */
        return GLOBUS_FALSE;
    }

    if( strcmp( client_hostname, localmonitor->LM_hostName ) == 0 )
    {
        result = GLOBUS_TRUE;
    }

    /*
    ** Release the hostname exactly once.  It used to be freed a second
    ** time on the mismatch path.
    */
    globus_free( client_hostname );

    return result;
}
