/*
 * durocrun.c
 *
 * starts up a multi request using the DUROC API.
 *
 */

#include "globus_common.h"
#include "globus_duroc_control.h"
#include "globus_rsl.h"

/* forward declarations */

/* Loads the RSL text stored in the named file (defined later in this file). */
char *read_rsl_from_file(char *filename);

/*
 * Tallies how many subjobs of a submitted job are checked in and how
 * many are done (defined later in this file).
 */
int count_subjobs(globus_duroc_control_t *control,  /* control structure    */
                  char *job_contact,                /* job contact string   */
                  globus_bool_t verbose,            /* debug output         */
                  int *checked_in,                  /* n subjobs checked in */
                  int *done);                       /* n subjobs done       */


/*
 * main()
 *
 * Submits the multi-request RSL stored in the file named by argv[1]
 * through the DUROC control API, polls until every subjob has checked
 * in, releases the DUROC barrier, and then polls until every subjob
 * is done.
 *
 * Returns 0 when all subjobs are done.  Any failure (missing argument,
 * unreadable or unparsable RSL, failed submission, failed barrier
 * release, failed subjob) prints a message and terminates the process
 * with exit(1); once the job has been submitted it is cancelled first.
 */
int main(
    int      argc, 
    char*    argv[])
{
    /* delay between two successive polls of the subjob states */
    enum { POLL_INTERVAL_USEC = 500000 };

    char *                  request_string;
    globus_rsl_t *          request_ast;
    globus_duroc_control_t  control;
    char *                  job_contact;
    int *                   subjob_states;
    int                     n_subjobs;
    int                     checked_in;
    int                     n_done;
    int                     rc;

    /*
     * Module activation
     */
    rc = globus_module_activate(GLOBUS_COMMON_MODULE);
    globus_assert(rc == GLOBUS_SUCCESS);

    rc = globus_module_activate(GLOBUS_DUROC_CONTROL_MODULE);
    globus_assert(rc == GLOBUS_SUCCESS);

    /*
     * Read the RSL to work with from a file
     */
    if (argc < 2)
    {
        globus_libc_fprintf(stderr,"provide argument (file containing RSL)\n");
        exit(1);
    }
    request_string = read_rsl_from_file(argv[1]);
    if (!request_string)
    {
        /*
         * An unreadable file is a user error, not a broken invariant,
         * so it is reported and handled explicitly instead of asserted.
         */
        globus_libc_fprintf(stderr,
                            "Error: Cannot read RSL from file %s\n",
                            argv[1]);
        exit(1);
    }

    /*
     * To ensure that the RSL string is correct, parse it and then
     * unparse it. This is good practice.
     */
    request_ast = globus_rsl_parse(request_string);
    if (!request_ast)
    {
        globus_libc_fprintf(stderr, "Error: Cannot Parse RSL\n");
        exit(1);
    }
    globus_libc_free(request_string);
    request_string = globus_rsl_unparse(request_ast);

    /* the tree is only needed to produce the unparsed string */
    globus_rsl_free_recursive(request_ast);
    request_ast = GLOBUS_NULL;

    if (!request_string)
    {
        globus_libc_fprintf(stderr, "Error: Cannot Unparse RSL\n");
        exit(1);
    }

    /*
     * DUROC submission: initialize the control
     */
    rc = globus_duroc_control_init(&control);
    globus_assert(rc == GLOBUS_SUCCESS);

    /*
     * process the DUROC request: this call triggers a sequence of
     * GRAM submissions, one for each subjob
     */
    globus_libc_fprintf(stderr, "submitting job, wait...\n");
    rc = globus_duroc_control_job_request( 
        &control,
        request_string,
        0,                    /* job state mask               */
        GLOBUS_NULL,          /* callback function (not used) */
        &job_contact,         /* id string                    */
        &n_subjobs,           /* will be set to # of subjobs  */
        &subjob_states );     /* state for each subjob        */

    if (rc != GLOBUS_SUCCESS)
    {
        globus_libc_fprintf(stderr, "error while submitting job\n");
        exit(1);
    }

    /* the per-subjob submission results are not inspected here */
    globus_libc_free(subjob_states);

    globus_libc_fprintf(stderr, 
                        "successfully submitted %d subjobs\n",
                        n_subjobs);

    /*
     * Poll the state of each subjob from the DUROC control, and
     * wait until all the subjobs have checked in.
     *
     * NOTE: this code is unnecessary if we don't want to recover
     * when subjobs fall over: if we just want synchronization, then
     * globus_duroc_control_barrier_release(wait_for_subjobs=GLOBUS_TRUE)
     * is enough.
     */
    checked_in = 0;
    rc = GLOBUS_SUCCESS;
    while (rc == GLOBUS_SUCCESS && checked_in < n_subjobs)
    {
        rc = count_subjobs( &control, 
                            job_contact,
                            GLOBUS_TRUE,
                            &checked_in,
                            &n_done );
        globus_libc_usleep(POLL_INTERVAL_USEC);
    }

    if (rc != GLOBUS_SUCCESS)
    {
        globus_libc_printf("something happened when polling states\n");
        /* best effort: we are exiting anyway, so the result is ignored */
        globus_duroc_control_job_cancel( &control, job_contact);
        exit(1);
    }

    /* 
     * TODO 1: remove subjob with label argv[2] before releasing barrier
     */

    /* 
     * TODO 2: add subjob with RSL argv[3] before releasing barrier
     */

    /*
     * Release the DUROC barrier when all subjobs are checked in
     */
    globus_libc_printf("releasing barrier, awaiting all to be done\n");
    rc = globus_duroc_control_barrier_release( &control,
                                               job_contact,
                                               GLOBUS_TRUE );
    if (rc != GLOBUS_SUCCESS)
    {
        /*
         * Without this check a failed release would go unnoticed and
         * the completion loop below would start polling regardless.
         */
        globus_libc_fprintf(stderr, "error while releasing barrier\n");
        globus_duroc_control_job_cancel( &control, job_contact);
        exit(1);
    }

    /*
     * Wait for job completion
     */
    n_done = 0;
    checked_in = 0;
    rc = GLOBUS_SUCCESS;
    while (rc == GLOBUS_SUCCESS && n_done < n_subjobs)
    {
        rc = count_subjobs( &control, 
                            job_contact,
                            GLOBUS_TRUE,
                            &checked_in,
                            &n_done );
        globus_libc_usleep(POLL_INTERVAL_USEC);
    }

    if (rc != GLOBUS_SUCCESS)
    {
        globus_libc_printf("something happened when polling states\n");
        globus_duroc_control_job_cancel( &control, job_contact);
        exit(1);
    }

    globus_libc_free(job_contact);

    /*
     * close the shop...
     */
    globus_libc_free(request_string);
    globus_module_deactivate_all();

    return 0;
}
/* main() */


/*
 * count_subjobs()
 *
 * retrieves statistics on the state of the subjobs
 *
 * Parameters:
 * 
 *     control - the globus_duroc_control_t object to which 
 *               the job was submitted
 *
 *     job_contact - as returned by globus_duroc_control_job_request
 *
 *     verbose - whether to print more information in this function
 *
 *     checked_in - variable used to return the number of jobs in the 
 *                  "checked in" state
 *
 *     done - variable used to return the number of jobs that are done
 *
 * Returns:
 *    
 *    GLOBUS_SUCCESS on success
 *
 *    -1 if any of the subjobs have failed   
 *
 */


int 
count_subjobs( 
    globus_duroc_control_t *  control,
    char *                    job_contact,
    globus_bool_t             verbose,
    int  *                    checked_in,
    int  *                    done)
{
    int         rc;
    int         result = GLOBUS_SUCCESS;
    int         n_subjobs;
    int *       subjob_states;
    char **     subjob_labels;
    int         i;

    /*
     * Get the subjob states, check if they failed, checked in or are done.
     * On failure of the query nothing is freed here and *checked_in and
     * *done are left untouched.
     */
    rc = globus_duroc_control_subjob_states( control,
                                             job_contact,
                                             &n_subjobs,
                                             &subjob_states,
                                             &subjob_labels);
    if (rc != GLOBUS_SUCCESS)
    {
        return rc;
    }

    *checked_in = 0;
    *done = 0;
    for (i = 0; i < n_subjobs; i++)
    {
        if (subjob_states[i] == GLOBUS_DUROC_SUBJOB_STATE_FAILED)
        {
            fprintf( stderr,
                     "subjob %s failed\n",
                     (subjob_labels[i]) ? (subjob_labels[i]) : "NULL" );
            /*
             * Stop at the first failed subjob, but fall through to the
             * cleanup below instead of returning with the arrays still
             * allocated.
             */
            result = -1;
            break;
        }

        if (subjob_states[i] == GLOBUS_DUROC_SUBJOB_STATE_CHECKED_IN)
        {
            *checked_in += 1;
        }

        if (subjob_states[i] == GLOBUS_DUROC_SUBJOB_STATE_DONE)
        {
            *done += 1;
        }
    }

    if (result == GLOBUS_SUCCESS && verbose)
    {
        globus_libc_printf("got info about %d subjobs: checked_in = %d " \
                           "  done = %d\n", n_subjobs, *checked_in, *done);
    }

    /*
     * Release everything this function freed on its success path before,
     * now on every path: each label, the label array and the state array.
     */
    for (i = 0; i < n_subjobs; i++)
    {
        if (subjob_labels[i])
        {
            globus_libc_free(subjob_labels[i]);
        }
    }
    globus_libc_free(subjob_labels);
    globus_libc_free(subjob_states);

    return result;
}
/* count_subjobs() */


/*
 * read_rsl_from_file()
 *
 * reads a rsl string from a file
 *
 * Parameters:
 * 
 *     filename - a string containing the file name
 *
 * Returns:
 *
 *     A pointer to the read rsl string on success.
 *     Memory for this string is allocated in the function 
 *     and needs to be freed by the caller
 * 
 *     GLOBUS_NULL on failure
 */

char *  
read_rsl_from_file(
    char *  filename)
{
    char *        req;
    int           fd;
    long          n_read;
    size_t        size;
    size_t        total = 0;
    globus_off_t  len;

    fd = globus_libc_open(filename, O_RDONLY);
    if (fd < 0)
    {
        globus_libc_fprintf(stderr, "cannot open file %s!\n", filename);
        return GLOBUS_NULL;
    }

    /*
     * calculate length of string: seek to the end, then rewind.
     * A failing seek would otherwise yield a negative length and a
     * buffer too small to hold even the terminator.
     */
    len = globus_libc_lseek(fd, 0, SEEK_END);
    if (len < 0 || globus_libc_lseek(fd, 0, SEEK_SET) < 0)
    {
        globus_libc_fprintf(stderr,
                            "cannot determine size of file %s!\n",
                            filename);
        globus_libc_close(fd);
        return GLOBUS_NULL;
    }
    size = (size_t) len;

    /* allocate memory for string plus its terminating '\0' */
    req = globus_libc_malloc(size + 1);
    if (req == GLOBUS_NULL)
    {
        globus_libc_fprintf(stderr,
                            "cannot allocate memory for file %s!\n",
                            filename);
        globus_libc_close(fd);
        return GLOBUS_NULL;
    }

    /*
     * read string from file.  At most `size` bytes are stored, so the
     * terminator below always lands inside the allocation; read() may
     * return fewer bytes than requested, hence the loop.
     */
    while (total < size)
    {
        n_read = (long) read(fd, req + total, size - total);
        if (n_read < 0)
        {
            /* report the failure instead of returning a truncated RSL */
            globus_libc_fprintf(stderr, "cannot read file %s!\n", filename);
            globus_libc_free(req);
            globus_libc_close(fd);
            return GLOBUS_NULL;
        }
        if (n_read == 0)
        {
            /* end of file reached before `size` bytes: keep what we got */
            break;
        }
        total += (size_t) n_read;
    }
    req[total] = '\0';
    globus_libc_close(fd);

    return req;
}
/* read_rsl_from_file() */







