/*
 * test_harness.c
 *
 *	This is the main interface between the test and
 *	the rest of the harness.
 */

#include "globus_nexus.h"
#include <stdio.h>
#include <string.h>

#include "harness_options.h"

#include "sp_list.h"
#include "ep_list.h"
#include "sp_group.h"
#include "startup.h"
#include "barrier.h"
#include "harness_endpoint.h"

#include "test_harness.h"

/* PrintRank(r): print one tagged line carrying the harness rank r.
 *	This is guaranteed to be called exactly once on each node,
 *	with r being the rank of the context.  Printing a unique
 *	line associates the globus_libc_printf leader with the harness
 *	rank, making it easy for a shell script to sort the
 *	output.  YMMV.
 *	The expansion carries no trailing semicolon; the caller
 *	supplies it.  It is used by do_startup() once the harness
 *	sp_group has been created.
 */
#define PrintRank(r)	globus_libc_printf("sort_rank_pid <<%i>>\n", r)


/* Usage string.
 *	Run one of the tests with `--help'
 *	option to see it printed out nicely.
 *	The options also show up in the comments in
 *	this file where they are used.
 *	main() prints this text whenever split_cmd_line()
 *	returns non-zero (`--help' or a bad argument), and
 *	then calls test_usage_string() for the test-specific
 *	part of the message.
 */
static char harness_usage_string[] =
	"    --help\t\t\tshow this message\n"
	"    -debug \t\t\tset output level, 0=quiet, 3=debug\n"
	"           [local=n]\t\tfor local nodes (1)\n"
	"           [global=n]\t\tfor all nodes\n"
	"           [test=n]\t\tfor the test\n"
	"    -startup listen <n>\t\twait for <n> attaches\n"
	"             attach <url>\tattach to <url>\n"
	"    -selftest ???\t\tharness diagnostics\n"
	"    -ep <options>\t\ttest endpoint options\n"
	"    -run <options>\t\ttest options\n"
	"    -end\t\t\tignore remaining args\n";


/* Argument Lists.
 *	arg_list_t is a struct with argc and argv.
 *	The command line gets parsed and then any options
 *	(minus the `-xxx' tag) will be saved here.
 *	test_al is an array, one arg_list per `-run'
 *	given on the command line.
 *	All of these (and n_tests) are filled in by the
 *	split_cmd_line() call at the top of main().
 */
static arg_list_t	debug_al;	/* `-debug'	*/
static arg_list_t	startup_al;	/* `-startup'	*/
static arg_list_t	selftest_al;	/* `-selftest'	*/
static arg_list_t	testep_al;	/* `-ep'	*/
static arg_list_t	*test_al;	/* `-run'	*/
static int		n_tests;	/* # of -run's	*/


/* Options after being parsed by the test.
 *	Array of pointers to strings.  The strings are
 *	created (globus_malloc'd) by test_parse_options() on
 *	the master node, and later free'd by the harness.
 *	[0]	= `-ep' option string.
 *	[1..n]	= `-run' option strings.
 *	The array itself holds n_tests + 1 pointers and is
 *	allocated (with every slot preset to NULL) by
 *	do_cmd_line() on the master; it stays NULL on slaves.
 */
static char		**parsed_options;


/* Debug levels.  Given on the command line after `-debug'.
 *	local = the one used by the harness, set by nodes
 *		connected by gram_myjob.
 *	global = a temp variable, the master will get it
 *		from the command line and give it to all
 *		slaves from listen/attach, where it will
 *		overwrite local_dbg_lvl.
 *	test = from the master's command line, sent to all
 *		slaves.
 *	do_startup() parses them, using "1", "0" and "-1"
 *	respectively when the option is absent, and raises
 *	local to global if global is the larger of the two.
 */
static int		local_dbg_lvl;	/* `local='	*/
static int		global_dbg_lvl;	/* `global='	*/
static int		test_dbg_lvl;	/* `test='	*/


/* This is the error code returned by main().
 *	Idea here is that an automated script needs
 *	to know about errors that occurred on contexts
 *	other than the one it started.
 *	main() initializes it to 0 and returns it after a
 *	successful shutdown; it is only ever modified
 *	through locally_set_error().
 */
static int		error_code;


/* Next test to run.  Bit of a hack between main() and
 * do_next_test().
 *	Used on the master as the index into parsed_options[]
 *	of the next `-run' string to broadcast.  main() starts
 *	it at 1 (slot 0 holds the `-ep' string) and
 *	do_next_test() increments it after each use.
 */
static int		next_test;


/* Options String from master to slaves, sent by
 *	send_option_string().  This is just a pointer to
 *	another string on the master.  On slaves, it is
 *	free'd if not NULL, and then a new buffer is
 *	malloc'd each time a string is sent.
 *	The slave-side replacement happens in
 *	option_string_handler(); main() starts it at NULL.
 */
static char		*option_string;


int
harness_debug_level()
/*
 * Accessor for the harness (local) debug level.
 * Declared in harness_options.h and used internally
 * by the harness debug macros.
 */
{
    int         level = local_dbg_lvl;

    return (level);
}


void
locally_set_debug_level(int harness, int test)
/*
 * Raise (never lower) the harness and test debug levels
 * on this node, so a request can only produce more output.
 * The levels arrive from the master via RSRs (see startup.c
 * and set_debug_level_handler()).
 */
{
    DebugTst(globus_libc_printf("harness_set_debug_level(): h=%i t=%i\n",
                                harness, test));

    /* Keep the larger of the current and the requested level. */
    local_dbg_lvl = (harness > local_dbg_lvl) ? harness : local_dbg_lvl;
    test_dbg_lvl = (test > test_dbg_lvl) ? test : test_dbg_lvl;
}


static void
locally_set_error(int error)
/*
 * Adjust the error code.  Lowest non-zero code is
 * considered a bigger error, so keep it.
 */
{
    if ((error > 0) && (error < error_code))
	error_code = error;
}


void
harness_set_error(int error)
/*
 * Report an error from this context.  The intent is to
 * broadcast it to all contexts (XXX -- or just to master??),
 * but for now only the local error code is updated.
 */
{
    /* Record locally first. */
    locally_set_error(error);

    /* XXX -- send out messages */
}


/* Prototypes of subroutines called by main().
 * Note these are all in this file.
 * main() calls them in the order listed and treats a
 * return value of 0 as success; for do_next_test() a
 * non-zero return ends the `-run' loop.
 */
static int do_startup(int *argc, char ***argv);	/* connect all nodes	*/
static int do_cmd_line();		/* selftest + parse options	*/
static int do_endpoint_options();	/* test endpoint/group/barrier	*/
static int do_next_test();		/* run one `-run' test		*/
static int do_shutdown();		/* cleanup + deactivate nexus	*/


/*
 * main()
 */
int
main(int argc,
     char **argv)
/*
 * Harness entry point.
 *	1. split the command line into the per-option arg lists
 *	2. startup_init() and do_startup() to bring up nexus and
 *	   connect all nodes
 *	3. create the harness barrier
 *	4. do_cmd_line() and do_endpoint_options()
 *	5. run every `-run' test with do_next_test()
 *	6. free the option strings and do_shutdown()
 * Returns error_code after a clean run, 1 on any harness
 * failure (including `--help' and command-line errors).
 */
{
    int         r;
    int         i;

    HelloTst(fprintf(stderr, "main(): hello\n"));

    /* Initialize variables.
     */
    parsed_options = (char **) NULL;
    local_dbg_lvl = 0;
    global_dbg_lvl = 0;
    test_dbg_lvl = -1;
    error_code = 0;
    next_test = 1;
    option_string = (char *) NULL;

    /* Parse the command line.  Returns -1 iff `--help'
     * was given, 0 if success, otherwise the index
     * of the bad argument.
     */
    r = split_cmd_line(argc, argv,
                       &debug_al,
                       &startup_al,
                       &selftest_al,
                       &testep_al,
                       &n_tests,
                       &test_al);
    if (r != 0)
    {
        /* Print error (if any), usage, and then abort.
         */
        if (r > 0)
            printf("command-line error at %i, '%s'\n", r, argv[r]);
        printf("-----------------------------------------\n"
               "usage: %s [--help] <options>\n"
               "  harness options:\n"
               "%s"
               "  ---------------------------------------\n"
               "  test specific options:\n"
               "  ---------------------------------------\n",
               argv[0],
               harness_usage_string);
        test_usage_string();
        printf("-----------------------------------------\n");
        return (1);
    }
    else
    {
        DebugTst(printf("command-line OK\n"));
    }

    /* Call startup_init().
     *	- starts nexus
     *	- internal initialization in startup.c
     *	- does startup for ep_list, sp_group, barrier
     *	- creates the harness endpoint
     */
    if (startup_init(&argc, &argv) != 0)
    {
        fprintf(stderr, "harness_main(): startup_init() failed\n");
        return (1);
    }
    else
    {
        DebugTst(globus_libc_printf("hello, nexus\n"));
    }

    /* -- nexus is started -- */

    /* Call do_startup(), which will do local startup
     * with gram_myjob, then (optionally) listen/attach.
     * All nodes will be left with startpoints to all
     * harness endpoints.
     */
    if (do_startup(&argc, &argv) != 0)
    {
        globus_libc_printf("harness_main(): startup failed\n");
        globus_module_deactivate(GLOBUS_NEXUS_MODULE);
        return (1);
    }
    TraceTst(globus_libc_printf("do_startup() done\n"));

    /* Set up the barrier for use by the harness.
     * Take advantage of the synchronization done by
     * barrier_create() since creating the harness
     * sp_group did not block.
     */
    if (barrier_create(HARNESS_GROUP_ID, (int *) NULL)
        != HARNESS_BARRIER_ID)
    {
        globus_libc_printf("barrier_create failed for harness\n");
        /* Nexus is up at this point, so deactivate it like
         * the other post-startup failure paths do.
         */
        globus_module_deactivate(GLOBUS_NEXUS_MODULE);
        return (1);
    }
    else
    {
        TraceTst(globus_libc_printf("harness barrier OK\n"));
    }

    /* The master will give the command line to the test
     * for parsing.  Also do some tests if `-selftest'
     * was given on the command line.
     */
    if (do_cmd_line() != 0)
    {
        globus_libc_printf("harness_main(): parse cmd line failed\n");
        globus_module_deactivate(GLOBUS_NEXUS_MODULE);
        return (1);
    }
    else
    {
        TraceTst(globus_libc_printf("harness_main(): "
                                    "parse command line OK\n"));
    }

    /* Pass the `-ep' options to all tests, and create
     * the test endpoints.  Then swap startpoints (if the
     * test returned an endpoint) and create a barrier for
     * the test to use.
     */
    if (do_endpoint_options() == 0)
    {
        TraceTst(globus_libc_printf("harness_main(): "
                                    "endpoint options successful\n"));
    }
    else
    {
        globus_libc_printf("harness_main(): error in test EP\n");
        globus_module_deactivate(GLOBUS_NEXUS_MODULE);
        return (1);
    }

    /* Loop through all the `-run' tests.
     */
    while (do_next_test() == 0)
    {
        HelloTst(globus_libc_printf("harness_main(): "
                                    "test completed successfully\n"));
    }

    /* Cleanup.
     */
    if (sp_group_rank(HARNESS_GROUP_ID) == 0)
    {
        /* Master.  parsed_options holds n_tests + 1 strings:
         * slot 0 is the `-ep' string and slots 1..n_tests are
         * the `-run' strings, so every slot up to and including
         * n_tests must be released.  Slots the test never
         * filled are still NULL and are skipped.
         */
        if (parsed_options != NULL)
        {
            for (i = 0; i <= n_tests; i++)
            {
                if (parsed_options[i] != NULL)
                    globus_free(parsed_options[i]);
            }
            globus_free(parsed_options);
            parsed_options = (char **) NULL;
        }
    }
    else
    {
        /* Slave.  The buffer came from option_string_handler().
         */
        globus_free(option_string);
        option_string = (char *) NULL;
    }

    /* Delete all barriers, sp_groups, endpoints, etc.
     * and shutdown nexus.
     */
    if (do_shutdown() == 0)
    {
        TraceTst(globus_libc_printf("harness_main(): "
                                    "shutdown completed successfully\n"));
        /* XXX -- get rid of ArgLists
         */
        return (error_code);
    }
    else
    {
        globus_libc_printf("error in shutdown\n");
        return (1);
    }
}


/*
 *
 * main sub-functions
 *
 */


static void
do_startup_debug_level(char *name, char *dflt, int *level)
/*
 * Helper for do_startup(): look up `name=' in the `-debug'
 * arg list (using dflt when it is absent) and store the
 * parsed integer in *level.  Calls harness_abort() on a
 * parse error, since defaulting to a low level could waste
 * the run and using a high level would obscure the typo.
 */
{
    char        *s;

    s = arg_list_find(&debug_al, name, dflt);
    if (sscanf(s, "%i", level) != 1)
    {
        globus_libc_printf("harness_main(startup): bad -debug option "
                           "%s='%s'\n", name, s);
        harness_abort();
    }
}


static int
do_startup(int *argc, char ***argv)
/*
 * Using functions in startup.c, startpoints from the
 * slaves are given to the master, first using gram_myjob
 * and then listen/attach.  The master sends out its
 * startpoint and assigns ranks, then the harness
 * sp_group is created to finish distributing SPs.
 *	argc, argv = passed through to startup_local().
 * Returns 0 on success, 1 on failure.
 */
{
    sp_list_t   sp_list;
    int         n_attaches;

    /* Look for debug levels given on command line.
     * The harness level is the larger of local and global.
     */
    do_startup_debug_level("local", "1", &local_dbg_lvl);
    do_startup_debug_level("global", "0", &global_dbg_lvl);
    if (global_dbg_lvl > local_dbg_lvl)
        local_dbg_lvl = global_dbg_lvl;
    do_startup_debug_level("test", "-1", &test_dbg_lvl);

    /* The variable sp_list is used to keep track
     * of all startpoints from slaves.  If our list
     * becomes empty, we know that we have become
     * a slave.
     */
    if (sp_list_init(&sp_list) != 0)
    {
        globus_libc_printf("harness_main(startup): sp_list_init failed\n");
        return (1);
    }

    /* startup_local() uses gram_myjob to connect
     * any other contexts started with this one.
     */
    if (startup_local(argc, argv,
                      &sp_list,
                      local_dbg_lvl,
                      test_dbg_lvl) != 0)
    {
        globus_libc_printf("harness_main(startup): "
                           "startup_local() failed\n");
        sp_list_destroy_sps(&sp_list, 0,
                            sp_list_get_size(&sp_list));
        return (1);
    }

    if (sp_list_get_size(&sp_list) == 0)
    {
        /* Slave already.  Wait for message
         * from the master.
         */
        startup_done((sp_list_t *) NULL);
    }
    else if (startup_al.argc <= 0)
    {
        /* Local master with no `-startup' option:
         * assume myjob startup.
         */
        startup_done(&sp_list);
    }
    else if (startup_al.argc != 2)
    {
        /* error: both forms take exactly one operand */
        globus_libc_printf("bad -startup options\n");
        sp_list_done(&sp_list);
        return (1);
    }
    else if (strcmp(startup_al.argv[0], "listen") == 0)
    {
        if (sscanf(startup_al.argv[1], "%i", &n_attaches) != 1)
        {
            /* error */
            globus_libc_printf("bad -startup listen <n>\n");
            sp_list_done(&sp_list);
            return (1);
        }

        /*
         * Listen for <n> attaches.
         */
        if (startup_listen(n_attaches, &sp_list,
                           global_dbg_lvl, test_dbg_lvl) != 0)
        {
            /* error */
            globus_libc_printf("startup_listen failed\n");
            sp_list_done(&sp_list);
            return (1);
        }

        /* Give the list of slave SPs to startup_done(),
         * which will assign ranks and pass out copies
         * of the master SP.
         */
        startup_done(&sp_list);
    }
    else if (strcmp(startup_al.argv[0], "attach") == 0)
    {
        /*
         * Attach to <url>.
         */
        if (startup_attach(startup_al.argv[1], &sp_list) != 0)
        {
            /* error */
            globus_libc_printf("startup_attach() failed\n");
            sp_list_done(&sp_list);
            return (1);
        }

        /* Now a slave.  Wait for message from
         * master.
         */
        startup_done((sp_list_t *) NULL);
    }
    else
    {
        /* error: neither `listen' nor `attach'.  Release the
         * list here too, as every other error path in this
         * chain does.
         */
        globus_libc_printf("bad -startup options\n");
        sp_list_done(&sp_list);
        return (1);
    }

    /* All done with the sp_list.
     */
    if (sp_list_done(&sp_list) != 0)
        globus_libc_printf("harness_main(startup): "
                           "warning: sp_list_done() failed\n");

    /* Create the harness group.  Start with master having
     * SPs to all slaves, and all slaves having SP to
     * master, end up with all-to-all connections.
     */
    TraceTst(globus_libc_printf("harness_main(startup): "
                                "creating harness group\n"));
    if (sp_group_create(0, HARNESS_ENDPOINT_ID, (int *) NULL)
        != HARNESS_GROUP_ID)
    {
        globus_libc_printf("group_create failed for harness group\n");
        return (1);
    }

    /* We now have a unique rank.
     * Print it out for reference.
     */
    HelloTst(globus_libc_printf("successful startup: rank=%i of %i\n",
                                sp_group_rank(HARNESS_GROUP_ID),
                                sp_group_size(HARNESS_GROUP_ID)));
    PrintRank(sp_group_rank(HARNESS_GROUP_ID));

    /* Give the test a chance to do any initialization,
     * although the harness is not usable yet.
     */
    if (test_startup(test_dbg_lvl) != 0)
    {
        globus_libc_printf("harness_main(startup): test_startup() failed\n");
        return (1);
    }
    return (0);
}


static void
send_option_string()
/* Master should have set option_string to whatever string
 * it wants sent.  The RSR will fill it in on the slaves
 * (see option_string_handler()).  End with a barrier block
 * for synchronization.
 *
 * The wire format is an int (the length including the
 * terminating NUL) followed by that many chars.
 */
{
    globus_nexus_buffer_t       buffer;
    int                         i, s;

    if (sp_group_rank(HARNESS_GROUP_ID) != 0)
    {
        /* Slaves sit and wait.
         */
        barrier_block(HARNESS_BARRIER_ID);
        TraceTst(globus_libc_printf("send_option_string(): recv OK\n"));
        return;
    }

    /* Master.  A NULL string cannot be measured or sent;
     * parsed_options[] slots start out NULL, so diagnose
     * this instead of handing NULL to strlen().
     */
    if (option_string == NULL)
    {
        globus_libc_printf("send_options(): option string is NULL\n");
        harness_abort();
        return;
    }

    /* Pack the string up and send an RSR to each slave,
     * then block.  A fresh buffer is built for every slave.
     */
    s = (int) strlen(option_string) + 1;
    for (i = 1; i < sp_group_size(HARNESS_GROUP_ID); i++)
    {
        if (globus_nexus_buffer_init(&buffer,
                                     globus_nexus_sizeof_char(s) +
                                     globus_nexus_sizeof_int(1),
                                     0) != NEXUS_SUCCESS)
        {
            globus_libc_printf("send_options(): buffer_init failed\n");
            harness_abort();
        }
        globus_nexus_put_int(&buffer, &s, 1);
        globus_nexus_put_char(&buffer, option_string, s);
        if (globus_nexus_send_rsr(&buffer,
                                  sp_group_sp(HARNESS_GROUP_ID, i),
                                  OPTION_STRING_HANDLER_ID,
                                  GLOBUS_TRUE, GLOBUS_FALSE)
            != GLOBUS_SUCCESS)
        {
            globus_libc_printf("send_options(): send_rsr failed\n");
            harness_abort();
        }
        TraceTst(globus_libc_printf("send_option_string(): sent string"
                                    "'%s'\n", option_string));
    }
    barrier_block(HARNESS_BARRIER_ID);
    TraceTst(globus_libc_printf("send_option_string(): send OK\n"));
}


static int
do_cmd_line()
/* Only the master really does anything here.  It decides
 * whether a self test is wanted, broadcasts that decision,
 * and then hands the `-ep' and `-run' arg lists to the test
 * so they can be converted to option strings.
 * Always returns 0; failures go through harness_abort().
 */
{
    int         trips, rounds;
    int         slot, n_slots;

    /* Self test (which currently consists of pingpong):
     * the master picks the message, everyone receives it.
     */
    if (sp_group_rank(HARNESS_GROUP_ID) == 0)
    {
        option_string = (selftest_al.argc == -1)
            ? "no_self_test"
            : "pingpong trips=2 rounds=4";
    }
    send_option_string();

    /* Only the pingpong message matches both fields. */
    if (sscanf(option_string,
               "pingpong trips=%i rounds=%i",
               &trips, &rounds) == 2)
    {
        harness_ping_test(trips, rounds);
    }

    /* Nothing more to do on the slaves. */
    if (sp_group_rank(HARNESS_GROUP_ID) != 0)
        return (0);

    /* Master: one slot for `-ep' plus one per `-run',
     * all cleared before the test fills them in.
     */
    n_slots = n_tests + 1;
    parsed_options = (char **) globus_malloc(n_slots * sizeof(char *));
    if (parsed_options == NULL)
    {
        globus_libc_printf("malloc failed for parsed options\n");
        harness_abort();
    }
    for (slot = 0; slot < n_slots; slot++)
        parsed_options[slot] = NULL;

    if (test_parse_options(&testep_al,
                           n_tests,
                           test_al,
                           parsed_options) != 0)
    {
        globus_libc_printf("test_parse_options() returned error\n");
        harness_abort();
    }
    return (0);
}


static int
do_endpoint_options()
/* Create the test endpoint.
 * Create the test sp_group (if got an endpoint).
 * Create the test barrier.
 */
{
    globus_nexus_endpoint_t	*ep;
    void		(*cleanup_func)(globus_nexus_endpoint_t *);
    globus_bool_t	free_ep;

    /* Master broadcasts the `-ep' options.
     */
    if (sp_group_rank(HARNESS_GROUP_ID) == 0)
	option_string = parsed_options[0];
    send_option_string();

    /* Create the test endpoint.
     */
    if (test_create_endpoint(option_string,
			     &ep, &cleanup_func,
			     &free_ep) != 0)
    {
	globus_libc_printf("test_create_endpoint failed\n");
	harness_abort();
    }

    /* XXX
     * -- Check for and allow (ep == NULL)
     */

    /* Add the endpoint to the ep_list.
     */
    if (ep_list_new_ep(ep, free_ep, cleanup_func)
	!= A_TEST_ENDPOINT_ID)
    {
	globus_libc_printf("failed to add test ep\n");
	harness_abort();
    }
    barrier_block(HARNESS_BARRIER_ID);

    /* Exchange test SPs.
     */
    if (sp_group_create(0, A_TEST_ENDPOINT_ID, NULL)
	!= A_TEST_GROUP_ID)
    {
	globus_libc_printf("failed to create test sp_group\n");
	harness_abort();
    }

    /* Create a barrier for the test.
     */
    if (barrier_create(A_TEST_GROUP_ID, NULL)
	!= A_TEST_BARRIER_ID)
    {
	globus_libc_printf("failed to create test barrier\n");
	harness_abort();
    }
    return (0);
}


static int
do_next_test()
/* Run a single test using the next `-run' option string.
 * Returns 0 after a test has been run, 1 once the master
 * has announced that there are no more tests.
 */
{
    void        *state;
#define FINISHED_MESSAGE "all_done_with_tests"

    /* The master picks what to broadcast: the next `-run'
     * string, or the end marker when the list is exhausted.
     */
    if (sp_group_rank(HARNESS_GROUP_ID) == 0)
    {
        option_string = (next_test <= n_tests)
            ? parsed_options[next_test++]
            : FINISHED_MESSAGE;
    }
    send_option_string();

    /* End marker received: stop the loop in main(). */
    if (strcmp(option_string, FINISHED_MESSAGE) == 0)
    {
        return (1);
    }

    /* Call the test hooks, synching after each step. */
    state = test_init(option_string,
                      sp_group_size(A_TEST_GROUP_ID),
                      sp_group_rank(A_TEST_GROUP_ID),
                      sp_group_list(A_TEST_GROUP_ID));
    TraceTst(globus_libc_printf("test_init done\n"));
    barrier_block(HARNESS_BARRIER_ID);
    TraceTst(globus_libc_printf("test_init OK\n"));

    test_run(state);
    TraceTst(globus_libc_printf("test_run done\n"));
    barrier_block(HARNESS_BARRIER_ID);

    test_done(state);
    TraceTst(globus_libc_printf("test_done done\n"));
    barrier_block(HARNESS_BARRIER_ID);

    return (0);
}


static int
do_shutdown()
/* Cleanup after test and harness: let the test clean up,
 * synchronize one last time, then delete every barrier,
 * sp_group and endpoint slot and deactivate nexus.
 * Always returns 0.
 */
{
    int         barrier_id = 0;
    int         group_id = 0;
    int         ep_id = 0;

    /* Test cleanup and synch. */
    test_cleanup();
    TraceTst(globus_libc_printf("do_shutdown(): last synch\n"));
    barrier_block(HARNESS_BARRIER_ID);

    /* Cleanup harness barrier, sp_groups, and ep_list,
     * walking every possible slot in ascending order.
     */
    TraceTst(globus_libc_printf("do_shutdown(): continuing\n"));
    while (barrier_id < MAX_N_BARRIERS)
    {
        barrier_delete(barrier_id);
        barrier_id++;
    }
    while (group_id < MAX_N_GROUPS)
    {
        sp_group_delete(group_id);
        group_id++;
    }
    while (ep_id < MAX_N_ENDPOINTS)
    {
        ep_list_destroy_ep(ep_id);
        ep_id++;
    }

    /* XXX -- shutdown nexus!
     */
    globus_module_deactivate(GLOBUS_NEXUS_MODULE);
    return (0);
}


/*
 * Barrier stuff.
 *	Wrappers between the test and the barrier
 *	to make sure the test doesn't do anything
 *	bad.
 */


int
harness_barrier_n(int n)
/*
 * Block on the test barrier.  The only barrier id a test
 * may name is A_TEST_BARRIER_ID; anything else is reported
 * and passed to harness_abort().
 * Returns whatever barrier_block() returns.
 */
{
    if (n == A_TEST_BARRIER_ID)
        return (barrier_block(A_TEST_BARRIER_ID));

    globus_libc_printf("harness_barrier_n(%i): bad val\n", n);
    harness_abort();
    return (barrier_block(A_TEST_BARRIER_ID));
}


void
harness_barrier_passive_n(int n)
/*
 * Call barrier_passive() on the test barrier.  The only
 * barrier id a test may name is A_TEST_BARRIER_ID; anything
 * else is reported (under this function's own name, so the
 * message points at the right caller) and passed to
 * harness_abort().
 */
{
    if (n != A_TEST_BARRIER_ID)
    {
        globus_libc_printf("harness_barrier_passive_n(%i): bad val\n", n);
        harness_abort();
    }
    barrier_passive(A_TEST_BARRIER_ID);
}

void
harness_barrier_set_udelay_n(int n, int udelay)
/*
 * Forward udelay to barrier_set_udelay() for barrier n.
 * The only barrier id a test may name is A_TEST_BARRIER_ID;
 * anything else is reported and passed to harness_abort()
 * before the call is made.
 */
{
    int         is_test_barrier = (n == A_TEST_BARRIER_ID);

    if (!is_test_barrier)
    {
        globus_libc_printf("harness_barrier_set_udelay_n(%i): bad val\n", n);
        harness_abort();
    }
    barrier_set_udelay(n, udelay);
}


/*
 * handlers
 */


void
option_string_handler(globus_nexus_endpoint_t *endpoint,
                      globus_nexus_buffer_t *buffer,
                      globus_bool_t is_non_threaded_handler)
/* Receive an option string sent by send_option_string():
 * an int length (counting the terminating NUL) followed by
 * that many chars.  Free the old string and malloc a new
 * one.  This is used relatively infrequently, so
 * synchronization is done with the already working barrier.
 *
 * On a bad length or a malloc failure a message is printed
 * and option_string is left NULL.
 */
{
    int         s_len;

    globus_nexus_get_int(buffer, &s_len, 1);

    /* Drop the previous string first, and never leave a
     * stale pointer behind if we bail out below.
     */
    if (option_string != NULL)
        globus_free(option_string);
    option_string = (char *) NULL;

    /* The length comes off the wire: a valid message always
     * carries at least the terminating NUL.
     */
    if (s_len <= 0)
    {
        globus_libc_printf("option_string_handler(): bad length %i\n",
                           s_len);
        return;
    }

    option_string = (char *) globus_malloc(s_len * sizeof(char));
    if (option_string == NULL)
    {
        globus_libc_printf("option_string_handler(): malloc failed\n");
        return;
    }
    globus_nexus_get_char(buffer, option_string, s_len);

    /* Guarantee termination before anyone treats this as a
     * C string (the sender normally includes the NUL).
     */
    option_string[s_len - 1] = '\0';
    TraceTst(globus_libc_printf("option_string_handler(): got string '%s'\n",
                                option_string));
}


void
set_debug_level_handler(globus_nexus_endpoint_t *endpoint,
                        globus_nexus_buffer_t *buffer,
                        globus_bool_t is_non_threaded_handler)
/* Unpack three ints from the buffer -- harness debug level,
 * test debug level, error code, in that order -- and apply
 * them locally.
 * XXX -- separate handler for error code.
 */
{
    int         harness_lvl;
    int         test_lvl;
    int         err;

    /* Read order must match the order described above. */
    globus_nexus_get_int(buffer, &harness_lvl, 1);
    globus_nexus_get_int(buffer, &test_lvl, 1);
    globus_nexus_get_int(buffer, &err, 1);

    locally_set_debug_level(harness_lvl, test_lvl);
    locally_set_error(err);
}
