
/*
 *  nexus_send_rsr()
 *
 * Take 'in_buffer', coalesce it into a canonical form, add a header
 * to it, and then call the appropriate protocol module.
 *
 * The 'new_buffer' that is passed to the protocol module's send_rsr()
 * function has the following properties:
 *	new_buffer->iovec_formatted is set to NEXUS_TRUE if the contents
 *		of new_buffer->base_segments is a struct iovec
 *	new_buffer->base_segments
 *		Points to either:
 *		- A single segment with the entire base message.
 *		  That message is at:
 *			new_buffer->base_segments->current
 *		  with a size of:
 *			new_buffer->base_segments->size_used
 *		- A single segment containing a struct iovec.
 *		  That iovec is at:
 *			new_buffer->base_segments->current
 *		  with an iocnt of:
 *			new_buffer->base_segments->size_used
 *	new_buffer->current_base_segment = new_buffer->base_segments
 *	new_buffer->base_segments->current
 *		See above.
 *	new_buffer->base_segments->size_used
 *		See above.
 *	new_buffer->reserved_header_size
 *		Set to the number of bytes at the
 *		start of new_buffer->base_segments->storage,
 *		before the body of the message.  The header that is
 *		added to the front of the message body comes out of
 *		this space, though reserved_header_size continues
 *		to be the number of bytes reserved before the message
 *		body.
 *	new_buffer->direct_segments
 *		Points to a single segment which contains
 *		all of the custom direct puts, or NULL if there
 *		are no direct segments.
 *		If new_buffer->iovec_formatted is NEXUS_TRUE, then this
 *		field is always NULL.
 *	new_buffer->n_direct
 *		Set to the number of custom direct puts
 *		contained in new_buffer->direct_segments.
 *		If this value is 0, then send_rsr()
 *		need not wait for the data to be written to the network
 *		before returning.
 *	new_buffer->proto
 *		Set to the nexus_proto_t from the startpoint
 *		to which this buffer is being sent.
 *	new_buffer->using_barrier = NEXUS_FALSE
 *		If the protocol wants nexus_send_rsr() to wait for
 *		completion of the send, it should change this
 *		value to NEXUS_TRUE, initialize new_buffer->barrier,
 *		and signal the barrier when the send has completed.
 *		Otherwise, the protocol module should just call
 *		nexus_buffer_destroy() on the buffer after the
 *		send has completed.
 *
 * Input values are:
 *	in_buffer:
 *		The buffer to be sent.
 *	startpoint:
 *		The startpoint to which this message is being sent.
 *	handler_id:
 *		The handler_id to which this message is	being sent.
 *	destroy_buffer:
 *		If this is NEXUS_TRUE, then 'in_buffer' will be destroyed
 *		immediately after this send.  In that case, new_buffer
 *		can simply be set to point to in_buffer, rather than
 *		allocating a whole new buffer.
 *	called_from_non_threaded_handler:
 *		Is this call being made from a non-threaded handler?
 *		If so, then this function will copy all data so that
 *		there are no direct components.  This allows the
 *		send_rsr() to complete immediately.
 *
 * The message layout is standardized for use by all protocol modules
 * as follows:
 *
 *   Header:
 *       senders_data_format         1 byte u_int
 *                                     (<128 for format,
 *                                      >=128 for signaling)
 *       full_message_size           1 u_long
 *       liba_size                   1 byte u_int
 *       liba[0..liba_size]          byte array of size liba_size
 *   Transform Info:
 *       transform_info              (optional, size depends on transform)
 *   Message Info:
 *       flags                       1 byte
 *           has_direct_info            :1
 *           has_other_info             :1
 *           pad_size                   :3
 *       pad[0..pad_size]            (pad Header + Message Info size
 *                                    to NEXUS_ALIGN_MESSAGE_SIZE byte
 *                                    multiple)
 *       handler_id                  1 int
 *       if (has_direct_info)
 *           direct_info_offset      1 u_long
 *       endif
 *       if (has_other_info)
 *           other_info_offset       1 u_long
 *       endif
 *   Message Body:
 *       message_body
 *   Other Info:
 *       if (has_other_info)
 *           loop
 *               info_tag (DEBUG_TAG, PROFILE_TAG)
 *               info_size (not including tag and size header)
 *               info
 *               (examples:
 *                   if (info_tag==PROFILE_TAG)
 *                       info_size=2*sizeof(int)
 *                       info=
 *                           node_id
 *                           context_id
 *                   endif
 *                   if (info_tag==DEBUG_TAG)
 *                       info_size=???
 *                       info=
 *                           n_debug_tags
 *                       for (0..n_debug_tags)
 *                           size
 *                           datatype
 *                       endfor
 *                   endif
 *           endloop
 *       endif (has_other_info)
 *   Direct Info:
 *       if (has_direct_info)
 *           n_direct_components                1 u_int
 *           protocol_specific_info_size        1 u_int
 *           protocol_specific_info[0..protocol_specific_info_size]
 *           for (0..n_direct_components)
 *               component_size                 1 u_long
 *               component_datatype             1 byte u_int
 *               component_approach             1 byte u_int
 *               switch(component_approach)
 *                 case INLINE==0:
 *                   component_offset           1 u_long
 *                 case POINTER==1:
 *                   component_pointer          1 u_long
 *                 default:
 *                   protocol_specific_info     1 u_long
 *           endfor
 *       endif
 *
 * Comments about the message format:
 *   - Each *_offset value is a byte offset from the beginning of
 *     the message body.
 *   - A message contains five parts:
 *         header, message info, message body, other info, direct info.
 *     Only the header and message info are mandatory.
 *   - Everything except the message header may be
 *     transformed (i.e. encrypted, compressed, etc).
 *     Therefore, the header must contain enough information
 *     to allow a receiving context to transform the message, namely:
 *         liba: This is needed to allow the receiving context
 *           to find the endpoint to which this message is directed.
 *           The endpoint contains the transform operation that
 *           needs to be applied to the message, plus any state
 *           required to perform the transform (i.e. a key to decrypt).
 *         transform_info: If the transform operation requires
 *           information from the sending context, then this
 *           information is included in the message header in
 *           the transform_info. For example, an MD5 hash value
 *           may be included here.
 *   - It is important that the first byte of the message be
 *     overloadable by the protocol module so that the protocol
 *     module can use it for various signaling needs.
 *     For example, the TCP protocol module needs to send a one
 *     byte message down a socket before closing it so that the
 *     receiving end can tell that the socket was purposely closed
 *     and not the result of a process death. The MP protocol module
 *     can use this byte to flag a message that is bigger than the
 *     default buffer size. Since the senders_data_format does
 *     not require 256 values, we can steal all values >=128 for
 *     these signaling purposes.
 *   - On many machines it is desirable to keep the message body
 *     aligned on a word boundary. In order to do this, the size
 *     of the header plus message info is always padded out to
 *     an 8 byte boundary (NEXUS_ALIGN_MESSAGE_SIZE).
 *     Therefore, by aligning the whole
 *     message buffer, the message body will be guaranteed to be
 *     aligned. The size of padding needed to align the message
 *     body is contained in the flags.pad_size field.
 *   - All of the other information (debug tags, profile info, etc)
 *     is all kept at the end of the message, and is marked by a
 *     single bit in the message header. This allows the optimized
 *     version to deal with (i.e. ignore) these various other pieces
 *     of information with a single check to see if it needs to
 *     skip the other_info_offset field in the header. But it
 *     allows for an extensible set of additional information
 *     to be added to a message.
 *   - The direct information is protocol specific. For example,
 *     the MP protocol module will say how many subsequent messages
 *     there are and their sizes, whereas the shared memory protocol
 *     module may just keep a list of sizes and addresses that it
 *     can grab the data from.
 *   - In the fully optimized case (32-bit cpu, no profile, no debug,
 *     no direct components, no transform, no security), a message
 *     would have a 16 byte header followed by the message body:
 *         byte  1      senders_data_format
 *         bytes 2-5    full_message_size
 *         byte  6      flags (has_direct_info=0, has_other_info=0, pad_size=0)
 *         byte  7      liba_size
 *         bytes 8-12   liba
 *         bytes 13-16  handler_id
 *         bytes 17-N   message_body
 */
int nexus_send_rsr(nexus_buffer_t *in_buffer,
		   nexus_startpoint_t *startpoint,
		   int handler_id,
		   nexus_bool_t destroy_buffer,
		   nexus_bool_t called_from_non_threaded_handler)
{
    /*
     * NOTE(review): this function is still a draft.
     *   - It uses 'buffer', 'c_buffer', 'total_direct_puts',
     *     'has_other_info' and 'can_use_writev', none of which are
     *     declared in this function; presumably they are meant to be
     *     derived from in_buffer/startpoint -- confirm and declare them.
     *   - 'total_size' is read below but never assigned here.
     *   - 'handler_id' is not used yet, and the header is not yet written
     *     nor the protocol module's send_rsr() called (see the
     *     pseudo-code comment at the end of the function).
     */
    unsigned long total_size;
    unsigned long total_inline_sizes;
    unsigned long total_custom_puts;
    nexus_bool_t copy_inlines;
    nexus_bool_t copy_pointer;
    nexus_bool_t copy_custom;
    unsigned long required_header_size;
    unsigned long direct_info_size;
    unsigned long required_body_size;
    int iovec_count;
    int i, j;
    unsigned long data_size;
    nexus_byte_t *start_data;
    nexus_byte_t *cur_location;
    unsigned long cur_custom_put;
    nexus_base_segment_t *base_segment;
    nexus_direct_segment_t *direct_segment;
    nexus_byte_t tmp_byte;
    nexus_byte_t *cur_header;
    unsigned long total_message_size;
    nexus_byte_t pad_size;
    nexus_byte_t flags;
    unsigned long direct_info_offset;
    nexus_byte_t *start_transform;
    unsigned long save_header_size;
    unsigned long out_size;
    unsigned long out_header_size;
    unsigned long out_transform_info_size;
    unsigned long out_data_size;
    unsigned long out_untransform_size;
    nexus_bool_t out_must_be_freed;

    /*
     * Size of the header plus message info: transform header, liba,
     * three single bytes, one u_long, one int, and the optional
     * direct_info_offset / other_info_offset fields.
     */
    required_header_size
	= (nexus_transform_header_size(startpoint->transform_id)
	   + startpoint->liba_size
	   + nexus_dc_sizeof_byte(1) * 3
	   + nexus_dc_sizeof_u_long(1)
	   + nexus_dc_sizeof_int(1)
	   + (total_direct_puts>0?nexus_dc_sizeof_u_long(1):0)
	   + (has_other_info?nexus_dc_sizeof_u_long(1):0));

    /*
     * Round the header up to a multiple of 8 bytes so that the message
     * body that follows it stays aligned; pad_size records how many
     * bytes were added.
     */
    if (required_header_size % 8)
    {
        pad_size = 8 - (required_header_size % 8);
	required_header_size += pad_size;
    }
    else
    {
        pad_size = 0;
    }

    if (total_direct_puts > 0)
    {
	/*
	 * Inline components may be left in place (and gathered later)
	 * only when we are not in a non-threaded handler, writev can
	 * be used, and no transform is applied.
	 */
	if (   !called_from_non_threaded_handler
	    && can_use_writev
	    && startpoint->transform_id == NEXUS_TRANSFORM_NONE)
	{
	    copy_inlines = NEXUS_FALSE;
	}
	else
	{
	    copy_inlines = NEXUS_TRUE;
	}

	/*
	 * Pointer and custom components are copied into the message
	 * when called from a non-threaded handler or when the
	 * transform modifies the data.
	 */
	if (   called_from_non_threaded_handler
	    || nexus_transform_modifies_data(startpoint->transform_id))
	{
	    copy_pointer = NEXUS_TRUE;
	    copy_custom = NEXUS_TRUE;
	}
	else
	{
	    copy_pointer = NEXUS_FALSE;
	    copy_custom = NEXUS_FALSE;
	}

	/*
	 * direct_info contains:
	 *
	 * u_int:          n_direct_components, proto_info_size
	 * byte_stream:    proto_info
	 * for (0..n_direct_components)
	 *   u_long:       size, {location,pointer,custom_info}
	 *   byte:         datatype, approach
	 * endfor
	 */
	direct_info_size = (total_direct_puts * (nexus_dc_sizeof_u_long(1) * 2
						 + nexus_dc_sizeof_byte(1) * 2)
			    + nexus_dc_sizeof_u_int(1) * 2
			    + (*buffer)->proto->funcs->direct_info_size());
    }
    else
    {
	/*
	 * NOTE(review): copy_inlines/copy_pointer/copy_custom are left
	 * unset on this path; they are only read while walking direct
	 * segments, which are presumably absent here -- confirm.
	 */
	direct_info_size = 0;
    }
    
    required_body_size = (total_size
			  + direct_info_size
			  - (*buffer)->reserved_header_size);

    /*
     * NOTE(review): use_writev does not take the transform into account,
     * whereas copy_inlines/copy_custom do.  With a transform and writev
     * available, the writev branch below can see components that were
     * counted as "copied" rather than as iovec entries -- confirm the
     * intended policy.
     */
    if (   called_from_non_threaded_handler
	|| !can_use_writev)
    {
        (*c_buffer)->use_writev = NEXUS_FALSE;
    }
    else
    {
        (*c_buffer)->use_writev = NEXUS_TRUE;
    }

    /*
     * Walk every direct component once to find out how many bytes have
     * to be copied into the coalesced buffer (total_inline_sizes), how
     * many iovec entries are needed (entry 0 is the coalesced buffer
     * itself) and how many custom puts remain uncopied.
     */
    total_inline_sizes = 0;
    total_custom_puts = 0;
    iovec_count = 1;
    for (direct_segment = (*buffer)->direct_segments;
	 direct_segment;
	 direct_segment = direct_segment->next)
    {
        for (i = 0; i < direct_segment->size; i++)
	{
	    switch (direct_segment->storage[i].action)
	    {
	      case NEXUS_DIRECT_INFO_ACTION_INLINE:
		if (copy_inlines)
		{
		    total_inline_sizes += direct_segment->storage[i].size;
		}
		else
		{
		    iovec_count++;
		}
		break;
	      case NEXUS_DIRECT_INFO_ACTION_POINTER:
		if (copy_pointer)
		{
		    total_inline_sizes += direct_segment->storage[i].size;
		}
		break;
	      case NEXUS_DIRECT_INFO_ACTION_CUSTOM:
		if (copy_custom)
		{
		    total_inline_sizes += direct_segment->storage[i].size;
		}
		else
		{
		    iovec_count++;
		    total_custom_puts++;
		}
		break;
	      default: /* error */
		break;
	    }
	}
    }
    required_body_size += total_inline_sizes;

    data_size = required_header_size + required_body_size;
    if (   destroy_buffer
	&& required_header_size < (*buffer)->reserved_header_size
	&& (*buffer)->base_segments->size > required_body_size + (*buffer)->reserved_header_size)
    {
        /*
	 * use first segment as coalesce buffer: the header is placed
	 * in the reserved space immediately before the message body.
	 * NOTE(review): only the first base segment is considered on
	 * this path; any further base segments are not copied here.
	 */
        start_data = ((*buffer)->base_segments->storage
		      + (*buffer)->reserved_header_size
		      - required_header_size);
    }
    else
    {
        /* get new coalesce buffer */
        NexusMalloc(nexus_send_rsr(),
		    start_data,
		    nexus_byte_t *,
		    data_size);
	/* append every base segment, back to back, after the header */
	cur_location = start_data + required_header_size;
	for (base_segment = (*buffer)->base_segments;
	     base_segment;
	     base_segment = base_segment->next)
	{
	    memcpy(cur_location, base_segment->storage, base_segment->size);
	    cur_location += base_segment->size;
	}
    }
    /*
     assert(we have a contiguous buffer big enough to hold the base message)
     assert(there is room for the header)
     assert(all non-direct info is in buffer)
     */
    
    if (!(*c_buffer)->use_writev)
    {
	(*c_buffer)->u.data.data = start_data;
	(*c_buffer)->u.data.data_size = data_size;
	NexusMalloc(nexus_send_rsr(),
		    (*c_buffer)->u.data.custom_direct_puts,
		    nexus_direct_info_t *,
		    sizeof(nexus_direct_info_t) * total_custom_puts);
	(*c_buffer)->u.data.num_custom_direct_puts = total_custom_puts;
	
	/*
	 * data_size already includes total_inline_sizes, so the copies
	 * must start total_inline_sizes bytes before the end of the
	 * buffer; starting at start_data + data_size would write past
	 * the end of it.
	 * NOTE(review): this places the copied components in the final
	 * total_inline_sizes bytes of the buffer -- confirm against
	 * where the direct info will be written.
	 */
	cur_location = start_data + data_size - total_inline_sizes;
	cur_custom_put = 0;
	direct_segment = (*buffer)->direct_segments;
	/* i counts components across all segments, j within one segment */
	for (i = 0;
	     direct_segment && i < total_direct_puts;
	     direct_segment = direct_segment->next)
	{
	    for (j = 0; j < direct_segment->size; i++, j++)
	    {
		switch(direct_segment->storage[j].action)
		{
		  case NEXUS_DIRECT_INFO_ACTION_INLINE:
		    /* assert(copy_inlines == NEXUS_TRUE) */
		    memcpy(cur_location,
			    direct_segment->storage[j].data,
			    direct_segment->storage[j].size);
		    cur_location += direct_segment->storage[j].size;
		    break;
		  case NEXUS_DIRECT_INFO_ACTION_POINTER:
		    if (copy_pointer)
		    {
			memcpy(cur_location,
			       direct_segment->storage[j].data,
			       direct_segment->storage[j].size);
			/* the component now refers to the copy */
			direct_segment->storage[j].data = cur_location;
			cur_location += direct_segment->storage[j].size;
		    }
		    else
		    {
		    }
		    break;
		  case NEXUS_DIRECT_INFO_ACTION_CUSTOM:
		    if (copy_custom)
		    {
			memcpy(cur_location,
			       direct_segment->storage[j].data,
			       direct_segment->storage[j].size);
			cur_location += direct_segment->storage[j].size;
		    }
		    else
		    {
			/*
			 * Record the descriptor itself for the protocol
			 * module.  The destination must be the array
			 * slot, not its (uninitialized) .data pointer.
			 * NOTE(review): assumes storage[] elements are
			 * nexus_direct_info_t -- confirm.
			 */
		        memcpy(&(*c_buffer)->u.data.custom_direct_puts[cur_custom_put++],
			       &direct_segment->storage[j],
			       sizeof(nexus_direct_info_t));
		    }
		    break;
		  default: /* error */
		    break;
		}
	    }
	}
		      
	(*c_buffer)->u.data.num_custom_direct_puts = total_custom_puts;
    }
    else
    {
        /* allocate iovec for writev */
        (*c_buffer)->u.writev.iovec_count = iovec_count;
	NexusMalloc(nexus_send_rsr(),
		    (*c_buffer)->u.writev.iovec,
		    struct iovec *,
		    sizeof(struct iovec) * (*c_buffer)->u.writev.iovec_count);
	/* fill in iovec */
	(*c_buffer)->u.writev.iovec[0].iov_base = (caddr_t)start_data;
	(*c_buffer)->u.writev.iovec[0].iov_len = data_size;
	
	/*
	 * Entry 0 holds the coalesced buffer, so the direct components
	 * start at entry 1.  Starting at 0 would overwrite entry 0,
	 * leave the last entry unfilled and run off the end of the
	 * segment list.
	 */
	direct_segment = (*buffer)->direct_segments;
	for (i = 1;
	     direct_segment && i < (*c_buffer)->u.writev.iovec_count;
	     direct_segment = direct_segment->next)
	{
	    for (j = 0; j < direct_segment->size; j++)
	    {
	        switch(direct_segment->storage[j].action)
		{
		  case NEXUS_DIRECT_INFO_ACTION_INLINE:
		    /* assert (copy_inline == NEXUS_FALSE) */
		    (*c_buffer)->u.writev.iovec[i].iov_base =
		        direct_segment->storage[j].data;
		    (*c_buffer)->u.writev.iovec[i].iov_len =
		        direct_segment->storage[j].size;
		    i++;
		    break;
		  case NEXUS_DIRECT_INFO_ACTION_POINTER:
		    /* assert (copy_pointer == NEXUS_FALSE) */
		    break;
		  case NEXUS_DIRECT_INFO_ACTION_CUSTOM:
		    /* assert (copy_custom == NEXUS_FALSE) */
		    (*c_buffer)->u.writev.iovec[i].iov_base =
		        direct_segment->storage[j].data;
		    (*c_buffer)->u.writev.iovec[i].iov_len =
		        direct_segment->storage[j].size;
		    i++;
		    break;
		  default: /* error */
		    break;
		}
	    }
	}
    }
    /*
     add direct info to segment
     if should use writev
         fill in writev segment
     else
         copy inlines to segment
     */

    /*
     * The function is declared int, so do not fall off the end.
     * NOTE(review): 0 is assumed to be the success code -- confirm
     * once the send path is completed.
     */
    return(0);

} /* nexus_send_rsr() */
