/*
 *	Ohio Trollius
 *	Copyright 1996 The Ohio State University
 *	NJN/RBD
 *
 *	$Id: bcast.c,v 6.1 96/11/23 22:50:55 nevin Rel $
 *
 *	Function:	- broadcast info to all nodes SIMD-style
 *	Accepts:	- buffer
 *			- count
 *			- datatype
 *			- root
 *			- communicator
 *	Returns:	- MPI_SUCCESS or an MPI error code
 */

#include <app_mgmt.h>
#include <blktype.h>
#include <mpi.h>
#include <mpisys.h>
#include <mpitrace.h>
#include <rpisys.h>

/*
 * local functions
 */
static int		bcast_lin();		/* O(N) broadcast */
static int		bcast_log();		/* O(log(N)) broadcast */
static int		bcast_lin_lamd();	/* LAMD O(N) broadcast */
static int		bcast_log_lamd();	/* LAMD O(log(N)) broadcast */

/*
 *	MPI_Bcast
 *
 *	Function:	- validate the arguments, record trace and debugging
 *			  information, then hand the broadcast to the linear
 *			  or the logarithmic algorithm depending on group size
 *			- a zero count or a group of at most one process
 *			  completes immediately without any communication
 *	Accepts:	- buffer
 *			- count
 *			- datatype
 *			- root
 *			- communicator
 *	Returns:	- MPI_SUCCESS or an MPI error code
 */
int
MPI_Bcast(buff, count, datatype, root, comm)

void			*buff;
int			count;
MPI_Datatype		datatype;
int			root;
MPI_Comm		comm;

{
    int			size;			/* group size */
    struct _gps		*p;			/* favourite pointer */

    lam_initerr();
    lam_setfunc(BLKMPIBCAST);
/*
 * Check for invalid arguments.
 * Null and inter-communicators are both rejected.
 */
    if ((comm == MPI_COMM_NULL) || LAM_IS_INTER(comm)) {
	return(lam_errfunc(comm, BLKMPIBCAST, lam_mkerr(MPI_ERR_COMM, 0)));
    }

    if (datatype == MPI_DATATYPE_NULL) {
	return(lam_errfunc(comm, BLKMPIBCAST, lam_mkerr(MPI_ERR_TYPE, 0)));
    }

    if (count < 0) {
	return(lam_errfunc(comm, BLKMPIBCAST, lam_mkerr(MPI_ERR_COUNT, 0)));
    }

    MPI_Comm_size(comm, &size);

    if ((root >= size) || (root < 0)) {
	return(lam_errfunc(comm, BLKMPIBCAST, lam_mkerr(MPI_ERR_ROOT, 0)));
    }

    LAM_TRACE(lam_tr_cffstart(BLKMPIBCAST));
/*
 * Set debugging parameters.
 * The root rank was validated above, so indexing g_procs[] is safe here.
 */
    p = &(comm->c_group->g_procs[root]->p_gps);

    lam_setparam(BLKMPIBCAST,
	    root | (p->gps_grank << 16), (p->gps_node << 16) | p->gps_idx);
/*
 * Nothing to move: zero count or a group with at most one process.
 * Both cases must close the trace opened by lam_tr_cffstart() above,
 * otherwise the start record is left without a matching end record.
 */
    if ((count == 0) || (size <= 1)) {
	LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
	lam_resetfunc(BLKMPIBCAST);
	return(MPI_SUCCESS);
    }
/*
 * Decide which algorithm to use.
 * Small groups use the O(N) variant, larger ones the O(log(N)) variant.
 * RPI_SPLIT (defined elsewhere) selects between the LAMD and the
 * non-LAMD implementation of the chosen algorithm.
 */
    if (size <= LAM_COLLMAXLIN) {
	return(RPI_SPLIT(bcast_lin_lamd, bcast_lin,
	    		(buff, count, datatype, root, comm)));
    }
    else {
	return(RPI_SPLIT(bcast_log_lamd, bcast_log,
	    		(buff, count, datatype, root, comm)));
    }
}

/*
 *	bcast_lin
 *
 *	Function:	- broadcast using O(N) algorithm
 *			- the root creates one persistent send per peer,
 *			  starts them all and waits for their completion;
 *			  every other rank does a single receive from root
 *			- every persistent request that was created is
 *			  freed before returning, on error paths as well
 *	Accepts:	- same arguments as MPI_Bcast()
 *	Returns:	- MPI_SUCCESS or error code
 */
static int
bcast_lin(buff, count, datatype, root, comm)

void			*buff;
int			count;
MPI_Datatype		datatype;
int			root;
MPI_Comm		comm;

{
    int			i;			/* favourite index */
    int			size;			/* group size */
    int			rank;			/* caller rank */
    int			err;			/* error code */
    int			ferr;			/* request free error code */
    int			nreqs;			/* # requests created */
    MPI_Request		reqs[LAM_COLLMAXLIN];	/* requests */
    MPI_Status		stats[LAM_COLLMAXLIN];	/* status structures */

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    lam_mkcoll(comm);
/*
 * Non-root receive the data.
 */
    if (rank != root) {
	err = MPI_Recv(buff, count, datatype, root, BLKMPIBCAST, comm, stats);
	lam_mkpt(comm);

	if (err != MPI_SUCCESS) {
	    return(lam_errfunc(comm, BLKMPIBCAST, err));
	}

	LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
	lam_resetfunc(BLKMPIBCAST);
	return(MPI_SUCCESS);
    }
/*
 * Root sends data to all others.
 * nreqs counts only the requests that were really created, so the
 * cleanup below never touches an uninitialized slot.
 */
    nreqs = 0;
    err = MPI_SUCCESS;

    for (i = 0; i < size; ++i) {

	if (i == rank) continue;

	err = MPI_Send_init(buff, count, datatype, i, BLKMPIBCAST,
	    			comm, &reqs[nreqs]);
	if (err != MPI_SUCCESS) {
	    break;
	}

	++nreqs;
    }
/*
 * Start and wait on all requests.
 */
    if (err == MPI_SUCCESS) {
	err = MPI_Startall(nreqs, reqs);
    }

    if (err == MPI_SUCCESS) {
	err = MPI_Waitall(nreqs, reqs, stats);
    }

    lam_mkpt(comm);
/*
 * Free the requests, whatever happened above.
 * Keep going after a failed free so the remaining requests are not
 * leaked; the first error seen is the one reported.
 */
    for (i = 0; i < nreqs; ++i) {

	ferr = MPI_Request_free(&reqs[i]);
	if (err == MPI_SUCCESS) {
	    err = ferr;
	}
    }

    if (err != MPI_SUCCESS) {
	return(lam_errfunc(comm, BLKMPIBCAST, err));
    }

    LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
    lam_resetfunc(BLKMPIBCAST);
    return(MPI_SUCCESS);
}

/*
 *	bcast_log
 *
 *	Function:	- broadcast using O(log(N)) algorithm
 *			- ranks are renumbered so that the root is virtual
 *			  rank 0; a rank receives from the virtual rank
 *			  obtained by clearing its high ON bit, then sends
 *			  to the virtual ranks obtained by setting each
 *			  higher bit, as long as they exist in the group
 *			- every persistent request that was created is
 *			  freed before returning, on error paths as well
 *	Accepts:	- same arguments as MPI_Bcast()
 *	Returns:	- MPI_SUCCESS or error code
 */
static int
bcast_log(buff, count, datatype, root, comm)

void			*buff;
int			count;
MPI_Datatype		datatype;
int			root;
MPI_Comm		comm;

{
    int			i;			/* favourite index */
    int			size;			/* group size */
    int			rank;			/* caller rank */
    int			vrank;			/* caller virtual rank */
    int			peer;			/* peer rank */
    int			dim;			/* cube dimension */
    int			hibit;			/* high ON bit position */
    int			mask;			/* rank bit mask */
    int			err;			/* error code */
    int			ferr;			/* request free error code */
    int			nreqs;			/* # requests created */
    MPI_Request		reqs[LAM_COLLMAXDIM];	/* requests */
    MPI_Status		stats[LAM_COLLMAXDIM];	/* status structures */

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    lam_mkcoll(comm);

    vrank = (rank + size - root) % size;

    dim = comm->c_cube_dim;
    hibit = lam_hibit(vrank, dim);
    --dim;
/*
 * Receive data from parent in the tree.
 * No request exists yet, so an error can return right away.
 */
    if (vrank > 0) {
	peer = ((vrank & ~(1 << hibit)) + root) % size;

	err = MPI_Recv(buff, count, datatype, peer, BLKMPIBCAST, comm, stats);
	if (err != MPI_SUCCESS) {
	    lam_mkpt(comm);
	    return(lam_errfunc(comm, BLKMPIBCAST, err));
	}
    }
/*
 * Send data to the children.
 * nreqs counts only the requests that were really created, so the
 * cleanup below never touches an uninitialized slot.
 */
    nreqs = 0;
    err = MPI_SUCCESS;

    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {

	peer = vrank | mask;
	if (peer < size) {
	    peer = (peer + root) % size;

	    err = MPI_Send_init(buff, count, datatype, peer, BLKMPIBCAST,
				comm, &reqs[nreqs]);
	    if (err != MPI_SUCCESS) {
		break;
	    }

	    ++nreqs;
	}
    }
/*
 * Start and wait on all requests.
 */
    if ((err == MPI_SUCCESS) && (nreqs > 0)) {
	err = MPI_Startall(nreqs, reqs);

	if (err == MPI_SUCCESS) {
	    err = MPI_Waitall(nreqs, reqs, stats);
	}
    }
/*
 * Free the requests, whatever happened above.
 * Keep going after a failed free so the remaining requests are not
 * leaked; the first error seen is the one reported.
 */
    for (i = 0; i < nreqs; ++i) {

	ferr = MPI_Request_free(&reqs[i]);
	if (err == MPI_SUCCESS) {
	    err = ferr;
	}
    }

    lam_mkpt(comm);

    if (err != MPI_SUCCESS) {
	return(lam_errfunc(comm, BLKMPIBCAST, err));
    }

    LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
    lam_resetfunc(BLKMPIBCAST);
    return(MPI_SUCCESS);
}

/*
 *	bcast_lin_lamd
 *
 *	Function:	- LAMD broadcast using O(N) algorithm
 *	Accepts:	- same arguments as MPI_Bcast()
 *	Returns:	- MPI_SUCCESS or error code
 */
static int
bcast_lin_lamd(buff, count, datatype, root, comm)

void			*buff;
int			count;
MPI_Datatype		datatype;
int			root;
MPI_Comm		comm;

{
    int			i;			/* favourite index */
    int			size;			/* group size */
    int			rank;			/* caller rank */
    int			longproto;		/* long protocol */
    int			err;			/* error code */
    int			nreqs;			/* # requests */
    int			nacks;			/* # acknowledgements */
    int			done;			/* # done requests */
    MPI_Status		stat;			/* status */
    int			idx[LAM_COLLMAXDIM];	/* indices */
    MPI_Request		reqs[LAM_COLLMAXDIM];	/* requests */
    MPI_Status		stats[LAM_COLLMAXDIM];	/* status structures */

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    longproto = (datatype->dt_size * count) > MAXNMSGLEN;
    lam_mkcoll(comm);
/*
 * Non-root receives the data.
 */
    if (rank != root) {
	if (longproto) {
	    err = MPI_Recv(buff, 0, MPI_BYTE, root, BLKMPIBCAST, comm, &stat);
	    if (err != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, err));
	    }

	    err = MPI_Send(buff, 0, MPI_BYTE, root, BLKMPIBCAST, comm);
	    if (err != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, err));
	    }
	}

	err = MPI_Recv(buff, count, datatype, root, BLKMPIBCAST, comm, &stat);

	lam_mkpt(comm);
	if (err != MPI_SUCCESS) {
	    return(lam_errfunc(comm, BLKMPIBCAST, err));
	}

	LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
	lam_resetfunc(BLKMPIBCAST);
	return(MPI_SUCCESS);
    }
/*
 * Root sends data to all others.
 */
    if (longproto) {
	nreqs = 0;
	for (i = 0; i < size; ++i) {
	    if (i == rank) continue;

	    err = MPI_Send(buff, 0, MPI_BYTE, i, BLKMPIBCAST, comm);
	    if (err != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, err));
	    }

	    err = MPI_Irecv(buff, 0, MPI_BYTE,
				i, BLKMPIBCAST, comm, &reqs[nreqs++]);
	    if (err != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, err));
	    }
	}
/*
 * Wait for primer ack messages and send data.
 */
	nacks = 0;
	while (nacks < nreqs) {
	    err = MPI_Waitsome(nreqs, reqs, &done, idx, stats);

	    if (err != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, err));
	    }

	    if (done == MPI_UNDEFINED) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST,
				lam_mkerr(MPI_ERR_UNKNOWN, 0)));
	    }

	    nacks += done;

	    for (i = 0; i < done; i++) {

		err = MPI_Send(buff, count, datatype,
				stats[i].MPI_SOURCE, BLKMPIBCAST, comm);
		
		if (err != MPI_SUCCESS) {
		    lam_mkpt(comm);
		    return(lam_errfunc(comm, BLKMPIBCAST, err));
		}
	    }
	}
    }
/*
 * short protocol
 */
    else {
	for (i = 0; i < size; i++) {
	    if (i == root) continue;

	    err = MPI_Send(buff, count, datatype, i, BLKMPIBCAST, comm);
	    if (err != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, err));
	    }
	}
    }
    
    lam_mkpt(comm);
    LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
    lam_resetfunc(BLKMPIBCAST);
    return(MPI_SUCCESS);
}

/*
 *	bcast_log_lamd
 *
 *	Function:	- LAMD broadcast using O(log(N)) algorithm
 *			- ranks are renumbered so that the root is virtual
 *			  rank 0; data flows from a parent (high ON bit
 *			  cleared) to its children (one higher bit set)
 *			- messages larger than MAXNMSGLEN bytes use a
 *			  zero-byte primer/ack exchange before the data
 *	Accepts:	- same arguments as MPI_Bcast()
 *	Returns:	- MPI_SUCCESS or error code
 */
static int
bcast_log_lamd(buff, count, datatype, root, comm)

void			*buff;
int			count;
MPI_Datatype		datatype;
int			root;
MPI_Comm		comm;

{
    int			bit;			/* bit position index */
    int			k;			/* completed ack index */
    int			nprocs;			/* group size */
    int			me;			/* caller rank */
    int			vme;			/* caller virtual rank */
    int			vchild;			/* child virtual rank */
    int			partner;		/* parent or child rank */
    int			lastbit;		/* highest cube bit */
    int			topbit;			/* high ON bit position */
    int			use_primer;		/* long protocol */
    int			rc;			/* error code */
    int			npending;		/* # ack receives posted */
    int			nacked;			/* # acks consumed */
    int			ndone;			/* # done requests */
    MPI_Status		rstat;			/* receive status */
    int			idx[LAM_COLLMAXDIM];	/* indices */
    MPI_Request		reqs[LAM_COLLMAXDIM];	/* requests */
    MPI_Status		stats[LAM_COLLMAXDIM];	/* status structures */

    MPI_Comm_rank(comm, &me);
    MPI_Comm_size(comm, &nprocs);

    lam_mkcoll(comm);
    use_primer = ((datatype->dt_size * count) > MAXNMSGLEN);
/*
 * Shift ranks so the root becomes virtual rank 0.
 */
    vme = (me + nprocs - root) % nprocs;

    topbit = lam_hibit(vme, comm->c_cube_dim);
    lastbit = comm->c_cube_dim - 1;
/*
 * Everyone but the root first gets the data from its parent.
 */
    if (vme > 0) {
	partner = ((vme & ~(1 << topbit)) + root) % nprocs;

	if (use_primer) {
/*
 * Consume the primer, then acknowledge it.
 */
	    rc = MPI_Recv(buff, 0, MPI_BYTE,
				partner, BLKMPIBCAST, comm, &rstat);

	    if (rc == MPI_SUCCESS) {
		rc = MPI_Send(buff, 0, MPI_BYTE,
				partner, BLKMPIBCAST, comm);
	    }

	    if (rc != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, rc));
	    }
	}

	rc = MPI_Recv(buff, count, datatype,
				partner, BLKMPIBCAST, comm, &rstat);

	if (rc != MPI_SUCCESS) {
	    lam_mkpt(comm);
	    return(lam_errfunc(comm, BLKMPIBCAST, rc));
	}
    }
/*
 * Walk the bits above the high ON bit; each one names a potential child.
 * Short protocol: send the data now.
 * Long protocol: send a primer and post a receive for the ack.
 */
    npending = 0;

    for (bit = topbit + 1; bit <= lastbit; ++bit) {

	vchild = vme | (1 << bit);
	if (vchild >= nprocs) {
	    continue;
	}

	partner = (vchild + root) % nprocs;

	if (!use_primer) {
	    rc = MPI_Send(buff, count, datatype,
				partner, BLKMPIBCAST, comm);
	}
	else {
	    rc = MPI_Send(buff, 0, MPI_BYTE, partner, BLKMPIBCAST, comm);

	    if (rc == MPI_SUCCESS) {
		rc = MPI_Irecv(buff, 0, MPI_BYTE, partner,
				BLKMPIBCAST, comm, &reqs[npending++]);
	    }
	}

	if (rc != MPI_SUCCESS) {
	    lam_mkpt(comm);
	    return(lam_errfunc(comm, BLKMPIBCAST, rc));
	}
    }
/*
 * Long protocol only: as acks come in, ship the data to their senders.
 */
    if (use_primer) {

	for (nacked = 0; nacked < npending; nacked += ndone) {

	    rc = MPI_Waitsome(npending, reqs, &ndone, idx, stats);

	    if (rc != MPI_SUCCESS) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST, rc));
	    }

	    if (ndone == MPI_UNDEFINED) {
		lam_mkpt(comm);
		return(lam_errfunc(comm, BLKMPIBCAST,
				lam_mkerr(MPI_ERR_UNKNOWN, 0)));
	    }

	    for (k = 0; k < ndone; ++k) {

		rc = MPI_Send(buff, count, datatype,
				stats[k].MPI_SOURCE, BLKMPIBCAST, comm);

		if (rc != MPI_SUCCESS) {
		    lam_mkpt(comm);
		    return(lam_errfunc(comm, BLKMPIBCAST, rc));
		}
	    }
	}
    }

    lam_mkpt(comm);
    LAM_TRACE(lam_tr_cffend(BLKMPIBCAST, root, comm, datatype, count));
    lam_resetfunc(BLKMPIBCAST);
    return(MPI_SUCCESS);
}
