/*
 * Copyright 1998-2001, University of Notre Dame.
 * Authors: Jeffrey M. Squyres, Arun Rodrigues, and Brian Barrett with
 *          Kinis L. Meyer, M. D. McNally, and Andrew Lumsdaine
 * 
 * This file is part of the Notre Dame LAM implementation of MPI.
 * 
 * You should have received a copy of the License Agreement for the Notre
 * Dame LAM implementation of MPI along with the software; see the file
 * LICENSE.  If not, contact Office of Research, University of Notre
 * Dame, Notre Dame, IN 46556.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted subject to the conditions specified in the
 * LICENSE file.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * Additional copyrights may follow.
 * 
 *	Ohio Trollius
 *	Copyright 1997 The Ohio State University
 *	RBD
 *
 *	$Id: abort.c,v 6.10 2001/02/03 22:42:17 arodrig6 Exp $
 *
 *	Function:	- attempts to abort all processes in group
 *			- prints error message
 *	Accepts:	- communicator
 *			- error code
 *	Returns:	- MPI_SUCCESS or error code
 */

#include <lam_config.h>
#include <blktype.h>
#include <ksignal.h>
#include <mpi.h>
#include <mpisys.h>
#include <net.h>
#include <preq.h>
#include <rpisys.h>
#if LAM_WANT_IMPI
#include <impi.h>
#endif


/*@
   MPI_Abort - Terminates MPI execution environment

Input Parameters:
+ comm - communicator of tasks to abort 
- errcode - error code to return to invoking environment 

Notes:

Makes a "best attempt" to terminate all MPI processes associated with
the communicator 'comm'; for non-IMPI jobs, this will terminate all
processes.  'MPI_Abort' will hang, however, if the LAM daemon on the
node on which a rank was running has died.  For example, if a node
gets powered off in the middle of an MPI run and 'MPI_Abort' is
invoked, it may hang trying to contact the LAM daemon on the downed
node.

In the case of an IMPI job, in addition to terminating all local IMPI
procs, this function will also send an abort message to the local IMPI
host, which will, in turn send "closing" messages to all the other
IMPI hosts.  Their behavior upon receipt of these "closing" messages
is implementation dependant.

If the LAM IMPI host receives a premature "closing" message from a
remote IMPI host, it will attempt to continue processing.  Any new
messages to that host will likely hang, however.

'errcode' is split into three sections:

+ lower 8 bits: error class
. next 8 bits: function
- upper 16 bits: error code

If the error class is nonzero, the error code is the POSIX return
value for the program (i.e., exit(error_code) is eventually called to
terminate the program).  If the error class is zero, 'errcode' is
returned as the POSIX return value for the program.

This is mainly because 'MPI_Abort' is used internally in LAM/MPI to
abort MPI upon error; LAM packs all three pieces of data into the
POSIX return code.  Users who wish to return a specific value from the
program should use something similar to:

.vb
  return ((return_code << 16) + 1);
.ve

.N fortran

.N ACK
@*/
int MPI_Abort(MPI_Comm comm, int errcode)
{
	int		size;			/* group size */
	int		rank;			/* process rank */
	int		err;			/* error code */
	int		class;			/* error class */
	int		i;
	struct _proc	**p;

	lam_initerr();
	lam_setfunc(BLKMPIABORT);
/*
 * Switch error code to extracted errno value.  If the class is 0 then
 * the errno is taken to be the code.
 */
	lam_bkerr(errcode, &class, &i, &err);
	if (class) 
	  errcode = err;

	if (comm == MPI_COMM_NULL) 
	  comm = MPI_COMM_WORLD;

	err = MPI_Comm_size(comm, &size);
	if (err != MPI_SUCCESS) 
	  kexit(errcode);

	err = MPI_Comm_rank(comm, &rank);
	if (err != MPI_SUCCESS) 
	  kexit(errcode);

#if LAM_WANT_IMPI
	/* Tell the IMPI client daemon to abort.  IMPI_Abort simply
	   sends a message to the IMPI client daemon telling it to
	   quit; it does not perform any cleanup stuff. */

	if (LAM_IS_IMPI(MPI_COMM_WORLD))
	  IMPI_Abort();
#endif

/*
 * Doom all other processes in the group if we know their pids.
 */
	if ((_kio.ki_rtf & RTF_MPIRUN) || (_kio.ki_parent > 0)) {
		p = comm->c_group->g_procs;

		for (i = 0; i < size; ++i, ++p) {

			if (i != rank) {
				rpdoom((*p)->p_gps.gps_node, SELECT_PID,
						(*p)->p_gps.gps_pid, SIGUDIE);
			}
		}
	}
/*
 * Clean up any published names.
 */
	lam_nukepublished();

/*
 * Abort the local process if in the group.
 */
	if (rank >= 0) {
		kexit(errcode);
	}

	lam_resetfunc(BLKMPIABORT);
	return(MPI_SUCCESS);
}
