#include "tools.h"
void Cpsrow2col2(trans, N, nvec, X, IX, JX, descX, Y, IY, JY, descY)
char *trans;
int N;
int nvec;
float *X;
int IX;
int JX;
int *descX;
float *Y;
int IY;
int JY;
int  *descY;
{
   void Cblacs_gridinfo();
   void Csgesd2d();
   void Csgerv2d();
   void Cscpycvec2cvec();
   void Cscpyrvec2rvec();
   void Cscpycvec2rvec();
   void Cscpyrvec2cvec();
   void Cscpy();
   void CscpyTrans();

   int ctxt, nprow, npcol, myrow, mycol, xrow, xcol, yrow, ycol, csrc, rdest;
   int nblocks, lcm, rblkskip, cblkskip, mydist;
   int i, j, k, istart, iskip, nb, kb, LOCp, LOCq;
   float *x, *y, *work, *sptr;
   CPYPTR pack, unpack;

/*
 * Get some commonly used info
 */
   ctxt = descX[CTXT_];
   nb   = descX[NB_];    /* remember, descX[NB_] == descY[MB_] */
   Cblacs_gridinfo(ctxt, &nprow, &npcol, &myrow, &mycol);
   Cinfog2l(IX, JX, descX, nprow, npcol, myrow, mycol, &i, &j, &xrow, &xcol);
   x = &X[ i+j*descX[LLD_] ];
   Cinfog2l(IY, JY, descY, nprow, npcol, myrow, mycol, &i, &j, &yrow, &ycol);
   y = &Y[ i+j*descY[LLD_] ];
   lcm = Clcm(nprow, npcol);
   rblkskip = lcm / npcol;
   cblkskip = lcm / nprow;
/*
 * Get workspace
 */
   LOCp = Cnumroc2(N, IY, nb, myrow, descY[RSRC_], nprow);
   LOCq = Cnumroc2(N, JX, nb, mycol, descX[CSRC_], npcol);
   i = k = 0;
   if (myrow == xrow) i = ( ((LOCq + nb-1)/nb + rblkskip-1) / rblkskip );
   if (mycol == ycol) k = ( ((LOCp + nb-1)/nb + cblkskip-1) / cblkskip );
   if (i > k) k = nb * nvec * i;
   else k *= nb * nvec;
   work = (float *) malloc(k * sizeof(*x));
/*
 * See if we can use specially designed vector pack/unpack, or if we must
 * use general (un)packing routine.  Also, allow the user to perform either
 * tranpose or hermition transpose.
 */
   if (nvec == 1)
   {
      pack = Cscpyrvec2cvec;
      unpack = Cscpyrvec2cvec;
   }
   else
   {
      pack = Cscpy;
      if (*trans == 't') unpack = CscpyTrans;
      else unpack = Cscpy;
   }

/*
 * If the first block is partial, handle it seperately
 */
   kb = nb - (JX % nb);
   if (kb > N) kb = N;
   if (kb != nb)
   {
      if ( (myrow==yrow) && (mycol==ycol) )
      {
         if ( (myrow != xrow) || (mycol != xcol) )
         {
            Csgerv2d(ctxt, nvec, kb, work, nvec, xrow, xcol);
            unpack(nvec, kb, work, nvec, y, descY[LLD_]);
         }
         else
         {
            unpack(nvec, kb, x, descX[LLD_], y, descY[LLD_]);
            x += kb*descX[LLD_];
            LOCq -= kb;
         }
         LOCp -= kb;
         y += kb;
      }
      else if ( (myrow==xrow) && (mycol==xcol) )
      {
         Csgesd2d(ctxt, nvec, kb, x, descX[LLD_], yrow, ycol);
         x += kb * descX[LLD_];
         LOCq -= kb;
      }
      JX += kb;
      IY += kb;
      xcol = (xcol + 1) % npcol;
      yrow = (yrow + 1) % nprow;
      N -= kb;
   }
/*
 * If I'm in the process row owning X (the source vector)
 */
   if (myrow == xrow)
   {
/*
 *    Figure my distance from process owning first element of sub( X ).
 *    The process in ycol the same distance from the process owning the
 *    first element of sub( Y ) will want my first block.
 */
      istart = 0;
      mydist = (npcol + mycol - xcol) % npcol;
      rdest = (yrow + mydist) % nprow;
      iskip = nb * rblkskip;
/*
 *    Loop over all possible destination processes
 */
      for(k=0; k < rblkskip; k++)
      {
/*
 *       If I'm not the destination process
 */
         if ( (myrow != rdest) || (mycol != ycol) )
         {
/*
 *          Pack all relavent blocks into work
 */
            for (j=0, i=istart; i < LOCq; i += iskip, j += nb*nvec)
            {
               kb = LOCq - i;
               if (kb > nb) kb = nb;
               pack(nvec, kb, &x[ i*descX[LLD_] ], descX[LLD_], &work[j], nvec);
            }
/*
 *          Send appropriate blocks of X, if any.  Note that we send nb-kb extra
 *          rows if nb != kb, but it makes computation easier for receiving node.
 */
            if (j > 0) Csgesd2d(ctxt, j, 1, work, j, rdest, ycol);
         }
/*
 *       If I'm both source and destination, save where to start copying from
 */
         else sptr = &x[ istart*descX[LLD_] ];
/*
 *       Increment where we start packing from, and go on to next destination
 *       process
 */
         istart += nb;
         rdest = (rdest + npcol) % nprow;
      }
   }
/*
 * If I'm part of the process row owning sub( Y )
 */
   if (mycol == ycol)
   {
/*
 *    Figure my distance from process owning first element of sub( Y ).
 *    The process in xrow the same distance from the process owning the
 *    first element of sub( X ) will have my first block.
 */
      istart = 0;
      mydist = (nprow + myrow - yrow) % nprow;
      csrc = (xcol + mydist) % npcol;
      iskip = nb * cblkskip;
      for(k=0; k < cblkskip; k++)
      {
/*
 *       If I'm not the source of this portion of X, I'll need to recieve
 */
         if ( (myrow != xrow) || (mycol != csrc) )
         {
/*
 *          Figure how much data to receive, and if there is any, receive it
 */
            nblocks = (LOCp - istart + nb-1) / nb;
            j = nvec * nb * ( (nblocks+cblkskip-1) / cblkskip );
            if (j > 0) Csgerv2d(ctxt, j, 1, work, j, xrow, csrc);
/*
 *          Copy X's data to Y
 */
            for (i=istart, j=0; i < LOCp; i += iskip, j += nb*nvec)
            {
               kb = LOCp - i;
               if (kb > nb) kb = nb;
               unpack(nvec, kb, &work[j], nvec, &y[i], descY[LLD_]);
            }
         }
/*
 *       If I'm both source and destination, just copy
 */
         else
         {
            for (i=istart; i < LOCp; i += iskip)
            {
               kb = LOCp - i;
               if (kb > nb) kb = nb;
               unpack(nvec, kb, sptr, descX[LLD_], &y[i], descY[LLD_]);
               sptr += nb * descX[LLD_] * rblkskip;
            }
         }
/*
 *       Increment where we start packing from, and go on to next destination
 *       process
 */
         istart += nb;
         csrc = (csrc + nprow) % npcol;
      }
   }
   if (work) free(work);
}
