/*
 *             Automatically Tuned Linear Algebra Software v3.2
 *                      (C) Copyright 1999 Camm Maguire                      
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions, and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *   3. The name of the University of Tennessee, the ATLAS group,
 *      or the names of its contributers may not be used to endorse
 *      or promote products derived from this software without specific
 *      written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE. 
 *
 */
/*
 * Build-time tunables; both may be overridden from the compiler command line.
 *
 * STRIDE is forwarded as the second argument of every kernel call in the
 * driver below and also scales lda into the step nn = STRIDE*lda.
 */
#ifndef STRIDE
   #define STRIDE 1
#endif

/*
 * NDPM is the NDP value used for the last inclusion of camm_maa.h (the
 * kernel called from the driver's main loop) and the multiplier the driver
 * uses when it advances a and b past one main-loop group.
 */
#ifndef NDPM 
   #define NDPM 3
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Two-level token paste: Mjoin lets its arguments be macro-expanded first,
 * then mjoin glues the results together with ##.
 */
#define Mjoin(a,b) mjoin(a,b)
#define mjoin(a,b) a ## b

/*
 * BL is the beta tag (0, 1 or X) pasted into the kernel names; it is taken
 * from whichever of BETA0 / BETA1 / BETAX the build defines.  If none is
 * defined BL stays undefined and the literal token BL ends up in the names.
 */
#ifdef BETA0
#define BL 0
#endif
#ifdef BETA1
#define BL 1
#endif
#ifdef BETAX
#define BL X
#endif

/*
 * Name suffixes <k>ma<BL> for k = 6..1.  They are assigned to EXT before
 * each inclusion of camm_maa.h and pasted after "ma" at the call sites in
 * the driver's tail loop.
 */
#define EXT6 Mjoin(6ma,BL)
#define EXT5 Mjoin(5ma,BL)
#define EXT4 Mjoin(4ma,BL)
#define EXT3 Mjoin(3ma,BL)
#define EXT2 Mjoin(2ma,BL)
#define EXT1 Mjoin(1ma,BL)


/*
 * camm_maa.h is included once for each NDP value 6..1, with EXT set to the
 * matching <NDP>ma<BL> suffix.
 * NOTE(review): camm_maa.h is not visible here; presumably every inclusion
 * defines one kernel named ma<EXT> specialised for NDP -- confirm there.
 * The define/undef/include order must be kept exactly as written.
 */
#define NDP 6
#define EXT EXT6
#include "camm_maa.h"
#undef NDP
#define NDP 5
#undef EXT
#define EXT EXT5
#include "camm_maa.h"
#undef NDP
#define NDP 4
#undef EXT
#define EXT EXT4
#include "camm_maa.h"
#undef NDP
#define NDP 3
#undef EXT
#define EXT EXT3
#include "camm_maa.h"
#undef NDP
#define NDP 2
#undef EXT
#define EXT EXT2
#include "camm_maa.h"
#undef NDP
#define NDP 1
#undef EXT
#define EXT EXT1
#include "camm_maa.h"
#undef NDP

/*
 * Final inclusion: NDP is NDPM and the suffix gets a trailing "m"
 * (<NDPM>ma<BL>m), so it does not collide with the names generated above.
 * EXT keeps this value afterwards, which is why Mjoin(ma,EXT) in the driver
 * refers to this last variant.
 */
#define NDP NDPM
#undef EXT
#define EXT Mjoin(Mjoin(Mjoin(NDP,ma),BL),m)
#include "camm_maa.h"


#ifdef BETA0
void 
ATL_sgemvN_a1_x1_b0_y1(int m,int n,float alpha,const float *a,
		       int lda,const float *b,int binc,
		       float beta,float *c,int cinc) {
#endif
#ifdef BETA1
void 
ATL_sgemvN_a1_x1_b1_y1(int m,int n,float alpha,const float *a,
		       int lda,const float *b,int binc,
		       float beta,float *c,int cinc) {
#endif
#ifdef BETAX
void 
ATL_sgemvN_a1_x1_bX_y1(int m,int n,float alpha,const float *a,
		       int lda,const float *b,int binc,
		       float beta,float *c,int cinc) {
#endif


  int i,mm,nn;
  const float *ae;

#ifdef BETA0
  memset(c,0,m*sizeof(*c));
#endif
#ifdef BETAX
  for (i=0;i<m;i++,c++)
    *c*=beta;
  c-=m;
#endif

  ae=a+n*lda;
  nn=STRIDE*lda;

#if NDPM == 1
  for (;a<ae;a+=lda,b++)
    Mjoin(ma,EXT)(b,STRIDE,a,nn,c,m);
#else

  while (a+NDPM*nn<=ae) {
    for (i=0;i<STRIDE;i++,a+=lda,b++) 
      Mjoin(ma,EXT)(b,STRIDE,a,nn,c,m);

    a+=(NDPM-1)*nn;
    b+=(NDPM-1)*STRIDE;
  }

  for (i=0;a<ae && i<STRIDE;i++,a+=lda,b++) {

    mm=(ae-a)/nn;
    if (((ae-a)/lda)%STRIDE)
      mm++;
    
    if (mm == 1)
      Mjoin(ma,EXT1)(b,STRIDE,a,nn,c,m);

    else if (mm == 2)
      Mjoin(ma,EXT2)(b,STRIDE,a,nn,c,m);

    else if (mm == 3)
      Mjoin(ma,EXT3)(b,STRIDE,a,nn,c,m);

    else if (mm == 4)
      Mjoin(ma,EXT4)(b,STRIDE,a,nn,c,m);

    else if (mm == 5)
      Mjoin(ma,EXT5)(b,STRIDE,a,nn,c,m);

    else if (mm == 6)
      Mjoin(ma,EXT6)(b,STRIDE,a,nn,c,m);

  }

#endif

}

