[Math-atlas-commits] CVS: AtlasBase/Clint atlas-lvl2.base, 1.28, 1.29
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-04-24 17:17:45
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv6326/Clint Modified Files: atlas-lvl2.base Log Message: Index: atlas-lvl2.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-lvl2.base,v retrieving revision 1.28 retrieving revision 1.29 diff -C2 -d -r1.28 -r1.29 *** atlas-lvl2.base 23 Apr 2009 22:04:00 -0000 1.28 --- atlas-lvl2.base 24 Apr 2009 17:17:28 -0000 1.29 *************** *** 10948,10951 **** --- 10948,10963 ---- # inst can be varied wt this macro, as can fetch distance. # + @ROUT ATL_gemvN_tM + #include "atlas_mvN_Mdegen.h" + + void Mjoin(PATL,gemvN_tM) + (ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ATL_CINT lda, + const TYPE *X, ATL_CINT incX, const SCALAR beta, TYPE *Y, ATL_CINT incY) + /* + * This routine is specialized for very small M dimension GEMV. + */ + { + ATL_assert(M <= 16); + } @ROUT ATL_gemv_new void Mjoin(PATL,gemv) *************** *** 10954,10970 **** const SCALAR beta, TYPE *Y, ATL_CINT incY) { ! /* ! * If we have an aligned kernel, we can use it if alls columns have the same ! * alignment (so we can force the required alignment through peeling. ! * gemv[N,T]_AL is the aligned driver routine, which will also align X & Y ! * to this same value. Otherwise, we call the general gemv drivers. ! */ ! #if ATL_MVKERNALIGN > ATL_sizeof ! i = ATL_MulBySize(lda); ! if ((i/ATL_MVKERNALIGN)*ATL_MVKERNALIGN == i) { } ! #endif } @ROUT ATL_gemvT @define TA @T@ --- 10966,10994 ---- const SCALAR beta, TYPE *Y, ATL_CINT incY) { ! if (M <= 8) ! { ! if (M > 0) { + if (TA == AtlasNoTrans || TA == AtlasConj) + Mjoin(PATL,gemvN_degenM)(M, N, alpha, A, lda, X, incX, beta, Y, incY); + else + Mjoin(PATL,gemvT_degenM)(M, N, alpha, A, lda, X, incX, beta, Y, incY); } ! return; ! } ! if (N <= 8) ! { ! if (N > 0) ! { ! if (TA == AtlasNoTrans || TA == AtlasConj) ! Mjoin(PATL,gemvN_degenN)(M, N, alpha, A, lda, X, incX, beta, Y, incY); ! else ! Mjoin(PATL,gemvT_degenN)(M, N, alpha, A, lda, X, incX, beta, Y, incY); ! } ! return; ! } } + typedef void (*mvkern_t)(ATL_CINT, ATL_CINT, const TYPE*, ATL_CINT, const TYPE*, + const SCALAR, TYPE*); @ROUT ATL_gemvT @define TA @T@ *************** *** 10991,11016 **** { int COPYX=0, COPYY=0; void (*getX)(const int N, const SCALAR alpha, const TYPE *X, const int incX, TYPE *Y, const int incY); void (*putY)(const int N, const SCALAR alpha, const TYPE *X, const int incX, const SCALAR beta, TYPE *Y, const int incY); ! void (*gemv0)(const int M, const int N, const SCALAR alpha, ! const TYPE *A, const int lda, const TYPE *X, const int incX, ! const SCALAR beta, TYPE *Y, const int incY); ! void (*gemv1)(const int M, const int N, const SCALAR alpha, ! const TYPE *A, const int lda, const TYPE *X, const int incX, ! const SCALAR beta, TYPE *Y, const int incY); ! ! ATL_GetPartMV@(TA)(A, lda, &mb, &nb); ! @ROUT ATL_gemvN /* ! * Compute if we need to copy the vectors. We must always copy Y to ! * contiguous storage for our kernels for NoTranspose. We will apply alpha ! * during the Y update, and if we don't copy, we must see if it is cheaper ! * to apply alpha by copying X or Y */ ! COPYY = (incY != 1 || (AlphaIsX && Nx <= Ny)); ! COPYX = (!COPYY && AlphaIsX); ! @ROUT ATL_gemvT /* * Compute if we need to copy the vectors. We must always copy X to --- 11015,11096 ---- { int COPYX=0, COPYY=0; + int mu, nu; /* M and N unrolling of kernel */ + int Fmu, Fnu; /* if nonzero, kernel only handles multiples of this size */ void (*getX)(const int N, const SCALAR alpha, const TYPE *X, const int incX, TYPE *Y, const int incY); void (*putY)(const int N, const SCALAR alpha, const TYPE *X, const int incX, const SCALAR beta, TYPE *Y, const int incY); ! mvkern_t gemv0, gemv1, gemvX; /* main gemv kernels */ ! mvkern_t mvdN0, mvdN1, mvdNX; /* small-N handling kernels */ ! int allalign=0; /* does kernel require X wt all alignments? */ ! @ROUT ATL_gemvT /* ! * Call special code if M is degenerate ! */ ! if (M <= 8) ! { ! if (M > 0) ! Mjoin(PATL,gemvT_degenM)(M, N, alpha, A, lda, X, incX, ! beta, Y, incY); ! return; ! } ! /* ! * ======================================= ! * Select kernel to be used by this driver ! * ======================================= */ ! /* ! * If we have a kernel that insists that A be alignable, use it. Note that ! * transpose kernels must take any value of M, but are allowed to insist they ! * handle only certain multiples of N, which is stored in Fnu ! */ ! gemv0 = gemv1 = gemvX = NULL; ! mu = nu = ku = 1; ! Fnu = 0; ! alignX = alignY = allalignX = 0; ! #if ATL_MVALIGNA > ATL_sizeof ! mvdN0 = mvdN1 = mvdNX = Mjoin(PATL,gemvT_degenN); ! i = ATL_MulBySize(lda); ! if (((i/ATL_MVALIGNA)*ATL_MVALIGNA) == i) ! { ! gemv0 = ATL_mvk_alignA_b0; ! gemv1 = ATL_mvk_alignA_b1; ! gemvX = ATL_mvk_alignA_bX; ! #ifdef ATL_MVA_ALLALIGNX ! allalignX = 1; ! #elif defined(ATL_MVA_ALIGNX) ! alignX = ATL_MVA_ALIGNX; ! #endif ! #if defined(ATL_MVA_ALIGNY) ! alignY = ATL_MVA_ALIGNY; ! #endif ! #if defined(ATL_MVA_FNU) ! Fnu = ATL_MVA_FNU; ! #ifndef ATL_MV_FNU ! mvdN0 = ATL_mvk_b0; ! mvdN1 = ATL_mvk_b1; ! mvdNX = ATL_mvk_bX; ! #endif ! #endif ! ATL_GetPartMVA@(TA)(A, lda, &mb, &nb); ! } ! if (!gemv0) ! { ! gemv0 = ATL_mvk_b0; ! gemv1 = ATL_mvk_b1; ! gemvX = ATL_mvk_bX; ! #ifdef ATL_MV_ALLALIGNX ! allalignX = 1; ! #elif defined(ATL_MV_ALIGNX) ! alignX = ATL_MV_ALIGNX; ! #endif ! #if defined(ATL_MV_ALIGNY) ! alignY = ATL_MV_ALIGNY; ! #endif ! #if defined(ATL_MV_FNU) ! Fnu = ATL_MV_FNU; ! #endif ! ATL_GetPartMV@(TA)(A, lda, &mb, &nb); ! } /* * Compute if we need to copy the vectors. We must always copy X to *************** *** 11019,11025 **** * to apply alpha by copying X or Y */ ! COPYX = (incX != 1 || (AlphaIsX && Nx <= Ny)); COPYY = (!COPYX && AlphaIsX); ! @ROUT ATL_gemvT ATL_gemvN if (COPYY || BetaIsZero) gemv0 = ATL_gemv@(TA)_b0; --- 11099,11126 ---- * to apply alpha by copying X or Y */ ! COPYX = (incX != 1 | (AlphaIsX & Nx <= Ny) | allalignX); ! if (!COPYX && alignX) ! { ! sz = (size_t) X; ! COPYX = ((sz/alignX)*alignX != sz); ! } COPYY = (!COPYX && AlphaIsX); ! if (!COPYY && alignY) ! { ! sz = (size_t) Y; ! COPYY = (((sz/alignY)*alignY != sz) | (incY != 1)); ! } ! /* ! * If N is not at least as large as the forced unroll of the selected kernel, ! * call the degenerate-N driver to handle the gemv ! */ ! if (N < Fnu) ! { ! Mjoin(PATL,gemvT_degenN)(M, N, alpha, A, lda, X, incX, beta, Y, incY); ! return; ! } ! /* ! * See where I'm going to apply BETA ! */ if (COPYY || BetaIsZero) gemv0 = ATL_gemv@(TA)_b0; |