[Math-atlas-commits] CVS: AtlasBase/Clint atlas-lvl2.base, 1.231, 1.232
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2010-12-03 17:24:58
|
Update of /cvsroot/math-atlas/AtlasBase/Clint
In directory sfp-cvsdas-4.v30.ch3.sourceforge.com:/tmp/cvs-serv20510/Clint

Modified Files:
    atlas-lvl2.base
Log Message:

Index: atlas-lvl2.base
===================================================================
RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-lvl2.base,v
retrieving revision 1.231
retrieving revision 1.232
diff -C2 -d -r1.231 -r1.232
*** atlas-lvl2.base 2 Dec 2010 18:13:09 -0000 1.231
--- atlas-lvl2.base 3 Dec 2010 17:24:48 -0000 1.232
***************
*** 12052,12062 ****
--- 12052,12186 ----
  @ROUT mvthgen
  @define rt @mvt@
+ @define vn @Y@
  @ROUT mvnhgen
  @define rt @mvn@
+ @define vn @X@
  @ROUT r1hgen
  @define rt @r1@
+ @define vn @Y@
  @ROUT r2hgen
  @define rt @r2@
+ @define vn @Y@
  @ROUT r1hgen r2hgen mvthgen mvnhgen
+ void GenKernPeelN
+ (
+    ATL_@(ru)node_t *kp, /* kernel that has passed all restrictions */
+    ATL_@(ru)node_t *gp, /* general kernel w/o any restrictions */
+    char pre, /* precision modifier */
+    int beta, /* 0: beta is 0, else beta is 1 */
+    FILE *fpout, /* output file */
+    char *spc, /* string of spaces, can move ptr for more/less */
+    int size, /* elt size */
+ )
+ /*
+  * Kernel kp has passed all kp tests, and needs to be invoked with proper
+  * N partitioning. The N partitions are:
+  * Np : peel amount to achieve vector alignment
+  * NN : amount to call main kernel with
+  * Nr : any remainder forced by FNU
+  */
+ {
+    const int minN = FLAG_IS_SET(kp->flag, F@up@(ru)_FNU) ?
+                     Mmax(kp->minN, kp->NU) : kp->minN;
+    int i;
+ 
+    if (kp->alignY > size) /* we have Np partition */
+    {
+       i = GetPower2(kp->alignY);
+       if (i)
+          fprintf(fpout,
+          "%s ATL_CINT Np = (TYPE*)((((size_t)@(vn)) + %d)>>%d) - @(vn);\n",
+                  spc, kp->alignY-1, i);
+       else
+          fprintf(fpout,
+          "%s ATL_CINT Np = (TYPE*)((((size_t)@(vn)) + %d)/%d) - @(vn);\n",
+                  spc, kp->alignY-1, kp->alignY);
+       if (FLAG_IS_SET(kp->flag, F@up@(ru)_FNU)) /* all 3 partitions */
+       {
+          fprintf(fpout, "%s ATL_CINT N1 = N - N0;\n", spc);
+          i = GetPower2(kp->NU);
+          if (i)
+             fprintf(fpout, "%s ATL_CINT NN = (N1>>%d)<<%d;\n", spc, i, i);
+          else
+             fprintf(fpout, "%s ATL_CINT NN = (N1/%d)*%d;\n",
+                     spc, kp->NU, kp->NU);
+          if (minN > 1)
+          {
+             fprintf(fpout, "%s if (NN > %d)\n", spc, minN);
+             spc -= 3;
+          }
+          fprintf(fpout, "%s ATL_CINT Nr = N - N0 - NN\n", spc);
+ 
+          fprintf(fpout, "%s if (N0)\n", spc);
+          fprintf(fpout, "%s %s(M, N0, A, lda, X, Y);\n", spc, gp->kname);
+          fprintf(fpout,
+          "%s %s(M, NN, A+N0*(lda SHIFT), lda, X, Y+(N0 SHIFT));\n",
+                  spc, kp->kname);
+          fprintf(fpout, "%s if (Nr)\n", spc);
+          fprintf(fpout,
+          "%s %s(M, NN, A+(N0+NN)*(lda SHIFT), lda, X, Y+((N0+NN) SHIFT));\n",
+                  spc, kp->kname);
+          fprintf(fpout, "%s return;\n");
+          if (minN > 1)
+          {
+             spc += 3;
+             fprintf(fpout, "%s } /* end NN > minN check */\n", spc);
+          }
+       }
+       else /* N0 and NN are only partitions */
+       {
+          fprintf(fpout, "%s ATL_CINT NN = N - N0;\n", spc);
+          if (minN > 1)
+          {
+             fprintf(fpout, "%s if (NN > %d)\n", spc, minN);
+             spc -= 3;
+          }
+          fprintf(fpout, "%s if (N0)\n", spc);
+          fprintf(fpout, "%s %s(M, N0, A, lda, X, Y);\n", spc, gp->kname);
+          fprintf(fpout,
+          "%s %s(M, NN, A+N0*(lda SHIFT), lda, X, Y+(N0 SHIFT));\n",
+                  spc, kp->kname);
+          fprintf(fpout, "%s return;\n");
+          if (minN > 1)
+          {
+             spc += 3;
+             fprintf(fpout, "%s } /* end NN > minN check */\n", spc);
+          }
+       }
+    }
+    else if (FLAG_IS_SET(kp->flag, F@up@(ru)_FNU)) /* NN & Nr only part */
+    {
+       i = GetPower2(kp->NU);
+       if (i)
+          fprintf(fpout, "%s ATL_CINT NN = (N>>%d)<<%d, Nr = N-NN;\n",
+                  spc, i, i);
+       else
+          fprintf(fpout, "%s ATL_CINT NN = (N/%d)*%d, Nr = N-NN;\n",
+                  spc, kp->NU, kp->NU);
+       if (minN > 1)
+       {
+          fprintf(fpout, "%s if (NN > %d)\n", spc, minN);
+          spc -= 3;
+       }
+       fprintf(fpout, "%s %s(M, NN, A, lda, X, Y);\n", spc, kp->kname);
+       fprintf(fpout, "%s if (Nr)\n", spc);
+       fprintf(fpout,
+               "%s %s(M, Nr, A+(lda SHIFT)*NN, lda, X, Y+(NN SHIFT));\n",
+               spc, gp->kname);
+       fprintf(fpout, "%s return;\n");
+ 
+       if (minN > 1)
+       {
+          spc += 3;
+          fprintf(fpout, "%s } /* end NN > minN check */\n", spc);
+       }
+    }
+    else /* kernel can do all of N at once */
+    {
+       fprintf(fpout, "%s %s(M, N, A, lda, X, Y);\n", spc, kp->kname);
+       fprintf(fpout, "%s return;\n");
+    }
+ }
+ 
  void GenKernFunc
  (
***************
*** 12084,12087 ****
--- 12208,12212 ----
   */
  {
+    ATL_@(ru)node_t *kp, *gp;
     char *bet;
     char spcs[64], *spc;
***************
*** 12090,12093 ****
--- 12215,12219 ----
     int i, DOMPEEL=0, align, szsh;
  
+    for (gp=kb; gp->next; gp = gp->next); /* find general cleanup kernel */
     szsh = GetPower2(size);
     for (i=0; i < 63; i++)
***************
*** 12102,12106 ****
   * If there is only one kernel, and no size restrictions, just generate a macro
   */
!    if (!kb->next && kb->minN < 2 && kb->minM < 2)
     {
        fprintf(fpout, "#define ATL_GEN@(rt) %s\n", kb->rout);
--- 12228,12232 ----
   * If there is only one kernel, and no size restrictions, just generate a macro
   */
!    if (gp == kb)
     {
        fprintf(fpout, "#define ATL_GEN@(rt) %s\n", kb->rout);
***************
*** 12135,12145 ****
     for (kp=kb; kp; kp = kp->next)
     {
!       if (kp->minM > 0 && kp->minN > 0)
!          fprintf(fpout, "%sif (M > %d && N > %d)\n", spc, kp->minM, kp->minN);
        else if (kp->minM > 0)
           fprintf(fpout, "%sif (M > %d)\n", spc, kp->minM);
!       else if (kp->minN > 0)
!          fprintf(fpout, "%sif (N > %d)\n", spc, kp->minN);
!       if (kp->minM > 0 || kp->minN > 0)
        {
           fprintf(fpout, "%s{\n", spc);
--- 12261,12273 ----
     for (kp=kb; kp; kp = kp->next)
     {
!       int minN = FLAG_IS_SET(kp->flag, F@up@(ru)_FNU) ?
!                  Mmax(kp->minN, kp->nu) : kp->minN;
!       if (kp->minM > 0 && minN > 0)
!          fprintf(fpout, "%sif (M > %d && N > %d)\n", spc, kp->minM, minN);
        else if (kp->minM > 0)
           fprintf(fpout, "%sif (M > %d)\n", spc, kp->minM);
!       else if (minN > 0)
!          fprintf(fpout, "%sif (N > %d)\n", spc, minN);
!       if (kp->minM > 0 || minN > 0)
        {
           fprintf(fpout, "%s{\n", spc);
***************
*** 12176,12180 ****
           spc -= 3;
        }
- HERE HERE HERE
        if (kp->ldamul > size)
        {
--- 12304,12307 ----
***************
*** 12188,12191 ****
--- 12315,12329 ----
           spc -= 3;
        }
+       GenKernPeelN(kp, gp, pre, beta, fpout, spc, size);
+ HERE HERE
+ /*
+  * If ldamul prevents our using good kernel, can pump ldamul up and
+  * call kernel multiple times. Need a lot of logic here.
+  */
+       if (kp->ldamul > size)
+       {
+          spc += 3;
+          fprintf(fpout, "%s} /* end ldamul check */\n", spc);
+       }
  /*
|