[Math-atlas-commits] CVS: AtlasBase/Clint atlas-lvl2.base, 1.90, 1.91 atlas-make.base, 1.260, 1.261
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2010-02-18 01:12:54
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv3750/Clint Modified Files: atlas-lvl2.base atlas-make.base Log Message: Index: atlas-lvl2.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-lvl2.base,v retrieving revision 1.90 retrieving revision 1.91 diff -C2 -d -r1.90 -r1.91 *** atlas-lvl2.base 17 Feb 2010 18:26:48 -0000 1.90 --- atlas-lvl2.base 18 Feb 2010 00:15:41 -0000 1.91 *************** *** 12668,12673 **** } ! @ROUT r1hgen ! @extract -b @(topd)/gen.inc what=cw @(cw09) #include <stdio.h> #include <stdlib.h> --- 12668,12673 ---- } ! @ROUT r1hgen r2hgen ! @extract -b @(topd)/cw.inc lang=c -define date 2009 -define date 2010 #include <stdio.h> #include <stdlib.h> *************** *** 12681,12691 **** @extract -b @(basd)/atlas.base rout=Mylcm ! /* ! * For SYR and SYR2, generate a macro which does * a small NUxNU ! * triangular matrix so that GER kernel can be called ! * on rest of NU-wide panel. ! */ ! @whiledef su 1 2 ! @addkeys RX=@(su) void UnrollSYR@(su) ( --- 12681,12689 ---- @extract -b @(basd)/atlas.base rout=Mylcm ! @ROUT r1hgen ! @define su @1@ ! @ROUT r2hgen ! @define su @2@ ! @ROUT r1hgen r2hgen void UnrollSYR@(su) ( *************** *** 12697,12700 **** --- 12695,12701 ---- ) /* + * For SYR and SYR2, generate a macro which does a small NUxNU + * triangular matrix so that GER kernel can be called + * on rest of NU-wide panel. * Real precision unroll of SYR@(su) */ *************** *** 12718,12728 **** for (j=0; j < nu; j++) for (i=0; i <= j; i++) ! @RX 2 fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_ + y%d_*x%d_; \\\n", j, i, i, j, i, j); ! @RX 1 fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_; \\\n", j, i, i, j); ! @RX 1 2 } else --- 12719,12729 ---- for (j=0; j < nu; j++) for (i=0; i <= j; i++) ! @ROUT r2hgen fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_ + y%d_*x%d_; \\\n", j, i, i, j, i, j); ! @ROUT r1hgen fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_; \\\n", j, i, i, j); ! @ROUT r2hgen r1hgen } else *************** *** 12730,12740 **** for (j=0; j < nu; j++) for (i=j; i < nu; i++) ! @RX 2 fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_ + y%d_*x%d_; \\\n", j, i, i, j, i, j); ! @RX 1 fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_; \\\n", j, i, i, j); ! @RX 1 2 } fprintf(fpout, "}\n"); --- 12731,12741 ---- for (j=0; j < nu; j++) for (i=j; i < nu; i++) ! @ROUT r2hgen fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_ + y%d_*x%d_; \\\n", j, i, i, j, i, j); ! @ROUT r1hgen fprintf(fpout, " aa[lda%d_+%d] += x%d_*y%d_; \\\n", j, i, i, j); ! @ROUT r1hgen r2hgen } fprintf(fpout, "}\n"); *************** *** 12755,12763 **** int i, j; ! @RX 2 fprintf(fpout, "#define %s(A_, lda_, x_, y_, xt_, yt_) \\\n{ \\\n", name); ! @RX 1 fprintf(fpout, "#define %s(A_, lda_, x_, xt_) \\\n{ \\\n", name); ! @RX 1 2 fprintf(fpout, " TYPE *aa=(A_); \\\n"); fprintf(fpout, " ATL_CINT lda0_ = 0"); --- 12756,12764 ---- int i, j; ! @ROUT r2hgen fprintf(fpout, "#define %s(A_, lda_, x_, y_, xt_, yt_) \\\n{ \\\n", name); ! @ROUT r1hgen fprintf(fpout, "#define %s(A_, lda_, x_, xt_) \\\n{ \\\n", name); ! @ROUT r1hgen r2hgen fprintf(fpout, " TYPE *aa=(A_); \\\n"); fprintf(fpout, " ATL_CINT lda0_ = 0"); *************** *** 12770,12774 **** for (i=1; i < nu; i++) fprintf(fpout, ", xt%dr=(xt_)[%d], xt%di=(xt_)[%d]", i, 2*i, i, 2*i+1); ! @RX 2 fprintf(fpout, "; \\\n const TYPE y0r=*(y_), y0i=(y_)[1]"); for (i=1; i < nu; i++) --- 12771,12775 ---- for (i=1; i < nu; i++) fprintf(fpout, ", xt%dr=(xt_)[%d], xt%di=(xt_)[%d]", i, 2*i, i, 2*i+1); ! @ROUT r2hgen fprintf(fpout, "; \\\n const TYPE y0r=*(y_), y0i=(y_)[1]"); for (i=1; i < nu; i++) *************** *** 12777,12781 **** for (i=1; i < nu; i++) fprintf(fpout, ", yt%dr=(yt_)[%d], yt%di=(yt_)[%d]", i, 2*i, i, 2*i+1); ! @RX 1 2 fprintf(fpout, "; \\\n"); if (Uplo == AtlasUpper) --- 12778,12782 ---- for (i=1; i < nu; i++) fprintf(fpout, ", yt%dr=(yt_)[%d], yt%di=(yt_)[%d]", i, 2*i, i, 2*i+1); ! @ROUT r1hgen r2hgen fprintf(fpout, "; \\\n"); if (Uplo == AtlasUpper) *************** *** 12785,12789 **** for (i=0; i < j; i++) { ! @RX 2 fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", --- 12786,12790 ---- for (i=0; i < j; i++) { ! @ROUT r2hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", *************** *** 12792,12811 **** " aa[lda%d_+%d] += x%dr*yt%di+x%di*yt%dr + y%dr*xt%di+y%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j, i, j, i, j); ! @RX 1 fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*i, i, j, i, j); fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%di+x%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j, i, j, i, j); ! @RX 1 2 } ! @RX 2 fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", j, 2*j, j, j, j, j, j, j, j, j); ! @RX 1 fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*j, j, j, j, j); ! @RX 1 2 fprintf(fpout, " aa[lda%d_+%d] = 0.0; \\\n", j, 2*j+1); } --- 12793,12812 ---- " aa[lda%d_+%d] += x%dr*yt%di+x%di*yt%dr + y%dr*xt%di+y%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j, i, j, i, j); ! @ROUT r1hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*i, i, j, i, j); fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%di+x%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j, i, j, i, j); ! @ROUT r1hgen r2hgen } ! @ROUT r2hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", j, 2*j, j, j, j, j, j, j, j, j); ! @ROUT r1hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*j, j, j, j, j); ! @ROUT r1hgen r2hgen fprintf(fpout, " aa[lda%d_+%d] = 0.0; \\\n", j, 2*j+1); } *************** *** 12815,12830 **** for (j=0; j < nu; j++) { ! @RX 2 fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", j, 2*j, j, j, j, j, j, j, j, j); ! @RX 1 fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*j, j, j, j, j); ! @RX 1 2 fprintf(fpout, " aa[lda%d_+%d] = 0.0; \\\n", j, 2*j+1); for (i=j+1; i < nu; i++) { ! @RX 2 fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", --- 12816,12831 ---- for (j=0; j < nu; j++) { ! @ROUT r2hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", j, 2*j, j, j, j, j, j, j, j, j); ! @ROUT r1hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*j, j, j, j, j); ! @ROUT r1hgen r2hgen fprintf(fpout, " aa[lda%d_+%d] = 0.0; \\\n", j, 2*j+1); for (i=j+1; i < nu; i++) { ! @ROUT r2hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*yt%dr-x%di*yt%di + y%dr*xt%dr-y%di*xt%di; \\\n", *************** *** 12833,12842 **** " aa[lda%d_+%d] += x%dr*yt%di+x%di*yt%dr + y%dr*xt%di+y%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j, i, j, i, j); ! @RX 1 fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*i, i, j, i, j); fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%di+x%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j); ! @RX 1 2 } } --- 12834,12843 ---- " aa[lda%d_+%d] += x%dr*yt%di+x%di*yt%dr + y%dr*xt%di+y%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j, i, j, i, j); ! @ROUT r1hgen fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%dr-x%di*xt%di; \\\n", j, 2*i, i, j, i, j); fprintf(fpout, " aa[lda%d_+%d] += x%dr*xt%di+x%di*xt%dr; \\\n", j, 2*i+1, i, j, i, j); ! @ROUT r1hgen r2hgen } } *************** *** 12844,12849 **** fprintf(fpout, "}\n"); } - @killkeys RX - @endwhile int FixMB(char pre, int mu, int mb) --- 12845,12848 ---- *************** *** 12877,12880 **** --- 12876,12880 ---- } + @ROUT r1hgen void s1hgen ( *************** *** 12997,13000 **** --- 12997,13001 ---- } + @ROUT r2hgen void s2hgen ( *************** *** 13125,13128 **** --- 13126,13130 ---- fclose(fpout); } + @ROUT r1hgen r2hgen void PrintPrototype(FILE *fpout, char pre, char *rout, char *type, char *styp) *************** *** 13153,13157 **** } else ! sprintf(ln, "cp R1CASES/%s %s/%s.c\n", r1p->rout, path, r1p->kname); if (system(ln)) { --- 13155,13159 ---- } else ! sprintf(ln, "cp R@(su)CASES/%s %s/%s.c\n", r1p->rout, path, r1p->kname); if (system(ln)) { *************** *** 13208,13212 **** __LINE__, __FILE__); fprintf(fpout, "include Make.inc\n\n"); ! fprintf(fpout, "R1CC = $(%cKC)\nR1FLAGS = $(CDEFS) $(%cKCFLAGS)", UPRE, UPRE); fprintf(fpout, " -D%s\n\n", typD); --- 13210,13214 ---- __LINE__, __FILE__); fprintf(fpout, "include Make.inc\n\n"); ! fprintf(fpout, "R@(su)CC = $(%cKC)\nR@(su)FLAGS = $(CDEFS) $(%cKCFLAGS)", UPRE, UPRE); fprintf(fpout, " -D%s\n\n", typD); *************** *** 13243,13252 **** fprintf(fpout, "\t %s", r1p->comp); else ! fprintf(fpout, "\t $(R1CC)"); fprintf(fpout, " -o %s.o -c -DATL_UGERK=%s", r1p->kname, r1p->kname); if (r1p->cflags) fprintf(fpout, " %s -D%s", r1p->cflags, typD); else ! fprintf(fpout, " $(R1FLAGS)"); fprintf(fpout, " %s.c\n", r1p->kname); } --- 13245,13254 ---- fprintf(fpout, "\t %s", r1p->comp); else ! fprintf(fpout, "\t $(R@(su)CC)"); fprintf(fpout, " -o %s.o -c -DATL_UGERK=%s", r1p->kname, r1p->kname); if (r1p->cflags) fprintf(fpout, " %s -D%s", r1p->cflags, typD); else ! fprintf(fpout, " $(R@(su)FLAGS)"); fprintf(fpout, " %s.c\n", r1p->kname); } *************** *** 13255,13259 **** } ! void r1khgen(char pre, char *path, ATL_r1node_t *r1b) { char *ln; --- 13257,13261 ---- } ! void r@(su)khgen(char pre, char *path, ATL_r1node_t *r1b) { char *ln; *************** *** 13277,13281 **** i = strlen(path); ln = malloc(i+32*sizeof(char)); ! sprintf(ln, "%s/atlas_%cr1kernels.h", path, pre); fpout = fopen(ln, "w"); --- 13279,13283 ---- i = strlen(path); ln = malloc(i+32*sizeof(char)); ! sprintf(ln, "%s/atlas_%cr@(su)kernels.h", path, pre); fpout = fopen(ln, "w"); *************** *** 13283,13301 **** __LINE__, __FILE__); fprintf(fpout, ! "#ifndef ATLAS_%cR1KERNELS_H\n #define ATLAS_%cR1KERNELS_H\n\n", PRE, PRE); PrintPrototypes(pre, fpout, r1b); - @beginskip - for (r1p=r1b; r1p; r1p = r1p->next) - PrintPrototype(fpout, pre, r1p->str, type, styp); - if (aliases) - { - fprintf(fpout, "\n"); - for (i=0; aliases[i]; i += 2) - fprintf(fpout, "#define %-24s %s\n", aliases[i], aliases[i+1]); - - } - @endskip fprintf(fpout, "\n#endif /* end guard around atlas_%cr1kernels.h */\n", pre); --- 13285,13292 ---- __LINE__, __FILE__); fprintf(fpout, ! "#ifndef ATLAS_%cR@(su)KERNELS_H\n #define ATLAS_%cR@(su)KERNELS_H\n\n", PRE, PRE); PrintPrototypes(pre, fpout, r1b); fprintf(fpout, "\n#endif /* end guard around atlas_%cr1kernels.h */\n", pre); *************** *** 13349,13353 **** spcs[127] = '\0'; ! fprintf(fpout, "static ATL_r1kern_t ATL_GetR1Kern\n"); fprintf(fpout, " (ATL_CINT M, ATL_CINT N, void *A, ATL_CINT lda, \n"); fprintf(fpout, --- 13340,13344 ---- spcs[127] = '\0'; ! fprintf(fpout, "static ATL_r1kern_t ATL_GetR@(su)Kern\n"); fprintf(fpout, " (ATL_CINT M, ATL_CINT N, void *A, ATL_CINT lda, \n"); fprintf(fpout, *************** *** 13439,13443 **** } ! void r1hgen(char pre, char *path, int LVL, ATL_r1node_t *kb) { ATL_r1node_t *kp, *kur; --- 13430,13434 ---- } ! void r@(su)hgen(char pre, char *path, int LVL, ATL_r1node_t *kb) { ATL_r1node_t *kp, *kur; *************** *** 13464,13470 **** assert(sp); if (!LVL) ! sprintf(sp, "%s/atlas_%cr1.h", path, pre); else ! sprintf(sp, "%s/atlas_%cr1_L%d.h", path, pre, LVL); fpout = fopen(sp, "w"); free(sp); --- 13455,13461 ---- assert(sp); if (!LVL) ! sprintf(sp, "%s/atlas_%cr@(su).h", path, pre); else ! sprintf(sp, "%s/atlas_%cr@(su)_L%d.h", path, pre, LVL); fpout = fopen(sp, "w"); free(sp); *************** *** 13472,13476 **** styp = Pre2ScalarType(pre); ! fprintf(fpout, "#ifndef ATLAS_%cR1_L%d_H\n#define ATLAS_%cR1_L%d_H\n\n", PRE, LVL, PRE, LVL); --- 13463,13467 ---- styp = Pre2ScalarType(pre); ! fprintf(fpout, "#ifndef ATLAS_%cR@(su)_L%d_H\n#define ATLAS_%cR@(su)_L%d_H\n\n", PRE, LVL, PRE, LVL); *************** *** 13581,13585 **** { *FNAM = malloc(16*sizeof(char)); ! sprintf(*FNAM, "res/%cR1K.sum", pre); } } --- 13572,13576 ---- { *FNAM = malloc(16*sizeof(char)); ! sprintf(*FNAM, "res/%cR@(su)K.sum", pre); } } *************** *** 13606,13610 **** r1b->flag = iflag; r1b->CacheElts = (l1mul/100.0) * GetL1CacheElts(pre); ! r1hgen(pre, path, -1, r1b); exit(0); } --- 13597,13601 ---- r1b->flag = iflag; r1b->CacheElts = (l1mul/100.0) * GetL1CacheElts(pre); ! r@(su)hgen(pre, path, -1, r1b); exit(0); } *************** *** 13615,13625 **** SetAllR1TypeFlags(pre, r1b); /* - * Find out which are geniune kernels, and which are aliased - */ - @skip r1B = GetSortedUniqueR1Kerns(pre, r1b, aliases); - /* * Generate prototype file for all routines */ ! r1khgen(pre, path, r1b); /* * For each cache level, generate a header file which provides the function --- 13606,13612 ---- SetAllR1TypeFlags(pre, r1b); /* * Generate prototype file for all routines */ ! r@(su)khgen(pre, path, r1b); /* * For each cache level, generate a header file which provides the function *************** *** 13628,13632 **** ATL_R1SplitContexts(r1b, bases, bases+1, bases+2, bases+3); for (i=0; i < 3; i++) ! r1hgen(pre, path, i, bases[i]); r1b = ATL_R1LinkContexts(bases[0], bases[1], bases[2], bases[3]); /* --- 13615,13619 ---- ATL_R1SplitContexts(r1b, bases, bases+1, bases+2, bases+3); for (i=0; i < 3; i++) ! r@(su)hgen(pre, path, i, bases[i]); r1b = ATL_R1LinkContexts(bases[0], bases[1], bases[2], bases[3]); /* *************** *** 14030,14037 **** getX = (COPYX) ? Mjoin(PATL,cpsc) : NULL; } ! @ROUT ATL_gerk_axpy #include "atlas_misc.h" #include "atlas_level1.h" #ifdef Conj_ void Mjoin(PATL,gerck_axpy) --- 14017,14089 ---- getX = (COPYX) ? Mjoin(PATL,cpsc) : NULL; } ! @ROUT ATL_ger2k_Nlt8 #include "atlas_misc.h" #include "atlas_level1.h" + #ifdef Conj_ + void Mjoin(PATL,ger2ck_Nlt8) + #else + void Mjoin(PATL,ger2k_Nlt8) + #endif + (ATL_CINT M, ATL_CINT N, const SCALAR alpha0, const TYPE *X0, + ATL_CINT incX0, const TYPE *Y0, ATL_CINT incY0, + const SCALAR alpha1, const TYPE *X1, ATL_CINT incX1, const TYPE *Y1, + ATL_CINT incY1, TYPE *A, ATL_CINT lda) + /* + * This routine typically called when N is very small, and so we can't afford + * to copy the vectors even if M is large; in this case we simply address + * the columns of A one-by-one with simple loops. + */ + { + #ifdef TCPLX + const TYPE *xp0, *xp1; + const TYPE al0r = *alpha0, al0i = alpha0[1]; + const TYPE al1r = *alpha1, al1i = alpha1[1]; + ATL_CINT incA=(lda-M)<<1, incY02=incY0+incY0, incY12=incY1+incY1; + ATL_CINT incX02=incX0+incX0, incX12=incX1+incX1; + ATL_INT i, j; + register TYPE y0r, y0i, y1r, y1i, x0r, x0i, x1r, x1i; + + for (j=0; j < N; j++, A += lda, Y0 += incY02, Y1 += incY12) + { + #ifdef Conj_ + y0r = *Y0; y0i = -Y0[1]; + y1r = *Y1; y1i = -Y1[1]; + #else + y0r = *Y0; y0i = Y0[1]; + y1r = *Y1; y1i = Y1[1]; + #endif + x0r = al0r * y0r - al0i * y0i; + y0i = al0r * y0i + al0i * y0r; + y0r = x0r; + x0r = al1r * y1r - al1i * y1i; + y1i = al1r * y1i + al1i * y1r; + y1r = x0r; + for (i=0; i < M; i++, A += 2, xp0 += incX02, xp0 += incX12) + { + x0r = *X0; x0i = X0[1]; + x1r = *X1; x1i = X1[1]; + *A += x0r * y0r - x0i * y0i; + A[1] += x0r * y0i + x0i * y0r; + } + } + #else + ATL_INT i, j; + const TYPE *xp0, *xp1; + register TYPE y0, y1; + for (j=0; j < N; j++, A += lda, Y0 += incY0, Y1 += incY1) + { + y0 = alpha0 * *Y0; + y1 = alpha1 * *Y1; + xp0 = X0; + xp1 = X1; + for (i=0; i < M; i++, xp0 += incX0, xp0 += incX1) + A[i] += *xp0 * y0 + *xp1 * y1; + } + #endif + } + @ROUT ATL_gerk_axpy + #include "atlas_misc.h" + #include "atlas_level1.h" #ifdef Conj_ void Mjoin(PATL,gerck_axpy) *************** *** 14072,14094 **** #endif } @ROUT ATL_gerk_Mlt16 #include "atlas_misc.h" typedef void (*gerk_t) (const int M, const int N, const SCALAR alpha, const TYPE *X, const int incX, const TYPE *Y, const int incY, TYPE *A, const int lda); #ifdef TREAL @BEGINPROC gerkN M static void ATL_gerk_Meq@(M) (const int M, const int N, const SCALAR alpha, const TYPE *X, const int incX, const TYPE *Y, const int incY, TYPE *A, const int lda) { register int j; @declare " register TYPE " n n ";" ! y0 @define i @0@ @iwhile i < @(M) ! x@(i) @iexp i 1 @(i) + @endiwhile --- 14124,14166 ---- #endif } + @ROUT ATL_ger2k_Mlt16 + @define rt @ger2@ @ROUT ATL_gerk_Mlt16 + @define rt @ger@ + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 #include "atlas_misc.h" + @ROUT ATL_ger2k_Mlt16 + typedef void (*ger2k_t) + (ATL_CINT M, ATL_CINT N, const SCALAR alp_a, const TYPE *Xa, ATL_CINT incXa, + const TYPE *Ya, ATL_CINT incYa, const SCALAR alp_b, const TYPE *Xb, + ATL_CINT incXb, const TYPE *Yb, ATL_CINT incYb, TYPE *A, const int lda); + @ROUT ATL_gerk_Mlt16 typedef void (*gerk_t) (const int M, const int N, const SCALAR alpha, const TYPE *X, const int incX, const TYPE *Y, const int incY, TYPE *A, const int lda); + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 #ifdef TREAL @BEGINPROC gerkN M + @ROUT ATL_gerk_Mlt16 static void ATL_gerk_Meq@(M) (const int M, const int N, const SCALAR alpha, const TYPE *X, const int incX, const TYPE *Y, const int incY, TYPE *A, const int lda) + @ROUT ATL_ger2k_Mlt16 + static void ATL_ger2k_Meq@(M) + (ATL_CINT M, ATL_CINT N, const SCALAR alp_a, const TYPE *Xa, ATL_CINT incXa, + const TYPE *Ya, const int incYa, const SCALAR alp_b, const TYPE *Xb, + ATL_CINT incXb, const TYPE *Yb, ATL_CINT incYb, TYPE *A, const int lda) + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 { register int j; @declare " register TYPE " n n ";" ! @ROUT ATL_gerk_Mlt16 `y0` ! @ROUT ATL_ger2k_Mlt16 `y0a y0b` @define i @0@ @iwhile i < @(M) ! @ROUT ATL_gerk_Mlt16 `x@(i)` ! @ROUT ATL_ger2k_Mlt16 `x@(i)a x@(i)b` @iexp i 1 @(i) + @endiwhile *************** *** 14096,14099 **** --- 14168,14184 ---- @enddeclare + @ROUT ATL_ger2k_Mlt16 + @whiledef alp a b + if (alp_@(alp) == ATL_rone) goto ALP_@up@(alp)_1; + else if (alp_@(alp) != ATL_rnone) goto ALP_@up@(alp)_X; + @define i @0@ + @iwhile i < @(M) + x@(i)@(alp) = -X@(alp)[@(i)*incX@(alp)]; + @iexp i 1 @(i) + + @endiwhile + @undef i + X@up@(alp)_IS_LOADED: + @endwhile + @ROUT ATL_gerk_Mlt16 if (alpha == ATL_rone) goto ALPHA1; else if (alpha != ATL_rnone) goto ALPHAX; *************** *** 14105,14115 **** @undef i X_IS_LOADED: for (j=0; j < N; j++, A += lda) { y0 = Y[incY*j]; @define i @0@ @iwhile i < @(M) ! A[@(i)] += x@(i) * y0; @iexp i 1 @(i) + @endiwhile --- 14190,14207 ---- @undef i X_IS_LOADED: + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 for (j=0; j < N; j++, A += lda) { + @ROUT ATL_gerk_Mlt16 y0 = Y[incY*j]; + @ROUT ATL_ger2k_Mlt16 + y0a = Ya[incYa*j]; + y0b = Yb[incYb*j]; + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 @define i @0@ @iwhile i < @(M) ! @ROUT ATL_gerk_Mlt16 ` A[@(i)] += x@(i) * y0;` ! @ROUT ATL_ger2k_Mlt16 ` A[@(i)] += x@(i)a * y0a + x@(i)b * y0b;` @iexp i 1 @(i) + @endiwhile *************** *** 14118,14121 **** --- 14210,14214 ---- return; + @ROUT ATL_gerk_Mlt16 ALPHAX: @define i @0@ *************** *** 14136,14139 **** --- 14229,14253 ---- goto X_IS_LOADED; } + @ROUT ATL_ger2k_Mlt16 + @whiledef ab a b + ALP_@up@(ab)_X: + @define i @0@ + @iwhile i < @(M) + y0a = alp_@(ab); + x@(i)@(ab) = X@(ab)[@(i)*incX@(ab)] * y0a; + @iexp i 1 @(i) + + @endiwhile + @undef i + goto X@up@(ab)_IS_LOADED; + ALP_@up@(ab)_1: + @define i @0@ + @iwhile i < @(M) + x@(i)@(ab) = X@(ab)[@(i)*incX@(ab)]; + @iexp i 1 @(i) + + @endiwhile + @undef i + goto X@up@(ab)_IS_LOADED; + @endwhile + } @ENDPROC *************** *** 14145,14161 **** @undef j ! void Mjoin(PATL,gerk_Mlt16) (const int M, const int N, const SCALAR alpha, const TYPE *X, const int incX, const TYPE *Y, const int incY, TYPE *A, const int lda) /* ! * ATLAS's normal GER kernels are optimized for long-M, and loop over rows in * the inner loop. To avoid this killing us on short, wide matrices, have * special case code for M < 16. This also allows kernels to assume M >= 16. */ { ! @declare " static gerk_t gerks[15]={" y n "};" @define i @1@ @iwhile i < 16 ! ATL_gerk_Meq@(i) @iexp i 1 @(i) + @endiwhile --- 14259,14282 ---- @undef j ! void Mjoin(PATL,@(rt)k_Mlt16) ! @ROUT ATL_ger2k_Mlt16 ! (const int M, const int N, const SCALAR alp_a, const TYPE *Xa, ! const int incXa, const TYPE *Ya, const int incYa, const SCALAR alp_b, ! const TYPE *Xb, const int incXb, const TYPE *Yb, const int incYb, ! TYPE *A, const int lda) ! @ROUT ATL_gerk_Mlt16 (const int M, const int N, const SCALAR alpha, const TYPE *X, const int incX, const TYPE *Y, const int incY, TYPE *A, const int lda) + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 /* ! * ATLAS's normal @up@(rt) kernels are optimized for long-M, and loop over rows in * the inner loop. To avoid this killing us on short, wide matrices, have * special case code for M < 16. This also allows kernels to assume M >= 16. */ { ! @declare " static @(rt)k_t @(rt)ks[15]={" y n "};" @define i @1@ @iwhile i < 16 ! ATL_@(rt)k_Meq@(i) @iexp i 1 @(i) + @endiwhile *************** *** 14163,14170 **** --- 14284,14317 ---- @enddeclare ATL_assert(M < 16); + @ROUT ATL_ger2k_Mlt16 + ger2ks[M-1](M, N, alp_a, Xa, incXa, Ya, incYa, + alp_b, Xb, incXb, Yb, incYb, A, lda); + @ROUT ATL_gerk_Mlt16 gerks[M-1](M, N, alpha, X, incX, Y, incY, A, lda); + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 } #else /* complex type */ + @ROUT ATL_ger2k_Mlt16 + #ifdef Conj_ + void Mjoin(PATL,ger2ck_Mlt16) + #else + void Mjoin(PATL,ger2k_Mlt16) + #endif + (ATL_CINT M, ATL_CINT N, const SCALAR alp_a, const TYPE *Xa, ATL_CINT incXa, const TYPE *Ya, ATL_CINT incYa, SCALAR alp_b, const TYPE *Xb, + ATL_CINT incXb, const TYPE *Yb, ATL_CINT incYb, TYPE *A, ATL_CINT lda) + { + /* + * For now, complex simply calls loop-based routine. + */ + #ifdef Conj_ + Mjoin(PATL,ger2ck_Nlt8)(M, N, alp_a, Xa, incXa, Ya, incYa, + alp_b, Xb, incXb, Yb, incYb, A, lda); + #else + Mjoin(PATL,ger2k_Nlt8)(M, N, alp_a, Xa, incXa, Ya, incYa, + alp_b, Xb, incXb, Yb, incYb, A, lda); + #endif + } + @ROUT ATL_gerk_Mlt16 #ifdef Conj_ void Mjoin(PATL,gerck_Mlt16) *************** *** 14184,14187 **** --- 14331,14335 ---- #endif } + @ROUT ATL_gerk_Mlt16 ATL_ger2k_Mlt16 #endif @ROUT gemvT_oc Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.260 retrieving revision 1.261 diff -C2 -d -r1.260 -r1.261 *** atlas-make.base 16 Feb 2010 22:21:02 -0000 1.260 --- atlas-make.base 18 Feb 2010 00:15:45 -0000 1.261 *************** *** 1123,1130 **** ln -s $(SYSdir)/res/L1CacheSize res/L1CacheSize ! ATL_ger.c : rm -f ATL_ger.c echo "#define ATL_TUNING" > ATL_ger.c cat $(r1SRCdir)/ATL_ger.c >> ATL_ger.c @multidef typ DCPLX SCPLX DREAL SREAL --- 1123,1134 ---- ln -s $(SYSdir)/res/L1CacheSize res/L1CacheSize ! ATL_ger.c : $(r1SRCdir)/ATL_ger.c rm -f ATL_ger.c echo "#define ATL_TUNING" > ATL_ger.c cat $(r1SRCdir)/ATL_ger.c >> ATL_ger.c + ATL_ger2.c : $(r1SRCdir)/ATL_ger2.c + rm -f ATL_ger2.c + echo "#define ATL_TUNING" > ATL_ger2.c + cat $(r1SRCdir)/ATL_ger2.c >> ATL_ger2.c @multidef typ DCPLX SCPLX DREAL SREAL *************** *** 1145,1148 **** --- 1149,1153 ---- @(pre)@(rt)obj = ATL_@(pre)ger.o ATL_@(pre)ge@(rt)k.o \ ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o + ATL_@(pre)ger2k_Mlt16.o ATL_@(pre)gerk_Nlt8.o @ptyp ! @undef r0 *************** *** 1152,1157 **** # This target is for directly timing a call to the kernel, w/o cache flushing # @whiledef rt r1 r2 ! @(pre)@(rt)ktime : @(pre)@(rt)ktime.o ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o rm -f ATL_@(pre)ge@(rt).c cp -f $(mySRCdir)/@up@(rt)CASES/$(@(rt)rout) ATL_@(pre)ge@(rt).c --- 1157,1166 ---- # This target is for directly timing a call to the kernel, w/o cache flushing # + @define degkern @ATL_@(pre)ger2k_Mlt16.o ATL_@(pre)ger2k_Nlt8.o@ + @define degkern @ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o@ + @multidef rk 1 2 @whiledef rt r1 r2 ! @addkeys rank=@rt ! @(pre)@(rt)ktime : @(pre)@(rt)ktime.o @(degkern) rm -f ATL_@(pre)ge@(rt).c cp -f $(mySRCdir)/@up@(rt)CASES/$(@(rt)rout) ATL_@(pre)ge@(rt).c *************** *** 1160,1164 **** $(CLINKER) $(CLINKFLAGS) -o x@(pre)@(rt)ktime @(pre)@(rt)ktime.o \ ATL_@(pre)ge@(rt).o \ ! ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o \ $(TESTlib) $(ATLASlib) $(ATLRUN) $(R1Tdir) x@(pre)@(rt)ktime -m $(M) -n $(N) -l $(lda) \ --- 1169,1173 ---- $(CLINKER) $(CLINKFLAGS) -o x@(pre)@(rt)ktime @(pre)@(rt)ktime.o \ ATL_@(pre)ge@(rt).o \ ! ` @(degkern) \` $(TESTlib) $(ATLASlib) $(ATLRUN) $(R1Tdir) x@(pre)@(rt)ktime -m $(M) -n $(N) -l $(lda) \ *************** *** 1209,1212 **** --- 1218,1224 ---- @(pre)ge@(rt)tune.o : $(mySRCdir)/ge@(rt)tune.c $(ICC) -c $(ICCFLAGS) -o $@ -D@(typ) $(mySRCdir)/ge@(rt)tune.c + @undef rk + @undef degkern + @killkeys rank @endwhile ATL_@(pre)ger.o : ATL_ger.c atlas_@(pre)r1.h *************** *** 1215,1221 **** $(ICC) -c $(ICCFLAGS) -o $@ -D@(typ) -I$(R1Tdir) ATL_ger2.c @ptyp z c `@define be @Xi0@` ! @whiledef suf Mlt16 axpy @whiledef rt r r2 ! ATL_@(pre)ge@(rt)k_@(suf).o : $(r1SRCdir)/ATL_ge@(rt)k_@(suf).c $(@up@(upr)KC) -c $(@up@(upr)KCFLAGS) $(CDEFS) -I$(R1Tdir) \ -o $@ -D@(typ) $(r1SRCdir)/ATL_ge@(rt)k_@(suf).c --- 1227,1233 ---- $(ICC) -c $(ICCFLAGS) -o $@ -D@(typ) -I$(R1Tdir) ATL_ger2.c @ptyp z c `@define be @Xi0@` ! @whiledef suf Mlt16 axpy Nlt8 @whiledef rt r r2 ! ATL_@(pre)ge(rt)k_@(suf).o : $(r1SRCdir)/ATL_ge@(rt)k_@(suf).c $(@up@(upr)KC) -c $(@up@(upr)KCFLAGS) $(CDEFS) -I$(R1Tdir) \ -o $@ -D@(typ) $(r1SRCdir)/ATL_ge@(rt)k_@(suf).c *************** *** 3923,3927 **** @whiledef pre d s @(pre)obj = ATL_@(pre)ger.o ATL_@(pre)ger_L1.o ATL_@(pre)ger_L2.o \ ! ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o @endwhile @whiledef pre z c --- 3935,3940 ---- @whiledef pre d s @(pre)obj = ATL_@(pre)ger.o ATL_@(pre)ger_L1.o ATL_@(pre)ger_L2.o \ ! ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o \ ! ATL_@(pre)ger2k_Mlt16.o ATL_@(pre)ger2k_Nlt8.o @endwhile @whiledef pre z c *************** *** 3930,3934 **** ATL_@(pre)geru_L2.o ATL_@(pre)gerc_L2.o \ ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o \ ! ATL_@(pre)gerck_Mlt16.o ATL_@(pre)gerck_axpy.o @endwhile --- 3943,3949 ---- ATL_@(pre)geru_L2.o ATL_@(pre)gerc_L2.o \ ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o \ ! ATL_@(pre)gerck_Mlt16.o ATL_@(pre)gerck_axpy.o \ ! ATL_@(pre)ger2k_Mlt16.o ATL_@(pre)ger2k_Nlt8.o \ ! ATL_@(pre)ger2ck_Mlt16.o ATL_@(pre)ger2ck_Nlt8.o @endwhile *************** *** 3961,3966 **** ATL_@(pre)ger.o : $(mySRCdir)/ATL_ger.c $(INCSdir)/atlas_r1.h $(@up@(pre)KC) -o $@ -c $(@(pre)KCFLAGS) -D@up@(pre)REAL $(mySRCdir)/ATL_ger.c - @skip ATL_@(pre)ger1_a1_x1_yX.o : ATL_@(pre)ger1.c - @skip $(@up@(pre)KC) -o $@ -c $(@(pre)KCFLAGS) -D@up@(pre)REAL ATL_@(pre)ger1.c @endwhile --- 3976,3979 ---- *************** *** 3993,3996 **** --- 4006,4013 ---- $(@up@(pre)KC) -o $@ -c $(@(pre)KCFLAGS) -D@(typ) $(mySRCdir)/ATL_gerk_@(suf).c @endwhile + @whiledef suf Mlt16 Nlt8 + ATL_@(pre)ger2k_@(suf).o : $(mySRCdir)/ATL_ger2k_@(suf).c + $(@up@(pre)KC) -o $@ -c $(@(pre)KCFLAGS) -D@(typ) $(mySRCdir)/ATL_ger2k_@(suf).c + @endwhile @undef typ @endwhile *************** *** 4001,4004 **** --- 4018,4025 ---- $(@up@(pre)KC) -o $@ -c $(@(pre)KCFLAGS) -D@(typ) -DConj_ $(mySRCdir)/ATL_gerk_@(suf).c @endwhile + @whiledef suf Mlt16 Nlt8 + ATL_@(pre)ger2ck_@(suf).o : $(mySRCdir)/ATL_ger2k_@(suf).c + $(@up@(pre)KC) -o $@ -c $(@(pre)KCFLAGS) -D@(typ) -DConj_ $(mySRCdir)/ATL_ger2k_@(suf).c + @endwhile @undef typ @endwhile |