[Math-atlas-commits] CVS: AtlasBase/Clint atlas-l2k.base, 1.7, 1.8 atlas-lvl2.base, 1.78, 1.79 atlas-make.base, 1.224, 1.225
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-07-30 23:43:41
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv22995/Clint Modified Files: atlas-l2k.base atlas-lvl2.base atlas-make.base Log Message: Index: atlas-l2k.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-l2k.base,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** atlas-l2k.base 30 Jul 2009 17:28:49 -0000 1.7 --- atlas-l2k.base 30 Jul 2009 23:43:23 -0000 1.8 *************** *** 997,1000 **** --- 997,1004 ---- #endif @endskip + #define ATL_USEPF + #ifndef PFDIST + #define PFDIST 32 + #endif @define sf @@ @whiledef sf u *************** *** 1043,1056 **** --- 1047,1073 ---- * Do Ar += xr*yr; Ai += xi*yr; for all 4 columns of A */ + + #ifdef ATL_USEPF + _mm_prefetch(A0+PFDIST, _MM_HINT_T0); + #endif xn = x0a; /* xn = {Xi, Xr} */ x0a = _mm_mul_pd(x0a, y0a); /* x0a = {Xi*Yr, Xr*Yr} */ a00 = _mm_add_pd(a00, x0a); /* a00 = {Ai+Xi*Yr, Ar+Xr*Yr} */ x0a = xn; + #ifdef ATL_USEPF + _mm_prefetch(A1+PFDIST, _MM_HINT_T0); + #endif xn = _mm_mul_pd(xn, y1a); /* xn = {Xi*Yr, Xr*Yr} */ a01 = _mm_add_pd(a01, xn); /* a01 = {Ai+Xi*Yr, Ar+Xr*Yr} */ + #ifdef ATL_USEPF + _mm_prefetch(A2+PFDIST, _MM_HINT_T0); + #endif xn = x0a; x0a = _mm_mul_pd(x0a, y2a); /* x0a = {Xi*Yr, Xr*Yr} */ a02 = _mm_add_pd(a02, x0a); /* a02 = {Ai+Xi*Yr, Ar+Xr*Yr} */ x0a = _mm_load@(sf)_pd(X+i); /* x0a = {Xi, Xr} */ + #ifdef ATL_USEPF + _mm_prefetch(A3+PFDIST, _MM_HINT_T0); + #endif xn = _mm_mul_pd(xn, y3a); /* xn = {Xi*Yr, Xr*Yr} */ a03 = _mm_add_pd(a03, xn); /* a02 = {Ai+Xi*Yr, Ar+Xr*Yr} */ Index: atlas-lvl2.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-lvl2.base,v retrieving revision 1.78 retrieving revision 1.79 diff -C2 -d -r1.78 -r1.79 *** atlas-lvl2.base 30 Jul 2009 17:28:50 -0000 1.78 --- atlas-lvl2.base 30 Jul 2009 23:43:23 -0000 1.79 *************** *** 12860,12875 **** for (i=1; 
i < nu; i++) fprintf(fpout, ", lda%d_ = lda%d_+(lda_)+(lda_)", i, i-1); ! fprintf(fpout, "; \\\n const TYPE"); ! for (i=0; i < nu; i++) ! fprintf(fpout, " x%dr=(x_)[%d], x%di=(x_)[%d]", i, 2*i, i, 2*i+1); ! fprintf(fpout, "; \\\n const TYPE"); ! for (i=0; i < nu; i++) ! fprintf(fpout, " xt%dr=(xt_)[%d], xt%di=(xt_)[%d]", i, 2*i, i, 2*i+1); ! fprintf(fpout, "; \\\n const TYPE"); ! for (i=0; i < nu; i++) ! fprintf(fpout, " y%dr=(y_)[%d], y%di=(y_)[%d]", i, 2*i, i, 2*i+1); ! fprintf(fpout, "; \\\n const TYPE"); ! for (i=0; i < nu; i++) ! fprintf(fpout, " yt%dr=(yt_)[%d], yt%di=(yt_)[%d]", i, 2*i, i, 2*i+1); fprintf(fpout, "; \\\n"); if (Uplo == AtlasUpper) --- 12860,12875 ---- for (i=1; i < nu; i++) fprintf(fpout, ", lda%d_ = lda%d_+(lda_)+(lda_)", i, i-1); ! fprintf(fpout, "; \\\n const TYPE x0r=*(x_), x0i=(x_)[1]"); ! for (i=1; i < nu; i++) ! fprintf(fpout, ", x%dr=(x_)[%d], x%di=(x_)[%d]", i, 2*i, i, 2*i+1); ! fprintf(fpout, "; \\\n const TYPE xt0r=*(xt_), xt0i=(xt_)[1]"); ! for (i=1; i < nu; i++) ! fprintf(fpout, ", xt%dr=(xt_)[%d], xt%di=(xt_)[%d]", i, 2*i, i, 2*i+1); ! fprintf(fpout, "; \\\n const TYPE y0r=*(y_), y0i=(y_)[1]"); ! for (i=1; i < nu; i++) ! fprintf(fpout, ", y%dr=(y_)[%d], y%di=(y_)[%d]", i, 2*i, i, 2*i+1); ! fprintf(fpout, "; \\\n const TYPE yt0r=*(yt_), yt0i=(yt_)[1]"); ! for (i=1; i < nu; i++) ! fprintf(fpout, ", yt%dr=(yt_)[%d], yt%di=(yt_)[%d]", i, 2*i, i, 2*i+1); fprintf(fpout, "; \\\n"); if (Uplo == AtlasUpper) Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.224 retrieving revision 1.225 diff -C2 -d -r1.224 -r1.225 *** atlas-make.base 27 Jul 2009 16:35:32 -0000 1.224 --- atlas-make.base 30 Jul 2009 23:43:23 -0000 1.225 *************** *** 1110,1114 **** # This target is for directly timing a call to the kernel, w/o cache flushing # ! 
@(pre)r1ktime : @(pre)r1ktime.o rm -f ATL_@(pre)ger1.c cp -f $(mySRCdir)/CASES/$(r1rout) ATL_@(pre)ger1.c --- 1110,1114 ---- # This target is for directly timing a call to the kernel, w/o cache flushing # ! @(pre)r1ktime : @(pre)r1ktime.o ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o rm -f ATL_@(pre)ger1.c cp -f $(mySRCdir)/CASES/$(r1rout) ATL_@(pre)ger1.c *************** *** 1116,1120 **** -D@(typ) -DATL_@(pre)ger1_a1_x1_yX=ATL_UGERK ATL_@(pre)ger1.c $(CLINKER) $(CLINKFLAGS) -o x@(pre)r1ktime @(pre)r1ktime.o \ ! ATL_@(pre)ger1.o $(TESTlib) $(ATLASlib) $(ATLRUN) $(R1Tdir) x@(pre)r1ktime -m $(M) -n $(N) -l $(lda) \ -C $(flushKB) $(align) $(tflags) --- 1116,1122 ---- -D@(typ) -DATL_@(pre)ger1_a1_x1_yX=ATL_UGERK ATL_@(pre)ger1.c $(CLINKER) $(CLINKFLAGS) -o x@(pre)r1ktime @(pre)r1ktime.o \ ! ATL_@(pre)ger1.o \ ! ATL_@(pre)gerk_Mlt16.o ATL_@(pre)gerk_axpy.o \ ! $(TESTlib) $(ATLASlib) $(ATLRUN) $(R1Tdir) x@(pre)r1ktime -m $(M) -n $(N) -l $(lda) \ -C $(flushKB) $(align) $(tflags) |