[Math-atlas-commits] CVS: AtlasBase/Clint atlas-make.base, 1.102, 1.103 atlas.base, 1.107, 1.108
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2007-03-15 16:33:45
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv13333/Clint Modified Files: atlas-make.base atlas.base Log Message: Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.102 retrieving revision 1.103 diff -C2 -d -r1.102 -r1.103 *** atlas-make.base 10 Mar 2007 23:18:53 -0000 1.102 --- atlas-make.base 15 Mar 2007 16:33:35 -0000 1.103 *************** *** 258,261 **** --- 258,263 ---- mkdir include touch include/atlas_cacheedge.h + touch include/atlas_zdNKB.h + touch include/atlas_csNKB.h @beginskip touch include/atlas_zsysinfo.h *************** *** 1434,1437 **** --- 1436,1443 ---- $(MAKE) $(pre)RunFindCE + res/atlas_zdNKB.h : + $(MAKE) zdRunFindCE + res/atlas_csNKB.h : + $(MAKE) csRunFindCE xemit_mm : emit_mm.o *************** *** 1512,1515 **** --- 1518,1541 ---- @endwhile + @multidef typ DCPLX SCPLX + @multidef pr1 z c + @multidef pr2 d s + @whiledef pre zd cs + @(pre)RunFindCE : x@(pre)findCE + $(ATLRUN) $(MMTdir) x@(pre)findCE -f res/atlas_@(pre)NKB.h + cp -f res/atlas_@(pre)NKB.h $(INCAdir)/. + @(pre)findCE.o : $(mySRCdir)/findCE.c + $(ICC) $(ICCFLAGS) -c -D@(typ) -DATL_JITcp -o $@ $(mySRCdir)/findCE.c + + x@(pre)findCE : $(INCAdir)/atlas_type.h $(L3INCdep) @(pr1)mmlib @(pr2)mmlib \ + @(pre)findCE.o + cd $(GMMdir) ; $(MAKE) ATL_@(pre)FindCE_mm.o typ=@(typ) + $(CLINKER) $(CLINKFLAGS) -o $@ @(pre)findCE.o \ + $(GMMdir)/ATL_@(pre)FindCE_mm.o $(ATLASlib) $(LIBS) + @undef typ + @undef pr1 + @undef pr2 + @endwhile + @multidef typ DCPLX SCPLX DREAL SREAL @multidef upr d s d s *************** *** 1527,1533 **** - $(GMMdir)/ATL_@(pre)FindCE_mm.o : - cd $(GMMdir) ; $(MAKE) ATL_@(pre)FindCE_mm.o pre=@(pre) typ=@(typ) - @(pre)findCE.o : $(mySRCdir)/findCE.c $(ICC) $(ICCFLAGS) -c -D@(typ) -o $@ $(mySRCdir)/findCE.c --- 1553,1556 ---- *************** *** 1535,1539 **** x@(pre)findCE : $(INCAdir)/atlas_type.h $(L3INCdep) @(pre)mmlib \ ! $(GMMdir)/ATL_@(pre)FindCE_mm.o @(pre)findCE.o $(CLINKER) $(CLINKFLAGS) -o $@ @(pre)findCE.o \ $(GMMdir)/ATL_@(pre)FindCE_mm.o $(ATLASlib) $(LIBS) --- 1558,1563 ---- x@(pre)findCE : $(INCAdir)/atlas_type.h $(L3INCdep) @(pre)mmlib \ ! @(pre)findCE.o ! cd $(GMMdir) ; $(MAKE) ATL_@(pre)FindCE_mm.o pre=@(pre) typ=@(typ) $(CLINKER) $(CLINKFLAGS) -o $@ @(pre)findCE.o \ $(GMMdir)/ATL_@(pre)FindCE_mm.o $(ATLASlib) $(LIBS) *************** *** 2047,2050 **** --- 2071,2075 ---- - cp -f $(mmdir)/?Xover.h $(INCAdir)/. - cp -f $(mmdir)/atlas_cacheedge.h $(INCAdir)/. + - cp -f $(mmdir)/atlas_??NKB.h $(INCAdir)/. - cp -f $(mmdir)/* $(MMTdir)/res/. touch ISet$(pre)MMDef.grd *************** *** 2198,2201 **** --- 2223,2236 ---- cp $(MMTdir)/res/atlas_cacheedge.h $(INCAdir)/. cp $(MMTdir)/res/atlas_cacheedge.h INSTALL_LOG/. + $(MMTdir)/res/atlas_zdNKB.h : + cd $(MMTdir) ; $(MAKE) res/atlas_zdNKB.h + $(MMTdir)/res/atlas_csNKB.h : + cd $(MMTdir) ; $(MAKE) res/atlas_csNKB.h + INSTALL_LOG/atlas_zdNKB.h : $(MMTdir)/res/atlas_zdNKB.h + cp $(MMTdir)/res/atlas_zdNKB.h $(INCAdir)/. + cp $(MMTdir)/res/atlas_zdNKB.h INSTALL_LOG/. + INSTALL_LOG/atlas_csNKB.h : $(MMTdir)/res/atlas_csNKB.h + cp $(MMTdir)/res/atlas_csNKB.h $(INCAdir)/. + cp $(MMTdir)/res/atlas_csNKB.h INSTALL_LOG/. Il3lib : force_build *************** *** 3882,3891 **** @whiledef TB C T N @whiledef rout gemm ! ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_gemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) \ @(DA) @(DB) $(mySRCdir)/ATL_gemmXX.c @endwhile @whiledef rout Agemm ! ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_AgemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) @(DA) @(DB) \ -DALIASED_GEMM $(mySRCdir)/ATL_AgemmXX.c --- 3917,3936 ---- @whiledef TB C T N @whiledef rout gemm ! ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_gemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h \ ! @PTYP Z ! $(INCAdir)/atlas_dzNKB.h ! @PTYP C ! $(INCAdir)/atlas_csNKB.h ! @PTYP ! $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) \ @(DA) @(DB) $(mySRCdir)/ATL_gemmXX.c @endwhile @whiledef rout Agemm ! ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_AgemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h \ ! @PTYP Z ! $(INCAdir)/atlas_dzNKB.h ! @PTYP C ! $(INCAdir)/atlas_csNKB.h ! @PTYP ! $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) @(DA) @(DB) \ -DALIASED_GEMM $(mySRCdir)/ATL_AgemmXX.c *************** *** 3969,3972 **** --- 4014,4028 ---- @killkeys ptyp @endwhile + @multidef typ DCPLX SCPLX + @multidef pr2 d s + @multidef pr1 z c + @whiledef pre zd cs + ATL_@(pre)FindCE_mm.o : $(mySRCdir)/ATL_gemm.c $(mySRCdir)/ATL_gemmXX.c $(@(pr1)INCdep) \ + $(INCAdir)/@(pr1)Xover.h $(@(pr2)INCdep) + $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) -DFindingJITCPCE -I./ $(mySRCdir)/ATL_gemm.c + @undef typ + @undef pr1 + @undef pr2 + @endwhile ass.grd: - $(MAKE) ATL_dasrc2blk_NB_aX.o ATL_dasrc2blk_NB_a1.o Index: atlas.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas.base,v retrieving revision 1.107 retrieving revision 1.108 diff -C2 -d -r1.107 -r1.108 *** atlas.base 14 Mar 2007 22:49:18 -0000 1.107 --- atlas.base 15 Mar 2007 16:33:36 -0000 1.108 *************** *** 10549,10552 **** --- 10549,10567 ---- #include "atlas_lvl3.h" #include "atlas_cacheedge.h" + #ifdef DCPLX + #include "atlas_zdNKB.h" + #ifdef ATL_DZNKB + #define NKB_maxK (ATL_DZNKB * KB) + #else + #define NKB_maxK 0 + #endif + #elif defined(SCPLX) + #include "atlas_csNKB.h" + #ifdef ATL_CSNKB + #define NKB_maxK (ATL_CSNKB * KB) + #else + #define NKB_maxK 0 + #endif + #endif #ifdef NoTransA_ *************** *** 10610,10613 **** --- 10625,10632 ---- @endskip + #ifdef FindingJITCPCE + #define FindingCE + #endif + ATL_VOID Cgemm__(const int M, const int N, const int K, const SCALAR alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, *************** *** 10617,10626 **** #if defined(CacheEdge) || defined(FindingCE) #ifdef FindingCE ! extern int FoundCE; ! const int CE_maxK = ( (ATL_DivBySize(FoundCE)-(MB*NB SHIFT)) / ! ((MB+NB)*(KB SHIFT)) ) * KB; #else ! static const int CE_maxK = ( (ATL_DivBySize(CacheEdge)-(MB*NB SHIFT))/ ! ((KB SHIFT)*(MB+NB)) )*KB; #endif @beginskip --- 10636,10645 ---- #if defined(CacheEdge) || defined(FindingCE) #ifdef FindingCE ! extern int FoundCE, CompCE; ! const int CE_maxK = ( (ATL_DivBySize(FoundCE)-(MB*NB)) / ! ((MB+NB)*KB) ) * KB; #else ! static const int CE_maxK = ( (ATL_DivBySize(CacheEdge)-(MB*NB)) / ! (KB*(MB+NB)) )*KB; #endif @beginskip *************** *** 10645,10648 **** --- 10664,10677 ---- MMINTR mm1, mm2, mmNC; + /* + * If computing Kp only, return it w/o calling anything + */ + #ifdef FindingCE + if (CompCE) + { + CompCE = CE_maxK; + return; + } + #endif if (!M || !N || !K) return; /* quick return */ #ifdef USERGEMM *************** *** 10658,10661 **** --- 10687,10693 ---- mmNC = Mjoin(PATL,NCmmIJK); } + #elif defined(FindingJITCPCE) + mm2 = mm1 = Mjoin(PATL,mmJITcp); + mmNC = (N >= M) ? Mjoin(PATL,NCmmJIK) : Mjoin(PATL,NCmmIJK); #else if (N >= M) *************** *** 10728,10738 **** */ #if defined(CacheEdge) || defined(FindingCE) ! Kp = Mmin(CE_maxK, K); if (Kp < KB) Kp = K; /* * If partitioning K would cause us to do more than 1/2 as many extra writes ! * as we save in reads, forget it */ ! else if (ATL_MulByNB(K) >= (N<<1)*Kp) Kp = K; do { --- 10760,10797 ---- */ #if defined(CacheEdge) || defined(FindingCE) ! #ifdef TREAL ! Kp = Mmin(CE_maxK, K); ! #else ! if (mm1 == Mjoin(PATL,mmJITcp)) ! Kp = Mmin(NKB_maxK, K); ! else ! Kp = Mmin(CE_maxK, K); ! #endif if (Kp < KB) Kp = K; /* * If partitioning K would cause us to do more than 1/2 as many extra writes ! * as we save in reads, forget it, unless there is no reuse, in which case ! * we go ahead and cut K to avoid streaming A & B thru mem twice */ ! else if (ATL_MulByNB(K) >= (N<<1)*Kp) ! #ifdef TREAL ! Kp = (M > MB || N > NB) ? K : Kp; ! #else ! Kp = (mm1 == Mjoin(PATL,mmJITcp) || M > MB || N > NB) ? K : Kp; ! #endif ! /* ! * If we aren't cutting K, make sure we don't need to cut in order to be ! * able to allocate the required panels of A & B ! * K so that we have something that will fit ! */ ! #ifdef TREAL ! if (K == Kp) ! #else ! if (mm1 != Mjoin(PATL,mmJITcp) && K == Kp) ! #endif ! { ! Kp = (ATL_DivBySize(ATL_MaxMalloc) - MB*NB) / (MB+NB); ! if (Kp > K || Kp < KB) Kp = K; ! } do { *************** *** 10743,10746 **** --- 10802,10808 ---- if ( ATL_OOM Mjoin(PATL,mmMNK)(ETA, ETB, -M, N, Kp, alpha, A, lda, B, ldb, bet, C, ldc) ) + #else + if ( ATL_OOM Mjoin(PATL,mmJITcp)(ETA, ETB, -M, N, Kp, alpha, + A, lda, B, ldb, bet, C, ldc) ) #endif ATL_assert(mmNC(ETA, ETB, M, N, Kp, alpha, A, lda, B, ldb, *************** *** 10759,10762 **** --- 10821,10827 ---- if ( Mjoin(PATL,mmMNK)(ETA, ETB, -M, N, K, alpha, A, lda, B, ldb, beta, C, ldc) ) + #else + if ( Mjoin(PATL,mmJITcp)(ETA, ETB, -M, N, K, alpha, A, lda, B, ldb, + beta, C, ldc) ) #endif ATL_assert(mmNC(ETA, ETB, M, N, K, alpha, A, lda, B, ldb, *************** *** 11136,11140 **** int ATL_bigmmOutOfMem=0; #define Cgemm Mjoin(PATL,big_mm) ! #elif defined (FindingCE) #define Cgemm Mjoin(PATL,FindCE_mm) #elif defined (ATLGEMM) --- 11201,11205 ---- int ATL_bigmmOutOfMem=0; #define Cgemm Mjoin(PATL,big_mm) ! #elif defined(FindingCE) || defined(FindingJITCPCE) #define Cgemm Mjoin(PATL,FindCE_mm) #elif defined (ATLGEMM) *************** *** 25633,25636 **** --- 25698,25702 ---- int FoundCE; + int CompCE=0; double time00(); void Mjoin(PATL,FindCE_mm)(enum ATLAS_TRANS TA, enum ATLAS_TRANS TB, *************** *** 25669,25672 **** --- 25735,25751 ---- /* + * Make sure CE will be different than 0, if CE is not 0 + */ + if (CE) + { + FoundCE = CE; + CompCE = 1; + Mjoin(PATL,FindCE_mm)(TA, TB, M, N, K, alpha, NULL, lda, NULL, ldb, beta, + NULL, ldc); + if (CompCE < KB) + return(-2.0); + } + CompCE = 0; + /* * Blow off cache flushing if C is already twice as large as L2 */ *************** *** 25727,25730 **** --- 25806,25810 ---- FoundCE = CE; + t0 = time00(); Mjoin(PATL,FindCE_mm)(TA, TB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc); *************** *** 25824,25833 **** { t1 = mmcase(TA, TB, M, N, K, alpha, beta, i*1024); ! ATL_assert(t1 > 0.0); ! #ifdef TREAL ! mf = (((2.0*M)*N)*K) / (1000000.0 * t1); ! #else ! mf = (((8.0*M)*N)*K) / (1000000.0 * t1); ! #endif if (mf > mmf) { --- 25904,25918 ---- { t1 = mmcase(TA, TB, M, N, K, alpha, beta, i*1024); ! if (t1 == -2.0) ! mf = 0.0; ! else ! { ! ATL_assert(t1 > 0.0); ! #ifdef TREAL ! mf = (((2.0*M)*N)*K) / (1000000.0 * t1); ! #else ! mf = (((8.0*M)*N)*K) / (1000000.0 * t1); ! #endif ! } if (mf > mmf) { *************** *** 25916,25919 **** --- 26001,26005 ---- TYPE alpha[2], beta[2]; #endif + char *sp; double mf, mf0; FILE *fpout; *************** *** 26015,26018 **** --- 26101,26123 ---- if (fpout) { + #ifdef ATL_JITcp + if (CE) + { + FoundCE = CE; + CompCE = 1; + Mjoin(PATL,FindCE_mm)(TA, TB, M, N, K, alpha, NULL, 1, NULL, 1, beta, + NULL, 1); + } + else CompCE = 0; + #ifdef DCPLX + sp = "ZD"; + #else + sp = "CS"; + #endif + fprintf(fpout, "#ifndef ATLAS_%sNKB_H\n", sp); + fprintf(fpout, " #define ATLAS_%sNKB_H\n", sp); + fprintf(fpout, " #define ATL_%sNKB %d\n", sp, CompCE/KB); + fprintf(fpout, "#endif\n"); + #else fprintf(fpout, "#ifndef ATLAS_CACHEEDGE_H\n"); fprintf(fpout, " #define ATLAS_CACHEEDGE_H\n"); *************** *** 26020,26023 **** --- 26125,26129 ---- fprintf(fpout, " #define CacheEdge %d\n", CE*1024); fprintf(fpout, "#endif\n"); + #endif fclose(fpout); } *************** *** 27600,27603 **** --- 27706,27736 ---- fclose(fp); /* + * Determine [ZD,CS]NKB, if necessary + */ + if (pre == 'z' || pre == 'c') + { + sprintf(ln3, "INSTALL_LOG/atlas_%c%cNKB.h", pre, upre); + if (!FileExists(ln3)) + { + sprintf(ln, "%s %s pre=%c %s %s\n", + fmake, ln3, pre, redir, ln2); + fprintf(stdout, ln); + ATL_Cassert(system(ln)==0, "CACHEEDGE DETECTION", ln2); + } + fp = fopen(ln3, "r"); + ATL_Cassert(fp, "CACHE EDGE DETECTION", NULL); + ATL_Cassert(fgets(ln, 256, fp), "CACHE EDGE DETECTION", NULL); + ATL_Cassert(fgets(ln, 256, fp), "CACHE EDGE DETECTION", NULL); + ATL_Cassert(fgets(ln, 256, fp), "CACHE EDGE DETECTION", NULL); + if (fgets(ln3, 256, fp)) + { + ATL_Cassert(sscanf(ln+21, " %d", &i)==1, + "CACHE EDGE DETECTION", NULL); + } + else i = 0; + fprintf(fpsum, " %c%cNKB set to %d bytes\n", pre, upre, i); + fclose(fp); + } + /* * If necessary, determine Xover for this data type */ |