[Math-atlas-commits] CVS: AtlasBase/Clint atlas-def.base, 1.10, 1.11 atlas-doc.base, 1.27, 1.28 atlas-make.base, 1.103, 1.104 atlas.base, 1.108, 1.109
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2007-03-15 23:14:24
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory sc8-pr-cvs7.sourceforge.net:/tmp/cvs-serv2690/Clint Modified Files: atlas-def.base atlas-doc.base atlas-make.base atlas.base Log Message: Index: atlas-def.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-def.base,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** atlas-def.base 16 Aug 2006 15:44:39 -0000 1.10 --- atlas-def.base 15 Mar 2007 23:14:14 -0000 1.11 *************** *** 36,39 **** --- 36,40 ---- cp $mmdir/?uClean[M,N,K]F $defdir/. cp $incdir/atlas_cacheedge.h $defdir/. + cp $incdir/atlas_??NKB.h $defdir/. cp $incdir/?Xover.h $defdir/. cp $incdir/atlas_?sysinfo.h $defdir/. *************** *** 246,253 **** --- 247,256 ---- gemm_put : sgemm_put dgemm_put - cp $(adefd)/gemm/atlas_cacheedge.h $(INCAdir)/. + - cp $(adefd)/gemm/atlas_??KNB.h $(INCAdir)/. sys : ssys dsys kern : skern dkern gemm : sgemm dgemm - cp $(INCAdir)/atlas_cacheedge.h $(adefd)/gemm/ + - cp $(INCAdir)/atlas_??KNB.h $(adefd)/gemm/ ArchNew : $(mach) xnegflt $(MAKE) sys kern gemm Index: atlas-doc.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-doc.base,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** atlas-doc.base 7 Mar 2007 21:23:25 -0000 1.27 --- atlas-doc.base 15 Mar 2007 23:14:14 -0000 1.28 *************** *** 1089,1097 **** ask ATLAS to build the .so you want. If you want all libraries, including the Fortran77 routines, the target choices are : ! shared : Put all of ATLAS's non-threaded libs into libatlas.so ! ptshared : Put all of ATLAS's threaded libs into libatlas.so If you want only the C routines (eg. you don't have a fortran compiler): ! cshared : Put all of ATLAS's C non-threaded libs into libcatlas.so ! 
cptshared : Put all of ATLAS's C threaded libs into libcatlas.so ****************** NOTE ON BUILDING A FULL LAPACK LIBRARY ********************* --- 1089,1097 ---- ask ATLAS to build the .so you want. If you want all libraries, including the Fortran77 routines, the target choices are : ! shared : Create shared versions of ATLAS's sequential libs ! ptshared : Create shared versions of ATLAS's threaded libs If you want only the C routines (eg. you don't have a fortran compiler): ! cshared : Create shared versions of ATLAS's sequential libs ! cptshared : Create shared versions of ATLAS's threaded libs ****************** NOTE ON BUILDING A FULL LAPACK LIBRARY ********************* Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.103 retrieving revision 1.104 diff -C2 -d -r1.103 -r1.104 *** atlas-make.base 15 Mar 2007 16:33:35 -0000 1.103 --- atlas-make.base 15 Mar 2007 23:14:14 -0000 1.104 *************** *** 1541,1544 **** --- 1541,1545 ---- @multidef upr d s d s @whiledef pre z c d s + @addkeys ptyp=@(pre) @define MCC @$(@up@(upr)MC)@ @define MMFLAGS @$(CDEFS) $(@up@(upr)MCFLAGS)@ *************** *** 1565,1570 **** --- 1566,1580 ---- x@(pre)tfc : $(INCAdir)/atlas_type.h $(L3INCdep) \ $(pre)tfc.o @(pre)mmlib + cd $(GMMdir) ; $(MAKE) ATL_@(pre)big_mm.o ATL_@(pre)small_mm.o \ + @ptyp z c + ATL_@(pre)@(upr)big_mm.o pre=@(pre) typ=@(typ) + @ptyp ! z c + pre=@(pre) typ=@(typ) + @ptyp ! $(CLINKER) $(CLINKFLAGS) -o $@ @(pre)tfc.o \ $(GMMdir)/ATL_$(pre)big_mm.o $(GMMdir)/ATL_@(pre)small_mm.o \ + @ptyp z c + $(GMMdir)/ATL_@(pre)@(upr)big_mm.o \ + @ptyp ! $(ATLASlib) $(LIBS) *************** *** 1607,1610 **** --- 1617,1621 ---- @undef upr @undef typ + @killkeys ptyp @endwhile *************** *** 3919,3926 **** ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_gemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h \ @PTYP Z ! 
$(INCAdir)/atlas_dzNKB.h @PTYP C $(INCAdir)/atlas_csNKB.h ! @PTYP ! $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) \ @(DA) @(DB) $(mySRCdir)/ATL_gemmXX.c --- 3930,3937 ---- ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_gemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h \ @PTYP Z ! $(INCAdir)/atlas_zdNKB.h @PTYP C $(INCAdir)/atlas_csNKB.h ! @PTYP Z C $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) \ @(DA) @(DB) $(mySRCdir)/ATL_gemmXX.c *************** *** 3929,3936 **** ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_AgemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h \ @PTYP Z ! $(INCAdir)/atlas_dzNKB.h @PTYP C $(INCAdir)/atlas_csNKB.h ! @PTYP ! $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) @(DA) @(DB) \ -DALIASED_GEMM $(mySRCdir)/ATL_AgemmXX.c --- 3940,3947 ---- ATL_@(pre)@(rout)@(TA)@(TB).o : $(mySRCdir)/ATL_AgemmXX.c $(@(pre)INCdep) $(INCAdir)/atlas_cacheedge.h \ @PTYP Z ! $(INCAdir)/atlas_zdNKB.h @PTYP C $(INCAdir)/atlas_csNKB.h ! @PTYP Z C $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) @(DA) @(DB) \ -DALIASED_GEMM $(mySRCdir)/ATL_AgemmXX.c *************** *** 4000,4003 **** --- 4011,4028 ---- $(mySRCdir)/ATL_gemm.c @endwhile + @ptyp c + @define pp @cs@ + @ptyp z + @define pp @zd@ + @ptyp z c + @whiledef rout big_mm + ATL_@(pp)@(rout).o : $(mySRCdir)/ATL_gemm.c $(mySRCdir)/ATL_gemmXX.c $(@(pre)INCdep) \ + $(INCAdir)/@(pre)Xover.h $(INCAdir)/atlas_cacheedge.h \ + $(INCAdir)/atlas_@(pp)NKB.h + $(ICC) -o $@ -c $(ICCFLAGS) -D@(typ) -DCR@up@(rout) -I./ \ + $(mySRCdir)/ATL_gemm.c + @endwhile + @undef pp + @PTYP ! ATL_@(pre)gemm.o : $(mySRCdir)/ATL_gemm.c $(mySRCdir)/ATL_gemmXX.c $(@(pre)INCdep) \ $(INCAdir)/atlas_cacheedge.h $(INCAdir)/@(pre)Xover.h *************** *** 4197,4207 **** - rm -f libatlas.so liblapack.so $(MAKE) libatlas.so liblapack.so libf77blas.so libcblas.so liblapack.so ! C_ptshared : ! - rm -f libatlas.so liblapack.so ! $(MAKE) libatlas.so libclapack.so libptf77blas.so libf77blas.so \ ! libptcblas.so libcblas.so liblapack.so ! C_shared : ! - rm -f libatlas.so liblapack.so ! 
$(MAKE) libatlas.so liblapack.so libcblas.so liblapack.so libatlas.so : libatlas.a --- 4222,4231 ---- - rm -f libatlas.so liblapack.so $(MAKE) libatlas.so liblapack.so libf77blas.so libcblas.so liblapack.so ! cptshared : ! - rm -f libatlas.so libclapack.so ! $(MAKE) libatlas.so libclapack.so libptcblas.so libcblas.so ! cshared : ! - rm -f libatlas.so libclapack.so ! $(MAKE) libatlas.so libclapack.so libcblas.so libatlas.so : libatlas.a Index: atlas.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas.base,v retrieving revision 1.108 retrieving revision 1.109 diff -C2 -d -r1.108 -r1.109 *** atlas.base 15 Mar 2007 16:33:36 -0000 1.108 --- atlas.base 15 Mar 2007 23:14:14 -0000 1.109 *************** *** 10565,10568 **** --- 10565,10572 ---- #endif + #ifndef C2R_K + #define CR2_K (1<<31) + #endif + #ifdef NoTransA_ #define ETA AtlasNoTrans *************** *** 10601,10605 **** #define TATB Mjoin(TA_,TB_) ! #if defined(BIG_MM) || defined(SMALLK_MM) #define ATL_OOM ATL_bigmmOutOfMem = #else --- 10605,10609 ---- #define TATB Mjoin(TA_,TB_) ! #if defined(BIG_MM) || defined(SMALLK_MM) || defined(CRBIG_MM) #define ATL_OOM ATL_bigmmOutOfMem = #else *************** *** 10687,10691 **** mmNC = Mjoin(PATL,NCmmIJK); } ! #elif defined(FindingJITCPCE) mm2 = mm1 = Mjoin(PATL,mmJITcp); mmNC = (N >= M) ? Mjoin(PATL,NCmmJIK) : Mjoin(PATL,NCmmIJK); --- 10691,10695 ---- mmNC = Mjoin(PATL,NCmmIJK); } ! #elif defined(FindingJITCPCE) || defined(CRBIG_MM) mm2 = mm1 = Mjoin(PATL,mmJITcp); mmNC = (N >= M) ? Mjoin(PATL,NCmmJIK) : Mjoin(PATL,NCmmIJK); *************** *** 10707,10711 **** * if the reuse of the copied matrices is low */ ! #ifdef TREAL /* No mmMNK for complex yet */ if ( ((M <= MB || N <= NB) && K > ATL_3NB) || ((M <= MB+MB) && (N <= NB+NB) && K > M*N) ) --- 10711,10715 ---- * if the reuse of the copied matrices is low */ ! 
#ifdef TREAL if ( ((M <= MB || N <= NB) && K > ATL_3NB) || ((M <= MB+MB) && (N <= NB+NB) && K > M*N) ) *************** *** 10714,10717 **** --- 10718,10731 ---- mm1 = Mjoin(PATL,mmMNK); } + /* + * For complex, JIT code calls real GEMM, which is faster on some platforms + * for all shapes, once K is long enough + */ + #else + if (K >= C2R_K || ((M < MB || N < NB) && K > 4*KB)) + { + mm2 = mm1; + mm1 = Mjoin(PATL,mmJITcp); + } #endif #endif *************** *** 10729,10733 **** #if defined(SMALL_MM) DOCOPY = 0; ! #elif defined(BIG_MM) || defined(FindingCE) || defined(ALIASED_GEMM) DOCOPY = 1; #else --- 10743,10748 ---- #if defined(SMALL_MM) DOCOPY = 0; ! #elif defined(BIG_MM) || defined(FindingCE) || defined(ALIASED_GEMM) || \ ! defined(CRBIG_MM) DOCOPY = 1; #else *************** *** 11201,11204 **** --- 11216,11222 ---- int ATL_bigmmOutOfMem=0; #define Cgemm Mjoin(PATL,big_mm) + #elif defined (CRBIG_MM) + extern int ATL_bigmmOutOfMem; + #define Cgemm Mjoin(PATL,Mjoin(UPR,big_mm)) #elif defined(FindingCE) || defined(FindingJITCPCE) #define Cgemm Mjoin(PATL,FindCE_mm) *************** *** 26150,26154 **** #define nshape 5 enum ATLAS_MATSHAPE {AtlasM_NB=0, AtlasN_NB=1, AtlasMN_NB=2, AtlasK_NB=3, ! Atlas0_NB=4}; ___main(){} --- 26168,26172 ---- #define nshape 5 enum ATLAS_MATSHAPE {AtlasM_NB=0, AtlasN_NB=1, AtlasMN_NB=2, AtlasK_NB=3, ! 
Atlas0_NB=4, AtlasMN_REST}; ___main(){} *************** *** 26169,26175 **** #define big_gemm Mjoin(PRE,big_mm) #define small_gemm Mjoin(PRE,small_mm) - @endskip #define small_gemm Mjoin(PATL,small_mm) #define big_gemm Mjoin(PATL,big_mm) void matgen(int M0, int N, TYPE *A, int lda0, int seed) --- 26187,26219 ---- #define big_gemm Mjoin(PRE,big_mm) #define small_gemm Mjoin(PRE,small_mm) #define small_gemm Mjoin(PATL,small_mm) #define big_gemm Mjoin(PATL,big_mm) + @endskip + void Mjoin(PATL,small_mm) + (const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, + const int M, const int N, const int K, const SCALAR alpha, + const TYPE *A, const int lda, const TYPE *B, const int ldb, + const SCALAR beta, TYPE *C, const int ldc); + void Mjoin(PATL,big_mm) + (const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, + const int M, const int N, const int K, const SCALAR alpha, + const TYPE *A, const int lda, const TYPE *B, const int ldb, + const SCALAR beta, TYPE *C, const int ldc); + void Mjoin(PATL,dbig_mm) + (const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, + const int M, const int N, const int K, const SCALAR alpha, + const TYPE *A, const int lda, const TYPE *B, const int ldb, + const SCALAR beta, TYPE *C, const int ldc); + void Mjoin(PATL,sbig_mm) + (const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, + const int M, const int N, const int K, const SCALAR alpha, + const TYPE *A, const int lda, const TYPE *B, const int ldb, + const SCALAR beta, TYPE *C, const int ldc); + typedef void (*GEMMPTR) + (const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB, + const int M, const int N, const int K, const SCALAR alpha, + const TYPE *A, const int lda, const TYPE *B, const int ldb, + const SCALAR beta, TYPE *C, const int ldc); + static GEMMPTR big_gemm, small_gemm; void matgen(int M0, int N, TYPE *A, int lda0, int seed) *************** *** 26391,26394 **** --- 26435,26442 ---- *K = nb; break; + case AtlasMN_REST: /* restricted M & N, but basically square */ + if (n > 6*nb) + *M = 
*N = 6*nb; + break; case Atlas0_NB: break; *************** *** 26506,26510 **** } GetDims(shape, n, nb, &M, &N, &K); ! return(M*N*K); } --- 26554,26558 ---- } GetDims(shape, n, nb, &M, &N, &K); ! return(shape == AtlasMN_REST ? K : M*N*K); } *************** *** 26518,26521 **** --- 26566,26571 ---- static char *nm[nshape] = {"M", "N", "MN", "K", "GE"}; + big_gemm = Mjoin(PATL,big_mm); + small_gemm = Mjoin(PATL,small_mm); for (shape=AtlasM_NB; shape <= Atlas0_NB; shape++) { *************** *** 26539,26542 **** --- 26589,26594 ---- FILE *fp; + big_gemm = Mjoin(PATL,big_mm); + small_gemm = Mjoin(PATL,small_mm); #ifndef SM_FOUT if (nargs < 5) *************** *** 26569,26572 **** --- 26621,26630 ---- DoShapes(fp, 'T', 'N', nb, N0, NN, incN, alpha, beta); DoShapes(fp, 'T', 'T', nb, N0, NN, incN, alpha, beta); + #ifdef TCPLX + small_gemm = Mjoin(PATL,big_mm); + big_gemm = Mjoin(PATL,Mjoin(UPR,big_mm)); + icross = tloop(AtlasMN_REST, 'N', 'T', nb, alpha, beta); + fprintf(fp, "#define C2R_K %ld\n", icross); + #endif fprintf(fp, "\n#endif\n"); fclose(fp); |