[Math-atlas-commits] CVS: AtlasBase/Clint atlas-iaux.base, 1.26, 1.27 atlas-make.base, 1.184, 1.185 atlas.base, 1.162, 1.163
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-01-28 18:07:07
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv31025/Clint Modified Files: atlas-iaux.base atlas-make.base atlas.base Log Message: Index: atlas-iaux.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-iaux.base,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** atlas-iaux.base 16 Jan 2009 16:01:33 -0000 1.26 --- atlas-iaux.base 28 Jan 2009 18:06:50 -0000 1.27 *************** *** 1502,1509 **** #if ATL_LINEFLUSH /* do we have option of flushing by cacheline? */ ! void *ATL_ptCLF(ATL_LAUNCHSTRUCT_t *lp, void *vptr) { ATL_FlushAreasByCL(vptr); - return(NULL); } --- 1502,1508 ---- #if ATL_LINEFLUSH /* do we have option of flushing by cacheline? */ ! void ATL_ptCLF(ATL_LAUNCHSTRUCT_t *lp, void *vptr) { ATL_FlushAreasByCL(vptr); } *************** *** 1516,1520 **** ls.opstruct = (char*) fp; ls.opstructstride = 0; ! ls.OpStrucIsInit = NULL; ls.CombineOpStructs = NULL; ls.DoWork = ATL_ptCLF; --- 1515,1519 ---- ls.opstruct = (char*) fp; ls.opstructstride = 0; ! ls.OpStructIsInit = NULL; ls.CombineOpStructs = NULL; ls.DoWork = ATL_ptCLF; *************** *** 1522,1526 **** for (i=0; i < ATL_NTHREADS; i++) { ! tp[i].vp = &ls tp[i].rank = i; } --- 1521,1525 ---- for (i=0; i < ATL_NTHREADS; i++) { ! 
tp[i].vp = &ls; tp[i].rank = i; } Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.184 retrieving revision 1.185 diff -C2 -d -r1.184 -r1.185 *** atlas-make.base 5 Jan 2009 18:02:30 -0000 1.184 --- atlas-make.base 28 Jan 2009 18:06:52 -0000 1.185 *************** *** 2234,2237 **** --- 2234,2240 ---- pre=j nb = 88 + NBs = 31 1 4 8 12 16 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 96 \ + 112 128 144 160 176 192 224 256 + dmmobj = $(GMMdir)/ATL_dbig_mm.o $(GMMdir)/ATL_dbignork_mm.o \ $(GMMdir)/ATL_dsmall_mm.o $(GMMdir)/ATL_dsmallK_mm.o *************** *** 2324,2346 **** ./x$(pre)lanbtst -R 1 geqrf -oc ATL_$(pre)GetNB_geqrf \ -#t 5 0 12 100 8 500 6 800 4 1200 3 -% 4 16 50 0 \ ! -nbmin 3 100 8 400 16 800 24 \ -n 24 25 50 75 100 125 150 175 200 250 300 350 400 450 \ ! 500 600 700 800 900 1000 1200 1400 1600 1800 2000 \ ! -nb 31 1 4 8 12 16 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 \ ! 84 88 96 112 128 144 160 176 192 224 256 ; \ mv ATL_$(pre)GetNB_geqrf.h $(INCAdir)/atlas_$(pre)GetNB_geqrf.h ; \ fi if [ -s "$(INCAdir)/atlas_t$(pre)GetNB_geqrf.h" ]; then \ echo "Arch Defaults allow us to skip threaded tuning" ; \ ! elif [ -s "$(INCAdir)/atlas_pthreads.h" ] then \ $(MAKE) x$(pre)lanbtst_pt ; \ ./x$(pre)lanbtst_pt -R 1 geqrf -oc ATL_$(pre)tGetNB_geqrf \ -#t 5 0 12 100 8 500 6 800 4 1200 3 -% 4 16 50 0 \ ! -nbmin 3 100 8 400 16 800 24 \ -n 29 25 50 75 100 125 150 175 200 250 300 350 400 450 \ 500 600 700 800 900 1000 1200 1400 1600 1800 2000 \ ! 2400 2800 3200 3600 4000 \ ! -nb 31 1 4 8 12 16 24 28 32 36 40 44 48 52 56 60 64 68 72 76 \ ! 80 84 88 96 112 128 144 160 176 192 224 256 ; \ mv ATL_$(pre)tGetNB_geqrf.h $(INCAdir)/atlas_$(pre)tGetNB_geqrf.h ; \ fi --- 2327,2345 ---- ./x$(pre)lanbtst -R 1 geqrf -oc ATL_$(pre)GetNB_geqrf \ -#t 5 0 12 100 8 500 6 800 4 1200 3 -% 4 16 50 0 \ ! 
-nbmin 3 100 8 400 16 800 24 -nb $(NBs) \ -n 24 25 50 75 100 125 150 175 200 250 300 350 400 450 \ ! 500 600 700 800 900 1000 1200 1400 1600 1800 2000 ; \ mv ATL_$(pre)GetNB_geqrf.h $(INCAdir)/atlas_$(pre)GetNB_geqrf.h ; \ fi if [ -s "$(INCAdir)/atlas_t$(pre)GetNB_geqrf.h" ]; then \ echo "Arch Defaults allow us to skip threaded tuning" ; \ ! elif [ -s "$(INCAdir)/atlas_pthreads.h" ]; then \ $(MAKE) x$(pre)lanbtst_pt ; \ ./x$(pre)lanbtst_pt -R 1 geqrf -oc ATL_$(pre)tGetNB_geqrf \ -#t 5 0 12 100 8 500 6 800 4 1200 3 -% 4 16 50 0 \ ! -nbmin 3 100 8 400 16 800 24 -nb $(NBs) \ -n 29 25 50 75 100 125 150 175 200 250 300 350 400 450 \ 500 600 700 800 900 1000 1200 1400 1600 1800 2000 \ ! 2400 2800 3200 3600 4000 ; \ mv ATL_$(pre)tGetNB_geqrf.h $(INCAdir)/atlas_$(pre)tGetNB_geqrf.h ; \ fi Index: atlas.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas.base,v retrieving revision 1.162 retrieving revision 1.163 diff -C2 -d -r1.162 -r1.163 *** atlas.base 16 Jan 2009 16:01:33 -0000 1.162 --- atlas.base 28 Jan 2009 18:06:53 -0000 1.163 *************** *** 28171,28174 **** --- 28171,28223 ---- } + int LnIsCont(char *ln) + /* + * RETURNS: 1 if last non-whitespace char in ln is '\', and 0 otherwise + */ + { + int i; + for (i=0; ln[i]; i++); + for (i--; Mciswspace(ln[i]) && i > 0; i--); + return(ln[i] == '\\'); + } + + void GetuMMRES(char pre, int ID, int *muladd, int *lat, + int *mu, int *nu, int *ku) + /* + * Reads the user index file to obtain the settings for user-contributed + * kernel ID + */ + { + char ln[1024]; + char fnam[256], auth[256]; + FILE *fp; + int i, n, itmp, id; + + sprintf(ln, "../tune/blas/gemm/%ccases.dsc", pre); + fp = fopen(ln, "r"); + assert(fp); + fgets(ln, 1024, fp); /* skip comment line */ + fgets(ln, 1024, fp); /* get number of user cases line */ + assert(sscanf(ln, " %d", &n) == 1); + /* + * Now search file for ID + */ + for (i=0; i < n; i++) + { + assert(fgets(ln, 1024, fp)); + 
assert(sscanf(ln, " %d %d %d %d %d %d %d %d %d %d %s \"%[^\"]", &id, + &itmp, &itmp, &itmp, &itmp, muladd, lat, mu, nu, ku, + fnam, auth) == 12); + if (LnIsCont(ln)) + { + assert(fgets(ln, 1024, fp)); + assert(fgets(ln, 1024, fp)); + } + if (id == ID) + break; + } + assert(id == ID); + fclose(fp); + } void GetMMRES(char pre, int *muladd, int *lat, int *nb, int *pref, int *mu, int *nu, int *ku, int *ff, int *iff, int *nf, *************** *** 28190,28194 **** { assert(fscanf(fp, " %d %d %lf \"%[^\"]\" \"%[^\"]", ! &icase, &i, umf, ufile, auth) == 5); } else --- 28239,28243 ---- { assert(fscanf(fp, " %d %d %lf \"%[^\"]\" \"%[^\"]", ! icase, &i, umf, ufile, auth) == 5); } else *************** *** 28201,28209 **** } void GoToTown(int ARCHDEF, int L1DEF, int TuneLA) { const char TR[2] = {'N','T'}; char prec[4] = {'d', 's', 'z', 'c'}, pre, upre, *typ; ! char ln[512], tnam[256], ln2[512], ln3[512], fnam[128]; char *mulinst, *peakstr, *peakstr2; int nprec=4; --- 28250,28260 ---- } + @extract -b @(topd)/Clint/atlas-iaux.base rout=ATL_lcm + void GoToTown(int ARCHDEF, int L1DEF, int TuneLA) { const char TR[2] = {'N','T'}; char prec[4] = {'d', 's', 'z', 'c'}, pre, upre, *typ; ! char ln[1024], tnam[256], ln2[512], ln3[512], fnam[128]; char *mulinst, *peakstr, *peakstr2; int nprec=4; *************** *** 28215,28219 **** int DefInstall=0; long imf; ! int maU, latU, muU, nuU, kuU, il1mul, pfA; double mfU, mf4x1, mf4x4, mf, mfp, mmmf, mfpeak[2], l1mul; FILE *fp, *fpsum, *fpabr; --- 28266,28270 ---- int DefInstall=0; long imf; ! int idU, maU, latU, muU, nuU, kuU, il1mul, pfA; double mfU, mf4x1, mf4x4, mf, mfp, mmmf, mfpeak[2], l1mul; FILE *fp, *fpsum, *fpabr; *************** *** 28333,28337 **** } GetMMRES(pre, &muladd, &lat, &nb, &pfA, &mu, &nu, &ku, &ffetch, &ifetch, ! &nfetch, &mf, &i, fnam, ln, &mfU); #ifdef ATL_CPUMHZ fprintf(fpabr, "Clock_rate=%d Mhz\n", ATL_CPUMHZ); --- 28384,28388 ---- } GetMMRES(pre, &muladd, &lat, &nb, &pfA, &mu, &nu, &ku, &ffetch, &ifetch, ! 
&nfetch, &mf, &idU, fnam, ln, &mfU); #ifdef ATL_CPUMHZ fprintf(fpabr, "Clock_rate=%d Mhz\n", ATL_CPUMHZ); *************** *** 28717,28723 **** { pre = prec[ip]; sprintf(ln2, "INSTALL_LOG/%cLATUNE.LOG", pre); PrintBanner(ln2, 1, 5, 1, ip+1); ! sprintf(ln, "%s ILATune pre=%c %s %s\n", fmake, pre, redir, ln2); fprintf(stdout, ln); ATL_Cassert(system(ln)==0, "LAPACK TUNE", ln2); --- 28768,28816 ---- { pre = prec[ip]; + GetMMRES(pre, &muladd, &lat, &nb, &pfA, &mu, &nu, &ku, &ffetch, + &ifetch, &nfetch, &mf, &idU, fnam, ln, &mfU); + if (mfU > mf) /* if mu,nu need to be from user kernel */ + GetuMMRES(pre, idU, &maU, &latU, &mu, &nu, &kuU); + mu = ATL_lcm(mu, nu); /* mu now LCM(mu,nu); use as INC on NB */ + /* + * ========================================================== + * Figure out NBs for lanbtst to try; at end of this section: + * ku has number of NBs & ln3 has NBs to try + * ========================================================== + */ + /* + * For small mu, can just inc by small multiple of mu in search + */ + if (mu <= 8) + { + ku = 1; + j = sprintf(ln3, "1 "); + nu = (mu < 4) ? (mu+mu) : mu; + } + /* + * For large increment, need to scan by fraction of mu in initial inc + */ + else + { + j = sprintf(ln3, "1 4 8 12 16"); + ku = 5; + nu = (mu > 12 && ((mu>>1)<<1)==mu) ? (mu>>1) : mu; + } + for (i=nu; i <= 256; i += nu, ku++) + { + + j += sprintf(ln3+j, "%d ", i); + /* + * For large NB, see if should pump up gap for quicker search + */ + if (i > 64 && nu < 8) + nu += nu; + else if (i > 128 && nu < 12) + nu += nu; + } sprintf(ln2, "INSTALL_LOG/%cLATUNE.LOG", pre); PrintBanner(ln2, 1, 5, 1, ip+1); ! sprintf(ln, "%s ILATune pre=%c NBs=\"%d %s\" %s %s\n", fmake, pre, ! ku, ln3, redir, ln2); fprintf(stdout, ln); ATL_Cassert(system(ln)==0, "LAPACK TUNE", ln2); |