[Math-atlas-commits] CVS: AtlasBase/Clint atlas-tlp.base, 1.57, 1.58
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-11-17 00:39:04
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv9009/Clint Modified Files: atlas-tlp.base Log Message: Index: atlas-tlp.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-tlp.base,v retrieving revision 1.57 retrieving revision 1.58 diff -C2 -d -r1.57 -r1.58 *** atlas-tlp.base 24 Oct 2009 19:25:16 -0000 1.57 --- atlas-tlp.base 17 Nov 2009 00:38:53 -0000 1.58 *************** *** 5,11 **** @ROUT slvtst @extract -b @(topd)/gen.inc what=cw @(cw00) @ROUT ilaenvF.c lanbtst @extract -b @(topd)/gen.inc what=cw @(cw08) ! @ROUT lanbtst slvtst #include "atlas_misc.h" #include "atlas_lapack.h" --- 5,13 ---- @ROUT slvtst @extract -b @(topd)/gen.inc what=cw @(cw00) + @ROUT lanbsrch + @extract -b @(topd)/gen.inc what=cw @(cw09) @ROUT ilaenvF.c lanbtst @extract -b @(topd)/gen.inc what=cw @(cw08) ! @ROUT lanbtst slvtst lanbsrch #include "atlas_misc.h" #include "atlas_lapack.h" *************** *** 3965,3969 **** } ! @ROUT lanbtst qrtstGF void PrintUsage(char *name, int ierr, char *flag) { --- 3967,3971 ---- } ! @ROUT lanbtst qrtstGF lanbsrch void PrintUsage(char *name, int ierr, char *flag) { *************** *** 3976,3982 **** fprintf(stderr, "USAGE: %s [flags]:\n", name); fprintf(stderr, " -R <#> <rout1> ... <rout#>\n"); ! @ROUT lanbtst fprintf(stderr, " routs: getrf, potrf, geqrf, geqlf, gerqf, gelqf\n"); fprintf(stderr, " -F <mflop> : force <mflops> of timed computation\n"); @ROUT qrtstGF fprintf(stderr, " routs: geqrf, geqlf, gerqf, gelqf\n"); --- 3978,3990 ---- fprintf(stderr, "USAGE: %s [flags]:\n", name); fprintf(stderr, " -R <#> <rout1> ... <rout#>\n"); ! 
@ROUT lanbtst lanbsrch fprintf(stderr, " routs: getrf, potrf, geqrf, geqlf, gerqf, gelqf\n"); fprintf(stderr, " -F <mflop> : force <mflops> of timed computation\n"); + @ROUT lanbsrch + fprintf(stderr, " -r m/n # : restrict given dimension to #\n"); + fprintf(stderr, " -N # : largest problem to tune for is #\n") + fprintf(stderr, " -# <#> : repeat each timing # times\n"); + fprintf(stderr, + " -S # : stop repeating timing when # seconds are exceeded\n"); @ROUT qrtstGF fprintf(stderr, " routs: geqrf, geqlf, gerqf, gelqf\n"); *************** *** 3988,3991 **** --- 3996,4000 ---- fprintf(stderr, " -m <#> <M1> ... <M#>\n"); fprintf(stderr, " -M <Mstart> <Mend> <Minc>\n"); + @ROUT lanbtst qrtstGF lanbsrch fprintf(stderr, " -a <ldagap> : lda = M + <ldagap> foreach M\n"); fprintf(stderr, " -f <flushKB> : flush at least this mem in LRU timers\n"); *************** *** 4004,4010 **** " -%% <min%%> <minNB> <max%%> <maxNB> : set bounds on NB\n"); fprintf(stderr, " nb1=MIN(minNB, min%%*N), nbN=MIN(maxNB,max%%)\n"); fprintf(stderr, " -o[f,c,b] <file>: output nb selection as F77/C/both file\n"); ! @ROUT lanbtst qrtstGF exit(ierr ? ierr : -1); } --- 4013,4020 ---- " -%% <min%%> <minNB> <max%%> <maxNB> : set bounds on NB\n"); fprintf(stderr, " nb1=MIN(minNB, min%%*N), nbN=MIN(maxNB,max%%)\n"); + @ROUT lanbtst lanbsrch fprintf(stderr, " -o[f,c,b] <file>: output nb selection as F77/C/both file\n"); ! @ROUT lanbtst lanbsrch qrtstGF exit(ierr ? 
ierr : -1); } *************** *** 4113,4116 **** --- 4123,4130 ---- void GetFlags(int nargs, char **args, int *flsizeKB, TYPE *thresh, int **nreps, int **ROUTs, int *ldagap, int **Ms, int **Ns) + @ROUT lanbsrch + void GetFlags(int nargs, char **args, int **nreps, int *flsizeKB, int *mflop, + int *rout, int *ldagap, int *maxN, int **UPLOs, int **SDs, + int *F77out, char **outnam, int *restM, int *restN) @ROUT lanbtst int *GetFlags(int nargs, char **args, int **nreps, int *flsizeKB, int *mflop, *************** *** 4122,4144 **** * RETURNS: array of NBs, with NB[0] holding number of NBs to do */ ! @ROUT lanbtst qrtstGF { int *NBs=NULL, *ns=NULL, *ms=NULL, *ups=NULL, *sds=NULL, *ip; int i, k, n; ! *ROUTs = NULL; *ldagap = 0; *flsizeKB = L2SIZE/1024; *nreps = NULL; ! @ROUT lanbtst ! *minNB = *maxNB = 0; ! *minPerc = *maxPerc = 0.0; *F77out = 0; *outnam = NULL; *mflop = 0; *NBminbnd = *NBmaxbnd = NULL; @ROUT qrtstGF *thresh = 100.0; ! @ROUT lanbtst qrtstGF for (i=1; i < nargs; i++) { --- 4136,4164 ---- * RETURNS: array of NBs, with NB[0] holding number of NBs to do */ ! @ROUT lanbtst qrtstGF lanbsrch { int *NBs=NULL, *ns=NULL, *ms=NULL, *ups=NULL, *sds=NULL, *ip; int i, k, n; ! @ROUT lanbtst qrtstGF ` *ROUTs = NULL;` *ldagap = 0; *flsizeKB = L2SIZE/1024; + @ROUT lanbsrch + *rout = LAgeqrf; + *nreps = 1; + *restN = *restM = 0; + @ROUT lanbtst qrtstGF *nreps = NULL; ! @ROUT lanbtst lanbsrch *F77out = 0; *outnam = NULL; *mflop = 0; + @ROUT lanbtst + *minNB = *maxNB = 0; + *minPerc = *maxPerc = 0.0; *NBminbnd = *NBmaxbnd = NULL; @ROUT qrtstGF *thresh = 100.0; ! 
@ROUT lanbtst qrtstGF lanbsrch for (i=1; i < nargs; i++) { *************** *** 4147,4150 **** --- 4167,4181 ---- switch(args[i][1]) { + @ROUT lanbsrch + case 'r' : /* one dimension is restricted */ + if (++i >= nargs) + PrintUsage(args[0], i, NULL); + if (++i >= nargs) + PrintUsage(args[0], i, NULL); + if (args[i-1] == 'm' || args[i-1] == 'M') + *restM = atoi(args[i]); + else + *restN = atoi(args[i]); + break; @ROUT lanbtst case '%': /* -% <min%> <minNB> <max%> <maxNB> */ *************** *** 4162,4165 **** --- 4193,4197 ---- *maxNB = atoi(args[i]); break; + @ROUT lanbtst lanbsrch case 'o': /* -o[f,c,b] <file> */ *F77out = (args[i][2] == 'f' || args[i][2] == 'F'); *************** *** 4227,4234 **** --- 4259,4268 ---- i += 3; break; + @ROUT lanbtst qrtstGF lanbsrch case 'R': /* -R # <rout1> ... <routN#> */ *ROUTs = RoutNames2IntList(nargs, args, i); i += (*ROUTs)[0] + 1; break; + @ROUT lanbtst qrtstGF case '#': /* set nreps */ if (args[i][2] == 't') /* -#t N1 reps1 ... Nt repst */ *************** *** 4244,4247 **** --- 4278,4282 ---- } break; + @ROUT lanbtst qrtstGF lanbsrch case 'f': /* -f <flushKB> */ if (++i >= nargs) *************** *** 4249,4253 **** *flsizeKB = atoi(args[i]); break; ! @ROUT lanbtst case 'F': /* -F <mflop> */ if (++i >= nargs) --- 4284,4288 ---- *flsizeKB = atoi(args[i]); break; ! @ROUT lanbtst lanbsrch case 'F': /* -F <mflop> */ if (++i >= nargs) *************** *** 4303,4307 **** } break; ! @ROUT lanbtst qrtstGF case 'a': /* -a <ldagap> */ if (++i >= nargs) --- 4338,4342 ---- } break; ! @ROUT lanbtst qrtstGF lanbsrch case 'a': /* -a <ldagap> */ if (++i >= nargs) *************** *** 4316,4319 **** --- 4351,4355 ---- * Take default values */ + @ROUT lanbtst qrtstGF if (!(*nreps)) *nreps = GetIntList2(0, 1); *************** *** 4327,4330 **** --- 4363,4367 ---- if (!NBs) NBs = IntRange2IntList(4, 128, 4); + @ROUT lanbtst lanbsrch if (!ups) ups = GetIntList1(LALower); *************** *** 4335,4343 **** *Ns = ns; *Ms = ms; ! 
@ROUT lanbtst *UPLOs = ups; *SDs = sds; return(NBs); ! @ROUT lanbtst qrtstGF } --- 4372,4381 ---- *Ns = ns; *Ms = ms; ! @ROUT lanbtst lanbsrch *UPLOs = ups; *SDs = sds; + @ROUT lanbtst return(NBs); ! @ROUT lanbtst qrtstGF lanbsrch } *************** *** 4405,4409 **** return(mflop); } ! @ROUT lanbtst @beginskip /* this is now done in src/auxil/ATL_flushCacheByAddr.c & atlas_aux.h */ --- 4443,4447 ---- return(mflop); } ! @ROUT lanbtst lanbsrch @beginskip /* this is now done in src/auxil/ATL_flushCacheByAddr.c & atlas_aux.h */ *************** *** 5031,5034 **** --- 5069,5073 ---- } + @ROUT lanbtst int GetNBMinBnd(int N, int *NBminbnd) { *************** *** 5189,5190 **** --- 5228,5444 ---- return(0); } + @ROUT lanbsrch + + double GetMultSampleTimes + ( + int verb, /* 0 : no output, else print mflop on line */ + int retavg, /* 0: return min time, else return average */ + int nsample, /* # of times to repeat timing */ + int nsecs, /* max # secs to spend repeating */ + int flshszKB, /* KB to flush */ + int mflopF /* mflops to force in one timing*/ + int rout, /* routine to time */ + int uplo, + int side, + int M, int N, int lda, /* prob dims */ + int nb /* nb to use */ + ) + /* + * Returns the minimum of NSAMP timing samples, where NSAMP is either nsample, + * or nsecs seconds of execution, whichever happens first. + */ + { + double t0, t1, time, mintime, avgtime; + + avgtime = mintime = 0.0; + t0 = time00(); + for (i=0; i < nreps; i++) + { + time = GetTime(rout, mflopF, lda, M, N, nb, uplo, side, flshszKB); + if (verb) + printf("%.2f", Time2Flops(rout, uplo, side, M, N, time)); + avgtime += time; + if (mintime) + mintime = (mintime <= time) ? mintime : time; + else + mintime = time; + t1 = time00() - t0; + if (t1 > nsecs) break; + } + if (verb) + printf("\n"); + return(retavg ? 
avgtime/i : mintime); + } + + int findNB + ( + int rout, /* routine to time */ + int nreps, /* # of times to repeat timing */ + int nsecs, /* max # secs to spend repeating */ + int flshszKB, /* KB to flush */ + int mflopF, /* mflops to force in one timing*/ + int M, int N, int lda, /* prob dims */ + int minNB, int maxNB /* smallest & largest NB to try */ + ) + { + int i, nbB, munu; + double time, mintime; + + fprintf(" FINDING NB FOR M=%d, N=%d, LDA=%d:\n", M, N, lda); + /* + * Try all multiples of NB between min & max NB + */ + nbB = i = ((minNB+NB-1)/NB)*NB; /* smallest mul of NB bigger than min */ + mintime = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF rout, + uplo, side, M, N, lda, i) + for (i += NB; i <= maxNB; i += NB) + { + fprintf(" nb=%d: ", i); + time = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF rout, + uplo, side, M, N, lda, i) + if (time < mintime) + { + mintime = time; + nbB = i; + } + } + /* + * Try all multiples of lcm(MU,NU) and 4 between the NBs on either side of + * the best found NB -- for now use max instead of lcm + */ + munu = Mmax(MU, NU); + for (i=nbB-NB+1; i < nbB+NB; i++) + { + if (i == nbB || (i%4 && i%munu)) continue; + printf(" nb=%d: ", i); + time = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF rout, + uplo, side, M, N, lda, i) + printf(" --> %.2f MFLOP.\n", Time2Flops(rout, uplo, M, N, time); + if (time < mintime) + { + mintime = time; + nbB = i; + } + } + *timeB = mintime; + return(nbB); + } + + int *FindAllNBs + ( + int rout, /* routine to time */ + int nreps, /* # of times to repeat timing */ + int nsecs, /* max # secs to spend repeating */ + int flshszKB, /* KB to flush */ + int mflopF, /* mflops to force in one timing*/ + int iL, /* index in nnb to find left case */ + int iR, /* index in nnb to find right case */ + int *nnb /* array of all N/NB combos found so far */ + ) + /* + * Recursively searches the dimension space for good NBs given two known + * boundary points + */ + { + int m, n, k, 
lda, i, iM; + int *tp; + + k = (nnb[iR] + nnb[iL])>>1; + if (nnb[iR] - k < NB) /* don't get finer grained than NB */ + return(nnb); + m = (restM) ? restM : k; + n = (restN) ? restN : k; + lda = m + ldagap; + /* + * If nnb is too short, get roughly a double-length array and continue + */ + i = nnb[1]; + if (i+2 > nnb[0]) + { + tp = malloc(sizeof(int)*(i+i)); + assert(tp); + memcpy(tp, nnb, (i)*sizeof(int)); + free(nnb); + nnb = tp; + tp[0] = i+i; + } + /* + * Find best NB for a problem midway between left and right N's + */ + iM = i; + nnb[iM] = k; + nnb[iM+1] = findNB(rout, nreps, nsecs, flshszKB, mflopF, m, n, lda, + nnb[iL+1], nnb[iR+1]); + nnb[1] = i + 2; + printf(" M=%d, N=%d, lda=%d, NB=%d\n", m, n, lda, nnb[iM+1]); + + /* + * If middle NB greater than left NB, must search space between + */ + if (nnb[iM+1] > nnb[iL+1]) + nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, iL, iM, nnb); + /* + * If middle NB less than right NB, must search space between + */ + if (nnb[iM+1] < nnb[iR+1]) + nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, iM, iR, nnb); + return(nnb); + } + + int *FindNBByN + ( + int rout, /* routine to time */ + int nreps, /* # of times to repeat timing */ + int nsecs, /* max # secs to spend repeating */ + int flshszKB, /* KB to flush */ + int mflopF, /* mflops to force in one timing*/ + int maxN, /* largest dimension to try */ + int restM, /* 0 : vary M wt tuning dim, else fixed size */ + int restN, /* 0 : vary N wt tuning dim, else fixed size */ + int ldagap /* lda = ldagap+M */ + ) + /* + * RETURNS: integer array nnb: nnb[0] holds the array length, nnb[1] holds + * the number of used entries. For all other entries, even entries + * hold a unique N, and the next entry (odd) is the best NB to use + * The Ns are not sorted (will be in recursive order) + */ + { + int *nnb; + int m, n, lda; + double t0, tN; + /* + * Find best NB for minimum problem (dimension 4) + */ + nnb = malloc(sizeof(int)*4000); + nnb[2] = 4; + m = (restM) ? 
restM : 4; + n = (restN) ? restN : 4; + lda = m + ldagap; + nnb[3] = findNB(rout, nreps, nsecs, flshszKB, mflopF, m, n, lda, 1, 3); + /* + * Find best NB for maximum size problem + */ + nnb[4] = maxN; + m = (restM) ? restM : maxN; + n = (restN) ? restN : maxN; + lda = m + ldagap; + nnb[5] = findNB(rout, nreps, nsecs, flshszKB, mflopF, m, n, lda, + nb[3], 10*NB); + + nnb[0] = 4000; /* length of array in pos 0 */ + nnb[1] = 6; /* number of entries used so far in pos 1 */ + nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, 2, 4, nnb); + } + int main(int nargs, char **args) + /* + * lanbsrch is a specialization of lanbtst which tries to find good LAPACK + * blocking factors much faster. It does this by assuming good NBs rise + * monotonically with N (this is not strictly true, but shouldn't cost us + * too much performance when wrong; lack of monotonic increase probably + * down to particular problem sizes and cleanup), and by using recursive + * halving to quickly search the space + */ + { + return(0); + } |