[Math-atlas-commits] CVS: AtlasBase/Clint atlas-make.base, 1.239, 1.240 atlas-tlp.base, 1.58, 1.59
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-11-17 17:07:27
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv26883/Clint Modified Files: atlas-make.base atlas-tlp.base Log Message: Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.239 retrieving revision 1.240 diff -C2 -d -r1.239 -r1.240 *** atlas-make.base 15 Nov 2009 00:39:54 -0000 1.239 --- atlas-make.base 17 Nov 2009 17:07:14 -0000 1.240 *************** *** 2918,2921 **** --- 2918,2928 ---- $(ATLRUN) $(BINdir) x@(pre)uumtst@(suf) -n 167 -O 2 c r -U 2 u l \ >> $(BINdir)/@low@(pt)sanity.out + @whiledef rt lanbsrch + x@(pre)@(rt)@(suf) : @(pre)@(rt)@(suf).o @(pre)lapack @(pre)tstlib@(suf) @(pre)l3lib@(suf) \ + @(pre)l2lib@(suf) @(pre)l1lib@(suf) @(pre)cblas@(suf) + $(FLINKER) $(FCLINKFLAGS) -o $@ @(pre)@(rt)@(suf).o \ + $(TESTlib) $(LAPACKlib) $(@(pt)CBLASlib) $(@(pt)F77BLASlib) \ + $(ATLASlib) $(LIBS) + @endwhile @whiledef rt lu llt slv uum trtri inv lanb qr x@(pre)@(rt)_all@(suf) : x@(pre)@(rt)tst@(suf) x@(pre)@(rt)tstF@(suf) \ *************** *** 3084,3087 **** --- 3091,3105 ---- $(INCAdir)/@(pre)mm.h $(INCSdir)/atlas_lvl3.h $(ICC) -o $@ -c $(ICCFLAGS) -DATLCINT @(df) -D@(typ) $(mySRCdir)/l3blastst.c + @whiledef rt lanbsrch + @(pre)@(rt)@(pt).o : $(mySRCdir)/@(rt).c $(INCSdir)/atlas_misc.h \ + $(INCAdir)/@(pre)mm.h $(INCSdir)/atlas_lvl3.h + $(ICC) -o $@ -c $(ICCFLAGS) -DATLCINT $(intf) @(df) -D@(typ) $(mySRCdir)/@(rt).c + @(pre)@(rt)C@(pt).o : $(mySRCdir)/@(rt).c $(INCSdir)/atlas_misc.h \ + $(INCAdir)/@(pre)mm.h $(INCSdir)/atlas_lvl3.h + $(ICC) -o $@ -c $(ICCFLAGS) -DATLCINT -DTimeC @(df) -D@(typ) $(mySRCdir)/@(rt).c + @(pre)@(rt)F@(pt).o : $(mySRCdir)/@(rt).c $(INCSdir)/atlas_misc.h \ + $(INCAdir)/@(pre)mm.h $(INCSdir)/atlas_lvl3.h + $(ICC) -o $@ -c $(ICCFLAGS) -DATLCINT -DTimeF77 @(df) -D@(typ) $(mySRCdir)/@(rt).c + @endwhile @whiledef rt uum llt lu slv trtri inv lanb qr @(pre)@(rt)tst@(pt).o : $(mySRCdir)/@(rt)tst.c $(INCSdir)/atlas_misc.h \ Index: atlas-tlp.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-tlp.base,v retrieving revision 1.58 retrieving revision 1.59 diff -C2 -d -r1.58 -r1.59 *** atlas-tlp.base 17 Nov 2009 00:38:53 -0000 1.58 --- atlas-tlp.base 17 Nov 2009 17:07:14 -0000 1.59 *************** *** 16,21 **** #include "atlas_tst.h" #include "atlas_level1.h" ! @ROUT lanbtst `#include <string.h>` ! @ROUT lanbtst `#include <ctype.h>` #ifdef GCCWIN ___main(){} __main(){} MAIN__(){} _MAIN_(){} --- 16,25 ---- #include "atlas_tst.h" #include "atlas_level1.h" ! @ROUT lanbsrch ! #include "atlas_lvl3.h" ! @ROUT lanbtst lanbsrch ! #include <string.h> ! #include <ctype.h> ! @ROUT lanbtst slvtst lanbsrch #ifdef GCCWIN ___main(){} __main(){} MAIN__(){} _MAIN_(){} *************** *** 27,31 **** double time00(); ! @ROUT lanbtst #define CAN_NB 32 #include "C_lapack.h" --- 31,35 ---- double time00(); ! @ROUT lanbtst lanbsrch #define CAN_NB 32 #include "C_lapack.h" *************** *** 240,244 **** } ! @ROUT GetMats slvtst lanbtst TYPE *GetGE(int M, int N, int lda) { --- 244,248 ---- } ! @ROUT GetMats slvtst lanbtst lanbsrch TYPE *GetGE(int M, int N, int lda) { *************** *** 3778,3782 **** exit(0); } ! @ROUT ilaenvF.c lanbtst static int ONB=0; /* optimal NB to return in ILAENV */ #if defined(ATL_USEPTHREADS) && defined(ATL_USE_ATL_ILAENV) --- 3782,3786 ---- exit(0); } ! @ROUT ilaenvF.c lanbtst lanbsrch static int ONB=0; /* optimal NB to return in ILAENV */ #if defined(ATL_USEPTHREADS) && defined(ATL_USE_ATL_ILAENV) *************** *** 3983,3987 **** @ROUT lanbsrch fprintf(stderr, " -r m/n # : restrict given dimension to #\n"); ! fprintf(stderr, " -N # : largest problem to tune for is #\n") fprintf(stderr, " -# <#> : repeat each timing # times\n"); fprintf(stderr, --- 3987,3991 ---- @ROUT lanbsrch fprintf(stderr, " -r m/n # : restrict given dimension to #\n"); ! fprintf(stderr, " -N # : largest problem to tune for is #\n"); fprintf(stderr, " -# <#> : repeat each timing # times\n"); fprintf(stderr, *************** *** 4120,4130 **** } @ROUT qrtstGF void GetFlags(int nargs, char **args, int *flsizeKB, TYPE *thresh, int **nreps, int **ROUTs, int *ldagap, int **Ms, int **Ns) @ROUT lanbsrch ! void GetFlags(int nargs, char **args, int **nreps, int *flsizeKB, int *mflop, ! int *rout, int *ldagap, int *maxN, int **UPLOs, int **SDs, ! int *F77out, char **outnam, int *restM, int *restN) @ROUT lanbtst int *GetFlags(int nargs, char **args, int **nreps, int *flsizeKB, int *mflop, --- 4124,4201 ---- } + @ROUT lanbsrch + #include "atlas_mmparse.h" + int GetGoodNForFlopRate(int rout, double nsec, int restM, int restN) + /* + * Reads speed of kernel code, and computes the problem size that can be + * completed in nsec seconds + */ + { + ATL_mmnode_t *mmp; + double floprate, time; + int m, n, k; + char pre; + double GetFlopCount(enum ATL_LAROUT rout, enum ATL_LAFLG flags, + int M, int N, int KL, int KU, int nb); + + #ifdef SCPLX + pre = 'c'; + #elif defined (DCPLX) + pre = 'z'; + #elif defined(SREAL) + pre = 's'; + #else + pre = 'd'; + #endif + #ifdef ATL_USEPTHREADS + k = ((4000+NB-1)/NB)*NB; + #else + k = ((2000+NB-1)/NB)*NB; + #endif + mmp = ReadMMFileWithPath(pre, "INSTALL_LOG", "MMRES.sum"); + if (!mmp) + return(k); + if (mmp->next) + floprate = mmp->next->mflop[0]; + else + floprate = mmp->mflop[0]; + KillAllMMNodes(mmp); + #ifdef ATL_USEPTHREADS + floprate *= 0.8*ATL_NCPU; + #endif + if (floprate < 100) /* nonsensical result */ + return(k); + k = (600/NB)*NB; + do + { + k += NB; + m = (restM) ? restM : k; + n = (restN) ? restN : k; + time = GetFlopCount(rout, 0, m, n, 0, 0, NB) / (floprate*1000000.0); + } + while (time < nsec); + k -= NB; + /* + * Don't let K be too huge, or extremely tiny regardless of computation + */ + if (restM || restN) + { + if (k > 50000) + k = 50000; + } + else if (k > 10000) + k = 10000; + if (k < 500) + k = 500; + return(k); + } @ROUT qrtstGF void GetFlags(int nargs, char **args, int *flsizeKB, TYPE *thresh, int **nreps, int **ROUTs, int *ldagap, int **Ms, int **Ns) @ROUT lanbsrch ! void GetFlags(int nargs, char **args, int *nreps, int *flsizeKB, int *mflop, ! int *secs, int *rout, int *ldagap, int *maxN, ! int **UPLOs, int **SDs, int *F77out, char **outnam, ! int *restM, int *restN) @ROUT lanbtst int *GetFlags(int nargs, char **args, int **nreps, int *flsizeKB, int *mflop, *************** *** 4145,4150 **** *flsizeKB = L2SIZE/1024; @ROUT lanbsrch *rout = LAgeqrf; ! *nreps = 1; *restN = *restM = 0; @ROUT lanbtst qrtstGF --- 4216,4228 ---- *flsizeKB = L2SIZE/1024; @ROUT lanbsrch + #ifdef ATL_USEPTHREADS + *maxN = ((4000+NB-1)/NB)*NB; + #else + *maxN = ((2000+NB-1)/NB)*NB; + #endif + *maxN = 0; *rout = LAgeqrf; ! *nreps = 3; ! *secs = 5; *restN = *restM = 0; @ROUT lanbtst qrtstGF *************** *** 4168,4171 **** --- 4246,4254 ---- { @ROUT lanbsrch + case 'R': /* -R # <rout1> ... <routN#> */ + NBs = RoutNames2IntList(nargs, args, i); + i += NBs[0] + 1; + *rout = NBs[1]; + break; case 'r' : /* one dimension is restricted */ if (++i >= nargs) *************** *** 4173,4181 **** if (++i >= nargs) PrintUsage(args[0], i, NULL); ! if (args[i-1] == 'm' || args[i-1] == 'M') *restM = atoi(args[i]); else *restN = atoi(args[i]); break; @ROUT lanbtst case '%': /* -% <min%> <minNB> <max%> <maxNB> */ --- 4256,4269 ---- if (++i >= nargs) PrintUsage(args[0], i, NULL); ! if (args[i-1][0] == 'm' || args[i-1][0] == 'M') *restM = atoi(args[i]); else *restN = atoi(args[i]); break; + case 'N': + if (++i >= nargs) + PrintUsage(args[0], i, NULL); + *maxN = atoi(args[i]); + break; @ROUT lanbtst case '%': /* -% <min%> <minNB> <max%> <maxNB> */ *************** *** 4259,4263 **** i += 3; break; ! @ROUT lanbtst qrtstGF lanbsrch case 'R': /* -R # <rout1> ... <routN#> */ *ROUTs = RoutNames2IntList(nargs, args, i); --- 4347,4351 ---- i += 3; break; ! @ROUT lanbtst qrtstGF case 'R': /* -R # <rout1> ... <routN#> */ *ROUTs = RoutNames2IntList(nargs, args, i); *************** *** 4360,4363 **** --- 4448,4457 ---- if (!ms) ms = GetIntList1(0); + @ROUT lanbsrch + if (!(*maxN)) + { + *maxN = GetGoodNForFlopRate(*rout, 0.5, *restM, *restN); + fprintf(stderr, "maxN = %d\n", *maxN); + } @ROUT lanbtst if (!NBs) *************** *** 4381,4390 **** double GetFlopCount(enum ATL_LAROUT rout, enum ATL_LAFLG flags, ! int M, int N, int KL, int KU, int NB) /* * These numbers copied from LAPACK timer routines TIMING/[EIG,LIN]/dopla[2].f */ { ! double m=(M?M:N), n=N, kl=KL, ku=ku, nb=NB, mn; double adds=0.0, muls=0.0; --- 4475,4484 ---- double GetFlopCount(enum ATL_LAROUT rout, enum ATL_LAFLG flags, ! int M, int N, int KL, int KU, int nb) /* * These numbers copied from LAPACK timer routines TIMING/[EIG,LIN]/dopla[2].f */ { ! double m=(M?M:N), n=N, kl=KL, ku=ku, mn; double adds=0.0, muls=0.0; *************** *** 5237,5241 **** int nsecs, /* max # secs to spend repeating */ int flshszKB, /* KB to flush */ ! int mflopF /* mflops to force in one timing*/ int rout, /* routine to time */ int uplo, --- 5331,5335 ---- int nsecs, /* max # secs to spend repeating */ int flshszKB, /* KB to flush */ ! int mflopF, /* mflops to force in one timing*/ int rout, /* routine to time */ int uplo, *************** *** 5250,5261 **** { double t0, t1, time, mintime, avgtime; avgtime = mintime = 0.0; t0 = time00(); ! for (i=0; i < nreps; i++) { time = GetTime(rout, mflopF, lda, M, N, nb, uplo, side, flshszKB); if (verb) ! printf("%.2f", Time2Flops(rout, uplo, side, M, N, time)); avgtime += time; if (mintime) --- 5344,5358 ---- { double t0, t1, time, mintime, avgtime; + int i; + extern int ONB; + ONB = nb; avgtime = mintime = 0.0; t0 = time00(); ! for (i=0; i < nsample; i++) { time = GetTime(rout, mflopF, lda, M, N, nb, uplo, side, flshszKB); if (verb) ! printf("%.2f ", Time2Flops(rout, uplo, M, N, time)); avgtime += time; if (mintime) *************** *** 5264,5271 **** mintime = time; t1 = time00() - t0; ! if (t1 > nsecs) break; } - if (verb) - printf("\n"); return(retavg ? avgtime/i : mintime); } --- 5361,5366 ---- mintime = time; t1 = time00() - t0; ! if (nsecs && t1 > nsecs) break; } return(retavg ? avgtime/i : mintime); } *************** *** 5278,5300 **** int flshszKB, /* KB to flush */ int mflopF, /* mflops to force in one timing*/ int M, int N, int lda, /* prob dims */ int minNB, int maxNB /* smallest & largest NB to try */ ) { ! int i, nbB, munu; ! double time, mintime; ! fprintf(" FINDING NB FOR M=%d, N=%d, LDA=%d:\n", M, N, lda); /* * Try all multiples of NB between min & max NB */ ! nbB = i = ((minNB+NB-1)/NB)*NB; /* smallest mul of NB bigger than min */ ! mintime = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF rout, ! uplo, side, M, N, lda, i) ! for (i += NB; i <= maxNB; i += NB) { ! fprintf(" nb=%d: ", i); ! time = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF rout, ! uplo, side, M, N, lda, i) if (time < mintime) { --- 5373,5410 ---- int flshszKB, /* KB to flush */ int mflopF, /* mflops to force in one timing*/ + int uplo, int side, int M, int N, int lda, /* prob dims */ int minNB, int maxNB /* smallest & largest NB to try */ ) { ! int i, nbB, n, mul1, mul2; ! double time, mintime, lasttime; ! printf(" FINDING NB FOR M=%d, N=%d, LDA=%d:\n", M, N, lda); /* * Try all multiples of NB between min & max NB */ ! i = ((minNB+NB-1)/NB)*NB; /* smallest mul of NB >= min */ ! nbB = Mmin(M,N)/2; ! if (!nbB) ! nbB = 1; ! if (nbB > i) ! nbB = i; ! else ! i = nbB; ! printf(" nb=%d: ", i); ! mintime = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF, rout, ! uplo, side, M, N, lda, i); ! printf("--> %.2f MFLOP.\n", Time2Flops(rout, uplo, M, N, mintime)); ! n = Mmin(N,M)>>1; ! n = Mmin(maxNB, n); ! n = (n/NB)*NB; ! for (i += NB; i <= n; i += NB) { ! printf(" nb=%d: ", i); ! time = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF, rout, ! uplo, side, M, N, lda, i); ! printf("--> %.2f MFLOP.\n", Time2Flops(rout, uplo, M, N, time)); ! if (mintime*1.05 < time) break; /* quit once we see big perf loss */ if (time < mintime) { *************** *** 5303,5318 **** } } /* ! * Try all multiples of lcm(MU,NU) and 4 between the NBs on either side of ! * the best found NB -- for now use max instead of lcm */ ! munu = Mmax(MU, NU); ! for (i=nbB-NB+1; i < nbB+NB; i++) { ! if (i == nbB || (i%4 && i%munu)) continue; printf(" nb=%d: ", i); ! time = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF rout, ! uplo, side, M, N, lda, i) ! printf(" --> %.2f MFLOP.\n", Time2Flops(rout, uplo, M, N, time); if (time < mintime) { --- 5413,5462 ---- } } + printf("\n NB refinement search:\n"); /* ! * Try all multiples of lcm(MU,NU) the NBs on either side of the best found NB ! * If lcm is large, and nbB is small, also try all modulo 4 cases */ ! mul1 = ATL_lcm(ATL_mmMU, ATL_mmNU); ! if (nbB >= 2*NB) /* big problems take large granularity refinement */ { ! while (mul1 < 8) ! mul1 *= 2; ! } ! else /* small problems mandate at least 4 granularity */ ! { ! while (mul1 < 4) /* too long to scope extremely small mul */ ! mul1 *= 2; ! } ! if (mul1 > 6 && nbB <= NB) ! mul2 = 4; ! else ! mul2 = 0; ! n = Mmin(nbB,N); ! if (mul2) ! n = nbB + NB - mul2; ! else ! n = nbB + NB - mul1; ! if (n > N/2) ! n = N/2; ! if (n > M/2) ! n = M/2; ! i = nbB - NB + mul1; ! if (i < 1) i = 1; ! for (; i < n; i++) ! { ! if (i == nbB) continue; /* already timed */ ! if (mul2) ! { ! if (i%mul2 && i%mul1) continue; ! } ! else if (i%mul1) continue; ! if (mul2 && i%mul2) continue; /* not mul of 4 if MU,NU too big */ ! if (i%mul1) continue; /* not a multiple of MU & NU */ ! printf(" nb=%d: ", i); ! time = GetMultSampleTimes(1, 0, nreps, nsecs, flshszKB, mflopF, rout, ! uplo, side, M, N, lda, i); ! printf(" --> %.2f MFLOP.\n", Time2Flops(rout, uplo, M, N, time)); if (time < mintime) { *************** *** 5321,5325 **** } } ! *timeB = mintime; return(nbB); } --- 5465,5469 ---- } } ! printf(" FOR M=%d, N=%d, LDA=%d, BEST NB=%d\n\n", M, N, lda, nbB); return(nbB); } *************** *** 5332,5335 **** --- 5476,5484 ---- int flshszKB, /* KB to flush */ int mflopF, /* mflops to force in one timing*/ + int uplo, + int side, + int restM, + int restN, + int ldagap, int iL, /* index in nnb to find left case */ int iR, /* index in nnb to find right case */ *************** *** 5345,5350 **** k = (nnb[iR] + nnb[iL])>>1; ! if (nnb[iR] - k < NB) /* don't get finer grained than NB */ ! return(nnb); m = (restM) ? restM : k; n = (restN) ? restN : k; --- 5494,5500 ---- k = (nnb[iR] + nnb[iL])>>1; ! k = (k/NB)*NB; /* keep N a mul of NB to avoid cleanup affects */ ! if (k-nnb[iL] < NB || nnb[iR]-k < NB) ! return(nnb); /* don't get finer grained than NB in search */ m = (restM) ? restM : k; n = (restN) ? restN : k; *************** *** 5357,5361 **** { tp = malloc(sizeof(int)*(i+i)); ! assert(tp); memcpy(tp, nnb, (i)*sizeof(int)); free(nnb); --- 5507,5511 ---- { tp = malloc(sizeof(int)*(i+i)); ! ATL_assert(tp); memcpy(tp, nnb, (i)*sizeof(int)); free(nnb); *************** *** 5368,5373 **** iM = i; nnb[iM] = k; ! nnb[iM+1] = findNB(rout, nreps, nsecs, flshszKB, mflopF, m, n, lda, ! nnb[iL+1], nnb[iR+1]); nnb[1] = i + 2; printf(" M=%d, N=%d, lda=%d, NB=%d\n", m, n, lda, nnb[iM+1]); --- 5518,5523 ---- iM = i; nnb[iM] = k; ! nnb[iM+1] = findNB(rout, nreps, nsecs, flshszKB, mflopF, uplo, side, ! m, n, lda, nnb[iL+1], nnb[iR+1]); nnb[1] = i + 2; printf(" M=%d, N=%d, lda=%d, NB=%d\n", m, n, lda, nnb[iM+1]); *************** *** 5377,5386 **** */ if (nnb[iM+1] > nnb[iL+1]) ! nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, iL, iM, nnb); /* * If middle NB less than right NB, must search space between */ if (nnb[iM+1] < nnb[iR+1]) ! nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, iM, iR, nnb); return(nnb); } --- 5527,5538 ---- */ if (nnb[iM+1] > nnb[iL+1]) ! nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, uplo, side, ! restM, restN, ldagap, iL, iM, nnb); /* * If middle NB less than right NB, must search space between */ if (nnb[iM+1] < nnb[iR+1]) ! nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, uplo, side, ! restM, restN, ldagap, iM, iR, nnb); return(nnb); } *************** *** 5393,5396 **** --- 5545,5550 ---- int flshszKB, /* KB to flush */ int mflopF, /* mflops to force in one timing*/ + int side, + int uplo, int maxN, /* largest dimension to try */ int restM, /* 0 : vary M wt tuning dim, else fixed size */ *************** *** 5412,5420 **** */ nnb = malloc(sizeof(int)*4000); ! nnb[2] = 4; ! m = (restM) ? restM : 4; ! n = (restN) ? restN : 4; lda = m + ldagap; ! nnb[3] = findNB(rout, nreps, nsecs, flshszKB, mflopF, m, n, lda, 1, 3); /* * Find best NB for maximum size problem --- 5566,5575 ---- */ nnb = malloc(sizeof(int)*4000); ! nnb[2] = NB; ! m = (restM) ? restM : NB; ! n = (restN) ? restN : NB; lda = m + ldagap; ! nnb[3] = findNB(rout, nreps, nsecs, flshszKB, mflopF, uplo, side, ! m, n, lda, 1, NB/2); /* * Find best NB for maximum size problem *************** *** 5424,5434 **** n = (restN) ? restN : maxN; lda = m + ldagap; ! nnb[5] = findNB(rout, nreps, nsecs, flshszKB, mflopF, m, n, lda, ! nb[3], 10*NB); nnb[0] = 4000; /* length of array in pos 0 */ nnb[1] = 6; /* number of entries used so far in pos 1 */ ! nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, 2, 4, nnb); } int main(int nargs, char **args) /* --- 5579,5668 ---- n = (restN) ? restN : maxN; lda = m + ldagap; ! nnb[5] = findNB(rout, nreps, nsecs, flshszKB, mflopF, uplo, side, m, n, lda, ! nnb[3], 10*NB); nnb[0] = 4000; /* length of array in pos 0 */ nnb[1] = 6; /* number of entries used so far in pos 1 */ ! nnb = FindAllNBs(rout, nreps, nsecs, flshszKB, mflopF, uplo, side, ! restM, restN, ldagap, 2, 4, nnb); ! } ! ! void SortNNB(int *nnb) ! { ! int i, j, n, nmin, imin; ! ! n = nnb[1]; ! for (i=2; i < n-2; i += 2) /* simple selection sort on N */ ! { ! imin = i; ! nmin = nnb[i]; ! for (j=i+2; j < n; j += 2) ! { ! if (nnb[j] < nmin) ! { ! nmin = nnb[j]; ! imin = j; ! } ! } ! if (imin != i) ! { ! nnb[imin] = nnb[i]; ! j = nnb[imin+1]; ! nnb[imin+1] = nnb[i+1]; ! nnb[i] = nmin; ! nnb[i+1] = j; ! } ! } ! } ! ! void PrintNNB(int *nnb) ! { ! int i, n; ! ! printf(" N NB\n"); ! printf("========== =====\n"); ! n = nnb[1]; ! for (i=2; i < n; i += 2) ! printf("%10d %5d\n", nnb[i], nnb[i+1]); ! } ! ! int TransNNB(int rout, int restM, int restN, int side, int uplo, int *nnb, ! int **FLAGs, int **Ms, int **Ns, int **NBs) ! /* ! * Translates lanbsrch's nnb array into lanbtst's style in order to call ! * common WriteNBFile ! */ ! { ! int n, nn, i, j, iflag; ! int *ms, *ns, *nbs, *flags; ! nn = nnb[1]; ! n = (nn - 2)>>1; ! ms = malloc(sizeof(int)*n); ! ATL_assert(ms); ! ns = malloc(sizeof(int)*n); ! ATL_assert(ns); ! ns = malloc(sizeof(int)*n); ! ATL_assert(ns); ! nbs = malloc(sizeof(int)*n); ! ATL_assert(nbs); ! flags = malloc(sizeof(int)*n); ! ATL_assert(flags); ! ! iflag = Flags2Bitmap(rout, uplo, side); ! for (j=0, i=2; i < nn; j++, i += 2) ! { ! ms[j] = (restM) ? restM : nnb[i]; ! ns[j] = (restN) ? restN : nnb[i]; ! nbs[j] = nnb[i+1]; ! flags[j] = iflag; ! } ! ! *Ms = ms; ! *Ns = ns; ! *NBs = nbs; ! *FLAGs = flags; ! return(n); } + int main(int nargs, char **args) /* *************** *** 5441,5444 **** --- 5675,5698 ---- */ { + int nsample, flsizeKB, mflopF, rout, ldagap, maxN, F77out, restM, restN; + int nsecs, n; + int *UPLOs, *SIDEs; + int *nnb, *Ms, *Ns, *NBs, *flags; + char *outnam; + + GetFlags(nargs, args, &nsample, &flsizeKB, &mflopF, &nsecs, &rout, &ldagap, + &maxN, &UPLOs, &SIDEs, &F77out, &outnam, &restM, &restN); + nnb = FindNBByN(rout, nsample, nsecs, flsizeKB, mflopF, SIDEs[1], + UPLOs[1], maxN, restM, restN, ldagap); + SortNNB(nnb); + PrintNNB(nnb); + n = TransNNB(rout, restM, restN, SIDEs[1], UPLOs[1], nnb, + &flags, &Ms, &Ns, &NBs); + free(nnb); + WriteNbFile(F77out, outnam, n, flags, Ms, Ns, NBs); + free(Ms); + free(Ns); + free(NBs); + free(flags); return(0); } |