[Math-atlas-commits] CVS: AtlasBase/Clint atlas-lvl2.base, 1.26, 1.27 atlas-make.base, 1.192, 1.193
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-04-20 13:45:45
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv18504/Clint Modified Files: atlas-lvl2.base atlas-make.base atlas-parse.base Log Message: Index: atlas-lvl2.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-lvl2.base,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** atlas-lvl2.base 17 Apr 2009 23:40:54 -0000 1.26 --- atlas-lvl2.base 20 Apr 2009 13:45:37 -0000 1.27 *************** *** 5,11 **** @ROUT r1test mvtest gmvsearchT @extract -b @(topd)/gen.inc what=cw @(cw00) ! @ROUT ! r1test mvtest ATL_gpmvN_32x4_1 ATL_gpmvN_1x1_1a @\ ! mvscases.dsc mvdcases.dsc mvccases.dsc mvzcases.dsc @\ ! r1scases.dsc r1dcases.dsc r1ccases.dsc r1zcases.dsc mvktime @extract -b @(topd)/gen.inc what=cw @(cw99) --- 5,21 ---- @ROUT r1test mvtest gmvsearchT @extract -b @(topd)/gen.inc what=cw @(cw00) ! @ROUT mvsearch r1search ATL_symv gemvtune ger1tune ATL_cger1_axpy @\ ! ATL_cger1_2x1p ATL_cger1_dummy ATL_ger1_4x4_1 ATL_ger1_8x4_0 @\ ! ATL_ger1_1x4_0 atlas_r1.h ATL_ger ATL_gemv ATL_gemvN_dummy2 @\ ! ATL_gemvT_dummy2 ATL_cgemvN_4x2_1 ATL_cgemvN_2x2_0 ATL_cgemvN_mm @\ ! ATL_cgemvN_1x1_1 ATL_cgemvN_1x1_1a ATL_gemvN_4x4_1 ATL_gemvN_4x2_0 @\ ! ATL_gemvN_mm ATL_gemvN_8x32_2 ATL_gemvN_8x4_1 ATL_gemvN_16x2_1 @\ ! ATL_gemvN_16x4_1 ATL_gemvN_32x4_1 ATL_gemvN_1x1_1 ATL_gemvN_1x1_1a @\ ! ATL_gpmvN_32x4_1 ATL_gpmvN_1x1_1a ATL_cgemvT_2x4_1 ATL_cgemvT_2x2_0 @\ ! ATL_cgemvT_mm ATL_cgemvT_1x1_1 ATL_gemvT_4x8_1 ATL_gemvT_2x8_0 @\ ! ATL_gemvT_mm ATL_gemvT_4x16_1 TL_gemvT_2x16_1 ATL_gemvT_1x1_1 @\ ! ATL_trmvUT ATL_trmvUN ATL_trmvLN ATL_trmvLT ATL_trmv symv l2blastst @\ ! atlas_lvl2.h atlas_level2.h ATL_gemvN_dummy ATL_gemvT_2x16_1 @\ ! ATL_gemvT_dummy @extract -b @(topd)/gen.inc what=cw @(cw99) *************** *** 10352,10356 **** @skip -fomit-frame-pointer -O2 3 0 2 1 ATL_cger1_2x1p.c "R. Clint Whaley" ! @ROUT mvktime @extract -b @(topd)/gen.inc what=cw @(cw09) #include <stdio.h> --- 10362,10366 ---- @skip -fomit-frame-pointer -O2 3 0 2 1 ATL_cger1_2x1p.c "R. Clint Whaley" ! @ROUT mvktime mvksearch @extract -b @(topd)/gen.inc what=cw @(cw09) #include <stdio.h> *************** *** 10359,10365 **** #include <string.h> #include "atlas_misc.h" ! void ATL_UGEMV(ATL_CINT M, ATL_CINT N, const TYPE *A, ATL_CINT lda, ! const TYPE *X, const SCALAR beta, TYPE *Y); @multidef UR yu xu @whiledef TA T N --- 10369,10378 ---- #include <string.h> #include "atlas_misc.h" + #include Mstr(Mjoin(Mjoin(atlas_,PRE),sysinfo.h)) ! @ROUT mvktime ! void ATL_UGEMV(ATL_CINT M, ATL_CINT N, const SCALAR alpha, const TYPE *A, ! ATL_CINT lda, const TYPE *X, ATL_CINT incX, ! const SCALAR beta, TYPE *Y, ATL_CINT incY); @multidef UR yu xu @whiledef TA T N *************** *** 10393,10397 **** --- 10406,10412 ---- #ifdef TREAL #define BETA *beta + const TYPE alpha = ATL_rone; #else + const TYPE alpha[2] = {ATL_rone, ATL_rzero}; #define BETA beta #endif *************** *** 10417,10424 **** m = (m > Mp) ? Mp : m; @mif TA = "T ! ATL_UGEMV(m, N, A+i, lda, X+i, BETA, Y); @endmif @mif TA = "N ! ATL_UGEMV(m, N, A+i, lda, X, BETA, Y+i); @endmif } --- 10432,10439 ---- m = (m > Mp) ? Mp : m; @mif TA = "T ! ATL_UGEMV(m, N, alpha, A+(i SHIFT), lda, X+(i SHIFT), 1, BETA, Y, 1); @endmif @mif TA = "N ! ATL_UGEMV(m, N, alpha, A+(i SHIFT), lda, X, 1, BETA, Y+(i SHIFT), 1); @endmif } *************** *** 10451,10454 **** --- 10466,10474 ---- */ { + #ifdef TREAL + TYPE NONE = -1.0; + #else + TYPE NONE[2] = {-1.0, 0.0}; + #endif double t0, t1; TYPE *A, *X, *Y, *a, *x, *y; *************** *** 10463,10466 **** --- 10483,10492 ---- mvsim = (TA == AtlasNoTrans || TA == AtlasConj) ? mvsimN : mvsimT; + if (MAx) + assert(MAx != FAx); + if (MAy) + assert(MAy != FAy); + if (MAa) + assert(MAa != FAa); /* * Find basic length of each operand in elements *************** *** 10476,10480 **** maxalign = (FAx >= FAa) ? FAx : FAa; maxalign = (maxalign >= FAy) ? maxalign : FAy; ! if (!maxalign && (MAx | MAy | MAa)) { maxalign = (MAx >= MAa) ? MAx : MAa; --- 10502,10507 ---- maxalign = (FAx >= FAa) ? FAx : FAa; maxalign = (maxalign >= FAy) ? maxalign : FAy; ! maxalign = (maxalign >= MAx) ? maxalign : MAx; ! if (MAx | MAy | MAa) { maxalign = (MAx >= MAa) ? MAx : MAa; *************** *** 10484,10488 **** { j = (FAx) ? FAx : ATL_sizeof; ! for (i=0; (i % j != 0 || i%MAx == 0); i += j); pregap = i; } --- 10511,10515 ---- { j = (FAx) ? FAx : ATL_sizeof; ! for (i=0; (i % j != 0 || i%MAx == 0); i += ATL_sizeof); pregap = i; } *************** *** 10492,10496 **** { j = (FAy) ? FAy : ATL_sizeof; ! for (i=pregap+xgap; (i%j != 0 || i%MAy == 0); i += j); xgap = i - pregap; } --- 10519,10526 ---- { j = (FAy) ? FAy : ATL_sizeof; ! if (MAy) ! for (i=pregap+xgap; (i%j != 0 || i%MAy == 0); i += ATL_sizeof); ! else ! for (i=pregap+xgap; (i%j != 0); i += ATL_sizeof); xgap = i - pregap; } *************** *** 10499,10559 **** { j = (FAa) ? FAa : ATL_sizeof; ! for (i=pregap+xgap+ygap; (i%j != 0 || i%MAa == 0); i += j); ygap = i - pregap - xgap; } agap = ATL_MulBySize(Aelts); ! j = pregap; ! for (i=pregap+xgap+ygap+agap; i%maxalign != 0; i++); ! agap = i-xgap-ygap; ! setspan = xgap + ygap + agap; assert(setspan%ATL_sizeof == 0); setsz = ATL_MulBySize(M+N+M*N); nsets = (ATL_MulBySize(flushelts)+setsz-1)/setsz; vmem = malloc(maxalign + nsets*setspan); assert(vmem); ! for (ptr_st = (size_t)vmem; ptr_st%maxalign; ptr_st++); /* start maxaligned */ X = (TYPE*) (ptr_st + pregap); Y = (TYPE*) (ptr_st + pregap + xgap); A = (TYPE*) (ptr_st + pregap + xgap + ygap); /* * Initialize memory from greatest to least; just zero for now */ y = (TYPE*) (ptr_st + setspan); - setspan /= ATL_sizeof; for (i=setspan; i; i--) *y-- = ATL_rzero; /* * Set ptrs to last set in memory */ ! A += (nsets-1) * setspan; ! X += (nsets-1) * setspan; ! Y += (nsets-1) * setspan; ! #define DEBUG_FA ! #ifdef DEBUG_FA ! if (FAa) ! assert(((size_t)A)%FAa == 0); ! if (FAx) ! assert(((size_t)X)%FAx == 0); ! if (FAy) ! assert(((size_t)Y)%FAy == 0); ! if (MAa) ! assert(((size_t)A)%MAa != 0); ! if (MAx) ! assert(((size_t)X)%MAx != 0); ! if (MAy) ! assert(((size_t)Y)%MAy != 0); ! #endif ! a = A; ! x = X; ! y = Y; ! ! j=0; ! t0 = time00(); ! for (i=nreps; i; i--) { ! mvsim(celts, pgelts, xu, yu, TA, M, N, a, lda, x, beta, y); ! if (++j < nsets) { a -= setspan; x -= setspan; y -= setspan; } ! else { a = A; x = X; y = Y; } #ifdef DEBUG_FA if (FAa) --- 10529,10577 ---- { j = (FAa) ? FAa : ATL_sizeof; ! if (MAa) ! for (i=pregap+xgap+ygap; (i%j != 0 || i%MAa == 0); i += ATL_sizeof); ! else ! for (i=pregap+xgap+ygap; (i%j != 0); i += ATL_sizeof); ygap = i - pregap - xgap; } agap = ATL_MulBySize(Aelts); ! if (maxalign) ! { ! j = pregap; ! for (i=pregap+xgap+ygap+agap; i%maxalign != 0; i++); ! agap = i-xgap-ygap; ! } setspan = xgap + ygap + agap; assert(setspan%ATL_sizeof == 0); setsz = ATL_MulBySize(M+N+M*N); nsets = (ATL_MulBySize(flushelts)+setsz-1)/setsz; + if (!nsets) + nsets = 1; vmem = malloc(maxalign + nsets*setspan); assert(vmem); ! if (maxalign) /* start maxaligned to guarantee all alignments */ ! for (ptr_st = (size_t)vmem; ptr_st%maxalign; ptr_st++); ! else ptr_st = (size_t) vmem; X = (TYPE*) (ptr_st + pregap); Y = (TYPE*) (ptr_st + pregap + xgap); A = (TYPE*) (ptr_st + pregap + xgap + ygap); + @beginskip /* * Initialize memory from greatest to least; just zero for now */ y = (TYPE*) (ptr_st + setspan); for (i=setspan; i; i--) *y-- = ATL_rzero; + @endskip /* * Set ptrs to last set in memory */ ! setspan /= ATL_sizeof; ! a = A += (nsets-1) * setspan; ! x = X += (nsets-1) * setspan; ! y = Y += (nsets-1) * setspan; ! for (i=nsets; i; i--) { ! #define DEBUG_FA #ifdef DEBUG_FA if (FAa) *************** *** 10570,10575 **** --- 10588,10610 ---- assert(((size_t)y)%MAy != 0); #endif + Mjoin(PATL,gegen)(Yelts, 1, y, Yelts, M); + Mjoin(PATL,gegen)(Xelts, 1, x, Xelts, N+127*50+77); + if (i&1) + Mjoin(PATL,scal)(Xelts, NONE, x, 1); + Mjoin(PATL,gegen)(M, N, A, lda, N*M+513*7+90); + a -= setspan; x -= setspan; y -= setspan; + } + a = A; x = X; y = Y; + + j=0; + t0 = time00(); + for (i=nreps; i; i--) + { + mvsim(celts, pgelts, xu, yu, TA, M, N, a, lda, x, beta, y); + if (++j < nsets) { a -= setspan; x -= setspan; y -= setspan; } + else { a = A; x = X; y = Y; j=0; } } t1 = time00(); + free(vmem); t1 = (t1-t0) / nreps; return(t1); *************** *** 10588,10592 **** imin = i; min = d[i]; ! for (j=i+1; j; j++) { if (d[j] < min) --- 10623,10627 ---- imin = i; min = d[i]; ! for (j=i+1; j < N; j++) { if (d[j] < min) *************** *** 10604,10607 **** --- 10639,10650 ---- } + double Time2Flop(ATL_INT M, ATL_INT N, double time) + { + #ifdef TREAL + return(((1.0e-6 * M)*(2.0*N+1.0))/time); + #else + return((((6.0*M)*(N+1.0) + (2.0*M)*N)*1.0e-6)/time); + #endif + } void Times2Flops(ATL_INT M, ATL_INT N, ATL_INT ntim, double *mf) /* *************** *** 10609,10617 **** */ { ! const double mfm = ((2.0*M)*N) * 1.0e-6; int i; for (i=0; i < ntim; i++) ! mf[i] *= mfm; } --- 10652,10664 ---- */ { ! #ifdef TREAL ! const double mfm = (1.0e-6 * M)*(2.0*N+1.0); ! #else ! const double mfm = ((6.0*M)*(N+1.0) + (2.0*M)*N)*1.0e-6; ! #endif int i; for (i=0; i < ntim; i++) ! mf[i] = mfm / mf[i]; } *************** *** 10627,10634 **** } ! int DoTimes(ATL_INT flshelts, ATL_INT celts, ATL_INT pgelts, ATL_INT ntim, ! ATL_INT nrep, ATL_INT xu, ATL_INT yu, enum ATLAS_TRANS TA, ! ATL_INT M, ATL_INT N, ATL_INT lda, TYPE *beta, ! int FAa, int MAa, int FAx, int MAx, int FAy, int MAy) { double *times; --- 10674,10681 ---- } ! void DoTimes(ATL_INT flshelts, ATL_INT celts, ATL_INT pgelts, ATL_INT ntim, ! ATL_INT nrep, ATL_INT xu, ATL_INT yu, enum ATLAS_TRANS TA, ! ATL_INT M, ATL_INT N, ATL_INT lda, TYPE *beta, ! int FAa, int MAa, int FAx, int MAx, int FAy, int MAy) { double *times; *************** *** 10648,10657 **** #endif for (i=0; i < ntim; i++) ! times[i] = mvtime_OC(nrep, celts, flshelts, pgelts, TA, M, N, lda, beta, xu, yu, FAa, MAa, FAx, MAx, FAy, MAy); SortDoubles(ntim, times); fprintf(stdout, "NSAMPLES=%d, MAX=%.2f, MIN=%.2f, AVG=%.2f, MED=%.2f\n", ntim, times[0], times[ntim-1], mysum(ntim, times)/ntim, times[ntim>>1]); } --- 10695,10706 ---- #endif for (i=0; i < ntim; i++) ! times[i] = mvtime_OC(nrep, flshelts, celts, pgelts, TA, M, N, lda, beta, xu, yu, FAa, MAa, FAx, MAx, FAy, MAy); SortDoubles(ntim, times); + Times2Flops(M, N, ntim, times); fprintf(stdout, "NSAMPLES=%d, MAX=%.2f, MIN=%.2f, AVG=%.2f, MED=%.2f\n", ntim, times[0], times[ntim-1], mysum(ntim, times)/ntim, times[ntim>>1]); + free(times); } *************** *** 10661,10668 **** fprintf(stderr, "BAD ARG '%s' on %dth FLAG\n", arg, i); fprintf(stderr, "USAGE: %s [flags], where flags are:\n", name); ! fprintf(stderr, " -C <#> : set flushsz = # (bytes)\n"); ! fprintf(stderr, " -p <#> : set pagesz = # (bytes)\n"); fprintf(stderr, ! " -G <#> : set GEMV cache size (for blocking) to # (bytes)\n"); fprintf(stderr, " -x <#> : unrolling for X in kernel is #\n"); fprintf(stderr, " -y <#> : unrolling for Y in kernel is #\n"); --- 10710,10717 ---- fprintf(stderr, "BAD ARG '%s' on %dth FLAG\n", arg, i); fprintf(stderr, "USAGE: %s [flags], where flags are:\n", name); ! fprintf(stderr, " -C <#> : set flushsz = # (kbytes)\n"); ! fprintf(stderr, " -p <#> : set pagesz = # (kbytes)\n"); fprintf(stderr, ! " -G <#> : set GEMV cache size (for blocking) to # (kbytes)\n"); fprintf(stderr, " -x <#> : unrolling for X in kernel is #\n"); fprintf(stderr, " -y <#> : unrolling for Y in kernel is #\n"); *************** *** 10701,10705 **** *pgelts = 4*ATL_DivBySize(1024); #endif ! *celts = 32*ATL_DivBySize(1024); *flushelts = 8*1024*ATL_DivBySize(1024); *xu = *yu = 1; --- 10750,10754 ---- *pgelts = 4*ATL_DivBySize(1024); #endif ! *celts = 0.75*ATL_L1elts; *flushelts = 8*1024*ATL_DivBySize(1024); *xu = *yu = 1; *************** *** 10709,10712 **** --- 10758,10765 ---- *ntim = 3; *FAa = *MAa = *FAx = *MAx = *FAy = *MAy = 0; + *beta = 1.0; + #ifdef TCPLX + beta[1] = 0.0; + #endif for (i=1; i < nargs; i++) *************** *** 10716,10724 **** switch(args[i][1]) { ! case 'G' : /* set flushsz in bytes */ if (++i >= nargs) PrintUsage(args[0], "out of flags in -G ", i-1); j = atoi(args[i]); ! *celts = ATL_DivBySize(j); break; case 'A' : /* set transpose */ --- 10769,10777 ---- switch(args[i][1]) { ! case 'G' : /* set GEMV blocking cache size in KB */ if (++i >= nargs) PrintUsage(args[0], "out of flags in -G ", i-1); j = atoi(args[i]); ! *celts = j*ATL_DivBySize(1024); break; case 'A' : /* set transpose */ *************** *** 10735,10749 **** *TA = AtlasNoTrans; break; ! case 'C' : /* set flushsz in bytes */ if (++i >= nargs) PrintUsage(args[0], "out of flags in -C ", i-1); j = atoi(args[i]); ! *flushelts = ATL_DivBySize(j); break; ! case 'p' : /* set pagesz in bytes */ if (++i >= nargs) PrintUsage(args[0], "out of flags in -p ", i-1); j = atoi(args[i]); ! *pgelts = ATL_DivBySize(j); break; case 'x' : /* set xu */ --- 10788,10802 ---- *TA = AtlasNoTrans; break; ! case 'C' : /* set flushsz in KB */ if (++i >= nargs) PrintUsage(args[0], "out of flags in -C ", i-1); j = atoi(args[i]); ! *flushelts = j*ATL_DivBySize(1024); break; ! case 'p' : /* set pagesz in KB */ if (++i >= nargs) PrintUsage(args[0], "out of flags in -p ", i-1); j = atoi(args[i]); ! *pgelts = j*ATL_DivBySize(1024); break; case 'x' : /* set xu */ *************** *** 10833,10839 **** } if (!(*nrep)) ! { ! flops = 2.0 * *m * *n; ! *nrep = (mfF * 1000000.0 + flops-1) / flops; if (*nrep < 1) *nrep = 1; } --- 10886,10892 ---- } if (!(*nrep)) ! { ! flops = Time2Flop(*m, *n, 1.0) * 1000.0; /* Get kiloFLOPS in GEMV */ ! *nrep = (ATL_nkflop+flops-1)/flops; if (*nrep < 1) *nrep = 1; } *************** *** 10854,10860 **** GetFlags(nargs, args, &flushelts, &celts, &pgelts, &xu, &yu, &ntim, &nrep, &TA, &m, &n, &lda, SADD beta, &FAa, &MAa, &FAx, &MAx, &FAy, &MAy); ! exit(DoTimes(flushelts, celts, pgelts, ntim, nrep, xu, yu, TA, m, n, lda, ! SADD beta, FAa, MAa, FAx, MAx, FAy, MAy)); } @ROUT mvksearch ! --- 10907,10949 ---- GetFlags(nargs, args, &flushelts, &celts, &pgelts, &xu, &yu, &ntim, &nrep, &TA, &m, &n, &lda, SADD beta, &FAa, &MAa, &FAx, &MAx, &FAy, &MAy); ! DoTimes(flushelts, celts, pgelts, ntim, nrep, xu, yu, TA, m, n, lda, ! SADD beta, FAa, MAa, FAx, MAx, FAy, MAy); ! exit(0); } @ROUT mvksearch ! @extract -b @(basd)/atlas-parse.base rout=mvread ! @ROUT smvcases.idx dmvcases.idx cmvcases.idx zmvcases.idx ! # ! # In this file, any line beginning with a '#' is ignored, but the # must be in ! # column 0. All multiple whitespace is reduced to one space (i.e. used only ! # to distinguish where words begin/end). Lines may be extended by putting '\' ! # as the *last* character of line. ! # ! # The file has the following format: ! # ROUT='routine name' AUTH='author names' COMP='compiler name' FLAGS='flags' ! # ID=<id> YU=<yu> XU=<xu> minY=<#> minX=<#> alignX=<#> alignY=<#> alignA=<#>, ! # TA='[t,n,c]' TB='[t,n,c]' SSE=[0,1,2,3] X87=[0,1] ! # LDTOP=[0,1] ALLALIGNXY=[0,1] AXPYBASED=[0,1] GEMMBASED=[0,1] CONJDEF=[0,1] ! # ASM=[asmlist], eg., asmlist is "GAS_x8664,GAS_x8632" or "GAS_SPARC" ! # ASM defaults to no assembly dialect required. ! # If YU/XU is negative, then the routine can only handle multiples of YU/XU. ! # ! # Assuming N is the length of X, and M is the length of Y, all routines ! # are assumed to handle any runtime value of M >= minY, N >= minX. ! # They must respect the compile-time macros BETA0, BETA1, & BETAX. ! # Some less-obvious keywords: ! # LDTOP : set to 1 if load Y at top of MV loop (more error) ! # CONJDEF : Defining Conj_ will cause A to be conjugated before use ! # if 0, then can only be used for TA setting ! # ALLALIGNXY: Create k copies of X and Y, where k is the number of times ! # the native alignment goes into the required alignment. So, ! # if ALIGNX=16, and we are doing single precision, X will be ! # passed as float *X[4], and X[0] will be 16-byte aligned, ! # X[1]: 4-byte aligned, X[2]: 8-byte aligned, X[3]: 12 byte aligned ! # if ALIGN[X/Y] == native length, then this flag has no effect on ! # that vector. ! # AXPYBASED : Routine employs the outer-product matvec (mainly for NoTrans) ! # GEMMBASED : Kernel calls GEMM to do matvec ! # PFTUNEx : Kernel uses pref_x(mem) macro for each op=x (A,y,x). prefetch ! # inst can be varied wt this macro, as can fetch distance. ! # Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.192 retrieving revision 1.193 diff -C2 -d -r1.192 -r1.193 *** atlas-make.base 17 Apr 2009 17:02:44 -0000 1.192 --- atlas-make.base 20 Apr 2009 13:45:37 -0000 1.193 *************** *** 1196,1199 **** --- 1196,1200 ---- SEXTFLAGS = opt= + BETA=1 xmvsearch : xemit_head mvsearch.o l1lib tstlib auxlib \ *************** *** 1350,1353 **** --- 1351,1361 ---- $(ATLRUN) $(MVTdir) x@(pre)mvtst -A T -m $(Mt) -n $(Nt) + x@(pre)mvktime : @(pre)mvktime.o force_build + cp $(mvrout) ATL_@(pre)gemvK.c + $(@(pre)MVCC) $(@(pre)MVFLAGS) $(CDEFS) $(EXTFLAGS) -D@(typ) \ + -DBETA$(BETA) -c ATL_@(pre)gemvK.c + $(CLINKER) $(CLINKFLAGS) -o $@ @(pre)mvktime.o ATL_@(pre)gemvK.o \ + $(TESTlib) $(ATLASlib) + @(pre)mvktime.o : $(mySRCdir)/mvktime.c $(ICC) -c $(ICCFLAGS) -o $@ -D@(typ) $(mySRCdir)/mvktime.c Index: atlas-parse.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-parse.base,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** atlas-parse.base 16 Apr 2009 16:58:29 -0000 1.2 --- atlas-parse.base 20 Apr 2009 13:45:37 -0000 1.3 *************** *** 1,3 **** ! @ROUT dcases.mv # # In this file, any line beginning with a '#' is ignored, but the # must be in --- 1,3 ---- ! @ROUT smvcases.idx dmvcases.idx cmvcases.idx zmvcases.idx # # In this file, any line beginning with a '#' is ignored, but the # must be in *************** *** 29,33 **** # if ALIGN[X/Y] == native length, then this flag has no effect on # that vector. ! # AXPYBASED : Routine employs the out-product matvec (mainly for transpose) # GEMMBASED : Kernel calls GEMM to do matvec # PFTUNEx : Kernel uses pref_x(mem) macro for each op=x (A,y,x). prefetch --- 29,33 ---- # if ALIGN[X/Y] == native length, then this flag has no effect on # that vector. ! # AXPYBASED : Routine employs the outer-product matvec (mainly for NoTrans) # GEMMBASED : Kernel calls GEMM to do matvec # PFTUNEx : Kernel uses pref_x(mem) macro for each op=x (A,y,x). prefetch *************** *** 44,48 **** #define MVF_DEFAULT ((1<<MVF_INCACHE) | (1<<MVF_OUTCACHE)) ! typedef struct MVNODE ATL_mvnode_t struct MVNODE { --- 44,48 ---- #define MVF_DEFAULT ((1<<MVF_INCACHE) | (1<<MVF_OUTCACHE)) ! typedef struct MVNODE ATL_mvnode_t; struct MVNODE { |