[Math-atlas-commits] CVS: AtlasBase/Clint atlas-lvl2.base, 1.112, 1.113 atlas-make.base, 1.267, 1.268 atlas-parse.base, 1.70, 1.71
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2010-03-11 17:09:05
|
Update of /cvsroot/math-atlas/AtlasBase/Clint In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv27700/Clint Modified Files: atlas-lvl2.base atlas-make.base atlas-parse.base Log Message: Index: atlas-lvl2.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-lvl2.base,v retrieving revision 1.112 retrieving revision 1.113 diff -C2 -d -r1.112 -r1.113 *** atlas-lvl2.base 10 Mar 2010 23:49:04 -0000 1.112 --- atlas-lvl2.base 11 Mar 2010 17:08:53 -0000 1.113 *************** *** 11893,11903 **** @ROUT zr1cases.idx ID=3 XU=1 YU=4 ALIGNX2A=1 AUTH='R. Clint Whaley' ROUT='ATL_zgerk_1x4_sse3.c' @ROUT r1ksearch @define rt @r1@ @define ru @r1@ @ROUT r2ksearch @define rt @r2@ @define ru @r1@ ! @ROUT r1ksearch r2ksearch #include <stdio.h> [...1524 lines suppressed...] + x = (TYPE*) X; + /* + * Call optimized kernel (can be restricted or general) + */ + gerk(imb, Nm, one, x, 1, y, incy, A, lda); + /* + * Some kernels require N%NU=0; if so nr is remainder, do cleanup with axpy + */ + if (nr) + Mjoin(PATL,gerk_axpy)(imb, nr, one, x, 1, y+(Nm SHIFT), 1, + A+lda*(Nm SHIFT), lda); + A += imb SHIFT; + X += (imb*incX)SHIFT; + m -= imb; + imb = Mmin(m,mb); + } + while(m); + if (vp) + free(vp); + } Index: atlas-make.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-make.base,v retrieving revision 1.267 retrieving revision 1.268 diff -C2 -d -r1.267 -r1.268 *** atlas-make.base 10 Mar 2010 18:20:28 -0000 1.267 --- atlas-make.base 11 Mar 2010 17:08:55 -0000 1.268 *************** *** 1369,1373 **** emit_head.o : $(INCAdir)/atlas_type.h $(mySRCdir)/emit_head.c $(XCC) -c $(XCCFLAGS) -o emit_head.o $(mySRCdir)/emit_head.c ! 
@whiledef rt emit_rmvT gmvsearchT mvgen_sse x@(rt) : @(rt).o $(XCC) $(XCCFLAGS) -o $@ @(rt).o --- 1369,1373 ---- emit_head.o : $(INCAdir)/atlas_type.h $(mySRCdir)/emit_head.c $(XCC) -c $(XCCFLAGS) -o emit_head.o $(mySRCdir)/emit_head.c ! @whiledef rt emit_rmvT gmvsearchT mvgen_sse mvtksearch x@(rt) : @(rt).o $(XCC) $(XCCFLAGS) -o $@ @(rt).o *************** *** 1590,1594 **** @ROUT Make.mvtune_old xmvsearch : mvsearch.o $(SYSdir)/time.o ! $(XCC) $(XCCCFLAGS) -o $@ mvsearch.o xgenmv : xtlb genmv.o $(XCC) $(XCCFLAGS) -o xgenmv genmv.o --- 1590,1594 ---- @ROUT Make.mvtune_old xmvsearch : mvsearch.o $(SYSdir)/time.o ! $(XCC) $(XCCFLAGS) -o $@ mvsearch.o xgenmv : xtlb genmv.o $(XCC) $(XCCFLAGS) -o xgenmv genmv.o Index: atlas-parse.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Clint/atlas-parse.base,v retrieving revision 1.70 retrieving revision 1.71 diff -C2 -d -r1.70 -r1.71 *** atlas-parse.base 10 Mar 2010 18:20:43 -0000 1.70 --- atlas-parse.base 11 Mar 2010 17:08:55 -0000 1.71 *************** *** 159,163 **** #define MVF_SINGLE 9 /* 1: single precision, else double */ #define MVF_COMPLEX 10 /* 1: complex type, else real */ ! #define R1F_ADDCFLAGS 11 /* don't replace: append cflags to default flags */ #define MVF_DEFAULT ((1<<MVF_INCACHE) | (1<<MVF_OUTCACHE)) --- 159,164 ---- #define MVF_SINGLE 9 /* 1: single precision, else double */ #define MVF_COMPLEX 10 /* 1: complex type, else real */ ! #define MVF_ADDCFLAGS 11 /* don't replace: append cflags to default flags */ ! #define MVF_ALIGNX2A 12 /* X (Y if AXPYBASED) forced to same alignmnt as A */ #define MVF_DEFAULT ((1<<MVF_INCACHE) | (1<<MVF_OUTCACHE)) *************** *** 165,169 **** struct MVNODE { ! double mflop; ATL_mvnode_t *next; char *rout, *auth, *comp, *cflags; --- 166,170 ---- struct MVNODE { ! 
double mflop[8]; ATL_mvnode_t *next; char *rout, *auth, *comp, *cflags; *************** *** 172,181 **** --- 173,185 ---- char *genstr; /* system(genstr) will generate gened kernel */ int alignA, alignX, alignY; /* required alignments */ + int ldamul; /* lda must be a multiple of ldamul */ int ID, YU, XU; /* unrolling for Y & X vectors */ int minY, minX; /* min veclen to call the rout with */ + int CacheElts; /* # of cache elts to assume for blocking */ int SSE; /* 0: no SSE, 1: SSE1 req, 2: SSE2 req, etc */ int asmbits; /* valid assemblies in this file */ int rankR; /* restriction rank, higher faster kern */ enum ATLAS_TRANS TA; + int flag; /* bit vector of MVF_* */ }; *************** *** 962,966 **** ! @ROUT mvread mmread @multidef TA TA TB @ROUT mvread --- 966,970 ---- ! @ROUT mmread @multidef TA TA TB @ROUT mvread *************** *** 1578,1590 **** --- 1582,1601 ---- @ROUT mvread r1read mmread #endif /* end atlas_@(rt)parse.h guard */ + @ROUT atlas_mvtesttime.h + @define rt @mv@ + @define ru @mv@ + @define at @mvt@ @ROUT atlas_r1testtime.h @define rt @r1@ @define ru @r1@ + @define at @r1@ @ROUT atlas_r2testtime.h @define rt @r2@ @define ru @r1@ + @define at @r2@ @ROUT atlas_mmtesttime.h @define rt @mm@ @define ru @mm@ + @define at @mm@ @ROUT atlas_gentesttime.h #ifndef ATLAS_GENTESTTIME_H *************** *** 1735,1739 **** @ROUT atlas_gentesttime.h #endif /* end guard around atlas_gentesttime.h */ ! @ROUT atlas_mmtesttime.h atlas_r1testtime.h atlas_r2testtime.h #ifndef ATLAS_@up@(rt)TESTTIME_H #define ATLAS_@up@(rt)TESTTIME_H --- 1746,1751 ---- @ROUT atlas_gentesttime.h #endif /* end guard around atlas_gentesttime.h */ ! @ROUT atlas_mmtesttime.h atlas_r1testtime.h atlas_r2testtime.h @\ ! atlas_mvtesttime.h #ifndef ATLAS_@up@(rt)TESTTIME_H #define ATLAS_@up@(rt)TESTTIME_H *************** *** 1742,1746 **** #include "atlas_gentesttime.h" ! @ROUT atlas_r1testtime.h atlas_r2testtime.h @iexp ip @(ip) 1 + --- 1754,1758 ---- #include "atlas_gentesttime.h" ! 
@ROUT atlas_r1testtime.h atlas_r2testtime.h atlas_mvtesttime.h @iexp ip @(ip) 1 + *************** *** 1783,1787 **** assert (M >= kn->minX); assert (N >= kn->minY); ! i = sprintf(ln, "make %c@(rt)ktest @(rt)rout=%s align=\"%s\" ", pre, kn->rout, GetAlignStr(kn->alignX, kn->alignY, kn->alignA)); if (1) /* NOTE: replace with test on restrict or not! */ --- 1795,1799 ---- assert (M >= kn->minX); assert (N >= kn->minY); ! i = sprintf(ln, "make %c@(at)ktest @(rt)rout=%s align=\"%s\" ", pre, kn->rout, GetAlignStr(kn->alignX, kn->alignY, kn->alignA)); if (1) /* NOTE: replace with test on restrict or not! */ *************** *** 1828,1832 **** int FORCETIME, /* if nonzero, ignore existing timing file */ /* if negative, don't retain timing file */ ! ATL_r1node_t *r1p, /* ptr to kernel structure */ char pre, /* precision prefix */ ATL_INT M, ATL_INT N, /* dimensions to time */ --- 1840,1844 ---- int FORCETIME, /* if nonzero, ignore existing timing file */ /* if negative, don't retain timing file */ ! ATL_@(ru)node_t *r1p, /* ptr to kernel structure */ char pre, /* precision prefix */ ATL_INT M, ATL_INT N, /* dimensions to time */ *************** *** 1882,1887 **** --- 1894,1910 ---- pre, M, N, lda, percL1, r1p->rout); else + @ROUT atlas_mvtesttime.h + { + if (r1p->TA == AtlasNoTrans || r1p->TA == AtlasConjTrans) + i = sprintf(ln, "make %cmvnktime M=%d N=%d lda=%d @(rt)rout=\"%s\"", + pre, M, N, lda, r1p->rout); + else + i = sprintf(ln, "make %cmvtktime M=%d N=%d lda=%d @(rt)rout=\"%s\"", + pre, M, N, lda, r1p->rout); + } + @ROUT atlas_r1testtime.h atlas_r2testtime.h i = sprintf(ln, "make %c@(rt)ktime M=%d N=%d lda=%d @(rt)rout=\"%s\"", pre, M, N, lda, r1p->rout); + @ROUT atlas_r1testtime.h atlas_r2testtime.h atlas_mvtesttime.h if (r1p->comp) i += sprintf(ln+i, " %c@up@(rt)CC=\"%s\"", pre, r1p->comp); *************** *** 2378,2386 **** /* procedure @(ip) */ void *SortByTrans ! (ATL_@(rt)node_t *bp, /* original kernels wt mixture of trans cases */ ! 
ATL_@(rt)node_t **bN0, /* No trans cases */ ! ATL_@(rt)node_t **bT0, /* trans cases */ ! ATL_@(rt)node_t **bNC0, /* ConjNotrans cases */ ! ATL_@(rt)node_t **bTC0, /* Conjtrans cases */ /* * Sorts bp into the separate transpose queues, destroying bp in the process. --- 2401,2411 ---- /* procedure @(ip) */ void *SortByTrans ! ( ! ATL_@(rt)node_t *bp, /* original kernels wt mixture of trans cases */ ! ATL_@(rt)node_t **bN0, /* No trans cases */ ! ATL_@(rt)node_t **bT0, /* trans cases */ ! ATL_@(rt)node_t **bNC0, /* ConjNotrans cases */ ! ATL_@(rt)node_t **bTC0 /* Conjtrans cases */ ! ) /* * Sorts bp into the separate transpose queues, destroying bp in the process. |