[Math-atlas-commits] CVS: AtlasBase/Students qr.base,1.6,1.7
Brought to you by:
rwhaley,
tonyc040457
From: R. C. W. <rw...@us...> - 2009-10-24 19:25:41
|
Update of /cvsroot/math-atlas/AtlasBase/Students In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv24788/Students Modified Files: qr.base Log Message: Index: qr.base =================================================================== RCS file: /cvsroot/math-atlas/AtlasBase/Students/qr.base,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** qr.base 23 Oct 2009 22:48:00 -0000 1.6 --- qr.base 24 Oct 2009 19:25:17 -0000 1.7 *************** *** 632,635 **** --- 632,655 ---- } /* END AL_gelq2 */ @ROUT ATL_geqrf + #include "stdio.h" + #include "cblas.h" + #include "atlas_lapack.h" + + #ifdef SREAL + #define MYOPT LASreal + #endif + #ifdef DREAL + #define MYOPT LADreal + #endif + #ifdef SCPLX + #define MYOPT LAScplx + #endif + #ifdef DCPLX + #define MYOPT LADcplx + #endif + + + int ATL_geqrf(ATL_CINT M, ATL_CINT N, TYPE *A, ATL_CINT lda, TYPE *TAU, + TYPE *WORK, ATL_CINT LWORK) /*----------------------------------------------------------------------------- * This is the C translation of the standard LAPACK Fortran routine: *************** *** 708,864 **** * *----------------------------------------------------------------------------*/ - #include "stdio.h" - #include "cblas.h" - #include "atlas_lapack.h" - - #ifdef SREAL - #define MYOPT LASreal - #endif - #ifdef DREAL - #define MYOPT LADreal - #endif - #ifdef SCPLX - #define MYOPT LAScplx - #endif - #ifdef DCPLX - #define MYOPT LADcplx - #endif - - - int ATL_geqrf(int M, int N, TYPE *A, int LDA, TYPE *TAU, - TYPE *WORK, int LWORK) { ! int LQUERY, I, IB, IINFO, IWS, K, LDWORK, NB, NBMIN, LoopNB, NX, NEWN; ! int INFO, MAXMN; ! int origN = N; ! int i, LDA2, NB2, buildT; ! ! int NBOVERRIDE= 7; ! TYPE *ws_QR2, *ws_T, *ws_larfb; /* Workspace for QR2,T, larfb */ ! /* Get NB */ ! NB = ATL_ilaenv(LAIS_OPT_NB, LAgeqrf, MYOPT, M, N, -1, -1); ! ! MAXMN = (M<N)?N:M; /* Get Max */ ! INFO = 0; ! if (LWORK < 0) /* Query logic */ { ! /* Retun the size of work space required. This needs to be allocated */ ! /* by the caller */ ! /* Work = Workspace for ATL_larfb + WS for ATL_larft + WS for ATL_geqr2 */ ! ! ! //Temporary Logic ! if (NB < NBOVERRIDE ) NB = NBOVERRIDE; ! ! *WORK = ( MAXMN*NB + NB*NB + MAXMN ) ; return(0); ! } else if (LWORK < (MAXMN*NB + NB*NB + MAXMN)) ! { ! /* Check for the size of workspace allocated by the caller */ ! fprintf(stderr, "ATL_geqr2 Error LWORK SHORT! LWORK=%i, MAXMN*NB=%i. \n", ! LWORK, MAXMN*NB); ! fflush(stderr); ! } ! ! ! /* ! * Quick return if possible ! */ ! if (M < 0) INFO = -1; ! else if (N < 0) INFO = -2; ! else if (LDA < M) INFO = -4; ! if (INFO != 0) ! { ! fprintf(stderr, "ERROR, RF, INFO=%i.\n", -INFO); ! return(INFO); ! } ! ! K = (M<N)?M:N; /* Get min */ ! if (K == 0) { ! return(INFO); } ! @beginskip ! NBMIN = 2; ! /* Get NX */ ! NX = ATL_ilaenv(LAIS_NBXOVER, LAgeqrf, MYOPT, M, N, -1, -1); ! ! if (NX < 0) NX=0; ! //TODO : remove this hardcoding later ! NBMIN = 1; ! if (NB < NBOVERRIDE ) NB = NBOVERRIDE; ! NX = 2; ! @endskip ! NX = 0; NBMIN = 2; ! ! /* Make the Workspace for ATL_larft and ATL_geqr2 and ATL_larfb */ ! ws_T = WORK; /* T workspace to begining of work*/ ! ws_QR2 = (WORK +(NB SHIFT)*NB ); /* After T Work space */ ! ws_larfb = (WORK +(NB SHIFT)*NB ! + (MAXMN SHIFT) ); /* After workspace for T and QR2 */ ! ! /* Initialize I to zero */ ! I = 0; ! ! ! LDA2 = LDA SHIFT; /* for Complex LDA *2 */ ! NB2 = NB SHIFT; /* for complex NB *2 */ ! if (NB >= NBMIN && NB < K && NX < K) { ! /* ! * Use blocked code initially ! */ ! while (I < K-NX) { ! NEWN = K-I; ! ! LoopNB = NB; ! /**********************************************/ ! /* If not enough left for LoopNB, do leftover.*/ ! /**********************************************/ ! IB = (NEWN < LoopNB)?NEWN:LoopNB; /* Find min */ ! ! /* Compute the QR factorization of the current block */ ! /* A(i:m,i:i+ib-1). ATL_geqrr also produce T matrix */ ! ! ! buildT = 1; /* Set it for time being . */ ! IINFO = ATL_geqrr(M-I, IB , (A + (I SHIFT) + I*LDA2) , ! LDA, (TAU + ( I SHIFT)), ! ws_QR2, ws_T, NB, ws_larfb, buildT); ! ! if (I+IB < N) ! { ! ! /* Form the triangular factor of the block reflector */ ! /* H = H(i) H(i+1) . . . H(i+ib-1) */ ! /* */ ! /* After geqrr, ws_T contains 'T', the IB x IB triangular */ ! /* factor 'T' of the block reflector. It is an output used */ ! /* in the next call, dlarfb. */ ! /* H = Id - Y*T*Y', with Id=(M-I)x(M-I), Y=(M-I)xIB. */ ! /* */ ! /* Apply H' to A(i:m,i+ib:n) from the left */ ! /* */ ! /* The ws_T array used above is an input to dlarfb; it is */ ! /* 'T' in that routine, and LDT x K (translates here to */ ! /* LDWORK x IB). The use of WORK is an array that */ ! /* must be LDWORK x K (IB = K), but is not an output. */ ! ! ATL_larfb(CblasLeft, CblasTrans, LAForward, LAColumnStore, ! M-I, N-I-IB, IB, (A+(I SHIFT)+I*LDA2), LDA, ws_T, NB, ! A+(I SHIFT)+(I+IB)*LDA2, LDA, ws_larfb, N); ! } ! ! I = I + IB; ! } /* END WHILE I <= K-NX */ } ! ! /* Use unblocked code to factor the last or only block. */ if (I < K) { --- 728,812 ---- * *----------------------------------------------------------------------------*/ { ! ATL_CINT minMN = Mmin(M, N), maxMN = Mmax(M, N); ! ATL_INT n, nb, j; TYPE *ws_QR2, *ws_T, *ws_larfb; /* Workspace for QR2,T, larfb */ + void *vp=NULL; ! nb = ATL_ilaenv(LAIS_OPT_NB, LAgeqrf, MYOPT, M, N, -1, -1); ! /* ! * If it is a workspace query, return the size of work required. ! * wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2 ! * RCW Q: Why can't LARFB & GEQR2 workspaces be overlapped? ! */ ! if (LWORK < 0) { ! *WORK = ( maxMN*nb + nb*nb + maxMN ) ; return(0); ! } ! else if (M < 1 || N < 1) /* quick return if no work to do */ ! return(0); ! /* ! * If the user gives us too little space, see if we can allocate it ourselves ! */ ! else if (LWORK < (maxMN*nb + nb*nb + maxMN)) { ! vp = malloc(ATL_MulBySize(maxMN*nb + nb*nb + maxMN) + ATL_Cachelen); ! if (!vp) ! return(-7); ! WORK = ATL_AlignPtr(vp); } ! /* ! * Assign workspace areas for ATL_larft, ATL_geqr2, ATL_larfb ! * RCW Q: Why can't LARFB & GEQR2 workspaces be overlapped? ! */ ! ws_T = WORK; /* T at begining of work */ ! ws_QR2 = WORK +(nb SHIFT)*nb; /* After T Work space */ ! ws_larfb = ws_QR2 + (maxMN SHIFT); /* After workspace for T and QR2 */ ! /* ! * Leave one iteration to be done outside loop, so we don't build T ! * Any loop iterations are therefore known to be of size nb (no partial blocks) ! */ ! n = (minMN / nb) * nb; ! if (n == minMN) ! n -= Mmin(nb, minMN); ! ! for (j=0; j < n; j += nb) { ! ATL_assert(!ATL_geqrr(M-j, nb, A+(j SHIFT)*(lda+1), lda, TAU+(j SHIFT), ! ws_QR2, ws_T, nb, ws_larfb, 1)); ! if (j+nb < N) /* if there are more cols left to right, update them */ { ! /* ! * ====================================================================== ! * Form the triangular factor of the block reflector ! * H = H(i) H(i+1) . . . H(i+ib-1) ! * After geqrr, ws_T contains 'T', the nb x nb triangular factor 'T' ! * of the block reflector. It is an output used in the next call, dlarfb. ! * H = Id - Y*T*Y', with Id=(M-j)x(M-j), Y=(M-j)xNB. ! * ! * Apply H' to A(j:m,j+nb:N) from the left ! * ! * The ws_T array used above is an input to dlarfb; it is 'T' in ! * that routine, and LDT x K (translates here to LDWORK x NB). ! * WORK is an LDWORK x NB workspace (not input or output). ! * RCW: LDWORK is not defined! ! * ====================================================================== ! */ ! ATL_larfb(CblasLeft, CblasTrans, LAForward, LAColumnStore, ! M-j, N-j-nb, nb, A+(j SHIFT)*(lda+1), lda, ws_T, nb, ! A+(j SHIFT)+((j+nb)SHIFT)*lda, lda, ws_larfb, N); ! } } ! /* ! * Last panel has no need to build T ! */ ! nb = minMN - n; ! ATL_assert(!ATL_geqrr(M-n, N-n, A+(n SHIFT)*(lda+1), lda, TAU+(n SHIFT), ! ws_QR2, ws_T, nb, ws_larfb, 0)); ! @beginskip if (I < K) { *************** *** 867,872 **** ws_QR2); } ! return(INFO); } /* END ATL_dgeqrF */ --- 815,823 ---- ws_QR2); } + @endskip ! if (vp) ! free(vp); ! return(0); } /* END ATL_dgeqrF */ *************** *** 901,905 **** * The number of columns of the matrix A. N >= 0. * ! * A (input/output) array, dimension (LDA,N) * On entry, the M-by-N matrix A. * On exit, the elements on and above the diagonal of the array --- 852,856 ---- * The number of columns of the matrix A. N >= 0. * ! * A (input/output) array, dimension (LDA,N) * On entry, the M-by-N matrix A. * On exit, the elements on and above the diagonal of the array *************** *** 907,911 **** * upper triangular if m >= n); the elements below the diagonal, * with the array TAU, represent the orthogonal matrix Q as a ! * product of min(m,n) elementary reflectors (see Further * Details). * --- 858,862 ---- * upper triangular if m >= n); the elements below the diagonal, * with the array TAU, represent the orthogonal matrix Q as a ! * product of min(m,n) elementary reflectors (see Further * Details). * *************** *** 941,945 **** int LDT2 = LDT SHIFT; /* for complex LDT *2 */ ! if (N == 0) return; /* Nothing to do. */ @skip if (N <= NBQRF2) /* If we hit stopping point, */ --- 892,896 ---- int LDT2 = LDT SHIFT; /* for complex LDT *2 */ ! if (N == 0) return(0); /* Nothing to do. */ @skip if (N <= NBQRF2) /* If we hit stopping point, */ *************** *** 5871,5875 **** @ROUT qrtst ! @extract -b @(topd)/cw.inc lang=C -def cwdate 2009 -def cwauth "Siju Samuel" -def contrib "Anthony M. Castaldo, R. Clint Whaley" /* * This program is a tester for QR, RQ, LQ and QL factorization routines. --- 5822,5826 ---- @ROUT qrtst ! @extract -b @(topd)/cw.inc lang=C -def cwdate 2009 -def cwauth "Siju Samuel" -def contrib "R. Clint Whaley, Anthony M. Castaldo" /* * This program is a tester for QR, RQ, LQ and QL factorization routines. *************** *** 5948,5985 **** #define test_geqrf(Major_, M_, N_, A_, lda_, tau_, wrk_, lw_) \ Mjoin(Mjoin(ATL_,PRE),geqrf)(M_, N_, A_, lda_, tau_, wrk_, lw_) ! #endif ! ! @beginskip ! /* ! #ifdef TREAL ! #define call_f77lq(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77ormlq)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #else ! #define call_f77lq(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77unmlq)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #endif ! #ifdef TREAL ! #define call_f77ql(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77ormql)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #else ! #define call_f77ql(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77unmql)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #endif ! #ifdef TREAL ! #define call_f77rq(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77ormrq)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #else ! #define call_f77rq(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77unmrq)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #endif ! #ifdef TREAL ! #define call_f77qr(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77ormqr)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #else ! #define call_f77qr(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_) \ ! ATL_assert(!Mjoin(PATL,f77unmqr)(Major_, M_, N_, K_, A_, lda_, tau_, C_, ldc_, wrk_, lw_)) ! #endif ! */ ! @endskip /* --- 5899,5903 ---- #define test_geqrf(Major_, M_, N_, A_, lda_, tau_, wrk_, lw_) \ Mjoin(Mjoin(ATL_,PRE),geqrf)(M_, N_, A_, lda_, tau_, wrk_, lw_) ! #endif /* *************** *** 5989,5993 **** { if(PRINT_1) ! Mjoin(PATL,geprint)(msg, M, N, A, lda); } --- 5907,5911 ---- { if(PRINT_1) ! Mjoin(PATL,geprint)(msg, M, N, A, lda); } *************** *** 5998,6002 **** { if(PRINT_1) ! Mjoin(PATL,geprint)(msg, N, 1, X, N); } --- 5916,5920 ---- { if(PRINT_1) ! Mjoin(PATL,geprint)(msg, N, 1, X, N); } *************** *** 6043,6047 **** /* * Rreturns a duplicate of the A matrix, with new leading dimension ! * lda : leading dimension of A * ldc : leading dimension of Duplicate Matrix */ --- 5961,5965 ---- /* * Rreturns a duplicate of the A matrix, with new leading dimension ! * lda : leading dimension of A * ldc : leading dimension of Duplicate Matrix */ *************** *** 6071,6075 **** } - /* * Make input square matrix A[NxN] as Identity Matrix. --- 5989,5992 ---- *************** *** 6219,6223 **** } - /* * Populate Vi vector from the given ROW of A Matrix. A has the pointer --- 6136,6139 ---- *************** *** 6232,6236 **** int numberOfVielement, char UnitPos) { ! #ifdef TREAL static const TYPE ONE=ATL_rone; #else --- 6148,6152 ---- int numberOfVielement, char UnitPos) { ! #ifdef TREAL static const TYPE ONE=ATL_rone; #else *************** *** 6329,6333 **** } //------------------------------------ ! /* * Populate Vi vector from the given COLUMN of A Matrix. A has the pointer * to the top of the column. --- 6245,6249 ---- } //------------------------------------ ! /* * Populate Vi vector from the given COLUMN of A Matrix. A has the pointer * to the top of the column. *************** *** 6561,6567 **** ! static TYPE lqtest(int M, int N, int lda, int recurseInd) { ! printf("LQ test STARTS --------------------------------------------------\n"); TYPE *A, *TAU, *TAUNEG, *WORK, *WORKF, *L, *W ; TYPE *Q, *VI; --- 6477,6483 ---- ! static TYPE lqtest(int M, int N, int lda, int flushKB, double *time) { ! @skip printf("LQ test STARTS --------------------------------------------------\n"); TYPE *A, *TAU, *TAUNEG, *WORK, *WORKF, *L, *W ; TYPE *Q, *VI; *************** *** 6569,6572 **** --- 6485,6489 ---- int viCounter =0; TYPE dtmp, dtmp1; + double t0; int ITER ; *************** *** 6591,6595 **** ldmn = MNmin; ! //Epsilon eps = Mjoin(PATL,epsilon)(); --- 6508,6512 ---- ldmn = MNmin; ! //Epsilon eps = Mjoin(PATL,epsilon)(); *************** *** 6606,6610 **** &dtmp1, &dtmp, -1); - //Get Work and LWORK LWORK = dtmp; --- 6523,6526 ---- *************** *** 6615,6622 **** TAU = malloc(ATL_MulBySize(MNmin)); if (TAU == NULL) AllocationErr("TAU"); ! test_gelqf(CblasColMajor, M, N, (TYPE*)(A), lda, (TYPE*)(TAU), (TYPE*)(WORK), LWORK); //Allocate and Copy TAU to TAUNEG --- 6531,6546 ---- TAU = malloc(ATL_MulBySize(MNmin)); if (TAU == NULL) AllocationErr("TAU"); ! if (flushKB > 0) ! { ! t0 = ATL_flushcache(flushKB*1024); ! t0 += ATL_flushcache(-1); ! } ! t0 = time00(); test_gelqf(CblasColMajor, M, N, (TYPE*)(A), lda, (TYPE*)(TAU), (TYPE*)(WORK), LWORK); + *time = time00() - t0; + if (flushKB) + t0 += ATL_flushcache(0); //Allocate and Copy TAU to TAUNEG *************** *** 6798,6808 **** } ! static TYPE qltest(int M, int N, int lda, int recurseInd) { ! printf("QL test STARTS --------------------------------------------------\n"); TYPE *A, *TAU, *TAUNEG, *WORK, *L, *W ; TYPE *Q, *VI; TYPE *AORIG; ! TYPE dtmp, dtmp1; int ITER; --- 6722,6733 ---- } ! static TYPE qltest(int M, int N, int lda, int flushKB, double *time) { ! @skip printf("QL test STARTS --------------------------------------------------\n"); TYPE *A, *TAU, *TAUNEG, *WORK, *L, *W ; TYPE *Q, *VI; TYPE *AORIG; ! TYPE dtmp, dtmp1; ! double t0; int ITER; *************** *** 6849,6855 **** --- 6774,6789 ---- if (TAU == NULL) AllocationErr("TAU"); + if (flushKB) + { + t0 = ATL_flushcache(flushKB*1024); + t0 += ATL_flushcache(-1); + } + t0 = time00(); test_geqlf(CblasColMajor, M, N, (TYPE*)(A), lda, (TYPE*)(TAU), (TYPE*)(WORK), LWORK); + *time = time00() - t0; + if (flushKB) + t0 += ATL_flushcache(0); //Allocate and Copy TAU to TAUNEG *************** *** 7036,7046 **** * */ ! static TYPE rqtest(int M, int N, int lda, int recurseInd) { ! printf("RQ test STARTS --------------------------------------------------\n"); TYPE *A, *TAU, *TAUNEG, *WORK, *WORKF, *R, *W ; TYPE *Q, *VI; TYPE *AORIG; TYPE dtmp, dtmp1; int ITER; --- 6970,6981 ---- * */ ! static TYPE rqtest(int M, int N, int lda, int flushKB, double *time) { ! @skip printf("RQ test STARTS --------------------------------------------------\n"); TYPE *A, *TAU, *TAUNEG, *WORK, *WORKF, *R, *W ; TYPE *Q, *VI; TYPE *AORIG; TYPE dtmp, dtmp1; + double t0; int ITER; *************** *** 7086,7092 **** --- 7021,7036 ---- if (TAU == NULL) AllocationErr("TAU"); + if (flushKB) + { + t0 = ATL_flushcache(flushKB*1024); + t0 += ATL_flushcache(-1); + } + t0 = time00(); test_gerqf(CblasColMajor, M, N, (TYPE*)(A), lda, (TYPE*)(TAU), (TYPE*)(WORK), LWORK); + *time = time00() - t0; + if (flushKB) + t0 += ATL_flushcache(0); //Allocate and Copy TAU to TAUNEG *************** *** 7272,7282 **** * */ ! static TYPE qrtest(int M, int N, int lda, int recurseInd) { - TYPE *A, *TAU, *TAUNEG, *WORK, *WORKF, *R , *W ; TYPE *Q, *VI; TYPE *AORIG ; TYPE dtmp, dtmp1; int ITER; --- 7216,7226 ---- * */ ! TYPE qrtest(int M, int N, int lda, int flushKB, double *time) { TYPE *A, *TAU, *TAUNEG, *WORK, *WORKF, *R , *W ; TYPE *Q, *VI; TYPE *AORIG ; TYPE dtmp, dtmp1; + double t0; int ITER; *************** *** 7329,7335 **** --- 7273,7288 ---- if (TAU == NULL) AllocationErr("TAU"); + if (flushKB) + { + t0 = ATL_flushcache(flushKB*1024); + t0 += ATL_flushcache(-1); + } + t0 = time00(); test_geqrf(CblasColMajor, M, N, (TYPE*)(A), lda, (TYPE*)(TAU), (TYPE*)(WORK), LWORK); + *time = time00() - t0; + if (flushKB) + t0 += ATL_flushcache(0); *************** *** 7522,7629 **** } ! void RunCase( TYPE thresh, int M, int N , int lda, int TestProg, int recurseInd) { ! TYPE resid; ! ! if (thresh > ATL_rzero) ! { ! switch(TestProg) ! { ! case 1: ! resid =qrtest(M, N, lda, recurseInd ); ! break; ! case 2: ! resid =rqtest(M, N, lda, recurseInd ); ! break; ! case 3: ! resid =qltest(M, N, lda, recurseInd ); ! break; ! case 4: ! resid =lqtest(M, N, lda, recurseInd ); ! break; ! default: ! printf("Wrong Choice of Test Prog... \n"); ! exit(1); ! } ! } ! else resid = -1.0; ! if (resid > thresh ){ ! printf(" PROGRAM=%d, M=%d, N=%d, lda=%d, Residual=%f FAILED \n", ! TestProg, M,N,lda,resid); ! }else { ! printf(" PROGRAM=%d, M=%d, N=%d, lda=%d, Residual=%f SUCCESS \n", ! TestProg, M,N,lda,resid); } ! } ! ! void RunCases( TYPE thresh, int M, int N , int lda, int TestProg, int recurseInd ) { ! RunCase(thresh, M, N, lda, TestProg, recurseInd) ; ! } ! ! void PrintUsage(char *nam) ! { ! fprintf(stderr, "USAGE: %s -m <m> -n <n> -a <lda> -p <prog> \n", nam); ! exit(-1); } ! ! ! void GetFlags(int nargs,char ** args, int *M, int *N, int *lda, ! TYPE *thresh, int *TestProg, int *recurseInd) { ! char ch; ! int i, j, n; ! ! *M = 3; ! *N = 3; ! *lda = *M; ! *thresh = 100.0; ! *TestProg = 1; ! *recurseInd = 0; ! for (i=1; i < nargs; i++) { ! if (args[i][0] != '-') PrintUsage(args[0]); ! switch(args[i][1]) { ! case 'T': ! *thresh = atof(args[++i]); ! break; ! case 'm': ! *M = atoi(args[++i]); ! break; ! case 'n': ! *N = atoi(args[++i]); ! break; ! case 'a': ! *lda= atoi(args[++i]); ! break; ! case 'p': ! *TestProg= atoi(args[++i]); ! break; ! case 'r': ! *recurseInd= atoi(args[++i]); ! break; ! default: ! PrintUsage(args[0]); } } } ! main(int nargs, char **args) { ! int M, N, lda, TestProg, recurseInd; TYPE thresh; - - GetFlags(nargs, args, &M, &N , &lda, &thresh, &TestProg,&recurseInd ); - RunCases( thresh, M, N , lda, TestProg, recurseInd); ! exit(0); } --- 7475,7587 ---- } ! char *RoutInt2Str2(int rout) ! /* ! * returns 2-char abbreviation for LAPACK householder routine rout ! */ { ! char *sp = "QR"; ! if (rout == LAgeqlf) ! sp = "QL"; ! else if (rout == LAgerqf) ! sp = "RQ"; ! else if (rout == LAgelqf) ! sp = "LQ"; ! return(sp); ! } ! int GetMyReps(int N, int *nreps) ! /* ! * Finds the correct nreps for this N ! */ ! { ! int n, i; ! n = *nreps++; ! for (i=n+n-2; i>=0; i -= 2) { ! if (N >= nreps[i]) ! return(nreps[i+1]); } ! return(nreps[1]); } ! int RunCase(TYPE thresh, int flushKB, int rout, int M, int N, int lda) ! /* ! * RETURNS: 0 if residual is <= thresh, otherwise 1 ! */ { + TYPE resid; + TYPE (*qtest)(int, int, int, int, double*); + double time; + double Time2Flops(int rout, int UPLO, int M, int N, double time); ! @skip printf("%2s %3s %6d %6d %6d %10.4e %11.2f %9.2e\n", ! @skip RoutInt2Str2(rout), "Col", M, N, lda, 0.0, 0.0, -1.0); ! switch(rout) ! { ! case LAgeqrf: ! qtest = qrtest; ! break; ! case LAgeqlf: ! qtest = qltest; ! break; ! case LAgerqf: ! qtest = rqtest; ! break; ! case LAgelqf: ! qtest = qltest; ! break; ! } ! resid = qtest(M, N, lda, flushKB, &time); ! printf("%2s %3s %6d %6d %6d %10.4e %11.2f %9.2e\n", ! RoutInt2Str2(rout), "Col", M, N, lda, time, ! Time2Flops(rout, 0, M, N, time), resid); ! return(resid <= thresh ? 1 : 0); } ! int RunCases(TYPE thresh, int flushKB, int ldagap, int *NREPS, int *routs, ! int *Ms, int *Ns) ! /* ! * RETURNS: number of failed cases ! */ { ! int r, o, m, n, M, lda, npass=0, ntest=0, nreps, k; ! printf("Rt Maj M N lda TIME MFLOP RESIDUAL\n"); ! printf("== === ===== ===== ===== ========== ========== =========\n"); ! for (r=1; r <= routs[0]; r++) /* loop over routines */ { ! for (o=0; o < 1; o++) /* useless order loop, add later */ { ! for (n=1; n <= Ns[0]; n++) ! { ! for (m=1; m <= Ms[0]; m++) ! { ! M = (Ms[m]) ? Ms[m]:Ns[n]; ! nreps = GetMyReps(Mmin(M, Ns[n]), NREPS); ! for (k=0; k < nreps; k++) ! { ! npass += RunCase(thresh, flushKB, routs[r], M, Ns[n], ! M+ldagap); ! ntest++; ! } ! } ! } } } + printf("\n%d cases ran, %d cases passed\n\n", ntest, npass); + return(ntest-npass); } ! #define CAN_NB 0 ! @extract -b @(topd)/Clint/atlas-tlp.base rout=qrtstGF ! ! int main(int nargs, char **args) { ! int *Ns, *Ms, *ROUTs, *nreps; ! int flushKB, ldagap; TYPE thresh; ! GetFlags(nargs, args, &flushKB, &thresh, &nreps, &ROUTs, &ldagap, &Ms, &Ns); ! return(RunCases(thresh, flushKB, ldagap, nreps, ROUTs, Ms, Ns)); } |