# TODO 'further details' ======= ========= # If array not referenced, min 1. do('../Config'); our $VERSION = 0.04; use PDL::Exporter; #TODO # dot pp_setversion(0.04); sub generate_code($){ if ($config{WITHOUT_THREAD}){ return ' #if 0 threadloop%{ %} #endif'.$_[0]; } else{ return $_[0]; } } if ($config{CBLAS}){ pp_addhdr('#include <cblas.h>'); } if ($^O =~ /MSWin/) { pp_addhdr(' #include <float.h> '); } pp_addhdr(' #include <math.h> /* avoid annoying warnings */ typedef PDL_Long logical; typedef PDL_Long integer; typedef PDL_Long ftnlen; #ifdef __cplusplus typedef logical (*L_fp)(...); #else typedef logical (*L_fp)(); #endif #ifndef min #define min(a,b) ((a) <= (b) ? (a) : (b)) #endif #ifndef max #define max(a,b) ((a) >= (b) ? (a) : (b)) #endif static integer c_zero = 0; static integer c_nine = 9; '); pp_addpm({At=>'Top'},<<'EOD'); use strict; { package PDL; my $warningFlag; BEGIN{ $warningFlag = $^W; $^W = 0; } use overload ( 'x' => sub {PDL::mmult($_[0], $_[1])}, ); BEGIN{ $^W = $warningFlag;} } =head1 NAME PDL::LinearAlgebra::Real - PDL interface to the real lapack linear algebra programming library =head1 SYNOPSIS use PDL::LinearAlgebra::Real; $a = random (100,100); $s = zeroes(100); $u = zeroes(100,100); $v = zeroes(100,100); $info = 0; $job = 0; gesdd($a, $job, $info, $s , $u, $v); Blas vector routine use increment. =head1 DESCRIPTION This module provides an interface to parts of the real lapack library. These routines accept either float or double piddles. EOD pp_def("gesvd", HandleBad => 0, RedoDimsCode => '$SIZE(r) = $PDL(A)->ndims > 1 ? min($PDL(A)->dims[0], $PDL(A)->dims[1]) : 1;', Pars => '[io,phys]A(m,n); int jobu(); int jobvt(); [o,phys]s(r); [o,phys]U(p,q); [o,phys]VT(s,t); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' types(F) %{ extern int sgesvd_(char *jobu, char *jobvt, integer *m, integer *n, float *a, integer *lda, float *s, float *u, int *ldu, float *vt, integer *ldvt, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgesvd_(char *jobz,char *jobvt, integer *m, integer *n, double *a, integer *lda, double *s, double *u, int *ldu, double *vt, integer *ldvt, double *work, integer *lwork, integer *info); double tmp_work; %} integer lwork = -1; char trau, travt; switch ($jobu()) { case 1: trau = \'A\'; break; case 2: trau = \'S\'; break; case 3: trau = \'O\'; break; default: trau = \'N\'; } switch ($jobvt()) { case 1: travt = \'A\'; break; case 2: travt = \'S\'; break; case 3: travt = \'O\'; break; default: travt = \'N\'; } $TFD(sgesvd_,dgesvd_)( &trau, &travt, &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(s), $P(U), &$PRIV(__p_size), $P(VT), &$PRIV(__s_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgesvd_,dgesvd_)( &trau, &travt, &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(s), $P(U), &$PRIV(__p_size), $P(VT), &$PRIV(__s_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes the singular value decomposition (SVD) of a real M-by-N matrix A. The SVD is written A = U * SIGMA * V\' where SIGMA is an M-by-N matrix which is zero except for its min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA are the singular values of A; they are real and non-negative, and are returned in descending order. The first min(m,n) columns of U and V are the left and right singular vectors of A. Note that the routine returns VT = V\', not V. jobu: Specifies options for computing all or part of the matrix U: = 0: no columns of U (no left singular vectors) are computed. = 1: all M columns of U are returned in array U: = 2: the first min(m,n) columns of U (the left singular vectors) are returned in the array U; = 3: the first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; jobvt: Specifies options for computing all or part of the matrix V\': = 0: no rows of V\' (no right singular vectors) are computed. = 1: all N rows of V\' are returned in the array VT; = 2: the first min(m,n) rows of V\' (the right singular vectors) are returned in the array VT; = 3: the first min(m,n) rows of V\' (the right singular vectors) are overwritten on the array A; jobvt and jobu cannot both be 3. A: On entry, the M-by-N matrix A. On exit, if jobu = 3, A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if jobvt = 3, A is overwritten with the first min(m,n) rows of V\' (the right singular vectors, stored rowwise); if jobu != 3 and jobvt != 3, the contents of A are destroyed. s: The singular values of A, sorted so that s(i) >= s(i+1). U: If jobu = 1, U contains the M-by-M orthogonal matrix U; if jobu = 3, U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if jobu = 0 or 3, U is not referenced. Min size = [1,1]. VT: If jobvt = 1, VT contains the N-by-N orthogonal matrix V\'; if jobvt = 2, VT contains the first min(m,n) rows of V\' (the right singular vectors, stored rowwise); if jobvt = 0 or 3, VT is not referenced. Min size = [1,1]. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. > 0: if bdsqr did not converge, info specifies how many superdiagonals of an intermediate bidiagonal form B did not converge to zero. =for example $a = random (float, 100,100); $s = zeroes(float, 100); $u = zeroes(float, 100,100); $vt = zeroes(float, 100,100); $info = pdl(long, 0); gesvd($a, 2, 2, $s , $u, $vt, $info); '); pp_def("gesdd", HandleBad => 0, RedoDimsCode => '$SIZE(r) = $PDL(A)->ndims > 1 ? min($PDL(A)->dims[0], $PDL(A)->dims[1]) : 1;', Pars => '[io,phys]A(m,n); int job(); [o,phys]s(r); [o,phys]U(p,q); [o,phys]VT(s,t); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork; integer *iwork; integer smlsiz; char tra; types(F) %{ extern int sgesdd_(char *jobz, integer *m, integer *n, float * a, integer *lda, float *s, float *u, int *ldu, float *vt, integer *ldvt, float *work, integer *lwork, integer *iwork, integer *info); float tmp_work; %} types(D) %{ extern int dgesdd_(char *jobz, integer *m, integer *n, double * a, integer *lda, double *s, double *u, int *ldu, double *vt, integer *ldvt, double *work, integer *lwork, integer *iwork, integer *info); double tmp_work; %} lwork = ($PRIV(__m_size) < $PRIV(__n_size)) ? 8*$PRIV(__m_size) : 8*$PRIV(__n_size); iwork = (integer *)malloc(lwork * sizeof(integer)); lwork = -1; switch ($job()) { case 1: tra = \'A\'; break; case 2: tra = \'S\'; break; case 3: tra = \'O\'; break; default: tra = \'N\'; break; } $TFD(sgesdd_,dgesdd_)( &tra, &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(s), $P(U), &$PRIV(__p_size), $P(VT), &$PRIV(__s_size), &tmp_work, &lwork, iwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work; if (tra == \'N\'){ smlsiz = ilaenv_(&c_nine, "SGESDD", " ", &c_zero, &c_zero, &c_zero, &c_zero, (ftnlen)6, (ftnlen)1); lwork = max(14*min($PRIV(__m_size),$PRIV(__n_size))+4, 10*min($PRIV(__m_size), $PRIV(__n_size))+2+ smlsiz*(smlsiz+8)) + max($PRIV(__m_size),$PRIV(__n_size)); } work = (float *) malloc(lwork * sizeof(float)); %} types(D) %{ double *work; if (tra == \'N\'){ smlsiz = ilaenv_(&c_nine, "DGESDD", " ", &c_zero, &c_zero, &c_zero, &c_zero, (ftnlen)6, (ftnlen)1); lwork = max(14*min($PRIV(__m_size),$PRIV(__n_size))+4, 10*min($PRIV(__m_size), $PRIV(__n_size))+2+ smlsiz*(smlsiz+8)) + max($PRIV(__m_size),$PRIV(__n_size)); } work = (double *) malloc(lwork * sizeof(double)); %} $TFD(sgesdd_,dgesdd_)( &tra, &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(s), $P(U), &$PRIV(__p_size), $P(VT), &$PRIV(__s_size), work, &lwork, iwork, $P(info)); free(work); } free(iwork); ', Doc => ' =for ref Computes the singular value decomposition (SVD) of a real M-by-N matrix A. This routine use the Coppen\'s divide and conquer algorithm. It is much faster than the simple driver for large matrices, but uses more workspace. job: Specifies options for computing all or part of matrix: = 0: no columns of U or rows of V\' are computed; = 1: all M columns of U and all N rows of V\' are returned in the arrays U and VT; = 2: the first min(M,N) columns of U and the first min(M,N) rows of V\' are returned in the arrays U and VT; = 3: If M >= N, the first N columns of U are overwritten on the array A and all rows of V\' are returned in the array VT; otherwise, all columns of U are returned in the array U and the first M rows of V\' are overwritten on the array A. A: On entry, the M-by-N matrix A. On exit, if job = 3, A is overwritten with the first N columns of U (the left singular vectors, stored columnwise) if M >= N; A is overwritten with the first M rows of V\' (the right singular vectors, stored rowwise) otherwise. if job != 3, the contents of A are destroyed. s: The singular values of A, sorted so that s(i) >= s(i+1). U: If job = 1 or job = 3 and M < N, U contains the M-by-M orthogonal matrix U; if job = 2, U contains the first min(M,N) columns of U (the left singular vectors, stored columnwise); if job = 3 and M >= N, or job = 0, U is not referenced. Min size = [1,1]. VT: If job = 1 or job = 3 and M >= N, VT contains the N-by-N orthogonal matrix V\'; if job = 2, VT contains the first min(M,N) rows of V\' (the right singular vectors, stored rowwise); if job = 3 and M < N, or job = 0, VT is not referenced. Min size = [1,1]. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. > 0: bdsdc did not converge, updating process failed. =for example $lines = 50; $columns = 100; $a = random (float, $lines, $columns); $min = $lines < $columns ? $lines : $columns; $s = zeroes(float, $min); $u = zeroes(float, $lines, $lines); $vt = zeroes(float, $columns, $columns); $info = long (0); gesdd($a, 1, $s , $u, $vt, $info); '); pp_def("ggsvd", HandleBad => 0, Pars => '[io,phys]A(m,n); int jobu(); int jobv(); int jobq(); [io,phys]B(p,n); int [o,phys]k(); int [o,phys]l();[o,phys]alpha(n);[o,phys]beta(n); [o,phys]U(q,r); [o,phys]V(s,t); [o,phys]Q(u,v); int [o,phys]iwork(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pjobu = \'N\'; char pjobv = \'N\'; char pjobq = \'N\'; types(F) %{ extern int sggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, float *a, integer *lda, float *b, integer *ldb, float *alpha, float *beta, float *u, integer *ldu, float *v, integer *ldv, float *q, integer *ldq, float *work, integer *iwork, integer *info); float *work; %} types(D) %{ extern int dggsvd_(char *jobu, char *jobv, char *jobq, integer *m, integer *n, integer *p, integer *k, integer *l, double *a, integer *lda, double *b, integer *ldb, double *alpha, double *beta, double *u, integer *ldu, double *v, integer *ldv, double *q, integer *ldq, double *work, integer *iwork, integer *info); double *work; %} integer lwork = ($SIZE (m) < $SIZE (n)) ? $SIZE (n): $SIZE (m); if ($SIZE (p) > lwork) lwork = $SIZE (p); types(F) %{ work = (float *)malloc((3*lwork + $SIZE (n))* sizeof(float)); %} types(D) %{ work = (double *)malloc((3*lwork + $SIZE (n)) * sizeof(double)); %} if ($jobu()) pjobu = \'U\'; if ($jobv()) pjobv = \'V\'; if ($jobq()) pjobq = \'Q\'; $TFD(sggsvd_,dggsvd_)( &pjobu, &pjobv, &pjobq, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__p_size), $P(k), $P(l), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(alpha), $P(beta), $P(U), &$PRIV(__q_size), $P(V), &$PRIV(__s_size), $P(Q), &$PRIV(__u_size), work, $P(iwork), $P(info)); free(work); ', Doc => ' =for ref Computes the generalized singular value decomposition (GSVD) of an M-by-N real matrix A and P-by-N real matrix B: U\'*A*Q = D1*( 0 R ), V\'*B*Q = D2*( 0 R ) where U, V and Q are orthogonal matrices, and Z\' is the transpose of Z. Let K+L = the effective numerical rank of the matrix (A\',B\')\', then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the following structures, respectively: If M-K-L >= 0, K L D1 = K ( I 0 ) L ( 0 C ) M-K-L ( 0 0 ) K L D2 = L ( 0 S ) P-L ( 0 0 ) N-K-L K L ( 0 R ) = K ( 0 R11 R12 ) L ( 0 0 R22 ) where C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), S = diag( BETA(K+1), ... , BETA(K+L) ), C**2 + S**2 = I. R is stored in A(1:K+L,N-K-L+1:N) on exit. If M-K-L < 0, K M-K K+L-M D1 = K ( I 0 0 ) M-K ( 0 C 0 ) K M-K K+L-M D2 = M-K ( 0 S 0 ) K+L-M ( 0 0 I ) P-L ( 0 0 0 ) N-K-L K M-K K+L-M ( 0 R ) = K ( 0 R11 R12 R13 ) M-K ( 0 0 R22 R23 ) K+L-M ( 0 0 0 R33 ) where C = diag( ALPHA(K+1), ... , ALPHA(M) ), S = diag( BETA(K+1), ... , BETA(M) ), C**2 + S**2 = I. (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored ( 0 R22 R23 ) in B(M-K+1:L,N+M-K-L+1:N) on exit. The routine computes C, S, R, and optionally the orthogonal transformation matrices U, V and Q. In particular, if B is an N-by-N nonsingular matrix, then the GSVD of A and B implicitly gives the SVD of A*inv(B): A*inv(B) = U*(D1*inv(D2))*V\'. If ( A\',B\')\' has orthonormal columns, then the GSVD of A and B is also equal to the CS decomposition of A and B. Furthermore, the GSVD can be used to derive the solution of the eigenvalue problem: A\'*A x = lambda* B\'*B x. In some literature, the GSVD of A and B is presented in the form U\'*A*X = ( 0 D1 ), V\'*B*X = ( 0 D2 ) where U and V are orthogonal and X is nonsingular, D1 and D2 are "diagonal". The former GSVD form can be converted to the latter form by taking the nonsingular matrix X as X = Q*( I 0 ) ( 0 inv(R) ). Arguments ========= jobu: = 0: U is not computed. = 1: Orthogonal matrix U is computed; jobv: = 0: V is not computed. = 1: Orthogonal matrix V is computed; jobq: = 0: Q is not computed. = 1: Orthogonal matrix Q is computed; k: l: On exit, k and l specify the dimension of the subblocks described in the Purpose section. k + l = effective numerical rank of (A\',B\')\'. A: On entry, the M-by-N matrix A. On exit, A contains the triangular matrix R, or part of R. B: On entry, the P-by-N matrix B. On exit, B contains the triangular matrix R if M-k-l < 0. alpha: beta: On exit, alpha and beta contain the generalized singular value pairs of A and B; alpha(1:k) = 1, beta(1:k) = 0, and if M-k-l >= 0, alpha(k+1:k+l) = C, beta(k+1:k+l) = S, or if M-k-l < 0, alpha(k+1:M)=C, alpha(M+1:k+l)=0 beta(k+1:M) =S, beta(M+1:k+l) =1 and alpha(k+l+1:N) = 0 beta(k+l+1:N) = 0 U: If jobu = 1, U contains the M-by-M orthogonal matrix U. If jobu = 0, U is not referenced. Need a minimum array of (1,1) if jobu = 0; V: If jobv = 1, V contains the P-by-P orthogonal matrix V. If jobv = 0, V is not referenced. Need a minimum array of (1,1) if jobv = 0; Q: If jobq = 1, Q contains the N-by-N orthogonal matrix Q. If jobq = 0, Q is not referenced. Need a minimum array of (1,1) if jobq = 0; iwork: On exit, iwork stores the sorting information. More precisely, the following loop will sort alpha for I = k+1, min(M,k+l) swap alpha(I) and alpha(iwork(I)) endfor such that alpha(1) >= alpha(2) >= ... >= alpha(N). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: if info = 1, the Jacobi-type procedure failed to converge. For further details, see subroutine tgsja. =for example $k = null; $l = null; $A = random(5,6); $B = random(7,6); $alpha = zeroes(6); $beta = zeroes(6); $U = zeroes(5,5); $V = zeroes(7,7); $Q = zeroes(6,6); $iwork = zeroes(long, 6); $info = null; ggsvd($A,1,1,1,$B,$k,$l,$alpha, $beta,$U, $V, $Q, $iwork,$info); '); pp_def("geev", HandleBad => 0, Pars => '[phys]A(n,n); int jobvl(); int jobvr(); [o,phys]wr(n); [o,phys]wi(n); [o,phys]vl(m,m); [o,phys]vr(p,p); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jvl = \'N\'; char jvr = \'N\'; types(F) %{ extern int sgeev_(char *jobvl, char *jobvr, integer *n, float *a, integer *lda, float *wr, float *wi, float *vl, integer *ldvl, float *vr, integer *ldvr, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgeev_(char *jobvl, char *jobvr, integer *n, double * a, integer *lda, double *wr, double *wi, double *vl, integer *ldvl, double *vr, integer *ldvr, double *work, integer *lwork, integer *info); double tmp_work; %} integer lwork = -1; if ($jobvl()) jvl = \'V\'; if ($jobvr()) jvr = \'V\'; $TFD(sgeev_,dgeev_)( &jvl, &jvr, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(wr), $P(wi), $P(vl), &$PRIV(__m_size), $P(vr), &$PRIV(__p_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgeev_,dgeev_)( &jvl, &jvr, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(wr), $P(wi), $P(vl), &$PRIV(__m_size), $P(vr), &$PRIV(__p_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies: A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies: u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= jobvl: = 0: left eigenvectors of A are not computed; = 1: left eigenvectors of A are computed. jobvr: = 0: right eigenvectors of A are not computed; = 1: right eigenvectors of A are computed. A: A is overwritten. wr: wi: wr and wi contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. vl: If jobvl = 1, the left eigenvectors u(j) are stored one after another in the columns of vl, in the same order as their eigenvalues else vl is not referenced. If the j-th eigenvalue is real, then u(j) = vl(:,j), the j-th column of vl. If the j-th and (j+1)-st eigenvalues form a complex conjugate pair, then u(j) = vl(:,j) + i*vl(:,j+1) and u(j+1) = vl(:,j) - i*vl(:,j+1). Min size = [1]. vr: If jobvr = 1, the right eigenvectors v(j) are stored one after another in the columns of vr, in the same order as their eigenvalues else vr is not referenced. If the j-th eigenvalue is real, then v(j) = vr(:,j), the j-th column of vr. If the j-th and (j+1)-st eigenvalues form a complex conjugate pair, then v(j) = vr(:,j) + i*vr(:,j+1) and v(j+1) = vr(:,j) - i*vr(:,j+1). Min size = [1]. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: if info = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements i+1:N of wr and wi contain eigenvalues which have converged. =for example $a = random (5, 5); $wr = zeroes(5); $wi = zeroes($wr); $vl = zeroes($a); $vr = zeroes($a); $info = null; geev($a, 1, 1, $wr, $wi, $vl, $vr, $info); '); pp_def("geevx", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobvl(); int jobvr(); int balance(); int sense(); [o,phys]wr(n); [o,phys]wi(n); [o,phys]vl(m,m); [o,phys]vr(p,p); int [o,phys]ilo(); int [o,phys]ihi(); [o,phys]scale(n); [o,phys]abnrm(); [o,phys]rconde(q); [o,phys]rcondv(r); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jvl = \'N\'; char jvr = \'N\'; char balanc, sens; integer *iwork; integer lwork = -1; types(F) %{ extern int sgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, float *a, integer *lda, float *wr, float *wi, float *vl, integer *ldvl, float *vr, integer *ldvr, integer *ilo, integer *ihi, float *scale, float *abnrm, float *rconde, float *rcondv, float *work, integer *lwork, integer *iwork, integer *info); float tmp_work; %} types(D) %{ extern int dgeevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, double *a, integer *lda, double *wr, double *wi, double *vl, integer *ldvl, double *vr, integer *ldvr, integer *ilo, integer *ihi, double *scale, double *abnrm, double *rconde, double *rcondv, double *work, integer *lwork, integer *iwork, integer *info); double tmp_work; %} if ($jobvl()) jvl = \'V\'; if ($jobvr()) jvr = \'V\'; switch ($balance()) { case 1: balanc = \'P\'; break; case 2: balanc = \'S\'; break; case 3: balanc = \'B\'; break; default: balanc = \'N\'; } switch ($sense()) { case 1: sens = \'E\'; break; case 2: sens = \'V\'; iwork = (integer *)malloc ((2 * $PRIV(__n_size) -2)* sizeof (integer)); break; case 3: sens = \'B\'; iwork = (integer *)malloc ((2 * $PRIV(__n_size) -2)* sizeof (integer)); break; default: sens = \'N\'; } $TFD(sgeevx_,dgeevx_)( &balanc, &jvl, &jvr, &sens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(wr), $P(wi), $P(vl), &$PRIV(__m_size), $P(vr), &$PRIV(__p_size), $P(ilo), $P(ihi), $P(scale), $P(abnrm), $P(rconde), $P(rcondv), &tmp_work, &lwork, iwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgeevx_,dgeevx_)( &balanc, &jvl, &jvr, &sens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(wr), $P(wi), $P(vl), &$PRIV(__m_size), $P(vr), &$PRIV(__p_size), $P(ilo), $P(ihi), $P(scale), $P(abnrm), $P(rconde), $P(rcondv), work, &lwork, iwork, $P(info)); free(work); } if ($sense() == 2 || $sense() == 3) free(iwork); ', Doc => ' =for ref Computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. Optionally also, it computes a balancing transformation to improve the conditioning of the eigenvalues and eigenvectors (ilo, ihi, scale, and abnrm), reciprocal condition numbers for the eigenvalues (rconde), and reciprocal condition numbers for the right eigenvectors (rcondv). The right eigenvector v(j) of A satisfies: A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies: u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Balancing a matrix means permuting the rows and columns to make it more nearly upper triangular, and applying a diagonal similarity transformation D * A * D**(-1), where D is a diagonal matrix, to make its rows and columns closer in norm and the condition numbers of its eigenvalues and eigenvectors smaller. The computed reciprocal condition numbers correspond to the balanced matrix. Permuting rows and columns will not change the condition numbers (in exact arithmetic) but diagonal scaling will. For further explanation of balancing, see section 4.10.2 of the LAPACK Users\' Guide. Arguments ========= balance: Indicates how the input matrix should be diagonally scaled and/or permuted to improve the conditioning of its eigenvalues. = 0: Do not diagonally scale or permute; = 1: Perform permutations to make the matrix more nearly upper triangular. Do not diagonally scale; = 2: Diagonally scale the matrix, i.e. replace A by D*A*D**(-1), where D is a diagonal matrix chosen to make the rows and columns of A more equal in norm. Do not permute; = 3: Both diagonally scale and permute A. Computed reciprocal condition numbers will be for the matrix after balancing and/or permuting. Permuting does not change condition numbers (in exact arithmetic), but balancing does. jobvl: = 0: left eigenvectors of A are not computed; = 1: left eigenvectors of A are computed. If sense = 1 or 3, jobvl must = 1. jobvr; = 0: right eigenvectors of A are not computed; = 1: right eigenvectors of A are computed. If sense = 1 or 3, jobvr must = 1. sense: Determines which reciprocal condition numbers are computed. = 0: None are computed; = 1: Computed for eigenvalues only; = 2: Computed for right eigenvectors only; = 3: Computed for eigenvalues and right eigenvectors. If sense = 1 or 3, both left and right eigenvectors must also be computed (jobvl = 1 and jobvr = 1). A: The N-by-N matrix. It is overwritten. If jobvl = 1 or jobvr = 1, A contains the real Schur form of the balanced version of the input matrix A. wr wi: wr and wi contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues will appear consecutively with the eigenvalue having the positive imaginary part first. vl: If jobvl = 1, the left eigenvectors u(j) are stored one after another in the columns of vl, in the same order as their eigenvalues else vl is not referenced. If the j-th eigenvalue is real, then u(j) = vl(:,j), the j-th column of vl. If the j-th and (j+1)-st eigenvalues form a complex conjugate pair, then u(j) = vl(:,j) + i*vl(:,j+1) and u(j+1) = vl(:,j) - i*vl(:,j+1). Min size = [1]. vr: If jobvr = 1, the right eigenvectors v(j) are stored one after another in the columns of vr, in the same order as their eigenvalues else vr is not referenced. If the j-th eigenvalue is real, then v(j) = vr(:,j), the j-th column of vr. If the j-th and (j+1)-st eigenvalues form a complex conjugate pair, then v(j) = vr(:,j) + i*vr(:,j+1) and v(j+1) = vr(:,j) - i*vr(:,j+1). Min size = [1]. ilo,ihi:Integer values determined when A was balanced. The balanced A(i,j) = 0 if I > J and J = 1,...,ilo-1 or I = ihi+1,...,N. scale: Details of the permutations and scaling factors applied when balancing A. If P(j) is the index of the row and column interchanged with row and column j, and D(j) is the scaling factor applied to row and column j, then scale(J) = P(J), for J = 1,...,ilo-1 = D(J), for J = ilo,...,ihi = P(J) for J = ihi+1,...,N. The order in which the interchanges are made is N to ihi+1, then 1 to ilo-1. abnrm: The one-norm of the balanced matrix (the maximum of the sum of absolute values of elements of any column). rconde: rconde(j) is the reciprocal condition number of the j-th eigenvalue. rcondv: rcondv(j) is the reciprocal condition number of the j-th right eigenvector. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: if info = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors or condition numbers have been computed; elements 1:ilo-1 and i+1:N of wr and wi contain eigenvalues which have converged. =for example $a = random (5,5); $wr = zeroes(5); $wi = zeroes(5); $vl = zeroes(5,5); $vr = zeroes(5,5); $ilo = null; $ihi = null; $scale = zeroes(5); $abnrm = null; $rconde = zeroes(5); $rcondv = zeroes(5); $info = null; geevx($a, 1,1,3,3,$wr, $wi, $vl, $vr, $ilo, $ihi, $scale, $abnrm,$rconde, $rcondv, $info); '); pp_def("ggev", HandleBad => 0, Pars => '[phys]A(n,n); int jobvl();int jobvr();[phys]B(n,n);[o,phys]alphar(n);[o,phys]alphai(n);[o,phys]beta(n);[o,phys]VL(m,m);[o,phys]VR(p,p);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; char pjobvl = \'N\', pjobvr = \'N\'; types(F) %{ extern int sggev_(char *jobvl, char *jobvr, integer *n, float * a, integer *lda, float *b, integer *ldb, float *alphar, float *alphai, float *beta, float *vl, integer *ldvl, float *vr, integer *ldvr, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dggev_(char *jobvl, char *jobvr, integer *n, double * a, integer *lda, double *b, integer *ldb, double *alphar, double *alphai, double *beta, double *vl, integer *ldvl, double *vr, integer *ldvr, double *work, integer *lwork, integer *info); double tmp_work; %} if ($jobvl()) pjobvl = \'V\'; if ($jobvr()) pjobvr = \'V\'; $TFD(sggev_,dggev_)( &pjobvl, &pjobvr, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(alphar), $P(alphai), $P(beta), $P(VL), &$PRIV(__m_size), $P(VR), &$PRIV(__p_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sggev_,dggev_)( &pjobvl, &pjobvr, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(alphar), $P(alphai), $P(beta), $P(VL), &$PRIV(__m_size), $P(VR), &$PRIV(__p_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes for a pair of N-by-N real nonsymmetric matrices (A,B) the generalized eigenvalues, and optionally, the left and/or right generalized eigenvectors. A generalized eigenvalue for a pair of matrices (A,B) is a scalar lambda or a ratio alpha/beta = lambda, such that A - lambda*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. The right eigenvector v(j) corresponding to the eigenvalue lambda(j) of (A,B) satisfies A * v(j) = lambda(j) * B * v(j). The left eigenvector u(j) corresponding to the eigenvalue lambda(j) of (A,B) satisfies u(j)**H * A = lambda(j) * u(j)**H * B . where u(j)**H is the conjugate-transpose of u(j). Arguments ========= jobvl: = 0: do not compute the left generalized eigenvectors; = 1: compute the left generalized eigenvectors. jobvr: = 0: do not compute the right generalized eigenvectors; = 1: compute the right generalized eigenvectors. A: On entry, the matrix A in the pair (A,B). On exit, A has been overwritten. B: On entry, the matrix B in the pair (A,B). On exit, B has been overwritten. alphar: alphai: beta: On exit, (alphar(j) + alphai(j)*i)/beta(j), j=1,...,N, will be the generalized eigenvalues. If alphai(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with alphai(j+1) negative. Note: the quotients alphar(j)/beta(j) and alphai(j)/beta(j) may easily over- or underflow, and beta(j) may even be zero. Thus, the user should avoid naively computing the ratio alpha/beta. However, alphar and alphai will be always less than and usually comparable with norm(A) in magnitude, and beta always less than and usually comparable with norm(B). VL: If jobvl = 1, the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If the j-th eigenvalue is real, then u(j) = VL(:,j), the j-th column of VL. If the j-th and (j+1)-th eigenvalues form a complex conjugate pair, then u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). Each eigenvector will be scaled so the largest component have abs(real part)+abs(imag. part)=1. Not referenced if jobvl = 0. VR: If jobvr = 1, the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If the j-th eigenvalue is real, then v(j) = VR(:,j), the j-th column of VR. If the j-th and (j+1)-th eigenvalues form a complex conjugate pair, then v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). Each eigenvector will be scaled so the largest component have abs(real part)+abs(imag. part)=1. Not referenced if jobvr = 0. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. No eigenvectors have been calculated, but alphar(j), alphai(j), and beta(j) should be correct for j=info+1,...,N. > N: =N+1: other than QZ iteration failed in hgeqz. =N+2: error return from tgevc. =for example $a = random(5,5); $b = random(5,5); $alphar = zeroes(5); $alphai = zeroes(5); $beta = zeroes(5); $vl = zeroes(5,5); $vr = zeroes(5,5); ggev($a, 1, 1, $b, $alphar, $alphai, $beta, $vl, $vr, ($info=null)); '); pp_def("ggevx", HandleBad => 0, Pars => '[io,phys]A(n,n);int balanc();int jobvl();int jobvr();int sense();[io,phys]B(n,n);[o,phys]alphar(n);[o,phys]alphai(n);[o,phys]beta(n);[o,phys]VL(m,m);[o,phys]VR(p,p);int [o,phys]ilo();int [o,phys]ihi();[o,phys]lscale(n);[o,phys]rscale(n);[o,phys]abnrm();[o,phys]bbnrm();[o,phys]rconde(r);[o,phys]rcondv(s);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1, *iwork, *bwork; char pjobvl = \'N\', pjobvr = \'N\'; char pbalanc, psens; types(F) %{ int sggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, float *a, integer *lda, float *b, integer *ldb, float *alphar, float *alphai, float * beta, float *vl, integer *ldvl, float *vr, integer *ldvr, integer *ilo, integer *ihi, float *lscale, float *rscale, float *abnrm, float *bbnrm, float *rconde, float * rcondv, float *work, integer *lwork, integer *iwork, logical * bwork, integer *info); float tmp_work; %} types(D) %{ extern int dggevx_(char *balanc, char *jobvl, char *jobvr, char * sense, integer *n, double *a, integer *lda, double *b, integer *ldb, double *alphar, double *alphai, double * beta, double *vl, integer *ldvl, double *vr, integer *ldvr, integer *ilo, integer *ihi, double *lscale, double *rscale, double *abnrm, double *bbnrm, double *rconde, double * rcondv, double *work, integer *lwork, integer *iwork, logical * bwork, integer *info); double tmp_work; %} if ($jobvl()) pjobvl = \'V\'; if ($jobvr()) pjobvr = \'V\'; switch ($balanc()) { case 1: pbalanc = \'P\'; break; case 2: pbalanc = \'S\'; break; case 3: pbalanc = \'B\'; break; default: pbalanc = \'N\'; } switch ($sense()) { case 1: psens = \'E\'; bwork = (integer *)malloc($SIZE(n) * sizeof(integer)); break; case 2: psens = \'V\'; iwork = (integer *)malloc(($SIZE(n) + 6) * sizeof(integer)); bwork = (integer *)malloc($SIZE(n) * sizeof(integer)); break; case 3: psens = \'B\'; iwork = (integer *)malloc(($SIZE(n) + 6) * sizeof(integer)); bwork = (integer *)malloc($SIZE(n) * sizeof(integer)); break; default: psens = \'N\'; iwork = (integer *)malloc(($SIZE(n) + 6) * sizeof(integer)); } $TFD(sggevx_,dggevx_)( &pbalanc, &pjobvl, &pjobvr, &psens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(alphar), $P(alphai), $P(beta), $P(VL), &$PRIV(__m_size), $P(VR), &$PRIV(__p_size), $P(ilo), $P(ihi), $P(lscale), $P(rscale), $P(abnrm), $P(bbnrm), $P(rconde), $P(rcondv), &tmp_work, &lwork, iwork, bwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sggevx_,dggevx_)( &pbalanc, &pjobvl, &pjobvr, &psens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(alphar), $P(alphai), $P(beta), $P(VL), &$PRIV(__m_size), $P(VR), &$PRIV(__p_size), $P(ilo), $P(ihi), $P(lscale), $P(rscale), $P(abnrm), $P(bbnrm), $P(rconde), $P(rcondv), work, &lwork, iwork, bwork, $P(info)); free(work); } if ($sense()) free(bwork); if ($sense() != 1) free(iwork); ', Doc => ' =for ref Computes for a pair of N-by-N real nonsymmetric matrices (A,B) the generalized eigenvalues, and optionally, the left and/or right generalized eigenvectors. Optionally also, it computes a balancing transformation to improve the conditioning of the eigenvalues and eigenvectors (ilo, ihi, lscale, rscale, abnrm, and bbnrm), reciprocal condition numbers for the eigenvalues (rconde), and reciprocal condition numbers for the right eigenvectors (rcondv). A generalized eigenvalue for a pair of matrices (A,B) is a scalar lambda or a ratio alpha/beta = lambda, such that A - lambda*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0, and even for both being zero. The right eigenvector v(j) corresponding to the eigenvalue lambda(j) of (A,B) satisfies A * v(j) = lambda(j) * B * v(j) . The left eigenvector u(j) corresponding to the eigenvalue lambda(j) of (A,B) satisfies u(j)**H * A = lambda(j) * u(j)**H * B. where u(j)**H is the conjugate-transpose of u(j). Further Details =============== Balancing a matrix pair (A,B) includes, first, permuting rows and columns to isolate eigenvalues, second, applying diagonal similarity transformation to the rows and columns to make the rows and columns as close in norm as possible. The computed reciprocal condition numbers correspond to the balanced matrix. Permuting rows and columns will not change the condition numbers (in exact arithmetic) but diagonal scaling will. For further explanation of balancing, see section 4.11.1.2 of LAPACK Users\' Guide. An approximate error bound on the chordal distance between the i-th computed generalized eigenvalue w and the corresponding exact eigenvalue lambda is chord(w, lambda) <= EPS * norm(abnrm, bbnrm) / rconde(I) An approximate error bound for the angle between the i-th computed eigenvector vl(i) or vr(i) is given by EPS * norm(abnrm, bbnrm) / DIF(i). For further explanation of the reciprocal condition numbers rconde and rcondv, see section 4.11 of LAPACK User\'s Guide. Arguments ========= balanc: Specifies the balance option to be performed. = 0: do not diagonally scale or permute; = 1: permute only; = 2: scale only; = 3: both permute and scale. Computed reciprocal condition numbers will be for the matrices after permuting and/or balancing. Permuting does not change condition numbers (in exact arithmetic), but balancing does. jobvl: = 0: do not compute the left generalized eigenvectors; = 1: compute the left generalized eigenvectors. jobvr: = 0: do not compute the right generalized eigenvectors; = 1: compute the right generalized eigenvectors. sense: Determines which reciprocal condition numbers are computed. = 0: none are computed; = 1: computed for eigenvalues only; = 2: computed for eigenvectors only; = 3: computed for eigenvalues and eigenvectors. A: On entry, the matrix A in the pair (A,B). On exit, A has been overwritten. If jobvl=1 or jobvr=1 or both, then A contains the first part of the real Schur form of the "balanced" versions of the input A and B. B: On entry, the matrix B in the pair (A,B). On exit, B has been overwritten. If jobvl=1 or jobvr=1 or both, then B contains the second part of the real Schur form of the "balanced" versions of the input A and B. alphar: alphai: beta: On exit, (alphar(j) + alphai(j)*i)/beta(j), j=1,...,N, will be the generalized eigenvalues. If alphai(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with alphai(j+1) negative. Note: the quotients alphar(j)/beta(j) and alphai(j)/beta(j) may easily over- or underflow, and beta(j) may even be zero. Thus, the user should avoid naively computing the ratio ALPHA/beta. However, alphar and alphai will be always less than and usually comparable with norm(A) in magnitude, and beta always less than and usually comparable with norm(B). vl: If jobvl = 1, the left eigenvectors u(j) are stored one after another in the columns of vl, in the same order as their eigenvalues. If the j-th eigenvalue is real, then u(j) = vl(:,j), the j-th column of vl. If the j-th and (j+1)-th eigenvalues form a complex conjugate pair, then u(j) = vl(:,j)+i*vl(:,j+1) and u(j+1) = vl(:,j)-i*vl(:,j+1). Each eigenvector will be scaled so the largest component have abs(real part) + abs(imag. part) = 1. Not referenced if jobvl = 0. vr: If jobvr = 1, the right eigenvectors v(j) are stored one after another in the columns of vr, in the same order as their eigenvalues. If the j-th eigenvalue is real, then v(j) = vr(:,j), the j-th column of vr. If the j-th and (j+1)-th eigenvalues form a complex conjugate pair, then v(j) = vr(:,j)+i*vr(:,j+1) and v(j+1) = vr(:,j)-i*vr(:,j+1). Each eigenvector will be scaled so the largest component have abs(real part) + abs(imag. part) = 1. Not referenced if jobvr = 0. ilo,ihi:ilo and ihi are integer values such that on exit A(i,j) = 0 and B(i,j) = 0 if i > j and j = 1,...,ilo-1 or i = ihi+1,...,N. If balanc = 0 or 2, ilo = 1 and ihi = N. lscale: Details of the permutations and scaling factors applied to the left side of A and B. If PL(j) is the index of the row interchanged with row j, and DL(j) is the scaling factor applied to row j, then lscale(j) = PL(j) for j = 1,...,ilo-1 = DL(j) for j = ilo,...,ihi = PL(j) for j = ihi+1,...,N. The order in which the interchanges are made is N to ihi+1, then 1 to ilo-1. rscale: Details of the permutations and scaling factors applied to the right side of A and B. If PR(j) is the index of the column interchanged with column j, and DR(j) is the scaling factor applied to column j, then rscale(j) = PR(j) for j = 1,...,ilo-1 = DR(j) for j = ilo,...,ihi = PR(j) for j = ihi+1,...,N The order in which the interchanges are made is N to ihi+1, then 1 to ilo-1. abnrm: The one-norm of the balanced matrix A. bbnrm: The one-norm of the balanced matrix B. rconde: If sense = 1 or 3, the reciprocal condition numbers of the selected eigenvalues, stored in consecutive elements of the array. For a complex conjugate pair of eigenvalues two consecutive elements of rconde are set to the same value. Thus rconde(j), rcondv(j), and the j-th columns of vl and vr all correspond to the same eigenpair (but not in general the j-th eigenpair, unless all eigenpairs are selected). If sense = 2, rconde is not referenced. rcondv: If sense = 2 or 3, the estimated reciprocal condition numbers of the selected eigenvectors, stored in consecutive elements of the array. For a complex eigenvector two consecutive elements of rcondv are set to the same value. If the eigenvalues cannot be reordered to compute rcondv(j), rcondv(j) is set to 0; this can only occur when the true value would be very small anyway. If sense = 1, rcondv is not referenced. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. No eigenvectors have been calculated, but alphar(j), alphai(j), and beta(j) should be correct for j=info+1,...,N. > N: =N+1: other than QZ iteration failed in hgeqz. =N+2: error return from tgevc. =for example $a = random(5,5); $b = random(5,5); $alphar = zeroes(5); $alphai = zeroes(5); $beta = zeroes(5); $vl = zeroes(5,5); $vr = zeroes(5,5); $lscale = zeroes(5); $rscale = zeroes(5); $ilo = null; $ihi = null; $abnrm = null; $bbnrm = null; $rconde = zeroes(5); $rcondv = zeroes(5); ggevx($a, 3, 1, 1, 3, $b, $alphar, $alphai, $beta, $vl, $vr, $ilo, $ihi, $lscale, $rscale, $abnrm, $bbnrm, $rconde,$rcondv,($info=null)); '); pp_addhdr(' static SV* fselect_function; PDL_Long fselection_wrapper(float *wr, float *wi) { dSP ; long choice; int retval; ENTER ; SAVETMPS ; PUSHMARK(sp) ; XPUSHs(sv_2mortal(newSVnv((double ) *wr))); XPUSHs(sv_2mortal(newSVnv((double ) *wi))); PUTBACK ; retval = perl_call_sv(fselect_function, G_SCALAR); SPAGAIN; if (retval != 1) croak("Error calling perl function\n"); choice = (long ) POPl ; /* Return value */ PUTBACK ; FREETMPS ; LEAVE ; return choice; } static SV* dselect_function; PDL_Long dselection_wrapper(double *wr, double *wi) { dSP ; long choice; int retval; ENTER ; SAVETMPS ; PUSHMARK(sp) ; XPUSHs(sv_2mortal(newSVnv(*wr))); XPUSHs(sv_2mortal(newSVnv(*wi))); PUTBACK ; retval = perl_call_sv(dselect_function, G_SCALAR); SPAGAIN; if (retval != 1) croak("Error calling perl function\n"); choice = (long ) POPl ; /* Return value */ PUTBACK ; FREETMPS ; LEAVE ; return choice; } '); pp_def("gees", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobvs(); int sort(); [o,phys]wr(n); [o,phys]wi(n); [o,phys]vs(p,p); int [o,phys]sdim(); int [o,phys]info()', OtherPars => "SV* select_func" , GenericTypes => [F,D], Code => generate_code ' char jvs = \'N\'; char psort = \'N\'; integer *bwork; integer lwork = -1; types(F) %{ extern int sgees_(char *jobvs, char *sort, L_fp select, integer *n, float *a, integer *lda, integer *sdim, float *wr, float *wi, float *vs, integer *ldvs, float *work, integer *lwork, integer *bwork, integer *info); float tmp_work; fselect_function = $PRIV(select_func); %} types(D) %{ extern int dgees_(char *jobvs, char *sort, L_fp select, integer *n, double *a, integer *lda, integer *sdim, double *wr, double *wi, double *vs, integer *ldvs, double *work, integer *lwork, integer *bwork, integer *info); double tmp_work; dselect_function = $PRIV(select_func); %} if ($jobvs()) jvs = \'V\'; if ($sort()){ psort = \'S\'; bwork = (integer * ) malloc ($PRIV(__n_size) * sizeof (integer)); } types(F) %{ sgees_( &jvs, &psort, fselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(sdim), $P(wr), $P(wi), $P(vs), &$PRIV(__p_size), &tmp_work, &lwork, bwork, $P(info)); %} types(D) %{ dgees_( &jvs, &psort, dselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(sdim), $P(wr), $P(wi), $P(vs), &$PRIV(__p_size), &tmp_work, &lwork, bwork, $P(info)); %} lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} types(F) %{ sgees_( &jvs, &psort, fselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(sdim), $P(wr), $P(wi), $P(vs), &$PRIV(__p_size), work, &lwork, bwork, $P(info)); %} types(D) %{ dgees_( &jvs, &psort, dselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(sdim), $P(wr), $P(wi), $P(vs), &$PRIV(__p_size), work, &lwork, bwork, $P(info)); %} free(work); } if ($sort()) free(bwork); ', Doc => ' =for ref Computes for an N-by-N real nonsymmetric matrix A, the eigenvalues, the real Schur form T, and, optionally, the matrix of Schur vectors Z. This gives the Schur factorization A = Z*T*Z\'. Optionally, it also orders the eigenvalues on the diagonal of the real Schur form so that selected eigenvalues are at the top left. The leading columns of Z then form an orthonormal basis for the invariant subspace corresponding to the selected eigenvalues. A matrix is in real Schur form if it is upper quasi-triangular with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the form [ a b ] [ c a ] where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). Arguments ========= jobvs: = 0: Schur vectors are not computed; = 1: Schur vectors are computed. sort: Specifies whether or not to order the eigenvalues on the diagonal of the Schur form. = 0: Eigenvalues are not ordered; = 1: Eigenvalues are ordered (see select_func). select_func: If sort = 1, select_func is used to select eigenvalues to sort to the top left of the Schur form. If sort = 0, select_func is not referenced. An eigenvalue wr(j)+sqrt(-1)*wi(j) is selected if select_func(SCALAR(wr(j)), SCALAR(wi(j))) is true; i.e., if either one of a complex conjugate pair of eigenvalues is selected, then both complex eigenvalues are selected. Note that a selected complex eigenvalue may no longer satisfy select_func(wr(j),wi(j)) = 1 after ordering, since ordering may change the value of complex eigenvalues (especially if the eigenvalue is ill-conditioned); in this case info is set to N+2 (see info below). A: The N-by-N matrix A. On exit, A has been overwritten by its real Schur form T. sdim: If sort = 0, sdim = 0. If sort = 1, sdim = number of eigenvalues (after sorting) for which select_func is true. (Complex conjugate pairs for which select_func is true for either eigenvalue count as 2.) wr: wi: wr and wi contain the real and imaginary parts, respectively, of the computed eigenvalues in the same order that they appear on the diagonal of the output Schur form T. Complex conjugate pairs of eigenvalues will appear consecutively with the eigenvalue having the positive imaginary part first. vs: If jobvs = 1, vs contains the orthogonal matrix Z of Schur vectors else vs is not referenced. info = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: if info = i, and i is <= N: the QR algorithm failed to compute all the eigenvalues; elements 1:ILO-1 and i+1:N of wr and wi contain those eigenvalues which have converged; if jobvs = 1, vs contains the matrix which reduces A to its partially converged Schur form. = N+1: the eigenvalues could not be reordered because some eigenvalues were too close to separate (the problem is very ill-conditioned); = N+2: after reordering, roundoff changed values of some complex eigenvalues so that leading eigenvalues in the Schur form no longer satisfy select_func = 1 This could also be caused by underflow due to scaling. =for example sub select_function{ my ($a, $b ) = @_; # Stable "continuous time" eigenspace return $a < 0 ? 1 : 0; } $A = random (5,5); $wr= zeroes(5); $wi = zeroes(5); $vs = zeroes(5,5); $sdim = null; $info = null; gees($A, 1,1, $wr, $wi, $vs, $sdim, $info,\&select_function); '); pp_def("geesx", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobvs(); int sort(); int sense(); [o,phys]wr(n); [o,phys]wi(n); [o,phys]vs(p,p); int [o,phys]sdim(); [o,phys]rconde();[o,phys]rcondv(); int [o,phys]info()', OtherPars => "SV* select_func" , GenericTypes => [F,D], Code => generate_code ' char jvs = \'N\'; char psort = \'N\'; integer *bwork; integer lwork = 0; integer liwork = 1; integer *iwork; char sens; types(F) %{ extern int sgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, float *a, integer *lda, integer *sdim, float *wr, float *wi, float *vs, integer *ldvs, float *rconde, float *rcondv, float *work, integer *lwork, integer *iwork, integer *liwork, integer *bwork, integer *info); float *work; fselect_function = $PRIV(select_func); %} types(D) %{ extern int dgeesx_(char *jobvs, char *sort, L_fp select, char * sense, integer *n, double *a, integer *lda, integer *sdim, double *wr, double *wi, double *vs, integer *ldvs, double *rconde, double *rcondv, double *work, integer *lwork, integer *iwork, integer *liwork, integer *bwork, integer *info); double *work; dselect_function = $PRIV(select_func); %} if ($jobvs()) jvs = \'V\'; if ($sort()){ psort = \'S\'; bwork = (integer * ) malloc ($PRIV(__n_size) * sizeof (integer)); } switch ($sense()) { case 1: sens = \'E\'; lwork = (integer ) ($PRIV(__n_size) + $PRIV(__n_size) * ($PRIV(__n_size)/2+1)); iwork = (integer *) malloc (liwork * sizeof (integer)); break; case 2: sens = \'V\'; lwork = (integer ) ($PRIV(__n_size) + $PRIV(__n_size) * ($PRIV(__n_size)/2+1)); if ($sort()){ liwork = (integer )(pow((($PRIV(__n_size)/2)+1), 2)); iwork = (integer *) malloc (liwork * sizeof (integer)); } else{iwork = (integer *) malloc (liwork * sizeof (integer));} break; case 3: sens = \'B\'; lwork = (integer ) ($PRIV(__n_size) + $PRIV(__n_size) * ($PRIV(__n_size)/2+1)); if ($sort()){ liwork = (integer )(pow((($PRIV(__n_size)/2)+1), 2)); iwork = (integer *) malloc (liwork * sizeof (integer)); } else{iwork = (integer *) malloc (liwork * sizeof (integer));} break; default: sens = \'N\'; lwork = (integer ) ($PRIV(__n_size) * 3); iwork = (integer *) malloc (liwork * sizeof (integer)); } types(D) %{ work = (double * )malloc(lwork * sizeof (double)); dgeesx_( &jvs, &psort, dselection_wrapper, &sens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(sdim), $P(wr), $P(wi), $P(vs), &$PRIV(__p_size), $P(rconde), $P(rcondv), work, &lwork, iwork, &liwork, bwork, $P(info)); %} types(F) %{ work = (float * )malloc(lwork * sizeof (float)); sgeesx_( &jvs, &psort, fselection_wrapper, &sens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(sdim), $P(wr), $P(wi), $P(vs), &$PRIV(__p_size), $P(rconde), $P(rcondv), work, &lwork, iwork, &liwork, bwork, $P(info)); %} free(work); free(iwork); if ($sort()) free(bwork); ', Doc => ' =for ref Computes for an N-by-N real nonsymmetric matrix A, the eigenvalues, the real Schur form T, and, optionally, the matrix of Schur vectors Z. This gives the Schur factorization A = Z*T*Z\'. Optionally, it also orders the eigenvalues on the diagonal of the real Schur form so that selected eigenvalues are at the top left; computes a reciprocal condition number for the average of the selected eigenvalues (rconde); and computes a reciprocal condition number for the right invariant subspace corresponding to the selected eigenvalues (rcondv). The leading columns of Z form an orthonormal basis for this invariant subspace. For further explanation of the reciprocal condition numbers rconde and rcondv, see Section 4.10 of the LAPACK Users\' Guide (where these quantities are called s and sep respectively). A real matrix is in real Schur form if it is upper quasi-triangular with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the form [ a b ] [ c a ] where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). Arguments ========= jobvs: = 0: Schur vectors are not computed; = 1: Schur vectors are computed. sort: Specifies whether or not to order the eigenvalues on the diagonal of the Schur form. = 0: Eigenvalues are not ordered; = 1: Eigenvalues are ordered (see select_func). select_func: If sort = 1, select_func is used to select eigenvalues to sort to the top left of the Schur form else select_func is not referenced. An eigenvalue wr(j)+sqrt(-1)*wi(j) is selected if select_func(wr(j),wi(j)) is true; i.e., if either one of a complex conjugate pair of eigenvalues is selected, then both are. Note that a selected complex eigenvalue may no longer satisfy select_func(wr(j),wi(j)) = 1 after ordering, since ordering may change the value of complex eigenvalues (especially if the eigenvalue is ill-conditioned); in this case info may be set to N+3 (see info below). sense: Determines which reciprocal condition numbers are computed. = 0: None are computed; = 1: Computed for average of selected eigenvalues only; = 2: Computed for selected right invariant subspace only; = 3: Computed for both. If sense = 1, 2 or 3, sort must equal 1. A: On entry, the N-by-N matrix A. On exit, A is overwritten by its real Schur form T. sdim: If sort = 0, sdim = 0. If sort = 1, sdim = number of eigenvalues (after sorting) for which select_func is 1. (Complex conjugate pairs for which select_func is 1 for either eigenvalue count as 2.) wr: wi: wr and wi contain the real and imaginary parts, respectively, of the computed eigenvalues, in the same order that they appear on the diagonal of the output Schur form T. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. vs If jobvs = 1, vs contains the orthogonal matrix Z of Schur vectors else vs is not referenced. rconde: If sense = 1 or 3, rconde contains the reciprocal condition number for the average of the selected eigenvalues. Not referenced if sense = 0 or 2. rcondv: If sense = 2 or 3, rcondv contains the reciprocal condition number for the selected right invariant subspace. Not referenced if sense = 0 or 1. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: if info = i, and i is <= N: the QR algorithm failed to compute all the eigenvalues; elements 1:ilo-1 and i+1:N of wr and wi contain those eigenvalues which have converged; if jobvs = 1, vs contains the transformation which reduces A to its partially converged Schur form. = N+1: the eigenvalues could not be reordered because some eigenvalues were too close to separate (the problem is very ill-conditioned); = N+2: after reordering, roundoff changed values of some complex eigenvalues so that leading eigenvalues in the Schur form no longer satisfy select_func=1 This could also be caused by underflow due to scaling. =for example sub select_function{ my ($a, $b) = @_; # Stable "discrete time" eigenspace return sqrt($a**2 + $b**2) < 1 ? 1 : 0; } $A = random (5,5); $wr= zeroes(5); $wi = zeroes(5); $vs = zeroes(5,5); $sdim = null; $rconde = null; $rcondv = null; $info = null; geesx($A, 1,1, 3, $wr, $wi, $vs, $sdim, $rconde, $rcondv, $info, \&select_function); '); pp_addhdr(' static SV* fgselect_function; PDL_Long fgselection_wrapper(float *zr, float *zi, float *d) { dSP ; long choice; int retval; ENTER ; SAVETMPS ; PUSHMARK(sp) ; XPUSHs(sv_2mortal(newSVnv((double) *zr))); XPUSHs(sv_2mortal(newSVnv((double) *zi))); XPUSHs(sv_2mortal(newSVnv((double) *d))); PUTBACK ; retval = perl_call_sv(fgselect_function, G_SCALAR); SPAGAIN; if (retval != 1) croak("Error calling perl function\n"); choice = (long ) POPl ; /* Return value */ PUTBACK ; FREETMPS ; LEAVE ; return choice; } static SV* dgselect_function; PDL_Long dgselection_wrapper(double *zr, double *zi, double *d) { dSP ; long choice; int retval; ENTER ; SAVETMPS ; PUSHMARK(sp) ; XPUSHs(sv_2mortal(newSVnv(*zr))); XPUSHs(sv_2mortal(newSVnv(*zi))); XPUSHs(sv_2mortal(newSVnv(*d))); PUTBACK ; retval = perl_call_sv(dgselect_function, G_SCALAR); SPAGAIN; if (retval != 1) croak("Error calling perl function\n"); choice = (long ) POPl ; /* Return value */ PUTBACK ; FREETMPS ; LEAVE ; return choice; } '); pp_def("gges", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobvsl();int jobvsr();int sort();[io,phys]B(n,n);[o,phys]alphar(n);[o,phys]alphai(n);[o,phys]beta(n);[o,phys]VSL(m,m);[o,phys]VSR(p,p);int [o,phys]sdim();int [o,phys]info()', OtherPars => "SV* select_func" , GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; char pjobvsl = \'N\', pjobvsr = \'N\', psort = \'N\'; integer *bwork; types(F) %{ extern int sgges_(char *jobvsl, char *jobvsr, char *sort, L_fp delctg, integer *n, float *a, integer *lda, float *b, integer *ldb, integer *sdim, float *alphar, float *alphai, float *beta, float *vsl, integer *ldvsl, float *vsr, integer *ldvsr, float *work, integer *lwork, logical *bwork, integer *info); float tmp_work; fgselect_function = $PRIV(select_func); %} types(D) %{ extern int dgges_(char *jobvsl, char *jobvsr, char *sort, L_fp delctg, integer *n, double *a, integer *lda, double *b, integer *ldb, integer *sdim, double *alphar, double *alphai, double *beta, double *vsl, integer *ldvsl, double *vsr, integer *ldvsr, double *work, integer *lwork, logical *bwork, integer *info); double tmp_work; dgselect_function = $PRIV(select_func); %} if ($jobvsl()) pjobvsl = \'V\'; if ($jobvsr()) pjobvsr = \'V\'; if ($sort()){ psort = \'S\'; bwork = (integer *)malloc($PRIV(__n_size) * sizeof(integer)); } types(F) %{ sgges_( &pjobvsl, &pjobvsr, &psort, fgselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(sdim), $P(alphar), $P(alphai), $P(beta), $P(VSL), &$PRIV(__m_size), $P(VSR), &$PRIV(__p_size), &tmp_work, &lwork, bwork, $P(info)); %} types(D) %{ dgges_( &pjobvsl, &pjobvsr, &psort, dgselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(sdim), $P(alphar), $P(alphai), $P(beta), $P(VSL), &$PRIV(__m_size), $P(VSR), &$PRIV(__p_size), &tmp_work, &lwork, bwork, $P(info)); %} lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} types(F) %{ sgges_( &pjobvsl, &pjobvsr, &psort, fgselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(sdim), $P(alphar), $P(alphai), $P(beta), $P(VSL), &$PRIV(__m_size), $P(VSR), &$PRIV(__p_size), work, &lwork, bwork, $P(info)); %} types(D) %{ dgges_( &pjobvsl, &pjobvsr, &psort, dgselection_wrapper, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(sdim), $P(alphar), $P(alphai), $P(beta), $P(VSL), &$PRIV(__m_size), $P(VSR), &$PRIV(__p_size), work, &lwork, bwork, $P(info)); %} free(work); } if ($sort()) free (bwork); ', Doc => ' =for ref Computes for a pair of N-by-N real nonsymmetric matrices (A,B), the generalized eigenvalues, the generalized real Schur form (S,T), optionally, the left and/or right matrices of Schur vectors (VSL and VSR). This gives the generalized Schur factorization (A,B) = ( (VSL)*S*(VSR)\', (VSL)*T*(VSR)\' ) Optionally, it also orders the eigenvalues so that a selected cluster of eigenvalues appears in the leading diagonal blocks of the upper quasi-triangular matrix S and the upper triangular matrix T.The leading columns of VSL and VSR then form an orthonormal basis for the corresponding left and right eigenspaces (deflating subspaces). (If only the generalized eigenvalues are needed, use the driver ggev instead, which is faster.) A generalized eigenvalue for a pair of matrices (A,B) is a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0 or both being zero. A pair of matrices (S,T) is in generalized real Schur form if T is upper triangular with non-negative diagonal and S is block upper triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond to real generalized eigenvalues, while 2-by-2 blocks of S will be "standardized" by making the corresponding elements of T have the form: [ a 0 ] [ 0 b ] and the pair of corresponding 2-by-2 blocks in S and T will have a complex conjugate pair of generalized eigenvalues. Arguments ========= jobvsl: = 0: do not compute the left Schur vectors; = 1: compute the left Schur vectors. jobvsr: = 0: do not compute the right Schur vectors; = 1: compute the right Schur vectors. sort: Specifies whether or not to order the eigenvalues on the diagonal of the generalized Schur form. = 0: Eigenvalues are not ordered; = 1: Eigenvalues are ordered (see delztg); delztg: If sort = 0, delztg is not referenced. If sort = 1, delztg is used to select eigenvalues to sort to the top left of the Schur form. An eigenvalue (alphar(j)+alphai(j))/beta(j) is selected if delztg(alphar(j),alphai(j),beta(j)) is true; i.e. if either one of a complex conjugate pair of eigenvalues is selected, then both complex eigenvalues are selected. Note that in the ill-conditioned case, a selected complex eigenvalue may no longer satisfy delztg(alphar(j),alphai(j), beta(j)) = 1 after ordering. info is to be set to N+2 in this case. A: On entry, the first of the pair of matrices. On exit, A has been overwritten by its generalized Schur form S. B: On entry, the second of the pair of matrices. On exit, B has been overwritten by its generalized Schur form T. sdim: If sort = 0, sdim = 0. If sort = 1, sdim = number of eigenvalues (after sorting) for which delztg is true. (Complex conjugate pairs for which delztg is true for either eigenvalue count as 2.) alphar: alphai: beta: On exit, (alphar(j) + alphai(j)*i)/beta(j), j=1,...,N, will be the generalized eigenvalues. alphar(j) + alphai(j)*i, and beta(j),j=1,...,N are the diagonals of the complex Schur form (S,T) that would result if the 2-by-2 diagonal blocks of the real Schur form of (A,B) were further reduced to triangular form using 2-by-2 complex unitary transformations. If alphai(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with alphai(j+1) negative. Note: the quotients alphar(j)/beta(j) and alphai(j)/beta(j) may easily over- or underflow, and beta(j) may even be zero. Thus, the user should avoid naively computing the ratio. However, alphar and alphai will be always less than and usually comparable with norm(A) in magnitude, and beta always less than and usually comparable with norm(B). VSL: If jobvsl = 1, VSL will contain the left Schur vectors. Not referenced if jobvsl = 0. The leading dimension must always be >=1. VSR: If jobvsr = 1, VSR will contain the right Schur vectors. Not referenced if jobvsr = 0. The leading dimension must always be >=1. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. (A,B) are not in Schur form, but alphar(j), alphai(j), and beta(j) should be correct for j=info+1,...,N. > N: =N+1: other than QZ iteration failed in hgeqz. =N+2: after reordering, roundoff changed values of some complex eigenvalues so that leading eigenvalues in the Generalized Schur form no longer satisfy delztg=1 This could also be caused due to scaling. =N+3: reordering failed in tgsen. =for example sub my_select{ my ($zr, $zi, $d) = @_; # stable generalized eigenvalues for continuous time return ( ($zr < 0 && $d > 0 ) || ($zr > 0 && $d < 0) ) ? 1 : 0; } $a = random(5,5); $b = random(5,5); $sdim = null; $alphar = zeroes(5); $alphai = zeroes(5); $beta = zeroes(5); $vsl = zeroes(5,5); $vsr = zeroes(5,5); gges($a, 1, 1, 1, $b, $alphar, $alphai, $beta, $vsl, $vsr, $sdim,($info=null), \&my_select); '); pp_def("ggesx", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobvsl();int jobvsr();int sort();int sense();[io,phys]B(n,n);[o,phys]alphar(n);[o,phys]alphai(n);[o,phys]beta(n);[o,phys]VSL(m,m);[o,phys]VSR(p,p);int [o,phys]sdim();[o,phys]rconde(q);[o,phys]rcondv(r);int [o,phys]info()', OtherPars => "SV* select_func" , GenericTypes => [F,D], Code => generate_code ' integer maxwrk, lwork,liwork; integer minwrk = 1; static integer c__0 = 0; static integer c__1 = 1; static integer c_n1 = -1; char pjobvsl = \'N\'; char pjobvsr = \'N\'; char psort = \'N\'; char psens = \'N\'; integer *bwork; integer *iwork; extern integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen opts_len); types(F) %{ extern int sggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp delctg, char *sense, integer *n, float *a, integer *lda, float *b, integer *ldb, integer *sdim, float *alphar, float *alphai, float *beta, float *vsl, integer *ldvsl, float *vsr, integer *ldvsr, float *rconde, float *rcondv, float *work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); fgselect_function = $PRIV(select_func); %} types(D) %{ extern int dggesx_(char *jobvsl, char *jobvsr, char *sort, L_fp delctg, char *sense, integer *n, double *a, integer *lda, double *b, integer *ldb, integer *sdim, double *alphar, double *alphai, double *beta, double *vsl, integer *ldvsl, double *vsr, integer *ldvsr, double *rconde, double *rcondv, double *work, integer *lwork, integer *iwork, integer *liwork, logical *bwork, integer *info); dgselect_function = $PRIV(select_func); %} if ($jobvsr()) pjobvsr = \'V\'; if ($sort()){ psort = \'S\'; bwork = (integer *)malloc($PRIV(__n_size) * sizeof(integer)); } switch ($sense()) { case 1: psens = \'E\'; break; case 2: psens = \'V\'; break; case 3: psens = \'B\'; break; default: psens = \'N\'; } // Bug in Lapack ????? // if (!$sense()) // liwork = 1; // else // { liwork = $SIZE(n) + 6; iwork = (integer *)malloc(liwork * sizeof(integer)); // } // Code modified from Lapack // TODO other shur form above // The actual updated release (clapack 09/20/2000) do not allow // querying the workspace. See release notes of Lapack // for this feature. minwrk = ($SIZE(n) + 1 << 3) + 16; maxwrk = ($SIZE(n) + 1) * 7 + $SIZE(n) * (integer ) ilaenv_(&c__1, "DGEQRF", " ", &$PRIV(__n_size), &c__1, &$PRIV(__n_size), &c__0, (ftnlen)6, (ftnlen)1) + 16; if ($jobvsl()) { integer i__1 = maxwrk; integer i__2 = minwrk + $SIZE(n) * (integer )ilaenv_(&c__1, "DORGQR" , " ", &$PRIV(__n_size), &c__1, &$PRIV(__n_size), &c_n1, (ftnlen)6, (ftnlen)1); maxwrk = (integer ) max(i__1,i__2); pjobvsl = \'V\'; } lwork = (integer ) max(maxwrk,minwrk); { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); sggesx_( &pjobvsl, &pjobvsr, &psort, fgselection_wrapper, &psens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(sdim), $P(alphar), $P(alphai), $P(beta), $P(VSL), &$PRIV(__m_size), $P(VSR), &$PRIV(__p_size), $P(rconde), $P(rcondv), work, &lwork, iwork, &liwork, bwork, $P(info)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); dggesx_( &pjobvsl, &pjobvsr, &psort, dgselection_wrapper, &psens, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(sdim), $P(alphar), $P(alphai), $P(beta), $P(VSL), &$PRIV(__m_size), $P(VSR), &$PRIV(__p_size), $P(rconde), $P(rcondv), work, &lwork, iwork, &liwork, bwork, $P(info)); %} free(work); } if ($sort()) free(bwork); free(iwork); ', Doc => ' =for ref Computes for a pair of N-by-N real nonsymmetric matrices (A,B), the generalized eigenvalues, the real Schur form (S,T), and, optionally, the left and/or right matrices of Schur vectors (VSL and VSR). This gives the generalized Schur factorization (A,B) = ( (VSL) S (VSR)\', (VSL) T (VSR)\' ) Optionally, it also orders the eigenvalues so that a selected cluster of eigenvalues appears in the leading diagonal blocks of the upper quasi-triangular matrix S and the upper triangular matrix T; computes a reciprocal condition number for the average of the selected eigenvalues (RCONDE); and computes a reciprocal condition number for the right and left deflating subspaces corresponding to the selected eigenvalues (RCONDV). The leading columns of VSL and VSR then form an orthonormal basis for the corresponding left and right eigenspaces (deflating subspaces). A generalized eigenvalue for a pair of matrices (A,B) is a scalar w or a ratio alpha/beta = w, such that A - w*B is singular. It is usually represented as the pair (alpha,beta), as there is a reasonable interpretation for beta=0 or for both being zero. A pair of matrices (S,T) is in generalized real Schur form if T is upper triangular with non-negative diagonal and S is block upper triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond to real generalized eigenvalues, while 2-by-2 blocks of S will be "standardized" by making the corresponding elements of T have the form: [ a 0 ] [ 0 b ] and the pair of corresponding 2-by-2 blocks in S and T will have a complex conjugate pair of generalized eigenvalues. Further details =============== An approximate (asymptotic) bound on the average absolute error of the selected eigenvalues is EPS * norm((A, B)) / RCONDE( 1 ). An approximate (asymptotic) bound on the maximum angular error in the computed deflating subspaces is EPS * norm((A, B)) / RCONDV( 2 ). See LAPACK User\'s Guide, section 4.11 for more information. Arguments ========= jobvsl: = 0: do not compute the left Schur vectors; = 1: compute the left Schur vectors. jobvsr: = 0: do not compute the right Schur vectors; = 1: compute the right Schur vectors. sort: Specifies whether or not to order the eigenvalues on the diagonal of the generalized Schur form. = 0: Eigenvalues are not ordered; = 1: Eigenvalues are ordered (see delztg); delztg: If sort = 0, delztg is not referenced. If sort = 1, delztg is used to select eigenvalues to sort to the top left of the Schur form. An eigenvalue (alphar(j)+alphai(j))/beta(j) is selected if delztg(alphar(j),alphai(j),beta(j)) is true; i.e. if either one of a complex conjugate pair of eigenvalues is selected, then both complex eigenvalues are selected. Note that in the ill-conditioned case, a selected complex eigenvalue may no longer satisfy delztg(alphar(j),alphai(j), beta(j)) = 1 after ordering. info is to be set to N+2 in this case. sense: Determines which reciprocal condition numbers are computed. = 0 : None are computed; = 1 : Computed for average of selected eigenvalues only; = 2 : Computed for selected deflating subspaces only; = 3 : Computed for both. If sense = 1, 2, or 3, sort must equal 1. A: On entry, the first of the pair of matrices. On exit, A has been overwritten by its generalized Schur form S. B: On entry, the second of the pair of matrices. On exit, B has been overwritten by its generalized Schur form T. sdim: If sort = 0, sdim = 0. If sort = 1, sdim = number of eigenvalues (after sorting) for which delztg is true. (Complex conjugate pairs for which delztg is true for either eigenvalue count as 2.) alphar: alphai: beta: On exit, (alphar(j) + alphai(j)*i)/beta(j), j=1,...,N, will be the generalized eigenvalues. alphar(j) + alphai(j)*i, and beta(j),j=1,...,N are the diagonals of the complex Schur form (S,T) that would result if the 2-by-2 diagonal blocks of the real Schur form of (A,B) were further reduced to triangular form using 2-by-2 complex unitary transformations. If alphai(j) is zero, then the j-th eigenvalue is real; if positive, then the j-th and (j+1)-st eigenvalues are a complex conjugate pair, with alphai(j+1) negative. Note: the quotients alphar(j)/beta(j) and alphai(j)/beta(j) may easily over- or underflow, and beta(j) may even be zero. Thus, the user should avoid naively computing the ratio. However, alphar and alphai will be always less than and usually comparable with norm(A) in magnitude, and beta always less than and usually comparable with norm(B). VSL: If jobvsl = 1, VSL will contain the left Schur vectors. Not referenced if jobvsl = 0. The leading dimension must always be >=1. VSR: If jobvsr = 1, VSR will contain the right Schur vectors. Not referenced if jobvsr = 0. The leading dimension must always be >=1. rconde: If sense = 1 or 3, rconde(1) and rconde(2) contain the reciprocal condition numbers for the average of the selected eigenvalues. Not referenced if sense = 0 or 2. rcondv: If sense = 2 or 3, rcondv(1) and rcondv(2) contain the reciprocal condition numbers for the selected deflating subspaces. Not referenced if sense = 0 or 1. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. = 1,...,N: The QZ iteration failed. (A,B) are not in Schur form, but alphar(j), alphai(j), and beta(j) should be correct for j=info+1,...,N. > N: =N+1: other than QZ iteration failed in hgeqz. =N+2: after reordering, roundoff changed values of some complex eigenvalues so that leading eigenvalues in the Generalized Schur form no longer satisfy delztg=1 This could also be caused due to scaling. =N+3: reordering failed in tgsen. =for example sub my_select{ my ($zr, $zi, $d) = @_; # Eigenvalue : (ZR/D) + sqrt(-1)*(ZI/D) # stable generalized eigenvalues for discrete time return (sqrt($zr**2 + $zi**2) < abs($d) ) ? 1 : 0; } $a = random(5,5); $b = random(5,5); $sdim = null; $alphar = zeroes(5); $alphai = zeroes(5); $beta = zeroes(5); $vsl = zeroes(5,5); $vsr = zeroes(5,5); $rconde = zeroes(2); $rcondv = zeroes(2); ggesx($a, 1, 1, 1, 3,$b, $alphar, $alphai, $beta, $vsl, $vsr, $sdim, $rconde, $rcondv, ($info=null), \&my_select); '); pp_def("syev", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobz(); int uplo(); [o,phys]w(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; integer lwork = -1; types(F) %{ extern int ssyev_(char *jobz, char *uplo, integer *n, float *a, integer *lda, float *w, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dsyev_(char *jobz, char *uplo, integer *n, double *a, integer *lda, double *w, double *work, integer *lwork, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; $TFD(ssyev_,dsyev_)( &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(w), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssyev_,dsyev_)( &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(w), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. Arguments ========= jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. uplo = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if jobz = 1, then if info = 0, A contains the orthonormal eigenvectors of the matrix A. If jobz = 0, then on exit the lower triangle (if uplo=1) or the upper triangle (if uplo=0) of A, including the diagonal, is destroyed. w: If info = 0, the eigenvalues in ascending order. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero. =for example # Assume $a is symmetric ;) $a = random (5,5); syev($a, 1,1, (my $w = zeroes(5)), (my $info=null)); '); pp_def("syevd", HandleBad => 0, Pars => '[io,phys]A(n,n); int jobz(); int uplo(); [o,phys]w(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; integer lwork = -1; integer liwork = -1; integer tmp_liwork; integer *iwork; types(F) %{ extern int ssyevd_(char *jobz, char *uplo, integer *n, float *a, integer *lda, float *w, float *work, integer *lwork, integer *iwork, integer *liwork, integer *info); float tmp_work; %} types(D) %{ extern int dsyevd_(char *jobz, char *uplo, integer *n, double *a, integer *lda, double *w, double *work, integer *lwork, integer *iwork, integer *liwork, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; $TFD(ssyevd_,dsyevd_)( &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(w), &tmp_work, &lwork, &tmp_liwork, &liwork, $P(info)); lwork = (integer )tmp_work; liwork = (integer )tmp_liwork; iwork = (integer *)malloc(liwork * sizeof(integer)); { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssyevd_,dsyevd_)( &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(w), work, &lwork, iwork, &liwork, $P(info)); free(work); free(iwork); } ', Doc => ' =for ref Computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Because of large use of BLAS of level 3, syevd needs N**2 more workspace than syevx. Arguments ========= jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. uplo = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if jobz = 1, then if info = 0, A contains the orthonormal eigenvectors of the matrix A. If jobz = 0, then on exit the lower triangle (if uplo=1) or the upper triangle (if uplo=0) of A, including the diagonal, is destroyed. w: If info = 0, the eigenvalues in ascending order. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero. =for example # Assume $a is symmetric ;) $a = random (5,5); syevd($a, 1,1, (my $w = zeroes(5)), (my $info=null)); '); pp_def("syevx", HandleBad => 0, Pars => '[phys]A(n,n); int jobz(); int range(); int uplo(); [phys]vl(); [phys]vu(); int [phys]il(); int [phys]iu(); [phys]abstol(); int [o,phys]m(); [o,phys]w(n); [o,phys]z(p,q);int [o,phys]ifail(r); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; char prange = \'A\'; integer lwork = -1; integer *iwork; types(F) %{ extern int ssyevx_(char *jobz, char *range, char *uplo, integer *n, float *a, integer *lda, float *vl, float *vu, integer * il, integer *iu, float *abstol, integer *m, float *w, float *z__, integer *ldz, float *work, integer *lwork, integer *iwork, integer *ifail, integer *info); float tmp_work; %} types(D) %{ extern int dsyevx_(char *jobz, char *range, char *uplo, integer *n, double *a, integer *lda, double *vl, double *vu, integer * il, integer *iu, double *abstol, integer *m, double *w, double *z__, integer *ldz, double *work, integer *lwork, integer *iwork, integer *ifail, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; switch ($range()) { case 1: prange = \'V\'; break; case 2: prange = \'I\'; break; default: prange = \'A\'; } iwork = (integer *)malloc(5 * $SIZE (n) * sizeof(integer)); $TFD(ssyevx_,dsyevx_)( &jz, &prange, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(vl), $P(vu), $P(il), $P(iu), $P(abstol), $P(m), $P(w), $P(z), &$PRIV(__p_size), &tmp_work, &lwork, iwork, $P(ifail), $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssyevx_,dsyevx_)( &jz, &prange, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(vl), $P(vu), $P(il), $P(iu), $P(abstol), $P(m), $P(w), $P(z), &$PRIV(__p_size), work, &lwork, iwork, $P(ifail), $P(info)); free(work); free(iwork); } ', Doc => ' =for ref Computes selected eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Arguments ========= jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. range: = 0: all eigenvalues will be found. = 1: all eigenvalues in the half-open interval (vl,vu] will be found. = 1: the il-th through iu-th eigenvalues will be found. uplo = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, the lower triangle (if uplo=1) or the upper triangle (if uplo=0) of A, including the diagonal, is destroyed. vl: vu: If range=1, the lower and upper bounds of the interval to be searched for eigenvalues. vl < vu. Not referenced if range = 0 or 2. il: iu: If range=2, the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= il <= iu <= N, if N > 0; il = 1 and iu = 0 if N = 0. Not referenced if range = 0 or 1. abstol: The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to abstol + EPS * max( |a|,|b| ) , where EPS is the machine precision. If abstol is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. Eigenvalues will be computed most accurately when abstol is set to twice the underflow threshold 2*lamch(1), not zero. If this routine returns with info>0, indicating that some eigenvectors did not converge, try setting abstol to 2*lamch(1). See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. m: The total number of eigenvalues found. 0 <= m <= N. If range = 0, m = N, and if range = 2, m = iu-il+1. w: On normal exit, the first M elements contain the selected eigenvalues in ascending order. z: If jobz = 1, then if info = 0, the first m columns of z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of z holding the eigenvector associated with w(i). If an eigenvector fails to converge, then that column of z contains the latest approximation to the eigenvector, and the index of the eigenvector is returned in ifail. If jobz = 0, then z is not referenced. Note: the user must ensure that at least max(1,m) columns are supplied in the array z; if range = 1, the exact value of m is not known in advance and an upper bound must be used. ifail: If jobz = 1, then if info = 0, the first m elements of ifail are zero. If info > 0, then ifail contains the indices of the eigenvectors that failed to converge. If jobz = 0, then ifail is not referenced. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, then i eigenvectors failed to converge. Their indices are stored in array ifail. =for example # Assume $a is symmetric ;) $a = random (5,5); $unfl = lamch(1); $ovfl = lamch(9); labad($unfl, $ovfl); $abstol = $unfl + $unfl; $m = null; $info = null; $ifail = zeroes(5); $w = zeroes(5); $z = zeroes(5,5); syevx($a, 1,0,1,0,0,0,0,$abstol, $m, $w, $z ,$ifail, $info); '); pp_def("syevr", HandleBad => 0, Pars => '[phys]A(n,n); int jobz(); int range(); int uplo(); [phys]vl(); [phys]vu(); int [phys]il(); int [phys]iu();[phys]abstol();int [o,phys]m();[o,phys]w(n); [o,phys]z(p,q);int [o,phys]isuppz(r); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; char prange = \'A\'; integer lwork = -1; integer liwork = -1; integer *iwork; integer tmp_iwork; types(F) %{ extern int ssyevr_(char *jobz, char *range, char *uplo, integer *n, float *a, integer *lda, float *vl, float *vu, integer * il, integer *iu, float *abstol, integer *m, float *w, float *z__, integer *ldz, integer *isuppz, float *work, integer *lwork, integer *iwork, integer *liwork, integer *info); float tmp_work; %} types(D) %{ extern int dsyevr_(char *jobz, char *range, char *uplo, integer *n, double *a, integer *lda, double *vl, double *vu, integer * il, integer *iu, double *abstol, integer *m, double *w, double *z__, integer *ldz, integer *isuppz, double *work, integer *lwork, integer *iwork, integer *liwork, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; switch ($range()) { case 1: prange = \'V\'; break; case 2: prange = \'I\'; break; default: prange = \'A\'; } $TFD(ssyevr_,dsyevr_)( &jz, &prange, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(vl), $P(vu), $P(il), $P(iu), $P(abstol), $P(m), $P(w), $P(z), &$PRIV(__p_size), $P(isuppz), &tmp_work, &lwork, &tmp_iwork, &liwork, $P(info)); lwork = (integer )tmp_work; liwork = (integer )tmp_iwork; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} iwork = (integer *)malloc(liwork * sizeof(integer)); $TFD(ssyevr_,dsyevr_)( &jz, &prange, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(vl), $P(vu), $P(il), $P(iu), $P(abstol), $P(m), $P(w), $P(z), &$PRIV(__p_size), $P(isuppz), work, &lwork, iwork, &liwork, $P(info)); free(work); free(iwork); } ', Doc => ' =for ref Computes selected eigenvalues and, optionally, eigenvectors of a real symmetric matrix T. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Whenever possible, syevr calls stegr to compute the eigenspectrum using Relatively Robust Representations. stegr computes eigenvalues by the dqds algorithm, while orthogonal eigenvectors are computed from various "good" L D L^T representations (also known as Relatively Robust Representations). Gram-Schmidt orthogonalization is avoided as far as possible. More specifically, the various steps of the algorithm are as follows. For the i-th unreduced block of T, (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T is a relatively robust representation, (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high relative accuracy by the dqds algorithm, (c) If there is a cluster of close eigenvalues, "choose" sigma_i close to the cluster, and go to step (a), (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, compute the corresponding eigenvector by forming a rank-revealing twisted factorization. The desired accuracy of the output can be specified by the input parameter abstol. For more details, see "A new O(n^2) algorithm for the symmetric tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, Computer Science Division Technical Report No. UCB//CSD-97-971, UC Berkeley, May 1997. Note 1 : syevr calls stegr when the full spectrum is requested on machines which conform to the ieee-754 floating point standard. syevr calls stebz and stein on non-ieee machines and when partial spectrum requests are made. Normal execution of stegr may create NaNs and infinities and hence may abort due to a floating point exception in environments which do not handle NaNs and infinities in the ieee standard default manner. Arguments ========= jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. range: = 0: all eigenvalues will be found. = 1: all eigenvalues in the half-open interval (vl,vu] will be found. = 2: the il-th through iu-th eigenvalues will be found. ********* For range = 1 or 2 and iu - il < N - 1, stebz and ********* stein are called uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, the lower triangle (if uplo=1) or the upper triangle (if uplo=0) of A, including the diagonal, is destroyed. vl: vu: If range=1, the lower and upper bounds of the interval to be searched for eigenvalues. vl < vu. Not referenced if range = 0 or 2. il: iu: If range=2, the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= il <= iu <= N, if N > 0; il = 1 and iu = 0 if N = 0. Not referenced if range = 0 or 1. abstol: The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to abstol + EPS * max( |a|,|b| ) , where EPS is the machine precision. If abstol is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. See "Computing Small Singular Values of Bidiagonal Matrices with Guaranteed High Relative Accuracy," by Demmel and Kahan, LAPACK Working Note #3. If high relative accuracy is important, set abstol to lamch(1). Doing so will guarantee that eigenvalues are computed to high relative accuracy when possible in future releases. The current code does not make any guarantees about high relative accuracy, but furure releases will. See J. Barlow and J. Demmel, "Computing Accurate Eigensystems of Scaled Diagonally Dominant Matrices", LAPACK Working Note #7, for a discussion of which matrices define their eigenvalues to high relative accuracy. m: The total number of eigenvalues found. 0 <= m <= N. If range = 0, m = N, and if range = 2, m = iu-il+1. w: The first m elements contain the selected eigenvalues in ascending order. z: If jobz = 1, then if info = 0, the first m columns of z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of z holding the eigenvector associated with w(i). If jobz = 0, then z is not referenced. Note: the user must ensure that at least max(1,m) columns are supplied in the array z; if range = 1, the exact value of m is not known in advance and an upper bound must be used. isuppz: array of int, dimension ( 2*max(1,m) ) The support of the eigenvectors in z, i.e., the indices indicating the nonzero elements in z. The i-th eigenvector is nonzero only in elements isuppz( 2*i-1 ) through isuppz( 2*i ). ********* Implemented only for range = 0 or 2 and iu - il = N - 1 info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: Internal error =for example # Assume $a is symmetric ;) $a = random (5,5); $unfl = lamch(1); $ovfl = lamch(9); labad($unfl, $ovfl); $abstol = $unfl + $unfl; $m = null; $info = null; $isuppz = zeroes(10); $w = zeroes(5); $z = zeroes(5,5); syevr($a, 1,0,1,0,0,0,0,$abstol, $m, $w, $z ,$isuppz, $info); '); pp_def("sygv", HandleBad => 0, Pars => '[io,phys]A(n,n);int [phys]itype();int jobz(); int uplo();[io,phys]B(n,n);[o,phys]w(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; integer lwork = -1; types(F) %{ extern int ssygv_(integer *itype, char *jobz, char *uplo, integer * n, float *a, integer *lda, float *b, integer *ldb, float *w, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dsygv_(integer *itype, char *jobz, char *uplo, integer * n, double *a, integer *lda, double *b, integer *ldb, double *w, double *work, integer *lwork, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; $TFD(ssygv_,dsygv_)( $P(itype), &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(w), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssygv_,dsygv_)( $P(itype), &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(w), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes all the eigenvalues, and optionally, the eigenvectors of a real generalized symmetric-definite eigenproblem, of the form A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric and B is also positive definite. Arguments ========= itype: Specifies the problem type to be solved: = 1: A*x = (lambda)*B*x = 2: A*B*x = (lambda)*x = 3: B*A*x = (lambda)*x jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. uplo: = 0: Upper triangles of A and B are stored; = 1: Lower triangles of A and B are stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if jobz = 1, then if info = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if itype = 1 or 2, Z\'*B*Z = I; if itype = 3, Z\'*inv(B)*Z = I. If jobz = 0, then on exit the upper triangle (if uplo=0) or the lower triangle (if uplo=1) of A, including the diagonal, is destroyed. B: On entry, the symmetric positive definite matrix B. If uplo = 0, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B. If uplo = 1, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B. On exit, if info <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U\'*U or B = L*L\'. W: If info = 0, the eigenvalues in ascending order. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: potrf or syev returned an error code: <= N: if info = i, syev failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; > N: if info = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. =for example # Assume $a is symmetric ;) $a = random (5,5); # Assume $a is symmetric and positive definite ;) $b = random (5,5); sygv($a, 1,1, 0, $b, (my $w = zeroes(5)), (my $info=null)); '); pp_def("sygvd", HandleBad => 0, Pars => '[io,phys]A(n,n);int [phys]itype();int jobz(); int uplo();[io,phys]B(n,n);[o,phys]w(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; integer lwork = -1; integer liwork = -1; integer *iwork; integer tmp_iwork; types(F) %{ extern int ssygvd_(integer *itype, char *jobz, char *uplo, integer * n, float *a, integer *lda, float *b, integer *ldb, float *w, float *work, integer *lwork, integer *iwork, integer *liwork, integer *info); float tmp_work; %} types(D) %{ extern int dsygvd_(integer *itype, char *jobz, char *uplo, integer * n, double *a, integer *lda, double *b, integer *ldb, double *w, double *work, integer *lwork, integer *iwork, integer *liwork, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; $TFD(ssygvd_,dsygvd_)( $P(itype), &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(w), &tmp_work, &lwork, &tmp_iwork, &liwork, $P(info)); lwork = (integer )tmp_work; liwork = (integer )tmp_iwork; iwork = (integer *)malloc(liwork * sizeof(integer)); { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssygvd_,dsygvd_)( $P(itype), &jz, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(w), work, &lwork, iwork, &liwork, $P(info)); free(work); } free(iwork); ', Doc => ' =for ref Computes all the eigenvalues, and optionally, the eigenvectors of a real generalized symmetric-definite eigenproblem, of the form A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric and B is also positive definite. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= itype: Specifies the problem type to be solved: = 1: A*x = (lambda)*B*x = 2: A*B*x = (lambda)*x = 3: B*A*x = (lambda)*x jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. uplo: = 0: Upper triangles of A and B are stored; = 1: Lower triangles of A and B are stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if jobz = 1, then if info = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if itype = 1 or 2, Z\'*B*Z = I; if itype = 3, Z\'*inv(B)*Z = I. If jobz = 0, then on exit the upper triangle (if uplo=0) or the lower triangle (if uplo=1) of A, including the diagonal, is destroyed. B: On entry, the symmetric positive definite matrix B. If uplo = 0, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B. If uplo = 1, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B. On exit, if info <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U\'*U or B = L*L\'. W: If info = 0, the eigenvalues in ascending order. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: potrf or syev returned an error code: <= N: if info = i, syevd failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; > N: if info = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. =for example # Assume $a is symmetric ;) $a = random (5,5); # Assume $b is symmetric positive definite ;) $b = random (5,5); sygvd($a, 1,1, 0, $b, (my $w = zeroes(5)), (my $info=null)); '); pp_def("sygvx", HandleBad => 0, Pars => '[io,phys]A(n,n);int [phys]itype();int jobz();int range(); int uplo();[io,phys]B(n,n);[phys]vl();[phys]vu();int [phys]il();int [phys]iu();[phys]abstol();int [o,phys]m();[o,phys]w(n); [o,phys]Z(p,q);int [o,phys]ifail(r);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char jz = \'N\'; char puplo = \'U\'; char prange; integer lwork = -1; integer *iwork; types(F) %{ extern int ssygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, float *a, integer *lda, float *b, integer *ldb, float *vl, float *vu, integer *il, integer *iu, float *abstol, integer *m, float *w, float *z__, integer *ldz, float *work, integer *lwork, integer *iwork, integer *ifail, integer *info); float tmp_work; %} types(D) %{ extern int dsygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, double *a, integer *lda, double *b, integer *ldb, double *vl, double *vu, integer *il, integer *iu, double *abstol, integer *m, double *w, double *z__, integer *ldz, double *work, integer *lwork, integer *iwork, integer *ifail, integer *info); double tmp_work; %} if ($jobz()) jz = \'V\'; if ($uplo()) puplo = \'L\'; switch ($range()) { case 1: prange = \'V\'; break; case 2: prange = \'I\'; break; default: prange = \'A\'; } iwork = (integer *)malloc((5 * $SIZE(n)) * sizeof(integer)); $TFD(ssygvx_,dsygvx_)( $P(itype), &jz, &prange, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(vl), $P(vu), $P(il), $P(iu), $P(abstol), $P(m), $P(w), $P(Z), &$PRIV(__p_size), &tmp_work, &lwork, iwork, $P(ifail), $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssygvx_,dsygvx_)( $P(itype), &jz, &prange, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(vl), $P(vu), $P(il), $P(iu), $P(abstol), $P(m), $P(w), $P(Z), &$PRIV(__p_size), work, &lwork, iwork, $P(ifail), $P(info)); free(work); } free(iwork); ', Doc => ' =for ref Computes selected eigenvalues, and optionally, eigenvectors of a real generalized symmetric-definite eigenproblem, of the form A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric and B is also positive definite. Eigenvalues and eigenvectors can be selected by specifying either a range of values or a range of indices for the desired eigenvalues. Arguments ========= itype: Specifies the problem type to be solved: = 1: A*x = (lambda)*B*x = 2: A*B*x = (lambda)*x = 3: B*A*x = (lambda)*x jobz: = 0: Compute eigenvalues only; = 1: Compute eigenvalues and eigenvectors. range: = 0: all eigenvalues will be found. = 1: all eigenvalues in the half-open interval (vl,vu] will be found. = 2: the il-th through iu-th eigenvalues will be found. uplo: = 0: Upper triangle of A and B are stored; = 1: Lower triangle of A and B are stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, the lower triangle (if uplo=1) or the upper triangle (if uplo=0) of A, including the diagonal, is destroyed. B: On entry, the symmetric matrix B. If uplo = 0, the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B. If uplo = 1, the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B. On exit, if info <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U\'*U or B = L*L\'. vl: vu: If range=1, the lower and upper bounds of the interval to be searched for eigenvalues. vl < vu. Not referenced if range = 0 or 2. il: iu: If range=2, the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= il <= iu <= N, if N > 0; il = 1 and iu = 0 if N = 0. Not referenced if range = 0 or 1. abstol: The absolute error tolerance for the eigenvalues. An approximate eigenvalue is accepted as converged when it is determined to lie in an interval [a,b] of width less than or equal to abstol + EPS * max( |a|,|b| ) , where EPS is the machine precision. If abstol is less than or equal to zero, then EPS*|T| will be used in its place, where |T| is the 1-norm of the tridiagonal matrix obtained by reducing A to tridiagonal form. Eigenvalues will be computed most accurately when abstol is set to twice the underflow threshold 2*lamch(1), not zero. If this routine returns with info>0, indicating that some eigenvectors did not converge, try setting abstol to 2* lamch(1). m: The total number of eigenvalues found. 0 <= m <= N. If range = 0, m = N, and if range = 2, m = iu-il+1. w: On normal exit, the first m elements contain the selected eigenvalues in ascending order. Z: If jobz = 0, then Z is not referenced. If jobz = 1, then if info = 0, the first m columns of Z contain the orthonormal eigenvectors of the matrix A corresponding to the selected eigenvalues, with the i-th column of Z holding the eigenvector associated with w(i). The eigenvectors are normalized as follows: if itype = 1 or 2, Z\'*B*Z = I; if itype = 3, Z\'*inv(B)*Z = I. If an eigenvector fails to converge, then that column of Z contains the latest approximation to the eigenvector, and the index of the eigenvector is returned in ifail. Note: the user must ensure that at least max(1,m) columns are supplied in the array Z; if range = 1, the exact value of m is not known in advance and an upper bound must be used. ifail: If jobz = 1, then if info = 0, the first M elements of ifail are zero. If info > 0, then ifail contains the indices of the eigenvectors that failed to converge. If jobz = 0, then ifail is not referenced. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: potrf or syevx returned an error code: <= N: if info = i, syevx failed to converge; i eigenvectors failed to converge. Their indices are stored in array ifail. > N: if info = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. =for example # Assume $a is symmetric ;) $a = random (5,5); # Assume $b is symmetric positive definite ;) $b = random (5,5); $unfl = lamch(1); $ovfl = lamch(9); labad($unfl, $ovfl); $abstol = $unfl + $unfl; $m = null; $w=zeroes(5); $z = zeroes(5,5); $ifail = zeroes(5); sygvx($a, 1,1, 0,0, $b, 0, 0, 0, 0, $abstol, $m, $w, $z,$ifail,(my $info=null)); '); pp_def("gesv", HandleBad => 0, Pars => '[io,phys]A(n,n); [io,phys]B(n,m); int [o,phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' types(F) %{ extern int sgesv_(integer *n, integer *nrhs, float *a, integer *lda, integer *ipiv, float *b, integer *ldb, integer *info); %} types(D) %{ extern int dgesv_(integer *n, integer *nrhs, double *a, integer *lda, integer *ipiv, double *b, integer *ldb, integer *info); %} $TFD(sgesv_,dgesv_)( &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the solution to a real system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. The LU decomposition with partial pivoting and row interchanges is used to factor A as A = P * L * U, where P is a permutation matrix, L is unit lower triangular, and U is upper triangular. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= A: On entry, the N-by-N coefficient matrix A. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. ipiv: The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row ipiv(i). B: On entry, the N-by-NRHS matrix of right hand side matrix B. On exit, if info = 0, the N-by-NRHS solution matrix X. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution could not be computed. =for example $a = random (5,5); $a = transpose($a); $b = random (5,5); $b = transpose($b); gesv($a,$b, (my $ipiv=zeroes(5)),(my $info=null)); print "The solution matrix X is :". transpose($b)."\n" unless $info; '); pp_def("gesvx", HandleBad => 0, Pars => '[io,phys]A(n,n); int trans(); int fact(); [io,phys]B(n,m); [io,phys]af(n,n); int [io,phys]ipiv(n); int [io]equed(); [io,phys]r(n); [io,phys]c(n); [o,phys]X(n,m); [o,phys]rcond(); [o,phys]ferr(m); [o,phys]berr(m);[o,phys]rpvgrw();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans, pfact, pequed; integer *iwork; types(F) %{ extern int sgesvx_(char *fact, char *trans, integer *n, integer * nrhs, float *a, integer *lda, float *af, integer *ldaf, integer *ipiv, char *equed, float *r__, float *c__, float *b, integer *ldb, float *x, integer *ldx, float * rcond, float *ferr, float *berr, float *work, integer * iwork, integer *info); float *work; %} types(D) %{ extern int dgesvx_(char *fact, char *trans, integer *n, integer * nrhs, double *a, integer *lda, double *af, integer *ldaf, integer *ipiv, char *equed, double *r__, double *c__, double *b, integer *ldb, double *x, integer *ldx, double * rcond, double *ferr, double *berr, double *work, integer * iwork, integer *info); double *work; %} switch ($trans()) { case 1: ptrans = \'T\'; break; case 2: ptrans = \'C\'; break; default: ptrans = \'N\'; } switch ($fact()) { case 1: pfact = \'N\'; break; case 2: pfact = \'E\'; break; default: pfact = \'F\'; } switch ($equed()) { case 1: pequed = \'R\'; break; case 2: pequed = \'C\'; break; case 3: pequed = \'B\'; break; default: pequed = \'N\'; } types(F) %{ work = (float *) malloc(4 * $PRIV(__n_size) * sizeof(float)); %} types(D) %{ work = (double *) malloc(4 * $PRIV(__n_size) * sizeof(double)); %} iwork = (integer *) malloc ($PRIV(__n_size)* sizeof (integer)); $TFD(sgesvx_,dgesvx_)( &pfact, &ptrans, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(af), &$PRIV(__n_size), $P(ipiv), &pequed, $P(r), $P(c), $P(B), &$PRIV(__n_size), $P(X), &$PRIV(__n_size), $P(rcond), $P(ferr), $P(berr), work, iwork, $P(info)); free(work); free(iwork); switch (pequed) { case \'R\': $equed() = 1; break; case \'C\': $equed() = 2; break; case \'B\': $equed() = 3; break; default: $equed()= 0; } $rpvgrw()=work[0]; ', Doc => ' =for ref Uses the LU factorization to compute the solution to a real system of linear equations A * X = B, where A is an N-by-N matrix and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. =for desc The following steps are performed: =over 3 =item 1 If fact = 2, real scaling factors are computed to equilibrate the system: trans = 0: diag(r)*A*diag(c) *inv(diag(c))*X = diag(c)*B trans = 1: (diag(r)*A*diag(c))\' *inv(diag(r))*X = diag(c)*B trans = 2: (diag(r)*A*diag(c))**H *inv(diag(r))*X = diag(c)*B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(r)*A*diag(c) and B by diag(r)*B (if trans=0) or diag(c)*B (if trans = 1 or 2). =item 2 If fact = 1 or 2, the LU decomposition is used to factor the matrix A (after equilibration if fact = 2) as A = P * L * U, where P is a permutation matrix, L is a unit lower triangular matrix, and U is upper triangular. =item 3 If some U(i,i)=0, so that U is exactly singular, then the routine returns with info = i. Otherwise, the factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, info = N+1 is returned as a warning, but the routine still goes on to solve for X and compute error bounds as described below. =item 4 The system of equations is solved for X using the factored form of A. =item 5 Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. =item 6 If equilibration was used, the matrix X is premultiplied by diag(c) (if trans = 0) or diag(r) (if trans = 1 or 2) so that it solves the original system before equilibration. =back Arguments ========= fact: Specifies whether or not the factored form of the matrix A is supplied on entry, and if not, whether the matrix A should be equilibrated before it is factored. = 0: On entry, af and ipiv contain the factored form of A. If equed is not 0, the matrix A has been equilibrated with scaling factors given by r and c. A, af, and ipiv are not modified. = 1: The matrix A will be copied to af and factored. = 2: The matrix A will be equilibrated if necessary, then copied to af and factored. trans: Specifies the form of the system of equations: = 0: A * X = B (No transpose) = 1: A\' * X = B (Transpose) = 2: A**H * X = B (Transpose) A: On entry, the N-by-N matrix A. If fact = 0 and equed is not 0, then A must have been equilibrated by the scaling factors in r and/or c. A is not modified if fact = 0 or 1, or if fact = 2 and equed = 0 on exit. On exit, if equed != 0, A is scaled as follows: equed = 1: A := diag(r) * A equed = 2: A := A * diag(c) equed = 3: A := diag(r) * A * diag(c). af: If fact = 0, then af is an input argument and on entry contains the factors L and U from the factorization A = P*L*U as computed by getrf. If equed != 0, then af is the factored form of the equilibrated matrix A. If fact = 1, then af is an output argument and on exit returns the factors L and U from the factorization A = P*L*U of the original matrix A. If fact = 2, then af is an output argument and on exit returns the factors L and U from the factorization A = P*L*U of the equilibrated matrix A (see the description of A for the form of the equilibrated matrix). ipiv: If fact = 0, then ipiv is an input argument and on entry contains the pivot indices from the factorization A = P*L*U as computed by getrf; row i of the matrix was interchanged with row ipiv(i). If fact = 1, then ipiv is an output argument and on exit contains the pivot indices from the factorization A = P*L*U of the original matrix A. If fact = 2, then ipiv is an output argument and on exit contains the pivot indices from the factorization A = P*L*U of the equilibrated matrix A. equed: Specifies the form of equilibration that was done. = 0: No equilibration (always true if fact = 1). = 1: Row equilibration, i.e., A has been premultiplied by diag(r). = 2: Column equilibration, i.e., A has been postmultiplied by diag(c). = 3: Both row and column equilibration, i.e., A has been replaced by diag(r) * A * diag(c). equed is an input argument if fact = 0; otherwise, it is an output argument. r: The row scale factors for A. If equed = 1 or 3, A is multiplied on the left by diag(r); if equed = 0 or 2, r is not accessed. r is an input argument if fact = 0; otherwise, r is an output argument. If fact = 0 and equed = 1 or 3, each element of r must be positive. c: The column scale factors for A. If equed = 2 or 3, A is multiplied on the right by diag(c); if equed = 0 or 1, c is not accessed. c is an input argument if fact = 0; otherwise, c is an output argument. If fact = 0 and equed = 2 or 3, each element of c must be positive. B: On entry, the N-by-NRHS right hand side matrix B. On exit, if equed = 0, B is not modified; if trans = 0 and equed = 1 or 3, B is overwritten by diag(r)*B; if trans = 1 or 2 and equed = 2 or 3, B is overwritten by diag(c)*B. X: If info = 0 or info = N+1, the N-by-NRHS solution matrix X to the original system of equations. Note that A and B are modified on exit if equed != 0, and the solution to the equilibrated system is inv(diag(c))*X if trans = 0 and equed = 2 or 3, or inv(diag(r))*X if trans = 1 or 2 and equed = 1 or 3. rcond: The estimate of the reciprocal condition number of the matrix A after equilibration (if done). If rcond is less than the machine precision (in particular, if rcond = 0), the matrix is singular to working precision. This condition is indicated by a return code of info > 0. ferr: The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), ferr(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for rcond, and is almost always a slight overestimate of the true error. berr: The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). rpvgrw: Contains the reciprocal pivot growth factor norm(A)/norm(U). The "max absolute element" norm is used. If it is much less than 1, then the stability of the LU factorization of the (equilibrated) matrix A could be poor. This also means that the solution X, condition estimator rcond, and forward error bound ferr could be unreliable. If factorization fails with 0<info<=N, then it contains the reciprocal pivot growth factor for the leading info columns of A. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= N: U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, so the solution and error bounds could not be computed. rcond = 0 is returned. = N+1: U is nonsingular, but rcond is less than machine precision, meaning that the matrix is singular to working precision. Nevertheless, the solution and error bounds are computed because there are a number of situations where the computed solution can be more accurate than the value of rcond would suggest. =for example $a= random(5,5); $b = random(5,5); $a = transpose($a); $b = transpose($b); $rcond = pdl(0); $rpvgrw = pdl(0); $equed = pdl(long,0); $info = pdl(long,0); $berr = zeroes(5); $ipiv = zeroes(5); $ferr = zeroes(5); $r = zeroes(5); $c = zeroes(5); $X = zeroes(5,5); $af = zeroes(5,5); gesvx($a,0, 2, $b, $af, $ipiv, $equed, $r, $c, $X, $rcond, $ferr, $berr, $rpvgrw, $info); print "The solution matrix X is :". transpose($X)."\n" unless $info; '); pp_def("sysv", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); [io,phys]B(n,m); int [o,phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; integer lwork = -1; types(F) %{ extern int ssysv_(char *uplo, integer *n, integer *nrhs, float *a, integer *lda, integer *ipiv, float *b, integer *ldb, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dsysv_(char *uplo, integer *n, integer *nrhs, double *a, integer *lda, integer *ipiv, double *b, integer *ldb, double *work, integer *lwork, integer *info); double tmp_work; %} if ($uplo()) puplo = \'L\'; $TFD(ssysv_,dsysv_)( &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssysv_,dsysv_)( &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), work, &lwork, $P(info)); } ', Doc => ' =for ref Computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. The diagonal pivoting method is used to factor A as A = U * D * U\', if uplo = 0, or A = L * D * L\', if uplo = 1, where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if info = 0, the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U\' or A = L*D*L\' as computed by sytrf. ipiv: Details of the interchanges and the block structure of D, as determined by sytrf. If ipiv(k) > 0, then rows and columns k and ipiv(k) were interchanged, and D(k,k) is a 1-by-1 diagonal block. If uplo = 0 and ipiv(k) = ipiv(k-1) < 0, then rows and columns k-1 and -ipiv(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If uplo = 1 and ipiv(k) = ipiv(k+1) < 0, then rows and columns k+1 and -ipiv(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. B: On entry, the N-by-NRHS right hand side matrix B. On exit, if info = 0, the N-by-NRHS solution matrix X. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, so the solution could not be computed. =for example # Assume $a is symmetric ;) $a = random (5,5); $a = transpose($a); $b = random(4,5); $b = transpose($b); sysv($a, 1, $b, (my $ipiv=zeroes(5)),(my $info=null)); print "The solution matrix X is :". transpose($b)."\n" unless $info; '); pp_def("sysvx", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); int fact(); [phys]B(n,m); [io,phys]af(n,n); int [io,phys]ipiv(n); [o,phys]X(n,m); [o,phys]rcond(); [o,phys]ferr(m); [o,phys]berr(m); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pfact = \'N\'; char puplo = \'U\'; integer lwork = -1; integer *iwork; types(F) %{ extern int ssysvx_(char *fact, char *uplo, integer *n, integer * nrhs, float *a, integer *lda, float *af, integer *ldaf, integer *ipiv, float *b, integer *ldb, float *x, integer * ldx, float *rcond, float *ferr, float *berr, float *work, integer *lwork, integer *iwork, integer *info); float tmp_work; %} types(D) %{ extern int dsysvx_(char *fact, char *uplo, integer *n, integer * nrhs, double *a, integer *lda, double *af, integer *ldaf, integer *ipiv, double *b, integer *ldb, double *x, integer * ldx, double *rcond, double *ferr, double *berr, double *work, integer *lwork, integer *iwork, integer *info); double tmp_work; %} if($fact()) pfact = \'F\'; if ($uplo()) puplo = \'L\'; iwork = (integer *) malloc ($PRIV(__n_size)* sizeof (integer)); $TFD(ssysvx_,dsysvx_)( &pfact, &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(af), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), $P(X), &$PRIV(__n_size), $P(rcond), $P(ferr), $P(berr), &tmp_work, &lwork, iwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssysvx_,dsysvx_)( &pfact, &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(af), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), $P(X), &$PRIV(__n_size), $P(rcond), $P(ferr), $P(berr), work, &lwork, iwork, $P(info)); free(work); } free(iwork); ', Doc => ' =for ref Uses the diagonal pivoting factorization to compute the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. The following steps are performed: =over 3 =item 1 If fact = 0, the diagonal pivoting method is used to factor A. The form of the factorization is A = U * D * U\', if uplo = 0, or A = L * D * L\', if uplo = 1, where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. =item 2 If some D(i,i)=0, so that D is exactly singular, then the routine returns with info = i. Otherwise, the factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, info = N+1 is returned as a warning, but the routine still goes on to solve for X and compute error bounds as described below. =item 3 The system of equations is solved for X using the factored form of A. =item 4 Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. =back Arguments ========= fact: Specifies whether or not the factored form of A has been supplied on entry. = 0: The matrix A will be copied to af and factored. = 1: On entry, af and ipiv contain the factored form of A. af and ipiv will not be modified. uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: The symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. af: If fact = 1, then af is an input argument and on entry contains the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U\' or A = L*D*L\' as computed by sytrf. If fact = 0, then af is an output argument and on exit returns the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U\' or A = L*D*L\'. ipiv: If fact = 1, then ipiv is an input argument and on entry contains details of the interchanges and the block structure of D, as determined by sytrf. If ipiv(k) > 0, then rows and columns k and ipiv(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If uplo = 0 and ipiv(k) = ipiv(k-1) < 0, then rows and columns k-1 and -ipiv(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If uplo = 1 and ipiv(k) = ipiv(k+1) < 0, then rows and columns k+1 and -ipiv(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. If fact = 0, then ipiv is an output argument and on exit contains details of the interchanges and the block structure of D, as determined by sytrf. B: The N-by-NRHS right hand side matrix B. X: If info = 0 or info = N+1, the N-by-NRHS solution matrix X. rcond: The estimate of the reciprocal condition number of the matrix A. If rcond is less than the machine precision (in particular, if rcond = 0), the matrix is singular to working precision. This condition is indicated by a return code of info > 0. ferr: The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), ferr(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for rcond, and is almost always a slight overestimate of the true error. berr: The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= N: D(i,i) is exactly zero. The factorization has been completed but the factor D is exactly singular, so the solution and error bounds could not be computed. rcond = 0 is returned. = N+1: D is nonsingular, but rcond is less than machine precision, meaning that the matrix is singular to working precision. Nevertheless, the solution and error bounds are computed because there are a number of situations where the computed solution can be more accurate than the value of rcond would suggest. =for example $a= random(5,5); $b = random(10,5); $a = transpose($a); $b = transpose($b); $X = zeroes($b); $af = zeroes($a); $ipiv = zeroes(long, 5); $rcond = pdl(0); $ferr = zeroes(10); $berr = zeroes(10); $info = pdl(long, 0); # Assume $a is symmetric sysvx($a, 0, 0, $b,$af, $ipiv, $X, $rcond, $ferr, $berr,$info); print "The solution matrix X is :". transpose($X)."\n"; '); pp_def("posv", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); [io,phys]B(n,m); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int sposv_(char *uplo, integer *n, integer *nrhs, float *a, integer *lda, float *b, integer *ldb, integer *info); %} types(D) %{ extern int dposv_(char *uplo, integer *n, integer *nrhs, double *a, integer *lda, double *b, integer *ldb, integer *info); %} if ($uplo()) puplo = \'L\'; $TFD(sposv_,dposv_)( &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. The Cholesky decomposition is used to factor A as A = U\'* U, if uplo = 0, or A = L * L\', if uplo = 1, where U is an upper triangular matrix and L is a lower triangular matrix. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if info = 0, the factor U or L from the Cholesky factorization A = U\'*U or A = L*L\'. B: On entry, the N-by-NRHS right hand side matrix B. On exit, if info = 0, the N-by-NRHS solution matrix X. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed. =for example # Assume $a is symmetric positive definite ;) $a = random (5,5); $a = transpose($a); $b = random(4,5); $b = transpose($b); posv($a, 1, $b, (my $info=null)); print "The solution matrix X is :". transpose($b)."\n" unless $info; '); pp_def("posvx", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int fact(); [io,phys]B(n,m); [io,phys]af(n,n); int [io]equed(); [io,phys]s(n); [o,phys]X(n,m); [o,phys]rcond(); [o,phys]ferr(m); [o,phys]berr(m); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pfact; char pequed = \'N\'; char puplo = \'U\'; integer *iwork; types(F) %{ extern int sposvx_(char *fact, char *uplo, integer *n, integer * nrhs, float *a, integer *lda, float *af, integer *ldaf, char *equed, float *s, float *b, integer *ldb, float * x, integer *ldx, float *rcond, float *ferr, float * berr, float *work, integer *iwork, integer *info); float *work; %} types(D) %{ extern int dposvx_(char *fact, char *uplo, integer *n, integer * nrhs, double *a, integer *lda, double *af, integer *ldaf, char *equed, double *s, double *b, integer *ldb, double * x, integer *ldx, double *rcond, double *ferr, double * berr, double *work, integer *iwork, integer *info); double *work; %} switch ($fact()) { case 1: pfact = \'N\'; break; case 2: pfact = \'E\'; break; default: pfact = \'F\'; } if ($equed()) pequed = \'Y\'; if ($uplo()) puplo = \'L\'; types(F) %{ work = (float *) malloc(3 * $PRIV(__n_size) * sizeof(float)); %} types(D) %{ work = (double *) malloc(3 * $PRIV(__n_size) * sizeof(double)); %} iwork = (integer *) malloc ($PRIV(__n_size)* sizeof (integer)); $TFD(sposvx_,dposvx_)( &pfact, &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(af), &$PRIV(__n_size), &pequed, $P(s), $P(B), &$PRIV(__n_size), $P(X), &$PRIV(__n_size), $P(rcond), $P(ferr), $P(berr), work, iwork, $P(info)); free(work); free(iwork); switch (pequed) { case \'Y\': $equed() = 1; break; default: $equed()= 0; } ', Doc => ' =for ref Uses the Cholesky factorization A = U\'*U or A = L*L\' to compute the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. The following steps are performed: =over 3 =item 1 If fact = 2, real scaling factors are computed to equilibrate the system: diag(s) * A * diag(s) * inv(diag(s)) * X = diag(s) * B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(s)*A*diag(s) and B by diag(s)*B. =item 2 If fact = 1 or 2, the Cholesky decomposition is used to factor the matrix A (after equilibration if fact = 2) as A = U\'* U, if uplo = 0, or A = L * L\', if uplo = 1, where U is an upper triangular matrix and L is a lower triangular matrix. =item 3 If the leading i-by-i principal minor is not positive definite, then the routine returns with info = i. Otherwise, the factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, info = N+1 is returned as a warning, but the routine still goes on to solve for X and compute error bounds as described below. =item 4 The system of equations is solved for X using the factored form of A. =item 5 Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. =item 6 If equilibration was used, the matrix X is premultiplied by diag(s) so that it solves the original system before equilibration. =back Arguments ========= fact: Specifies whether or not the factored form of the matrix A is supplied on entry, and if not, whether the matrix A should be equilibrated before it is factored. = 0: On entry, af contains the factored form of A. If equed = 1, the matrix A has been equilibrated with scaling factors given by s. A and af will not be modified. = 1: The matrix A will be copied to af and factored. = 2: The matrix A will be equilibrated if necessary, then copied to af and factored. uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A, except if fact = 0 and equed = 1, then A must contain the equilibrated matrix diag(s)*A*diag(s). If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. A is not modified if fact = 0 or 1, or if fact = 2 and equed = 0 on exit. On exit, if fact = 2 and equed = 1, A is overwritten by diag(s)*A*diag(s). af: If fact = 0, then af is an input argument and on entry contains the triangular factor U or L from the Cholesky factorization A = U\'*U or A = L*L\', in the same storage format as A. If equed != 0, then af is the factored form of the equilibrated matrix diag(s)*A*diag(s). If fact = 1, then af is an output argument and on exit returns the triangular factor U or L from the Cholesky factorization A = U\'*U or A = L*L\' of the original matrix A. If fact = 2, then af is an output argument and on exit returns the triangular factor U or L from the Cholesky factorization A = U\'*U or A = L*L\' of the equilibrated matrix A (see the description of A for the form of the equilibrated matrix). equed: Specifies the form of equilibration that was done. = 0: No equilibration (always true if fact = 1). = 1: Equilibration was done, i.e., A has been replaced by diag(s) * A * diag(s). equed is an input argument if fact = 0; otherwise, it is an output argument. s: The scale factors for A; not accessed if equed = 0. s is an input argument if fact = 0; otherwise, s is an output argument. If fact = 0 and equed = 1, each element of s must be positive. B: On entry, the N-by-NRHS right hand side matrix B. On exit, if equed = 0, B is not modified; if equed = 1, B is overwritten by diag(s) * B. X: If info = 0 or info = N+1, the N-by-NRHS solution matrix X to the original system of equations. Note that if equed = 1, A and B are modified on exit, and the solution to the equilibrated system is inv(diag(s))*X. rcond: The estimate of the reciprocal condition number of the matrix A after equilibration (if done). If rcond is less than the machine precision (in particular, if rcond = 0), the matrix is singular to working precision. This condition is indicated by a return code of info > 0. ferr: The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for rcond, and is almost always a slight overestimate of the true error. berr: The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, and i is <= N: the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution has not been computed. rcond = 0 is returned. = N+1: U is nonsingular, but rcond is less than machine precision, meaning that the matrix is singular to working precision. Nevertheless, the solution and error bounds are computed because there are a number of situations where the computed solution can be more accurate than the value of rcond would suggest. =for example $a= random(5,5); $b = random(5,5); $a = transpose($a); $b = transpose($b); # Assume $a is symmetric positive definite $rcond = pdl(0); $equed = pdl(long,0); $info = pdl(long,0); $berr = zeroes(5); $ferr = zeroes(5); $s = zeroes(5); $X = zeroes(5,5); $af = zeroes(5,5); posvx($a,0,2,$b,$af, $equed, $s, $X, $rcond, $ferr, $berr,$info); print "The solution matrix X is :". transpose($X)."\n" unless $info; '); pp_def("gels", HandleBad => 0, Pars => '[io,phys]A(m,n); int trans(); [io,phys]B(p,q);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans = \'N\'; integer lwork = -1; types(F) %{ extern int sgels_(char *trans, integer *m, integer *n, integer * nrhs, float *a, integer *lda, float *b, integer *ldb, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgels_(char *trans, integer *m, integer *n, integer * nrhs, double *a, integer *lda, double *b, integer *ldb, double *work, integer *lwork, integer *info); double tmp_work; %} if($trans()) ptrans = \'T\'; $TFD(sgels_,dgels_)( &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgels_,dgels_)( &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Solves overdetermined or underdetermined real linear systems involving an M-by-N matrix A, or its transpose, using a QR or LQ factorization of A. It is assumed that A has full rank. The following options are provided: =over 3 =item 1 If trans = 0 and m >= n: find the least squares solution of an overdetermined system, i.e., solve the least squares problem minimize || B - A*X ||. =item 2 If trans = 0 and m < n: find the minimum norm solution of an underdetermined system A * X = B. =item 3 If trans = 1 and m >= n: find the minimum norm solution of an undetermined system A\' * X = B. =item 4 If trans = 1 and m < n: find the least squares solution of an overdetermined system, i.e., solve the least squares problem minimize || B - A\' * X ||. =back Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. Arguments ========= trans: = 0: the linear system involves A; = 1: the linear system involves A\'. A: On entry, the M-by-N matrix A. On exit, if M >= N, A is overwritten by details of its QR factorization as returned by geqrf; if M < N, A is overwritten by details of its LQ factorization as returned by gelqf. B: On entry, the matrix B of right hand side vectors, stored columnwise; B is M-by-NRHS if trans = 0, or N-by-NRHS if trans = 1. On exit, B is overwritten by the solution vectors, stored columnwise: if trans = 0 and m >= n, rows 1 to n of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of elements N+1 to M in that column; if trans = 0 and m < n, rows 1 to N of B contain the minimum norm solution vectors; if trans = 1 and m >= n, rows 1 to M of B contain the minimum norm solution vectors; if trans = 1 and m < n, rows 1 to M of B contain the least squares solution vectors; the residual sum of squares for the solution in each column is given by the sum of squares of elements M+1 to N in that column. The leading dimension of the array B >= max(1,M,N). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a= random(7,5); # $b will contain X # TODO better example with slice $b = random(7,6); gels($a, 1, $b, ($info = null)); '); pp_def("gelsy", HandleBad => 0, Pars => '[io,phys]A(m,n); [io,phys]B(p,q); [phys]rcond(); int [io,phys]jpvt(n); int [o,phys]rank();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgelsy_(integer *m, integer *n, integer *nrhs, float *a, integer *lda, float *b, integer *ldb, integer * jpvt, float *rcond, integer *rank, float *work, integer * lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgelsy_(integer *m, integer *n, integer *nrhs, double *a, integer *lda, double *b, integer *ldb, integer * jpvt, double *rcond, integer *rank, double *work, integer * lwork, integer *info); double tmp_work; %} $TFD(sgelsy_,dgelsy_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(jpvt), $P(rcond), $P(rank), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgelsy_,dgelsy_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(jpvt), $P(rcond), $P(rank), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes the minimum-norm solution to a real linear least squares problem: minimize || A * X - B || using a complete orthogonal factorization of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The routine first computes a QR factorization with column pivoting: A * P = Q * [ R11 R12 ] [ 0 R22 ] with R11 defined as the largest leading submatrix whose estimated condition number is less than 1/rcond. The order of R11, rank, is the effective rank of A. Then, R22 is considered to be negligible, and R12 is annihilated by orthogonal transformations from the right, arriving at the complete orthogonal factorization: A * P = Q * [ T11 0 ] * Z [ 0 0 ] The minimum-norm solution is then X = P * Z\' [ inv(T11)*Q1\'*B ] [ 0 ] where Q1 consists of the first rank columns of Q. Arguments ========= A: On entry, the M-by-N matrix A. On exit, A has been overwritten by details of its complete orthogonal factorization. B: On entry, the M-by-NRHS right hand side matrix B. On exit, the N-by-NRHS solution matrix X. The leading dimension of the array B >= max(1,M,N). jpvt: On entry, if jpvt(i) != 0, the i-th column of A is permuted to the front of AP, otherwise column i is a free column. On exit, if jpvt(i) = k, then the i-th column of AP was the k-th column of A. rcond: rcond is used to determine the effective rank of A, which is defined as the order of the largest leading triangular submatrix R11 in the QR factorization with pivoting of A, whose estimated condition number < 1/rcond. rank: The effective rank of A, i.e., the order of the submatrix R11. This is the same as the order of the submatrix T11 in the complete orthogonal factorization of A. info: = 0: successful exit < 0: If info = -i, the i-th argument had an illegal value. =for example $a= random(7,5); # $b will contain X # TODO better example with slice $b = random(7,6); $jpvt = zeroes(long, 5); $eps = lamch(0); #Threshold for rank estimation $rcond = sqrt($eps) - (sqrt($eps) - $eps) / 2; gelsy($a, $b, $rcond, $jpvt,($rank=null),($info = null)); '); pp_def("gelss", HandleBad => 0, Pars => '[io,phys]A(m,n); [io,phys]B(p,q); [phys]rcond(); [o,phys]s(r); int [o,phys]rank();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgelss_(integer *m, integer *n, integer *nrhs, float *a, integer *lda, float *b, integer *ldb, float *s, float *rcond, integer *rank, float *work, integer * lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgelss_(integer *m, integer *n, integer *nrhs, double *a, integer *lda, double *b, integer *ldb, double *s,double *rcond, integer *rank, double *work, integer * lwork, integer *info); double tmp_work; %} $TFD(sgelss_,dgelss_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(s), $P(rcond), $P(rank), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgelss_,dgelss_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(s), $P(rcond), $P(rank), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes the minimum norm solution to a real linear least squares problem: Minimize 2-norm(| b - A*x |). using the singular value decomposition (SVD) of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The effective rank of A is determined by treating as zero those singular values which are less than rcond times the largest singular value. Arguments ========= A: On entry, the M-by-N matrix A. On exit, the first min(m,n) rows of A are overwritten with its right singular vectors, stored rowwise. B: On entry, the M-by-NRHS right hand side matrix B. On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and rank = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of squares of elements n+1:m in that column. The leading dimension of the array B >= max(1,M,N). s: The singular values of A in decreasing order. The condition number of A in the 2-norm = s(1)/s(min(m,n)). rcond: rcond is used to determine the effective rank of A. Singular values s(i) <= rcond*s(1) are treated as zero. If rcond < 0, machine precision is used instead. rank: The effective rank of A, i.e., the number of singular values which are greater than rcond*s(1). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: the algorithm for computing the SVD failed to converge; if info = i, i off-diagonal elements of an intermediate bidiagonal form did not converge to zero. =for example $a= random(7,5); # $b will contain X # TODO better example with slice $b = random(7,6); $eps = lamch(0); $s =zeroes(5); #Threshold for rank estimation $rcond = sqrt($eps) - (sqrt($eps) - $eps) / 2; gelss($a, $b, $rcond, $s, ($rank=null),($info = null)); '); pp_def("gelsd", HandleBad => 0, Pars => '[io,phys]A(m,n); [io,phys]B(p,q); [phys]rcond(); [o,phys]s(r); int [o,phys]rank();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; integer smlsiz, size_i, nlvl, *iwork; integer minmn = min( $SIZE(m), $SIZE(n) ); extern integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen opts_len); types(F) %{ extern int sgelsd_(integer *m, integer *n, integer *nrhs, float *a, integer *lda, float *b, integer *ldb, float *s, float *rcond, integer *rank, float *work, integer * lwork, integer *iwork, integer *info); float tmp_work; %} types(D) %{ extern int dgelsd_(integer *m, integer *n, integer *nrhs, double *a, integer *lda, double *b, integer *ldb, double *s,double *rcond, integer *rank, double *work, integer * lwork, integer *iwork,integer *info); double tmp_work; %} minmn = max(1,minmn); types(F) %{ smlsiz = ilaenv_(&c_nine, "SGELSD", " ", &c_zero, &c_zero, &c_zero, &c_zero, (ftnlen)6, (ftnlen)1); size_i = (integer) (log((float) minmn / (float) (smlsiz + 1)) /log(2.)) + 1; %} types(D) %{ smlsiz = ilaenv_(&c_nine, "DGELSD", " ", &c_zero, &c_zero, &c_zero, &c_zero, (ftnlen)6, (ftnlen)1); size_i = (integer) (log((double) minmn / (double) (smlsiz + 1)) /log(2.)) + 1; %} nlvl = max(size_i, 0); iwork = (integer *)malloc((3 * minmn * nlvl + 11 * minmn) * sizeof(integer)); $TFD(sgelsd_,dgelsd_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(s), $P(rcond), $P(rank), &tmp_work, &lwork, iwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgelsd_,dgelsd_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__q_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(s), $P(rcond), $P(rank), work, &lwork, iwork, $P(info)); free(work); } free (iwork); ', Doc => ' =for ref Computes the minimum-norm solution to a real linear least squares problem: minimize 2-norm(| b - A*x |) using the singular value decomposition (SVD) of A. A is an M-by-N matrix which may be rank-deficient. Several right hand side vectors b and solution vectors x can be handled in a single call; they are stored as the columns of the M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix X. The problem is solved in three steps: =over 3 =item 1 Reduce the coefficient matrix A to bidiagonal form with Householder transformations, reducing the original problem into a "bidiagonal least squares problem" (BLS) =item 2 Solve the BLS using a divide and conquer approach. =item 3 Apply back all the Householder tranformations to solve the original least squares problem. =back The effective rank of A is determined by treating as zero those singular values which are less than rcond times the largest singular value. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= A: On entry, the M-by-N matrix A. On exit, A has been destroyed. B: On entry, the M-by-NRHS right hand side matrix B. On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and rank = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of squares of elements n+1:m in that column. The leading dimension of the array B >= max(1,M,N). s: The singular values of A in decreasing order. The condition number of A in the 2-norm = s(1)/s(min(m,n)). rcond: rcond is used to determine the effective rank of A. Singular values s(i) <= rcond*s(1) are treated as zero. If rcond < 0, machine precision is used instead. rank: The effective rank of A, i.e., the number of singular values which are greater than rcond*s(1). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. > 0: the algorithm for computing the SVD failed to converge; if info = i, i off-diagonal elements of an intermediate bidiagonal form did not converge to zero. =for example $a= random(7,5); # $b will contain X # TODO better example with slice $b = random(7,6); $eps = lamch(0); $s =zeroes(5); #Threshold for rank estimation $rcond = sqrt($eps) - (sqrt($eps) - $eps) / 2; gelsd($a, $b, $rcond, $s, ($rank=null),($info = null)); '); pp_def("gglse", HandleBad => 0, Pars => '[phys]A(m,n); [phys]B(p,n);[io,phys]c(m);[phys]d(p);[o,phys]x(n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgglse_(integer *m, integer *n, integer *p, float * a, integer *lda, float *b, integer *ldb, float *c__, float *d__, float *x, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgglse_(integer *m, integer *n, integer *p, double * a, integer *lda, double *b, integer *ldb, double *c__, double *d__, double *x, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgglse_,dgglse_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__p_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(c), $P(d), $P(x), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgglse_,dgglse_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__p_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(c), $P(d), $P(x), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Solves the linear equality-constrained least squares (LSE) problem: minimize || c - A*x ||_2 subject to B*x = d where A is an M-by-N matrix, B is a P-by-N matrix, c is a given M-vector, and d is a given P-vector. It is assumed that P <= N <= M+P, and rank(B) = P and rank( ( A ) ) = N. ( ( B ) ) These conditions ensure that the LSE problem has a unique solution, which is obtained using a GRQ factorization of the matrices B and A. Arguments ========= A: On entry, the M-by-N matrix A. On exit, A is destroyed. B: On entry, the P-by-N matrix B. On exit, B is destroyed. c: On entry, c contains the right hand side vector for the least squares part of the LSE problem. On exit, the residual sum of squares for the solution is given by the sum of squares of elements N-P+1 to M of vector c. d: On entry, d contains the right hand side vector for the constrained equation. On exit, d is destroyed. x: On exit, x is the solution of the LSE problem. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random(7,5); $b = random(4,5); $c = random(7); $d = random(4); $x = zeroes(5); gglse($a, $b, $c, $d, $x, ($info=null)); '); pp_def("ggglm", HandleBad => 0, Pars => '[phys]A(n,m); [phys]B(n,p);[phys]d(n);[o,phys]x(m);[o,phys]y(p);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sggglm_(integer *n, integer *m, integer *p, float * a, integer *lda, float *b, integer *ldb, float *d__, float *x, float *y, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dggglm_(integer *n, integer *m, integer *p, double * a, integer *lda, double *b, integer *ldb, double *d__, double *x, double *y, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sggglm_,dggglm_)( &$PRIV(__n_size), &$PRIV(__m_size), &$PRIV(__p_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(d), $P(x), $P(y), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sggglm_,dggglm_)( &$PRIV(__n_size), &$PRIV(__m_size), &$PRIV(__p_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(d), $P(x), $P(y), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Solves a general Gauss-Markov linear model (GLM) problem: minimize || y ||_2 subject to d = A*x + B*y x where A is an N-by-M matrix, B is an N-by-P matrix, and d is a given N-vector. It is assumed that M <= N <= M+P, and rank(A) = M and rank( A B ) = N. Under these assumptions, the constrained equation is always consistent, and there is a unique solution x and a minimal 2-norm solution y, which is obtained using a generalized QR factorization of A and B. In particular, if matrix B is square nonsingular, then the problem GLM is equivalent to the following weighted linear least squares problem minimize || inv(B)*(d-A*x) ||_2 x where inv(B) denotes the inverse of B. Arguments ========= A: On entry, the N-by-M matrix A. On exit, A is destroyed. B: On entry, the N-by-P matrix B. On exit, B is destroyed. d: On entry, d is the left hand side of the GLM equation. On exit, d is destroyed. x: y: On exit, x and y are the solutions of the GLM problem. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random(7,5); $b = random(7,4); $d = random(7); $x = zeroes(5); $y = zeroes(4); ggglm($a, $b, $d, $x, $y,($info=null)); '); ################################################################################ # # COMPUTATIONAL LEVEL ROUTINES # ################################################################################ # TODO IPIV = min(m,n) pp_def("getrf", HandleBad => 0, RedoDimsCode => '$SIZE(p) = $PDL(A)->ndims > 1 ? min($PDL(A)->dims[0], $PDL(A)->dims[1]) : 1;', Pars => '[io,phys]A(m,n); int [o,phys]ipiv(p); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' types(F) %{ extern int sgetrf_(integer *m, integer *n, float *a, integer * lda, integer *ipiv, integer *info); %} types(D) %{ extern int dgetrf_(integer *m, integer *n, double *a, integer * lda, integer *ipiv, integer *info); %} $TFD(sgetrf_,dgetrf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(ipiv), $P(info)); ', Doc => ' =for ref Computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 3 BLAS version of the algorithm. Arguments ========= A: On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. ipiv: The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row ipiv(i). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. =for example $a = random (float, 100,50); $ipiv = zeroes(long, 50); $info = null; getrf($a, $ipiv, $info); '); pp_def("getf2", HandleBad => 0, RedoDimsCode => '$SIZE(p) = $PDL(A)->ndims > 1 ? min($PDL(A)->dims[0], $PDL(A)->dims[1]) : 1;', Pars => '[io,phys]A(m,n); int [o,phys]ipiv(p); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' types(F) %{ extern int sgetf2_(integer *m, integer *n, float *a, integer * lda, integer *ipiv, integer *info); %} types(D) %{ extern int dgetf2_(integer *m, integer *n, double *a, integer * lda, integer *ipiv, integer *info); %} $TFD(sgetf2_,dgetf2_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(ipiv), $P(info)); ', Doc => ' =for ref Computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. The factorization has the form A = P * L * U where P is a permutation matrix, L is lower triangular with unit diagonal elements (lower trapezoidal if m > n), and U is upper triangular (upper trapezoidal if m < n). This is the right-looking Level 2 BLAS version of the algorithm. Arguments ========= A: On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. ipiv: The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row ipiv(i). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) is exactly zero. The factorization has been completed, but the factor U is exactly singular, and division by zero will occur if it is used to solve a system of equations. =for example $a = random (float, 100,50); $ipiv = zeroes(long, 50); $info = null; getf2($a, $ipiv, $info); '); pp_def("sytrf", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int [o,phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; integer lwork = -1; types(F) %{ extern int ssytrf_(char *uplo, integer *n, float *a, integer * lda, integer *ipiv, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dsytrf_(char *uplo, integer *n, double *a, integer * lda, integer *ipiv, double *work, integer *lwork, integer *info); double tmp_work; %} if ($uplo()) puplo = \'L\'; $TFD(ssytrf_,dsytrf_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(ssytrf_,dsytrf_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), work, &lwork, $P(info)); free (work); } ', Doc => ' =for ref Computes the factorization of a real symmetric matrix A using the Bunch-Kaufman diagonal pivoting method. The form of the factorization is A = U*D*U\' or A = L*D*L\' where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. This is the blocked version of the algorithm, calling Level 3 BLAS. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the block diagonal matrix D and the multipliers used to obtain the factor U or L (see below for further details). ipiv: Details of the interchanges and the block structure of D. If ipiv(k) > 0, then rows and columns k and ipiv(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If uplo = 0 and ipiv(k) = ipiv(k-1) < 0, then rows and columns k-1 and -ipiv(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If uplo = 1 and ipiv(k) = ipiv(k+1) < 0, then rows and columns k+1 and -ipiv(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, and division by zero will occur if it is used to solve a system of equations. Further Details =============== If uplo = 0, then A = U*D*U\', where U = P(n)*U(n)* ... *P(k)U(k)* ..., i.e., U is a product of terms P(k)*U(k), where k decreases from n to 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as defined by ipiv(k), and U(k) is a unit upper triangular matrix, such that if the diagonal block D(k) is of order s (s = 1 or 2), then ( I v 0 ) k-s U(k) = ( 0 I 0 ) s ( 0 0 I ) n-k k-s s n-k If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), and A(k,k), and v overwrites A(1:k-2,k-1:k). If uplo = 1, then A = L*D*L\', where L = P(1)*L(1)* ... *P(k)*L(k)* ..., i.e., L is a product of terms P(k)*L(k), where k increases from 1 to n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as defined by ipiv(k), and L(k) is a unit lower triangular matrix, such that if the diagonal block D(k) is of order s (s = 1 or 2), then ( I 0 0 ) k-1 L(k) = ( 0 I 0 ) s ( 0 v I ) n-k-s+1 k-1 s n-k-s+1 If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). =for example $a = random(100,100); $ipiv = zeroes(100); $info = null; # Assume $a is symmetric sytrf($a, 0, $ipiv, $info); '); pp_def("sytf2", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int [o,phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int ssytf2_(char *uplo, integer *n, float *a, integer * lda, integer *ipiv, integer *info); %} types(D) %{ extern int dsytf2_(char *uplo, integer *n, double *a, integer * lda, integer *ipiv, integer *info); %} if ($uplo()) puplo = \'L\'; $TFD(ssytf2_,dsytf2_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(info)); ', Doc => ' =for ref Computes the factorization of a real symmetric matrix A using the Bunch-Kaufman diagonal pivoting method. The form of the factorization is A = U*D*U\' or A = L*D*L\' where U (or L) is a product of permutation and unit upper (lower) triangular matrices, and D is symmetric and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, the block diagonal matrix D and the multipliers used to obtain the factor U or L (see below for further details). ipiv: Details of the interchanges and the block structure of D. If ipiv(k) > 0, then rows and columns k and ipiv(k) were interchanged and D(k,k) is a 1-by-1 diagonal block. If uplo = 0 and ipiv(k) = ipiv(k-1) < 0, then rows and columns k-1 and -ipiv(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If uplo = 1 and ipiv(k) = ipiv(k+1) < 0, then rows and columns k+1 and -ipiv(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, D(i,i) is exactly zero. The factorization has been completed, but the block diagonal matrix D is exactly singular, and division by zero will occur if it is used to solve a system of equations. For further details see sytrf =for example $a = random(100,100); $ipiv = zeroes(100); $info = null; # Assume $a is symmetric sytf2($a, 0, $ipiv, $info); '); pp_def("potrf", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int spotrf_(char *uplo, integer *n, float *a, integer * lda, integer *info); %} types(D) %{ extern int dpotrf_(char *uplo, integer *n, double *a, integer * lda, integer *info); %} if ($uplo()) puplo = \'L\'; $TFD(spotrf_,dpotrf_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form A = U\' * U, if uplo = 0, or A = L * L\', if uplo = 1, where U is an upper triangular matrix and L is lower triangular. This is the block version of the algorithm, calling Level 3 BLAS. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if info = 0, the factor U or L from the Cholesky factorization A = U\'*U or A = L*L\'. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the leading minor of order i is not positive definite, and the factorization could not be completed. =for example $a = random(100,100); # Assume $a is symmetric positive definite potrf($a, 0, ($info = null)); '); pp_def("potf2", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int spotf2_(char *uplo, integer *n, float *a, integer * lda, integer *info); %} types(D) %{ extern int dpotf2_(char *uplo, integer *n, double *a, integer * lda, integer *info); %} if ($uplo()) puplo = \'L\'; $TFD(spotf2_,dpotf2_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the Cholesky factorization of a real symmetric positive definite matrix A. The factorization has the form A = U\' * U, if uplo = 0, or A = L * L\', if uplo = 1, where U is an upper triangular matrix and L is lower triangular. This is the unblocked version of the algorithm, calling Level 2 BLAS. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the symmetric matrix A. If uplo = 0, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if info = 0, the factor U or L from the Cholesky factorization A = U\'*U or A = L*L\'. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the leading minor of order i is not positive definite, and the factorization could not be completed. =for example $a = random(100,100); # Assume $a is symmetric positive definite potf2($a, 0, ($info = null)); '); pp_def("getri", HandleBad => 0, Pars => '[io,phys]A(n,n); int [phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgetri_(integer *n, float *a, integer *lda, integer *ipiv, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgetri_(integer *n, double *a, integer *lda, integer *ipiv, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgetri_,dgetri_)( &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgetri_,dgetri_)( &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes the inverse of a matrix using the LU factorization computed by C<getrf>. This method inverts U and then computes inv(A) by solving the system inv(A)*L = inv(U) for inv(A). Arguments ========= A: On entry, the factors L and U from the factorization A = P*L*U as computed by getrf. On exit, if info = 0, the inverse of the original matrix A. ipiv: The pivot indices from getrf; for 1<=i<=N, row i of the matrix was interchanged with row ipiv(i). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, U(i,i) is exactly zero; the matrix is singular and its inverse could not be computed. =for example $a = random (float, 100, 100); $ipiv = zeroes(long, 100); $info = null; getrf($a, $ipiv, $info); if ($info == 0){ getri($a, $ipiv, $info); } print "Inverse of \$a is :\n $a" unless $info; '); pp_def("sytri", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int [phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int ssytri_(char *uplo, integer *n, float *a, integer * lda, integer *ipiv, float *work, integer *info); float *work = (float *)malloc($PRIV(__n_size) * sizeof(float)); %} types(D) %{ extern int dsytri_(char *uplo, integer *n, double *a, integer * lda, integer *ipiv, double *work, integer *info); double *work = (double *)malloc($PRIV(__n_size) * sizeof(double)); %} if ($uplo()) puplo = \'L\'; $TFD(ssytri_, dsytri_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), work, $P(info)); free(work); ', Doc => ' =for ref Computes the inverse of a real symmetric indefinite matrix A using the factorization A = U*D*U\' or A = L*D*L\' computed by C<sytrf>. Arguments ========= uplo: Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 0: Upper triangular, form is A = U*D*U\'; = 1: Lower triangular, form is A = L*D*L\'. A: On entry, the block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by sytrf. On exit, if info = 0, the (symmetric) inverse of the original matrix. If uplo = 0, the upper triangular part of the inverse is formed and the part of A below the diagonal is not referenced; if uplo = 1 the lower triangular part of the inverse is formed and the part of A above the diagonal is not referenced. ipiv: Details of the interchanges and the block structure of D as determined by sytrf. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, D(i,i) = 0; the matrix is singular and its inverse could not be computed. =for example $a = random (float, 100, 100); # assume $a is symmetric $ipiv = zeroes(long, 100); sytrf($a, 0, $ipiv, ($info=null)); if ($info == 0){ sytri($a, 0, $ipiv, $info); } print "Inverse of \$a is :\n $a" unless $info; '); pp_def("potri", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int spotri_(char *uplo, integer *n, float *a, integer * lda, integer *info); %} types(D) %{ extern int dpotri_(char *uplo, integer *n, double *a, integer * lda, integer *info); %} if ($uplo()) puplo = \'L\'; $TFD(spotri_,dpotri_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the inverse of a real symmetric positive definite matrix A using the Cholesky factorization A = U\'*U or A = L*L\' computed by C<potrf>. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: On entry, the triangular factor U or L from the Cholesky factorization A = U\'*U or A = L*L\', as computed by potrf. On exit, the upper or lower triangle of the (symmetric) inverse of A, overwriting the input factor U or L. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the (i,i) element of the factor U or L is zero, and the inverse could not be computed. =for example $a = random (float, 100, 100); # Assume $a is symmetric positive definite potrf($a, 0, ($info = null)); if ($info == 0){ # Hum... is it positive definite???? potri($a, 0,$info); } print "Inverse of \$a is :\n $a" unless $info; '); pp_def("trtri", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int diag(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; char pdiag = \'N\'; types(F) %{ extern int strtri_(char *uplo, char *diag, integer *n, float *a, integer * lda, integer *info); %} types(D) %{ extern int dtrtri_(char *uplo, char *diag, integer *n, double *a, integer * lda, integer *info); %} if ($uplo()) puplo = \'L\'; if ($diag()) pdiag = \'U\'; $TFD(strtri_, dtrtri_)( &puplo, &pdiag, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the inverse of a real upper or lower triangular matrix A. This is the Level 3 BLAS version of the algorithm. Arguments ========= uplo: = 0: A is upper triangular; = 1: A is lower triangular. diag: = 0: A is non-unit triangular; = 1: A is unit triangular. A: On entry, the triangular matrix A. If uplo = 0, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = 1, the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, A(i,i) is exactly zero. The triangular matrix is singular and its inverse can not be computed. =for example $a = random (float, 100, 100); # assume $a is upper triangular trtri($a, 1, ($info=null)); print "Inverse of \$a is :\n transpose($a)" unless $info; '); pp_def("trti2", HandleBad => 0, Pars => '[io,phys]A(n,n); int uplo(); int diag(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; char pdiag = \'N\'; types(F) %{ extern int strti2_(char *uplo, char *diag, integer *n, float *a, integer * lda, integer *info); %} types(D) %{ extern int dtrti2_(char *uplo, char *diag, integer *n, double *a, integer * lda, integer *info); %} if ($uplo()) puplo = \'L\'; if ($diag()) pdiag = \'U\'; $TFD(strti2_, dtrti2_)( &puplo, &pdiag, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Computes the inverse of a real upper or lower triangular matrix A. This is the Level 2 BLAS version of the algorithm. Arguments ========= uplo: = 0: A is upper triangular; = 1: A is lower triangular. diag: = 0: A is non-unit triangular; = 1: A is unit triangular. A: On entry, the triangular matrix A. If uplo = 0, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = 1, the diagonal elements of A are also not referenced and are assumed to be 1. On exit, the (triangular) inverse of the original matrix, in the same storage format. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 100); # assume $a is upper triangular trtri2($a, 1, ($info=null)); print "Inverse of \$a is :\n transpose($a)" unless $info; '); pp_def("getrs", HandleBad => 0, Pars => '[phys]A(n,n); int trans(); [io,phys]B(n,m); int [phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char transp = \'N\'; types(F) %{ extern int sgetrs_(char *trans, integer *n, integer *nrhs, float *a, integer *lda, integer *ipiv, float *b, integer * ldb, integer *info); %} types(D) %{ extern int dgetrs_(char *trans, integer *n, integer *nrhs, double *a, integer *lda, integer *ipiv, double *b, integer * ldb, integer *info); %} if($trans()) transp = \'T\'; $TFD(sgetrs_,dgetrs_)( &transp, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Solves a system of linear equations A * X = B or A\' * X = B with a general N-by-N matrix A using the LU factorization computed by getrf. Arguments ========= trans: Specifies the form of the system of equations: = 0: A * X = B (No transpose) = 1: A\'* X = B (Transpose) A: The factors L and U from the factorization A = P*L*U as computed by getrf. ipiv: The pivot indices from getrf; for 1<=i<=N, row i of the matrix was interchanged with row ipiv(i). B: On entry, the right hand side matrix B. On exit, the solution matrix X. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 100); $ipiv = zeroes(long, 100); $b = random(100,50); getrf($a, $ipiv, ($info=null)); if ($info == 0){ getrs($a, 0, $b, $ipiv, $info); } print "X is :\n $b" unless $info; '); pp_def("sytrs", HandleBad => 0, Pars => '[phys]A(n,n); int uplo();[io,phys]B(n,m); int [phys]ipiv(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int ssytrs_(char *uplo, integer *n, integer *nrhs, float *a, integer *lda, integer *ipiv, float *b, integer * ldb, integer *info); %} types(D) %{ extern int dsytrs_(char *uplo, integer *n, integer *nrhs, double *a, integer *lda, integer *ipiv, double *b, integer * ldb, integer *info); %} if($uplo()) puplo = \'L\'; $TFD(ssytrs_,dsytrs_)( &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(B), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Solves a system of linear equations A*X = B with a real symmetric matrix A using the factorization A = U*D*U\' or A = L*D*L\' computed by C<sytrf>. Arguments ========= uplo: Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 0: Upper triangular, form is A = U*D*U\'; = 1: Lower triangular, form is A = L*D*L\'. A: The block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by sytrf. ipiv: Details of the interchanges and the block structure of D as determined by sytrf. B: On entry, the right hand side matrix B. On exit, the solution matrix X. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 100); $b = random(50,100); $a = transpose($a); $b = transpose($b); # Assume $a is symmetric sytrf($a, 0, ($ipiv=zeroes(100)), ($info=null)); if ($info == 0){ sytrs($a, 0, $b, $ipiv, $info); } print("X is :\n".transpose($b))unless $info; '); pp_def("potrs", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); [io,phys]B(n,m); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int spotrs_(char *uplo, integer *n, integer *nrhs, float *a, integer *lda, float *b, integer *ldb, integer * info); %} types(D) %{ extern int dpotrs_(char *uplo, integer *n, integer *nrhs, double *a, integer *lda, double *b, integer *ldb, integer * info); %} if($uplo()) puplo = \'L\'; $TFD(spotrs_,dpotrs_)( &puplo, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Solves a system of linear equations A*X = B with a symmetric positive definite matrix A using the Cholesky factorization A = U\'*U or A = L*L\' computed by C<potrf>. Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: The triangular factor U or L from the Cholesky factorization A = U\'*U or A = L*L\', as computed by potrf. B: On entry, the right hand side matrix B. On exit, the solution matrix X. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 100); $b = random(50,100); $a = transpose($a); $b = transpose($b); # Assume $a is symmetric positive definite potrf($a, 0, ($info=null)); if ($info == 0){ potrs($a, 0, $b, $info); } print("X is :\n".transpose($b))unless $info; '); pp_def("trtrs", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); int trans(); int diag();[io,phys]B(n,m); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; char ptrans = \'N\'; char pdiag = \'N\'; types(F) %{ extern int strtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, float *a, integer *lda, float *b, integer * ldb, integer *info); %} types(D) %{ extern int dtrtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, double *a, integer *lda, double *b, integer * ldb, integer *info); %} if($uplo()) puplo = \'L\'; if($trans()) ptrans = \'T\'; if($diag()) pdiag = \'U\'; $TFD(strtrs_,dtrtrs_)( &puplo, &ptrans, &pdiag, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref Solves a triangular system of the form A * X = B or A\' * X = B, where A is a triangular matrix of order N, and B is an N-by-NRHS matrix. A check is made to verify that A is nonsingular. Arguments ========= uplo: = 0: A is upper triangular; = 1: A is lower triangular. trans: Specifies the form of the system of equations: = 0: A * X = B (No transpose) = 1: A**T * X = B (Transpose) diag: = 0: A is non-unit triangular; = 1: A is unit triangular. A: The triangular matrix A. If uplo = 0, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = 1, the diagonal elements of A are also not referenced and are assumed to be 1. B: On entry, the right hand side matrix B. On exit, if info = 0, the solution matrix X. info = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, the i-th diagonal element of A is zero, indicating that the matrix is singular and the solutions X have not been computed. =for example # Assume $a is upper triangular $a = random (float, 100, 100); $b = random(50,100); $a = transpose($a); $b = transpose($b); $info = null; trtrs($a, 0, 0, 0, $b, $info); print("X is :\n".transpose($b))unless $info; '); pp_def("latrs", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); int trans(); int diag(); int normin();[io,phys]x(n); [o,phys]scale();[io,phys]cnorm(n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; char ptrans = \'N\'; char pdiag = \'N\'; char pnormin = \'N\'; types(F) %{ extern int slatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, float *a, integer *lda, float *x, float *scale, float *cnorm, integer *info); %} types(D) %{ extern int dlatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, double *a, integer *lda, double *x, double *scale, double *cnorm, integer *info); %} if($uplo()) puplo = \'L\'; if($trans()) ptrans = \'T\'; if($diag()) pdiag = \'U\'; if($normin()) pnormin = \'Y\'; $TFD(slatrs_,dlatrs_)( &puplo, &ptrans, &pdiag, &pnormin, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(x), $P(scale), $P(cnorm), $P(info)); ', Doc => ' =for ref Solves one of the triangular systems A *x = s*b or A\'*x = s*b with scaling to prevent overflow. Here A is an upper or lower triangular matrix, A\' denotes the transpose of A, x and b are n-element vectors, and s is a scaling factor, usually less than or equal to 1, chosen so that the components of x will be less than the overflow threshold. If the unscaled problem will not cause overflow, the Level 2 BLAS routine C<trsv> is called. If the matrix A is singular (A(j,j) = 0 for some j), then s is set to 0 and a non-trivial solution to A*x = 0 is returned. Further Details ======= ======= A rough bound on x is computed; if that is less than overflow, trsv is called, otherwise, specific code is used which checks for possible overflow or divide-by-zero at every operation. A columnwise scheme is used for solving A*x = b. The basic algorithm if A is lower triangular is x[1:n] := b[1:n] for j = 1, ..., n x(j) := x(j) / A(j,j) x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] end Define bounds on the components of x after j iterations of the loop: M(j) = bound on x[1:j] G(j) = bound on x[j+1:n] Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. Then for iteration j+1 we have M(j+1) <= G(j) / | A(j+1,j+1) | G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | <= G(j) ( 1 + cnorm(j+1) / | A(j+1,j+1) | ) where cnorm(j+1) is greater than or equal to the infinity-norm of column j+1 of A, not counting the diagonal. Hence G(j) <= G(0) product ( 1 + cnorm(i) / | A(i,i) | ) 1<=i<=j and |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + cnorm(i) / |A(i,i)| ) 1<=i< j Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the reciprocal of the largest M(j), j=1,..,n, is larger than max(underflow, 1/overflow). The bound on x(j) is also used to determine when a step in the columnwise method can be performed without fear of overflow. If the computed bound is greater than a large constant, x is scaled to prevent overflow, but if the bound overflows, x is set to 0, x(j) to 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. Similarly, a row-wise scheme is used to solve A\'*x = b. The basic algorithm for A upper triangular is for j = 1, ..., n x(j) := ( b(j) - A[1:j-1,j]\' * x[1:j-1] ) / A(j,j) end We simultaneously compute two bounds G(j) = bound on ( b(i) - A[1:i-1,i]\' * x[1:i-1] ), 1<=i<=j M(j) = bound on x(i), 1<=i<=j The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. Then the bound on x(j) is M(j) <= M(j-1) * ( 1 + cnorm(j) ) / | A(j,j) | <= M(0) * product ( ( 1 + cnorm(i) ) / |A(i,i)| ) 1<=i<=j and we can safely call trsv if 1/M(n) and 1/G(n) are both greater than max(underflow, 1/overflow). Arguments ========= uplo: Specifies whether the matrix A is upper or lower triangular. = 0: Upper triangular = 1: Lower triangular trans: Specifies the operation applied to A. = 0: Solve A * x = s*b (No transpose) = 1: Solve A\'* x = s*b (Transpose) diag: Specifies whether or not the matrix A is unit triangular. = 0: Non-unit triangular = 1: Unit triangular normin: Specifies whether cnorm has been set or not. = 1: cnorm contains the column norms on entry = 0: cnorm is not set on entry. On exit, the norms will be computed and stored in cnorm. A: The triangular matrix A. If uplo = 0, the leading n by n upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading n by n lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = 1, the diagonal elements of A are also not referenced and are assumed to be 1. x: On entry, the right hand side b of the triangular system. On exit, x is overwritten by the solution vector x. scale: The scaling factor s for the triangular system A * x = s*b or A\'* x = s*b. If scale = 0, the matrix A is singular or badly scaled, and the vector x is an exact or approximate solution to A*x = 0. cnorm: If normin = 0, cnorm is an output argument and cnorm(j) returns the 1-norm of the offdiagonal part of the j-th column of A. If normin = 1, cnorm is an input argument and cnorm(j) contains the norm of the off-diagonal part of the j-th column of A. If trans = 0, cnorm(j) must be greater than or equal to the infinity-norm, and if trans = 1, cnorm(j) must be greater than or equal to the 1-norm. info: = 0: successful exit < 0: if info = -k, the k-th argument had an illegal value =for example # Assume $a is upper triangular $a = random (float, 100, 100); $b = random(100); $a = transpose($a); $info = null; $scale= null; $cnorm = zeroes(100); latrs($a, 0, 0, 0, 0,$b, $scale, $cnorm,$info); '); pp_def("gecon", HandleBad => 0, Pars => '[phys]A(n,n); int norm(); [phys]anorm(); [o,phys]rcond();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pnorm = \'I\'; types(F) %{ extern int sgecon_(char *norm, integer *n, float *a, integer * lda, float *anorm, float *rcond, float *work, integer * iwork, integer *info); float *work = (float *) malloc(($PRIV(__n_size) * 4) * sizeof(float)); %} types(D) %{ extern int dgecon_(char *norm, integer *n, double *a, integer * lda, double *anorm, double *rcond, double *work, integer * iwork, integer *info); double *work = (double *) malloc(($PRIV(__n_size)*4) * sizeof(double)); %} integer *iwork = (integer *) malloc($PRIV(__n_size) * sizeof(integer)); if($norm()) pnorm = \'O\'; $TFD(sgecon_,dgecon_)( &pnorm, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(anorm), $P(rcond), work, iwork, $P(info)); free (work); free(iwork); ', Doc => ' =for ref Estimates the reciprocal of the condition number of a general real matrix A, in either the 1-norm or the infinity-norm, using the LU factorization computed by C<getrf>. An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as rcond = 1 / ( norm(A) * norm(inv(A)) ). Arguments ========= norm: Specifies whether the 1-norm condition number or the infinity-norm condition number is required: = 0: Infinity-norm. = 1: 1-norm; A: The factors L and U from the factorization A = P*L*U as computed by getrf. anorm: If norm = 0, the infinity-norm of the original matrix A. If norm = 1, the 1-norm of the original matrix A. rcond: The reciprocal of the condition number of the matrix A, computed as rcond = 1/(norm(A) * norm(inv(A))). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 100); $anorm = $a->lange(1); $ipiv = zeroes(long, 100); $info = null; getrf($a, $ipiv, $info); ($rcond, $info) = gecon($a, 1, $anorm) unless $info != 0; '); pp_def("sycon", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); int ipiv(n); [phys]anorm(); [o,phys]rcond();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int ssycon_(char *uplo, integer *n, float *a, integer * lda, integer *ipiv, float *anorm, float *rcond, float * work, integer *iwork, integer *info); float *work = (float *) malloc(($PRIV(__n_size) * 2) * sizeof(float)); %} types(D) %{ extern int dsycon_(char *uplo, integer *n, double *a, integer * lda, integer *ipiv, double *anorm, double *rcond, double * work, integer *iwork, integer *info); double *work = (double *) malloc(($PRIV(__n_size)*2) * sizeof(double)); %} integer *iwork = (integer *) malloc($PRIV(__n_size) * sizeof(integer)); if($uplo()) puplo = \'L\'; $TFD(ssycon_,dsycon_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ipiv), $P(anorm), $P(rcond), work, iwork, $P(info)); free (work); free(iwork); ', Doc => ' =for ref Estimates the reciprocal of the condition number (in the 1-norm) of a real symmetric matrix A using the factorization A = U*D*U\' or A = L*D*L\' computed by C<sytrf>. An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as rcond = 1 / (anorm * norm(inv(A))). Arguments ========= uplo: Specifies whether the details of the factorization are stored as an upper or lower triangular matrix. = 0: Upper triangular, form is A = U*D*U\'; = 1: Lower triangular, form is A = L*D*L\'. A: The block diagonal matrix D and the multipliers used to obtain the factor U or L as computed by sytrf. ipiv: Details of the interchanges and the block structure of D as determined by sytrf. anorm: The 1-norm of the original matrix A. rcond: The reciprocal of the condition number of the matrix A, computed as rcond = 1/(anorm * aimvnm), where ainvnm is an estimate of the 1-norm of inv(A) computed in this routine. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. =for example # Assume $a is symmetric $a = random (float, 100, 100); $anorm = $a->lansy(1,1); $ipiv = zeroes(long, 100); $info = null; sytrf($a, 1,$ipiv, $info); ($rcond, $info) = sycon($a, 1, $anorm) unless $info != 0; '); pp_def("pocon", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); [phys]anorm(); [o,phys]rcond();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; types(F) %{ extern int spocon_(char *uplo, integer *n, float *a, integer * lda, float *anorm, float *rcond, float *work, integer * iwork, integer *info); float *work = (float *) malloc(($PRIV(__n_size) * 3) * sizeof(float)); %} types(D) %{ extern int dpocon_(char *uplo, integer *n, double *a, integer * lda, double *anorm, double *rcond, double *work, integer * iwork, integer *info); double *work = (double *) malloc(($PRIV(__n_size)*3) * sizeof(double)); %} integer *iwork = (integer *) malloc($PRIV(__n_size) * sizeof(integer)); if($uplo()) puplo = \'L\'; $TFD(spocon_,dpocon_)( &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(anorm), $P(rcond), work, iwork, $P(info)); free (work); free(iwork); ', Doc => ' =for ref Estimates the reciprocal of the condition number (in the 1-norm) of a real symmetric positive definite matrix using the Cholesky factorization A = U\'*U or A = L*L\' computed by C<potrf>. An estimate is obtained for norm(inv(A)), and the reciprocal of the condition number is computed as rcond = 1 / (anorm * norm(inv(A))). Arguments ========= uplo: = 0: Upper triangle of A is stored; = 1: Lower triangle of A is stored. A: The triangular factor U or L from the Cholesky factorization A = U\'*U or A = L*L\', as computed by potrf. anorm: The 1-norm of the matrix A. rcond: The reciprocal of the condition number of the matrix A, computed as rcond = 1/(anorm * ainvnm), where ainvnm is an estimate of the 1-norm of inv(A) computed in this routine. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example # Assume $a is symmetric positive definite $a = random (float, 100, 100); $anorm = $a->lansy(1,1); $info = null; potrf($a, 0, $info); ($rcond, $info) = pocon($a, 1, $anorm) unless $info != 0; '); pp_def("trcon", HandleBad => 0, Pars => '[phys]A(n,n); int norm();int uplo();int diag(); [o,phys]rcond();int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char puplo = \'U\'; char pdiag = \'N\'; char pnorm = \'I\'; types(F) %{ extern int strcon_(char *norm, char *uplo, char *diag,integer *n, float *a, integer * lda, float *rcond, float *work, integer *iwork, integer *info); float *work = (float *) malloc(($PRIV(__n_size) * 3) * sizeof(float)); %} types(D) %{ extern int dtrcon_(char *norm, char *uplo, char *diag, integer *n, double *a, integer * lda, double *rcond, double * work, integer *iwork, integer *info); double *work = (double *) malloc(($PRIV(__n_size)*3) * sizeof(double)); %} integer *iwork = (integer *) malloc($PRIV(__n_size) * sizeof(integer)); if($uplo()) puplo = \'L\'; if($diag()) pdiag = \'U\'; if($norm()) pnorm = \'O\'; $TFD(strcon_,dtrcon_)( &pnorm, &puplo, &pdiag, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(rcond), work, iwork, $P(info)); free (work); free(iwork); ', Doc => ' =for ref Estimates the reciprocal of the condition number of a triangular matrix A, in either the 1-norm or the infinity-norm. The norm of A is computed and an estimate is obtained for norm(inv(A)), then the reciprocal of the condition number is computed as rcond = 1 / ( norm(A) * norm(inv(A)) ). Arguments ========= norm: Specifies whether the 1-norm condition number or the infinity-norm condition number is required: = 0: Infinity-norm. = 1: 1-norm; uplo: = 0: A is upper triangular; = 1: A is lower triangular. diag: = 0: A is non-unit triangular; = 1: A is unit triangular. A: The triangular matrix A. If uplo = 0, the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If diag = 1, the diagonal elements of A are also not referenced and are assumed to be 1. rcond: The reciprocal of the condition number of the matrix A, computed as rcond = 1/(norm(A) * norm(inv(A))). info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example # Assume $a is upper triangular $a = random (float, 100, 100); $info = null; ($rcond, $info) = trcon($a, 1, 1, 0) unless $info != 0; '); pp_def("geqp3", HandleBad => 0, Pars => '[io,phys]A(m,n); int [io,phys]jpvt(n); [o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgeqp3_(integer *m, integer *n, float *a, integer * lda, integer *jpvt, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgeqp3_(integer *m, integer *n, double *a, integer * lda, integer *jpvt, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgeqp3_,dgeqp3_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(jpvt), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgeqp3_,dgeqp3_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(jpvt), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref geqp3 computes a QR factorization using Level 3 BLAS with column pivoting of a matrix A: A*P = Q*R The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v\' where tau is a real/complex scalar, and v is a real/complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in tau(i). Arguments ========= A: On entry, the M-by-N matrix A. On exit, the upper triangle of the array contains the min(M,N)-by-N upper trapezoidal matrix R; the elements below the diagonal, together with the array tau, represent the orthogonal matrix Q as a product of min(M,N) elementary reflectors. jpvt: On entry, if jpvt(J)!=0, the J-th column of A is permuted to the front of A*P (a leading column); if jpvt(J)=0, the J-th column of A is a free column. On exit, if jpvt(J)=K, then the J-th column of A*P was the the K-th column of A. tau: The scalar factors of the elementary reflectors. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); $jpvt = zeroes(long, 50); geqp3($a, $jpvt, $tau, $info); '); pp_def("geqrf", HandleBad => 0, Pars => '[io,phys]A(m,n); [o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgeqrf_(integer *m, integer *n, float *a, integer * lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgeqrf_(integer *m, integer *n, double *a, integer * lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgeqrf_,dgeqrf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgeqrf_,dgeqrf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref geqrf computes a QR factorization of a matrix A: A = Q * R The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v\' where tau is a real/complex scalar, and v is a real/complex vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), and tau in tau(i). Arguments ========= A: On exit, the elements on and above the diagonal of the array contain the min(M,N)-by-N upper trapezoidal matrix R (R is upper triangular if m >= n); the elements below the diagonal, with the array TAU, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors. tau: The scalar factors of the elementary reflectors. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); geqrf($a, $tau, $info); '); pp_def("orgqr", HandleBad => 0, Pars => '[io,phys]A(m,n); [phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sorgqr_(integer *m, integer *n, integer *k, float * a, integer *lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dorgqr_(integer *m, integer *n, integer *k, double * a, integer *lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sorgqr_,dorgqr_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sorgqr_,dorgqr_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Generates an M-by-N real matrix Q with orthonormal columns, which is defined as the first N columns of a product of K elementary reflectors of order M Q = H(1) H(2) . . . H(k) as returned by geqrf or geqp3. Arguments ========= A: On entry, the i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by geqrf or geqp3 in the first k columns of its array argument A. On exit, the M-by-N matrix Q. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by geqrf or geqp3. info: = 0: successful exit < 0: if info = -i, the i-th argument has an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); geqrf($a, $tau, $info); orgqr($a, $tau, $info) unless $info != 0; '); pp_def("ormqr", HandleBad => 0, Pars => '[phys]A(p,k); int side(); int trans(); [phys]tau(k); [io,phys]C(m,n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans = \'N\', pside = \'L\'; integer lwork = -1; types(F) %{ extern int sormqr_(char *side, char *trans, integer *m, integer *n, integer *k, float *a, integer *lda, float *tau, float * c__, integer *ldc, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dormqr_(char *side, char *trans, integer *m, integer *n, integer *k, double *a, integer *lda, double *tau, double * c__, integer *ldc, double *work, integer *lwork, integer *info); double tmp_work; %} if($trans()) ptrans = \'T\'; if($side()) pside = \'R\'; $TFD(sormqr_,dormqr_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__p_size), $P(tau), $P(C), &$PRIV(__m_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sormqr_,dormqr_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__p_size), $P(tau), $P(C), &$PRIV(__m_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Overwrites the general real M-by-N matrix C with side = 0 side = 1 trans = 0: Q * C C * Q trans = 1: Q\' * C C * Q\' where Q is a real orthogonal matrix defined as the product of k elementary reflectors Q = H(1) H(2) . . . H(k) as returned by geqrf or geqp3. Q is of order M if C<side> = 0 and of order N if C<side> = 1. Arguments ========= side: = 0: apply Q or Q\' from the Left; = 1: apply Q or Q\' from the Right. trans: = 0: No transpose, apply Q; = 1: Transpose, apply Q\'. A: The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by geqrf or geqp3 in the first k columns of its array argument A. A is modified by the routine but restored on exit. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by geqrf or geqp3. C: On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q\'*C or C*Q\' or C*Q. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 50, 100); $a = transpose($a); $info = null; $tau = zeroes(float, 50); geqrf($a, $tau, $info); $c = random(70,50); # $c will contain the result $c->reshape(70,100); $c = transpose($c); ormqr($a, $tau, $c, $info); '); pp_def("gelqf", HandleBad => 0, Pars => '[io,phys]A(m,n); [o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgelqf_(integer *m, integer *n, float *a, integer * lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgelqf_(integer *m, integer *n, double *a, integer * lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgelqf_,dgelqf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgelqf_,dgelqf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes an LQ factorization of a real M-by-N matrix A: A = L * Q. The matrix Q is represented as a product of elementary reflectors Q = H(k) . . . H(2) H(1), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v\' where tau is a real scalar, and v is a real vector with v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), and tau in tau(i). Arguments ========= A: On entry, the M-by-N matrix A. On exit, the elements on and below the diagonal of the array contain the m-by-min(m,n) lower trapezoidal matrix L (L is lower triangular if m <= n); the elements above the diagonal, with the array tau, represent the orthogonal matrix Q as a product of elementary reflectors. tau: The scalar factors of the elementary reflectors. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); gelqf($a, $tau, $info); '); pp_def("orglq", HandleBad => 0, Pars => '[io,phys]A(m,n); [phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sorglq_(integer *m, integer *n, integer *k, float * a, integer *lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dorglq_(integer *m, integer *n, integer *k, double * a, integer *lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sorglq_,dorglq_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sorglq_,dorglq_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Generates an M-by-N real matrix Q with orthonormal rows, which is defined as the first M rows of a product of K elementary reflectors of order N Q = H(k) . . . H(2) H(1) as returned by gelqf. Arguments ========= A: On entry, the i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by gelqf in the first k rows of its array argument A. On exit, the M-by-N matrix Q. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by gelqf. info: = 0: successful exit < 0: if info = -i, the i-th argument has an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); gelqf($a, $tau, $info); orglq($a, $tau, $info) unless $info != 0; '); pp_def("ormlq", HandleBad => 0, Pars => '[phys]A(k,p); int side(); int trans(); [phys]tau(k); [io,phys]C(m,n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans = \'N\', pside = \'L\'; integer lwork = -1; types(F) %{ extern int sormlq_(char *side, char *trans, integer *m, integer *n, integer *k, float *a, integer *lda, float *tau, float * c__, integer *ldc, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dormlq_(char *side, char *trans, integer *m, integer *n, integer *k, double *a, integer *lda, double *tau, double * c__, integer *ldc, double *work, integer *lwork, integer *info); double tmp_work; %} if($trans()) ptrans = \'T\'; if($side()) pside = \'R\'; $TFD(sormlq_,dormlq_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__k_size), $P(tau), $P(C), &$PRIV(__m_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sormlq_,dormlq_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__k_size), $P(tau), $P(C), &$PRIV(__m_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Overwrites the general real M-by-N matrix C with side = 0 side = 1 trans = 0: Q * C C * Q trans = 1: Q\' * C C * Q\' where Q is a real orthogonal matrix defined as the product of k elementary reflectors Q = H(k) . . . H(2) H(1) as returned by gelqf. Q is of order M if C<side> = 0 and of order N if C<side> = 1. Arguments ========= side: = 0: apply Q or Q\' from the Left; = 1: apply Q or Q\' from the Right. trans: = 0: No transpose, apply Q; = 1: Transpose, apply Q\'. A: The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by gelqf in the first k rows of its array argument A. A is modified by the routine but restored on exit. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by gelqf. C: On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q\'*C or C*Q\' or C*Q. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 50, 100); $a = transpose($a); $info = null; $tau = zeroes(float, 50); gelqf($a, $tau, $info); $c = random(70,50); # $c will contain the result $c->reshape(70,100); $c = transpose($c); ormlq($a, $tau, $c, $info); '); pp_def("geqlf", HandleBad => 0, Pars => '[io,phys]A(m,n); [o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgeqlf_(integer *m, integer *n, float *a, integer * lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgeqlf_(integer *m, integer *n, double *a, integer * lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgeqlf_,dgeqlf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgeqlf_,dgeqlf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes a QL factorization of a real M-by-N matrix A: A = Q * L The matrix Q is represented as a product of elementary reflectors Q = H(k) . . . H(2) H(1), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v\' where tau is a real scalar, and v is a real vector with v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in A(1:m-k+i-1,n-k+i), and tau in TAU(i). Arguments ========= A: On entry, the M-by-N matrix A. On exit, if m >= n, the lower triangle of the subarray A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; if m <= n, the elements on and below the (n-m)-th superdiagonal contain the M-by-N lower trapezoidal matrix L; the remaining elements, with the array tau, represent the orthogonal matrix Q as a product of elementary reflectors. tau: The scalar factors of the elementary reflectors. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); geqlf($a, $tau, $info); '); pp_def("orgql", HandleBad => 0, Pars => '[io,phys]A(m,n); [phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sorgql_(integer *m, integer *n, integer *k, float * a, integer *lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dorgql_(integer *m, integer *n, integer *k, double * a, integer *lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sorgql_,dorgql_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sorgql_,dorgql_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Generates an M-by-N real matrix Q with orthonormal columns, which is defined as the last N columns of a product of K elementary reflectors of order M Q = H(k) . . . H(2) H(1) as returned by geqlf. Arguments ========= A: On entry, the (n-k+i)-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by geqlf in the last k columns of its array argument A. On exit, the M-by-N matrix Q. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by geqlf. info: = 0: successful exit < 0: if info = -i, the i-th argument has an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); geqlf($a, $tau, $info); orgql($a, $tau, $info) unless $info != 0; '); pp_def("ormql", HandleBad => 0, Pars => '[phys]A(p,k); int side(); int trans(); [phys]tau(k); [io,phys]C(m,n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans = \'N\', pside = \'L\'; integer lwork = -1; types(F) %{ extern int sormql_(char *side, char *trans, integer *m, integer *n, integer *k, float *a, integer *lda, float *tau, float * c__, integer *ldc, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dormql_(char *side, char *trans, integer *m, integer *n, integer *k, double *a, integer *lda, double *tau, double * c__, integer *ldc, double *work, integer *lwork, integer *info); double tmp_work; %} if($trans()) ptrans = \'T\'; if($side()) pside = \'R\'; $TFD(sormql_,dormql_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__p_size), $P(tau), $P(C), &$PRIV(__m_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sormql_,dormql_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__p_size), $P(tau), $P(C), &$PRIV(__m_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Overwrites the general real M-by-N matrix C with side = 0 side = 1 trans = 0: Q * C C * Q trans = 1: Q\' * C C * Q\' where Q is a real orthogonal matrix defined as the product of k elementary reflectors Q = H(k) . . . H(2) H(1) as returned by geqlf. Q is of order M if C<side> = 0 and of order N if C<side> = 1. Arguments ========= side: = 0: apply Q or Q\' from the Left; = 1: apply Q or Q\' from the Right. trans: = 0: No transpose, apply Q; = 1: Transpose, apply Q\'. A: The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by geqlf in the last k rows of its array argument A. A is modified by the routine but restored on exit. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by geqlf. C: On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q\'*C or C*Q\' or C*Q. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 50, 100); $a = transpose($a); $info = null; $tau = zeroes(float, 50); geqlf($a, $tau, $info); $c = random(70,50); # $c will contain the result $c->reshape(70,100); $c = transpose($c); ormql($a, $tau, $c, $info); '); pp_def("gerqf", HandleBad => 0, Pars => '[io,phys]A(m,n); [o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgerqf_(integer *m, integer *n, float *a, integer * lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgerqf_(integer *m, integer *n, double *a, integer * lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgerqf_,dgerqf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgerqf_,dgerqf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes an RQ factorization of a real M-by-N matrix A: A = R * Q. The matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(k), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v\' where tau is a real scalar, and v is a real vector with v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). Arguments ========= A: On entry, the M-by-N matrix A. On exit, if m <= n, the upper triangle of the subarray A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; if m >= n, the elements on and above the (m-n)-th subdiagonal contain the M-by-N upper trapezoidal matrix R; the remaining elements, with the array tau, represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors (see Further Details). tau: The scalar factors of the elementary reflectors. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); gerqf($a, $tau, $info); '); pp_def("orgrq", HandleBad => 0, Pars => '[io,phys]A(m,n); [phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sorgrq_(integer *m, integer *n, integer *k, float * a, integer *lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dorgrq_(integer *m, integer *n, integer *k, double * a, integer *lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sorgrq_,dorgrq_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sorgrq_,dorgrq_)( &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Generates an M-by-N real matrix Q with orthonormal rows, which is defined as the last M rows of a product of K elementary reflectors of order N Q = H(1) H(2) . . . H(k) as returned by gerqf. Arguments ========= A: On entry, the (m-k+i)-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by gerqf in the last k rows of its array argument A. On exit, the M-by-N matrix Q. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by gerqf. info: = 0: successful exit < 0: if info = -i, the i-th argument has an illegal value =for example $a = random (float, 100, 50); $info = null; $tau = zeroes(float, 50); gerqf($a, $tau, $info); orgrq($a, $tau, $info) unless $info != 0; '); pp_def("ormrq", HandleBad => 0, Pars => '[phys]A(k,p); int side(); int trans(); [phys]tau(k); [io,phys]C(m,n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans = \'N\', pside = \'L\'; integer lwork = -1; types(F) %{ extern int sormrq_(char *side, char *trans, integer *m, integer *n, integer *k, float *a, integer *lda, float *tau, float * c__, integer *ldc, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dormrq_(char *side, char *trans, integer *m, integer *n, integer *k, double *a, integer *lda, double *tau, double * c__, integer *ldc, double *work, integer *lwork, integer *info); double tmp_work; %} if($trans()) ptrans = \'T\'; if($side()) pside = \'R\'; $TFD(sormrq_,dormrq_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__k_size), $P(tau), $P(C), &$PRIV(__m_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sormrq_,dormrq_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), $P(A), &$PRIV(__k_size), $P(tau), $P(C), &$PRIV(__m_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Overwrites the general real M-by-N matrix C with side = 0 side = 1 trans = 0: Q * C C * Q trans = 1: Q\' * C C * Q\' where Q is a real orthogonal matrix defined as the product of k elementary reflectors Q = H(1) H(2) . . . H(k) as returned by gerqf. Q is of order M if C<side> = 0 and of order N if C<side> = 1. Arguments ========= side: = 0: apply Q or Q\' from the Left; = 1: apply Q or Q\' from the Right. trans: = 0: No transpose, apply Q; = 1: Transpose, apply Q\'. A: The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by gerqf in the last k rows of its array argument A. A is modified by the routine but restored on exit. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by gerqf. C: On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q\'*C or C*Q\' or C*Q. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 50, 100); $a = transpose($a); $info = null; $tau = zeroes(float, 50); gerqf($a, $tau, $info); $c = random(70,50); # $c will contain the result $c->reshape(70,100); $c = transpose($c); ormrq($a, $tau, $c, $info); '); pp_def("tzrzf", HandleBad => 0, Pars => '[io,phys]A(m,n); [o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int stzrzf_(integer *m, integer *n, float *a, integer * lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dtzrzf_(integer *m, integer *n, double *a, integer * lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(stzrzf_,dtzrzf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(stzrzf_,dtzrzf_)( &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Reduces the M-by-N ( M <= N ) real upper trapezoidal matrix A to upper triangular form by means of orthogonal transformations. The upper trapezoidal matrix A is factored as A = ( R 0 ) * Z, where Z is an N-by-N orthogonal matrix and R is an M-by-M upper triangular matrix. The factorization is obtained by Householder\'s method. The kth transformation matrix, Z( k ), which is used to introduce zeros into the ( m - k + 1 )th row of A, is given in the form Z( k ) = ( I 0 ), ( 0 T( k ) ) where T( k ) = I - tau*u( k )*u( k )\', u( k ) = ( 1 ), ( 0 ) ( z( k ) ) tau is a scalar and z( k ) is an ( n - m ) element vector. tau and z( k ) are chosen to annihilate the elements of the kth row of X. The scalar tau is returned in the kth element of C<tau> and the vector u( k ) in the kth row of A, such that the elements of z( k ) are in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in the upper triangular part of A. Z is given by Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). Arguments ========= A: On entry, the leading M-by-N upper trapezoidal part of the array A must contain the matrix to be factorized. On exit, the leading M-by-M upper triangular part of A contains the upper triangular matrix R, and elements M+1 to N of the first M rows of A, with the array tau, represent the orthogonal matrix Z as a product of M elementary reflectors. tau: The scalar factors of the elementary reflectors. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 50, 100); $info = null; $tau = zeroes(float, 50); tzrzf($a, $tau, $info); '); pp_def("ormrz", HandleBad => 0, Pars => '[phys]A(k,p); int side(); int trans(); [phys]tau(k); [io,phys]C(m,n);int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char ptrans = \'N\', pside = \'L\'; integer lwork = -1; integer kk = $SIZE(p) - $SIZE(k); types(F) %{ extern int sormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, float *a, integer *lda, float *tau, float * c__, integer *ldc, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dormrz_(char *side, char *trans, integer *m, integer *n, integer *k, integer *l, double *a, integer *lda, double *tau, double * c__, integer *ldc, double *work, integer *lwork, integer *info); double tmp_work; %} if($trans()) ptrans = \'T\'; if($side()) pside = \'R\'; $TFD(sormrz_,dormrz_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), &kk, $P(A), &$PRIV(__k_size), $P(tau), $P(C), &$PRIV(__m_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sormrz_,dormrz_)( &pside, &ptrans, &$PRIV(__m_size), &$PRIV(__n_size), &$PRIV(__k_size), &kk, $P(A), &$PRIV(__k_size), $P(tau), $P(C), &$PRIV(__m_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Overwrites the general real M-by-N matrix C with side = 0 side = 1 trans = 0: Q * C C * Q trans = 1: Q\' * C C * Q\' where Q is a real orthogonal matrix defined as the product of k elementary reflectors Q = H(1) H(2) . . . H(k) as returned by tzrzf. Q is of order M if C<side> = 0 and of order N if C<side> = 1. Arguments ========= side: = 0: apply Q or Q\' from the Left; = 1: apply Q or Q\' from the Right. trans: = 0: No transpose, apply Q; = 1: Transpose, apply Q\'. A: The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by tzrzf in the last k rows of its array argument A. A is modified by the routine but restored on exit. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by tzrzf. C: On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q\'*C or C*Q\' or C*Q. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (float, 50, 100); $a = transpose($a); $info = null; $tau = zeroes(float, 50); tzrzf($a, $tau, $info); $c = random(70,50); # $c will contain the result $c->reshape(70,100); $c = transpose($c); ormrz($a, $tau, $c, $info); '); pp_def("gehrd", HandleBad => 0, Pars => '[io,phys]A(n,n); int [phys]ilo();int [phys]ihi();[o,phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sgehrd_(integer *n, integer *ilo, integer *ihi, float *a, integer *lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dgehrd_(integer *n, integer *ilo, integer *ihi, double *a, integer *lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sgehrd_,dgehrd_)( &$PRIV(__n_size), $P(ilo), $P(ihi), $P(A), &$PRIV(__n_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sgehrd_,dgehrd_)( &$PRIV(__n_size), $P(ilo), $P(ihi), $P(A), &$PRIV(__n_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Reduces a real general matrix A to upper Hessenberg form H by an orthogonal similarity transformation: Q\' * A * Q = H . Further Details =============== The matrix Q is represented as a product of (ihi-ilo) elementary reflectors Q = H(ilo) H(ilo+1) . . . H(ihi-1). Each H(i) has the form H(i) = I - tau * v * v\' where tau is a real scalar, and v is a real vector with v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on exit in A(i+2:ihi,i), and tau in tau(i). The contents of A are illustrated by the following example, with n = 7, ilo = 2 and ihi = 6: on entry, on exit, ( a a a a a a a ) ( a a h h h h a ) ( a a a a a a ) ( a h h h h a ) ( a a a a a a ) ( h h h h h h ) ( a a a a a a ) ( v2 h h h h h ) ( a a a a a a ) ( v2 v3 h h h h ) ( a a a a a a ) ( v2 v3 v4 h h h ) ( a ) ( a ) where a denotes an element of the original matrix A, h denotes a modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). Arguments ========= ilo: ihi: It is assumed that A is already upper triangular in rows and columns 1:ilo-1 and ihi+1:N. ilo and ihi are normally set by a previous call to gebal; otherwise they should be set to 1 and N respectively. See Further Details. 1 <= ilo <= ihi <= N, if N > 0; ilo=1 and ihi=0, if N=0. A: On entry, the N-by-N general matrix to be reduced. On exit, the upper triangle and the first subdiagonal of A are overwritten with the upper Hessenberg matrix H, and the elements below the first subdiagonal, with the array tau, represent the orthogonal matrix Q as a product of elementary reflectors. See Further Details. tau: The scalar factors of the elementary reflectors (see Further Details). Elements 1:ilo-1 and ihi:N-1 of tau are set to zero. (dimension (N-1)) info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random (50, 50); $info = null; $tau = zeroes(50); gehrd($a, 1, 50, $tau, $info); '); pp_def("orghr", HandleBad => 0, Pars => '[io,phys]A(n,n); int [phys]ilo();int [phys]ihi();[phys]tau(k); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' integer lwork = -1; types(F) %{ extern int sorghr_(integer *n, integer *ilo, integer *ihi, float *a, integer *lda, float *tau, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dorghr_(integer *n, integer *ilo, integer *ihi, double *a, integer *lda, double *tau, double *work, integer *lwork, integer *info); double tmp_work; %} $TFD(sorghr_,dorghr_)( &$PRIV(__n_size), $P(ilo), $P(ihi), $P(A), &$PRIV(__n_size), $P(tau), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(sorghr_,dorghr_)( &$PRIV(__n_size), $P(ilo), $P(ihi), $P(A), &$PRIV(__n_size), $P(tau), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Generates a real orthogonal matrix Q which is defined as the product of ihi-ilo elementary reflectors of order N, as returned by C<gehrd>: Q = H(ilo) H(ilo+1) . . . H(ihi-1). Arguments ========= ilo: ihi: ilo and ihi must have the same values as in the previous call of gehrd. Q is equal to the unit matrix except in the submatrix Q(ilo+1:ihi,ilo+1:ihi). 1 <= ilo <= ihi <= N, if N > 0; ilo=1 and ihi=0, if N=0. A: On entry, the vectors which define the elementary reflectors, as returned by gehrd. On exit, the N-by-N orthogonal matrix Q. tau: tau(i) must contain the scalar factor of the elementary reflector H(i), as returned by gehrd.(dimension (N-1)) info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (50, 50); $info = null; $tau = zeroes(50); gehrd($a, 1, 50, $tau, $info); orghr($a, 1, 50, $tau, $info); '); pp_def("hseqr", HandleBad => 0, Pars => '[io,phys]H(n,n); int job();int compz();int [phys]ilo();int [phys]ihi();[o,phys]wr(n); [o,phys]wi(n);[o,phys]Z(m,m); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pcompz; char pjob = \'E\'; integer lwork = -1; types(F) %{ extern int shseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, float *h__, integer *ldh, float *wr, float *wi, float *z__, integer *ldz, float *work, integer *lwork, integer *info); float tmp_work; %} types(D) %{ extern int dhseqr_(char *job, char *compz, integer *n, integer *ilo, integer *ihi, double *h__, integer *ldh, double *wr, double *wi, double *z__, integer *ldz, double *work, integer *lwork, integer *info); double tmp_work; %} if($job()) pjob = \'S\'; switch ($compz()) { case 1: pcompz = \'I\'; break; case 2: pcompz = \'V\'; break; default: pcompz = \'N\'; } $TFD(shseqr_,dhseqr_)( &pjob, &pcompz, &$PRIV(__n_size), $P(ilo), $P(ihi), $P(H), &$PRIV(__n_size), $P(wr), $P(wi), $P(Z), &$PRIV(__m_size), &tmp_work, &lwork, $P(info)); lwork = (integer )tmp_work; { types(F) %{ float *work = (float *)malloc(lwork * sizeof(float)); %} types(D) %{ double *work = (double *)malloc(lwork * sizeof(double)); %} $TFD(shseqr_,dhseqr_)( &pjob, &pcompz, &$PRIV(__n_size), $P(ilo), $P(ihi), $P(H), &$PRIV(__n_size), $P(wr), $P(wi), $P(Z), &$PRIV(__m_size), work, &lwork, $P(info)); free(work); } ', Doc => ' =for ref Computes the eigenvalues of a real upper Hessenberg matrix H and, optionally, the matrices T and Z from the Schur decomposition H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur form), and Z is the orthogonal matrix of Schur vectors. Optionally Z may be postmultiplied into an input orthogonal matrix Q, so that this routine can give the Schur factorization of a matrix A which has been reduced to the Hessenberg form H by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. Arguments ========= job: = 0: compute eigenvalues only; = 1: compute eigenvalues and the Schur form T. compz: = 0: no Schur vectors are computed; = 1: Z is initialized to the unit matrix and the matrix Z of Schur vectors of H is returned; = 2: Z must contain an orthogonal matrix Q on entry, and the product Q*Z is returned. ilo: ihi: It is assumed that H is already upper triangular in rows and columns 1:ilo-1 and ihi+1:N. ilo and ihi are normally set by a previous call to gebal, and then passed to gehrd when the matrix output by gebal is reduced to Hessenberg form. Otherwise ilo and ihi should be set to 1 and N respectively. 1 <= ilo <= ihi <= N, if N > 0; ilo=1 and ihi=0, if N=0. H: On entry, the upper Hessenberg matrix H. On exit, if job = 1, H contains the upper quasi-triangular matrix T from the Schur decomposition (the Schur form); 2-by-2 diagonal blocks (corresponding to complex conjugate pairs of eigenvalues) are returned in standard form, with H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If job = 0, the contents of H are unspecified on exit. wr: wi: The real and imaginary parts, respectively, of the computed eigenvalues. If two eigenvalues are computed as a complex conjugate pair, they are stored in consecutive elements of wr and wi, say the i-th and (i+1)th, with wi(i) > 0 and wi(i+1) < 0. If job = 1, the eigenvalues are stored in the same order as on the diagonal of the Schur form returned in H, with wr(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal block, wi(i) = sqrt(H(i+1,i)*H(i,i+1)) and wi(i+1) = -wi(i). Z: If compz = 0: Z is not referenced. If compz = 1: on entry, Z need not be set, and on exit, Z contains the orthogonal matrix Z of the Schur vectors of H. If compz = 2: on entry Z must contain an N-by-N matrix Q, which is assumed to be equal to the unit matrix except for the submatrix Z(ilo:ihi,ilo:ihi); on exit Z contains Q*Z. Normally Q is the orthogonal matrix generated by orghr after the call to gehrd which formed the Hessenberg matrix H. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value > 0: if info = i, hseqr failed to compute all of the eigenvalues in a total of 30*(ihi-ilo+1) iterations; elements 1:ilo-1 and i+1:n of wr and wi contain those eigenvalues which have been successfully computed. =for example $a = random (50, 50); $info = null; $tau = zeroes(50); $z= zeroes(1,1); gehrd($a, 1, 50, $tau, $info); hseqr($a,0,0,1,50,($wr=null),($wi=null),$z,$info); '); pp_def("trevc", HandleBad => 0, Pars => '[io,phys]T(n,n); int side();int howmny();int [phys]select(q);[io,phys]VL(m,r); [io,phys]VR(p,s);int [o,phys]m(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pside,phowmny; integer mm = 0; types(F) %{ extern int strevc_(char *side, char *howmny, logical *select, integer *n, float *t, integer *ldt, float *vl, integer * ldvl, float *vr, integer *ldvr, integer *mm, integer *m, float *work, integer *info); float *work = (float *) malloc(3* $SIZE(n) *sizeof(float)); %} types(D) %{ extern int dtrevc_(char *side, char *howmny, logical *select, integer *n, double *t, integer *ldt, double *vl, integer * ldvl, double *vr, integer *ldvr, integer *mm, integer *m, double *work, integer *info); double *work = (double *) malloc (3 * $SIZE(n) * sizeof(double)); %} switch ($howmny()) { case 1: phowmny = \'B\'; break; case 2: phowmny = \'S\'; break; default: phowmny = \'A\'; } switch ($side()) { case 1: pside = \'R\'; mm = $SIZE(s); break; case 2: pside = \'L\'; mm = $SIZE(r); break; default:pside = \'B\'; mm = $SIZE(s); } $TFD(strevc_,dtrevc_)( &pside, &phowmny, $P(select), &$PRIV(__n_size), $P(T), &$PRIV(__n_size), $P(VL), &$PRIV(__m_size), $P(VR), &$PRIV(__p_size), &mm, $P(m), work, $P(info)); free(work); ', Doc => ' =for ref Computes some or all of the right and/or left eigenvectors of a real upper quasi-triangular matrix T. The right eigenvector x and the left eigenvector y of T corresponding to an eigenvalue w are defined by: T*x = w*x, y\'*T = w*y\' where y\' denotes the conjugate transpose of the vector y. If all eigenvectors are requested, the routine may either return the matrices X and/or Y of right or left eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an input orthogonal matrix. If T was obtained from the real-Schur factorization of an original matrix A = Q*T*Q\', then Q*X and Q*Y are the matrices of right or left eigenvectors of A. T must be in Schur canonical form (as returned by hseqr), that is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block has its diagonal elements equal and its off-diagonal elements of opposite sign. Corresponding to each 2-by-2 diagonal block is a complex conjugate pair of eigenvalues and eigenvectors; only one eigenvector of the pair is computed, namely the one corresponding to the eigenvalue with positive imaginary part. Further Details =============== The algorithm used in this program is basically backward (forward) substitution, with scaling to make the the code robust against possible overflow. Each eigenvector is normalized so that the element of largest magnitude has magnitude 1; here the magnitude of a complex number (x,y) is taken to be |x| + |y|. Arguments ========= side: = 0 : compute both right and left eigenvectors; = 1 : compute right eigenvectors only; = 2 : compute left eigenvectors only. howmny: = 0: compute all right and/or left eigenvectors; = 1: compute all right and/or left eigenvectors, and backtransform them using the input matrices supplied in VR and/or VL; = 2: compute selected right and/or left eigenvectors, specified by the logical array select. select: If howmny = 2, select specifies the eigenvectors to be computed. If howmny = 0 or 1, select is not referenced. To select the real eigenvector corresponding to a real eigenvalue w(j), select(j) must be set to TRUE. To select the complex eigenvector corresponding to a complex conjugate pair w(j) and w(j+1), either select(j) or select(j+1) must be set to TRUE; then on exit select(j) is TRUE and select(j+1) is FALSE. T: The upper quasi-triangular matrix T in Schur canonical form. VL: On entry, if side = 2 or 0 and howmny = 1, VL must contain an N-by-N matrix Q (usually the orthogonal matrix Q of Schur vectors returned by hseqr). On exit, if side = 2 or 0, VL contains: if howmny = 0, the matrix Y of left eigenvectors of T; VL has the same quasi-lower triangular form as T\'. If T(i,i) is a real eigenvalue, then the i-th column VL(i) of VL is its corresponding eigenvector. If T(i:i+1,i:i+1) is a 2-by-2 block whose eigenvalues are complex-conjugate eigenvalues of T, then VL(i)+sqrt(-1)*VL(i+1) is the complex eigenvector corresponding to the eigenvalue with positive real part. if howmny = 1, the matrix Q*Y; if howmny = 2, the left eigenvectors of T specified by select, stored consecutively in the columns of VL, in the same order as their eigenvalues. A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part, and the second the imaginary part. If side = 1, VL is not referenced. VR: On entry, if side = 1 or 0 and howmny = 1, VR must contain an N-by-N matrix Q (usually the orthogonal matrix Q of Schur vectors returned by hseqr). On exit, if side = 1 or 0, VR contains: if howmny = 0, the matrix X of right eigenvectors of T; VR has the same quasi-upper triangular form as T. If T(i,i) is a real eigenvalue, then the i-th column VR(i) of VR is its corresponding eigenvector. If T(i:i+1,i:i+1) is a 2-by-2 block whose eigenvalues are complex-conjugate eigenvalues of T, then VR(i)+sqrt(-1)*VR(i+1) is the complex eigenvector corresponding to the eigenvalue with positive real part. if howmny = 1, the matrix Q*X; if howmny = 2, the right eigenvectors of T specified by select, stored consecutively in the columns of VR, in the same order as their eigenvalues. A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part and the second the imaginary part. If side = 2, VR is not referenced. m: The number of columns in the arrays VL and/or VR actually used to store the eigenvectors. If howmny = 0 or 1, m is set to N. Each selected real eigenvector occupies one column and each selected complex eigenvector occupies two columns. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random (50, 50); $info = null; $tau = zeroes(50); $z= zeroes(1,1); gehrd($a, 1, 50, $tau, $info); hseqr($a,0,0,1,50,($wr=null),($wi=null),$z,$info); '); pp_def("tgevc", HandleBad => 0, Pars => '[io,phys]A(n,n); int side();int howmny();[io,phys]B(n,n);int [phys]select(q);[io,phys]VL(m,r); [io,phys]VR(p,s);int [o,phys]m(); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pside,phowmny; integer mm = 0; types(F) %{ extern int stgevc_(char *side, char *howmny, logical *select, integer *n, float *a, integer *lda, float *b,integer *ldb, float *vl, integer *ldvl, float *vr, integer *ldvr, integer *mm, integer *m, float *work, integer *info); float *work = (float *) malloc(6* $SIZE(n) *sizeof(float)); %} types(D) %{ extern int dtgevc_(char *side, char *howmny, logical *select, integer *n, double *a, integer *lda, double *b, integer *ldb, double *vl, integer *ldvl, double *vr, integer *ldvr, integer *mm, integer *m, double *work, integer *info); double *work = (double *) malloc (6 * $SIZE(n) * sizeof(double)); %} switch ($howmny()) { case 1: phowmny = \'B\'; break; case 2: phowmny = \'S\'; break; default: phowmny = \'A\'; } switch ($side()) { case 1: pside = \'R\'; mm = $SIZE(s); break; case 2: pside = \'L\'; mm = $SIZE(r); break; default:pside = \'B\'; mm = $SIZE(s); } $TFD(stgevc_,dtgevc_)( &pside, &phowmny, $P(select), &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(B), &$PRIV(__n_size), $P(VL), &$PRIV(__m_size), $P(VR), &$PRIV(__p_size), &mm, $P(m), work, $P(info)); free(work); ', Doc => ' =for ref Computes some or all of the right and/or left generalized eigenvectors of a pair of real upper triangular matrices (A,B). The right generalized eigenvector x and the left generalized eigenvector y of (A,B) corresponding to a generalized eigenvalue w are defined by: (A - wB) * x = 0 and y**H * (A - wB) = 0 where y**H denotes the conjugate tranpose of y. If an eigenvalue w is determined by zero diagonal elements of both A and B, a unit vector is returned as the corresponding eigenvector. If all eigenvectors are requested, the routine may either return the matrices X and/or Y of right or left eigenvectors of (A,B), or the products Z*X and/or Q*Y, where Z and Q are input orthogonal matrices. If (A,B) was obtained from the generalized real-Schur factorization of an original pair of matrices (A0,B0) = (Q*A*Z**H,Q*B*Z**H), then Z*X and Q*Y are the matrices of right or left eigenvectors of A. A must be block upper triangular, with 1-by-1 and 2-by-2 diagonal blocks. Corresponding to each 2-by-2 diagonal block is a complex conjugate pair of eigenvalues and eigenvectors; only one eigenvector of the pair is computed, namely the one corresponding to the eigenvalue with positive imaginary part. Arguments ========= side: = 0 : compute both right and left eigenvectors; = 1 : compute right eigenvectors only; = 2 : compute left eigenvectors only. howmny: = 0 : compute all right and/or left eigenvectors; = 1 : compute all right and/or left eigenvectors, and backtransform them using the input matrices supplied in VR and/or VL; = 2 : compute selected right and/or left eigenvectors, specified by the logical array select. select: If howmny=2, select specifies the eigenvectors to be computed. If howmny=0 or 1, select is not referenced. To select the real eigenvector corresponding to the real eigenvalue w(j), select(j) must be set to TRUE To select the complex eigenvector corresponding to a complex conjugate pair w(j) and w(j+1), either select(j) or select(j+1) must be set to TRUE. A: The upper quasi-triangular matrix A. B: The upper triangular matrix B. If A has a 2-by-2 diagonal block, then the corresponding 2-by-2 block of B must be diagonal with positive elements. VL: On entry, if side = 2 or 0 and howmny = 1, VL must contain an N-by-N matrix Q (usually the orthogonal matrix Q of left Schur vectors returned by hgqez). On exit, if side = 2 or 0, VL contains: if howmny = 0, the matrix Y of left eigenvectors of (A,B); if howmny = 1, the matrix Q*Y; if howmny = 2, the left eigenvectors of (A,B) specified by select, stored consecutively in the columns of VL, in the same order as their eigenvalues. If side = 1, VL is not referenced. A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part, and the second the imaginary part. VR: On entry, if side = 1 or 0 and howmny = 1, VR must contain an N-by-N matrix Q (usually the orthogonal matrix Z of right Schur vectors returned by hgeqz). On exit, if side = 1 or 0, VR contains: if howmny = 0, the matrix X of right eigenvectors of (A,B); if howmny = 1, the matrix Z*X; if howmny = 2, the right eigenvectors of (A,B) specified by select, stored consecutively in the columns of VR, in the same order as their eigenvalues. If side = 2, VR is not referenced. A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part and the second the imaginary part. M: The number of columns in the arrays VL and/or VR actually used to store the eigenvectors. If howmny = 0 or 1, M is set to N. Each selected real eigenvector occupies one column and each selected complex eigenvector occupies two columns. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. > 0: the 2-by-2 block (info:info+1) does not have a complex eigenvalue. =for example $a = random (50, 50); $info = null; $tau = zeroes(50); $z= zeroes(1,1); gehrd($a, 1, 50, $tau, $info); hseqr($a,0,0,1,50,($wr=null),($wi=null),$z,$info); '); pp_def("gebal", HandleBad => 0, Pars => '[io,phys]A(n,n); int job(); int [o,phys]ilo();int [o,phys]ihi();[o,phys]scale(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pjob; types(F) %{ extern int sgebal_(char *job, integer *n, float *a, integer * lda, integer *ilo, integer *ihi, float *scale, integer *info); %} types(D) %{ extern int dgebal_(char *job, integer *n, double *a, integer * lda, integer *ilo, integer *ihi, double *scale, integer *info); %} switch ($job()) { case 1: pjob = \'P\'; break; case 2: pjob = \'S\'; break; case 3: pjob = \'B\'; break; default: pjob = \'N\'; } $TFD(sgebal_,dgebal_)( &pjob, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), $P(ilo), $P(ihi), $P(scale), $P(info)); ', Doc => ' =for ref Balances a general real matrix A. This involves, first, permuting A by a similarity transformation to isolate eigenvalues in the first 1 to ilo-1 and last ihi+1 to N elements on the diagonal; and second, applying a diagonal similarity transformation to rows and columns ilo to ihi to make the rows and columns as close in norm as possible. Both steps are optional. Balancing may reduce the 1-norm of the matrix, and improve the accuracy of the computed eigenvalues and/or eigenvectors. Further Details =============== The permutations consist of row and column interchanges which put the matrix in the form ( T1 X Y ) P A P = ( 0 B Z ) ( 0 0 T2 ) where T1 and T2 are upper triangular matrices whose eigenvalues lie along the diagonal. The column indices ilo and ihi mark the starting and ending columns of the submatrix B. Balancing consists of applying a diagonal similarity transformation inv(D) * B * D to make the 1-norms of each row of B and its corresponding column nearly equal. The output matrix is ( T1 X*D Y ) ( 0 inv(D)*B*D inv(D)*Z ). ( 0 0 T2 ) Information about the permutations P and the diagonal matrix D is returned in the vector C<scale>. Arguments ========= job: Specifies the operations to be performed on A: = 0: none: simply set ilo = 1, ihi = N, scale(I) = 1.0 for i = 1,...,N; = 1: permute only; = 2: scale only; = 3: both permute and scale. A: On entry, the input matrix A. On exit, A is overwritten by the balanced matrix. If job = 0, A is not referenced. See Further Details. ilo: ihi: ilo and ihi are set to integers such that on exit A(i,j) = 0 if i > j and j = 1,...,ilo-1 or I = ihi+1,...,N. If job = 0 or 2, ilo = 1 and ihi = N. scale: Details of the permutations and scaling factors applied to A. If P(j) is the index of the row and column interchanged with row and column j and D(j) is the scaling factor applied to row and column j, then scale(j) = P(j) for j = 1,...,ilo-1 = D(j) for j = ilo,...,ihi = P(j) for j = ihi+1,...,N. The order in which the interchanges are made is N to ihi+1, then 1 to ilo-1. info: = 0: successful exit. < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random (50, 50); $scale = zeroes(50); $info = null; $ilo = null; $ihi = null; gebal($a, $ilo, $ihi, $scale, $info); '); pp_def("gebak", HandleBad => 0, Pars => '[io,phys]A(n,m); int job(); int side();int [phys]ilo();int [phys]ihi();[phys]scale(n); int [o,phys]info()', GenericTypes => [F,D], Code => generate_code ' char pjob; char pside =\'L\' ; types(F) %{ extern int sgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, float *scale, integer *m, float *v, integer * ldv, integer *info); %} types(D) %{ extern int dgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, double *scale, integer *m, double *v, integer * ldv, integer *info); %} switch ($job()) { case 1: pjob = \'P\'; break; case 2: pjob = \'S\'; break; case 3: pjob = \'B\'; break; default: pjob = \'N\'; } if ($side()) pside = \'R\'; $TFD(sgebak_,dgebak_)( &pjob, &pside, &$PRIV(__n_size), $P(ilo), $P(ihi), $P(scale), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), $P(info)); ', Doc => ' =for ref gebak forms the right or left eigenvectors of a real general matrix by backward transformation on the computed eigenvectors of the balanced matrix output by gebal. Arguments ========= A: On entry, the matrix of right or left eigenvectors to be transformed, as returned by hsein or trevc. On exit, A is overwritten by the transformed eigenvectors. job: Specifies the type of backward transformation required: = 0 , do nothing, return immediately; = 1, do backward transformation for permutation only; = 2, do backward transformation for scaling only; = 3, do backward transformations for both permutation and scaling. job must be the same as the argument job supplied to gebal. side: = 0: V contains left eigenvectors. = 1: V contains right eigenvectors; ilo: ihi: The integers ilo and ihi determined by gebal. 1 <= ilo <= ihi <= N, if N > 0; ilo=1 and ihi=0, if N=0. Here N is the the number of rows of the matrix A. scale: Details of the permutation and scaling factors, as returned by gebal. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value. =for example $a = random (50, 50); $scale = zeroes(50); $info = null; $ilo = null; $ihi = null; gebal($a, $ilo, $ihi, $scale, $info); # Compute eigenvectors ($ev) gebak($ev, $ilo, $ihi, $scale, $info); '); pp_def("lange", HandleBad => 0, Pars => '[phys]A(n,m); int norm(); [o]b()', GenericTypes => [F,D], Code => ' char pnorm; types(F) %{ extern float slange_(char *norm, integer *m, integer *n, float *a, integer *lda, float *work); float *work; %} types(D) %{ extern double dlange_(char *norm, integer *m, integer *n, double *a, integer *lda, double *work); double *work; %} switch ($norm()) { case 1: pnorm = \'O\'; break; case 2: pnorm = \'I\'; types(F) %{ work = (float *)malloc($PRIV(__n_size) * sizeof(float)); %} types(D) %{ work = (double *)malloc($PRIV(__n_size) * sizeof(double)); %} break; case 3: pnorm = \'F\'; break; default: pnorm = \'M\'; } $b() = $TFD(slange_,dlange_)( &pnorm, &$PRIV(__n_size), &$PRIV(__m_size), $P(A), &$PRIV(__n_size), work); if ($norm() == 2) free (work); ', Doc => ' =for ref Computes the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real matrix A. Description =========== returns the value lange = ( max(abs(A(i,j))), norm = 0 ( ( norm1(A), norm = 1 ( ( normI(A), norm = 2 ( ( normF(A), norm = 3 where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. Arguments ========= norm: Specifies the value to be returned in lange as described above. A: The n by m matrix A. =for example $a = random (float, 100, 100); $norm = $a->lange(1); '); pp_def("lansy", HandleBad => 0, Pars => '[phys]A(n,n); int uplo(); int norm(); [o]b()', GenericTypes => [F,D], Code => ' char pnorm, puplo = \'U\'; types(F) %{ extern float slansy_(char *norm, char *uplo, integer *n, float *a, integer *lda, float *work); float *work; %} types(D) %{ extern double dlansy_(char *norm, char *uplo, integer *n, double *a, integer *lda, double *work); double *work; %} switch ($norm()) { case 1: pnorm = \'O\'; types(F) %{ work = (float *)malloc($PRIV(__n_size) * sizeof(float)); %} types(D) %{ work = (double *)malloc($PRIV(__n_size) * sizeof(double)); %} break; case 2: pnorm = \'I\'; types(F) %{ work = (float *)malloc($PRIV(__n_size) * sizeof(float)); %} types(D) %{ work = (double *)malloc($PRIV(__n_size) * sizeof(double)); %} break; case 3: pnorm = \'F\'; break; default: pnorm = \'M\'; } if($uplo()) puplo = \'L\'; $b() = $TFD(slansy_,dlansy_)( &pnorm, &puplo, &$PRIV(__n_size), $P(A), &$PRIV(__n_size), work); if ($norm() == 2 || $norm() == 1) free (work); ', Doc => ' =for ref Computes the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix A. Description =========== returns the value lansy = ( max(abs(A(i,j))), norm = 0 ( ( norm1(A), norm = 1 ( ( normI(A), norm = 2 ( ( normF(A), norm = 3 where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. norm: Specifies the value to be returned in lansy as described above. uplo: Specifies whether the upper or lower triangular part of the symmetric matrix A is to be referenced. = 0: Upper triangular part of A is referenced = 1: Lower triangular part of A is referenced A: The symmetric matrix A. If uplo = 0, the leading n by n upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading n by n lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. =for example # Assume $a is symmetric $a = random (float, 100, 100); $norm = $a->lansy(1, 1); '); pp_def("lantr", HandleBad => 0, Pars => '[phys]A(m,n);int uplo();int norm();int diag();[o]b()', GenericTypes => [F,D], Code => ' char pnorm, puplo = \'U\'; char pdiag = \'N\'; types(F) %{ extern float slantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, float *a, integer *lda, float *work); float *work; %} types(D) %{ extern double dlantr_(char *norm, char *uplo, char *diag, integer *m, integer *n, double *a, integer *lda, double *work); double *work; %} switch ($norm()) { case 1: pnorm = \'O\'; break; case 2: pnorm = \'I\'; types(F) %{ work = (float *)malloc($PRIV(__m_size) * sizeof(float)); %} types(D) %{ work = (double *)malloc($PRIV(__m_size) * sizeof(double)); %} break; case 3: pnorm = \'F\'; break; default: pnorm = \'M\'; } if($uplo()) puplo = \'L\'; if($diag()) pdiag = \'U\'; $b() = $TFD(slantr_,dlantr_)( &pnorm, &puplo, &pdiag, &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__n_size), work); if ($norm() == 2) free (work); ', Doc => ' =for ref Computes the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix A. Description =========== returns the value lantr = ( max(abs(A(i,j))), norm = 0 ( ( norm1(A), norm = 1 ( ( normI(A), norm = 2 ( ( normF(A), norm = 3 where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that max(abs(A(i,j))) is not a matrix norm. norm: Specifies the value to be returned in lantr as described above. uplo: Specifies whether the matrix A is upper or lower trapezoidal. = 0: Upper triangular part of A is referenced = 1: Lower triangular part of A is referenced Note that A is triangular instead of trapezoidal if M = N. diag: Specifies whether or not the matrix A has unit diagonal. = 0: Non-unit diagonal = 1: Unit diagonal A: The trapezoidal matrix A (A is triangular if m = n). If uplo = 0, the leading m by n upper trapezoidal part of the array A contains the upper trapezoidal matrix, and the strictly lower triangular part of A is not referenced. If uplo = 1, the leading m by n lower trapezoidal part of the array A contains the lower trapezoidal matrix, and the strictly upper triangular part of A is not referenced. Note that when diag = 1, the diagonal elements of A are not referenced and are assumed to be one. =for example # Assume $a is upper triangular $a = random (float, 100, 100); $norm = $a->lantr(1, 1, 0); '); ################################################################################ # # BLAS ROUTINES # ################################################################################ pp_def("gemm", HandleBad => 0, Pars => '[phys]A(m,n); int transa(); int transb(); [phys]B(p,q);[phys]alpha(); [phys]beta(); [io,phys]C(r,s)', GenericTypes => [F,D], Code => ' char ptransa = \'N\'; char ptransb = \'N\'; types(F) %{ extern int sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *ldb, float *beta, float *c__, integer *ldc); %} types(D) %{ extern int dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, double *alpha, double *a, integer *lda, double *b, integer *ldb, double *beta, double *c__, integer *ldc); %} integer kk = $transa() ? $SIZE(m) : $SIZE(n); if ($transa()) ptransa = \'T\'; if ($transb()) ptransb = \'T\'; $TFD(sgemm_,dgemm_)( &ptransa, &ptransb, &$PRIV(__r_size), &$PRIV(__s_size), &kk, $P(alpha), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size), $P(beta), $P(C), &$PRIV(__r_size)); ', Doc => ' =for ref Performs one of the matrix-matrix operations C := alpha*op( A )*op( B ) + beta*C, where op( X ) is one of p( X ) = X or op( X ) = X\', alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. Parameters ========== transa: On entry, transa specifies the form of op( A ) to be used in the matrix multiplication as follows: transa = 0, op( A ) = A. transa = 1, op( A ) = A\'. transb: On entry, transb specifies the form of op( B ) to be used in the matrix multiplication as follows: transb = 0, op( B ) = B. transb = 1, op( B ) = B\'. alpha: On entry, alpha specifies the scalar alpha. A: Before entry with transa = 0, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. B: Before entry with transb = 0, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. beta: On entry, beta specifies the scalar beta. When beta is supplied as zero then C need not be set on input. C: Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). =for example $a = random(5,4); $b = random(5,4); $alpha = pdl(0.5); $beta = pdl(0); $c = zeroes(5,5); gemm($a, 0, 1,$b, $alpha, $beta, $c); '); if ($config{CBLAS}){ pp_def("rmgemm", HandleBad => 0, Pars => '[phys]A(m,n); int transa(); int transb(); [phys]B(p,q);[phys]alpha(); [phys]beta(); [io,phys]C(r,s)', GenericTypes => [F,D], Code => ' int ptransa = CblasNoTrans; int ptransb = CblasNoTrans; types(F) %{ extern void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc); %} types(D) %{ extern void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc); %} integer kk = $transa() ? $SIZE(n) : $SIZE(m); if ($transa()) ptransa = CblasTrans; if ($transb()) ptransb = CblasTrans; $TFD(cblas_sgemm,cblas_dgemm)( CblasRowMajor, ptransa, ptransb, $PRIV(__s_size), $PRIV(__r_size), kk, $alpha(), $P(A), $PRIV(__m_size), $P(B), $PRIV(__p_size), $beta(), $P(C), $PRIV(__r_size)); ', Doc => ' =for ref Row major version of gemm =for example $a = random(5,4); $b = random(5,4); $alpha = pdl(0.5); $beta = pdl(0); $c = zeroes(4,4); rmgemm($a, 0, 1,$b, $alpha, $beta, $c); '); } pp_def("mmult", HandleBad => 0, Pars => '[phys]A(m,n); [phys]B(p,m); [o,phys]C(p,n)', GenericTypes => [F,D], Code => ' char ptrans = \'N\'; types(F) %{ extern int sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *ldb, float *beta, float *c__, integer *ldc); float alpha = 1; float beta = 0; %} types(D) %{ extern int dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, double *alpha, double *a, integer *lda, double *b, integer *ldb, double *beta, double *c__, integer *ldc); double alpha = 1; double beta = 0; %} $TFD(sgemm_,dgemm_)( &ptrans, &ptrans, &$PRIV(__p_size), &$PRIV(__n_size), &$PRIV(__m_size), &alpha, $P(B), &$PRIV(__p_size), $P(A), &$PRIV(__m_size), &beta, $P(C), &$PRIV(__p_size)); ', Doc => ' =for ref Blas matrix multiplication based on gemm '); if ($config{STRASSEN}){ pp_def("smmult", HandleBad => 0, Pars => '[phys]A(m,n); [phys]B(p,m); [o,phys]C(p,n)', GenericTypes => [F,D], Code => ' char ptrans = \'N\'; types(F) %{ extern int sgemmb_(char *transa, char *transb, integer *m, integer * n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *ldb, float *beta, float *c__, integer *ldc); float alpha = 1; float beta = 0; %} types(D) %{ extern int dgemmb_(char *transa, char *transb, integer *m, integer * n, integer *k, double *alpha, double *a, integer *lda, double *b, integer *ldb, double *beta, double *c__, integer *ldc); double alpha = 1; double beta = 0; %} $TFD(sgemmb_,dgemmb_)( &ptrans, &ptrans, &$PRIV(__p_size), &$PRIV(__n_size), &$PRIV(__m_size), &alpha, $P(B), &$PRIV(__p_size), $P(A), &$PRIV(__m_size), &beta, $P(C), &$PRIV(__p_size)); ', Doc => ' =for ref Blas matrix multiplication based on Strassen Algorithm. '); } pp_def("crossprod", HandleBad => 0, Pars => '[phys]A(n,m); [phys]B(p,m); [o,phys]C(p,n)', GenericTypes => [F,D], Code => ' char btrans = \'N\'; char atrans = \'T\'; types(F) %{ extern int sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *ldb, float *beta, float *c__, integer *ldc); float alpha = 1; float beta = 0; %} types(D) %{ extern int dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, double *alpha, double *a, integer *lda, double *b, integer *ldb, double *beta, double *c__, integer *ldc); double alpha = 1; double beta = 0; %} $TFD(sgemm_,dgemm_)( &btrans, &atrans, &$PRIV(__p_size), &$PRIV(__n_size), &$PRIV(__m_size), &alpha, $P(B), &$PRIV(__p_size), $P(A), &$PRIV(__n_size), &beta, $P(C), &$PRIV(__p_size)); ', Doc => ' =for ref Blas matrix cross product based on gemm '); pp_def("syrk", HandleBad => 0, Pars => '[phys]A(m,n); int uplo(); int trans(); [phys]alpha(); [phys]beta(); [io,phys]C(p,p)', GenericTypes => [F,D], Code => ' char puplo = \'U\'; char ptrans = \'N\'; types(F) %{ extern int ssyrk_(char *uplo, char *trans, integer *n, integer *k, float *alpha, float *a, integer *lda, float *beta, float *c__, integer *ldc); %} types(D) %{ extern int dsyrk_(char *uplo, char *trans, integer *n, integer *k, double *alpha, double *a, integer *lda, double *beta, double *c__, integer *ldc); %} integer kk = $trans() ? $SIZE(m) : $SIZE(n); if ($uplo()) puplo = \'L\'; if ($trans()) ptrans = \'T\'; $TFD(ssyrk_,dsyrk_)( &puplo, &ptrans, &$PRIV(__p_size), &kk, $P(alpha), $P(A), &$PRIV(__m_size), $P(beta), $P(C), &$PRIV(__p_size)); ', Doc => ' =for ref Performs one of the symmetric rank k operations C := alpha*A*A\' + beta*C, or C := alpha*A\'*A + beta*C, where alpha and beta are scalars, C is an n by n symmetric matrix and A is an n by k matrix in the first case and a k by n matrix in the second case. Parameters ========== uplo: On entry, uplo specifies whether the upper or lower triangular part of the array C is to be referenced as follows: uplo = 0 Only the upper triangular part of C is to be referenced. uplo = 1 Only the lower triangular part of C is to be referenced. Unchanged on exit. trans: On entry, trans specifies the operation to be performed as follows: trans = 0 C := alpha*A*A\' + beta*C. trans = 1 C := alpha*A\'*A + beta*C. alpha: On entry, alpha specifies the scalar alpha. Unchanged on exit. A: Before entry with trans = 0, the leading n by k part of the array A must contain the matrix A, otherwise the leading k by n part of the array A must contain the matrix A. beta: On entry, beta specifies the scalar beta. C: Before entry with uplo = 0, the leading n by n upper triangular part of the array C must contain the upper triangular part of the symmetric matrix and the strictly lower triangular part of C is not referenced. On exit, the upper triangular part of the array C is overwritten by the upper triangular part of the updated matrix. Before entry with uplo = 1, the leading n by n lower triangular part of the array C must contain the lower triangular part of the symmetric matrix and the strictly upper triangular part of C is not referenced. On exit, the lower triangular part of the array C is overwritten by the lower triangular part of the updated matrix. =for example $a = random(5,4); $b = zeroes(5,5); $alpha = 1; $beta = 0; syrk ($a, 1,0,$alpha, $beta , $b); '); if ($config{CBLAS}){ pp_def("rmsyrk", HandleBad => 0, Pars => '[phys]A(m,n); int uplo(); int trans(); [phys]alpha(); [phys]beta(); [io,phys]C(p,p)', GenericTypes => [F,D], Code => ' int puplo = 121; int ptrans = 111; types(F) %{ extern void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc); %} types(D) %{ extern void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc); %} integer kk = $trans() ? $SIZE(n) : $SIZE(m); if ($uplo()) puplo = 122; if ($trans()) ptrans = 112; $TFD(cblas_ssyrk,cblas_dsyrk)( 101, puplo, ptrans, $PRIV(__p_size), kk, $alpha(), $P(A), $PRIV(__m_size), $beta(), $P(C), $PRIV(__p_size)); ', Doc => ' =for ref Row major version of syrk =for example $a = random(5,4); $b = zeroes(4,4); $alpha = 1; $beta = 0; rmsyrk ($a, 1,0,$alpha, $beta , $b); '); } pp_def("dot", HandleBad => 0, Pars => '[phys]a(n);int [phys]inca();[phys]b(m);int [phys]incb();[o,phys]c()', GenericTypes => [F,D], Code => ' types(F) %{ extern float sdot_(integer *n, float *dx, integer *incx, float *dy, integer *incy); %} types(D) %{ extern double ddot_(integer *n, double *dx, integer *incx, double *dy, integer *incy); %} integer n = (integer ) $PRIV(__n_size)/$inca(); $c() = $TFD(sdot_,ddot_)( &n, $P(a), $P(inca), $P(b), $P(incb)); ', Doc => ' =for ref Dot product of two vectors using Blas. =for example $a = random(5); $b = random(5); $c = dot($a, 1, $b, 1) '); pp_def("axpy", HandleBad => 0, Pars => '[phys]a(n);int [phys]inca();[phys] alpha();[io,phys]b(m);int [phys]incb()', GenericTypes => [F,D], Code => ' types(F) %{ extern int saxpy_(integer *n, float *da, float *dx, integer *incx, float *dy, integer *incy); %} types(D) %{ extern int daxpy_(integer *n, double *da, double *dx, integer *incx, double *dy, integer *incy); %} integer n = (integer ) $PRIV(__n_size)/$inca(); $TFD(saxpy_,daxpy_)( &n, $P(alpha), $P(a), $P(inca), $P(b), $P(incb)); ', Doc => ' =for ref Linear combination of vectors ax + b using Blas. Returns result in b. =for example $a = random(5); $b = random(5); axpy($a, 1, 12, $b, 1) '); pp_def("nrm2", HandleBad => 0, Pars => '[phys]a(n);int [phys]inca();[o,phys]b()', GenericTypes => [F,D], Code => ' types(F) %{ extern float snrm2_(integer *n, float *dx, integer *incx); %} types(D) %{ extern double dnrm2_(integer *n, double *dx, integer *incx); %} integer n = (integer ) $PRIV(__n_size)/$inca(); $b() = $TFD(snrm2_,dnrm2_)( &n, $P(a), $P(inca)); ', Doc => ' =for ref Euclidean norm of a vector using Blas. =for example $a = random(5); $norm2 = norm2($a,1) '); pp_def("asum", HandleBad => 0, Pars => '[phys]a(n);int [phys]inca();[o,phys]b()', GenericTypes => [F,D], Code => ' types(F) %{ extern float sasum_(integer *n, float *dx, integer *incx); %} types(D) %{ extern double dasum_(integer *n, double *dx, integer *incx); %} integer n = (integer ) $PRIV(__n_size)/$inca(); $b() = $TFD(sasum_,dasum_)( &n, $P(a), $P(inca)); ', Doc => ' =for ref Sum of absolute values of a vector using Blas. =for example $a = random(5); $absum = asum($a,1) '); pp_def("scal", HandleBad => 0, Pars => '[io,phys]a(n);int [phys]inca();[phys]scale()', GenericTypes => [F,D], Code => ' types(F) %{ extern int sscal_(integer *n, float *sa, float *dx, integer *incx); %} types(D) %{ extern int dscal_(integer *n, double *sa, double *dx,integer *incx); %} integer n = (integer ) $PRIV(__n_size)/$inca(); $TFD(sscal_,dscal_)( &n, $P(scale), $P(a), $P(inca)); ', Doc => ' =for ref Scale a vector by a constant using Blas. =for example $a = random(5); $a->scal(1, 0.5) '); pp_def("rot", HandleBad => 0, Pars => '[io,phys]a(n);int [phys]inca();[phys]c(); [phys]s();[io,phys]b(n);int [phys]incb()', GenericTypes => [F,D], Code => ' types(F) %{ extern int srot_(integer *n, float *dx, integer *incx, float *dy, integer *incy, float *c, float *s); %} types(D) %{ extern int drot_(integer *n, double *dx, integer *incx, double *dy, integer *incy, double *c, double *s); %} integer n = (integer ) $PRIV(__n_size)/$inca(); $TFD(srot_,drot_)( &n, $P(a), $P(inca), $P(b), $P(incb), $P(c), $P(s) ); ', Doc => ' =for ref Applies plane rotation using Blas. =for example $a = random(5); $b = random(5); rot($a, 1, 0.5, 0.7, $b, 1) '); pp_def("rotg", HandleBad => 0, Pars => '[io,phys]a();[io,phys]b();[o,phys]c(); [o,phys]s()', GenericTypes => [F,D], Code => ' types(F) %{ extern int srotg_(float *dx, float *dy, float *c, float *s); %} types(D) %{ extern int drotg_(double *dx, double *dy, double *c, double *s); %} $TFD(srotg_,drotg_)( $P(a), $P(b), $P(c), $P(s) ); ', Doc => ' =for ref Generates plane rotation using Blas. =for example $a = sequence(4); rotg($a(0), $a(1),$a(2),$a(3)) '); ################################################################################ # # LAPACK AUXILIARY ROUTINES # ################################################################################ pp_def("lasrt", HandleBad => 0, Pars => '[io,phys]d(n); int id();int [o,phys]info()', GenericTypes => [F,D], Code => ' char pwork = \'I\'; types(F) %{ extern int slasrt_(char *id, integer *n, float *d__, integer * info); %} types(D) %{ extern int dlasrt_(char *id, integer *n, double *d__, integer * info); %} if ($id()) pwork = \'D\'; $TFD(slasrt_,dlasrt_)( &pwork, &$PRIV(__n_size), $P(d), $P(info)); ', Doc => ' =for ref Sort the numbers in d in increasing order (if id = 0) or in decreasing order (if id = 1 ). Use Quick Sort, reverting to Insertion sort on arrays of size <= 20. Dimension of stack limits N to about 2**32. Arguments ========= id: = 0: sort d in increasing order; = 1: sort d in decreasing order. d: On entry, the array to be sorted. On exit, d has been sorted into increasing order (d(1) <= ... <= d(N) ) or into decreasing order (d(1) >= ... >= d(N) ), depending on id. info: = 0: successful exit < 0: if info = -i, the i-th argument had an illegal value =for example $a = random(5); lasrt ($a, 0, ($info = null)); '); pp_def("lacpy", HandleBad => 0, Pars => '[phys]A(m,n); int uplo(); [o,phys]B(p,n)', GenericTypes => [F,D], Code => ' char puplo; types(F) %{ extern int slacpy_(char *uplo, integer *m, integer *n, float * a, integer *lda, float *b, integer *ldb); %} types(D) %{ extern int dlacpy_(char *uplo, integer *m, integer *n, double * a, integer *lda, double *b, integer *ldb); %} switch ($uplo()) { case 0: puplo = \'U\'; break; case 1: puplo = \'L\'; break; default: puplo = \'A\'; } $TFD(slacpy_,dlacpy_)( &puplo, &$PRIV(__m_size), &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(B), &$PRIV(__p_size)); ', Doc => ' =for ref Copies all or part of a two-dimensional matrix A to another matrix B. Arguments ========= uplo: Specifies the part of the matrix A to be copied to B. = 0: Upper triangular part = 1: Lower triangular part Otherwise: All of the matrix A A: The m by n matrix A. If uplo = 0, only the upper triangle or trapezoid is accessed; if uplo = 1, only the lower triangle or trapezoid is accessed. B: On exit, B = A in the locations specified by uplo. =for example $a = random(5,5); $b = zeroes($a); lacpy ($a, 0, $b); '); pp_def("laswp", HandleBad => 0, Pars => '[io,phys]A(m,n);int [phys]k1();int [phys] k2(); int [phys]ipiv(p);int [phys]inc()', GenericTypes => [F,D], Code => ' types(F) %{ extern int slaswp_(integer *n, float *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); %} types(D) %{ extern int dlaswp_(integer *n, double *a, integer *lda, integer *k1, integer *k2, integer *ipiv, integer *incx); %} $TFD(slaswp_,dlaswp_)( &$PRIV(__n_size), $P(A), &$PRIV(__m_size), $P(k1), $P(k2), $P(ipiv), $P(inc)); ', Doc => ' =for ref Performs a series of row interchanges on the matrix A. One row interchange is initiated for each of rows k1 through k2 of A. Dosen\'t use PDL indice (start = 1). Arguments ========= A: On entry, the matrix of column dimension N to which the row interchanges will be applied. On exit, the permuted matrix. k1: The first element of ipiv for which a row interchange will be done. k2: The last element of ipiv for which a row interchange will be done. ipiv: The vector of pivot indices. Only the elements in positions k1 through k2 of ipiv are accessed. ipiv(k) = l implies rows k and l are to be interchanged. inc: The increment between successive values of ipiv. If ipiv is negative, the pivots are applied in reverse order. =for example $a = random(5,5); # reverse row (col for PDL) $b = pdl([5,4,3,2,1]); $a->laswp(1,2,$b,1); '); pp_def("lamch", HandleBad => 0, Pars => 'cmach(); [o]precision()', GenericTypes => [F,D], Inplace => 1, Code => ' char pcmach; int tmp; types(F) %{ extern float slamch_(char *cmach); %} types(D) %{ extern double dlamch_(char *cmach); %} tmp = (int ) $cmach(); switch (tmp) { case 1: pcmach = \'S\'; break; case 2: pcmach = \'B\'; break; case 3: pcmach = \'P\'; break; case 4: pcmach = \'N\'; break; case 5: pcmach = \'R\'; break; case 6: pcmach = \'M\'; break; case 7: pcmach = \'U\'; break; case 8: pcmach = \'L\'; break; case 9: pcmach = \'O\'; break; default: pcmach = \'E\'; } $precision() = $TFD(slamch_,dlamch_)(&pcmach); ', Doc => ' =for ref Determines precision machine parameters. Works inplace. Arguments ========= cmach: Specifies the value to be returned by lamch: = 0 LAMCH := eps = 1 LAMCH := sfmin = 2 LAMCH := base = 3 LAMCH := eps*base = 4 LAMCH := t = 5 LAMCH := rnd = 6 LAMCH := emin = 7 LAMCH := rmin = 8 LAMCH := emax = 9 LAMCH := rmax where eps = relative machine precision sfmin = safe minimum, such that 1/sfmin does not overflow base = base of the machine prec = eps*base t = number of (base) digits in the mantissa rnd = 1.0 when rounding occurs in addition, 0.0 otherwise emin = minimum exponent before (gradual) underflow rmin = underflow threshold - base**(emin-1) emax = largest exponent before overflow rmax = overflow threshold - (base**emax)*(1-eps) =for example $a = lamch (0); print "EPS is $a for double\n"; '); pp_def("labad", HandleBad => 0, Pars => '[io,phys]small(); [io,phys]large()', GenericTypes => [F,D], Code => ' types(F) %{ extern int slabad_(float *small, float *large); %} types(D) %{ extern int dlabad_(double *small, double *large); %} $TFD(slabad_, dlabad_)($P(small),$P(large)); ', Doc => ' =for ref Takes as input the values computed by C<lamch> for underflow and overflow, and returns the square root of each of these values if the log of large is sufficiently large. This subroutine is intended to identify machines with a large exponent range, such as the Crays, and redefine the underflow and overflow limits to be the square roots of the values computed by C<lamch>. This subroutine is needed because lamch does not compensate for poor arithmetic in the upper half of the exponent range, as is found on a Cray. Arguments ========= small: On entry, the underflow threshold as computed by lamch. On exit, if LOG10(large) is sufficiently large, the square root of small, otherwise unchanged. large: On entry, the overflow threshold as computed by lamch. On exit, if LOG10(large) is sufficiently large, the square root of large, otherwise unchanged. =for example $underflow = lamch(7); $overflow = lamch(9); labad ($underflow, $overflow); '); ################################################################################ # # OTHER AUXILIARY ROUTINES # ################################################################################ pp_def( 'tricpy', Pars => 'A(m,n);int uplo();[o] C(m,n)', Code => ' PDL_Long i, j, k; if ($uplo()) { for (i = 0; i < $SIZE(n);i++) { k = min(i,($SIZE(m)-1)); for (j = 0; j <= k; j++) $C(m=>j,n=>i) = $A(m=>j,n=>i); } } else { for (i = 0; i < $SIZE(n);i++) { for (j = i; j < $SIZE(m); j++) $C(m=>j,n=>i) = $A(m=>j,n=>i); if (i >= $SIZE(m)) break; } } ', Doc => <<EOT =for ref Copy triangular part to another matrix. If uplo == 0 copy upper triangular part. =cut EOT ); pp_def( 'cplx_eigen', Pars => 'eigreval(n);eigimval(n); eigvec(n,p);int fortran();[o]cplx_val(q=2,n);[o]cplx_vec(r=2,n,p)', Code => ' register PDL_Long i,j; PDL_Long index; if ($fortran()) { if ($SIZE(n) != $SIZE(p)) croak("Fortran storage type needs square eigvec!"); for(i = 0; i < $SIZE(p);i++) { if ($eigimval(n=>i) == 0) { $cplx_val(q=>0,n=>i) = $eigreval(n=>i); $cplx_val(q=>1,n=>i) = $eigimval(n=>i); for (j = 0; j < $SIZE(n); j++) { $cplx_vec(r=>0,n=>j,p=>i) = $eigvec(n=>j,p=>i); $cplx_vec(r=>1,n=>j,p=>i) = 0; } } else { index = i+1; $cplx_val(q=>0,n=>i) = $eigreval(n=>i); $cplx_val(q=>1,n=>i) = $eigimval(n=>i); $cplx_val(q=>0,n=>index) = $eigreval(n=>index); $cplx_val(q=>1,n=>index) = $eigimval(n=>index); for (j = 0; j < $SIZE(n); j++) { $cplx_vec(r=>0,n=>j,p=>i) = $cplx_vec(r=>0,n=>j,p=>index) = $eigvec(n=>j,p=>i); $cplx_vec(r=>1,n=>j,p=>i) = $eigvec(n=>j,p=>index); $cplx_vec(r=>1,n=>j,p=>index) = - $eigvec(n=>j,p=>index); } i = index; } } } else{ for(i = 0; i < $SIZE(n);i++) { if ($eigimval(n=>i) == 0) { $cplx_val(q=>0,n=>i) = $eigreval(n=>i); $cplx_val(q=>1,n=>i) = $eigimval(n=>i); for (j = 0; j < $SIZE(p); j++) { $cplx_vec(r=>0,n=>i,p=>j) = $eigvec(n=>i,p=>j); $cplx_vec(r=>1,n=>i,p=>j) = 0; } } else { index = i+1; $cplx_val(q=>0,n=>i) = $eigreval(n=>i); $cplx_val(q=>1,n=>i) = $eigimval(n=>i); $cplx_val(q=>0,n=>index) = $eigreval(n=>index); $cplx_val(q=>1,n=>index) = $eigimval(n=>index); for (j = 0; j < $SIZE(p); j++) { $cplx_vec(r=>0,n=>i,p=>j) = $cplx_vec(r=>0,n=>index,p=>j) = $eigvec(n=>i,p=>j); $cplx_vec(r=>1,n=>i,p=>j) = $eigvec(n=>index,p=>j); $cplx_vec(r=>1,n=>index,p=>j) = - $eigvec(n=>index,p=>j); } i = index; } } } ', Doc => <<EOT =for ref Output complex eigen-values/vectors from eigen-values/vectors as computed by geev or geevx. 'fortran' means fortran storage type. =cut EOT ); pp_def( 'augment', DefaultFlow => 1, Reversible => 1, Pars => 'x(n); y(p);[o]out(q)', RedoDimsCode => '$SIZE(q) = ( $PDL(x)->ndims > 0 ? $PDL(x)->dims[0] : 1) + ( $PDL(y)->ndims > 0 ? $PDL(y)->dims[0] : 1);', Code => ' register PDL_Long i,j; for (i=0; i < $SIZE(n); i++){ $out(q=>i) = $x(n=>i); } j = 0; for (i=$SIZE(n); i < $SIZE(q); i++){ $out(q=>i) = $y(p=>j); j++; } ', BackCode => ' register PDL_Long i,j; for (i=0; i < $SIZE(n); i++){ $x(n=>i) = $out(q=>i); } j = 0; for (i=$SIZE(n); i < $SIZE(q); i++){ $y(p=>j) = $out(q=>i); j++; } ', Doc => <<EOT =for ref Combine two pidlles into a single piddle. This routine does backward and forward dataflow automatically. =cut EOT ); pp_def( 'mstack', DefaultFlow => 1, Reversible => 1, Pars => 'x(n,m);y(n,p);[o]out(n,q);', RedoDimsCode => '$SIZE(q) = $PDL(x)->dims[1] + $PDL(y)->dims[1];', Code => ' register PDL_Long i,j; loop(m)%{ loop(n)%{ $out(n=>n,q=>m) = $x(); %} %} j=0; for (i = $SIZE(m); i < $SIZE(q) ;i++,j++) { loop(n)%{ $out(n=>n,q=>i) = $y(n=>n,p=>j); %} } ', BackCode => ' register PDL_Long i,j; loop(m)%{ loop(n)%{ $x() = $out(n=>n,q=>m); %} %} j=0; for (i = $SIZE(m); i < $SIZE(q) ;i++,j++) { loop(n)%{ $y(n=>n,p=>j) = $out(n=>n,q=>i); %} } ', Doc => <<EOT =for ref Combine two pidlles into a single piddle. This routine does backward and forward dataflow automatically. =cut EOT ); # RedoDimsCode => '$SIZE(q) = $PDL(x)->dims[1] + $PDL(y)->dims[1];', pp_addhdr(' float ftrace(int n, float *mat) { int i; float sum = mat[0]; for (i = 1; i < n; i++) sum += mat[i*(n+1)]; return sum; } double dtrace(int n, double *mat) { int i; double sum = mat[0]; for (i = 1; i < n; i++) sum += mat[i*(n+1)]; return sum; } '); pp_def( 'charpol', RedoDimsCode => '$SIZE(p) = $PDL(A)->dims[0] + 1;', Pars => '[phys]A(n,n);[phys,o]Y(n,n);[phys,o]out(p);', GenericTypes => [F,D], Code => ' int i,j,k; $GENERIC() *p, b; //$GENERIC() *tmp; char ptrans = \'N\'; types(F) %{ extern int sgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, float *alpha, float *a, integer *lda, float *b, integer *ldb, float *beta, float *c__, integer *ldc); float alpha = 1; float beta = 0; %} types(D) %{ extern int dgemm_(char *transa, char *transb, integer *m, integer * n, integer *k, double *alpha, double *a, integer *lda, double *b, integer *ldb, double *beta, double *c__, integer *ldc); double alpha = 1; double beta = 0; %} p = ($GENERIC() * ) malloc ( $SIZE(n) * $SIZE(n) * sizeof($GENERIC())); loop(n0)%{ loop(n1)%{ $Y(n0=>n0,n1=>n1) = (n0 == n1) ? ($GENERIC()) 1.0 : ($GENERIC()) 0.0; %} %} $out(p=>0) = 1; i = 0; for (;;) { i++; $TFD(sgemm_,dgemm_)(&ptrans,&ptrans,&$PRIV(__n_size),&$PRIV(__n_size), &$PRIV(__n_size),&alpha,$P(A),&$PRIV(__n_size), $P(Y), &$PRIV(__n_size), &beta, p, &$PRIV(__n_size)); if (i == $SIZE(n)) break; //tmp = $P(Y); //$P(Y) = p; //p = tmp; memmove($P(Y), p, $SIZE(n) * $SIZE(n) * sizeof($GENERIC())); // loop(n1) // %{ // loop(n0) // %{ // $Y(n0=>n0,n1=>n1) = p[(n1*$SIZE(n))+n0]; // %} // %} b = $out(p=>i) = - $TFD(ftrace,dtrace)($SIZE(n), $P(Y)) / i; for (j = 0; j < $SIZE(n); j++) $Y(n0=>j,n1=>j) += b; } k = $SIZE(n); $out(p=>k) = - $TFD(ftrace,dtrace)(k, p) / k; if ((k+1) & 1) { loop(n0) %{ loop(n1) %{ $Y(n0=>n0,n1=>n1) = -$Y(n0=>n0,n1=>n1); %} %} } free(p); ', Doc => <<EOT =for ref Compute adjoint matrix and characteristic polynomial. =cut EOT ); pp_addpm({At=>'Bot'},<<'EOD'); =head1 AUTHOR Copyright (C) Grégory Vanuxem 2005-2007. This library is free software; you can redistribute it and/or modify it under the terms of the artistic license as specified in the Artistic file. =cut EOD pp_done(); # you will need this to finish pp processing