From: Akshay S. <ak...@us...> - 2012-08-13 18:44:40
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "matlisp". The branch, tensor has been updated via c0ba7e46b390f6e744e6865192b6eb57eb95c585 (commit) from 4c6e88337126ac9344c8735b98f54aaa69daa99e (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit c0ba7e46b390f6e744e6865192b6eb57eb95c585 Author: Akshay Srinivasan <aks...@gm...> Date: Tue Aug 14 00:07:52 2012 +0530 o Added a very quick sort-permute function. o Added conditions to avoid BLAS checks, when it is known that the fortran function is not going to be called (removes overhead). diff --git a/src/base/blas-helpers.lisp b/src/base/blas-helpers.lisp index 00f8bc6..86391ca 100644 --- a/src/base/blas-helpers.lisp +++ b/src/base/blas-helpers.lisp @@ -4,7 +4,9 @@ (defun blas-copyable-p (ten-a ten-b) (declare (type standard-tensor ten-a ten-b)) (mlet* - (((sort-std-a std-a-perm) (idx-sort-permute (copy-seq (strides ten-a)) #'<) :type (index-store-vector permutation)) + (((sort-std-a std-a-perm) (let-typed ((std-a (strides ten-a) :type index-store-vector)) + (very-quickly (sort-permute (copy-seq std-a) #'<))) + :type (index-store-vector permutation-action)) (perm-a-dims (permute (dimensions ten-a) std-a-perm) :type index-store-vector) ;;If blas-copyable then the strides must have the same sorting permutation. (sort-std-b (permute (strides ten-b) std-a-perm) :type index-store-vector) @@ -26,7 +28,9 @@ (defun consecutive-store-p (tensor) (declare (type standard-tensor tensor)) - (mlet* (((sort-std std-perm) (idx-sort-permute (copy-seq (strides tensor)) #'<) :type (index-store-vector permutation)) + (mlet* (((sort-std std-perm) (let-typed ((strd (strides tensor) :type index-store-vector)) + (very-quickly (sort-permute (copy-seq (strides tensor)) #'<))) + :type (index-store-vector permutation)) (perm-dims (permute (dimensions tensor) std-perm) :type index-store-vector)) (very-quickly (loop diff --git a/src/base/permutation.lisp b/src/base/permutation.lisp index e43cad1..81a5281 100644 --- a/src/base/permutation.lisp +++ b/src/base/permutation.lisp @@ -28,30 +28,6 @@ do (setf (aref ret i) i))) ret)) -(defun perrepr-max (seq) - (declare (type perrepr-vector seq)) - (very-quickly - (loop for ele of-type perrepr-type across seq - for idx of-type index-type = 0 then (+ idx 1) - with max of-type perrepr-type = (aref seq 0) - with max-idx of-type index-type = 0 - do (when (> ele max) - (setf max ele - max-idx idx)) - finally (return (values max max-idx))))) - -(defun perrepr-min (seq) - (declare (type perrepr-vector seq)) - (very-quickly - (loop for ele of-type perrepr-type across seq - for idx of-type index-type = 0 then (+ idx 1) - with min of-type perrepr-type = (aref seq 0) - with min-idx of-type index-type = 0 - do (when (< ele min) - (setf min ele - min-idx idx)) - finally (return (values min min-idx))))) - (definline perv (&rest contents) (make-array (length contents) :element-type 'perrepr-type :initial-contents contents)) @@ -133,7 +109,7 @@ for cyc of-type perrepr-vector in (repr per) unless (cycle-repr-p cyc) do (error 'permutation-invalid-error) - maximizing (perrepr-max cyc) into g-rnk of-type index-type + maximizing (lvec-max cyc) into g-rnk of-type index-type finally (setf (group-rank per) (the index-type (1+ g-rnk)))))))) ;; @@ -392,7 +368,7 @@ do (let ((val (aref idiv i))) (unless (= val i) (rotatef (aref act i) (aref act val)))))) - (make-instance 'permutation-action :repr act)))) + (make-instance 'permutation-action :repr act)))) (defun mod-max (seq lidx uidx) (declare (type perrepr-vector seq)) @@ -460,62 +436,67 @@ (lambda (&rest args) (apply func-a (permute! (multiple-value-list (funcall func-b args)) perm)))) ;; - -;;Optimize: pick different pivot. -(defgeneric sort-permute (seq predicate) - (:documentation " - (sort-permute seq predicate) - - Sorts the given sequence and return - the permutation required to move - from the given sequence to the sorted form. - ")) - -(defun idx-sort-permute (seq predicate) + +(definline sort-permute (seq predicate) " - (sort-permute seq predicate) - - Sorts a index-array and also returns - the permutation-action required to move - from the given sequence to the sorted form. - - Takes about 10x the running time which can be - achieved with cl:sort. + Sorts a lisp-vector in-place, by using the function @arg{predicate} as the + order. Also computes the permutation which would sort the original sequence + @arg{seq}. " - (declare (type index-store-vector seq) - (type function predicate)) - (let* ((len (length seq)) - (perm (perrepr-id-action len))) - (declare (type index-type len) - (type perrepr-vector perm)) - (labels ((qsort-bounds (todo) - (declare (type list todo)) - (if (null todo) t - (destructuring-bind (lb ub) (pop todo) - (declare (type index-type lb ub)) - #+nil(format t "~a lb:~a ub:~a ~%" seq lb ub) - (if (= ub (1+ lb)) t - (let* ((ele (aref seq lb)) - (ele-idx (very-quickly - (loop - for i of-type index-type from (1+ lb) below ub - with ele-idx of-type index-type = lb - do (unless (funcall predicate ele (aref seq i)) - (when (> i (1+ ele-idx)) - (rotatef (aref seq ele-idx) (aref seq (1+ ele-idx))) - (rotatef (aref perm ele-idx) (aref perm (1+ ele-idx)))) - (rotatef (aref seq ele-idx) (aref seq i)) - (rotatef (aref perm ele-idx) (aref perm i)) - (incf ele-idx) - #+nil(format t " ~a ~%" seq)) - finally (return ele-idx))))) - ;;The things we do for tail recursion! - (when (> (- ub ele-idx) 2) - (push (list (1+ ele-idx) ub) todo)) - (when (> (- ele-idx lb) 1) - (push (list lb ele-idx) todo)) - (qsort-bounds todo))))))) - (qsort-bounds `((0 ,len))) - (values seq (action->cycle (make-paction perm)))))) -;;Add a general sorter, this is a very useful thing to have. -;;Add a function to apply permutations to a matrices, tensors. + (declare (type vector seq)) + ;;This function is ugly of-course, but is also very very quick! + (let*-typed ((len (length seq) :type fixnum) + (perm (perrepr-id-action len) :type perrepr-vector) + (jobs (list `(0 ,len)))) + (loop ;;:repeat 10 + :for bounds := (pop jobs) :then (pop jobs) + :until (null bounds) + :finally (return (values seq (make-instance 'permutation-action :repr perm))) + :do (let*-typed ((below-idx (first bounds) :type fixnum) + (above-idx (second bounds) :type fixnum) + (piv (+ below-idx (floor (- above-idx below-idx) 2)) :type fixnum)) + (loop ;;:repeat 10 + :with ele := (aref seq piv) + :with lbound :of-type fixnum := below-idx + :with ubound :of-type fixnum := (1- above-idx) + :until (progn + ;;(format t "~%~a ~%" (list lbound piv ubound)) + (loop :for i :of-type fixnum :from lbound :to piv + :until (or (= i piv) (funcall predicate ele (aref seq i))) + :finally (setq lbound i)) + (loop :for i :of-type fixnum :downfrom ubound :to piv + :until (or (= i piv) (funcall predicate (aref seq i) ele)) + :finally (setq ubound i)) + ;;(format t "~a ~%" (list lbound piv ubound)) + (cond + ((= ubound lbound piv) + (when (> (- piv below-idx) 1) + (push `(,below-idx ,piv) jobs)) + (when (> (- above-idx (1+ piv)) 1) + (push `(,(1+ piv) ,above-idx) jobs)) + ;;(format t "~a~%" jobs) + t) + ((< lbound piv ubound) + (rotatef (aref seq lbound) (aref seq ubound)) + (rotatef (aref perm lbound) (aref perm ubound)) + (incf lbound) + (decf ubound) + nil) + ((= lbound piv) + (rotatef (aref seq piv) (aref seq (1+ piv))) + (rotatef (aref perm piv) (aref perm (1+ piv))) + (unless (= ubound (1+ piv)) + (rotatef (aref seq piv) (aref seq ubound)) + (rotatef (aref perm piv) (aref perm ubound))) + (incf piv) + (incf lbound) + nil) + ((= ubound piv) + (rotatef (aref seq (1- piv)) (aref seq piv)) + (rotatef (aref perm (1- piv)) (aref perm piv)) + (unless (= lbound (1- piv)) + (rotatef (aref seq lbound) (aref seq piv)) + (rotatef (aref perm lbound) (aref perm piv))) + (decf piv) + (decf ubound) + nil)))))))) diff --git a/src/level-1/axpy.lisp b/src/level-1/axpy.lisp index ec77357..df29906 100644 --- a/src/level-1/axpy.lisp +++ b/src/level-1/axpy.lisp @@ -37,11 +37,11 @@ `(definline ,func (alpha from to) (declare (type ,tensor-class from to) (type ,(getf opt :element-type) alpha)) - (let ((strd-p (blas-copyable-p from to)) - (call-fortran? (> (number-of-elements to) - ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) + ,fortran-lb)) + (strd-p (when call-fortran? (blas-copyable-p from to)))) (cond - ((and strd-p call-fortran?) + ((and call-fortran? strd-p) (,blas-func (number-of-elements from) alpha (store from) (first strd-p) (store to) (second strd-p) @@ -76,10 +76,10 @@ `(definline ,func (num-from to) (declare (type ,tensor-class to) (type ,(getf opt :element-type) num-from)) - (let ((min-strd (consecutive-store-p to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (min-strd (when call-fortran? (consecutive-store-p to)))) (cond - ((and min-strd call-fortran?) + ((and call-fortran? min-strd) (let ((num-array (,(getf opt :store-allocator) 1))) (declare (type ,(linear-array-type (getf opt :store-type)) num-array)) (let-typed ((id (,(getf opt :coercer) 1) :type ,(getf opt :element-type))) diff --git a/src/level-1/copy.lisp b/src/level-1/copy.lisp index a9ee5ee..96f2786 100644 --- a/src/level-1/copy.lisp +++ b/src/level-1/copy.lisp @@ -36,8 +36,8 @@ (assert opt nil 'tensor-cannot-find-optimization :tensor-class tensor-class) `(definline ,func (from to) (declare (type ,tensor-class from to)) - (let ((strd-p (blas-copyable-p from to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (strd-p (when call-fortran? (blas-copyable-p from to)))) (cond ((and strd-p call-fortran?) (,blas-func (number-of-elements from) @@ -69,10 +69,10 @@ `(definline ,func (num-from to) (declare (type ,tensor-class to) (type ,(getf opt :element-type) num-from)) - (let ((min-stride (consecutive-store-p to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (min-stride (when call-fortran? (consecutive-store-p to)))) (cond - ((and min-stride call-fortran?) + ((and call-fortran? min-stride) (let ((num-array (,(getf opt :store-allocator) 1))) (declare (type ,(linear-array-type (getf opt :store-type)) num-array)) ,(funcall (getf opt :value-writer) 'num-from 'num-array 0) diff --git a/src/level-1/scal.lisp b/src/level-1/scal.lisp index 2a573d7..0aea8b7 100644 --- a/src/level-1/scal.lisp +++ b/src/level-1/scal.lisp @@ -33,8 +33,8 @@ (assert opt nil 'tensor-cannot-find-optimization :tensor-class tensor-class) `(definline ,func (from to) (declare (type ,tensor-class from to)) - (let ((strd-p (blas-copyable-p from to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (strd-p (when call-fortran? (blas-copyable-p from to)))) (cond ((and strd-p call-fortran?) (,fortran-func (number-of-elements from) @@ -62,10 +62,10 @@ `(definline ,func (alpha to) (declare (type ,tensor-class to) (type ,(getf opt :element-type) alpha)) - (let ((min-stride (consecutive-store-p to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (min-stride (when call-fortran? (consecutive-store-p to)))) (cond - ((and min-stride call-fortran?) + ((and call-fortran? min-stride) (,blas-func (number-of-elements to) alpha (store to) min-stride (head to))) (t (let ((t-sto (store to))) @@ -83,8 +83,8 @@ (assert opt nil 'tensor-cannot-find-optimization :tensor-class tensor-class) `(definline ,func (from to) (declare (type ,tensor-class from to)) - (let ((strd-p (blas-copyable-p from to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (strd-p (when call-fortran? (blas-copyable-p from to)))) (cond ((and strd-p call-fortran?) (,fortran-func (number-of-elements from) @@ -112,10 +112,10 @@ `(definline ,func (alpha to) (declare (type ,tensor-class to) (type ,(getf opt :element-type) alpha)) - (let ((min-stride (consecutive-store-p to)) - (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements to) ,fortran-lb)) + (min-stride (when call-fortran? (consecutive-store-p to)))) (cond - ((and min-stride call-fortran?) + ((and call-fortran? min-stride) (let ((num-array (,(getf opt :store-allocator) 1))) (declare (type ,(linear-array-type (getf opt :store-type)) num-array)) (let-typed ((id (,(getf opt :coercer) 1) :type ,(getf opt :element-type))) diff --git a/src/level-1/swap.lisp b/src/level-1/swap.lisp index 8c4a790..f2eb094 100644 --- a/src/level-1/swap.lisp +++ b/src/level-1/swap.lisp @@ -36,8 +36,8 @@ (assert opt nil 'tensor-cannot-find-optimization :tensor-class tensor-class) `(definline ,func (x y) (declare (type ,tensor-class x y)) - (let ((strd-p (blas-copyable-p x y)) - (call-fortran? (> (number-of-elements x) ,fortran-lb))) + (let* ((call-fortran? (> (number-of-elements x) ,fortran-lb)) + (strd-p (when call-fortran? (blas-copyable-p x y)))) (cond ((and strd-p call-fortran?) (,blas-func (number-of-elements x) (store x) (first strd-p) (store y) (second strd-p) (head x) (head y))) diff --git a/src/level-2/gemv.lisp b/src/level-2/gemv.lisp index 8891d33..c483454 100644 --- a/src/level-2/gemv.lisp +++ b/src/level-2/gemv.lisp @@ -15,49 +15,49 @@ (type ,vector-class x y) (type symbol job)) (mlet* - (((maj-A ld-A fop-A) (blas-matrix-compatible-p A job) :type (symbol index-type (string 1)))) - (let ((call-fortran? (> (max (nrows A) (ncols A)) ,fortran-call-lb))) - (cond - ((and maj-a call-fortran?) - (let-typed ((nr-A (nrows A) :type index-type) - (nc-A (ncols A) :type index-type)) - (when (eq maj-A :row-major) - (rotatef nr-A nc-A)) - (,blas-gemv-func fop-a nr-A nc-A - alpha (store A) ld-A - (store x) (aref (strides x) 0) - beta - (store y) (aref (strides y) 0) - (head A) (head x) (head y)))) - (t - (let-typed ((nr-A (nrows A) :type index-type) - (nc-A (ncols A) :type index-type) - (rs-A (row-stride A) :type index-type) - (cs-A (col-stride A) :type index-type) - (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + ((call-fortran? (> (max (nrows A) (ncols A)) ,fortran-call-lb)) + ((maj-A ld-A fop-A) (if call-fortran? (blas-matrix-compatible-p A job) (values nil 0 "?")) :type (symbol index-type (string 1)))) + (cond + ((and maj-a call-fortran?) + (let-typed ((nr-A (nrows A) :type index-type) + (nc-A (ncols A) :type index-type)) + (when (eq maj-A :row-major) + (rotatef nr-A nc-A)) + (,blas-gemv-func fop-a nr-A nc-A + alpha (store A) ld-A + (store x) (aref (strides x) 0) + beta + (store y) (aref (strides y) 0) + (head A) (head x) (head y)))) + (t + (let-typed ((nr-A (nrows A) :type index-type) + (nc-A (ncols A) :type index-type) + (rs-A (row-stride A) :type index-type) + (cs-A (col-stride A) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) ; - (stp-x (aref (strides x) 0) :type index-type) - (sto-x (store x) :type ,(linear-array-type (getf opt :store-type))) - (hd-x (head x) :type index-type) + (stp-x (aref (strides x) 0) :type index-type) + (sto-x (store x) :type ,(linear-array-type (getf opt :store-type))) + (hd-x (head x) :type index-type) ; - (stp-y (aref (strides y) 0) :type index-type) - (sto-y (store y) :type ,(linear-array-type (getf opt :store-type)))) - (when (eq job :t) - (rotatef nr-A nc-A) - (rotatef rs-A cs-A)) - (very-quickly - (loop repeat nr-A - for of-y of-type index-type = (head y) then (+ of-y stp-y) - for rof-A of-type index-type = (head A) then (+ rof-A rs-A) - do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-y 'of-y)) :type ,(getf opt :element-type))) - (loop repeat nc-A - for of-x of-type index-type = hd-x then (+ of-x stp-x) - for of-A of-type index-type = rof-A then (+ of-A cs-A) - summing (* ,(funcall (getf opt :reader) 'sto-x 'of-x) - ,(funcall (getf opt :reader) 'sto-A 'of-A)) into dotp of-type ,(getf opt :element-type) - finally ,(funcall (getf opt :value-writer) - `(+ (* alpha dotp) val) 'sto-y 'of-y)))))))))) - y))) + (stp-y (aref (strides y) 0) :type index-type) + (sto-y (store y) :type ,(linear-array-type (getf opt :store-type)))) + (when (eq job :t) + (rotatef nr-A nc-A) + (rotatef rs-A cs-A)) + (very-quickly + (loop repeat nr-A + for of-y of-type index-type = (head y) then (+ of-y stp-y) + for rof-A of-type index-type = (head A) then (+ rof-A rs-A) + do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-y 'of-y)) :type ,(getf opt :element-type))) + (loop repeat nc-A + for of-x of-type index-type = hd-x then (+ of-x stp-x) + for of-A of-type index-type = rof-A then (+ of-A cs-A) + summing (* ,(funcall (getf opt :reader) 'sto-x 'of-x) + ,(funcall (getf opt :reader) 'sto-A 'of-A)) into dotp of-type ,(getf opt :element-type) + finally ,(funcall (getf opt :value-writer) + `(+ (* alpha dotp) val) 'sto-y 'of-y))))))))) + y))) ;;Real (generate-typed-gemv! real-base-typed-gemv! diff --git a/src/level-3/gemm.lisp b/src/level-3/gemm.lisp index 386c193..e6ba5ab 100644 --- a/src/level-3/gemm.lisp +++ b/src/level-3/gemm.lisp @@ -41,115 +41,115 @@ (:tn (values :t :n)) (:tt (values :t :t))) :type (symbol symbol)) - ((maj-A ld-A fop-A) (blas-matrix-compatible-p A job-A) :type (symbol index-type (string 1))) - ((maj-B ld-B fop-B) (blas-matrix-compatible-p B job-B) :type (symbol index-type (string 1))) - ((maj-C ld-C fop-C) (blas-matrix-compatible-p C :n) :type (symbol index-type nil))) - (let ((call-fortran? (> (max (nrows C) (ncols C) (if (eq job-A :n) (ncols A) (nrows A))) - ,fortran-lb-parameter))) - (cond - ((and maj-A maj-B maj-C call-fortran?) - (let-typed ((nr-C (nrows C) :type index-type) - (nc-C (ncols C) :type index-type) - (dotl (ecase job-A (:n (ncols A)) (:t (nrows A))) :type index-type)) - (when (eq maj-C :row-major) - (rotatef A B) - (rotatef ld-A ld-B) - (rotatef maj-A maj-B) - (rotatef nr-C nc-C) - (setf (values fop-A fop-B) - (values (fortran-snop fop-B) (fortran-snop fop-A)))) - (,blas-gemm-func fop-A fop-B nr-C nc-C dotl - alpha (store A) ld-A (store B) ld-B - beta (store C) ld-C - (head A) (head B) (head C)))) - ((and maj-A call-fortran?) - (let-typed ((nc-C (ncols C) :type index-type) - (strd-C (col-stride C) :type index-type) - (stp-C (row-stride C) :type index-type) - (sto-C (store C) :type ,(linear-array-type (getf opt :store-type))) - ; - (nr-A (nrows A) :type index-type) - (nc-A (ncols A) :type index-type) - (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) - (hd-A (head A) :type index-type) - ; - (stp-B (if (eq job-B :n) (row-stride B) (col-stride B)) :type index-type) - (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) - (strd-B (if (eq job-B :n) (col-stride B) (row-stride B)) :type index-type)) - (when (eq maj-A :row-major) - (rotatef nr-A nc-A)) - (very-quickly - (loop repeat nc-C - for of-B of-type index-type = (head B) then (+ of-B strd-B) - for of-C of-type index-type = (head C) then (+ of-C strd-C) - do (,blas-gemv-func fop-A nr-A nc-A - alpha sto-A ld-A - sto-B stp-B - beta sto-C stp-C - hd-A of-B of-C))))) - ((and maj-B call-fortran?) - (let-typed ((nr-C (nrows C) :type index-type) - (stp-C (col-stride C) :type index-type) - (strd-C (row-stride C) :type index-type) - (sto-C (store c) :type ,(linear-array-type (getf opt :store-type))) - ; - (stp-A (if (eq job-A :n) (col-stride A) (row-stride A)) :type index-type) - (strd-A (if (eq job-A :n) (row-stride A) (col-stride A)) :type index-type) - (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) - ; - (nr-B (nrows B) :type index-type) - (nc-B (ncols B) :type index-type) - (hd-B (head B) :type index-type) - (fop-B (fortran-snop fop-B) :type (string 1)) - (sto-B (store B) :type ,(linear-array-type (getf opt :store-type)))) - (when (eq maj-B :row-major) - (rotatef nr-B nc-B)) - (very-quickly - (loop repeat nr-C - for of-A of-type index-type = (head A) then (+ of-A strd-A) - for of-C of-type index-type = (head C) then (+ of-C strd-C) - do (,blas-gemv-func fop-B nr-B nc-B - alpha sto-B ld-B - sto-A stp-A - beta sto-C stp-C - hd-B of-A of-C))))) - (t - (let-typed ((nr-C (nrows C) :type index-type) - (nc-C (ncols C) :type index-type) - (dotl (ecase job-A (:n (ncols A)) (:t (nrows A))) :type index-type) - ; - (rstp-A (row-stride A) :type index-type) - (cstp-A (col-stride A) :type index-type) - (hd-A (head A) :type index-type) - (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) - ; - (rstp-B (row-stride B) :type index-type) - (cstp-B (col-stride B) :type index-type) - (hd-B (head B) :type index-type) - (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) - ; - (rstp-C (row-stride C) :type index-type) - (cstp-C (col-stride C) :type index-type) - (hd-C (head C) :type index-type) - (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) - (when (eq job-A :t) - (rotatef rstp-A cstp-A)) - (when (eq job-B :t) - (rotatef rstp-B cstp-B)) - (very-quickly - (loop repeat nr-C - for rof-A of-type index-type = hd-A then (+ rof-A rstp-A) - for rof-C of-type index-type = hd-C then (+ rof-C rstp-C) - do (loop repeat nc-C - for cof-B of-type index-type = hd-B then (+ cof-B cstp-B) - for of-C of-type index-type = rof-C then (+ of-C cstp-C) - do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-C 'of-C)) :type ,(getf opt :element-type))) - (loop repeat dotl - for of-A of-type index-type = rof-A then (+ of-A cstp-A) - for of-B of-type index-type = cof-B then (+ of-B rstp-B) - summing (* ,(funcall (getf opt :reader) 'sto-A 'of-A) - ,(funcall (getf opt :reader) 'sto-B 'of-B)) into sum of-type ,(getf opt :element-type) - finally ,(funcall (getf opt :value-writer) '(+ (* alpha sum) val) 'sto-C 'of-C))))))))))) + (call-fortran? (> (max (nrows C) (ncols C) (if (eq job-A :n) (ncols A) (nrows A))) + ,fortran-lb-parameter)) + ((maj-A ld-A fop-A) (if call-fortran? (blas-matrix-compatible-p A job-A) (values nil 0 "?")) :type (symbol index-type (string 1))) + ((maj-B ld-B fop-B) (if call-fortran? (blas-matrix-compatible-p B job-B) (values nil 0 "?")) :type (symbol index-type (string 1))) + ((maj-C ld-C fop-C) (if call-fortran? (blas-matrix-compatible-p C :n) (values nil 0 "?")) :type (symbol index-type nil))) + (cond + ((and call-fortran? maj-A maj-B maj-C) + (let-typed ((nr-C (nrows C) :type index-type) + (nc-C (ncols C) :type index-type) + (dotl (ecase job-A (:n (ncols A)) (:t (nrows A))) :type index-type)) + (when (eq maj-C :row-major) + (rotatef A B) + (rotatef ld-A ld-B) + (rotatef maj-A maj-B) + (rotatef nr-C nc-C) + (setf (values fop-A fop-B) + (values (fortran-snop fop-B) (fortran-snop fop-A)))) + (,blas-gemm-func fop-A fop-B nr-C nc-C dotl + alpha (store A) ld-A (store B) ld-B + beta (store C) ld-C + (head A) (head B) (head C)))) + ((and call-fortran? maj-A) + (let-typed ((nc-C (ncols C) :type index-type) + (strd-C (col-stride C) :type index-type) + (stp-C (row-stride C) :type index-type) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type))) + ; + (nr-A (nrows A) :type index-type) + (nc-A (ncols A) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (hd-A (head A) :type index-type) + ; + (stp-B (if (eq job-B :n) (row-stride B) (col-stride B)) :type index-type) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (strd-B (if (eq job-B :n) (col-stride B) (row-stride B)) :type index-type)) + (when (eq maj-A :row-major) + (rotatef nr-A nc-A)) + (very-quickly + (loop repeat nc-C + for of-B of-type index-type = (head B) then (+ of-B strd-B) + for of-C of-type index-type = (head C) then (+ of-C strd-C) + do (,blas-gemv-func fop-A nr-A nc-A + alpha sto-A ld-A + sto-B stp-B + beta sto-C stp-C + hd-A of-B of-C))))) + ((and call-fortran? maj-B) + (let-typed ((nr-C (nrows C) :type index-type) + (stp-C (col-stride C) :type index-type) + (strd-C (row-stride C) :type index-type) + (sto-C (store c) :type ,(linear-array-type (getf opt :store-type))) + ; + (stp-A (if (eq job-A :n) (col-stride A) (row-stride A)) :type index-type) + (strd-A (if (eq job-A :n) (row-stride A) (col-stride A)) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + ; + (nr-B (nrows B) :type index-type) + (nc-B (ncols B) :type index-type) + (hd-B (head B) :type index-type) + (fop-B (fortran-snop fop-B) :type (string 1)) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type)))) + (when (eq maj-B :row-major) + (rotatef nr-B nc-B)) + (very-quickly + (loop repeat nr-C + for of-A of-type index-type = (head A) then (+ of-A strd-A) + for of-C of-type index-type = (head C) then (+ of-C strd-C) + do (,blas-gemv-func fop-B nr-B nc-B + alpha sto-B ld-B + sto-A stp-A + beta sto-C stp-C + hd-B of-A of-C))))) + (t + (let-typed ((nr-C (nrows C) :type index-type) + (nc-C (ncols C) :type index-type) + (dotl (ecase job-A (:n (ncols A)) (:t (nrows A))) :type index-type) + ; + (rstp-A (row-stride A) :type index-type) + (cstp-A (col-stride A) :type index-type) + (hd-A (head A) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + ; + (rstp-B (row-stride B) :type index-type) + (cstp-B (col-stride B) :type index-type) + (hd-B (head B) :type index-type) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + ; + (rstp-C (row-stride C) :type index-type) + (cstp-C (col-stride C) :type index-type) + (hd-C (head C) :type index-type) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) + (when (eq job-A :t) + (rotatef rstp-A cstp-A)) + (when (eq job-B :t) + (rotatef rstp-B cstp-B)) + (very-quickly + (loop repeat nr-C + for rof-A of-type index-type = hd-A then (+ rof-A rstp-A) + for rof-C of-type index-type = hd-C then (+ rof-C rstp-C) + do (loop repeat nc-C + for cof-B of-type index-type = hd-B then (+ cof-B cstp-B) + for of-C of-type index-type = rof-C then (+ of-C cstp-C) + do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-C 'of-C)) :type ,(getf opt :element-type))) + (loop repeat dotl + for of-A of-type index-type = rof-A then (+ of-A cstp-A) + for of-B of-type index-type = cof-B then (+ of-B rstp-B) + summing (* ,(funcall (getf opt :reader) 'sto-A 'of-A) + ,(funcall (getf opt :reader) 'sto-B 'of-B)) into sum of-type ,(getf opt :element-type) + finally ,(funcall (getf opt :value-writer) '(+ (* alpha sum) val) 'sto-C 'of-C)))))))))) C))) ;;Real ----------------------------------------------------------------------- Summary of changes: src/base/blas-helpers.lisp | 8 +- src/base/permutation.lisp | 147 +++++++++++++----------------- src/level-1/axpy.lisp | 14 ++-- src/level-1/copy.lisp | 10 +- src/level-1/scal.lisp | 20 ++-- src/level-1/swap.lisp | 4 +- src/level-2/gemv.lisp | 82 ++++++++-------- src/level-3/gemm.lisp | 218 ++++++++++++++++++++++---------------------- 8 files changed, 244 insertions(+), 259 deletions(-) hooks/post-receive -- matlisp |