From: Akshay S. <ak...@us...> - 2013-01-27 02:41:08
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "matlisp". The branch, tensor has been updated via 8230a46f93849cdb60ce09173ab31687f998d07d (commit) from a9eca9b0cc4287b81b325360972175771d7f6c71 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 8230a46f93849cdb60ce09173ab31687f998d07d Author: Akshay Srinivasan <aks...@gm...> Date: Sat Jan 26 18:37:12 2013 -0800 o Added more loops to GEMM. o Fixed bugs in :h :c handling inside complex-typed-gemm! diff --git a/src/level-1/trans.lisp b/src/level-1/trans.lisp index b1c0e00..2f7ab5b 100644 --- a/src/level-1/trans.lisp +++ b/src/level-1/trans.lisp @@ -81,9 +81,10 @@ (copy! value (TRANSPOSE~ tensor permutation))" (declare (type standard-tensor A)) (let ((displaced (make-instance (class-of A) :store (store A) - :dimensions (copy-seq (dimensions A)) - :strides (copy-seq (strides A)) - :parent-tensor A))) + :store-size (store-size A) + :dimensions (copy-seq (dimensions A)) + :strides (copy-seq (strides A)) + :parent-tensor A))) (transpose! displaced permutation))) (definline (setf transpose~) (value A &optional permutation) @@ -139,7 +140,7 @@ (etypecase A (real-tensor A) (complex-tensor - (scal! -1 (tensor-imagpart~ A)) + (real-typed-num-scal! -1d0 (tensor-imagpart~ A)) A) (number (conjugate A)))) diff --git a/src/level-3/gemm.lisp b/src/level-3/gemm.lisp index 48831ee..9ed038c 100644 --- a/src/level-3/gemm.lisp +++ b/src/level-3/gemm.lisp @@ -52,18 +52,15 @@ (rstp-A (row-stride A) :type index-type) (cstp-A (col-stride A) :type index-type) (hd-A (head A) :type index-type) - (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) ; (rstp-B (row-stride B) :type index-type) (cstp-B (col-stride B) :type index-type) (hd-B (head B) :type index-type) - (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) ; (rstp-C (row-stride C) :type index-type) (cstp-C (col-stride C) :type index-type) - (hd-C (head C) :type index-type) - (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) - ;;Replace with separate loops to maximize Row-ordered MM performance + (hd-C (head C) :type index-type)) + ;;Replace with separate loops to maximize Row-ordered MM performance (when (eq job-A :t) (rotatef rstp-A cstp-A)) (when (eq job-B :t) @@ -71,14 +68,18 @@ ;; (unless (,(getf opt :f=) beta (,(getf opt :fid*))) (,(getf opt :num-scal) beta C)) - ;; + ;;Most of these loop orderings are borrowed from the Fortran reference + ;;implementation of BLAS. (cond - ((and (= cstp-C 1) (= cstp-B 1) nil) + ((and (= cstp-C 1) (= cstp-B 1)) (let-typed ((of-A hd-A :type index-type) (of-B hd-B :type index-type) (of-C hd-C :type index-type) (d.rstp-B (- rstp-B nc-C) :type index-type) - (d.rstp-A (- rstp-A (* cstp-A dotl)) :type index-type)) + (d.rstp-A (- rstp-A (* cstp-A dotl)) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) (very-quickly (loop :repeat nr-C :do (progn @@ -97,13 +98,99 @@ (incf of-C rstp-C) (incf of-A d.rstp-A) (setf of-B hd-B)))))) + ((and (= cstp-A 1) (= rstp-B 1)) + (let-typed ((of-A hd-A :type index-type) + (of-B hd-B :type index-type) + (of-C hd-C :type index-type) + (d.cstp-B (- cstp-B dotl) :type index-type) + (d.rstp-C (- rstp-C (* nc-C cstp-C)) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type))) + (dot (,(getf opt :fid+)) :type ,(getf opt :element-type))) + (very-quickly + (loop :repeat nr-C + :do (progn + (loop :repeat nc-C + :do (progn + (setf dot (,(getf opt :fid+))) + (loop :repeat dotl + :do (progn + (setf dot (,(getf opt :f+) dot (,(getf opt :f*) (,(getf opt :reader) sto-A of-A) (,(getf opt :reader) sto-B of-B)))) + (incf of-A) + (incf of-B))) + (,(getf opt :value-incfer) dot sto-C of-C) + (incf of-C cstp-C) + (decf of-A dotl) + (incf of-B d.cstp-B))) + (incf of-C d.rstp-C) + (incf of-A rstp-A) + (setf of-B hd-B)))))) + ((and (= cstp-A 1) (= rstp-B 1)) + (let-typed ((of-A hd-A :type index-type) + (of-B hd-B :type index-type) + (of-C hd-C :type index-type) + (d.cstp-B (- cstp-B dotl) :type index-type) + (d.rstp-C (- rstp-C (* nc-C cstp-C)) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type))) + (dot (,(getf opt :fid+)) :type ,(getf opt :element-type))) + (very-quickly + (loop :repeat nr-C + :do (progn + (loop :repeat nc-C + :do (progn + (setf dot (,(getf opt :fid+))) + (loop :repeat dotl + :do (progn + (setf dot (,(getf opt :f+) dot (,(getf opt :f*) (,(getf opt :reader) sto-A of-A) (,(getf opt :reader) sto-B of-B)))) + (incf of-A) + (incf of-B))) + (,(getf opt :value-incfer) dot sto-C of-C) + (incf of-C cstp-C) + (decf of-A dotl) + (incf of-B d.cstp-B))) + (incf of-C d.rstp-C) + (incf of-A rstp-A) + (setf of-B hd-B)))))) + ((and (= rstp-A 1) (= rstp-C 1)) + (let-typed ((of-A hd-A :type index-type) + (of-B hd-B :type index-type) + (of-C hd-C :type index-type) + (d.cstp-B (- cstp-B (* rstp-B dotl)) :type index-type) + (d.cstp-A (- cstp-A nr-C) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) + (very-quickly + (loop :repeat nc-C + :do (progn + (loop :repeat dotl + :do (let-typed + ((ele-B (,(getf opt :f*) alpha (,(getf opt :reader) sto-B of-B)) :type ,(getf opt :element-type))) + (loop :repeat nr-C + :do (progn + (,(getf opt :value-incfer) (,(getf opt :f*) ele-B (,(getf opt :reader) sto-A of-A)) + sto-C of-C) + (incf of-C) + (incf of-A))) + (decf of-C nr-C) + (incf of-A d.cstp-A) + (incf of-B rstp-B))) + (incf of-C cstp-C) + (setf of-A hd-A) + (incf of-B d.cstp-B)))))) (t (let-typed ((of-A hd-A :type index-type) (of-B hd-B :type index-type) (of-C hd-C :type index-type) (r.cstp-C (* cstp-C nc-C) :type index-type) (d.rstp-B (- rstp-B (* cstp-B nc-C)) :type index-type) - (d.rstp-A (- rstp-A (* cstp-A dotl)) :type index-type)) + (d.rstp-A (- rstp-A (* cstp-A dotl)) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) (very-quickly (loop :repeat nr-C :do (progn @@ -193,18 +280,14 @@ (complex-base-typed-gemm! alpha A B beta C job) (let ((A (ecase job-A ((:n :t) A) - ((:h :c) - ;;BUG! - ;;Multiplication does not yield the complex conjugate! - (let ((ret (apply #'make-complex-tensor (lvec->list (dimensions A))))) - (complex-typed-axpy! #c(-1d0 0d0) A ret))))) + ((:h :c) (let ((ret (complex-typed-copy! A (complex-typed-zeros (dimensions A))))) + (real-typed-num-scal! -1d0 (tensor-imagpart~ ret)) + ret)))) (B (ecase job-B ((:n :t) B) - ((:h :c) - ;;BUG! - ;;Multiplication does not yield the complex conjugate! - (let ((ret (apply #'make-complex-tensor (lvec->list (dimensions B))))) - (complex-typed-axpy! #c(-1d0 0d0) B ret))))) + ((:h :c) (let ((ret (complex-typed-copy! A (complex-typed-zeros (dimensions A))))) + (real-typed-num-scal! -1d0 (tensor-imagpart~ ret)) + ret)))) (tjob (combine-jobs (ecase job-A ((:n :t) job-A) (:h :t) (:c :n)) (ecase job-B ((:n :t) job-B) (:h :t) (:c :n))))) (complex-base-typed-gemm! alpha A B ----------------------------------------------------------------------- Summary of changes: src/level-1/trans.lisp | 9 ++-- src/level-3/gemm.lisp | 121 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 107 insertions(+), 23 deletions(-) hooks/post-receive -- matlisp |