From: Akshay S. <ak...@us...> - 2012-07-12 07:55:05
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "matlisp". The branch, tensor has been updated via 63d6b10a662cb7b8ad0b3dfd288db7a5f921abff (commit) from c30fe6989eac02b31688733a8268ba0f4cc04891 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 63d6b10a662cb7b8ad0b3dfd288db7a5f921abff Author: Akshay Srinivasan <aks...@gm...> Date: Thu Jul 12 13:18:06 2012 +0530 o More tweaks to matrix-multiplication. Native lisp GEMM is now just about 30% slower than C on SBCL. o Removed axpy, ddot versions from gemv.lisp. Lisp(SBCL) version is sufficiently fast as it is anyway. diff --git a/matlisp.asd b/matlisp.asd index 26df9c3..6858f31 100644 --- a/matlisp.asd +++ b/matlisp.asd @@ -121,7 +121,11 @@ (:module "matlisp-level-2" :pathname "level-2" :depends-on ("matlisp-base" "matlisp-classes" "foreign-core" "matlisp-level-1") - :components ((:file "gemv"))))) + :components ((:file "gemv"))) + (:module "matlisp-level-3" + :pathname "level-3" + :depends-on ("matlisp-base" "matlisp-classes" "foreign-core" "matlisp-level-1") + :components ((:file "gemm"))))) ;; (defclass f2cl-cl-source-file (asdf:cl-source-file) diff --git a/src/classes/matrix.lisp b/src/classes/matrix.lisp index df19960..4ad783b 100644 --- a/src/classes/matrix.lisp +++ b/src/classes/matrix.lisp @@ -54,7 +54,7 @@ Purpose ======= Return T if X is either a row or a column matrix." - (or (row-vector-p matrix) (col-vector-p matrix))) + (or (row-matrix-p matrix) (col-matrix-p matrix))) (definline square-matrix-p (matrix) (and (square-p matrix) (matrix-p matrix))) diff --git a/src/level-2/gemv.lisp b/src/level-2/gemv.lisp index 392461c..c67af94 100644 --- a/src/level-2/gemv.lisp +++ b/src/level-2/gemv.lisp @@ -2,7 +2,8 @@ (defmacro generate-typed-gemv! (func (matrix-class vector-class) - (blas-gemv-func blas-axpy-func blas-dot-func blas-scal-func)) + blas-gemv-func + fortran-call-lb) ;;Be very careful when using functions generated by this macro. ;;Indexes can be tricky and this has no safety net. ;;Use only after checking the arguments for compatibility. @@ -14,71 +15,75 @@ (type ,vector-class x y) (type symbol job)) (mlet* - (((maj-a ld-a fop-a) (blas-matrix-compatible-p A job) :type (symbol index-type (string 1)))) - (if maj-a - (let ((nr-a (aref (dimensions A) 0)) - (nc-a (aref (dimensions A) 1))) - (declare (type index-type nr-a nc-a)) - (when (eq maj-a :row-major) - (rotatef nr-a nc-a)) - (,blas-gemv-func fop-a nr-a nc-a - alpha (store a) ld-a - (store x) (aref (strides x) 0) - beta - (store y) (aref (strides y) 0) - (head A) (head x) (head y))) - (let ((nr-a (aref (dimensions A) 0)) - (nc-a (aref (dimensions A) 1)) - (rs-a (aref (strides A) 0)) - (cs-a (aref (strides A) 1))) - (declare (type index-type nr-a nc-a rs-a cs-a)) - (when (eq job :t) - (rotatef nr-a nc-a) - (rotatef rs-a cs-a)) - (let ((sto-a (store a)) - (sto-x (store x)) - (std-x (aref (strides x) 0)) - (hd-x (head x)) - (sto-y (store y)) - (std-y (aref (strides y) 0)) - (hd-y (head y))) - (declare (type ,(linear-array-type (getf opt :store-type)) sto-a sto-x sto-y) - (type index-type std-y std-x hd-x hd-y)) - (if (> nr-a nc-a) - (progn - (unless (= beta 1d0) - (,blas-scal-func nr-a beta - sto-y std-y hd-y)) - (very-quickly - (mod-dotimes (idx (idxv nc-a)) - with (linear-sums - (of-x (strides x) (head x)) - (of-a (idxv cs-a) (head A))) - do (,blas-axpy-func nr-a (* alpha ,(funcall (getf opt :reader) 'sto-x 'of-x)) - sto-a rs-a sto-y std-y - of-a hd-y)))) - (very-quickly - (mod-dotimes (idx (idxv nr-a)) - with (linear-sums - (of-y (strides y) (head y)) - (of-a (idxv rs-a) (head A))) - do (let ((val (* beta ,(funcall (getf opt :reader) 'sto-y 'of-y))) - (dotp (,blas-dot-func nc-a - sto-a cs-a sto-x std-x - of-a hd-x))) - (declare (type ,(getf opt :element-type) val dotp)) - ,(funcall (getf opt :value-writer) - `(+ val (* alpha dotp)) 'sto-y 'of-y))))))))) + (((maj-A ld-A fop-A) (blas-matrix-compatible-p A job) :type (symbol index-type (string 1)))) + (let ((call-fortran? (> (max (nrows A) (ncols A)) ,fortran-call-lb))) + (cond + ((and maj-a call-fortran?) + (let-typed ((nr-A (nrows A) :type index-type) + (nc-A (ncols A) :type index-type)) + (when (eq maj-A :row-major) + (rotatef nr-A nc-A)) + (,blas-gemv-func fop-a nr-A nc-A + alpha (store A) ld-A + (store x) (aref (strides x) 0) + beta + (store y) (aref (strides y) 0) + (head A) (head x) (head y)))) + (t + (let-typed ((nr-A (nrows A) :type index-type) + (nc-A (ncols A) :type index-type) + (rs-A (row-stride A) :type index-type) + (cs-A (col-stride A) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + ; + (stp-x (aref (strides x) 0) :type index-type) + (sto-x (store x) :type ,(linear-array-type (getf opt :store-type))) + (hd-x (head x) :type index-type) + ; + (stp-y (aref (strides y) 0) :type index-type) + (sto-y (store y) :type ,(linear-array-type (getf opt :store-type)))) + (when (eq job :t) + (rotatef nr-A nc-A) + (rotatef rs-A cs-A)) + (very-quickly + (loop repeat nr-A + for of-y of-type index-type = (head y) then (+ of-y stp-y) + for rof-A of-type index-type = (head A) then (+ rof-A rs-A) + do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-y 'of-y)) :type ,(getf opt :element-type))) + (loop repeat nc-A + for of-x of-type index-type = hd-x then (+ of-x stp-x) + for of-A of-type index-type = rof-A then (+ of-A cs-A) + summing (* ,(funcall (getf opt :reader) 'sto-x 'of-x) + ,(funcall (getf opt :reader) 'sto-A 'of-A)) into dotp of-type ,(getf opt :element-type) + finally ,(funcall (getf opt :value-writer) + `(+ (* alpha dotp) val) 'sto-y 'of-y)))))))))) y))) +;;Tweakable +(defparameter *real-gemv-fortran-call-lower-bound* 1000 + " + If the maximum dimension in the MV is lower than this + parameter, then the lisp code is used by default, instead of + calling BLAS. Used to avoid the FFI overhead when calling + MM with small matrices. + Default set with SBCL on x86-64 linux. A reasonable value + is something between 800 and 2000.") (generate-typed-gemv! real-typed-gemv! - (real-matrix real-vector) - (dgemv daxpy ddot dscal)) - + (real-matrix real-vector) dgemv + *real-gemv-fortran-call-lower-bound*) + +;;Tweakable +(defparameter *complex-gemv-fortran-call-lower-bound* 600 + " + If the maximum dimension in the MV is lower than this + parameter, then the lisp code is used by default, instead of + calling BLAS. Used to avoid the FFI overhead when calling + MM with small matrices. + Default set with SBCL on x86-64 linux. A reasonable value + is something between 400 and 1000.") (generate-typed-gemv! complex-typed-gemv! - (complex-matrix complex-vector) - (zgemv zaxpy zdotu zscal)) - + (complex-matrix complex-vector) zgemv + *complex-gemv-fortran-call-lower-bound*) ;;---------------------------------------------------------------;; ;;Can't support "C" because the dual isn't supported by BLAS. diff --git a/src/level-3/gemm.lisp b/src/level-3/gemm.lisp index ae6fb35..d56732e 100644 --- a/src/level-3/gemm.lisp +++ b/src/level-3/gemm.lisp @@ -28,130 +28,153 @@ (in-package #:matlisp) -;;Tweakable -(defparameter *gemm-fortran-call-lower-bound* 50 - " - If the maximum dimension in the MM is lower than this - parameter, then the lisp code is used by default, instead of - calling BLAS. Used to avoid the FFI overhead when calling - MM with small matrices. - Default set with SBCL on x86-64 linux. A reasonable value - is something between 20 and 200.") - -(defmacro generate-typed-gemm! (func (matrix-class) (blas-gemm-func blas-gemv-func)) +(defmacro generate-typed-gemm! (func (matrix-class) (blas-gemm-func blas-gemv-func) fortran-lb-parameter) (let* ((opt (get-tensor-class-optimization matrix-class))) (assert opt nil 'tensor-cannot-find-optimization :tensor-class matrix-class) `(defun ,func (alpha A B beta C job) (declare (type ,(getf opt :element-type) alpha beta) (type ,matrix-class A B C) (type symbol job)) - (mlet* (((job-a job-b) (ecase job + (mlet* (((job-A job-B) (ecase job (:nn (values :n :n)) (:nt (values :n :t)) (:tn (values :t :n)) (:tt (values :t :t))) :type (symbol symbol)) - ((maj-a ld-a fop-a) (blas-matrix-compatible-p A job-a) :type (symbol index-type (string 1))) - ((maj-b ld-b fop-b) (blas-matrix-compatible-p B job-b) :type (symbol index-type (string 1))) - ((maj-c ld-c fop-c) (blas-matrix-compatible-p C :n) :type (symbol index-type nil))) - (let ((call-fortran? (> (max (nrows C) (ncols C) (if (eq job-a :n) (ncols A) (nrows A))) - *gemm-fortran-call-lower-bound*))) + ((maj-A ld-A fop-A) (blas-matrix-compatible-p A job-A) :type (symbol index-type (string 1))) + ((maj-B ld-B fop-B) (blas-matrix-compatible-p B job-B) :type (symbol index-type (string 1))) + ((maj-C ld-C fop-C) (blas-matrix-compatible-p C :n) :type (symbol index-type nil))) + (let ((call-fortran? (> (max (nrows C) (ncols C) (if (eq job-A :n) (ncols A) (nrows A))) + ,fortran-lb-parameter))) (cond - ((and maj-a maj-b maj-c call-fortran?) - (let-typed ((nr-c (nrows C) :type index-type) - (nc-c (ncols C) :type index-type) - (dotl (ecase job-a (:n (ncols A)) (:t (nrows A))) :type index-type)) - (when (eq maj-c :row-major) + ((and maj-A maj-B maj-C call-fortran?) + (let-typed ((nr-C (nrows C) :type index-type) + (nc-C (ncols C) :type index-type) + (dotl (ecase job-A (:n (ncols A)) (:t (nrows A))) :type index-type)) + (when (eq maj-C :row-major) (rotatef A B) - (rotatef ld-a ld-b) - (rotatef maj-a maj-b) - (rotatef nr-c nc-c) - (setf (values fop-a fop-b) - (values (fortran-snop fop-b) (fortran-snop fop-a)))) - (,blas-gemm-func fop-a fop-b nr-c nc-c dotl - alpha (store A) ld-a (store B) ld-b - beta (store C) ld-c + (rotatef ld-A ld-B) + (rotatef maj-A maj-B) + (rotatef nr-C nc-C) + (setf (values fop-A fop-B) + (values (fortran-snop fop-B) (fortran-snop fop-A)))) + (,blas-gemm-func fop-A fop-B nr-C nc-C dotl + alpha (store A) ld-A (store B) ld-B + beta (store C) ld-C (head A) (head B) (head C)))) - ((and maj-a call-fortran?) - (let-typed ((nc-c (ncols C) :type index-type) - (sto-c (store C) :type ,(linear-array-type (getf opt :store-type))) - (stp-c (row-stride C) :type index-type) - (nr-a (nrows A) :type index-type) - (nc-a (ncols A) :type index-type) - (sto-a (store A) :type ,(linear-array-type (getf opt :store-type))) - (hd-a (head A) :type index-type) - (stp-b (if (eq job-b :n) (row-stride B) (col-stride B)) :type index-type) - (sto-b (store B) :type ,(linear-array-type (getf opt :store-type))) - (strd-b (if (eq job-b :n) (col-stride B) (row-stride B)) :type index-type) - (strd-c (col-stride C) :type index-type)) - (when (eq maj-a :row-major) - (rotatef nr-a nc-a)) + ((and maj-A call-fortran?) + (let-typed ((nc-C (ncols C) :type index-type) + (strd-C (col-stride C) :type index-type) + (stp-C (row-stride C) :type index-type) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type))) + ; + (nr-A (nrows A) :type index-type) + (nc-A (ncols A) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + (hd-A (head A) :type index-type) + ; + (stp-B (if (eq job-B :n) (row-stride B) (col-stride B)) :type index-type) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + (strd-B (if (eq job-B :n) (col-stride B) (row-stride B)) :type index-type)) + (when (eq maj-A :row-major) + (rotatef nr-A nc-A)) (very-quickly - (mod-dotimes (idx (idxv nc-c)) - with (linear-sums - (of-b (idxv strd-b) (head B)) - (of-c (idxv strd-c) (head C))) - do (,blas-gemv-func fop-a nr-a nc-a - alpha sto-a ld-a - sto-b stp-b - beta sto-c stp-c - hd-a of-b of-c))))) - ((and maj-b call-fortran?) - (let-typed ((nr-c (nrows C) :type index-type) - (stp-c (col-stride C) :type index-type) - (sto-c (store c) :type ,(linear-array-type (getf opt :store-type))) - (stp-a (if (eq job-a :n) (col-stride A) (row-stride A)) :type index-type) - (sto-a (store A) :type ,(linear-array-type (getf opt :store-type))) - (nr-b (nrows B) :type index-type) - (nc-b (ncols B) :type index-type) - (hd-b (head B) :type index-type) - (fop-b (fortran-snop fop-b) :type (string 1)) - (sto-b (store B) :type ,(linear-array-type (getf opt :store-type))) - (strd-a (if (eq job-A :n) (row-stride A) (col-stride A)) :type index-type) - (strd-c (row-stride C) :type index-type)) - (when (eq maj-b :row-major) - (rotatef nr-b nc-b)) + (loop repeat nc-C + for of-B of-type index-type = (head B) then (+ of-B strd-B) + for of-C of-type index-type = (head C) then (+ of-C strd-C) + do (,blas-gemv-func fop-A nr-A nc-A + alpha sto-A ld-A + sto-B stp-B + beta sto-C stp-C + hd-A of-B of-C))))) + ((and maj-B call-fortran?) + (let-typed ((nr-C (nrows C) :type index-type) + (stp-C (col-stride C) :type index-type) + (strd-C (row-stride C) :type index-type) + (sto-C (store c) :type ,(linear-array-type (getf opt :store-type))) + ; + (stp-A (if (eq job-A :n) (col-stride A) (row-stride A)) :type index-type) + (strd-A (if (eq job-A :n) (row-stride A) (col-stride A)) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + ; + (nr-B (nrows B) :type index-type) + (nc-B (ncols B) :type index-type) + (hd-B (head B) :type index-type) + (fop-B (fortran-snop fop-B) :type (string 1)) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type)))) + (when (eq maj-B :row-major) + (rotatef nr-B nc-B)) (very-quickly - (mod-dotimes (idx (idxv nr-c)) - with (linear-sums - (of-A (idxv strd-a) (head A)) - (of-c (idxv strd-c) (head C))) - do (,blas-gemv-func fop-b nr-b nc-b - alpha sto-b ld-b - sto-a stp-a - beta sto-c stp-c - hd-b of-a of-c))))) + (loop repeat nr-C + for of-A of-type index-type = (head A) then (+ of-A strd-A) + for of-C of-type index-type = (head C) then (+ of-C strd-C) + do (,blas-gemv-func fop-B nr-B nc-B + alpha sto-B ld-B + sto-A stp-A + beta sto-C stp-C + hd-B of-A of-C))))) (t - (let-typed ((dotl (ecase job-a (:n (ncols A)) (:t (nrows A))) :type index-type) - (rstp-a (row-stride A) :type index-type) - (cstp-a (col-stride A) :type index-type) - (rstp-b (row-stride A) :type index-type) - (cstp-b (col-stride A) :type index-type) - (sto-a (store A) :type ,(linear-array-type (getf opt :store-type))) - (sto-b (store B) :type ,(linear-array-type (getf opt :store-type))) - (sto-c (store C) :type ,(linear-array-type (getf opt :store-type)))) - (when (eq job-a :t) - (rotatef rstp-a cstp-a)) - (when (eq job-b :t) - (rotatef rstp-b cstp-b)) + (let-typed ((nr-C (nrows C) :type index-type) + (nc-C (ncols C) :type index-type) + (dotl (ecase job-A (:n (ncols A)) (:t (nrows A))) :type index-type) + ; + (rstp-A (row-stride A) :type index-type) + (cstp-A (col-stride A) :type index-type) + (hd-A (head A) :type index-type) + (sto-A (store A) :type ,(linear-array-type (getf opt :store-type))) + ; + (rstp-B (row-stride B) :type index-type) + (cstp-B (col-stride B) :type index-type) + (hd-B (head B) :type index-type) + (sto-B (store B) :type ,(linear-array-type (getf opt :store-type))) + ; + (rstp-C (row-stride C) :type index-type) + (cstp-C (col-stride C) :type index-type) + (hd-C (head C) :type index-type) + (sto-C (store C) :type ,(linear-array-type (getf opt :store-type)))) + (when (eq job-A :t) + (rotatef rstp-A cstp-A)) + (when (eq job-B :t) + (rotatef rstp-B cstp-B)) (very-quickly - (mod-dotimes (idx (dimensions C)) - with (loop-order :row-major) - with (linear-sums - (of-a (idxv rstp-a 0) (head A)) - (of-b (idxv 0 cstp-b) (head B)) - (of-c (strides C) (head C))) - do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-c 'of-c)) :type ,(getf opt :element-type))) - (loop repeat dotl - for dof-a of-type index-type = of-a then (+ dof-a cstp-a) - for dof-b of-type index-type = of-b then (+ dof-b rstp-b) - summing (* ,(funcall (getf opt :reader) 'sto-a 'dof-a) - ,(funcall (getf opt :reader) 'sto-b 'dof-b)) into tmp of-type ,(getf opt :element-type) - finally ,(funcall (getf opt :value-writer) '(+ (* alpha tmp) val) 'sto-c 'of-c)))))))))) + (loop repeat nr-C + for rof-A of-type index-type = hd-A then (+ rof-A rstp-A) + for rof-C of-type index-type = hd-C then (+ rof-C rstp-C) + do (loop repeat nc-C + for cof-B of-type index-type = hd-B then (+ cof-B cstp-B) + for of-C of-type index-type = rof-C then (+ of-C cstp-C) + do (let-typed ((val (* beta ,(funcall (getf opt :reader) 'sto-C 'of-C)) :type ,(getf opt :element-type))) + (loop repeat dotl + for of-A of-type index-type = rof-A then (+ of-A cstp-A) + for of-B of-type index-type = cof-B then (+ of-B rstp-B) + summing (* ,(funcall (getf opt :reader) 'sto-A 'of-A) + ,(funcall (getf opt :reader) 'sto-B 'of-B)) into sum of-type ,(getf opt :element-type) + finally ,(funcall (getf opt :value-writer) '(+ (* alpha sum) val) 'sto-C 'of-C))))))))))) C))) -(generate-typed-gemm! real-typed-gemm! (real-matrix) (dgemm dgemv)) -(generate-typed-gemm! complex-typed-gemm! (complex-matrix) (zgemm zgemv)) +;;Tweakable +(defparameter *real-gemm-fortran-call-lower-bound* 100 + " + If the maximum dimension in the MM is lower than this + parameter, then the lisp code is used by default, instead of + calling BLAS. Used to avoid the FFI overhead when calling + MM with small matrices. + Default set with SBCL on x86-64 linux. A reasonable value + is something between 20 and 200.") +(generate-typed-gemm! real-typed-gemm! (real-matrix) (dgemm dgemv) + *real-gemm-fortran-call-lower-bound*) + +;;Tweakable +(defparameter *complex-gemm-fortran-call-lower-bound* 60 + " + If the maximum dimension in the MM is lower than this + parameter, then the lisp code is used by default, instead of + calling BLAS. Used to avoid the FFI overhead when calling + MM with small matrices. + Default set with SBCL on x86-64 linux. A reasonable value + is something between 20 and 200.") +(generate-typed-gemm! complex-typed-gemm! (complex-matrix) (zgemm zgemv) + *complex-gemm-fortran-call-lower-bound*) ;;---------------------------------------------------------------;; (defgeneric gemm! (alpha A B beta C &optional job) diff --git a/tests/loopy-tests.lisp b/tests/loopy-tests.lisp index de334ec..e17a747 100644 --- a/tests/loopy-tests.lisp +++ b/tests/loopy-tests.lisp @@ -39,27 +39,108 @@ (declare (type real-tensor t-a t-b t-c)) (let ((st-a (store t-a)) (st-b (store t-b)) - (st-c (store t-c))) - (declare (type (real-array *) st-a st-b st-c)) - (very-quickly - (mod-dotimes (idx (dimensions t-a)) - with (linear-sums - (of-a (strides t-a)) - (of-b (strides t-b)) - (of-c (strides t-c))) - do (setf (aref st-a of-a) (random 1d0) - (aref st-b of-b) (random 1d0) - (aref st-c of-c) 0d0))) - (time + (st-c (store t-c)) + (rstrd-a (row-stride t-a)) + (cstrd-a (col-stride t-a)) + (rstrd-b (row-stride t-b)) + (cstrd-b (col-stride t-b)) + (rstrd-c (row-stride t-c)) + (cstrd-c (col-stride t-c)) + (nr-c (nrows t-c)) + (nc-c (ncols t-c)) + (nc-a (ncols t-a)) + (hd-a (head t-a)) + (hd-b (head t-b)) + (hd-c (head t-c))) + (declare (type (real-array *) st-a st-b st-c) + (type index-type rstrd-a cstrd-a rstrd-b cstrd-b rstrd-c cstrd-c nr-c + nc-c nc-a hd-a hd-b hd-c)) + (mod-dotimes (idx (dimensions t-a)) + with (linear-sums + (of-a (strides t-a)) + (of-b (strides t-b)) + (of-c (strides t-c))) + do (setf (aref st-a of-a) (random 1d0) + (aref st-b of-b) (random 1d0) + (aref st-c of-c) 0d0)) + (time (very-quickly - (mod-dotimes (idx (idxv n n n)) + (loop repeat nr-c + for rof-a of-type index-type = hd-a then (+ rof-a rstrd-a) + for rof-c of-type index-type = hd-c then (+ rof-c rstrd-c) + do (loop repeat nc-c + for cof-b of-type index-type = hd-b then (+ cof-b cstrd-b) + for of-c of-type index-type = rof-c then (+ of-c cstrd-c) + do (loop repeat nc-a + for of-a of-type index-type = rof-a then (+ of-a cstrd-a) + for of-b of-type index-type = cof-b then (+ of-b rstrd-b) + summing (* (aref st-a of-a) (aref st-b of-b)) into sum of-type real-type + finally (setf (aref st-c of-c) sum)))) + #+nil(mod-dotimes (idx (dimensions t-c)) with (loop-order :row-major) with (linear-sums - (of-a (idxv n 1 0)) - (of-b (idxv 0 n 1)) - (of-c (idxv n 0 1))) + (of-a (idxv (row-stride t-a) 0) (head t-a)) + (of-b (idxv 0 (col-stride t-b)) (head t-b)) + (of-c (strides t-c) (head t-c))) do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b))))))))) + + +(defun test-mm-ddot (n) + (let ((t-a (make-real-tensor n n)) + (t-b (make-real-tensor n n)) + (t-c (make-real-tensor n n))) + (declare (type real-tensor t-a t-b t-c)) + (let ((st-a (store t-a)) + (st-b (store t-b)) + (st-c (store t-c)) + (rstrd-a (row-stride t-a)) + (cstrd-a (col-stride t-a)) + (rstrd-b (row-stride t-b)) + (cstrd-b (col-stride t-b)) + (rstrd-c (row-stride t-c)) + (cstrd-c (col-stride t-c)) + (nr-c (nrows t-c)) + (nc-c (ncols t-c)) + (nc-a (ncols t-a)) + (hd-a (head t-a)) + (hd-b (head t-b)) + (hd-c (head t-c))) + (declare (type (real-array *) st-a st-b st-c) + (type index-type rstrd-a cstrd-a rstrd-b cstrd-b rstrd-c cstrd-c nr-c + nc-c nc-a hd-a hd-b hd-c)) + (mod-dotimes (idx (dimensions t-a)) + with (linear-sums + (of-a (strides t-a)) + (of-b (strides t-b)) + (of-c (strides t-c))) + do (setf (aref st-a of-a) (random 1d0) + (aref st-b of-b) (random 1d0) + (aref st-c of-c) 0d0)) + (time + (very-quickly + (loop repeat nr-c + for rof-a of-type index-type = hd-a then (+ rof-a rstrd-a) + for rof-c of-type index-type = hd-c then (+ rof-c rstrd-c) + do (loop repeat nc-c + for cof-b of-type index-type = hd-b then (+ cof-b cstrd-b) + for of-c of-type index-type = rof-c then (+ of-c cstrd-c) + do (let ((dotp (ddot nc-a + st-a cstrd-a + st-b rstrd-b + rof-a cof-b))) + (declare (type real-type dotp)) + (setf (aref st-c of-c) dotp)))) + + #+nil(mod-dotimes (idx (dimensions t-c)) + with (loop-order :row-major) + with (linear-sums + (of-a (idxv (row-stride t-a) 0) (head t-a)) + (of-b (idxv 0 (col-stride t-b)) (head t-b)) + (of-c (strides t-c) (head t-c))) + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b))))))))) + + (defun test-mm-daxpy (n) (let* ((t-a (make-real-tensor n n)) (t-b (make-real-tensor n n)) ----------------------------------------------------------------------- Summary of changes: matlisp.asd | 6 +- src/classes/matrix.lisp | 2 +- src/level-2/gemv.lisp | 127 ++++++++++++++------------- src/level-3/gemm.lisp | 231 ++++++++++++++++++++++++++--------------------- tests/loopy-tests.lisp | 113 ++++++++++++++++++++---- 5 files changed, 296 insertions(+), 183 deletions(-) hooks/post-receive -- matlisp |