From: Akshay S. <ak...@us...> - 2012-07-13 13:48:36
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "matlisp". The branch, tensor has been updated via 2b87e86f1392efee853a1807d7c9299fee1f7958 (commit) from 04ac7f795b17225ad7f942b85bad9508a885ee1e (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 2b87e86f1392efee853a1807d7c9299fee1f7958 Author: Akshay Srinivasan <aks...@gm...> Date: Fri Jul 13 11:36:34 2012 +0530 o Added fortran-call-lower-bound parameters to scal, dot and swap. diff --git a/matlisp.asd b/matlisp.asd index 6858f31..f5cb98e 100644 --- a/matlisp.asd +++ b/matlisp.asd @@ -109,13 +109,14 @@ :depends-on ("matlisp-base" "matlisp-classes" "foreign-core") :components ((:file "tensor-maker") (:file "swap") - (:file "dot") (:file "copy" :depends-on ("tensor-maker")) (:file "scal" :depends-on ("copy" "tensor-maker")) (:file "realimag" :depends-on ("copy")) + (:file "dot" + :depends-on ("realimag")) (:file "axpy" :depends-on ("copy")))) (:module "matlisp-level-2" diff --git a/src/classes/complex-tensor.lisp b/src/classes/complex-tensor.lisp index 828af4b..7eca0b9 100644 --- a/src/classes/complex-tensor.lisp +++ b/src/classes/complex-tensor.lisp @@ -1,5 +1,6 @@ (in-package #:matlisp) +;;Complex-base-type must always equal real-type. (deftype complex-base-type () "The type of the elements stored in a COMPLEX-MATRIX" 'double-float) diff --git a/src/level-1/copy.lisp b/src/level-1/copy.lisp index 24bb2d3..cf626f6 100644 --- a/src/level-1/copy.lisp +++ b/src/level-1/copy.lisp @@ -96,7 +96,7 @@ " If the dimension of the arguments is less than this parameter, then the Lisp version of copy is used. Default set with SBCL running - on x86-64 linux. A reasonable value would be something about 1000.") + on x86-64 linux. A reasonable value would be something above 1000.") (generate-typed-copy! real-typed-copy! (real-tensor dcopy *real-copy-fortran-call-lower-bound*)) diff --git a/src/level-1/dot.lisp b/src/level-1/dot.lisp index 59d00ab..09a4728 100644 --- a/src/level-1/dot.lisp +++ b/src/level-1/dot.lisp @@ -27,6 +27,87 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (in-package #:matlisp) +(defparameter *real-dot-fortran-call-lower-bound* 20000 + " + If the dimension of the arguments is less than this parameter, + then the Lisp version of copy is used. Default set with SBCL running + on x86-64 linux. A reasonable value would be something above 1000.") +(defun real-typed-dot (x y conjugate-p) + (declare (type real-vector x y) + (ignore conjugate-p)) + (let ((call-fortran? (> (number-of-elements x) + *real-dot-fortran-call-lower-bound*))) + (cond + (call-fortran? + (ddot (number-of-elements x) + (store x) (aref (strides x) 0) + (store y) (aref (strides y) 0) + (head x) (head y))) + (t + (let-typed + ((stp-x (aref (strides x) 0) :type index-type) + (sto-x (store x) :type (real-array *)) + (stp-y (aref (strides y) 0) :type index-type) + (sto-y (store y) :type (real-array *)) + (nele (number-of-elements x) :type index-type)) + (very-quickly + (loop repeat nele + for of-x of-type index-type = (head x) then (+ of-x stp-x) + for of-y of-type index-type = (head y) then (+ of-y stp-y) + summing (* (aref sto-x of-x) (aref sto-y of-y)) into dot of-type real-type + finally (return dot)))))))) + + +(defparameter *complex-dot-fortran-call-lower-bound* 10000 + " + If the dimension of the arguments is less than this parameter, + then the Lisp version of copy is used. Default set with SBCL running + on x86-64 linux. A reasonable value would be something above 1000.") +(defun complex-typed-dot (x y conjugate-p) + (declare (type complex-vector x y)) + (let ((call-fortran? (> (number-of-elements x) + *complex-dot-fortran-call-lower-bound*))) + (cond + (call-fortran? + (if conjugate-p + (zdotc (number-of-elements x) + (store x) (aref (strides x) 0) + (store y) (aref (strides y) 0) + (head x) (head y)) + (zdotu (number-of-elements x) + (store x) (aref (strides x) 0) + (store y) (aref (strides y) 0) + (head x) (head y)))) + (t + (let-typed + ((stp-x (aref (strides x) 0) :type index-type) + (sto-x (store x) :type (complex-base-array *)) + (stp-y (aref (strides y) 0) :type index-type) + (sto-y (store y) :type (complex-base-array *)) + (nele (number-of-elements x) :type index-type)) + (if conjugate-p + (very-quickly + (loop repeat nele + for of-x of-type index-type = (head x) then (+ of-x stp-x) + for of-y of-type index-type = (head y) then (+ of-y stp-y) + summing (let-typed ((xval (complex (aref sto-x (* 2 of-x)) (- (aref sto-x (1+ (* 2 of-x))))) :type complex-type) + (yval (complex (aref sto-y (* 2 of-y)) (aref sto-y (1+ (* 2 of-y)))) :type complex-type)) + (* xval yval)) + into dot of-type complex-type + finally (return dot))) + (very-quickly + (loop repeat nele + for of-x of-type index-type = (head x) then (+ of-x stp-x) + for of-y of-type index-type = (head y) then (+ of-y stp-y) + summing (let-typed ((xval (complex (aref sto-x (* 2 of-x)) (aref sto-x (1+ (* 2 of-x)))) :type complex-type) + (yval (complex (aref sto-y (* 2 of-y)) (aref sto-y (1+ (* 2 of-y)))) :type complex-type)) + (* xval yval)) + into dot of-type complex-type + finally (return dot))))))))) + +;;---------------------------------------------------------------;; + + (defgeneric dot (x y &optional conjugate-p) (:documentation " @@ -71,21 +152,16 @@ (defmethod dot ((x real-vector) (y real-vector) &optional (conjugate-p t)) (declare (ignore conjugate-p)) - (ddot (number-of-elements x) - (store x) (aref (strides x) 0) - (store y) (aref (strides y) 0) - (head x) (head y))) + (real-typed-dot x y nil)) (defmethod dot ((x real-vector) (y complex-vector) &optional (conjugate-p t)) (declare (ignore conjugate-p)) - (let ((nele (number-of-elements x)) - (std-x (aref (strides x) 0)) - (hd-x (head x)) - (std-y (aref (strides y) 0)) - (hd-y (head y))) - (declare (type index-type nele std-x std-y hd-x hd-y)) - (let ((rpart (ddot nele (store x) std-x (store y) (* 2 std-y) hd-x (* 2 hd-y))) - (ipart (ddot nele (store x) std-x (store y) (* 2 std-y) hd-x (1+ (* 2 hd-y))))) + (let ((vw.y (tensor-realpart~ y))) + (declare (type real-vector vw.y)) + (let ((rpart (prog1 (real-typed-dot x vw.y nil) + ;;Move view to complex-part + (incf (head vw.y)))) + (ipart (real-typed-dot x vw.y nil))) (declare (type complex-base-type rpart ipart)) (if (zerop ipart) rpart @@ -98,21 +174,4 @@ cres))) (defmethod dot ((x complex-vector) (y complex-vector) &optional (conjugate-p t)) - (let ((nele (number-of-elements x)) - (std-x (aref (strides x) 0)) - (hd-x (head x)) - (std-y (aref (strides y) 0)) - (hd-y (head y))) - (declare (type index-type nele std-x hd-x std-y hd-y)) - (let ((ret (if conjugate-p - (zdotc nele - (store x) std-x - (store y) std-y - hd-x hd-y) - (zdotu nele - (store x) std-x - (store y) std-y - hd-x hd-y)))) - (if (zerop (imagpart ret)) - (realpart ret) - ret)))) + (complex-typed-dot x y conjugate-p)) diff --git a/src/level-1/scal.lisp b/src/level-1/scal.lisp index 6d12a0e..c7604b1 100644 --- a/src/level-1/scal.lisp +++ b/src/level-1/scal.lisp @@ -28,31 +28,43 @@ (in-package #:matlisp) -(defmacro generate-typed-scal! (func (tensor-class blas-func)) +(defmacro generate-typed-scal! (func (tensor-class blas-func fortran-lb)) (let ((opt (get-tensor-class-optimization tensor-class))) (assert opt nil 'tensor-cannot-find-optimization :tensor-class tensor-class) `(defun ,func (alpha to) (declare (type ,tensor-class to) (type ,(getf opt :element-type) alpha)) - (if-let (min-stride (consecutive-store-p to)) - (,blas-func (number-of-elements to) alpha (store to) min-stride (head to)) - (let ((t-sto (store to))) - (declare (type ,(linear-array-type (getf opt :store-type)) t-sto)) - (very-quickly - ;;Can possibly make this faster (x2) by using ,blas-func in one of - ;;the inner loops, but this is to me messy and as of now unnecessary. - ;;SBCL can already achieve Fortran-ish speed inside this loop. - (mod-dotimes (idx (dimensions to)) - with (linear-sums - (t-of (strides to) (head to))) - do (let ((scal-val (* ,(funcall (getf opt :reader) 't-sto 't-of) alpha))) - ,(funcall (getf opt :value-writer) 'scal-val 't-sto 't-of)))))) + (let ((min-stride (consecutive-store-p to)) + (call-fortran? (> (number-of-elements to) ,fortran-lb))) + (cond + ((and min-stride call-fortran?) + (,blas-func (number-of-elements to) alpha (store to) min-stride (head to))) + (t + (let ((t-sto (store to))) + (declare (type ,(linear-array-type (getf opt :store-type)) t-sto)) + (very-quickly + (mod-dotimes (idx (dimensions to)) + with (linear-sums + (t-of (strides to) (head to))) + do (let ((scal-val (* ,(funcall (getf opt :reader) 't-sto 't-of) alpha))) + ,(funcall (getf opt :value-writer) 'scal-val 't-sto 't-of)))))))) to))) ;; TODO: Maybe add zdscal support ? Don't think the difference between ;; zdscal and zscal is significant, except for very large arrays. -(generate-typed-scal! real-typed-scal! (real-tensor dscal)) -(generate-typed-scal! complex-typed-scal! (complex-tensor zscal)) +(defparameter *real-scal-fortran-call-lower-bound* 20000 + " + If the dimension of the arguments is less than this parameter, + then the Lisp version of copy is used. Default set with SBCL running + on x86-64 linux. A reasonable value would be something above 1000.") +(generate-typed-scal! real-typed-scal! (real-tensor dscal *real-scal-fortran-call-lower-bound*)) + +(defparameter *complex-scal-fortran-call-lower-bound* 10000 + " + If the dimension of the arguments is less than this parameter, + then the Lisp version of copy is used. Default set with SBCL running + on x86-64 linux. A reasonable value would be something above 1000.") +(generate-typed-scal! complex-typed-scal! (complex-tensor zscal *complex-scal-fortran-call-lower-bound*)) ;;---------------------------------------------------------------;; (defgeneric scal! (alpha x) diff --git a/src/level-1/swap.lisp b/src/level-1/swap.lisp index 849c62b..7f80c78 100644 --- a/src/level-1/swap.lisp +++ b/src/level-1/swap.lisp @@ -28,7 +28,7 @@ (in-package #:matlisp) -(defmacro generate-typed-swap! (func (tensor-class blas-func)) +(defmacro generate-typed-swap! (func (tensor-class blas-func fortran-lb)) ;;Be very careful when using functions generated by this macro. ;;Indexes can be tricky and this has no safety net ;;Use only after checking the arguments for compatibility. @@ -36,24 +36,36 @@ (assert opt nil 'tensor-cannot-find-optimization :tensor-class tensor-class) `(defun ,func (x y) (declare (type ,tensor-class x y)) - (if-let (strd-p (blas-copyable-p x y)) - (,blas-func (number-of-elements x) (store x) (first strd-p) (store y) (second strd-p) (head x) (head y)) - (let ((f-sto (store x)) - (t-sto (store y))) - (declare (type ,(linear-array-type (getf opt :store-type)) f-sto t-sto)) - (very-quickly - ;;One would question the wisdom in calling the Fortran method here. - ;;Simple benchmarks proved that SBCL is as quick as or better than - ;;OpenBLAS's methods - (mod-dotimes (idx (dimensions x)) - with (linear-sums - (f-of (strides x) (head x)) - (t-of (strides y) (head y))) - do ,(funcall (getf opt :swapper) 'f-sto 'f-of 't-sto 't-of))))) + (let ((strd-p (blas-copyable-p x y)) + (call-fortran? (> (number-of-elements x) ,fortran-lb))) + (cond + ((and strd-p call-fortran?) + (,blas-func (number-of-elements x) (store x) (first strd-p) (store y) (second strd-p) (head x) (head y))) + (t + (let ((f-sto (store x)) + (t-sto (store y))) + (declare (type ,(linear-array-type (getf opt :store-type)) f-sto t-sto)) + (very-quickly + (mod-dotimes (idx (dimensions x)) + with (linear-sums + (f-of (strides x) (head x)) + (t-of (strides y) (head y))) + do ,(funcall (getf opt :swapper) 'f-sto 'f-of 't-sto 't-of))))))) y))) -(generate-typed-swap! real-typed-swap! (real-tensor dswap)) -(generate-typed-swap! complex-typed-swap! (complex-tensor zswap)) +(defparameter *real-swap-fortran-call-lower-bound* 20000 + " + If the dimension of the arguments is less than this parameter, + then the Lisp version of copy is used. Default set with SBCL running + on x86-64 linux. A reasonable value would be something above 1000.") +(generate-typed-swap! real-typed-swap! (real-tensor dswap *real-swap-fortran-call-lower-bound*)) + +(defparameter *complex-scal-fortran-call-lower-bound* 10000 + " + If the dimension of the arguments is less than this parameter, + then the Lisp version of copy is used. Default set with SBCL running + on x86-64 linux. A reasonable value would be something above 1000.") +(generate-typed-swap! complex-typed-swap! (complex-tensor zswap *complex-scal-fortran-call-lower-bound*)) ;;---------------------------------------------------------------;; (defgeneric swap! (x y) ----------------------------------------------------------------------- Summary of changes: matlisp.asd | 3 +- src/classes/complex-tensor.lisp | 1 + src/level-1/copy.lisp | 2 +- src/level-1/dot.lisp | 119 +++++++++++++++++++++++++++++---------- src/level-1/scal.lisp | 44 +++++++++----- src/level-1/swap.lisp | 46 ++++++++++------ 6 files changed, 150 insertions(+), 65 deletions(-) hooks/post-receive -- matlisp |