From: Akshay S. <aks...@gm...> - 2012-06-26 14:55:50
|
On 06/26/2012 08:24 PM, Akshay Srinivasan wrote: > This is an automated email from the git hooks/post-receive script. It was > generated because a ref change was pushed to the repository containing > the project "matlisp". > > The branch, tensor has been updated > via 82125cbf389c2f1bc43a5c661067400efcec64c3 (commit) > from 578dc43e356575b8c860f46f157c07d773843af8 (commit) > > Those revisions listed above that are new to this repository have > not appeared on any other notification email; so we list those > revisions in full, below. > > - Log ----------------------------------------------------------------- > commit 82125cbf389c2f1bc43a5c661067400efcec64c3 > Author: Akshay Srinivasan <aks...@gm...> > Date: Tue Jun 26 20:17:02 2012 +0530 > > Wrote a mid-way solution "mod-dotimes" to allow for very effective > representation of matrix multiplication. Naive matrix multiplication > on SBCL is about 200x than dgemm in OpenBLAS, but only about 2x slower That should be 200x slower (obviously). > compared to using naive nested loops in C. > > diff --git a/src/ffi-cffi-interpreter-specific.lisp b/src/ffi-cffi-interpreter-specific.lisp > index 4d51dc0..92c285f 100644 > --- a/src/ffi-cffi-interpreter-specific.lisp > +++ b/src/ffi-cffi-interpreter-specific.lisp > @@ -3,7 +3,7 @@ > > (in-package :ffi) > > -;;TODO: Add support for {Allegro CL, Lispworks, ECL, clisp} > +;;TODO: Add support for {ECL, clisp, Allegro CL, Lispworks} > > (defmacro with-fortran-float-modes (&body body) > "Execute the body with the IEEE FP modes appropriately set for Fortran" > diff --git a/src/loopy.lisp b/src/loopy.lisp > index d0cb80b..9d2e587 100644 > --- a/src/loopy.lisp > +++ b/src/loopy.lisp > @@ -23,7 +23,71 @@ is used, else the fortran routine is called instead. > unless (= off accumulated-off) do (return nil) > finally (return t)))) > > -(defmacro mod-loop ((idx dims) &body body) > + > +(linloop (idx #(2 2)) > + with (loop-order row-order) > + with (linear-sums > + (of-t #(..)) > + (of-a #(..))) > + do (..)) > + > +(defmacro mod-dotimes ((idx dims) &body body) > + (check-type idx symbol) > + (labels ((parse-code (body ret) > + (cond > + ((null body) > + (values nil ret)) > + ((eq (car body) 'with) > + (multiple-value-bind (indic decl) (parse-with (cadr body)) > + (setf (getf ret indic) decl)) > + (parse-code (cddr body) ret)) > + ((eq (car body) 'do) > + (values (cadr body) ret)) > + (t (error "unknown word passed to linloop: ~a" (car body))))) > + (parse-with (code) > + (cond > + ((eq (car code) 'linear-sums) > + (values :linear-sums > + (loop for decl in (cdr code) > + collect (destructuring-bind (offst strds &optional (init 0)) decl > + (list :offset-sym offst > + :offset-init init > + :stride-sym (gensym (string+ (symbol-name offst) "-stride")) > + :stride-expr strds))))) > + ((and (eq (car code) 'loop-order) > + (member (cadr code) '(:row-major :col-major))) > + (values :loop-order (second code))) > + (t (error "unknown word passed to linloop: ~a" (car code)))))) > + (multiple-value-bind (code sdecl) (parse-code body nil) > + (with-gensyms (dims-sym rank-sym count-sym) > + `(let* ((,dims-sym ,dims) > + (,rank-sym (length ,dims-sym)) > + (,idx (allocate-index-store ,rank-sym)) > + ,@(mapcar #'(lambda (x) `(,(getf x :stride-sym) ,(getf x :stride-expr))) (getf sdecl :linear-sums))) > + ,@(when (getf sdecl :linear-sums) > + `((declare (type (index-array *) ,@(mapcar #'(lambda (x) (getf x :stride-sym)) (getf sdecl :linear-sums)))))) > + (loop ,@(loop for decl in (getf sdecl :linear-sums) > + append `(with ,(getf decl :offset-sym) of-type index-type = ,(getf decl :offset-init))) > + do (,@code) > + while ,(append > + (if (member (getf sdecl :loop-order) '(nil :row-major)) > + `(loop for ,count-sym of-type index-type from (1- ,rank-sym) downto 0) > + `(loop for ,count-sym of-type index-type from 0 below ,rank-sym)) > + `(do (if (= (aref ,idx ,count-sym) (1- (aref ,dims-sym ,count-sym))) > + (progn > + (setf (aref ,idx ,count-sym) 0) > + ,@(loop for decl in (getf sdecl :linear-sums) > + collect `(decf ,(getf decl :offset-sym) (* (aref ,(getf decl :stride-sym) ,count-sym) (1- (aref ,dims-sym ,count-sym)))))) > + (progn > + (incf (aref ,idx ,count-sym)) > + ,@(loop for decl in (getf sdecl :linear-sums) > + collect `(incf ,(getf decl :offset-sym) (aref ,(getf decl :stride-sym) ,count-sym))) > + (return t))) > + finally (return nil))))))))) > + > + > +;;Very ugly inflexible code; get rid of this in some time or make use of mod-dotimes. > +#+nil(defmacro mod-loop ((idx dims) &body body) > (check-type idx symbol) > (let ((tensor-table (make-hash-table))) > (labels ((get-tensors (decl) > @@ -71,12 +135,7 @@ is used, else the fortran routine is called instead. > (let ((to-opt (gethash (second to) tensor-table))) > ;;Add type checking here! > (cdr (funcall (getf (get-tensor-class-optimization (getf to-opt :class)) :value-writer) > - from (getf to-opt :store-sym) (getf to-opt :offset-sym))))) > - (fr-t? > - (incref (second from)) > - (let ((fr-opt (gethash (second from) tensor-table))) > - (cons to (funcall (getf (get-tensor-class-optimization (getf fr-opt :class)) :reader) > - (getf fr-opt :store-sym) (getf fr-opt :offset-sym))))) > + (find-tensor-refs from nil) (getf to-opt :store-sym) (getf to-opt :offset-sym))))) > (t > (list to (find-tensor-refs from nil)))))))))) > (transform-tensor-ref (snippet) > diff --git a/src/standard-tensor.lisp b/src/standard-tensor.lisp > index f4c257b..84d796c 100644 > --- a/src/standard-tensor.lisp > +++ b/src/standard-tensor.lisp > @@ -49,6 +49,9 @@ > (make-array size :element-type 'index-type > :initial-contents contents))) > > +(definline idxv (&rest contents) > + (make-index-store contents)) > + > ;; > (defclass standard-tensor () > ((rank > @@ -121,7 +124,7 @@ > ((symbolp opt) > (get-tensor-class-optimization opt)) > ((null opt) nil) > - (t (value opt clname))))) > + (t (values opt clname))))) > > ;; Akshay: I have no idea what this does, or why we want it > ;; (inherited from standard-matrix.lisp > @@ -153,7 +156,7 @@ > (type (index-array *) idx strides dims)) > (let ((rank (length strides))) > (declare (type index-type rank)) > - (if (not (= rank (length idx))) > + (if (not (= rank (length idx) (length dims))) > (error 'tensor-index-rank-mismatch :index-rank (length idx) :rank rank) > (very-quickly > (loop > diff --git a/src/tensor-copy.lisp b/src/tensor-copy.lisp > index 2a1a2a3..3074106 100644 > --- a/src/tensor-copy.lisp > +++ b/src/tensor-copy.lisp > @@ -16,6 +16,70 @@ > (setf (tensor-ref x idx) (random 1d0))) > (time (tensor-copy x y))) > > +(defun test-mm (n) > + (let ((t-a (make-real-tensor-dims n n)) > + (t-b (make-real-tensor-dims n n)) > + (t-c (make-real-tensor-dims n n))) > + (declare (type real-tensor t-a t-b t-c)) > + (with-optimization (:speed 3 :safety 0 :space 0) > + (let ((st-a (store t-a)) > + (st-b (store t-b)) > + (st-c (store t-c))) > + (declare (type (real-array *) st-a st-b st-c)) > + (mod-dotimes (idx (dimensions t-a)) > + with (linear-sums > + (of-a (strides t-a)) > + (of-b (strides t-b)) > + (of-c (strides t-c))) > + do (setf (aref st-a of-a) (random 1d0) > + (aref st-b of-b) (random 1d0) > + (aref st-c of-c) 0d0)) > + (time (mod-dotimes (idx (idxv n n n)) > + with (loop-order :row-major) > + with (linear-sums > + (of-a (idxv n 1 0)) > + (of-b (idxv 0 n 1)) > + (of-c (idxv n 0 1))) > + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b))))))))) > + > + > +(defun test-mm () > + (let ((t-a (make-real-tensor-dims 1000 1000)) > + (t-b (make-real-tensor-dims 1000 1000)) > + (t-c (make-real-tensor-dims 1000 1000))) > + (declare (type real-tensor t-a t-b t-c)) > + (mod-loop (idx #(1000 1000)) > + (declare (type real-tensor t-a t-b)) > + (setf (tensor-ref t-a idx) (random 1d0) > + (tensor-ref t-b idx) (random 1d0))) > + (let* ((sr-a (strides t-a)) > + (st-a (store t-a)) > + (sr-b (strides t-b)) > + (st-b (store t-b)) > + (sr-c (strides t-c)) > + (st-c (store t-c)) > + (dims (dimensions t-a)) > + (rank 2) > + (idx (allocate-index-store rank))) > + (declare (type (index-array *) sr-a sr-b sr-c dims idx) > + (type (real-array *) st-a st-b st-c)) > + (time (very-quickly > + (loop > + with of-a of-type index-type = (head t-a) > + with of-b of-type index-type = (head t-b) > + with of-c of-type index-type = (head t-c) > + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b))) > + while (loop > + for i of-type index-type from (1- rank) downto 0 > + do (if (= (aref idx i) (1- (aref dims i))) > + (progn > + (setf (aref idx i) 0) > + (decf of-a (* (1- (aref dims i)) > + > + > + (loop for k from 0 below 1000 > + summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1))))))))) > + > (defmacro generate-typed-copy!-func (func store-type matrix-type blas-func) > ;;Be very careful when using functions generated by this macro. > ;;Indexes can be tricky and this has no safety net > @@ -41,23 +105,13 @@ > (loop for i from 0 below nr-a > do (,blas-func nc-a st-a cs-a st-b cs-b :head-x (+ hd-a (* i rs-a)) :head-y (+ hd-b (* i rs-b))))))) > mat-b)) > - > -(defun test-mm () > - (let ((t-a (make-real-tensor 1000 1000)) > - (t-b (make-real-tensor 1000 1000)) > - (t-c (make-real-tensor 1000 1000))) > - (declare (type real-tensor t-a t-b t-c)) > - (mod-loop (idx #(1000 1000)) > - (setf (tensor-ref t-c idx) > - (loop for k from 0 below 1000 > - summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1))))))))) > ;; > -#+nil > + > (defun test-tensor-1k-dot () > (declare (optimize (speed 3) (safety 0))) > - (let ((t-a (make-real-tensor 1000 1000)) > - (t-b (make-real-tensor 1000 1000)) > - (t-c (make-real-tensor 1000 1000))) > + (let ((t-a (make-real-tensor-dims 1000 1000)) > + (t-b (make-real-tensor-dims 1000 1000)) > + (t-c (make-real-tensor-dims 1000 1000))) > (declare (type real-tensor t-a t-b t-c)) > (let ((s-a (store t-a)) > (s-b (store t-b)) > @@ -71,5 +125,6 @@ > (multiple-value-bind (i j) (floor n 1000) > (declare (type index-type i j)) > (setf (aref s-c (+ (* i 1000) j)) > - (ddot 1000 (vector-data-address s-a) 1 (vector-data-address s-b) 1000 :head-x (* i 1000) :head-y j)))))))) > + (loop for k from 0 below 1000 > + summing (* (aref s-a (+ (* i 1000) k)) (aref s-b (+ (* k 1000) j))))))))))) > > > ----------------------------------------------------------------------- > > Summary of changes: > src/ffi-cffi-interpreter-specific.lisp | 2 +- > src/loopy.lisp | 73 ++++++++++++++++++++++++--- > src/standard-tensor.lisp | 7 ++- > src/tensor-copy.lisp | 85 ++++++++++++++++++++++++++------ > 4 files changed, 142 insertions(+), 25 deletions(-) > > > hooks/post-receive > |