From: Akshay S. <ak...@us...> - 2012-06-26 14:54:21
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "matlisp". The branch, tensor has been updated via 82125cbf389c2f1bc43a5c661067400efcec64c3 (commit) from 578dc43e356575b8c860f46f157c07d773843af8 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 82125cbf389c2f1bc43a5c661067400efcec64c3 Author: Akshay Srinivasan <aks...@gm...> Date: Tue Jun 26 20:17:02 2012 +0530 Wrote a mid-way solution "mod-dotimes" to allow for very effective representation of matrix multiplication. Naive matrix multiplication on SBCL is about 200x than dgemm in OpenBLAS, but only about 2x slower compared to using naive nested loops in C. diff --git a/src/ffi-cffi-interpreter-specific.lisp b/src/ffi-cffi-interpreter-specific.lisp index 4d51dc0..92c285f 100644 --- a/src/ffi-cffi-interpreter-specific.lisp +++ b/src/ffi-cffi-interpreter-specific.lisp @@ -3,7 +3,7 @@ (in-package :ffi) -;;TODO: Add support for {Allegro CL, Lispworks, ECL, clisp} +;;TODO: Add support for {ECL, clisp, Allegro CL, Lispworks} (defmacro with-fortran-float-modes (&body body) "Execute the body with the IEEE FP modes appropriately set for Fortran" diff --git a/src/loopy.lisp b/src/loopy.lisp index d0cb80b..9d2e587 100644 --- a/src/loopy.lisp +++ b/src/loopy.lisp @@ -23,7 +23,71 @@ is used, else the fortran routine is called instead. unless (= off accumulated-off) do (return nil) finally (return t)))) -(defmacro mod-loop ((idx dims) &body body) + +(linloop (idx #(2 2)) + with (loop-order row-order) + with (linear-sums + (of-t #(..)) + (of-a #(..))) + do (..)) + +(defmacro mod-dotimes ((idx dims) &body body) + (check-type idx symbol) + (labels ((parse-code (body ret) + (cond + ((null body) + (values nil ret)) + ((eq (car body) 'with) + (multiple-value-bind (indic decl) (parse-with (cadr body)) + (setf (getf ret indic) decl)) + (parse-code (cddr body) ret)) + ((eq (car body) 'do) + (values (cadr body) ret)) + (t (error "unknown word passed to linloop: ~a" (car body))))) + (parse-with (code) + (cond + ((eq (car code) 'linear-sums) + (values :linear-sums + (loop for decl in (cdr code) + collect (destructuring-bind (offst strds &optional (init 0)) decl + (list :offset-sym offst + :offset-init init + :stride-sym (gensym (string+ (symbol-name offst) "-stride")) + :stride-expr strds))))) + ((and (eq (car code) 'loop-order) + (member (cadr code) '(:row-major :col-major))) + (values :loop-order (second code))) + (t (error "unknown word passed to linloop: ~a" (car code)))))) + (multiple-value-bind (code sdecl) (parse-code body nil) + (with-gensyms (dims-sym rank-sym count-sym) + `(let* ((,dims-sym ,dims) + (,rank-sym (length ,dims-sym)) + (,idx (allocate-index-store ,rank-sym)) + ,@(mapcar #'(lambda (x) `(,(getf x :stride-sym) ,(getf x :stride-expr))) (getf sdecl :linear-sums))) + ,@(when (getf sdecl :linear-sums) + `((declare (type (index-array *) ,@(mapcar #'(lambda (x) (getf x :stride-sym)) (getf sdecl :linear-sums)))))) + (loop ,@(loop for decl in (getf sdecl :linear-sums) + append `(with ,(getf decl :offset-sym) of-type index-type = ,(getf decl :offset-init))) + do (,@code) + while ,(append + (if (member (getf sdecl :loop-order) '(nil :row-major)) + `(loop for ,count-sym of-type index-type from (1- ,rank-sym) downto 0) + `(loop for ,count-sym of-type index-type from 0 below ,rank-sym)) + `(do (if (= (aref ,idx ,count-sym) (1- (aref ,dims-sym ,count-sym))) + (progn + (setf (aref ,idx ,count-sym) 0) + ,@(loop for decl in (getf sdecl :linear-sums) + collect `(decf ,(getf decl :offset-sym) (* (aref ,(getf decl :stride-sym) ,count-sym) (1- (aref ,dims-sym ,count-sym)))))) + (progn + (incf (aref ,idx ,count-sym)) + ,@(loop for decl in (getf sdecl :linear-sums) + collect `(incf ,(getf decl :offset-sym) (aref ,(getf decl :stride-sym) ,count-sym))) + (return t))) + finally (return nil))))))))) + + +;;Very ugly inflexible code; get rid of this in some time or make use of mod-dotimes. +#+nil(defmacro mod-loop ((idx dims) &body body) (check-type idx symbol) (let ((tensor-table (make-hash-table))) (labels ((get-tensors (decl) @@ -71,12 +135,7 @@ is used, else the fortran routine is called instead. (let ((to-opt (gethash (second to) tensor-table))) ;;Add type checking here! (cdr (funcall (getf (get-tensor-class-optimization (getf to-opt :class)) :value-writer) - from (getf to-opt :store-sym) (getf to-opt :offset-sym))))) - (fr-t? - (incref (second from)) - (let ((fr-opt (gethash (second from) tensor-table))) - (cons to (funcall (getf (get-tensor-class-optimization (getf fr-opt :class)) :reader) - (getf fr-opt :store-sym) (getf fr-opt :offset-sym))))) + (find-tensor-refs from nil) (getf to-opt :store-sym) (getf to-opt :offset-sym))))) (t (list to (find-tensor-refs from nil)))))))))) (transform-tensor-ref (snippet) diff --git a/src/standard-tensor.lisp b/src/standard-tensor.lisp index f4c257b..84d796c 100644 --- a/src/standard-tensor.lisp +++ b/src/standard-tensor.lisp @@ -49,6 +49,9 @@ (make-array size :element-type 'index-type :initial-contents contents))) +(definline idxv (&rest contents) + (make-index-store contents)) + ;; (defclass standard-tensor () ((rank @@ -121,7 +124,7 @@ ((symbolp opt) (get-tensor-class-optimization opt)) ((null opt) nil) - (t (value opt clname))))) + (t (values opt clname))))) ;; Akshay: I have no idea what this does, or why we want it ;; (inherited from standard-matrix.lisp @@ -153,7 +156,7 @@ (type (index-array *) idx strides dims)) (let ((rank (length strides))) (declare (type index-type rank)) - (if (not (= rank (length idx))) + (if (not (= rank (length idx) (length dims))) (error 'tensor-index-rank-mismatch :index-rank (length idx) :rank rank) (very-quickly (loop diff --git a/src/tensor-copy.lisp b/src/tensor-copy.lisp index 2a1a2a3..3074106 100644 --- a/src/tensor-copy.lisp +++ b/src/tensor-copy.lisp @@ -16,6 +16,70 @@ (setf (tensor-ref x idx) (random 1d0))) (time (tensor-copy x y))) +(defun test-mm (n) + (let ((t-a (make-real-tensor-dims n n)) + (t-b (make-real-tensor-dims n n)) + (t-c (make-real-tensor-dims n n))) + (declare (type real-tensor t-a t-b t-c)) + (with-optimization (:speed 3 :safety 0 :space 0) + (let ((st-a (store t-a)) + (st-b (store t-b)) + (st-c (store t-c))) + (declare (type (real-array *) st-a st-b st-c)) + (mod-dotimes (idx (dimensions t-a)) + with (linear-sums + (of-a (strides t-a)) + (of-b (strides t-b)) + (of-c (strides t-c))) + do (setf (aref st-a of-a) (random 1d0) + (aref st-b of-b) (random 1d0) + (aref st-c of-c) 0d0)) + (time (mod-dotimes (idx (idxv n n n)) + with (loop-order :row-major) + with (linear-sums + (of-a (idxv n 1 0)) + (of-b (idxv 0 n 1)) + (of-c (idxv n 0 1))) + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b))))))))) + + +(defun test-mm () + (let ((t-a (make-real-tensor-dims 1000 1000)) + (t-b (make-real-tensor-dims 1000 1000)) + (t-c (make-real-tensor-dims 1000 1000))) + (declare (type real-tensor t-a t-b t-c)) + (mod-loop (idx #(1000 1000)) + (declare (type real-tensor t-a t-b)) + (setf (tensor-ref t-a idx) (random 1d0) + (tensor-ref t-b idx) (random 1d0))) + (let* ((sr-a (strides t-a)) + (st-a (store t-a)) + (sr-b (strides t-b)) + (st-b (store t-b)) + (sr-c (strides t-c)) + (st-c (store t-c)) + (dims (dimensions t-a)) + (rank 2) + (idx (allocate-index-store rank))) + (declare (type (index-array *) sr-a sr-b sr-c dims idx) + (type (real-array *) st-a st-b st-c)) + (time (very-quickly + (loop + with of-a of-type index-type = (head t-a) + with of-b of-type index-type = (head t-b) + with of-c of-type index-type = (head t-c) + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b))) + while (loop + for i of-type index-type from (1- rank) downto 0 + do (if (= (aref idx i) (1- (aref dims i))) + (progn + (setf (aref idx i) 0) + (decf of-a (* (1- (aref dims i)) + + + (loop for k from 0 below 1000 + summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1))))))))) + (defmacro generate-typed-copy!-func (func store-type matrix-type blas-func) ;;Be very careful when using functions generated by this macro. ;;Indexes can be tricky and this has no safety net @@ -41,23 +105,13 @@ (loop for i from 0 below nr-a do (,blas-func nc-a st-a cs-a st-b cs-b :head-x (+ hd-a (* i rs-a)) :head-y (+ hd-b (* i rs-b))))))) mat-b)) - -(defun test-mm () - (let ((t-a (make-real-tensor 1000 1000)) - (t-b (make-real-tensor 1000 1000)) - (t-c (make-real-tensor 1000 1000))) - (declare (type real-tensor t-a t-b t-c)) - (mod-loop (idx #(1000 1000)) - (setf (tensor-ref t-c idx) - (loop for k from 0 below 1000 - summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1))))))))) ;; -#+nil + (defun test-tensor-1k-dot () (declare (optimize (speed 3) (safety 0))) - (let ((t-a (make-real-tensor 1000 1000)) - (t-b (make-real-tensor 1000 1000)) - (t-c (make-real-tensor 1000 1000))) + (let ((t-a (make-real-tensor-dims 1000 1000)) + (t-b (make-real-tensor-dims 1000 1000)) + (t-c (make-real-tensor-dims 1000 1000))) (declare (type real-tensor t-a t-b t-c)) (let ((s-a (store t-a)) (s-b (store t-b)) @@ -71,5 +125,6 @@ (multiple-value-bind (i j) (floor n 1000) (declare (type index-type i j)) (setf (aref s-c (+ (* i 1000) j)) - (ddot 1000 (vector-data-address s-a) 1 (vector-data-address s-b) 1000 :head-x (* i 1000) :head-y j)))))))) + (loop for k from 0 below 1000 + summing (* (aref s-a (+ (* i 1000) k)) (aref s-b (+ (* k 1000) j))))))))))) ----------------------------------------------------------------------- Summary of changes: src/ffi-cffi-interpreter-specific.lisp | 2 +- src/loopy.lisp | 73 ++++++++++++++++++++++++--- src/standard-tensor.lisp | 7 ++- src/tensor-copy.lisp | 85 ++++++++++++++++++++++++++------ 4 files changed, 142 insertions(+), 25 deletions(-) hooks/post-receive -- matlisp |