Re: [Matlisp-commit] [matlisp-git]matlisp branch tensor updated. 2012-02-23-94-g82125cb

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

On 06/26/2012 08:24 PM, Akshay Srinivasan wrote:
> This is an automated email from the git hooks/post-receive script. It was
> generated because a ref change was pushed to the repository containing
> the project "matlisp".
> 
> The branch, tensor has been updated
>        via  82125cbf389c2f1bc43a5c661067400efcec64c3 (commit)
>       from  578dc43e356575b8c860f46f157c07d773843af8 (commit)
> 
> Those revisions listed above that are new to this repository have
> not appeared on any other notification email; so we list those
> revisions in full, below.
> 
> - Log -----------------------------------------------------------------
> commit 82125cbf389c2f1bc43a5c661067400efcec64c3
> Author: Akshay Srinivasan <aks...@gm...>
> Date:   Tue Jun 26 20:17:02 2012 +0530
> 
>     Wrote a mid-way solution "mod-dotimes" to allow for very effective
>     representation  of matrix multiplication. Naive matrix multiplication
>     on SBCL is about 200x than dgemm in OpenBLAS, but only about 2x slower
That should be 200x slower (obviously).

>     compared to using naive nested loops in C.
> 
> diff --git a/src/ffi-cffi-interpreter-specific.lisp b/src/ffi-cffi-interpreter-specific.lisp
> index 4d51dc0..92c285f 100644
> --- a/src/ffi-cffi-interpreter-specific.lisp
> +++ b/src/ffi-cffi-interpreter-specific.lisp
> @@ -3,7 +3,7 @@
>  
>  (in-package :ffi)
>  
> -;;TODO: Add support for {Allegro CL, Lispworks, ECL, clisp}
> +;;TODO: Add support for {ECL, clisp, Allegro CL, Lispworks}
>  
>  (defmacro with-fortran-float-modes (&body body)
>    "Execute the body with the IEEE FP modes appropriately set for Fortran"
> diff --git a/src/loopy.lisp b/src/loopy.lisp
> index d0cb80b..9d2e587 100644
> --- a/src/loopy.lisp
> +++ b/src/loopy.lisp
> @@ -23,7 +23,71 @@ is used, else the fortran routine is called instead.
>         unless (= off accumulated-off) do (return nil)
>         finally (return t))))
>  
> -(defmacro mod-loop ((idx dims) &body body)
> +
> +(linloop (idx #(2 2))
> +  with (loop-order row-order)
> +  with (linear-sums
> +	(of-t #(..))
> +	(of-a #(..)))
> +  do (..))
> +
> +(defmacro mod-dotimes ((idx dims) &body body)
> +  (check-type idx symbol)
> +  (labels ((parse-code (body ret)
> +	     (cond
> +	       ((null body)
> +		(values nil ret))
> +	       ((eq (car body) 'with)
> +		(multiple-value-bind (indic decl) (parse-with (cadr body))
> +		  (setf (getf ret indic) decl))
> +		(parse-code (cddr body) ret))
> +	       ((eq (car body) 'do)
> +		(values (cadr body) ret))
> +	       (t (error "unknown word passed to linloop: ~a" (car body)))))
> +	   (parse-with (code)
> +	     (cond
> +	       ((eq (car code) 'linear-sums)
> +		(values :linear-sums
> +			(loop for decl in (cdr code)
> +			   collect (destructuring-bind (offst strds &optional (init 0)) decl
> +				     (list :offset-sym offst
> +					   :offset-init init
> +					   :stride-sym (gensym (string+ (symbol-name offst) "-stride"))
> +					   :stride-expr strds)))))
> +	       ((and (eq (car code) 'loop-order)
> +		     (member (cadr code) '(:row-major :col-major)))
> +		(values :loop-order (second code)))
> +	       (t (error "unknown word passed to linloop: ~a" (car code))))))
> +    (multiple-value-bind (code sdecl) (parse-code body nil)
> +      (with-gensyms (dims-sym rank-sym count-sym)
> +	`(let* ((,dims-sym ,dims)
> +		(,rank-sym (length ,dims-sym))
> +		(,idx (allocate-index-store ,rank-sym))
> +		,@(mapcar #'(lambda (x) `(,(getf x :stride-sym) ,(getf x :stride-expr))) (getf sdecl :linear-sums)))
> +	   ,@(when (getf sdecl :linear-sums)
> +		   `((declare (type (index-array *) ,@(mapcar #'(lambda (x) (getf x :stride-sym)) (getf sdecl :linear-sums))))))
> +	   (loop ,@(loop for decl in (getf sdecl :linear-sums)
> +		      append `(with ,(getf decl :offset-sym) of-type index-type = ,(getf decl :offset-init)))
> +	      do (,@code)
> +	      while ,(append
> +		      (if (member (getf sdecl :loop-order) '(nil :row-major))
> +			  `(loop for ,count-sym of-type index-type from (1- ,rank-sym) downto 0)
> +			  `(loop for ,count-sym of-type index-type from 0 below ,rank-sym))
> +		      `(do (if (= (aref ,idx ,count-sym) (1- (aref ,dims-sym ,count-sym)))
> +			       (progn
> +				 (setf (aref ,idx ,count-sym) 0)
> +				 ,@(loop for decl in (getf sdecl :linear-sums)
> +				      collect `(decf ,(getf decl :offset-sym) (* (aref ,(getf decl :stride-sym) ,count-sym) (1- (aref ,dims-sym ,count-sym))))))
> +			       (progn
> +				 (incf (aref ,idx ,count-sym))
> +				 ,@(loop for decl in (getf sdecl :linear-sums)
> +				      collect `(incf ,(getf decl :offset-sym) (aref ,(getf decl :stride-sym) ,count-sym)))
> +				 (return t)))
> +			   finally (return nil)))))))))
> +
> +
> +;;Very ugly inflexible code; get rid of this in some time or make use of mod-dotimes.
> +#+nil(defmacro mod-loop ((idx dims) &body body)
>    (check-type idx symbol)
>    (let ((tensor-table (make-hash-table)))
>      (labels ((get-tensors (decl)
> @@ -71,12 +135,7 @@ is used, else the fortran routine is called instead.
>  				   (let ((to-opt (gethash (second to) tensor-table)))
>  				     ;;Add type checking here!
>  				     (cdr (funcall (getf (get-tensor-class-optimization (getf to-opt :class)) :value-writer)
> -						   from (getf to-opt :store-sym) (getf to-opt :offset-sym)))))
> -				  (fr-t?
> -				   (incref (second from))
> -				   (let ((fr-opt (gethash (second from) tensor-table)))
> -				     (cons to (funcall (getf (get-tensor-class-optimization (getf fr-opt :class)) :reader)
> -						       (getf fr-opt :store-sym) (getf fr-opt :offset-sym)))))
> +						   (find-tensor-refs from nil) (getf to-opt :store-sym) (getf to-opt :offset-sym)))))
>  				  (t
>  				   (list to (find-tensor-refs from nil))))))))))
>  	     (transform-tensor-ref (snippet)
> diff --git a/src/standard-tensor.lisp b/src/standard-tensor.lisp
> index f4c257b..84d796c 100644
> --- a/src/standard-tensor.lisp
> +++ b/src/standard-tensor.lisp
> @@ -49,6 +49,9 @@
>      (make-array size :element-type 'index-type
>  		:initial-contents contents)))
>  
> +(definline idxv (&rest contents)
> +  (make-index-store contents))
> +
>  ;;
>  (defclass standard-tensor ()
>    ((rank
> @@ -121,7 +124,7 @@
>        ((symbolp opt)
>         (get-tensor-class-optimization opt))
>        ((null opt) nil)
> -      (t (value opt clname)))))
> +      (t (values opt clname)))))
>  
>  ;; Akshay: I have no idea what this does, or why we want it
>  ;; (inherited from standard-matrix.lisp
> @@ -153,7 +156,7 @@
>  	   (type (index-array *) idx strides dims))
>    (let ((rank (length strides)))
>      (declare (type index-type rank))
> -    (if (not (= rank (length idx)))
> +    (if (not (= rank (length idx) (length dims)))
>  	(error 'tensor-index-rank-mismatch :index-rank (length idx) :rank rank)
>  	(very-quickly
>  	  (loop
> diff --git a/src/tensor-copy.lisp b/src/tensor-copy.lisp
> index 2a1a2a3..3074106 100644
> --- a/src/tensor-copy.lisp
> +++ b/src/tensor-copy.lisp
> @@ -16,6 +16,70 @@
>      (setf (tensor-ref x idx) (random 1d0)))
>    (time (tensor-copy x y)))
>  
> +(defun test-mm (n)
> +  (let ((t-a (make-real-tensor-dims n n))
> +	(t-b (make-real-tensor-dims n n))
> +	(t-c (make-real-tensor-dims n n)))
> +    (declare (type real-tensor t-a t-b t-c))
> +    (with-optimization (:speed 3 :safety 0 :space 0)
> +      (let ((st-a (store t-a))
> +	    (st-b (store t-b))
> +	    (st-c (store t-c)))
> +	(declare (type (real-array *) st-a st-b st-c))
> +	(mod-dotimes (idx (dimensions t-a))
> +	  with (linear-sums
> +		(of-a (strides t-a))
> +		(of-b (strides t-b))
> +		(of-c (strides t-c)))
> +	  do (setf (aref st-a of-a) (random 1d0)
> +		   (aref st-b of-b) (random 1d0)
> +		   (aref st-c of-c) 0d0))
> +	(time (mod-dotimes (idx (idxv n n n))
> +		with (loop-order :row-major)
> +		with (linear-sums
> +		      (of-a (idxv n 1 0))
> +		      (of-b (idxv 0 n 1))
> +		      (of-c (idxv n 0 1)))
> +		do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b)))))))))
> +
> +    
> +(defun test-mm ()
> +  (let ((t-a (make-real-tensor-dims 1000 1000))
> +	(t-b (make-real-tensor-dims 1000 1000))
> +	(t-c (make-real-tensor-dims 1000 1000)))
> +    (declare (type real-tensor t-a t-b t-c))
> +    (mod-loop (idx #(1000 1000))
> +      (declare (type real-tensor t-a t-b))
> +      (setf (tensor-ref t-a idx) (random 1d0)
> +	    (tensor-ref t-b idx) (random 1d0)))
> +    (let* ((sr-a (strides t-a))
> +	   (st-a (store t-a))
> +	   (sr-b (strides t-b))
> +	   (st-b (store t-b))
> +	   (sr-c (strides t-c))
> +	   (st-c (store t-c))
> +	   (dims (dimensions t-a))
> +	   (rank 2)
> +	   (idx (allocate-index-store rank)))
> +      (declare (type (index-array *) sr-a sr-b sr-c dims idx)
> +	       (type (real-array *) st-a st-b st-c))
> +      (time (very-quickly
> +	(loop
> +	   with of-a of-type index-type = (head t-a)
> +	   with of-b of-type index-type = (head t-b)
> +	   with of-c of-type index-type = (head t-c)
> +	   do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b)))
> +	   while (loop
> +		    for i of-type index-type from (1- rank) downto 0
> +		    do (if (= (aref idx i) (1- (aref dims i)))
> +			   (progn
> +			     (setf (aref idx i) 0)
> +			     (decf of-a (* (1- (aref dims i))
> +	   
> +
> +	    (loop for k from 0 below 1000
> +	       summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1)))))))))
> +
>  (defmacro generate-typed-copy!-func (func store-type matrix-type blas-func)
>    ;;Be very careful when using functions generated by this macro.
>    ;;Indexes can be tricky and this has no safety net
> @@ -41,23 +105,13 @@
>  		       (loop for i from 0 below nr-a
>  			  do (,blas-func nc-a st-a cs-a st-b cs-b :head-x (+ hd-a (* i rs-a)) :head-y (+ hd-b (* i rs-b)))))))
>       mat-b))
> -
> -(defun test-mm ()
> -  (let ((t-a (make-real-tensor 1000 1000))
> -	(t-b (make-real-tensor 1000 1000))
> -	(t-c (make-real-tensor 1000 1000)))
> -    (declare (type real-tensor t-a t-b t-c))
> -    (mod-loop (idx #(1000 1000))
> -      (setf (tensor-ref t-c idx)
> -	    (loop for k from 0 below 1000
> -	       summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1)))))))))
>  ;;
> -#+nil
> +
>  (defun test-tensor-1k-dot ()
>    (declare (optimize (speed 3) (safety 0)))
> -  (let ((t-a (make-real-tensor 1000 1000))
> -	(t-b (make-real-tensor 1000 1000))
> -	(t-c (make-real-tensor 1000 1000)))
> +  (let ((t-a (make-real-tensor-dims 1000 1000))
> +	(t-b (make-real-tensor-dims 1000 1000))
> +	(t-c (make-real-tensor-dims 1000 1000)))
>      (declare (type real-tensor t-a t-b t-c))
>      (let ((s-a (store t-a))
>  	  (s-b (store t-b))
> @@ -71,5 +125,6 @@
>  	    (multiple-value-bind (i j) (floor n 1000)
>  	      (declare (type index-type i j))
>  	      (setf (aref s-c (+ (* i 1000) j))
> -		    (ddot 1000 (vector-data-address s-a) 1 (vector-data-address s-b) 1000 :head-x (* i 1000) :head-y j))))))))
> +		    (loop for k from 0 below 1000
> +			 summing (* (aref s-a (+ (* i 1000) k)) (aref s-b (+ (* k 1000) j)))))))))))
>  
> 
> -----------------------------------------------------------------------
> 
> Summary of changes:
>  src/ffi-cffi-interpreter-specific.lisp |    2 +-
>  src/loopy.lisp                         |   73 ++++++++++++++++++++++++---
>  src/standard-tensor.lisp               |    7 ++-
>  src/tensor-copy.lisp                   |   85 ++++++++++++++++++++++++++------
>  4 files changed, 142 insertions(+), 25 deletions(-)
> 
> 
> hooks/post-receive
>