|
From: Akshay S. <aks...@gm...> - 2012-06-26 14:55:50
|
On 06/26/2012 08:24 PM, Akshay Srinivasan wrote:
> This is an automated email from the git hooks/post-receive script. It was
> generated because a ref change was pushed to the repository containing
> the project "matlisp".
>
> The branch, tensor has been updated
> via 82125cbf389c2f1bc43a5c661067400efcec64c3 (commit)
> from 578dc43e356575b8c860f46f157c07d773843af8 (commit)
>
> Those revisions listed above that are new to this repository have
> not appeared on any other notification email; so we list those
> revisions in full, below.
>
> - Log -----------------------------------------------------------------
> commit 82125cbf389c2f1bc43a5c661067400efcec64c3
> Author: Akshay Srinivasan <aks...@gm...>
> Date: Tue Jun 26 20:17:02 2012 +0530
>
> Wrote a mid-way solution "mod-dotimes" to allow for very effective
> representation of matrix multiplication. Naive matrix multiplication
> on SBCL is about 200x than dgemm in OpenBLAS, but only about 2x slower
That should be 200x slower (obviously).
> compared to using naive nested loops in C.
>
> diff --git a/src/ffi-cffi-interpreter-specific.lisp b/src/ffi-cffi-interpreter-specific.lisp
> index 4d51dc0..92c285f 100644
> --- a/src/ffi-cffi-interpreter-specific.lisp
> +++ b/src/ffi-cffi-interpreter-specific.lisp
> @@ -3,7 +3,7 @@
>
> (in-package :ffi)
>
> -;;TODO: Add support for {Allegro CL, Lispworks, ECL, clisp}
> +;;TODO: Add support for {ECL, clisp, Allegro CL, Lispworks}
>
> (defmacro with-fortran-float-modes (&body body)
> "Execute the body with the IEEE FP modes appropriately set for Fortran"
> diff --git a/src/loopy.lisp b/src/loopy.lisp
> index d0cb80b..9d2e587 100644
> --- a/src/loopy.lisp
> +++ b/src/loopy.lisp
> @@ -23,7 +23,71 @@ is used, else the fortran routine is called instead.
> unless (= off accumulated-off) do (return nil)
> finally (return t))))
>
> -(defmacro mod-loop ((idx dims) &body body)
> +
> +(linloop (idx #(2 2))
> + with (loop-order row-order)
> + with (linear-sums
> + (of-t #(..))
> + (of-a #(..)))
> + do (..))
> +
> +(defmacro mod-dotimes ((idx dims) &body body)
> + (check-type idx symbol)
> + (labels ((parse-code (body ret)
> + (cond
> + ((null body)
> + (values nil ret))
> + ((eq (car body) 'with)
> + (multiple-value-bind (indic decl) (parse-with (cadr body))
> + (setf (getf ret indic) decl))
> + (parse-code (cddr body) ret))
> + ((eq (car body) 'do)
> + (values (cadr body) ret))
> + (t (error "unknown word passed to linloop: ~a" (car body)))))
> + (parse-with (code)
> + (cond
> + ((eq (car code) 'linear-sums)
> + (values :linear-sums
> + (loop for decl in (cdr code)
> + collect (destructuring-bind (offst strds &optional (init 0)) decl
> + (list :offset-sym offst
> + :offset-init init
> + :stride-sym (gensym (string+ (symbol-name offst) "-stride"))
> + :stride-expr strds)))))
> + ((and (eq (car code) 'loop-order)
> + (member (cadr code) '(:row-major :col-major)))
> + (values :loop-order (second code)))
> + (t (error "unknown word passed to linloop: ~a" (car code))))))
> + (multiple-value-bind (code sdecl) (parse-code body nil)
> + (with-gensyms (dims-sym rank-sym count-sym)
> + `(let* ((,dims-sym ,dims)
> + (,rank-sym (length ,dims-sym))
> + (,idx (allocate-index-store ,rank-sym))
> + ,@(mapcar #'(lambda (x) `(,(getf x :stride-sym) ,(getf x :stride-expr))) (getf sdecl :linear-sums)))
> + ,@(when (getf sdecl :linear-sums)
> + `((declare (type (index-array *) ,@(mapcar #'(lambda (x) (getf x :stride-sym)) (getf sdecl :linear-sums))))))
> + (loop ,@(loop for decl in (getf sdecl :linear-sums)
> + append `(with ,(getf decl :offset-sym) of-type index-type = ,(getf decl :offset-init)))
> + do (,@code)
> + while ,(append
> + (if (member (getf sdecl :loop-order) '(nil :row-major))
> + `(loop for ,count-sym of-type index-type from (1- ,rank-sym) downto 0)
> + `(loop for ,count-sym of-type index-type from 0 below ,rank-sym))
> + `(do (if (= (aref ,idx ,count-sym) (1- (aref ,dims-sym ,count-sym)))
> + (progn
> + (setf (aref ,idx ,count-sym) 0)
> + ,@(loop for decl in (getf sdecl :linear-sums)
> + collect `(decf ,(getf decl :offset-sym) (* (aref ,(getf decl :stride-sym) ,count-sym) (1- (aref ,dims-sym ,count-sym))))))
> + (progn
> + (incf (aref ,idx ,count-sym))
> + ,@(loop for decl in (getf sdecl :linear-sums)
> + collect `(incf ,(getf decl :offset-sym) (aref ,(getf decl :stride-sym) ,count-sym)))
> + (return t)))
> + finally (return nil)))))))))
> +
> +
> +;;Very ugly inflexible code; get rid of this in some time or make use of mod-dotimes.
> +#+nil(defmacro mod-loop ((idx dims) &body body)
> (check-type idx symbol)
> (let ((tensor-table (make-hash-table)))
> (labels ((get-tensors (decl)
> @@ -71,12 +135,7 @@ is used, else the fortran routine is called instead.
> (let ((to-opt (gethash (second to) tensor-table)))
> ;;Add type checking here!
> (cdr (funcall (getf (get-tensor-class-optimization (getf to-opt :class)) :value-writer)
> - from (getf to-opt :store-sym) (getf to-opt :offset-sym)))))
> - (fr-t?
> - (incref (second from))
> - (let ((fr-opt (gethash (second from) tensor-table)))
> - (cons to (funcall (getf (get-tensor-class-optimization (getf fr-opt :class)) :reader)
> - (getf fr-opt :store-sym) (getf fr-opt :offset-sym)))))
> + (find-tensor-refs from nil) (getf to-opt :store-sym) (getf to-opt :offset-sym)))))
> (t
> (list to (find-tensor-refs from nil))))))))))
> (transform-tensor-ref (snippet)
> diff --git a/src/standard-tensor.lisp b/src/standard-tensor.lisp
> index f4c257b..84d796c 100644
> --- a/src/standard-tensor.lisp
> +++ b/src/standard-tensor.lisp
> @@ -49,6 +49,9 @@
> (make-array size :element-type 'index-type
> :initial-contents contents)))
>
> +(definline idxv (&rest contents)
> + (make-index-store contents))
> +
> ;;
> (defclass standard-tensor ()
> ((rank
> @@ -121,7 +124,7 @@
> ((symbolp opt)
> (get-tensor-class-optimization opt))
> ((null opt) nil)
> - (t (value opt clname)))))
> + (t (values opt clname)))))
>
> ;; Akshay: I have no idea what this does, or why we want it
> ;; (inherited from standard-matrix.lisp
> @@ -153,7 +156,7 @@
> (type (index-array *) idx strides dims))
> (let ((rank (length strides)))
> (declare (type index-type rank))
> - (if (not (= rank (length idx)))
> + (if (not (= rank (length idx) (length dims)))
> (error 'tensor-index-rank-mismatch :index-rank (length idx) :rank rank)
> (very-quickly
> (loop
> diff --git a/src/tensor-copy.lisp b/src/tensor-copy.lisp
> index 2a1a2a3..3074106 100644
> --- a/src/tensor-copy.lisp
> +++ b/src/tensor-copy.lisp
> @@ -16,6 +16,70 @@
> (setf (tensor-ref x idx) (random 1d0)))
> (time (tensor-copy x y)))
>
> +(defun test-mm (n)
> + (let ((t-a (make-real-tensor-dims n n))
> + (t-b (make-real-tensor-dims n n))
> + (t-c (make-real-tensor-dims n n)))
> + (declare (type real-tensor t-a t-b t-c))
> + (with-optimization (:speed 3 :safety 0 :space 0)
> + (let ((st-a (store t-a))
> + (st-b (store t-b))
> + (st-c (store t-c)))
> + (declare (type (real-array *) st-a st-b st-c))
> + (mod-dotimes (idx (dimensions t-a))
> + with (linear-sums
> + (of-a (strides t-a))
> + (of-b (strides t-b))
> + (of-c (strides t-c)))
> + do (setf (aref st-a of-a) (random 1d0)
> + (aref st-b of-b) (random 1d0)
> + (aref st-c of-c) 0d0))
> + (time (mod-dotimes (idx (idxv n n n))
> + with (loop-order :row-major)
> + with (linear-sums
> + (of-a (idxv n 1 0))
> + (of-b (idxv 0 n 1))
> + (of-c (idxv n 0 1)))
> + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b)))))))))
> +
> +
> +(defun test-mm ()
> + (let ((t-a (make-real-tensor-dims 1000 1000))
> + (t-b (make-real-tensor-dims 1000 1000))
> + (t-c (make-real-tensor-dims 1000 1000)))
> + (declare (type real-tensor t-a t-b t-c))
> + (mod-loop (idx #(1000 1000))
> + (declare (type real-tensor t-a t-b))
> + (setf (tensor-ref t-a idx) (random 1d0)
> + (tensor-ref t-b idx) (random 1d0)))
> + (let* ((sr-a (strides t-a))
> + (st-a (store t-a))
> + (sr-b (strides t-b))
> + (st-b (store t-b))
> + (sr-c (strides t-c))
> + (st-c (store t-c))
> + (dims (dimensions t-a))
> + (rank 2)
> + (idx (allocate-index-store rank)))
> + (declare (type (index-array *) sr-a sr-b sr-c dims idx)
> + (type (real-array *) st-a st-b st-c))
> + (time (very-quickly
> + (loop
> + with of-a of-type index-type = (head t-a)
> + with of-b of-type index-type = (head t-b)
> + with of-c of-type index-type = (head t-c)
> + do (incf (aref st-c of-c) (* (aref st-a of-a) (aref st-b of-b)))
> + while (loop
> + for i of-type index-type from (1- rank) downto 0
> + do (if (= (aref idx i) (1- (aref dims i)))
> + (progn
> + (setf (aref idx i) 0)
> + (decf of-a (* (1- (aref dims i))
> +
> +
> + (loop for k from 0 below 1000
> + summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1)))))))))
> +
> (defmacro generate-typed-copy!-func (func store-type matrix-type blas-func)
> ;;Be very careful when using functions generated by this macro.
> ;;Indexes can be tricky and this has no safety net
> @@ -41,23 +105,13 @@
> (loop for i from 0 below nr-a
> do (,blas-func nc-a st-a cs-a st-b cs-b :head-x (+ hd-a (* i rs-a)) :head-y (+ hd-b (* i rs-b)))))))
> mat-b))
> -
> -(defun test-mm ()
> - (let ((t-a (make-real-tensor 1000 1000))
> - (t-b (make-real-tensor 1000 1000))
> - (t-c (make-real-tensor 1000 1000)))
> - (declare (type real-tensor t-a t-b t-c))
> - (mod-loop (idx #(1000 1000))
> - (setf (tensor-ref t-c idx)
> - (loop for k from 0 below 1000
> - summing (* (tensor-ref t-a `(,(aref idx 0) ,k)) (tensor-ref t-b `(,k ,(aref idx 1)))))))))
> ;;
> -#+nil
> +
> (defun test-tensor-1k-dot ()
> (declare (optimize (speed 3) (safety 0)))
> - (let ((t-a (make-real-tensor 1000 1000))
> - (t-b (make-real-tensor 1000 1000))
> - (t-c (make-real-tensor 1000 1000)))
> + (let ((t-a (make-real-tensor-dims 1000 1000))
> + (t-b (make-real-tensor-dims 1000 1000))
> + (t-c (make-real-tensor-dims 1000 1000)))
> (declare (type real-tensor t-a t-b t-c))
> (let ((s-a (store t-a))
> (s-b (store t-b))
> @@ -71,5 +125,6 @@
> (multiple-value-bind (i j) (floor n 1000)
> (declare (type index-type i j))
> (setf (aref s-c (+ (* i 1000) j))
> - (ddot 1000 (vector-data-address s-a) 1 (vector-data-address s-b) 1000 :head-x (* i 1000) :head-y j))))))))
> + (loop for k from 0 below 1000
> + summing (* (aref s-a (+ (* i 1000) k)) (aref s-b (+ (* k 1000) j)))))))))))
>
>
> -----------------------------------------------------------------------
>
> Summary of changes:
> src/ffi-cffi-interpreter-specific.lisp | 2 +-
> src/loopy.lisp | 73 ++++++++++++++++++++++++---
> src/standard-tensor.lisp | 7 ++-
> src/tensor-copy.lisp | 85 ++++++++++++++++++++++++++------
> 4 files changed, 142 insertions(+), 25 deletions(-)
>
>
> hooks/post-receive
>
|