From: Paul Khuong <pkhuong@us...>  20110814 20:46:42

The branch "master" has been updated in SBCL: via f17e3d27d7ff599f9443d011d17017a2a858c81a (commit) from 4e431db581c21b8d54119e2892567f5fc09562f1 (commit)  Log  commit f17e3d27d7ff599f9443d011d17017a2a858c81a Author: Paul Khuong <pvk@...> Date: Sun Aug 14 16:46:01 2011 0400 More efficient integer division by multiplication * Exploit restricted range for inputs (e.g. fixnums). * When the divisor is even, simplify with a mask instead of a shift. * Clean up the code a bit: we don't want modular arithmetic, it's actually all guaranteed not to wrap around. * Change the test so that larger divisors are a bit more likely to get tested too. * Lots more things can be done, including generalizing the transform to pretty much arbitrary rational divisor, or avoiding the costly general code sequence in nearly all cases. Unfortunately, it's a lot of (somewhat original, at that) code, and can be fairly slow; if it matters to someone, I can try and find a compromise (contrib?).  src/compiler/srctran.lisp  48 ++++++++++++++++++++++++++++ tests/arith.pure.lisp  3 + 2 files changed, 32 insertions(+), 19 deletions() diff git a/src/compiler/srctran.lisp b/src/compiler/srctran.lisp index 03bb32a..44806ec 100644  a/src/compiler/srctran.lisp +++ b/src/compiler/srctran.lisp @@ 3283,14 +3283,22 @@ ;;; Integers using Multiplication", 1994 by Torbj\"{o}rn Granlund and ;;; Peter L. Montgomery, Figures 4.2 and 6.2, modified to exclude the ;;; case of division by powers of two. +;;; The algorithm includes an adaptive precision argument. Use it, since +;;; we often have subword value ranges. Careful, in this case, we need +;;; p s.t 2^p > n, not the ceiling of the binary log. +;;; Also, for some reason, the paper prefers shifting to masking. Mask +;;; instead. Masking is equivalent to shifting right, then left again; +;;; all the intermediate values are still words, so we just have to shift +;;; right a bit more to compensate, at the end. +;;; ;;; The following two examples show an average case and the worst case ;;; with respect to the complexity of the generated expression, under ;;; a word size of 64 bits: ;;; ;;; (UNSIGNEDDIVTRANSFORMER 10) > ;;; (ASH (%MULTIPLY (ASH X 0) 14757395258967641293) 3) +;;; (UNSIGNEDDIVTRANSFORMER 10 MOSTPOSITIVEWORD) > +;;; (ASH (%MULTIPLY (LOGANDC2 X 0) 14757395258967641293) 3) ;;; ;;; (UNSIGNEDDIVTRANSFORMER 7) > +;;; (UNSIGNEDDIVTRANSFORMER 7 MOSTPOSITIVEWORD) > ;;; (LET* ((NUM X) ;;; (T1 (%MULTIPLY NUM 2635249153387078803))) ;;; (ASH (LDB (BYTE 64 0) @@ 3299,8 +3307,9 @@ ;;; 1))) ;;; 2)) ;;; (defun genunsigneddivbyconstantexpr (y)  (declare (type (integer 3 #.mostpositiveword) y)) +(defun genunsigneddivbyconstantexpr (y maxx) + (declare (type (integer 3 #.mostpositiveword) y) + (type word maxx)) (aver (not (zerop (logand y (1 y))))) (labels ((ld (x) ;; the floor of the binary logarithm of (positive) X @@ 3318,24 +3327,25 @@ (> shift 0))) (values mhigh shift))))) (let ((n (expt 2 sb!vm:nwordbits)) + (precision (integerlength maxx)) (shift1 0)) (multiplevaluebind (m shift2)  (choosemultiplier y sb!vm:nwordbits) + (choosemultiplier y precision) (when (and (>= m n) (evenp y)) (setq shift1 (ld (logand y ( y)))) (multiplevaluesetq (m shift2) (choosemultiplier (/ y (ash 1 shift1))  ( sb!vm:nwordbits shift1)))) + ( precision shift1)))) (if (>= m n)  (flet ((wordmod (x)  `(ldb (byte #.sb!vm:nwordbits 0) ,x))) + (flet ((word (x) + `(trulythe word ,x))) `(let* ((num x) (t1 (%multiply num ,( m n))))  (ash ,(wordmod `(+ t1 (ash ,(wordmod `( num t1))  1))) + (ash ,(word `(+ t1 (ash ,(word `( num t1)) + 1))) ,( 1 shift2))))  `(ash (%multiply (ash x ,( shift1)) ,m)  ,( shift2))))))) + `(ash (%multiply (logandc2 x ,(1 (ash 1 shift1))) ,m) + ,( (+ shift1 shift2)))))))) ;;; If the divisor is constant and both args are positive and fit in a ;;; machine word, replace the division by a multiplication and possibly @@ 3346,18 +3356,20 @@ ;;; the same value, emit much simpler code to handle that. (This case ;;; may be rare but it's easy to detect and the compiler doesn't find ;;; this optimization on its own.) (deftransform truncate ((x y) ((unsignedbyte #.sb!vm:nwordbits)  (constantarg  (unsignedbyte #.sb!vm:nwordbits))) +(deftransform truncate ((x y) (word (constantarg word)) * :policy (and (> speed compilationspeed) (> speed space))) "convert integer division to multiplication"  (let ((y (lvarvalue y))) + (let* ((y (lvarvalue y)) + (xtype (lvartype x)) + (maxx (or (and (numerictypep xtype) + (numerictypehigh xtype)) + mostpositiveword))) ;; Division by zero, one or powers of two is handled elsewhere. (when (zerop (logand y (1 y))) (giveupir1transform))  `(let* ((quot ,(genunsigneddivbyconstantexpr y)) + `(let* ((quot ,(genunsigneddivbyconstantexpr y maxx)) (rem (ldb (byte #.sb!vm:nwordbits 0) ( x (* quot ,y))))) (values quot rem)))) diff git a/tests/arith.pure.lisp b/tests/arith.pure.lisp index b1ebdb2..d58da25 100644  a/tests/arith.pure.lisp +++ b/tests/arith.pure.lisp @@ 456,7 +456,8 @@ ,@(loop repeat 4 collect (+ 10000 (random 101))) ,@(loop for i from 4 to sbvm:nwordbits  for r = (random (expt 2 i)) + for pow = (expt 2 (1 i)) + for r = (+ pow (random pow)) collect r))) (when (typep dividend dividendtype) (multiplevaluebind (q1 r1)  hooks/postreceive  SBCL 