From: Lutz Euler <leuler@us...>  20130429 21:01:53

The branch "master" has been updated in SBCL: via dabb26b93be9bbf2718951ea2200fca874d3d587 (commit) from 62f92bd02e9c04a46893ff9e7b88acdaeab230fa (commit)  Log  commit dabb26b93be9bbf2718951ea2200fca874d3d587 Author: Lutz Euler <lutz.euler@...> Date: Mon Apr 29 22:57:41 2013 +0200 Faster ISQRT on small (about fixnum sized) numbers. ISQRT is implemented using a recursive algorithm for arguments above 24 which is compiled using generic arithmetic only (as it must support both fixnums and bignums). Improve this by compiling this recursive part twice, once using generic and once fixnumonly arithmetic, and dispatching on function entry into the applicable part. For maximum speed, the fixnum part recurs directly into itself, thereby avoiding further type dispatching. This makes ISQRT run about three times as fast on fixnum inputs while the generated code is about 40 percent larger (both measured on x8664). For bignums a speedup can be seen, too, as ISQRT always recurs into fixnum territory eventually, but the relative gain obviously becomes smaller very fast with increasing size of the argument. I have changed the variable names in the recursive part; they no longer have an "n" prefix as this in SBCL by convention means "number of" and as the argument of the recursive part is no longer visibly "n". Slightly augment the test case.  NEWS  1 + src/code/numbers.lisp  77 +++++++++++++++++++++++++++++++++++ tests/arith.pure.lisp  2 + 3 files changed, 59 insertions(+), 21 deletions() diff git a/NEWS b/NEWS index 053e3a2..afc31de 100644  a/NEWS +++ b/NEWS @@ 9,6 +9,7 @@ changes relative to sbcl1.1.7: * bug fix: handle errors when initializing *defaultpathnamedefaults*, sbext:*runtimepathname*, sbext:*posixargv* on startup, like character decoding errors, or directories being deleted. + * optimization: faster ISQRT on fixnums and small bignums changes in sbcl1.1.7 relative to sbcl1.1.6: * enhancement: TRACE :PRINTALL handles multiplevalued forms. diff git a/src/code/numbers.lisp b/src/code/numbers.lisp index 89d35de..bf3a03a 100644  a/src/code/numbers.lisp +++ b/src/code/numbers.lisp @@ 1406,31 +1406,66 @@ the first." ((fixnum bignum) (bignumgcd (makesmallbignum u) v)))))) ;;;; from Robert Smith; slightly changed not to cons unnecessarily. +;;; from Robert Smith; changed not to cons unnecessarily, and tuned for +;;; faster operation on fixnum inputs by compiling the central recursive +;;; algorithm twice, once using generic and once fixnum arithmetic, and +;;; dispatching on function entry into the applicable part. For maximum +;;; speed, the fixnum part recurs into itself, thereby avoiding further +;;; type dispatching. This pattern is not supported by NUMBERDISPATCH +;;; thus some specialpurpose macrology is needed. (defun isqrt (n) #!+sbdoc "Return the greatest integer less than or equal to the square root of N." (declare (type unsignedbyte n))  (cond  ((> n 24)  (let* ((nfourthsize (ash (1 (integerlength n)) 2))  (nsignificanthalf (ash n ( (ash nfourthsize 1))))  (nsignificanthalfisqrt (isqrt nsignificanthalf))  (zerothiteration (ash nsignificanthalfisqrt nfourthsize)))  (multiplevaluebind (quot rem)  (floor n zerothiteration)  (let ((firstiteration (ash (+ zerothiteration quot) 1)))  (cond ((oddp quot)  firstiteration)  ((> (expt ( firstiteration zerothiteration) 2) rem)  (1 firstiteration))  (t  firstiteration))))))  ((> n 15) 4)  ((> n 8) 3)  ((> n 3) 2)  ((> n 0) 1)  ((= n 0) 0))) + (macrolet + ((isqrtrecursion (arg recurse fixnump) + ;; Expands into code for the recursive step of the ISQRT + ;; calculation. ARG is the input variable and RECURSE the name + ;; of the function to recur into. If FIXNUMP is true, some + ;; type declarations are added that, together with ARG being + ;; declared as a fixnum outside of here, make the resulting code + ;; compile into fixnumspecialized code without any calls to + ;; generic arithmetic. Else, the code works for bignums, too. + ;; The input must be at least 16 to ensure that RECURSE is called + ;; with a strictly smaller number and that the result is correct + ;; (provided that RECURSE correctly implements ISQRT, itself). + `(macrolet ((iffixnumptrulythe (type expr) + ,@(if fixnump + '(`(trulythe ,type ,expr)) + '((declare (ignore type)) + expr)))) + (let* ((fourthsize (ash (1 (integerlength ,arg)) 2)) + (significanthalf (ash ,arg ( (ash fourthsize 1)))) + (significanthalfisqrt + (iffixnumptrulythe + (integer 1 #.(isqrt sb!xc:mostpositivefixnum)) + (,recurse significanthalf))) + (zerothiteration (ash significanthalfisqrt + fourthsize))) + (multiplevaluebind (quot rem) + (floor ,arg zerothiteration) + (let ((firstiteration (ash (+ zerothiteration quot) 1))) + (cond ((oddp quot) + firstiteration) + ((> (iffixnumptrulythe + fixnum + (expt ( firstiteration zerothiteration) 2)) + rem) + (1 firstiteration)) + (t + firstiteration)))))))) + (typecase n + (fixnum (labels ((fixnumisqrt (n) + (declare (type fixnum n)) + (cond ((> n 24) + (isqrtrecursion n fixnumisqrt t)) + ((> n 15) 4) + ((> n 8) 3) + ((> n 3) 2) + ((> n 0) 1) + ((= n 0) 0)))) + (fixnumisqrt n))) + (bignum (isqrtrecursion n isqrt nil))))) ;;;; miscellaneous number predicates diff git a/tests/arith.pure.lisp b/tests/arith.pure.lisp index 777b370..c78b328 100644  a/tests/arith.pure.lisp +++ b/tests/arith.pure.lisp @@ 576,6 +576,8 @@ (test x2) (test (1+ x2)) (test (1 x2))))) + (test mostpositivefixnum) + (test (1+ mostpositivefixnum)) (loop for i from 1 to 200 for pow = (expt 2 (1 i)) for j = (+ pow (random pow))  hooks/postreceive  SBCL 