From: Nathan F. <nf...@us...> - 2006-07-15 04:26:33
|
Update of /cvsroot/sbcl/sbcl/src/compiler/x86 In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv6380/src/compiler/x86 Modified Files: array.lisp Log Message: 0.9.14.13: Micro-optimize bit-vector accesses on x86 and x86-64: ... processor does the necessary masking for us, so we can eliminate an AND instruction; ... in the process, remove dodgy interior pointer usage lurking inside the small data-vector-ref VOPs. Index: array.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/compiler/x86/array.lisp,v retrieving revision 1.23 retrieving revision 1.24 diff -u -d -r1.23 -r1.24 --- array.lisp 4 Jun 2006 14:25:08 -0000 1.23 +++ array.lisp 15 Jul 2006 04:26:24 -0000 1.24 @@ -173,9 +173,14 @@ :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) (move ecx index) - (inst and ecx ,(1- elements-per-word)) - ,@(unless (= bits 1) - `((inst shl ecx ,(1- (integer-length bits))))) + ;; We used to mask ECX for all values of ELEMENT-PER-WORD, + ;; but since Intel's documentation says that the chip will + ;; mask shift and rotate counts by 31 automatically, we can + ;; safely move the masking operation under the protection of + ;; this UNLESS in the bit-vector case. --njf, 2006-07-14 + ,@(unless (= elements-per-word n-word-bits) + `((inst and ecx ,(1- elements-per-word)) + (inst shl ecx ,(1- (integer-length bits))))) (inst shr result :cl) (inst and result ,(1- (ash 1 bits))))) (define-vop (,(symbolicate 'data-vector-ref-c/ type)) @@ -198,28 +203,31 @@ (:note "inline array store") (:translate data-vector-set) (:policy :fast-safe) - (:args (object :scs (descriptor-reg) :target ptr) + (:args (object :scs (descriptor-reg)) (index :scs (unsigned-reg) :target ecx) (value :scs (unsigned-reg immediate) :target result)) (:arg-types ,type positive-fixnum positive-fixnum) (:results (result :scs (unsigned-reg))) (:result-types positive-fixnum) (:temporary (:sc unsigned-reg) word-index) - (:temporary (:sc unsigned-reg :from (:argument 0)) ptr old) - (:temporary (:sc unsigned-reg :offset ecx-offset :from (:argument 1)) - ecx) + (:temporary (:sc unsigned-reg) old) + (:temporary (:sc unsigned-reg :offset ecx-offset) ecx) (:generator 25 (move word-index index) (inst shr word-index ,bit-shift) - (inst lea ptr + (inst mov old (make-ea :dword :base object :index word-index :scale 4 :disp (- (* vector-data-offset n-word-bytes) other-pointer-lowtag))) - (loadw old ptr) (move ecx index) - (inst and ecx ,(1- elements-per-word)) - ,@(unless (= bits 1) - `((inst shl ecx ,(1- (integer-length bits))))) + ;; We used to mask ECX for all values of ELEMENT-PER-WORD, + ;; but since Intel's documentation says that the chip will + ;; mask shift and rotate counts by 31 automatically, we can + ;; safely move the masking operation under the protection of + ;; this UNLESS in the bit-vector case. --njf, 2006-07-14 + ,@(unless (= elements-per-word n-word-bits) + `((inst and ecx ,(1- elements-per-word)) + (inst shl ecx ,(1- (integer-length bits))))) (inst ror old :cl) (unless (and (sc-is value immediate) (= (tn-value value) ,(1- (ash 1 bits)))) @@ -231,7 +239,10 @@ (unsigned-reg (inst or old value))) (inst rol old :cl) - (storew old ptr) + (inst mov (make-ea :dword :base object :index word-index :scale 4 + :disp (- (* vector-data-offset n-word-bytes) + other-pointer-lowtag)) + old) (sc-case value (immediate (inst mov result (tn-value value))) |