From: Juho S. <js...@us...> - 2006-11-13 06:10:33
|
Update of /cvsroot/sbcl/sbcl/src/compiler/x86 In directory sc8-pr-cvs8.sourceforge.net:/tmp/cvs-serv13393/src/compiler/x86 Modified Files: call.lisp values.lisp Log Message: 0.9.18.47: Faster &MORE-handling VOPs on x86 and x86-64. * The performance of the x86 LOOP instruction is really bad on modern x86 processors, so rewrite the loops in %LISTIFY-REST-ARGS and %MORE-ARG-VALUES to do the index manipulation and branching explicitly. * REP MOVS isn't very good for copying small blocks of memory, so use an explicit loop in COPY-MORE-ARG. * Rewrite the x86-64 COPY-MORE-ARG to take advantage of the extra registers. * Implement the %MORE-ARG VOP (it already exists on all other platforms). Index: call.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/compiler/x86/call.lisp,v retrieving revision 1.33 retrieving revision 1.34 diff -u -d -r1.33 -r1.34 --- call.lisp 18 Sep 2006 20:09:14 -0000 1.33 +++ call.lisp 13 Nov 2006 06:10:16 -0000 1.34 @@ -1236,24 +1236,33 @@ ;; Save edi and esi register args. (inst push edi-tn) (inst push esi-tn) + (inst push ebx-tn) ;; Okay, we have pushed the register args. We can trash them ;; now. - ;; Initialize dst to be end of stack; skiping the values pushed - ;; above. - (inst lea edi-tn (make-ea :dword :base esp-tn :disp 8)) - ;; Initialize src to be end of args. (inst mov esi-tn ebp-tn) (inst sub esi-tn ebx-tn) - (inst shr ecx-tn word-shift) ; make word count - ;; And copy the args. - (inst cld) ; auto-inc ESI and EDI. - (inst rep) - (inst movs :dword) + ;; We need to copy from downwards up to avoid overwriting some of + ;; the yet uncopied args. So we need to use EBX as the copy index + ;; and ECX as the loop counter, rather than using ECX for both. + (inst xor ebx-tn ebx-tn) + + ;; We used to use REP MOVS here, but on modern x86 it performs + ;; much worse than an explicit loop for small blocks. 
+ COPY-LOOP + (inst mov edi-tn (make-ea :dword :base esi-tn :index ebx-tn)) + ;; The :DISP is to account for the registers saved on the stack + (inst mov (make-ea :dword :base esp-tn :disp (* 3 n-word-bytes) + :index ebx-tn) + edi-tn) + (inst add ebx-tn n-word-bytes) + (inst sub ecx-tn n-word-bytes) + (inst jmp :nz COPY-LOOP) ;; So now we need to restore EDI and ESI. + (inst pop ebx-tn) (inst pop esi-tn) (inst pop edi-tn) @@ -1315,6 +1324,19 @@ (inst mov keyword (make-ea :dword :base object :index index :disp n-word-bytes)))))) +(define-vop (more-arg) + (:translate sb!c::%more-arg) + (:policy :fast-safe) + (:args (object :scs (descriptor-reg) :to (:result 1)) + (index :scs (any-reg) :to (:result 1) :target value)) + (:arg-types * tagged-num) + (:results (value :scs (descriptor-reg any-reg))) + (:result-types *) + (:generator 4 + (move value index) + (inst neg value) + (inst mov value (make-ea :dword :base object :index value)))) + ;;; Turn more arg (context, count) into a list. (defoptimizer (%listify-rest-args stack-allocate-result) ((&rest args)) t) @@ -1345,8 +1367,6 @@ (maybe-pseudo-atomic stack-allocate-p (allocation dst dst node stack-allocate-p) (inst lea dst (make-ea :byte :base dst :disp list-pointer-lowtag)) - ;; Convert the count into a raw value, so that we can use the - ;; LOOP instruction. (inst shr ecx 2) ;; Set decrement mode (successive args at lower addresses) (inst std) @@ -1365,7 +1385,8 @@ (inst lods eax) (storew eax dst 0 list-pointer-lowtag) ;; Go back for more. - (inst loop loop) + (inst sub ecx 1) + (inst jmp :nz loop) ;; NIL out the last cons. 
(storew nil-value dst 1 list-pointer-lowtag)) (emit-label done) Index: values.lisp =================================================================== RCS file: /cvsroot/sbcl/sbcl/src/compiler/x86/values.lisp,v retrieving revision 1.9 retrieving revision 1.10 diff -u -d -r1.9 -r1.10 --- values.lisp 19 Aug 2005 12:15:15 -0000 1.9 +++ values.lisp 13 Nov 2006 06:10:16 -0000 1.10 @@ -115,7 +115,7 @@ (:arg-types * positive-fixnum positive-fixnum) (:temporary (:sc any-reg :offset esi-offset :from (:argument 0)) src) (:temporary (:sc descriptor-reg :offset eax-offset) temp) - (:temporary (:sc unsigned-reg :offset ecx-offset) temp1) + (:temporary (:sc unsigned-reg :offset ecx-offset) loop-index) (:results (start :scs (any-reg)) (count :scs (any-reg))) (:generator 20 @@ -137,17 +137,18 @@ (move count num) (inst sub count skip))) - (move temp1 count) + (move loop-index count) (inst mov start esp-tn) (inst jecxz done) ; check for 0 count? - (inst shr temp1 word-shift) ; convert the fixnum to a count. + (inst sub esp-tn count) + (inst sub src count) - (inst std) ; move down the stack as more value are copied to the bottom. LOOP - (inst lods temp) - (inst push temp) - (inst loop loop) + (inst mov temp (make-ea :dword :base src :index loop-index)) + (inst sub loop-index n-word-bytes) + (inst mov (make-ea :dword :base esp-tn :index loop-index) temp) + (inst jmp :nz LOOP) DONE ;; solaris requires DF being zero. |