From: <cap...@us...> - 2007-10-24 14:04:13
|
Revision: 13827 http://jikesrvm.svn.sourceforge.net/jikesrvm/?rev=13827&view=rev Author: captain5050 Date: 2007-10-24 07:04:12 -0700 (Wed, 24 Oct 2007) Log Message: ----------- In baseline compiled code with SSE2, avoid branch in long shift left and unsigned shift right using PSLLQ and PSRLQ MMX instructions. Modified Paths: -------------- rvmroot/trunk/rvm/src/org/jikesrvm/compilers/baseline/ia32/VM_Compiler.java rvmroot/trunk/rvm/src-generated/ia32-assembler/genAssembler.sh rvmroot/trunk/rvm/src-generated/opt-ir/ia32/OperatorList.dat Modified: rvmroot/trunk/rvm/src/org/jikesrvm/compilers/baseline/ia32/VM_Compiler.java =================================================================== --- rvmroot/trunk/rvm/src/org/jikesrvm/compilers/baseline/ia32/VM_Compiler.java 2007-10-24 14:02:02 UTC (rev 13826) +++ rvmroot/trunk/rvm/src/org/jikesrvm/compilers/baseline/ia32/VM_Compiler.java 2007-10-24 14:04:12 UTC (rev 13827) @@ -1146,23 +1146,32 @@ */ @Override protected final void emit_lshl() { - if (VM.VerifyAssertions) VM._assert(ECX != T0); // ECX is constrained to be the shift count - if (VM.VerifyAssertions) VM._assert(ECX != T1); - asm.emitPOP_Reg(ECX); // shift amount (6 bits) - asm.emitPOP_Reg(T0); // pop low half - asm.emitPOP_Reg(T1); // pop high half - asm.emitTEST_Reg_Imm(ECX, 32); - VM_ForwardReference fr1 = asm.forwardJcc(VM_Assembler.NE); - asm.emitSHLD_Reg_Reg_Reg(T1, T0, ECX); // shift high half - asm.emitSHL_Reg_Reg(T0, ECX); // shift low half - VM_ForwardReference fr2 = asm.forwardJMP(); - fr1.resolve(asm); - asm.emitMOV_Reg_Reg(T1, T0); // shift high half - asm.emitSHL_Reg_Reg(T1, ECX); - asm.emitXOR_Reg_Reg(T0, T0); // low half == 0 - fr2.resolve(asm); - asm.emitPUSH_Reg(T1); // push high half - asm.emitPUSH_Reg(T0); // push low half + if (SSE2_BASE) { + asm.emitPOP_Reg(T0); // shift amount (6 bits) + asm.emitMOVQ_Reg_RegInd(XMM1, SP); // XMM1 <- [SP] + asm.emitAND_Reg_Imm(T0, 0x3F); // mask to 6bits + asm.emitMOVDr_Reg_Reg(XMM0, T0); // XMM0 <- T0 + 
asm.emitPSLLQ_Reg_Reg(XMM1, XMM0); // XMM1 <<= XMM0 + asm.emitMOVQ_RegInd_Reg(SP, XMM1); // [SP] <- XMM1 + } else { + if (VM.VerifyAssertions) VM._assert(ECX != T0); // ECX is constrained to be the shift count + if (VM.VerifyAssertions) VM._assert(ECX != T1); + asm.emitPOP_Reg(ECX); // shift amount (6 bits) + asm.emitPOP_Reg(T0); // pop low half + asm.emitPOP_Reg(T1); // pop high half + asm.emitTEST_Reg_Imm(ECX, 32); + VM_ForwardReference fr1 = asm.forwardJcc(VM_Assembler.NE); + asm.emitSHLD_Reg_Reg_Reg(T1, T0, ECX); // shift high half + asm.emitSHL_Reg_Reg(T0, ECX); // shift low half + VM_ForwardReference fr2 = asm.forwardJMP(); + fr1.resolve(asm); + asm.emitMOV_Reg_Reg(T1, T0); // shift high half + asm.emitSHL_Reg_Reg(T1, ECX); + asm.emitXOR_Reg_Reg(T0, T0); // low half == 0 + fr2.resolve(asm); + asm.emitPUSH_Reg(T1); // push high half + asm.emitPUSH_Reg(T0); // push low half + } } /** @@ -1194,23 +1203,32 @@ */ @Override protected final void emit_lushr() { - if (VM.VerifyAssertions) VM._assert(ECX != T0); // ECX is constrained to be the shift count - if (VM.VerifyAssertions) VM._assert(ECX != T1); - asm.emitPOP_Reg(ECX); // shift amount (6 bits) - asm.emitPOP_Reg(T0); // pop low half - asm.emitPOP_Reg(T1); // pop high half - asm.emitTEST_Reg_Imm(ECX, 32); - VM_ForwardReference fr1 = asm.forwardJcc(VM_Assembler.NE); - asm.emitSHRD_Reg_Reg_Reg(T0, T1, ECX); // shift high half - asm.emitSHR_Reg_Reg(T1, ECX); // shift low half - VM_ForwardReference fr2 = asm.forwardJMP(); - fr1.resolve(asm); - asm.emitMOV_Reg_Reg(T0, T1); // low half = high half - asm.emitXOR_Reg_Reg(T1, T1); // high half = 0 - asm.emitSHR_Reg_Reg(T0, ECX); // low half = high half >>> ecx - fr2.resolve(asm); - asm.emitPUSH_Reg(T1); // push high half - asm.emitPUSH_Reg(T0); // push low half + if (SSE2_BASE) { + asm.emitPOP_Reg(T0); // shift amount (6 bits) + asm.emitMOVQ_Reg_RegInd(XMM1, SP); // XMM1 <- [SP] + asm.emitAND_Reg_Imm(T0, 0x3F); // mask to 6bits + asm.emitMOVDr_Reg_Reg(XMM0, T0); // XMM0 
<- T0 + asm.emitPSRLQ_Reg_Reg(XMM1, XMM0); // XMM1 >>>= XMM0 + asm.emitMOVQ_RegInd_Reg(SP, XMM1); // [SP] <- XMM1 + } else { + if (VM.VerifyAssertions) VM._assert(ECX != T0); // ECX is constrained to be the shift count + if (VM.VerifyAssertions) VM._assert(ECX != T1); + asm.emitPOP_Reg(ECX); // shift amount (6 bits) + asm.emitPOP_Reg(T0); // pop low half + asm.emitPOP_Reg(T1); // pop high half + asm.emitTEST_Reg_Imm(ECX, 32); + VM_ForwardReference fr1 = asm.forwardJcc(VM_Assembler.NE); + asm.emitSHRD_Reg_Reg_Reg(T0, T1, ECX); // shift high half + asm.emitSHR_Reg_Reg(T1, ECX); // shift low half + VM_ForwardReference fr2 = asm.forwardJMP(); + fr1.resolve(asm); + asm.emitMOV_Reg_Reg(T0, T1); // low half = high half + asm.emitXOR_Reg_Reg(T1, T1); // high half = 0 + asm.emitSHR_Reg_Reg(T0, ECX); // low half = high half >>> ecx + fr2.resolve(asm); + asm.emitPUSH_Reg(T1); // push high half + asm.emitPUSH_Reg(T0); // push low half + } } /** Modified: rvmroot/trunk/rvm/src-generated/ia32-assembler/genAssembler.sh =================================================================== --- rvmroot/trunk/rvm/src-generated/ia32-assembler/genAssembler.sh 2007-10-24 14:02:02 UTC (rev 13826) +++ rvmroot/trunk/rvm/src-generated/ia32-assembler/genAssembler.sh 2007-10-24 14:04:12 UTC (rev 13827) @@ -1656,6 +1656,7 @@ # Generic data move ops. emitSSE2Op none 0x66 MOVD none 0x7E +emitSSE2Op none 0x66 MOVDr none 0x6E emitSSE2Op 0xF3 0x66 MOVQ 0x7E 0xD6 # Double precision FP ops. @@ -1680,6 +1681,10 @@ emitSSE2Op 0xF2 none CMPNLESD 0xC2 none 6 emitSSE2Op 0xF2 none CMPORDSD 0xC2 none 7 +# Long ops. 
+emitSSE2Op 0x66 0x0F PSLLQ 0xF3 none +emitSSE2Op 0x66 0x0F PSRLQ 0xD3 none + emitFloatMemAcc() { local acronym=$1 local op=$2 Modified: rvmroot/trunk/rvm/src-generated/opt-ir/ia32/OperatorList.dat =================================================================== --- rvmroot/trunk/rvm/src-generated/opt-ir/ia32/OperatorList.dat 2007-10-24 14:02:02 UTC (rev 13826) +++ rvmroot/trunk/rvm/src-generated/opt-ir/ia32/OperatorList.dat 2007-10-24 14:04:12 UTC (rev 13827) @@ -1307,6 +1307,13 @@ #################### +IA32_MOVDr +MIR_Move +move + + + +#################### IA32_MOVQ MIR_Move move @@ -1314,6 +1321,20 @@ #################### +IA32_PSLLQ +MIR_BinaryAcc +none + + + +#################### +IA32_PSRLQ +MIR_BinaryAcc +none + + + +#################### IA32_CVTSI2SS MIR_Unary move This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |