|
From: <sv...@va...> - 2005-04-26 01:53:51
|
Author: sewardj
Date: 2005-04-26 02:53:48 +0100 (Tue, 26 Apr 2005)
New Revision: 1143
Modified:
trunk/priv/guest-amd64/ghelpers.c
Log:
Add various %rflag-helper specialisation cases and fast paths. This
more or less doubles performance of the baseline simulation on integer
code.
Modified: trunk/priv/guest-amd64/ghelpers.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/ghelpers.c 2005-04-26 01:52:29 UTC (rev 1142)
+++ trunk/priv/guest-amd64/ghelpers.c 2005-04-26 01:53:48 UTC (rev 1143)
@@ -525,12 +525,14 @@
for (op =3D 0; op < AMD64G_CC_OP_NUMBER; op++) {
=20
ch =3D ' ';
- if (op > 0 && (op-1) % 3 =3D=3D 0)=20
+ if (op > 0 && (op-1) % 4 =3D=3D 0)=20
ch =3D 'B';
- if (op > 0 && (op-1) % 3 =3D=3D 1)=20
+ if (op > 0 && (op-1) % 4 =3D=3D 1)=20
ch =3D 'W';
- if (op > 0 && (op-1) % 3 =3D=3D 2)=20
+ if (op > 0 && (op-1) % 4 =3D=3D 2)=20
ch =3D 'L';
+ if (op > 0 && (op-1) % 4 =3D=3D 3)=20
+ ch =3D 'Q';
=20
vex_printf("%2d%c: ", op, ch);
vex_printf("%6u ", tabc_slow[op]);
@@ -694,24 +696,23 @@
=20
/* Fast-case some common ones. */
switch (cc_op) {
-# if 0 // REINSTATE CAREFULLY
+ case AMD64G_CC_OP_LOGICQ:=20
case AMD64G_CC_OP_LOGICL:=20
case AMD64G_CC_OP_LOGICW:=20
case AMD64G_CC_OP_LOGICB:
return 0;
- case AMD64G_CC_OP_SUBL:
- return ((UInt)cc_dep1) < ((UInt)cc_dep2)
- ? AMD64G_CC_MASK_C : 0;
- case AMD64G_CC_OP_SUBW:
- return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFFF))
- ? AMD64G_CC_MASK_C : 0;
- case AMD64G_CC_OP_SUBB:
- return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
- ? AMD64G_CC_MASK_C : 0;
- case AMD64G_CC_OP_INCL:
- case AMD64G_CC_OP_DECL:
- return cc_ndep & AMD64G_CC_MASK_C;
-# endif // REINSTATE CAREFULLY
+ // case AMD64G_CC_OP_SUBL:
+ // return ((UInt)cc_dep1) < ((UInt)cc_dep2)
+ // ? AMD64G_CC_MASK_C : 0;
+ // case AMD64G_CC_OP_SUBW:
+ // return ((UInt)(cc_dep1 & 0xFFFF)) < ((UInt)(cc_dep2 & 0xFFF=
F))
+ // ? AMD64G_CC_MASK_C : 0;
+ // case AMD64G_CC_OP_SUBB:
+ // return ((UInt)(cc_dep1 & 0xFF)) < ((UInt)(cc_dep2 & 0xFF))
+ // ? AMD64G_CC_MASK_C : 0;
+ // case AMD64G_CC_OP_INCL:
+ // case AMD64G_CC_OP_DECL:
+ // return cc_ndep & AMD64G_CC_MASK_C;
default:=20
break;
}
@@ -830,48 +831,48 @@
/*--- %rflags functions. ---*/
/*---------------------------------------------------------------*/
=20
-//.. /* Used by the optimiser to try specialisations. Returns an
-//.. equivalent expression, or NULL if none. */
-//..=20
-//.. static Bool isU32 ( IRExpr* e, UInt n )
-//.. {
-//.. return e->tag =3D=3D Iex_Const
-//.. && e->Iex.Const.con->tag =3D=3D Ico_U32
-//.. && e->Iex.Const.con->Ico.U32 =3D=3D n;
-//.. }
+/* Used by the optimiser to try specialisations. Returns an
+ equivalent expression, or NULL if none. */
=20
+static Bool isU64 ( IRExpr* e, ULong n )
+{
+ return e->tag =3D=3D Iex_Const
+ && e->Iex.Const.con->tag =3D=3D Ico_U64
+ && e->Iex.Const.con->Ico.U64 =3D=3D n;
+}
+
IRExpr* guest_amd64_spechelper ( HChar* function_name,
IRExpr** args )
{
-//.. # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
-//.. # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
-//.. # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
-//.. # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
-//..=20
-//.. Int i, arity =3D 0;
-//.. for (i =3D 0; args[i]; i++)
-//.. arity++;
-//.. # if 0
-//.. vex_printf("spec request:\n");
-//.. vex_printf(" %s ", function_name);
-//.. for (i =3D 0; i < arity; i++) {
-//.. vex_printf(" ");
-//.. ppIRExpr(args[i]);
-//.. }
-//.. vex_printf("\n");
-//.. # endif
-//..=20
-//.. /* --------- specialising "x86g_calculate_condition" --------- *=
/
-//..=20
-//.. if (vex_streq(function_name, "x86g_calculate_condition")) {
-//.. /* specialise calls to above "calculate condition" function *=
/
-//.. IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
-//.. vassert(arity =3D=3D 5);
-//.. cond =3D args[0];
-//.. cc_op =3D args[1];
-//.. cc_dep1 =3D args[2];
-//.. cc_dep2 =3D args[3];
-//..=20
+# define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
+# define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
+# define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
+# define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
+
+ Int i, arity =3D 0;
+ for (i =3D 0; args[i]; i++)
+ arity++;
+# if 0
+ vex_printf("spec request:\n");
+ vex_printf(" %s ", function_name);
+ for (i =3D 0; i < arity; i++) {
+ vex_printf(" ");
+ ppIRExpr(args[i]);
+ }
+ vex_printf("\n");
+# endif
+
+ /* --------- specialising "amd64g_calculate_condition" --------- */
+
+ if (vex_streq(function_name, "amd64g_calculate_condition")) {
+ /* specialise calls to above "calculate condition" function */
+ IRExpr *cond, *cc_op, *cc_dep1, *cc_dep2;
+ vassert(arity =3D=3D 5);
+ cond =3D args[0];
+ cc_op =3D args[1];
+ cc_dep1 =3D args[2];
+ cc_dep2 =3D args[3];
+
//.. /*---------------- ADDL ----------------*/
//..=20
//.. if (isU32(cc_op, AMD64G_CC_OP_ADDL) && isU32(cond, X86CondZ))=
{
@@ -881,9 +882,9 @@
//.. binop(Iop_Add32, cc_dep1, cc_dep2),
//.. mkU32(0)));
//.. }
-//..=20
-//.. /*---------------- SUBL ----------------*/
-//..=20
+
+ /*---------------- SUBL ----------------*/
+
//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondZ))=
{
//.. /* long sub/cmp, then Z --> test dst=3D=3Dsrc */
//.. return unop(Iop_1Uto32,
@@ -895,21 +896,30 @@
//.. return unop(Iop_1Uto32,
//.. binop(Iop_CmpNE32, cc_dep1, cc_dep2));
//.. }
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondL))=
{
-//.. /* long sub/cmp, then L (signed less than)=20
-//.. --> test dst <s src */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpLT32S, cc_dep1, cc_dep2));
-//.. }
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondLE)=
) {
-//.. /* long sub/cmp, then LE (signed less than or equal)
-//.. --> test dst <=3Ds src */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpLE32S, cc_dep1, cc_dep2));
-//.. }
-//..=20
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
+ /* long sub/cmp, then L (signed less than)=20
+ --> test dst <s src */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLT64S,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32)))));
+
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
+ /* long sub/cmp, then LE (signed less than or equal)=20
+ --> test dst <=3Ds src */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLE64S,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32)))));
+
+ }
+
+
//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondBE)=
) {
//.. /* long sub/cmp, then BE (unsigned less than or equal)
//.. --> test dst <=3Du src */
@@ -923,35 +933,38 @@
//.. return unop(Iop_1Uto32,
//.. binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
//.. }
-//..=20
-//.. /*---------------- SUBW ----------------*/
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBW) && isU32(cond, X86CondZ))=
{
-//.. /* byte sub/cmp, then Z --> test dst=3D=3Dsrc */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpEQ16,=20
-//.. unop(Iop_32to16,cc_dep1),=20
-//.. unop(Iop_32to16,cc_dep2)));
-//.. }
-//..=20
-//.. /*---------------- SUBB ----------------*/
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBB) && isU32(cond, X86CondZ))=
{
-//.. /* byte sub/cmp, then Z --> test dst=3D=3Dsrc */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpEQ8,=20
-//.. unop(Iop_32to8,cc_dep1),=20
-//.. unop(Iop_32to8,cc_dep2)));
-//.. }
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBB) && isU32(cond, X86CondNZ)=
) {
-//.. /* byte sub/cmp, then NZ --> test dst!=3Dsrc */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpNE8,=20
-//.. unop(Iop_32to8,cc_dep1),=20
-//.. unop(Iop_32to8,cc_dep2)));
-//.. }
-//..=20
+
+ /*---------------- SUBW ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBW) && isU64(cond, AMD64CondZ)) {
+ /* word sub/cmp, then Z --> test dst=3D=3Dsrc */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpEQ16,=20
+ unop(Iop_32to16,unop(Iop_64to32,cc_dep1)),
+ unop(Iop_32to16,unop(Iop_64to32,cc_dep2)))));
+ }
+
+ /*---------------- SUBB ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondZ)) {
+ /* byte sub/cmp, then Z --> test dst=3D=3Dsrc */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpEQ8,=20
+ unop(Iop_32to8,unop(Iop_64to32,cc_dep1)),
+ unop(Iop_32to8,unop(Iop_64to32,cc_dep2)))));
+ }
+
+// if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondNZ))=
{
+// /* byte sub/cmp, then NZ --> test dst!=3Dsrc */
+// return unop(Iop_32Uto64,
+// unop(Iop_1Uto32,
+// binop(Iop_CmpNE8,=20
+// unop(Iop_32to8,unop(Iop_64to32,cc_dep1)),
+// unop(Iop_32to8,unop(Iop_64to32,cc_dep2)))))=
;
+// }
+
//.. if (isU32(cc_op, AMD64G_CC_OP_SUBB) && isU32(cond, X86CondNBE=
)) {
//.. /* long sub/cmp, then NBE (unsigned greater than)
//.. --> test src <=3Du dst */
@@ -961,29 +974,35 @@
//.. binop(Iop_And32,cc_dep2,mkU32(0xFF)),
//.. binop(Iop_And32,cc_dep1,mkU32(0xFF))));
//.. }
-//..=20
-//.. /*---------------- LOGICL ----------------*/
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL) && isU32(cond, X86CondZ=
)) {
-//.. /* long and/or/xor, then Z --> test dst=3D=3D0 */
-//.. return unop(Iop_1Uto32,binop(Iop_CmpEQ32, cc_dep1, mkU32(0=
)));
-//.. }
-//..=20
+
+ /*---------------- LOGICL ----------------*/
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) =
{
+ /* long and/or/xor, then Z --> test dst=3D=3D0 */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,binop(Iop_CmpEQ64,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),=
=20
+ mkU64(0))));
+ }
+
//.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL) && isU32(cond, X86CondS=
)) {
//.. /* long and/or/xor, then S --> test dst <s 0 */
//.. return unop(Iop_1Uto32,binop(Iop_CmpLT32S, cc_dep1, mkU32(=
0)));
//.. }
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL) && isU32(cond, X86CondL=
E)) {
-//.. /* long and/or/xor, then LE
-//.. This is pretty subtle. LOGIC sets SF and ZF according =
to the
-//.. result and makes OF be zero. LE computes (SZ ^ OF) | Z=
F, but
-//.. OF is zero, so this reduces to SZ | ZF -- which will be=
1 iff
-//.. the result is <=3Dsigned 0. Hence ...
-//.. */
-//.. return unop(Iop_1Uto32,binop(Iop_CmpLE32S, cc_dep1, mkU32(=
0)));
-//.. }
-//..=20
+
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE))=
{
+ /* long and/or/xor, then LE
+ This is pretty subtle. LOGIC sets SF and ZF according to th=
e
+ result and makes OF be zero. LE computes (SZ ^ OF) | ZF, bu=
t
+ OF is zero, so this reduces to SZ | ZF -- which will be 1 if=
f
+ the result is <=3Dsigned 0. Hence ...
+ */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,binop(Iop_CmpLE64S,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),=
=20
+ mkU64(0))));
+ }
+
//.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL) && isU32(cond, X86CondB=
E)) {
//.. /* long and/or/xor, then BE
//.. LOGIC sets ZF according to the result and makes CF be z=
ero.
@@ -1076,43 +1095,49 @@
//.. )
//.. );
//.. }
-//..=20
-//.. return NULL;
-//.. }
-//..=20
-//.. /* --------- specialising "x86g_calculate_eflags_c" --------- */
-//..=20
-//.. if (vex_streq(function_name, "x86g_calculate_eflags_c")) {
-//.. /* specialise calls to above "calculate_eflags_c" function */
-//.. IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
-//.. vassert(arity =3D=3D 4);
-//.. cc_op =3D args[0];
-//.. cc_dep1 =3D args[1];
-//.. cc_dep2 =3D args[2];
-//.. cc_ndep =3D args[3];
-//..=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL)) {
-//.. /* C after sub denotes unsigned less than */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
-//.. }
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBB)) {
-//.. /* C after sub denotes unsigned less than */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpLT32U,=20
-//.. binop(Iop_And32,cc_dep1,mkU32(0xFF)),
-//.. binop(Iop_And32,cc_dep2,mkU32(0xFF))));
-//.. }
-//.. if (isU32(cc_op, AMD64G_CC_OP_LOGICL)
-//.. || isU32(cc_op, AMD64G_CC_OP_LOGICW)
-//.. || isU32(cc_op, AMD64G_CC_OP_LOGICB)) {
-//.. /* cflag after logic is zero */
-//.. return mkU32(0);
-//.. }
-//.. if (isU32(cc_op, AMD64G_CC_OP_DECL) || isU32(cc_op, AMD64G_CC=
_OP_INCL)) {
-//.. /* If the thunk is dec or inc, the cflag is supplied as CC=
_NDEP. */
-//.. return cc_ndep;
-//.. }
+
+ return NULL;
+ }
+
+ /* --------- specialising "amd64g_calculate_rflags_c" --------- */
+
+ if (vex_streq(function_name, "amd64g_calculate_rflags_c")) {
+ /* specialise calls to above "calculate_rflags_c" function */
+ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+ vassert(arity =3D=3D 4);
+ cc_op =3D args[0];
+ cc_dep1 =3D args[1];
+ cc_dep2 =3D args[2];
+ cc_ndep =3D args[3];
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLT64U,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),=20
+ binop(Iop_Shl64,cc_dep2,mkU8(32)))));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
+ /* C after sub denotes unsigned less than */
+ return unop(Iop_32Uto64,
+ unop(Iop_1Uto32,
+ binop(Iop_CmpLT64U,=20
+ binop(Iop_And64,cc_dep1,mkU64(0xFF)),
+ binop(Iop_And64,cc_dep2,mkU64(0xFF)))));
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_LOGICQ)
+ || isU64(cc_op, AMD64G_CC_OP_LOGICL)
+ || isU64(cc_op, AMD64G_CC_OP_LOGICW)
+ || isU64(cc_op, AMD64G_CC_OP_LOGICB)) {
+ /* cflag after logic is zero */
+ return mkU64(0);
+ }
+ if (isU64(cc_op, AMD64G_CC_OP_DECL) || isU64(cc_op, AMD64G_CC_OP_I=
NCL)
+ || isU64(cc_op, AMD64G_CC_OP_DECQ) || isU64(cc_op, AMD64G_CC_O=
P_INCQ)) {
+ /* If the thunk is dec or inc, the cflag is supplied as CC_NDEP=
. */
+ return cc_ndep;
+ }
//.. if (isU32(cc_op, AMD64G_CC_OP_COPY)) {
//.. /* cflag after COPY is stored in DEP1. */
//.. return
@@ -1127,14 +1152,14 @@
//.. vex_printf("CFLAG "); ppIRExpr(cc_op); vex_printf("\n");
//.. }
//.. # endif
+
+ return NULL;
+ }
+
+//.. /* --------- specialising "x86g_calculate_rflags_all" --------- =
*/
//..=20
-//.. return NULL;
-//.. }
-//..=20
-//.. /* --------- specialising "x86g_calculate_eflags_all" --------- =
*/
-//..=20
-//.. if (vex_streq(function_name, "x86g_calculate_eflags_all")) {
-//.. /* specialise calls to above "calculate_eflags_all" function =
*/
+//.. if (vex_streq(function_name, "x86g_calculate_rflags_all")) {
+//.. /* specialise calls to above "calculate_rflags_all" function =
*/
//.. IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
//.. vassert(arity =3D=3D 4);
//.. cc_op =3D args[0];
@@ -1153,11 +1178,11 @@
//.. );
//.. }
//.. return NULL;
-//.. }
+//.. }
=20
# undef unop
# undef binop
-# undef mkU32
+# undef mkU64
# undef mkU8
=20
return NULL;
|