|
From: <sv...@va...> - 2006-08-16 00:25:31
|
Author: sewardj
Date: 2006-08-16 01:25:28 +0100 (Wed, 16 Aug 2006)
New Revision: 1638
Log:
Generate less verbose IR for amd64 'bswapq'. Fixes #132146.
Modified:
trunk/priv/guest-amd64/toIR.c
trunk/priv/main/vex_util.c
Modified: trunk/priv/guest-amd64/toIR.c
===================================================================
--- trunk/priv/guest-amd64/toIR.c 2006-08-16 00:23:21 UTC (rev 1637)
+++ trunk/priv/guest-amd64/toIR.c 2006-08-16 00:25:28 UTC (rev 1638)
@@ -13631,7 +13631,7 @@
binop(Iop_Or32,
binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)), 
+ binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
mkU32(0x00FF0000)),
binop(Iop_Or32,
binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
@@ -13645,33 +13645,50 @@
break;
}
else if (sz == 8) {
+ IRTemp m8 = newTemp(Ity_I64);
+ IRTemp s8 = newTemp(Ity_I64);
+ IRTemp m16 = newTemp(Ity_I64);
+ IRTemp s16 = newTemp(Ity_I64);
+ IRTemp m32 = newTemp(Ity_I64);
t1 = newTemp(Ity_I64);
t2 = newTemp(Ity_I64);
assign( t1, getIRegRexB(8, pfx, opc-0xC8) );

-# define LANE(_nn) \
- binop( Iop_Shl64, \
- binop( Iop_And64, \
- binop(Iop_Shr64, mkexpr(t1), \
- mkU8(8 * (7 - (_nn)))), \
- mkU64(0xFF)), \
- mkU8(8 * (_nn)))
+ assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
+ assign( s8,
+ binop(Iop_Or64,
+ binop(Iop_Shr64,
+ binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
+ mkU8(8)),
+ binop(Iop_And64,
+ binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
+ mkexpr(m8))
+ ) 
+ );

- assign( 
- t2,
- binop(Iop_Or64,
- binop(Iop_Or64,
- binop(Iop_Or64,LANE(0),LANE(1)),
- binop(Iop_Or64,LANE(2),LANE(3))
- ),
- binop(Iop_Or64,
- binop(Iop_Or64,LANE(4),LANE(5)),
- binop(Iop_Or64,LANE(6),LANE(7))
- )
- )
- );
+ assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
+ assign( s16,
+ binop(Iop_Or64,
+ binop(Iop_Shr64,
+ binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
+ mkU8(16)),
+ binop(Iop_And64,
+ binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
+ mkexpr(m16))
+ ) 
+ );

-# undef LANE
+ assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
+ assign( t2,
+ binop(Iop_Or64,
+ binop(Iop_Shr64,
+ binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
+ mkU8(32)),
+ binop(Iop_And64,
+ binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
+ mkexpr(m32))
+ ) 
+ );

putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
Modified: trunk/priv/main/vex_util.c
===================================================================
--- trunk/priv/main/vex_util.c 2006-08-16 00:23:21 UTC (rev 1637)
+++ trunk/priv/main/vex_util.c 2006-08-16 00:25:28 UTC (rev 1638)
@@ -62,7 +62,7 @@
MByte/sec. Once the size increases enough to fall out of the cache
into memory, the rate falls by about a factor of 3. 
*/
-#define N_TEMPORARY_BYTES 2400000
+#define N_TEMPORARY_BYTES 4000000

static HChar temporary[N_TEMPORARY_BYTES] __attribute__((aligned(8)));
static HChar* temporary_first = &temporary[0];
|