You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
1
(1) |
2
(10) |
3
(10) |
4
(14) |
5
(10) |
|
6
|
7
(1) |
8
(14) |
9
(10) |
10
(4) |
11
(2) |
12
(4) |
|
13
|
14
(2) |
15
(11) |
16
(10) |
17
|
18
|
19
(18) |
|
20
(14) |
21
(10) |
22
(1) |
23
(11) |
24
(14) |
25
(10) |
26
(6) |
|
27
(13) |
28
(13) |
29
(12) |
30
(13) |
|
|
|
|
From: <sv...@va...> - 2014-04-03 23:03:43
|
Author: sewardj
Date: Thu Apr 3 23:03:32 2014
New Revision: 2846
Log:
Implement
LD2/ST2 (multiple structures, post index) (some cases)
LD1/ST1 (multiple structures, no offset) (some cases)
Modified:
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Thu Apr 3 23:03:32 2014
@@ -4139,15 +4139,22 @@
return True;
}
- /*
+ /* 31 23
+ 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
+ 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
+ 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
+ 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
+ 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
+ 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
Note that #8 is implied and cannot be any other value.
FIXME does this assume that the host is little endian?
*/
- if ( (insn & 0xFFFFFC00) == 0x0C9F7800 // st1 {vT.2s}, [xN|SP], #8
- || (insn & 0xFFFFFC00) == 0x0C9F7400 // st1 {vT.4h}, [xN|SP], #8
+ if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
+ || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
) {
+ Bool isLD = INSN(22,22) == 1;
UInt rN = INSN(9,5);
UInt vT = INSN(4,0);
IRTemp tEA = newTemp(Ity_I64);
@@ -4155,9 +4162,120 @@
const HChar* name = names[INSN(11,10)];
assign(tEA, getIReg64orSP(rN));
if (rN == 31) { /* FIXME generate stack alignment check */ }
- storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
+ if (isLD) {
+ putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
+ putQRegLane(vT, 1, mkU64(0));
+ } else {
+ storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
+ }
putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
- DIP("st1 {v%u.%s}, [%s], #8\n", vT, name, nameIReg64orSP(rN));
+ DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
+ vT, name, nameIReg64orSP(rN));
+ return True;
+ }
+
+ /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
+ /* Only a very few cases. */
+ /* 31 23 11 9 4
+ 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
+ 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
+ 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
+ 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
+ */
+ if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
+ || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
+ || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
+ || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
+ ) {
+ Bool isLD = INSN(22,22) == 1;
+ UInt rN = INSN(9,5);
+ UInt vT = INSN(4,0);
+ IRTemp tEA = newTemp(Ity_I64);
+ UInt sz = INSN(11,10);
+ const HChar* name = "??";
+ assign(tEA, getIReg64orSP(rN));
+ if (rN == 31) { /* FIXME generate stack alignment check */ }
+ IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
+ IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
+ IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
+ IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
+ if (sz == BITS2(1,1)) {
+ name = "2d";
+ if (isLD) {
+ putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
+ putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
+ putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
+ putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
+ } else {
+ storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
+ storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
+ storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
+ storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
+ }
+ }
+ else if (sz == BITS2(1,0)) {
+ /* Uh, this is ugly. TODO: better. */
+ name = "4s";
+ IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
+ IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
+ IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
+ IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
+ if (isLD) {
+ putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
+ putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
+ putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
+ putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
+ putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
+ putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
+ putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
+ putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
+ } else {
+ storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
+ storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
+ storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
+ storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
+ storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
+ storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
+ storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
+ storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
+ }
+ }
+ else {
+ vassert(0); // Can't happen.
+ }
+ putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
+ DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
+ (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
+ return True;
+ }
+
+ /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
+ /* Only a very few cases. */
+ /* 31 23
+ 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
+ 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
+ */
+ if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
+ || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
+ ) {
+ Bool isLD = INSN(22,22) == 1;
+ UInt rN = INSN(9,5);
+ UInt vT = INSN(4,0);
+ IRTemp tEA = newTemp(Ity_I64);
+ const HChar* name = "16b";
+ assign(tEA, getIReg64orSP(rN));
+ if (rN == 31) { /* FIXME generate stack alignment check */ }
+ IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
+ IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
+ if (isLD) {
+ putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
+ putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
+ } else {
+ storeLE(tEA_0, getQReg128((vT+0) % 32));
+ storeLE(tEA_16, getQReg128((vT+1) % 32));
+ }
+ DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
+ (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
return True;
}
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Thu Apr 3 23:03:32 2014
@@ -5157,6 +5157,9 @@
case ARM64vecu_FNEG64x2:
*p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
break;
+ case ARM64vecu_FNEG32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
+ break;
case ARM64vecu_NOT:
*p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
break;
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Thu Apr 3 23:03:32 2014
@@ -4402,7 +4402,8 @@
case Iop_NotV128:
case Iop_Abs64Fx2:
case Iop_Abs32Fx4:
- case Iop_Neg64Fx2: {
+ case Iop_Neg64Fx2:
+ case Iop_Neg32Fx4: {
HReg res = newVRegV(env);
HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
ARM64VecUnaryOp op = ARM64vecu_INVALID;
@@ -4411,6 +4412,7 @@
case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
+ case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
default: vassert(0);
}
addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
|
|
From: <sv...@va...> - 2014-04-03 23:01:33
|
Author: sewardj
Date: Thu Apr 3 23:01:24 2014
New Revision: 13889
Log:
Add tests for some variants of LD{1,2}/ST{1,2} that load or
store two registers.
Modified:
trunk/none/tests/arm64/test_arm64_int.c
Modified: trunk/none/tests/arm64/test_arm64_int.c
==============================================================================
--- trunk/none/tests/arm64/test_arm64_int.c (original)
+++ trunk/none/tests/arm64/test_arm64_int.c Thu Apr 3 23:01:24 2014
@@ -10794,18 +10794,18 @@
// Out: memory area, xferred vec regs, xferred int regs, addr reg1, addr reg2
//
// INSN may mention the following regs as containing load/store data:
-// x13 x23 v17 v31
+// x13 x23 v17 v18
// and
// x5 as containing the base address
// x6 as containing an offset, if required
-// A memory area is filled with random data, and x13, x23, v17 and v31
+// A memory area is filled with random data, and x13, x23, v17 and v18
// are loaded with random data too. INSN is then executed, with
// x5 set to the middle of the memory area + AREG1OFF, and x6 set to AREG2VAL.
//
// What is printed out: the XOR of the old and new versions of the
// following:
// the memory area
-// x13 x23 v17 v31
+// x13 x23 v17 v18
// and the new-old values of these
// x5 x6
// If the insn modifies its base register then the x5 version will
@@ -10817,7 +10817,7 @@
UChar* area = memalign16(N); \
UChar area2[N]; \
for (i = 0; i < N; i++) area[i] = area2[i] = randUChar(); \
- ULong block[8]; /* x13 x23 v17.d[0] v17.d[1] v31.d[0] v31.d[1] x5 x6 */ \
+ ULong block[8]; /* x13 x23 v17.d[0] v17.d[1] v18.d[0] v18.d[1] x5 x6 */ \
for (i = 0; i < 6; i++) block[i] = randULong(); \
block[6] = (ULong)(&area[128]) + (Long)(Int)AREG1OFF; \
block[7] = (Long)AREG2VAL; \
@@ -10827,17 +10827,17 @@
"ldr x13, [%0, #0] ; " \
"ldr x23, [%0, #8] ; " \
"ldr q17, [%0, #16] ; " \
- "ldr q31, [%0, #32] ; " \
+ "ldr q18, [%0, #32] ; " \
"ldr x5, [%0, #48] ; " \
"ldr x6, [%0, #56] ; " \
INSN " ; " \
"str x13, [%0, #0] ; " \
"str x23, [%0, #8] ; " \
"str q17, [%0, #16] ; " \
- "str q31, [%0, #32] ; " \
+ "str q18, [%0, #32] ; " \
"str x5, [%0, #48] ; " \
"str x6, [%0, #56] ; " \
- : : "r"(&block[0]) : "x5", "x6", "x13","x23","v17","v31","memory","cc" \
+ : : "r"(&block[0]) : "x5", "x6", "x13","x23","v17","v18","memory","cc" \
); \
printf("%s with x5 = middle_of_block+%lld, x6=%lld\n", \
INSN, (Long)AREG1OFF, (Long)AREG2VAL); \
@@ -10846,8 +10846,8 @@
printf(" %016llx x23 (xfer intreg #2)\n", block[1] ^ block2[1]); \
printf(" %016llx v17.d[0] (xfer vecreg #1)\n", block[2] ^ block2[2]); \
printf(" %016llx v17.d[1] (xfer vecreg #1)\n", block[3] ^ block2[3]); \
- printf(" %016llx v31.d[0] (xfer vecreg #2)\n", block[4] ^ block2[4]); \
- printf(" %016llx v31.d[1] (xfer vecreg #2)\n", block[5] ^ block2[5]); \
+ printf(" %016llx v18.d[0] (xfer vecreg #2)\n", block[4] ^ block2[4]); \
+ printf(" %016llx v18.d[1] (xfer vecreg #2)\n", block[5] ^ block2[5]); \
printf(" %16lld x5 (base reg)\n", block[6] - block2[6]); \
printf(" %16lld x6 (index reg)\n", block[7] - block2[7]); \
printf("\n"); \
@@ -10982,29 +10982,29 @@
////////////////////////////////////////////////////////////////
printf("LDP,STP (immediate, simm7) (FP&VEC)\n");
-MEM_TEST("stp q17, q31, [x5, 32]", -16, 4);
-MEM_TEST("stp q17, q31, [x5, 32]!", -16, 4);
-MEM_TEST("stp q17, q31, [x5], 32", -16, 4);
-
-MEM_TEST("stp d17, d31, [x5, 32]", -16, 4);
-MEM_TEST("stp d17, d31, [x5, 32]!", -16, 4);
-MEM_TEST("stp d17, d31, [x5], 32", -16, 4);
-
-//MEM_TEST("stp s17, s31, [x5, 32]", -16, 4);
-//MEM_TEST("stp s17, s31, [x5, 32]!", -16, 4);
-//MEM_TEST("stp s17, s31, [x5], 32", -16, 4);
-
-MEM_TEST("ldp q17, q31, [x5, 32]", -16, 4);
-MEM_TEST("ldp q17, q31, [x5, 32]!", -16, 4);
-MEM_TEST("ldp q17, q31, [x5], 32", -16, 4);
-
-MEM_TEST("ldp d17, d31, [x5, 32]", -16, 4);
-MEM_TEST("ldp d17, d31, [x5, 32]!", -16, 4);
-MEM_TEST("ldp d17, d31, [x5], 32", -16, 4);
-
-//MEM_TEST("ldp s17, s31, [x5, 32]", -16, 4);
-//MEM_TEST("ldp s17, s31, [x5, 32]!", -16, 4);
-//MEM_TEST("ldp s17, s31, [x5], 32", -16, 4);
+MEM_TEST("stp q17, q18, [x5, 32]", -16, 4);
+MEM_TEST("stp q17, q18, [x5, 32]!", -16, 4);
+MEM_TEST("stp q17, q18, [x5], 32", -16, 4);
+
+MEM_TEST("stp d17, d18, [x5, 32]", -16, 4);
+MEM_TEST("stp d17, d18, [x5, 32]!", -16, 4);
+MEM_TEST("stp d17, d18, [x5], 32", -16, 4);
+
+//MEM_TEST("stp s17, s18, [x5, 32]", -16, 4);
+//MEM_TEST("stp s17, s18, [x5, 32]!", -16, 4);
+//MEM_TEST("stp s17, s18, [x5], 32", -16, 4);
+
+MEM_TEST("ldp q17, q18, [x5, 32]", -16, 4);
+MEM_TEST("ldp q17, q18, [x5, 32]!", -16, 4);
+MEM_TEST("ldp q17, q18, [x5], 32", -16, 4);
+
+MEM_TEST("ldp d17, d18, [x5, 32]", -16, 4);
+MEM_TEST("ldp d17, d18, [x5, 32]!", -16, 4);
+MEM_TEST("ldp d17, d18, [x5], 32", -16, 4);
+
+//MEM_TEST("ldp s17, s18, [x5, 32]", -16, 4);
+//MEM_TEST("ldp s17, s18, [x5, 32]!", -16, 4);
+//MEM_TEST("ldp s17, s18, [x5], 32", -16, 4);
////////////////////////////////////////////////////////////////
printf("{LD,ST}R (vector register)\n");
@@ -11198,19 +11198,36 @@
MEM_TEST("st1 {v17.4s}, [x5], #16", 5, 0)
MEM_TEST("st1 {v17.8h}, [x5], #16", 7, 0)
MEM_TEST("st1 {v17.16b}, [x5], #16", 13, 0)
-//MEM_TEST("st1 {v17.1d}, [x5], #8", 3, 0)
+MEM_TEST("st1 {v17.1d}, [x5], #8", 3, 0)
MEM_TEST("st1 {v17.2s}, [x5], #8", 5, 0)
MEM_TEST("st1 {v17.4h}, [x5], #8", 7, 0)
-//MEM_TEST("st1 {v17.8b}, [x5], #8", 13, 0)
+MEM_TEST("st1 {v17.8b}, [x5], #8", 13, 0)
MEM_TEST("ld1 {v17.2d}, [x5], #16", 3, 0)
MEM_TEST("ld1 {v17.4s}, [x5], #16", 5, 0)
MEM_TEST("ld1 {v17.8h}, [x5], #16", 7, 0)
MEM_TEST("ld1 {v17.16b}, [x5], #16", 13, 0)
-//MEM_TEST("ld1 {v17.1d}, [x5], #8", 3, 0)
-//MEM_TEST("ld1 {v17.2s}, [x5], #8", 5, 0)
-//MEM_TEST("ld1 {v17.4h}, [x5], #8", 7, 0)
-//MEM_TEST("ld1 {v17.8b}, [x5], #8", 13, 0)
+MEM_TEST("ld1 {v17.1d}, [x5], #8", 3, 0)
+MEM_TEST("ld1 {v17.2s}, [x5], #8", 5, 0)
+MEM_TEST("ld1 {v17.4h}, [x5], #8", 7, 0)
+MEM_TEST("ld1 {v17.8b}, [x5], #8", 13, 0)
+
+////////////////////////////////////////////////////////////////
+printf("LD2/ST2 (multiple structures, post index) (VERY INCOMPLETE)\n");
+
+MEM_TEST("ld2 {v17.2d, v18.2d}, [x5], #32", 3, 0)
+MEM_TEST("st2 {v17.2d, v18.2d}, [x5], #32", 7, 0)
+
+MEM_TEST("ld2 {v17.4s, v18.4s}, [x5], #32", 13, 0)
+MEM_TEST("st2 {v17.4s, v18.4s}, [x5], #32", 17, 0)
+
+
+////////////////////////////////////////////////////////////////
+printf("LD1/ST1 (multiple structures, no offset) (VERY INCOMPLETE)\n");
+
+MEM_TEST("ld1 {v17.16b, v18.16b}, [x5]", 3, 0)
+MEM_TEST("st1 {v17.16b, v18.16b}, [x5]", 7, 0)
+
} /* end of test_memory2() */
@@ -11225,6 +11242,6 @@
{
if (1) test_arith();
if (1) test_memory();
- test_memory2();
+ if (1) test_memory2();
return 0;
}
|
|
From: <sv...@va...> - 2014-04-03 23:01:03
|
Author: sewardj
Date: Thu Apr 3 23:00:45 2014
New Revision: 13888
Log:
Add tests for FCMEQ, FCMGE, FCMGT, FACGE, FACGT
(reg-vs-reg variants only)
Add tests for all TBL and TBX variants.
Modified:
trunk/none/tests/arm64/test_arm64_fp_and_simd.c
Modified: trunk/none/tests/arm64/test_arm64_fp_and_simd.c
==============================================================================
--- trunk/none/tests/arm64/test_arm64_fp_and_simd.c (original)
+++ trunk/none/tests/arm64/test_arm64_fp_and_simd.c Thu Apr 3 23:00:45 2014
@@ -1089,7 +1089,8 @@
GEN_BINARY_TEST(fabd, 2s)
/* Generate a test that involves three vector regs,
- with no bias as towards which is input or output. */
+ with no bias as towards which is input or output. It's also OK
+ to use v16, v17, v18 as scratch. */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
__attribute__((noinline)) \
static void test_##TESTNAME ( void ) { \
@@ -1111,7 +1112,9 @@
"str q"#VECREG1NO", [%0, #48] ; " \
"str q"#VECREG2NO", [%0, #64] ; " \
"str q"#VECREG3NO", [%0, #80] ; " \
- : : "r"(&block[0]) : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO \
+ : : "r"(&block[0]) \
+ : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
+ "v16", "v17", "v18" \
); \
printf(INSN " "); \
showV128(&block[0]); printf(" "); \
@@ -1187,6 +1190,98 @@
GEN_UNARY_TEST(fabs, 4s, 4s)
GEN_UNARY_TEST(fabs, 2s, 2s)
+GEN_BINARY_TEST(fcmeq, 2d)
+GEN_BINARY_TEST(fcmeq, 4s)
+GEN_BINARY_TEST(fcmeq, 2s)
+GEN_BINARY_TEST(fcmge, 2d)
+GEN_BINARY_TEST(fcmge, 4s)
+GEN_BINARY_TEST(fcmge, 2s)
+GEN_BINARY_TEST(fcmgt, 2d)
+GEN_BINARY_TEST(fcmgt, 4s)
+GEN_BINARY_TEST(fcmgt, 2s)
+GEN_BINARY_TEST(facge, 2d)
+GEN_BINARY_TEST(facge, 4s)
+GEN_BINARY_TEST(facge, 2s)
+GEN_BINARY_TEST(facgt, 2d)
+GEN_BINARY_TEST(facgt, 4s)
+GEN_BINARY_TEST(facgt, 2s)
+
+// Uses v15 as the first table entry
+GEN_THREEVEC_TEST(
+ tbl_16b_1reg, "tbl v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
+// and v15 ^ v21 as the second table entry
+GEN_THREEVEC_TEST(
+ tbl_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "tbl v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
+// and v15 ^ v23 as the third table entry
+GEN_THREEVEC_TEST(
+ tbl_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "tbl v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
+ 21, 15, 23)
+// and v21 ^ v23 as the fourth table entry
+GEN_THREEVEC_TEST(
+ tbl_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "eor v18.16b, v21.16b, v23.16b ; "
+ "tbl v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
+ 21, 15, 23)
+
+// Same register scheme for tbl .8b, tbx .16b, tbx.8b
+GEN_THREEVEC_TEST(
+ tbl_8b_1reg, "tbl v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbl_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "tbl v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbl_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "tbl v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
+ 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbl_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "eor v18.16b, v21.16b, v23.16b ; "
+ "tbl v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
+ 21, 15, 23)
+
+GEN_THREEVEC_TEST(
+ tbx_16b_1reg, "tbx v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbx_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "tbx v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbx_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "tbx v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
+ 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbx_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "eor v18.16b, v21.16b, v23.16b ; "
+ "tbx v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
+ 21, 15, 23)
+
+// Same register scheme for tbx .8b, tbx .16b, tbx.8b
+GEN_THREEVEC_TEST(
+ tbx_8b_1reg, "tbx v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbx_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "tbx v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbx_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "tbx v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
+ 21, 15, 23)
+GEN_THREEVEC_TEST(
+ tbx_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
+ "eor v17.16b, v15.16b, v23.16b ; "
+ "eor v18.16b, v21.16b, v23.16b ; "
+ "tbx v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
+ 21, 15, 23)
+
+
+
/* IMPORTANT: keep the tests in here in the same order as the
implementations are in guest_arm64_toIR.c. */
int main ( void )
@@ -1305,6 +1400,24 @@
//test_fabd_2s();
printf("END: F{ADD,SUB,MUL,DIV,MLA,MLS,ABD} (vector) (MISSING fabd 2s/4s)\n\n");
+ printf("BEGIN: FCM{EQ,GE,GT}, FAC{GE,GT} (vector)\n");
+ test_fcmeq_2d();
+ test_fcmeq_4s();
+ test_fcmeq_2s();
+ test_fcmge_2d();
+ test_fcmge_4s();
+ test_fcmge_2s();
+ test_fcmgt_2d();
+ test_fcmgt_4s();
+ test_fcmgt_2s();
+ test_facge_2d();
+ test_facge_4s();
+ test_facge_2s();
+ test_facgt_2d();
+ test_facgt_4s();
+ test_facgt_2s();
+ printf("END: FCM{EQ,GE,GT}, FAC{GE,GT} (vector)\n");
+
printf("BEGIN: FCVTN (MISSING 16F <- 32F cases)\n");
test_fcvtn_01();
test_fcvtn_02();
@@ -1316,16 +1429,16 @@
test_add_2s();
test_add_8h();
test_add_4h();
- //test_add_16b();
- //test_add_8b();
+ test_add_16b();
+ test_add_8b();
test_sub_2d();
test_sub_4s();
test_sub_2s();
test_sub_8h();
test_sub_4h();
- //test_sub_16b();
- //test_sub_8b();
- printf("END: ADD/SUB (vector) (MISSING b16/b8 cases)\n\n");
+ test_sub_16b();
+ test_sub_8b();
+ printf("END: ADD/SUB (vector)\n\n");
printf("BEGIN: ADD/SUB (scalar)\n");
test_add_d_d_d();
@@ -1586,5 +1699,24 @@
//test_neg_8b_8b();
printf("END: NEG (vector) (MISSING 8b/16b)\n\n");
+ printf("BEGIN: TBL, TBX\n");
+ test_tbl_16b_1reg();
+ test_tbl_16b_2reg();
+ test_tbl_16b_3reg();
+ test_tbl_16b_4reg();
+ test_tbl_8b_1reg();
+ test_tbl_8b_2reg();
+ test_tbl_8b_3reg();
+ test_tbl_8b_4reg();
+ test_tbx_16b_1reg();
+ test_tbx_16b_2reg();
+ test_tbx_16b_3reg();
+ test_tbx_16b_4reg();
+ test_tbx_8b_1reg();
+ test_tbx_8b_2reg();
+ test_tbx_8b_3reg();
+ test_tbx_8b_4reg();
+ printf("END: TBL, TBX\n");
+
return 0;
}
|
|
From: Julian S. <js...@ac...> - 2014-04-03 22:49:40
|
> also get warning from other window > > ==25684== Conditional jump or move depends on uninitialised value(s) > ==25684== at 0x40CA0D8: pthread_join (in /usr/lib64/libpthread-2.18.90.so) > ==25684== by 0x10000847: main (hg05_race2.c:31) Try running your test cases again with --track-origins=yes. That might shed some light on the issue. J |
|
From: Philippe W. <phi...@sk...> - 2014-04-03 19:27:48
|
On Thu, 2014-04-03 at 11:23 -0700, Carl E. Love wrote: Hello Carl, I am giving here and there below some comments. I am not at all sure what I give can help, but it will not do damage in any case :). > I working on porting the current Valgrind Power PC Big Endian code to > also support Power PC Little Endian code. I have seen numerous messages > where Valgrind says "Conditional jump or move depends on uninitialised > value". I am running gdb on the guest code trying to examine the V-bits > to see what bits/bytes are not valid. Unfortunately, from what I get > from Valgrind, it shows the bytes are all valid yet I get an error > message. I have put debug print statements thoughout the various > routines that fetch the V-bits to verify the endianess mode is Little > Endian. That seems to be fine. I am still trying to sort out how the > V-bit checking works, where and how it is done. Here is what I have done > to check the state of the bytes at the location of the conditional > branch. Please let me know if I am not accessing the V-bits correctly > or I am miss interpreting the output. Assuming I am doing things > correctly, where exactly in Valgrind does the Vbit test occur? I would > like to step through the code. I have found routines for > reading/writing the shadow bits in mc_translate.c but am still trying to > find where the actual test is done that generates the error. Thanks for > your help. 
> > Code > > x40ca0c4 <pthread_join+116> addi r26,r30,-30512 │ > │0x40ca0c8 <pthread_join+120> bl 0x40d2308 <__pthread_enable_asynccancel+8>│ > │0x40ca0cc <pthread_join+124> nop │ > B+>│0x40ca0d0 <pthread_join+128> cmpld cr7,r31,r26 │ > │0x40ca0d4 <pthread_join+132> mtlr r3 │ > │0x40ca0d8 <pthread_join+136> beq cr7,0x40ca1bc <pthread_join+364> │ > > > running with > valgrind --vgdb=yes --vgdb-shadow-registers=yes --vgdb-error=0 ./hg05_race2 When doing really lowlevel debugging, you might use --vgdb=full (which automatically activates -vex-iropt-register-updates=allregs-at-each-insn). It might maybe also help to disable all or most of the VEX optimisations using the various --vex-.... options. When you are stopped on a breakpoint with gdb+vgdb, you might maybe look at the generated code (containing the VA bits helper calls) by using monitor v.translate <addr> [<traceflags>] (same as vex tracing, but can be used interactively on precisely what you want when you want). > > When I give gdb "info reg" it shows the register, registers1 and registers2. Not sure what the resgisters2 > is all about I only see registers1 discussed in http://valgrind.org/docs/manual/mc-manual.html#mc-manual.machine > but we will print both for completeness. I believe that the register set shadow1 is used for V bits, while the register set shadow2 is used for origin tracking. BTW, have you tried with --track-origins=yes ? This might maybe give a hint ? > > Run to address 0x40ca0d0 and start poking around. Note, we get the error about > "Conditional jump or move depends on uninitialised value" at 0x40ca0d8. 
> > (gdb) p $r31 > $4 = 78508480 > (gdb) p $r31s1 > $5 = 0 > (gdb) p $r31s2 > $6 = 0 > (gdb) p $r26 > $7 = 67386896 > (gdb) p $r26s1 > $8 = 0 > (gdb) p $r26s2 > $9 = 0 > (gdb) p $r3 > $10 = 0 > (gdb) p $r3s1 > $11 = 0 > (gdb) p $r3s2 > $12 = 0 > > (gdb) p $cr > $14 = 0 > (gdb) p $crs1 > $15 = 0 > (gdb) p $crs2 > $16 = 0 > > So, it appeard that r26, r31 and the condition code register are > completely defined. > > 0x00000000040ca0d4 in pthread_join () from /lib64/libpthread.so.0 > 1: x/i $pc > => 0x40ca0d4 <pthread_join+132>: mtlr r3 > > (gdb) p $r3 > $17 = 0 > (gdb) p $r3s1 > $18 = 0 > (gdb) p $r3s2 > $19 = 0 > (gdb) p $lr > $20 = (void (*)()) 0x0 > (gdb) p $lrs1 > $21 = (void (*)()) 0x0 > (gdb) p $lrs2 > $22 = (void (*)()) 0x0 > (gdb) > > (gdb) si > 0x00000000040ca0d8 in pthread_join () from /lib64/libpthread.so.0 > 1: x/i $pc > => 0x40ca0d8 <pthread_join+136>: beq cr7,0x40ca1bc <pthread_join+364> > > (gdb) p $cr > $24 = 0 > (gdb) p $crs1s > $25 = 0 > (gdb) p $crs2 > $26 = 0 > > condition register bits are all zero so eq is not true > > 0x00000000040ca0d8 in pthread_join () from /lib64/libpthread.so.0 > 1: x/i $pc > => 0x40ca0d8 <pthread_join+136>: beq cr7,0x40ca1bc > <pthread_join+364> > > also get warning from other window > > ==25684== Conditional jump or move depends on uninitialised value(s) > ==25684== at 0x40CA0D8: pthread_join (in /usr/lib64/libpthread-2.18.90.so) > ==25684== by 0x10000847: main (hg05_race2.c:31) > ==25684== > ==25684== (action on error) vgdb me ... > > (gdb) p $cr > $27 = 0 > (gdb) p $crs1 > $28 = 0 > (gdb) p $crs2 > $29 = 0 > > So, if I understand this, the zero in crs1 and crs2 says all bytes (particularly byte 7) > are all defined yet I get an error?? So, where in Valgrind does the test happen? Need to > debug the code I guess. I think your understanding about the above is correct (i.e. that all bits are initiatialised and that no error should be reported). So, looks like a nasty bug. Philippe |
|
From: Carl E. L. <ce...@li...> - 2014-04-03 18:23:16
|
I working on porting the current Valgrind Power PC Big Endian code to also support Power PC Little Endian code. I have seen numerous messages where Valgrind says "Conditional jump or move depends on uninitialised value". I am running gdb on the guest code trying to examine the V-bits to see what bits/bytes are not valid. Unfortunately, from what I get from Valgrind, it shows the bytes are all valid yet I get an error message. I have put debug print statements thoughout the various routines that fetch the V-bits to verify the endianess mode is Little Endian. That seems to be fine. I am still trying to sort out how the V-bit checking works, where and how it is done. Here is what I have done to check the state of the bytes at the location of the conditional branch. Please let me know if I am not accessing the V-bits correctly or I am miss interpreting the output. Assuming I am doing things correctly, where exactly in Valgrind does the Vbit test occur? I would like to step through the code. I have found routines for reading/writing the shadow bits in mc_translate.c but am still trying to find where the actual test is done that generates the error. Thanks for your help. Code x40ca0c4 <pthread_join+116> addi r26,r30,-30512 │ │0x40ca0c8 <pthread_join+120> bl 0x40d2308 <__pthread_enable_asynccancel+8>│ │0x40ca0cc <pthread_join+124> nop │ B+>│0x40ca0d0 <pthread_join+128> cmpld cr7,r31,r26 │ │0x40ca0d4 <pthread_join+132> mtlr r3 │ │0x40ca0d8 <pthread_join+136> beq cr7,0x40ca1bc <pthread_join+364> │ running with valgrind --vgdb=yes --vgdb-shadow-registers=yes --vgdb-error=0 ./hg05_race2 When I give gdb "info reg" it shows the register, registers1 and registers2. Not sure what the resgisters2 is all about I only see registers1 discussed in http://valgrind.org/docs/manual/mc-manual.html#mc-manual.machine but we will print both for completeness. Run to address 0x40ca0d0 and start poking around. 
Note, we get the error about "Conditional jump or move depends on uninitialised value" at 0x40ca0d8. (gdb) p $r31 $4 = 78508480 (gdb) p $r31s1 $5 = 0 (gdb) p $r31s2 $6 = 0 (gdb) p $r26 $7 = 67386896 (gdb) p $r26s1 $8 = 0 (gdb) p $r26s2 $9 = 0 (gdb) p $r3 $10 = 0 (gdb) p $r3s1 $11 = 0 (gdb) p $r3s2 $12 = 0 (gdb) p $cr $14 = 0 (gdb) p $crs1 $15 = 0 (gdb) p $crs2 $16 = 0 So, it appeard that r26, r31 and the condition code register are completely defined. 0x00000000040ca0d4 in pthread_join () from /lib64/libpthread.so.0 1: x/i $pc => 0x40ca0d4 <pthread_join+132>: mtlr r3 (gdb) p $r3 $17 = 0 (gdb) p $r3s1 $18 = 0 (gdb) p $r3s2 $19 = 0 (gdb) p $lr $20 = (void (*)()) 0x0 (gdb) p $lrs1 $21 = (void (*)()) 0x0 (gdb) p $lrs2 $22 = (void (*)()) 0x0 (gdb) (gdb) si 0x00000000040ca0d8 in pthread_join () from /lib64/libpthread.so.0 1: x/i $pc => 0x40ca0d8 <pthread_join+136>: beq cr7,0x40ca1bc <pthread_join+364> (gdb) p $cr $24 = 0 (gdb) p $crs1s $25 = 0 (gdb) p $crs2 $26 = 0 condition register bits are all zero so eq is not true 0x00000000040ca0d8 in pthread_join () from /lib64/libpthread.so.0 1: x/i $pc => 0x40ca0d8 <pthread_join+136>: beq cr7,0x40ca1bc <pthread_join+364> also get warning from other window ==25684== Conditional jump or move depends on uninitialised value(s) ==25684== at 0x40CA0D8: pthread_join (in /usr/lib64/libpthread-2.18.90.so) ==25684== by 0x10000847: main (hg05_race2.c:31) ==25684== ==25684== (action on error) vgdb me ... (gdb) p $cr $27 = 0 (gdb) p $crs1 $28 = 0 (gdb) p $crs2 $29 = 0 So, if I understand this, the zero in crs1 and crs2 says all bytes (particularly byte 7) are all defined yet I get an error?? So, where in Valgrind does the test happen? Need to debug the code I guess. Carl Love |
|
From: Philippe W. <phi...@sk...> - 2014-04-03 16:38:41
|
On Wed, 2014-04-02 at 22:25 -0400, Emanuel Schorsch wrote: > I recently came across the error message: "Address 0x%llx "is not > stack'd, malloc'd or (recently) free'd%s\n" and was confused for a > second until I realized the not didn't apply to the free'd. I think the current message is correct, and that your change would make it incorrect. If the address was really recently freed, then in this case, the above message is *not* given. Instead, if the address is (recently) freed, the message given is something like: Invalid read of size 1 at 0x........: main (big_blocks_freed_list.c:34) Address 0x........ is 2,000 bytes inside a block of size 900,000 free'd at 0x........: free (vg_replace_malloc.c:...) by 0x........: main (big_blocks_freed_list.c:20) In other words, as long as the freed block is in the free list, valgrind can determine the block was (recently) freed and will output a message telling a freed block was accessed. If the freed block is not in the free list anymore (because the free list volume was exceeded), then valgrind cannot make a "precise" diagnostic and then outputs: "Address 0x%llx "is not stack'd, malloc'd or (recently) free'd%s\n" If you believe that the above is not matching your specific case, then the best is to provide a small test case producing the "wrong message" for which you would expect the changed message. Philippe |
|
From: <sv...@va...> - 2014-04-03 13:49:03
|
Author: sewardj
Date: Thu Apr 3 13:48:54 2014
New Revision: 2845
Log:
Implement TBL and TBX instructions.
Modified:
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Thu Apr 3 13:48:54 2014
@@ -4987,6 +4987,96 @@
}
+/* Generate IR for TBL and TBX. This deals with the 128 bit case
+ only. */
+static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
+ IRTemp oor_values )
+{
+ vassert(len >= 0 && len <= 3);
+
+ /* Generate some useful constants as concisely as possible. */
+ IRTemp half15 = newTemp(Ity_I64);
+ assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
+ IRTemp half16 = newTemp(Ity_I64);
+ assign(half16, mkU64(0x1010101010101010ULL));
+
+ /* A zero vector */
+ IRTemp allZero = newTemp(Ity_V128);
+ assign(allZero, mkV128(0x0000));
+ /* A vector containing 15 in each 8-bit lane */
+ IRTemp all15 = newTemp(Ity_V128);
+ assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
+ /* A vector containing 16 in each 8-bit lane */
+ IRTemp all16 = newTemp(Ity_V128);
+ assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
+ /* A vector containing 32 in each 8-bit lane */
+ IRTemp all32 = newTemp(Ity_V128);
+ assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
+ /* A vector containing 48 in each 8-bit lane */
+ IRTemp all48 = newTemp(Ity_V128);
+ assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
+ /* A vector containing 64 in each 8-bit lane */
+ IRTemp all64 = newTemp(Ity_V128);
+ assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
+
+ /* Group the 16/32/48/64 vectors so as to be indexable. */
+ IRTemp allXX[4] = { all16, all32, all48, all64 };
+
+ /* Compute the result for each table vector, with zeroes in places
+ where the index values are out of range, and OR them into the
+ running vector. */
+ IRTemp running_result = newTemp(Ity_V128);
+ assign(running_result, mkV128(0));
+
+ UInt tabent;
+ for (tabent = 0; tabent <= len; tabent++) {
+ vassert(tabent >= 0 && tabent < 4);
+ IRTemp bias = newTemp(Ity_V128);
+ assign(bias,
+ mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
+ IRTemp biased_indices = newTemp(Ity_V128);
+ assign(biased_indices,
+ binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
+ IRTemp valid_mask = newTemp(Ity_V128);
+ assign(valid_mask,
+ binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
+ IRTemp safe_biased_indices = newTemp(Ity_V128);
+ assign(safe_biased_indices,
+ binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
+ IRTemp results_or_junk = newTemp(Ity_V128);
+ assign(results_or_junk,
+ binop(Iop_Perm8x16, mkexpr(tab[tabent]),
+ mkexpr(safe_biased_indices)));
+ IRTemp results_or_zero = newTemp(Ity_V128);
+ assign(results_or_zero,
+ binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
+ /* And OR that into the running result. */
+ IRTemp tmp = newTemp(Ity_V128);
+ assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
+ mkexpr(running_result)));
+ running_result = tmp;
+ }
+
+ /* So now running_result holds the overall result where the indices
+ are in range, and zero in out-of-range lanes. Now we need to
+ compute an overall validity mask and use this to copy in the
+ lanes in the oor_values for out of range indices. This is
+ unnecessary for TBL but will get folded out by iropt, so we lean
+ on that and generate the same code for TBL and TBX here. */
+ IRTemp overall_valid_mask = newTemp(Ity_V128);
+ assign(overall_valid_mask,
+ binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
+ IRTemp result = newTemp(Ity_V128);
+ assign(result,
+ binop(Iop_OrV128,
+ mkexpr(running_result),
+ binop(Iop_AndV128,
+ mkexpr(oor_values),
+ unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
+ return result;
+}
+
+
static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
@@ -6734,6 +6824,43 @@
/* else fall through */
}
+ /* -------------------- TBL, TBX -------------------- */
+ /* 31 28 20 15 14 12 9 4
+ 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
+ 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
+ where Ta = 16b(q=1) or 8b(q=0)
+ */
+ if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
+ && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
+ Bool isQ = INSN(30,30) == 1;
+ Bool isTBX = INSN(12,12) == 1;
+ UInt mm = INSN(20,16);
+ UInt len = INSN(14,13);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ /* The out-of-range values to use. */
+ IRTemp oor_values = newTemp(Ity_V128);
+ assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
+ /* src value */
+ IRTemp src = newTemp(Ity_V128);
+ assign(src, getQReg128(mm));
+ /* The table values */
+ IRTemp tab[4];
+ UInt i;
+ for (i = 0; i <= len; i++) {
+ vassert(i < 4);
+ tab[i] = newTemp(Ity_V128);
+ assign(tab[i], getQReg128((nn + i) % 32));
+ }
+ IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
+ putQReg128(dd, isQ ? mkexpr(res)
+ : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
+ const HChar* Ta = isQ ? "16b" : "8b";
+ const HChar* nm = isTBX ? "tbx" : "tbl";
+ DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
+ nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
+ return True;
+ }
/* FIXME Temporary hacks to get through ld.so FIXME */
/* ------------------ movi vD.4s, #0x0 ------------------ */
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Thu Apr 3 13:48:54 2014
@@ -853,9 +853,11 @@
case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
+ case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
+ case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
case ARM64vecb_FADD64x2: *nm = "fadd"; *ar = "2d"; return;
@@ -891,6 +893,8 @@
case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return;
case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return;
case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return;
+ case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
+ case ARM64vecb_CMHI8x16: *nm = "cmhi"; *ar = "16b"; return;
default: vpanic("showARM64VecBinOp");
}
}
@@ -3337,6 +3341,7 @@
#define X001000 BITS8(0,0, 0,0,1,0,0,0)
#define X001001 BITS8(0,0, 0,0,1,0,0,1)
#define X001010 BITS8(0,0, 0,0,1,0,1,0)
+#define X001101 BITS8(0,0, 0,0,1,1,0,1)
#define X001111 BITS8(0,0, 0,0,1,1,1,1)
#define X010000 BITS8(0,0, 0,1,0,0,0,0)
#define X010001 BITS8(0,0, 0,1,0,0,0,1)
@@ -4916,10 +4921,12 @@
010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
+ 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
+ 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
@@ -4970,6 +4977,10 @@
011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
+
+ 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
+
+ 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
*/
UInt vD = qregNo(i->ARM64in.VBinV.dst);
UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -4984,6 +4995,9 @@
case ARM64vecb_ADD16x8:
*p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
break;
+ case ARM64vecb_ADD8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
+ break;
case ARM64vecb_SUB64x2:
*p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
break;
@@ -4993,6 +5007,9 @@
case ARM64vecb_SUB16x8:
*p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
break;
+ case ARM64vecb_SUB8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
+ break;
case ARM64vecb_MUL32x4:
*p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
break;
@@ -5107,6 +5124,14 @@
case ARM64vecb_FCMGT32x4:
*p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
break;
+
+ case ARM64vecb_TBL1:
+ *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
+ break;
+
+ case ARM64vecb_CMHI8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
+ break;
default:
goto bad;
}
Modified: trunk/priv/host_arm64_defs.h
==============================================================================
--- trunk/priv/host_arm64_defs.h (original)
+++ trunk/priv/host_arm64_defs.h Thu Apr 3 13:48:54 2014
@@ -310,9 +310,11 @@
ARM64vecb_ADD64x2=120,
ARM64vecb_ADD32x4,
ARM64vecb_ADD16x8,
+ ARM64vecb_ADD8x16,
ARM64vecb_SUB64x2,
ARM64vecb_SUB32x4,
ARM64vecb_SUB16x8,
+ ARM64vecb_SUB8x16,
ARM64vecb_MUL32x4,
ARM64vecb_MUL16x8,
ARM64vecb_FADD64x2,
@@ -348,6 +350,8 @@
ARM64vecb_FCMGE32x4,
ARM64vecb_FCMGT64x2,
ARM64vecb_FCMGT32x4,
+ ARM64vecb_TBL1,
+ ARM64vecb_CMHI8x16,
ARM64vecb_INVALID
}
ARM64VecBinOp;
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Thu Apr 3 13:48:54 2014
@@ -4918,9 +4918,11 @@
case Iop_Add64x2:
case Iop_Add32x4:
case Iop_Add16x8:
+ case Iop_Add8x16:
case Iop_Sub64x2:
case Iop_Sub32x4:
case Iop_Sub16x8:
+ case Iop_Sub8x16:
case Iop_Mul32x4:
case Iop_Mul16x8:
case Iop_CmpEQ64x2:
@@ -4930,6 +4932,8 @@
case Iop_CmpLE32Fx4:
case Iop_CmpLT64Fx2:
case Iop_CmpLT32Fx4:
+ case Iop_Perm8x16:
+ case Iop_CmpGT8Ux16:
{
HReg res = newVRegV(env);
HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
@@ -4955,9 +4959,11 @@
case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
+ case Iop_Add8x16: op = ARM64vecb_ADD8x16; break;
case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break;
case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break;
case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break;
+ case Iop_Sub8x16: op = ARM64vecb_SUB8x16; break;
case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break;
case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break;
case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break;
@@ -4967,6 +4973,8 @@
case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
+ case Iop_Perm8x16: op = ARM64vecb_TBL1; break;
+ case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
default: vassert(0);
}
if (sw) {
|
|
From: <sv...@va...> - 2014-04-03 13:48:32
|
Author: sewardj
Date: Thu Apr 3 13:48:21 2014
New Revision: 2844
Log:
Add a couple more constant folding rules for vectors.
Modified:
trunk/priv/ir_opt.c
Modified: trunk/priv/ir_opt.c
==============================================================================
--- trunk/priv/ir_opt.c (original)
+++ trunk/priv/ir_opt.c Thu Apr 3 13:48:21 2014
@@ -1251,7 +1251,8 @@
case Iop_And64:
case Iop_Sub64:
case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
- case Iop_XorV128: return IRExpr_Const(IRConst_V128(0));
+ case Iop_XorV128:
+ case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
default: vpanic("mkZeroOfPrimopResultType: bad primop");
}
}
@@ -2150,6 +2151,13 @@
break;
}
break;
+ case Iop_Sub8x16:
+ /* Sub8x16(x,0) ==> x */
+ if (isZeroV128(e->Iex.Binop.arg2)) {
+ e2 = e->Iex.Binop.arg1;
+ break;
+ }
+ break;
case Iop_And64:
case Iop_And32:
@@ -2185,13 +2193,18 @@
e2 = e->Iex.Binop.arg1;
break;
}
- if (/* could handle other And cases here too, but so
- far not */
- e->Iex.Binop.op == Iop_And64
+ /* Deal with either arg zero. Could handle other And
+ cases here too. */
+ if (e->Iex.Binop.op == Iop_And64
&& (isZeroU64(e->Iex.Binop.arg1)
|| isZeroU64(e->Iex.Binop.arg2))) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
break;
+ } else if (e->Iex.Binop.op == Iop_AndV128
+ && (isZeroV128(e->Iex.Binop.arg1)
+ || isZeroV128(e->Iex.Binop.arg2))) {
+ e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
+ break;
}
break;
|
|
From: Emanuel S. <ems...@gm...> - 2014-04-03 02:25:38
|
I recently came across the error message: "Address 0x%llx "is not stack'd, malloc'd or (recently) free'd%s\n" and was confused for a second until I realized the not didn't apply to the free'd. I realize this is minor but it seemed like the following might be a slightly better message: "%sAddress 0x%llx was (recently) free'd or is not stack'd or malloc'd%s\n". This change is on line 311 in memcheck/.svn/text-base/mc_errors.c.svn-base. I tried to figure out what the proper procedure for suggesting this was but I couldn't find anything other than to email this mailing list. Emanuel |