You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
1
|
2
(1) |
|
3
|
4
(4) |
5
(4) |
6
|
7
|
8
|
9
|
|
10
(2) |
11
(2) |
12
(2) |
13
|
14
|
15
(2) |
16
(1) |
|
17
(2) |
18
(2) |
19
(3) |
20
(4) |
21
(1) |
22
|
23
|
|
24
(7) |
25
|
26
(4) |
27
(7) |
28
(2) |
29
(1) |
30
(2) |
|
31
|
|
|
|
|
|
|
|
From: <sv...@va...> - 2016-07-24 18:59:08
|
Author: sewardj
Date: Sun Jul 24 19:59:02 2016
New Revision: 15915
Log:
Enable test cases for PMULL 1q,1d,1d and PMULL2 1q,2d,2d. n-i-bz.
Modified:
trunk/none/tests/arm64/fp_and_simd.c
trunk/none/tests/arm64/fp_and_simd.stdout.exp
Modified: trunk/none/tests/arm64/fp_and_simd.c
==============================================================================
--- trunk/none/tests/arm64/fp_and_simd.c (original)
+++ trunk/none/tests/arm64/fp_and_simd.c Sun Jul 24 19:59:02 2016
@@ -3104,8 +3104,8 @@
GEN_BINARY_TEST(pmull, 8h, 8b, 8b)
GEN_BINARY_TEST(pmull2, 8h, 16b, 16b)
-//GEN_BINARY_TEST(pmull, 1q, 1d, 1d)
-//GEN_BINARY_TEST(pmull, 1q, 2d, 2d)
+GEN_BINARY_TEST(pmull, 1q, 1d, 1d)
+GEN_BINARY_TEST(pmull2, 1q, 2d, 2d)
GEN_UNARY_TEST(rbit, 16b, 16b)
GEN_UNARY_TEST(rbit, 8b, 8b)
@@ -5791,11 +5791,10 @@
if (1) test_pmul_16b_16b_16b(TyB);
if (1) test_pmul_8b_8b_8b(TyB);
- // pmull{2} 8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
+ // pmull{2} 8h_8b_8b,8h_16b_16b
+ // pmull{2} 1q_1d_1d,1q_2d_2d is in the crypto section below
if (1) test_pmull_8h_8b_8b(TyB);
if (1) test_pmull2_8h_16b_16b(TyB);
- //if (0) test_pmull_1q_1d_1d(TyD);
- //if (0) test_pmull_1q_2d_2d(TyD);
// rbit 16b,8b
// rev16 16b,8b
@@ -7412,7 +7411,11 @@
if (1) DO50( test_sha256su0_4s_4s(TyNONE) );
if (1) DO50( test_sha256su1_4s_4s_4s(TyNONE) );
- return 0;
+ // pmull{2} 1q_1d_1d,1q_2d_2d
+ if (1) test_pmull_1q_1d_1d(TyD);
+ if (1) test_pmull2_1q_2d_2d(TyD);
+
+return 0;
}
Modified: trunk/none/tests/arm64/fp_and_simd.stdout.exp
==============================================================================
--- trunk/none/tests/arm64/fp_and_simd.stdout.exp (original)
+++ trunk/none/tests/arm64/fp_and_simd.stdout.exp Sun Jul 24 19:59:02 2016
@@ -29507,3 +29507,5 @@
sha256su1 v29.4s, v28.4s, v27.4s eda7a9269a60e51084bf647b799a97f3 26cbe2af584fd72af3af1a0396bba5eb 45f0d4e0f3356206ee5c8e05444d7dd2 0d51d4db6f296e394e0c92188ac91c9a 26cbe2af584fd72af3af1a0396bba5eb 45f0d4e0f3356206ee5c8e05444d7dd2 fpsr=00000000
sha256su1 v29.4s, v28.4s, v27.4s d35c419b47ed0f14b582787d24a48b1d cb4b6a89aee886e87d166c48ce1576d3 a1775cd45b4897e4ab0310a252c10b40 e58afd6aeeca7085aa4398a7fad36c4d cb4b6a89aee886e87d166c48ce1576d3 a1775cd45b4897e4ab0310a252c10b40 fpsr=00000000
sha256su1 v29.4s, v28.4s, v27.4s c462740a142b655fb54f224a658ad94e 0f49b15b5a81270bacf4bb9612d6a622 b22a4a3fd02904c46211750661ad7ef1 1086338cba86b6677c44ef17b54e8d34 0f49b15b5a81270bacf4bb9612d6a622 b22a4a3fd02904c46211750661ad7ef1 fpsr=00000000
+pmull v9.1q, v7.1d, v8.1d c24da4e7c56e0c25073948d77e1d233a f75a1d9a216edfc7045febe1a4d5d98a 001d7606b697893ced1721a29aea08a4 fpsr=00000000
+pmull2 v9.1q, v7.2d, v8.2d cb8acfd884207d2a8de4b3b5e64f7f43 bd6c634aafa1f3630c23d386c401bb44 759bae09ccbf866ea596eb35c23576be fpsr=00000000
|
|
From: <sv...@va...> - 2016-07-24 18:58:28
|
Author: sewardj
Date: Sun Jul 24 19:58:21 2016
New Revision: 3232
Log:
Implement PMULL 1q,1d,1d and PMULL2 1q,2d,2d. n-i-bz.
Modified:
trunk/priv/guest_arm64_defs.h
trunk/priv/guest_arm64_helpers.c
trunk/priv/guest_arm64_toIR.c
Modified: trunk/priv/guest_arm64_defs.h
==============================================================================
--- trunk/priv/guest_arm64_defs.h (original)
+++ trunk/priv/guest_arm64_defs.h Sun Jul 24 19:58:21 2016
@@ -115,6 +115,9 @@
extern ULong arm64g_dirtyhelper_MRS_CNTVCT_EL0 ( void );
+extern void arm64g_dirtyhelper_PMULLQ ( /*OUT*/V128* res,
+ ULong arg1, ULong arg2 );
+
extern void arm64g_dirtyhelper_AESE ( /*OUT*/V128* res,
ULong argHi, ULong argLo );
extern void arm64g_dirtyhelper_AESD ( /*OUT*/V128* res,
Modified: trunk/priv/guest_arm64_helpers.c
==============================================================================
--- trunk/priv/guest_arm64_helpers.c (original)
+++ trunk/priv/guest_arm64_helpers.c Sun Jul 24 19:58:21 2016
@@ -692,6 +692,33 @@
}
+void arm64g_dirtyhelper_PMULLQ ( /*OUT*/V128* res, ULong arg1, ULong arg2 )
+{
+ /* This doesn't need to be a dirty helper, except for the fact that
+ a clean helper can't return a 128 bit value. This is a pretty
+ lame implementation of PMULLQ, but at least it doesn't contain any
+ data dependent branches, and has lots of ILP. I guess we could unroll
+ the loop completely and offer extensive prayers to the gods of ILP
+ if more performance is needed. */
+ UInt i;
+ ULong accHi = 0, accLo = 0;
+ ULong op2Hi = 0, op2Lo = arg2;
+ for (i = 0; i < 64; i++) {
+ /* Make |mask| be all 0s or all 1s, a copy of arg1[i] */
+ Long mask = arg1 << (63-i);
+ mask >>= 63;
+ accHi ^= (op2Hi & mask);
+ accLo ^= (op2Lo & mask);
+ /* do: op2Hi:op2Lo <<=u 1 */
+ op2Hi <<= 1;
+ op2Hi |= ((op2Lo >> 63) & 1);
+ op2Lo <<= 1;
+ }
+ res->w64[1] = accHi;
+ res->w64[0] = accLo;
+}
+
+
/*---------------------------------------------------------------*/
/*--- Crypto instruction helpers ---*/
/*---------------------------------------------------------------*/
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Sun Jul 24 19:58:21 2016
@@ -11153,16 +11153,41 @@
if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
/* -------- 0,1110 PMULL{2} -------- */
/* Widens, and size refers to the narrow lanes. */
- if (size != X00) return False;
- IRTemp res
- = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
- getQReg128(nn), getQReg128(mm));
- putQReg128(dd, mkexpr(res));
- const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
- const HChar* arrWide = nameArr_Q_SZ(1, size+1);
+ if (size != X00 && size != X11) return False;
+ IRTemp res = IRTemp_INVALID;
+ IRExpr* srcN = getQReg128(nn);
+ IRExpr* srcM = getQReg128(mm);
+ const HChar* arrNarrow = NULL;
+ const HChar* arrWide = NULL;
+ if (size == X00) {
+ res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
+ srcN, srcM);
+ arrNarrow = nameArr_Q_SZ(bitQ, size);
+ arrWide = nameArr_Q_SZ(1, size+1);
+ } else {
+ /* The same thing as the X00 case, except we have to call
+ a helper to do it. */
+ vassert(size == X11);
+ res = newTemp(Ity_V128);
+ IROp slice
+ = is2 ? Iop_V128HIto64 : Iop_V128to64;
+ IRExpr** args
+ = mkIRExprVec_3( IRExpr_VECRET(),
+ unop(slice, srcN), unop(slice, srcM));
+ IRDirty* di
+ = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ "arm64g_dirtyhelper_PMULLQ",
+ &arm64g_dirtyhelper_PMULLQ, args);
+ stmt(IRStmt_Dirty(di));
+ /* We can't use nameArr_Q_SZ for this because it can't deal with
+ Q-sized (128 bit) results. Hence do it by hand. */
+ arrNarrow = bitQ == 0 ? "1d" : "2d";
+ arrWide = "1q";
+ }
+ putQReg128(dd, mkexpr(res));
DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
- nameQReg128(dd), arrNarrow,
- nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
+ nameQReg128(dd), arrWide,
+ nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
return True;
}
|
|
From: <sv...@va...> - 2016-07-24 16:56:30
|
Author: sewardj
Date: Sun Jul 24 17:56:22 2016
New Revision: 3231
Log:
Fix grammatically nonsensical comments. No functional change.
Modified:
trunk/priv/guest_arm64_toIR.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Sun Jul 24 17:56:22 2016
@@ -10957,7 +10957,7 @@
/* -------- 1,0000 UADDL{2} -------- */
/* -------- 0,0010 SSUBL{2} -------- */
/* -------- 1,0010 USUBL{2} -------- */
- /* Widens, and size refers to the narrowed lanes. */
+ /* Widens, and size refers to the narrow lanes. */
if (size == X11) return False;
vassert(size <= 2);
Bool isU = bitU == 1;
@@ -10983,7 +10983,7 @@
/* -------- 1,0001 UADDW{2} -------- */
/* -------- 0,0011 SSUBW{2} -------- */
/* -------- 1,0011 USUBW{2} -------- */
- /* Widens, and size refers to the narrowed lanes. */
+ /* Widens, and size refers to the narrow lanes. */
if (size == X11) return False;
vassert(size <= 2);
Bool isU = bitU == 1;
@@ -11047,7 +11047,7 @@
/* -------- 1,0101 UABAL{2} -------- */
/* -------- 0,0111 SABDL{2} -------- */
/* -------- 1,0111 UABDL{2} -------- */
- /* Widens, and size refers to the narrowed lanes. */
+ /* Widens, and size refers to the narrow lanes. */
if (size == X11) return False;
vassert(size <= 2);
Bool isU = bitU == 1;
@@ -11077,7 +11077,7 @@
/* -------- 1,1000 UMLAL{2} -------- */ // 1
/* -------- 0,1010 SMLSL{2} -------- */ // 2
/* -------- 1,1010 UMLSL{2} -------- */ // 2
- /* Widens, and size refers to the narrowed lanes. */
+ /* Widens, and size refers to the narrow lanes. */
UInt ks = 3;
switch (opcode) {
case BITS4(1,1,0,0): ks = 0; break;
@@ -11114,7 +11114,7 @@
/* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
/* -------- 0,1001 SQDMLAL{2} -------- */ // 1
/* -------- 0,1011 SQDMLSL{2} -------- */ // 2
- /* Widens, and size refers to the narrowed lanes. */
+ /* Widens, and size refers to the narrow lanes. */
UInt ks = 3;
switch (opcode) {
case BITS4(1,1,0,1): ks = 0; break;
@@ -11152,7 +11152,7 @@
if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
/* -------- 0,1110 PMULL{2} -------- */
- /* Widens, and size refers to the narrowed lanes. */
+ /* Widens, and size refers to the narrow lanes. */
if (size != X00) return False;
IRTemp res
= math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
|
|
From: Julian S. <js...@ac...> - 2016-07-24 12:52:31
|
> as you may be aware, we are currently working on sparcv9 support
> for Valgrind [1]. Part of the ISA are crypto instructions, such as md5,
> sha256, AES, Camellia, Montgomery multiplication and squaring, and others [2].
> Some of these instructions can have inputs of up to several kilobytes,
> utilizing sparc register windows and floating-point registers.
Hmm, I see that XMONT* access 7 register windows (!). That must be a
complete nightmare to implement in hardware.
> 1. Describe a crypto instruction with IR opcodes.
> Pros: clean approach, does not require any isel support, works for
> cross-arch analysis
> Cons: bloated IR tree, especially after a tool instrumentation, expecting
> slow processing
Agree .. I don't think this is a good solution.
> 2. Utilize a clean helper function which will compute required output.
> Pros: IR tree will be small, instrumentation relatively fast
> Cons: unclear how to pass effectively inputs and outputs from the helper,
> logic is "hidden" behind the helper
This is probably your best bet, if you can do it. Can you split the
problem into a sequence of C helper calls, in which each helper takes
256 or 384 bits of input, and returns a 128 bit result? I used this
technique recently to implement arm64 AES and SHA instructions -- have
a look.
> 3. Add a new IR opcode for a crypto instruction.
> Pros: clear intent, smallest IR tree
> Cons: need to add support to VEX isel and all tools,
> unclear how to effectively allocate virtual/host registers
That sounds complex and difficult from a register allocation point of view.
There are two other possible solutions:
[1] This is a horrible hack. Try to avoid it. It can cause
the guest program to observe different results natively vs on Valgrind,
if it is buggy. But anyway:
Generate IR like this:
* move the (guest) SP down by (eg) 1024 bytes.
* copy all guest registers into the newly created area on the guest
stack
* call a dirty helper function to do the computation, passing it the
SP value as a parameter
* copy values out of memory area back into guest registers
* move SP back up 1024 bytes
The problem is that if the guest program has, for any reason, stored values
on the stack below SP then they will be corrupted. We had an obscure
and longstanding bug on x86_64 for this reason. Also, if the program
takes a signal in the middle of this sequence then the state may be
corrupted. (Not entirely sure about that, but ..)
[2] This is better but whether it actually works depends on the exact
details of which registers are accessed, and whether you can describe
that in the dirty-helper side-effect annotations. Which -- I suspect
you will have problems with because of the register windows. Anyway:
* generate a single dirty helper call, passing it a pointer to the
VexGuestSPARC64State struct and any other params you require.
* Write C to do the operations directly on that state
* [the difficult bit] make sure you can actually describe, in the
IRDirty::fxState area, which parts of the register state the
helper reads and writes. If you can't, it's game over :-(
See guest_x86_toIR.c "FNSAVE m108" for an example.
J
|
|
From: <sv...@va...> - 2016-07-24 11:46:04
|
Author: sewardj
Date: Sun Jul 24 12:45:58 2016
New Revision: 15914
Log:
Update.
Modified:
trunk/NEWS
trunk/docs/internals/3_11_BUGSTATUS.txt
Modified: trunk/NEWS
==============================================================================
--- trunk/NEWS (original)
+++ trunk/NEWS Sun Jul 24 12:45:58 2016
@@ -65,9 +65,11 @@
353137 www: update info for Supported Platforms
353138 www: update "The Valgrind Developers" page
353370 don't advertise RDRAND in cpuid for Core-i7-4910-like avx2 machine
+353384 amd64->IR: 0x66 0xF 0x3A 0x62 0xD1 0x62 (pcmpXstrX $0x62)
353398 WARNING: unhandled amd64-solaris syscall: 207
353660 XML in auxwhat tag not escaping reserved symbols properly
353680 s390x: Crash with certain glibc versions due to non-implemented TBEGIN
+353727 amd64->IR: 0x66 0xF 0x3A 0x62 0xD1 0x72 (pcmpXstrX $0x72)
353802 ELF debug info reader confused with multiple .rodata sections
353891 Assert 'bad_scanned_addr < VG_ROUNDDN(start+len, sizeof(Addr))' failed
353917 unhandled amd64-solaris syscall fchdir(120)
@@ -123,6 +125,7 @@
362009 Valgrind dumps core on unimplemented functionality before threads are created
362329 Valgrind does not support the IBM POWER ISA 3.0 instructions, part 3
362894 missing (broken) support for wbit field on mtfsfi instruction (ppc64)
+362935 [AsusWRT] Assertion 'sizeof(TTEntryC) <= 88' failed
363680 add renameat2() support
363705 arm64 missing syscall name_to_handle_at and open_by_handle_at
363714 ppc64 missing syscalls sync, waitid and name_to/open_by_handle_at
Modified: trunk/docs/internals/3_11_BUGSTATUS.txt
==============================================================================
--- trunk/docs/internals/3_11_BUGSTATUS.txt (original)
+++ trunk/docs/internals/3_11_BUGSTATUS.txt Sun Jul 24 12:45:58 2016
@@ -9,14 +9,6 @@
"unhandled instruction bytes"
[Probably invalid]
-353384 unhandled instruction bytes: 0x66 0xF 0x3A 0x62 0xD1 0x62 0x41 0x3B
- __intel_sse4_strpbrk
- pcmpistrm $0x62,%xmm1,%xmm2
-
-353727 unhandled instruction bytes: 0x66 0xF 0x3A 0x62 0xD1 0x72 0x45 0x3B
- __intel_sse4_strspn
- pcmpistrm $0x72,%xmm1,%xmm2
-
354931 Analyze of qt app results in incompatible processor message
[Gentoo stupidity]
@@ -169,7 +161,6 @@
=== other/arm ==========================================================
-362935 [AsusWRT] Assertion 'sizeof(TTEntryC) <= 88' failed
364533 Process terminating with default action of signal 4 (SIGILL): dumping
core, : at 0x4000E7C: ??? (in /lib/ld-uClibc.so.0)
|
|
From: <sv...@va...> - 2016-07-24 11:41:19
|
Author: sewardj
Date: Sun Jul 24 12:41:12 2016
New Revision: 15913
Log:
Add test cases for PCMPxSTRx cases 0x62 and 0x72, and reformat the associated
switch statements a bit more consistently. Pertains to #353384 and #353727.
Modified:
trunk/none/tests/amd64/pcmpstr64.c
trunk/none/tests/amd64/pcmpstr64.stdout.exp
Modified: trunk/none/tests/amd64/pcmpstr64.c
==============================================================================
--- trunk/none/tests/amd64/pcmpstr64.c (original)
+++ trunk/none/tests/amd64/pcmpstr64.c Sun Jul 24 12:41:12 2016
@@ -203,11 +203,16 @@
even if they would probably work. Life is too short to have
unvalidated cases in the code base. */
switch (imm8) {
- case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
- case 0x12: case 0x14: case 0x18: case 0x1A:
- case 0x30: case 0x34: case 0x38: case 0x3A:
- case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
- case 0x70:
+ case 0x00: case 0x02:
+ case 0x08: case 0x0A: case 0x0C: case 0x0E:
+ case 0x12: case 0x14:
+ case 0x18: case 0x1A:
+ case 0x30: case 0x34:
+ case 0x38: case 0x3A:
+ case 0x40: case 0x42: case 0x44: case 0x46:
+ case 0x4A:
+ case 0x62:
+ case 0x70: case 0x72:
break;
default:
return False;
@@ -2052,6 +2057,166 @@
//////////////////////////////////////////////////////////
// //
+// ISTRI_62 //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_62 ( V128* argL, V128* argR )
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128));
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t"
+ "movdqu 0(%2), %%xmm2" "\n\t"
+ "movdqu 16(%2), %%xmm11" "\n\t"
+ "pcmpistri $0x62, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%rdx" "\n\t"
+ "movq %%rcx, %0" "\n\t"
+ "movq %%rdx, %1" "\n\t"
+ "addq $1024, %%rsp" "\n\t"
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
+}
+
+UInt s_pcmpistri_62 ( V128* argLU, V128* argRU )
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU),
+ zmask_from_V128(argRU),
+ 0x62, False/*!isSTRM*/
+ );
+ assert(ok);
+ resECX = resV.uInt[0];
+ return (resOSZACP << 16) | resECX;
+}
+
+void istri_62 ( void )
+{
+ char* wot = "62";
+ UInt(*h)(V128*,V128*) = h_pcmpistri_62;
+ UInt(*s)(V128*,V128*) = s_pcmpistri_62;
+
+ try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
+ try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
+
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
+
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
+
+ try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
+ try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
+}
+
+
+//////////////////////////////////////////////////////////
+// //
+// ISTRI_72 //
+// //
+//////////////////////////////////////////////////////////
+
+UInt h_pcmpistri_72 ( V128* argL, V128* argR )
+{
+ V128 block[2];
+ memcpy(&block[0], argL, sizeof(V128));
+ memcpy(&block[1], argR, sizeof(V128));
+ ULong res, flags;
+ __asm__ __volatile__(
+ "subq $1024, %%rsp" "\n\t"
+ "movdqu 0(%2), %%xmm2" "\n\t"
+ "movdqu 16(%2), %%xmm11" "\n\t"
+ "pcmpistri $0x72, %%xmm2, %%xmm11" "\n\t"
+ "pushfq" "\n\t"
+ "popq %%rdx" "\n\t"
+ "movq %%rcx, %0" "\n\t"
+ "movq %%rdx, %1" "\n\t"
+ "addq $1024, %%rsp" "\n\t"
+ : /*out*/ "=r"(res), "=r"(flags) : "r"/*in*/(&block[0])
+ : "rcx","rdx","xmm0","xmm2","xmm11","cc","memory"
+ );
+ return ((flags & 0x8D5) << 16) | (res & 0xFFFF);
+}
+
+UInt s_pcmpistri_72 ( V128* argLU, V128* argRU )
+{
+ V128 resV;
+ UInt resOSZACP, resECX;
+ Bool ok
+ = pcmpXstrX_WRK( &resV, &resOSZACP, argLU, argRU,
+ zmask_from_V128(argLU),
+ zmask_from_V128(argRU),
+ 0x72, False/*!isSTRM*/
+ );
+ assert(ok);
+ resECX = resV.uInt[0];
+ return (resOSZACP << 16) | resECX;
+}
+
+void istri_72 ( void )
+{
+ char* wot = "72";
+ UInt(*h)(V128*,V128*) = h_pcmpistri_72;
+ UInt(*s)(V128*,V128*) = s_pcmpistri_72;
+
+ try_istri(wot,h,s, "abcdacbdabcdabcd", "000000000000000a");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000000b");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "00000000000000ab");
+ try_istri(wot,h,s, "abcdabc0abcdabcd", "000000000000abcd");
+
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0bcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcda0cd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdab0d", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdabc0", "000000000000abcd");
+
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000a0cd");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000ab0d");
+ try_istri(wot,h,s, "abcdabcdabcdabcd", "000000000000abc0");
+
+ try_istri(wot,h,s, "0000000000000000", "0000000000000000");
+ try_istri(wot,h,s, "aaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaa");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000abcd");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000dcba");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000bbbb");
+ try_istri(wot,h,s, "0000abcdabcdabcd", "000000000000baba");
+
+ try_istri(wot,h,s, "0000abcdabcdabcd", "00000000000baba0");
+
+ try_istri(wot,h,s, "0ddc0ffeebadf00d", "00000000cafebabe");
+ try_istri(wot,h,s, "0ddc0ffeebadfeed", "00000000cafebabe");
+}
+
+
+//////////////////////////////////////////////////////////
+// //
// main //
// //
//////////////////////////////////////////////////////////
@@ -2077,5 +2242,7 @@
istri_14();
istri_34();
istri_70();
+ istri_62();
+ istri_72();
return 0;
}
Modified: trunk/none/tests/amd64/pcmpstr64.stdout.exp
==============================================================================
--- trunk/none/tests/amd64/pcmpstr64.stdout.exp (original)
+++ trunk/none/tests/amd64/pcmpstr64.stdout.exp Sun Jul 24 12:41:12 2016
@@ -480,3 +480,47 @@
istri 70 0000abcdabcdabcd 00000000000baba0 -> 08c1000b 08c1000b
istri 70 0ddc0ffeebadf00d 00000000cafebabe -> 08c10000 08c10000
istri 70 0ddc0ffeebadfeed 00000000cafebabe -> 08c10004 08c10004
+istri 62 abcdacbdabcdabcd 000000000000000a -> 0081000f 0081000f
+istri 62 abcdabcdabcdabcd 000000000000000b -> 0081000e 0081000e
+istri 62 abcdabcdabcdabcd 00000000000000ab -> 0081000f 0081000f
+istri 62 abcdabc0abcdabcd 000000000000abcd -> 08c10007 08c10007
+istri 62 abcdabcdabcdabcd 000000000000abcd -> 0881000f 0881000f
+istri 62 0bcdabcdabcdabcd 000000000000abcd -> 08c1000e 08c1000e
+istri 62 abcdabcdabcda0cd 000000000000abcd -> 08c10001 08c10001
+istri 62 abcdabcdabcdab0d 000000000000abcd -> 08c10000 08c10000
+istri 62 abcdabcdabcdabc0 000000000000abcd -> 00c00010 00c00010
+istri 62 abcdabcdabcdabcd 000000000000abcd -> 0881000f 0881000f
+istri 62 abcdabcdabcdabcd 000000000000a0cd -> 0881000d 0881000d
+istri 62 abcdabcdabcdabcd 000000000000ab0d -> 0881000c 0881000c
+istri 62 abcdabcdabcdabcd 000000000000abc0 -> 00800010 00800010
+istri 62 0000000000000000 0000000000000000 -> 00c00010 00c00010
+istri 62 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 0801000f 0801000f
+istri 62 0000abcdabcdabcd 000000000000abcd -> 08c1000b 08c1000b
+istri 62 0000abcdabcdabcd 000000000000dcba -> 08c1000b 08c1000b
+istri 62 0000abcdabcdabcd 000000000000bbbb -> 00c1000a 00c1000a
+istri 62 0000abcdabcdabcd 000000000000baba -> 00c1000b 00c1000b
+istri 62 0000abcdabcdabcd 00000000000baba0 -> 00c00010 00c00010
+istri 62 0ddc0ffeebadf00d 00000000cafebabe -> 00c00010 00c00010
+istri 62 0ddc0ffeebadfeed 00000000cafebabe -> 00c1000a 00c1000a
+istri 72 abcdacbdabcdabcd 000000000000000a -> 0881000e 0881000e
+istri 72 abcdabcdabcdabcd 000000000000000b -> 0881000f 0881000f
+istri 72 abcdabcdabcdabcd 00000000000000ab -> 0881000d 0881000d
+istri 72 abcdabc0abcdabcd 000000000000abcd -> 00c00010 00c00010
+istri 72 abcdabcdabcdabcd 000000000000abcd -> 00800010 00800010
+istri 72 0bcdabcdabcdabcd 000000000000abcd -> 00c00010 00c00010
+istri 72 abcdabcdabcda0cd 000000000000abcd -> 00c00010 00c00010
+istri 72 abcdabcdabcdab0d 000000000000abcd -> 00c00010 00c00010
+istri 72 abcdabcdabcdabc0 000000000000abcd -> 00c00010 00c00010
+istri 72 abcdabcdabcdabcd 000000000000abcd -> 00800010 00800010
+istri 72 abcdabcdabcdabcd 000000000000a0cd -> 0081000f 0081000f
+istri 72 abcdabcdabcdabcd 000000000000ab0d -> 0081000f 0081000f
+istri 72 abcdabcdabcdabcd 000000000000abc0 -> 0881000f 0881000f
+istri 72 0000000000000000 0000000000000000 -> 00c00010 00c00010
+istri 72 aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa -> 00000010 00000010
+istri 72 0000abcdabcdabcd 000000000000abcd -> 00c00010 00c00010
+istri 72 0000abcdabcdabcd 000000000000dcba -> 00c00010 00c00010
+istri 72 0000abcdabcdabcd 000000000000bbbb -> 08c1000b 08c1000b
+istri 72 0000abcdabcdabcd 000000000000baba -> 08c10009 08c10009
+istri 72 0000abcdabcdabcd 00000000000baba0 -> 08c1000b 08c1000b
+istri 72 0ddc0ffeebadf00d 00000000cafebabe -> 08c10000 08c10000
+istri 72 0ddc0ffeebadfeed 00000000cafebabe -> 08c10004 08c10004
|
|
From: <sv...@va...> - 2016-07-24 11:40:16
|
Author: sewardj
Date: Sun Jul 24 12:40:07 2016
New Revision: 3230
Log:
Enable PCMPxSTRx cases 0x62 and 0x72, and reformat the associated
switch statements a bit more consistently. Fixes #353384 and #353727.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/guest_generic_x87.c
Modified: trunk/priv/guest_amd64_toIR.c
==============================================================================
--- trunk/priv/guest_amd64_toIR.c (original)
+++ trunk/priv/guest_amd64_toIR.c Sun Jul 24 12:40:07 2016
@@ -18667,17 +18667,25 @@
immediate byte. Is it one we can actually handle? Throw out any
cases for which the helper function has not been verified. */
switch (imm) {
- case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
- case 0x12: case 0x14: case 0x18: case 0x1A:
- case 0x30: case 0x34: case 0x38: case 0x3A:
- case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
- case 0x70:
+ case 0x00: case 0x02:
+ case 0x08: case 0x0A: case 0x0C: case 0x0E:
+ case 0x12: case 0x14:
+ case 0x18: case 0x1A:
+ case 0x30: case 0x34:
+ case 0x38: case 0x3A:
+ case 0x40: case 0x42: case 0x44: case 0x46:
+ case 0x4A:
+ case 0x62:
+ case 0x70: case 0x72:
break;
// the 16-bit character versions of the above
- case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
- case 0x13: case 0x19: case 0x1B:
- case 0x39: case 0x3B:
- case 0x45: case 0x4B:
+ case 0x01: case 0x03:
+ case 0x09: case 0x0B: case 0x0D:
+ case 0x13:
+ case 0x19: case 0x1B:
+ case 0x39: case 0x3B:
+ case 0x45:
+ case 0x4B:
break;
default:
return delta0; /*FAIL*/
Modified: trunk/priv/guest_generic_x87.c
==============================================================================
--- trunk/priv/guest_generic_x87.c (original)
+++ trunk/priv/guest_generic_x87.c Sun Jul 24 12:40:07 2016
@@ -795,11 +795,16 @@
even if they would probably work. Life is too short to have
unvalidated cases in the code base. */
switch (imm8) {
- case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
- case 0x12: case 0x14: case 0x18: case 0x1A:
- case 0x30: case 0x34: case 0x38: case 0x3A:
- case 0x40: case 0x42: case 0x44: case 0x46: case 0x4A:
- case 0x70:
+ case 0x00: case 0x02:
+ case 0x08: case 0x0A: case 0x0C: case 0x0E:
+ case 0x12: case 0x14:
+ case 0x18: case 0x1A:
+ case 0x30: case 0x34:
+ case 0x38: case 0x3A:
+ case 0x40: case 0x42: case 0x44: case 0x46:
+ case 0x4A:
+ case 0x62:
+ case 0x70: case 0x72:
break;
default:
return False;
|