You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
1
(1) |
2
(1) |
3
(5) |
4
(4) |
5
(7) |
6
(4) |
|
7
(3) |
8
(5) |
9
|
10
(5) |
11
|
12
(4) |
13
|
|
14
|
15
(3) |
16
(1) |
17
(2) |
18
|
19
(1) |
20
|
|
21
(1) |
22
|
23
|
24
|
25
|
26
(3) |
27
|
|
28
(2) |
29
(1) |
30
|
31
|
|
|
|
|
From: Mark W. <mj...@re...> - 2016-08-08 19:10:11
|
Hi Julian, On Sun, 2016-08-07 at 23:36 +0000, sv...@va... wrote: > none/tests/arm/v8crypto: > * enable all tests in this file > * add an expected output (stderr.exp) file The post-regtest-tests don't like that: none/tests/arm/Makefile.am:1: error: v8crypto.stdout.exp is missing in EXTRA_DIST There also is no .vgtest yet which I assume is on purpose? Thanks, Mark |
|
From: Petar J. <mip...@gm...> - 2016-08-08 15:44:25
|
Hi All, I have done a quick review of the patch provided at #366079 [1], and it seems OK. I will take a closer look before I conclude that it can be committed. The change is mostly MIPS-specific, but since it touches some coregrind common code in m_ume/elf.c and m_initimg/initimg-linux.c, you may want to take a look. Let me know if you believe it has some issues. Thanks. Regards, Petar [1] Bug 366079 - FPXX Support for MIPS32 Valgrind, https://bugs.kde.org/show_bug.cgi?id=366079 |
|
From: <sv...@va...> - 2016-08-07 23:36:22
|
Author: sewardj
Date: Mon Aug 8 00:36:10 2016
New Revision: 15933
Log:
none/tests/arm/v8crypto:
* enable all tests in this file
* add an expected output (stderr.exp) file
Added:
trunk/none/tests/arm/v8crypto.stdout.exp
Modified:
trunk/none/tests/arm/v8crypto.c
Modified: trunk/none/tests/arm/v8crypto.c
==============================================================================
--- trunk/none/tests/arm/v8crypto.c (original)
+++ trunk/none/tests/arm/v8crypto.c Mon Aug 8 00:36:10 2016
@@ -49,15 +49,15 @@
return (seed >> 17) & 0xFF;
}
-static ULong randULong ( LaneTy ty )
-{
- Int i;
- ULong r = 0;
- for (i = 0; i < 8; i++) {
- r = (r << 8) | (ULong)(0xFF & randUChar());
- }
- return r;
-}
+//static ULong randULong ( LaneTy ty )
+//{
+// Int i;
+// ULong r = 0;
+// for (i = 0; i < 8; i++) {
+// r = (r << 8) | (ULong)(0xFF & randUChar());
+// }
+// return r;
+//}
/* Generates a random V128. Ensures that that it contains normalised
FP numbers when viewed as either F32x4 or F64x2, so that it is
@@ -87,16 +87,16 @@
printf("%02x", (Int)v->u8[i]);
}
-static void showBlock ( const char* msg, V128* block, Int nBlock )
-{
- Int i;
- printf("%s\n", msg);
- for (i = 0; i < nBlock; i++) {
- printf(" ");
- showV128(&block[i]);
- printf("\n");
- }
-}
+//static void showBlock ( const char* msg, V128* block, Int nBlock )
+//{
+// Int i;
+// printf("%s\n", msg);
+// for (i = 0; i < nBlock; i++) {
+// printf(" ");
+// showV128(&block[i]);
+// printf("\n");
+// }
+//}
/* ---------------------------------------------------------------- */
@@ -204,8 +204,14 @@
GEN_TWOVEC_TEST(sha256su0_q_q, "sha256su0.32 q11, q10", 11, 10)
GEN_THREEVEC_TEST(sha256su1_q_q_q, "sha256su1.32 q12, q11, q10", 12, 11, 10)
-// This is a bit complex.
-//GEN_THREEVEC_TEST(pmull_q_d_d, 1q, 1d, 1d)
+// This is a bit complex. This really mentions three registers, so it
+// should really be a THREEVEC variant. But the two source registers
+// are D registers. So we say it is just a TWOVEC insn, producing a Q
+// and taking a single Q (q7); q7 is the d14-d15 register pair, which
+// is why the insn itself mentions d14 and d15 whereas the
+// numbers that follow mention q7. The result (q7) is 128 bits wide and
+// so is unaffected by these shenanigans.
+GEN_TWOVEC_TEST(pmull_q_d_d, "vmull.p64 q13, d14, d15", 13, 7)
int main ( void )
{
@@ -220,7 +226,6 @@
if (1) DO50( test_aesimc_q_q(TyNONE) );
if (1) DO50( test_aesmc_q_q(TyNONE) );
-#if 0
// sha1c.32 q_q_q
// sha1h.32 q_q
// sha1m.32 q_q_q
@@ -244,7 +249,7 @@
if (1) DO50( test_sha256su1_q_q_q(TyNONE) );
// vmull.64 q_d_d
- if (1) test_pmull_q_d_d(TyD);
-#endif
+ if (1) DO50( test_pmull_q_d_d(TyD) );
+
return 0;
}
Added: trunk/none/tests/arm/v8crypto.stdout.exp
==============================================================================
--- trunk/none/tests/arm/v8crypto.stdout.exp (added)
+++ trunk/none/tests/arm/v8crypto.stdout.exp Mon Aug 8 00:36:10 2016
@@ -0,0 +1,764 @@
+aesd.8 q3, q4 5175e39d19c9ca1e98f24a4984175700 7d6528c5fa956a0d69c3e9a6af27d13b 0b2e475e420871824d7cdc612ba75949 7d6528c5fa956a0d69c3e9a6af27d13b fpscr=00000000
+aesd.8 q3, q4 19a348215c3a67fd399182c2dbcc2d38 065d77195d623e6b842adc6450659e17 cefe1576cbb79d35090c4bc5cd5e254e 065d77195d623e6b842adc6450659e17 fpscr=00000000
+aesd.8 q3, q4 f9dd4a29f8c093db56b01a12b0ca1583 5ff85bc9535c191fd3a727d1a705f65d 8787cfa0c55f8b880ec24d33671ce39c 5ff85bc9535c191fd3a727d1a705f65d fpscr=00000000
+aesd.8 q3, q4 d182c916cebc2e17cfaff39be272ef40 6897b536bbe4da8a369dab4f9465b86e 0fa1ba54db875e753f2fda19695e01c3 6897b536bbe4da8a369dab4f9465b86e fpscr=00000000
+aesd.8 q3, q4 81f2a547be8d181184ededbc53239dcf 019963bf7459630b8d69483df7e8c6a9 1d4f03e13a592943100557914019c7d3 019963bf7459630b8d69483df7e8c6a9 fpscr=00000000
+aesd.8 q3, q4 e98ebd1ca893312a54cae7d5e13dfe91 0a5f45c55f1c9202b76ddefcb0ebfe6e 708971e54d4a5bee2651524c4d73e17d 0a5f45c55f1c9202b76ddefcb0ebfe6e fpscr=00000000
+aesd.8 q3, q4 e9b5f3f66b2e58c121a6c3476d21f1e5 63483da65c8c49d096084deb9ed0411e 7ebee36ccf2be6e3b221fcaa201aec63 63483da65c8c49d096084deb9ed0411e fpscr=00000000
+aesd.8 q3, q4 61c82534e9bf6f37c9e25f72d82e582b ecb42ac54b0966d4089b756aa3f77018 03af402bb4e5954d1a01ee34dd79fb66 ecb42ac54b0966d4089b756aa3f77018 fpscr=00000000
+aesd.8 q3, q4 8404eb7f0cf4ca6fee8536da9dbf68bc ff6f850f2c57ea2a2c810e6dc1a1833d a73054d003e9766854053c20a8714591 ff6f850f2c57ea2a2c810e6dc1a1833d fpscr=00000000
+aesd.8 q3, q4 36b2a38dcef18acf0e0f01a829ba3c66 f078b65e01737fd22bfa8f668c8b14f4 297777a9c72ee6de5f10eeecc2112f74 f078b65e01737fd22bfa8f668c8b14f4 fpscr=00000000
+aesd.8 q3, q4 c5e48064a393c8e947a34273c10a3c47 6ec34f98a2199d3c810bdacfab80ee3d 586fed550ecfe2b5093d7f78c7cf5fbd 6ec34f98a2199d3c810bdacfab80ee3d fpscr=00000000
+aesd.8 q3, q4 b984aed62671e865e6f21d40fc7bc013 5d0f926ce1157eaa95c45b338afcb3df 0f2435c0aeea985f31ce8f8f8f8c6d27 5d0f926ce1157eaa95c45b338afcb3df fpscr=00000000
+aesd.8 q3, q4 acb722146c6cbfa9ea4a022e1d3d7dbb 048612e51a468e36c51cdd8f87e12ab4 37b92e2b6f93ef6e0f2edaf14e9508fb 048612e51a468e36c51cdd8f87e12ab4 fpscr=00000000
+aesd.8 q3, q4 80ddba7e53e42d123208cf9b04b0569c 4288ae612c0dad40f0733f448390351b ea033acba85417486bed00efa8eb9bea 4288ae612c0dad40f0733f448390351b fpscr=00000000
+aesd.8 q3, q4 14575775bc3a12029d8e66ea90352a18 f9754842f9c9ba28f82a63b15c68b274 271d6fb2538d36956894e257bc7ecbb8 f9754842f9c9ba28f82a63b15c68b274 fpscr=00000000
+aesd.8 q3, q4 4784d95987cd4ed80c3ca578a32bd88e 08aebee85fda964fbba02737f3c98220 6c1c2d56923b11852d95469220870abe 08aebee85fda964fbba02737f3c98220 fpscr=00000000
+aesd.8 q3, q4 fbc4208894fdc0f55f706da71bf2425f 4e92f1b240a122141a366d352714867e 6d983ba2d2f552e019b9887468a7517b 4e92f1b240a122141a366d352714867e fpscr=00000000
+aesd.8 q3, q4 0e780c65c22b4ab8778d9ed6d9eb46ea ac82c1007a7d3cd8f54b130cdaa89cef d5c70fbc1a64b4909a147a7a11b98036 ac82c1007a7d3cd8f54b130cdaa89cef fpscr=00000000
+aesd.8 q3, q4 61ff7d4df3b6ca8131f01866bd76c58f 02dd0e32eecfc5fa2c3ffa1aebe6a4d2 b95ffb6b00963b03de94d801deaf8f8d 02dd0e32eecfc5fa2c3ffa1aebe6a4d2 fpscr=00000000
+aesd.8 q3, q4 d4ba52a206ff21b170fbbab6a7f19faf 3004b7a97cf69dda9f7301c1392d8087 df97789eae93fe05bd5acb0261402aee 3004b7a97cf69dda9f7301c1392d8087 fpscr=00000000
+aesd.8 q3, q4 47086cc3da642fa7130d662777beb4a9 16559ec50352a3d92d460a61a5dd0f6f 7fccf0a57000b88ae58dfe98d12404c7 16559ec50352a3d92d460a61a5dd0f6f fpscr=00000000
+aesd.8 q3, q4 9a49ac115048d4c4f987fa170d3ce4dd 9432a2e46543b956b819f459105730e9 dedfb377d705d774d90319b6f89ed728 9432a2e46543b956b819f459105730e9 fpscr=00000000
+aesd.8 q3, q4 adddf0eb4808f06704c857e949cc0fac 89fba268812abdb21e4a9e0958fac555 e3116541a62412b5122310a043944869 89fba268812abdb21e4a9e0958fac555 fpscr=00000000
+aesd.8 q3, q4 5f2619b1a20662f012305efa0acd1475 d70f7fb13667914c413cead25e27ac14 fd817e5297bbc678e74c9aee50d8d3d8 d70f7fb13667914c413cead25e27ac14 fpscr=00000000
+aesd.8 q3, q4 918107c43ea20cc00420edac31a0d599 5cce191e65591384ff4cb613013cc685 08b8cb7a801c5786579282f46363e9c4 5cce191e65591384ff4cb613013cc685 fpscr=00000000
+aesd.8 q3, q4 24509983fc3bcc36baf7e45e9fa43077 fa99500fef6024ba39dce32c239cf309 780bc8f09c7638f08212331e4157128a fa99500fef6024ba39dce32c239cf309 fpscr=00000000
+aesd.8 q3, q4 f6f2b14fbb3184b2141625713239066f 8fcf04e5b2dca44fcf4c517ea3a413ff ac465462af75ca21408b2ffb9f53d296 8fcf04e5b2dca44fcf4c517ea3a413ff fpscr=00000000
+aesd.8 q3, q4 e8c72e865de41295f2db8f44cbbf37e2 fcd015ff8f2e73a3a0fae06860b606c7 0e7bd8af9b4006247f872e42481049c2 fcd015ff8f2e73a3a0fae06860b606c7 fpscr=00000000
+aesd.8 q3, q4 da30ef8bc0b5573e34a901384a97a32f 20fd62bd65b571158e48704b3c31abc2 0fe0232414c52c0b2980bf8fc052b453 20fd62bd65b571158e48704b3c31abc2 fpscr=00000000
+aesd.8 q3, q4 ac8dd5bbc503330eb9dd5dab8e212ab7 ddb5cd8016d27d057796e0861576e44f e8ccb6492cdacd9ea976ecfaec5134e1 ddb5cd8016d27d057796e0861576e44f fpscr=00000000
+aesd.8 q3, q4 3d3cc0784c2f856363d9810079bbabd9 125934a781e479d33d431279cce48fce d23755ef4e8422fc80bca6af9d59ba87 125934a781e479d33d431279cce48fce fpscr=00000000
+aesd.8 q3, q4 6f9f902235982fa010fd4e94e9c808f5 9e477892854b43e0beafe48541dc8da0 6f48b8fc2b9b6272fc2d67e3bea9c8ed 9e477892854b43e0beafe48541dc8da0 fpscr=00000000
+aesd.8 q3, q4 20162517609f0f22a1a7a4c9c0a51f6b 63e279a20368bc8bdb3b370954bcbf24 e71c4bd2648e22b700ba471fbd26a792 63e279a20368bc8bdb3b370954bcbf24 fpscr=00000000
+aesd.8 q3, q4 31005fb9ada2074bf63a63fedcb4d29c 3f871736dc9ac5357446eb65e4e703bb 7601a873d750978a2cea51e81176d43d 3f871736dc9ac5357446eb65e4e703bb fpscr=00000000
+aesd.8 q3, q4 83bd1e68fb03f57bef136b941e54ffe8 139832afee423c3d6930e0fad3ba39c4 803212319699ce982fc2c745dcf84242 139832afee423c3d6930e0fad3ba39c4 fpscr=00000000
+aesd.8 q3, q4 f4ad41832c22ba116c949cea66e687ae 2ced5f927f2b383caf8484c5f3078d2a ad7c11e32de034fa5072a34e3340e94f 2ced5f927f2b383caf8484c5f3078d2a fpscr=00000000
+aesd.8 q3, q4 e309aef8a605af130821eb96e737777e b5a9377eb31749ef710cf757885d2728 06faf5dcb958c4552f476cddaf39f9b9 b5a9377eb31749ef710cf757885d2728 fpscr=00000000
+aesd.8 q3, q4 dbacfa35b7d2b75af8ad6b99bb3fa4c2 c673c91ec9aed3f8b9c3e32f2103009d 37458c0bde6d971a8aef1d79f8016684 c673c91ec9aed3f8b9c3e32f2103009d fpscr=00000000
+aesd.8 q3, q4 9f043af6a1aed58f1ee978efa4b054d2 76f140aa4182b4e706a17746411ab40c 2ad4d8a7eb62fbf7a077a0b73442bd9c 76f140aa4182b4e706a17746411ab40c fpscr=00000000
+aesd.8 q3, q4 2ad7482a960fb2b27014160ebbdb47e4 a7837c83faf3cb1d360794fec60222d6 1382afb7b4e5111bb8fdbc17985528a1 a7837c83faf3cb1d360794fec60222d6 fpscr=00000000
+aesd.8 q3, q4 5e86033374552e23ce8e2455e0205c58 37885d08d662faf92a541ab7911c2b5a 2c7a1949e46dd17a1ad7023baeb29d6a 37885d08d662faf92a541ab7911c2b5a fpscr=00000000
+aesd.8 q3, q4 19714a711ce1284318b88425f2de758f 0760c299b42e1fdcc2e9e9cf82c7aff8 d070b2c8e98eb36e6fe37abb7a5f9702 0760c299b42e1fdcc2e9e9cf82c7aff8 fpscr=00000000
+aesd.8 q3, q4 3cf6fe426e1281712ef114ddd37570e8 f76b8d9773b81b24de24e0a879648e11 62b537b559e3baedde750c3f17628f69 f76b8d9773b81b24de24e0a879648e11 fpscr=00000000
+aesd.8 q3, q4 a77700084a491a0ef099b6dd61462ec3 e70a9c61f55fce335d68e1a25652a804 b22b19e4729bda8bf413dc6b18ff1c31 e70a9c61f55fce335d68e1a25652a804 fpscr=00000000
+aesd.8 q3, q4 3a542e238fe5d1793d1148867eb08f81 b79cd058188318692112ca1cf9f1dd31 ead51203b4f8117c85b14837c4d30cfc b79cd058188318692112ca1cf9f1dd31 fpscr=00000000
+aesd.8 q3, q4 d4ec68f21f468712f7b8ab3708137382 478209dbbd84d92508847c7642a20df9 5c6d9d4c22560db21a458af87da8d803 478209dbbd84d92508847c7642a20df9 fpscr=00000000
+aesd.8 q3, q4 579f90d5d9cd1c3afceebf50e0d0ba24 761b274ac4c4f0c7f31ed81010c417bc 1717836e7b9b0a21de4f1872fb4020e2 761b274ac4c4f0c7f31ed81010c417bc fpscr=00000000
+aesd.8 q3, q4 a1cd852d9cd970502d146432e64644c9 25c80a060da03fb0c33ebc4b44b8ddd8 1a95920b4f0c2da0ac36f9af99af73e3 25c80a060da03fb0c33ebc4b44b8ddd8 fpscr=00000000
+aesd.8 q3, q4 94d7265949ca62b46a8a793cf9d5f0d1 35e7926e777aa43f56470887bfdd3daf 9880c7b2f1bf2cced10880fe6dfcc68a 35e7926e777aa43f56470887bfdd3daf fpscr=00000000
+aesd.8 q3, q4 84db9fe3e4b100d48d969e225f9318a0 04b4378bce1492e08680a7399beeae16 88ff74f73a135b28950679449e296f79 04b4378bce1492e08680a7399beeae16 fpscr=00000000
+aese.8 q12, q13 f30110c432a534d0478d5d7e053a4e0c 2e467d8e98e7468c75a0cbeda561e618 acd8c2d623393c4ae0a040dcc12c90fa 2e467d8e98e7468c75a0cbeda561e618 fpscr=00000000
+aese.8 q12, q13 62bbc77143b71e92668b24fb9133bf52 9fedb2229a090d2c018b42f3d3ec8415 3563e2ed859e9dae2cb17d3054ae33a0 9fedb2229a090d2c018b42f3d3ec8415 fpscr=00000000
+aese.8 q12, q13 894d9fe1f98d1aa0861ef69cf4e34e11 f2789356f1fb0d2b99885af4db13d1b7 3090dba9c08cfe3d1596f04521389124 f2789356f1fb0d2b99885af4db13d1b7 fpscr=00000000
+aese.8 q12, q13 74876ac63afb7562c67d2c86fa7c09a3 07121ecd88441b7dd2cc3eca9347d80f 37c83e2bfae292c0f92a9f298f08c991 07121ecd88441b7dd2cc3eca9347d80f fpscr=00000000
+aese.8 q12, q13 077815d35567232e66c997070e860c39 109cfa471afbe686e2ede96f8809f947 8436e6225f73dfc24469a645f0def3f3 109cfa471afbe686e2ede96f8809f947 fpscr=00000000
+aese.8 q12, q13 89ad76dc21a1f8f15acd7ad9f991bada c201829797974fddfe5d063c8be33ce1 4e6044b34940bf714091a9d9b30510e2 c201829797974fddfe5d063c8be33ce1 fpscr=00000000
+aese.8 q12, q13 fba1981add7938e3067d74917c37833e f82db3448c8c9a654f1c8c8db3b639e1 d1eff4583b0cf1448a643a9c7be6419e f82db3448c8c9a654f1c8c8db3b639e1 fpscr=00000000
+aese.8 q12, q13 9cdd1a32cd007ff7daac12cf3a64acbd e76fcc086aeb0414a9cd126c0869c6a0 5cef02808fd7f6112337210a21e963a4 e76fcc086aeb0414a9cd126c0869c6a0 fpscr=00000000
+aese.8 q12, q13 8514e93e478d067a5a4ac156a6cb98bf d4442998096825896787a06c436d8e39 2fbd47242724ba0dd9532680d1d9ef44 d4442998096825896787a06c436d8e39 fpscr=00000000
+aese.8 q12, q13 95a6e59e2a7fabcb65b86284a1cb27a3 a20cab554a62dd2468a718ec4422710c d0c0b11fd71e2fdfd9ac38459aa4da79 a20cab554a62dd2468a718ec4422710c fpscr=00000000
+aese.8 q12, q13 aef4eeb358364f4add55d3bb09c439c9 3028339e0d3a0c468e8f584ceae94e7a fc57f5d8edd8c1fe11861a680bfe3d6d 3028339e0d3a0c468e8f584ceae94e7a fpscr=00000000
+aese.8 q12, q13 af5de4ddb013d258a082f55bbf17ae91 5df79fd3324f914fb79f41ec172107e2 13a4d3abf00521f0c2ac1aa9894a8d8f 5df79fd3324f914fb79f41ec172107e2 fpscr=00000000
+aese.8 q12, q13 7742a77a117513548f9ea7c3a323665c 0bd9cf5599014e9dc435b32da92a7aa5 c4629c15b30145dd67144c281092fa99 0bd9cf5599014e9dc435b32da92a7aa5 fpscr=00000000
+randV128: 256 calls, 266 iters
+aese.8 q12, q13 e70216ec5cbcf49e8a09cb539549408a 182fa58322b1219295b48e6f81658922 f37adda8c0716dfefad803eb16d76ec2 182fa58322b1219295b48e6f81658922 fpscr=00000000
+aese.8 q12, q13 e0fd1393714954977124406c74e81e7a d52f1cc78e47c9e383314ed9438203c8 1659a420890276929ab55ed596abab37 d52f1cc78e47c9e383314ed9438203c8 fpscr=00000000
+aese.8 q12, q13 d2b5bf6419898df003e6fe7283eff6cb 7d772f10f5706b75304780122c8b69f0 ce32db92c343609779258ed07999f3e2 7d772f10f5706b75304780122c8b69f0 fpscr=00000000
+aese.8 q12, q13 3fa5c4d84771e518605a54f56dfe15b7 ddeb80fe57ce3c26f9fcb34432fe8249 ca2488f7ee631bb2cf2f35c8980894bb ddeb80fe57ce3c26f9fcb34432fe8249 fpscr=00000000
+aese.8 q12, q13 d4eaedef93c21b55bdb0c6ce36392d36 8cf3c5a6e236ba0ab0c81fb7053f6b55 a3bc5a3bd76f34cfc3d432b66abf35fb 8cf3c5a6e236ba0ab0c81fb7053f6b55 fpscr=00000000
+aese.8 q12, q13 44d5584589abea635dc49b10189f4c14 0d37eadc490b8fa61a337e4f82bd51e7 ba68a4eea09337a6b8984dcf3be0d90d 0d37eadc490b8fa61a337e4f82bd51e7 fpscr=00000000
+aese.8 q12, q13 0b0b9f6018e987aeba97106bb88dbd45 9d5fe4af824eabd8f8f577d6f4dd0223 b8aa088a2c5321382920717a905c8533 9d5fe4af824eabd8f8f577d6f4dd0223 fpscr=00000000
+aese.8 q12, q13 0beca39f21ddd399b28a073ef6656128 1eca927d6d5eee012a6fe8ae3cfe5e6a 29d975984614c74674f7276059ecdf2c 1eca927d6d5eee012a6fe8ae3cfe5e6a fpscr=00000000
+aese.8 q12, q13 22d9446284e6ae8126fc5ee9b286181e 6ed9d5a9ea9b388090ffb3373b81451b 9f7b4c1f4ec5817ca763901d29ff556b 6ed9d5a9ea9b388090ffb3373b81451b fpscr=00000000
+aese.8 q12, q13 3131620a2265f8c8f64df6cdcb51c286 6eeb8d90d86668b60a08b6d0cfc59797 2d6efcb8b022dff3f25760a4cf7b0982 6eeb8d90d86668b60a08b6d0cfc59797 fpscr=00000000
+aese.8 q12, q13 1854ddf6d8b991ce01deaf4923243fc0 fe609a94181e600278e7d2d9d92a333d ba12feaab6aba04b2d18a1608e5cff54 fe609a94181e600278e7d2d9d92a333d fpscr=00000000
+aese.8 q12, q13 b7a39486894259f1290e68be98626e2d fe98dc158b24fec4bafee7b33811fa6d 778c22dcdc8f5296e0e25cd73b337309 fe98dc158b24fec4bafee7b33811fa6d fpscr=00000000
+aese.8 q12, q13 ee7d691b146130944d3d038a0b69312c 4df433720fd7245dafacd5bdced9cd88 af81b0f998e7bedda6a7fa9a0a4ef649 4df433720fd7245dafacd5bdced9cd88 fpscr=00000000
+aese.8 q12, q13 9c423a145875f5144ccc5e105c99661d cdd47e0b8597b02c38527c577ae28aed c10bcec092211b07f7906ea0d198938c cdd47e0b8597b02c38527c577ae28aed fpscr=00000000
+aese.8 q12, q13 a353e8d137de89d3071b5bad6b52ee61 5c979f40cdc58392364fbbe21b8d12fc 2d20b081c79ef583511c678416afe15e 5c979f40cdc58392364fbbe21b8d12fc fpscr=00000000
+aese.8 q12, q13 e11053b38ffdcd305e88d8c318f5aa57 dc9d7472c7c07dee870474bd92394516 5264df78354bcc1d7e5de7f327279183 dc9d7472c7c07dee870474bd92394516 fpscr=00000000
+aese.8 q12, q13 37d75b1941319f8c3175b6b243e17860 2b46de0152e87ea00ccf8549bf47029a 7df4daad27249771b081f80f9c35c32d 2b46de0152e87ea00ccf8549bf47029a fpscr=00000000
+aese.8 q12, q13 2af3bd4b509e6608a513cfe482162be8 6f8ae74d5f7960b4a01933ef595f6af1 7667836f6b3bbe65b9b66f2b6e94b0d4 6f8ae74d5f7960b4a01933ef595f6af1 fpscr=00000000
+aese.8 q12, q13 b903f1b29f411487312d32f1bb069e61 95d26cc246074b10bda9f7bf92a71bac 355f975164325e88a53ecf2f715aa6bd 95d26cc246074b10bda9f7bf92a71bac fpscr=00000000
+aese.8 q12, q13 b8d75a9620326a7d927f8ecd4a783d65 e3761d8b97fa553a6508ac365a886f48 a9f500a4688ca0a0ca32750f39e893d8 e3761d8b97fa553a6508ac365a886f48 fpscr=00000000
+aese.8 q12, q13 39d4db0931b25e927a9632b68f624628 527594f68adebded1af4c541ebe715af eaaaed16d09784d2433211687f506817 527594f68adebded1af4c541ebe715af fpscr=00000000
+aese.8 q12, q13 764f859cf68f4679dab3699f129680a9 fc95f5d55c34e70e2034036b2540d210 ac17003b2df651f59a5732bf7eea0256 fc95f5d55c34e70e2034036b2540d210 fpscr=00000000
+aese.8 q12, q13 7c44fda2c4f3ed4e66c03150c383fd2d 27c81bff702749760afcca34c46a4acc 8deba94c501e8e07c564494339480ff8 27c81bff702749760afcca34c46a4acc fpscr=00000000
+aese.8 q12, q13 2915227d7d3b3371fe1c6a2981899c14 b16fe6d6a518c184b9abfaffa9c65e42 61a92562a0841ce634da89f6462660b1 b16fe6d6a518c184b9abfaffa9c65e42 fpscr=00000000
+aese.8 q12, q13 7be936badd6630980aa27329b5b3ecd2 d2bc96d6b1a87f5bc30eedfc43f567c8 50913d50dd5ae02e42fc8403d38b0ba2 d2bc96d6b1a87f5bc30eedfc43f567c8 fpscr=00000000
+aese.8 q12, q13 6597ea0af6727713e0401415c692d5dd 3795df08065206478d94b3ff795f1228 8c48c6773cbd96200877a38700b75ce6 3795df08065206478d94b3ff795f1228 fpscr=00000000
+aese.8 q12, q13 4fd7e326d29b74541ae5bf20bcc2f9c2 549a65de5531bc5072bb7bf9cc326fbb 17589041458c44f251e3e835afac1cb6 549a65de5531bc5072bb7bf9cc326fbb fpscr=00000000
+aese.8 q12, q13 190c026f4f4108bb97f152ac79a338e2 082a07b97ea580d954e0244c1dcf60e0 c7826af62e506baa43f7c4e182693877 082a07b97ea580d954e0244c1dcf60e0 fpscr=00000000
+aese.8 q12, q13 a29325444ec512a939c5af18dc96719b 35a6a7f8600f343f11658d574d95c3f7 31e03765347b13908196f78488749350 35a6a7f8600f343f11658d574d95c3f7 fpscr=00000000
+aese.8 q12, q13 cccf2d05af86747edec1b4c5c4fa8650 ba6d23fbddcfb6e48aa9987b39e47961 404516bb2072abb8543a25ae383b71c7 ba6d23fbddcfb6e48aa9987b39e47961 fpscr=00000000
+aese.8 q12, q13 751dfa1352e40c98674442111330555e 76df5c23d344e7279f0d2317c41d637d 0c3b050441d824080e25e96f7be0ef26 76df5c23d344e7279f0d2317c41d637d fpscr=00000000
+aese.8 q12, q13 4a5c32cf23cea86930f00f8bcd9f5fac 8da998f88c8b32a6eaf8d1b431daa560 79302d9a576eac8ab0e6b875c66e1d4b 8da998f88c8b32a6eaf8d1b431daa560 fpscr=00000000
+aese.8 q12, q13 16458560adcdd7091db23c3834cb4d4d 8a8cc509a7178875c1b1aa5552bf7b54 677b05f98692091033ddcf3cde5790d4 8a8cc509a7178875c1b1aa5552bf7b54 fpscr=00000000
+aese.8 q12, q13 17d247361590a45a8c419b68e9c69d73 23de85e7f3ba676cd7ca3327879cb597 8e3d343e39be25059ffe2e8418e5c269 23de85e7f3ba676cd7ca3327879cb597 fpscr=00000000
+aese.8 q12, q13 14dcfee0b45668b52a09854ad64de91c ef3804f7e2035f7c3d1ff6252d13375a b1471df0f0582ddd0f699aa80ffc8f5a ef3804f7e2035f7c3d1ff6252d13375a fpscr=00000000
+aese.8 q12, q13 6109ca6565cab2e77d69475df9b640b0 c34d90bb1a1256ba10a38a2b40833c5f d274101d3c96be4c561b69383a61bddf c34d90bb1a1256ba10a38a2b40833c5f fpscr=00000000
+aese.8 q12, q13 ddb98a28084c634f63bfc3013161828e 7e7d09937d452c872eb7cf99a14da407 9d30f7eae371ece8601c84460a01fea7 7e7d09937d452c872eb7cf99a14da407 fpscr=00000000
+aesimc.8 q15, q0 6a4d20867d3a5b4dbd6dd8955fad8f17 02284fdfe9fec14278baa5d030d04fb1 9190b3085c4e47c112eebaf1207a6327 02284fdfe9fec14278baa5d030d04fb1 fpscr=00000000
+aesimc.8 q15, q0 e6246ae1a4f77a426cd3657964fa47a9 2daf41013f9df44bce0cec2fcc6d1cbd 56b6a5872342d2ae9616b63743599389 2daf41013f9df44bce0cec2fcc6d1cbd fpscr=00000000
+aesimc.8 q15, q0 329e49985ce0a08d4e504c0d1ea88aa7 e072c1566081a703100e83175782ed8c 3ef015decc3d8135d2e04ef647688e15 e072c1566081a703100e83175782ed8c fpscr=00000000
+aesimc.8 q15, q0 2e1c9d0c8757ad8f43446bb26e18386e fbcfad402a0ab8c91e1f4ce7b072a07d 9ee661c0668456e5295d34ea9636b807 fbcfad402a0ab8c91e1f4ce7b072a07d fpscr=00000000
+aesimc.8 q15, q0 bafd469c03bb81a72d0fa3c734a93060 5e28e61e7d9809fed89f25ffb69a16f0 9a8e25bf2948790a56938ad24cb16354 5e28e61e7d9809fed89f25ffb69a16f0 fpscr=00000000
+aesimc.8 q15, q0 b6a224a9b26dfb35eb12d4ad50bc53dc e9dd4c503b8c78011defefc04a5c2f46 51c4a419bc1f781536c3654d77b2ec56 e9dd4c503b8c78011defefc04a5c2f46 fpscr=00000000
+aesimc.8 q15, q0 7c4dbf374346e632cf6e8a894c18cbde 2c59ee263f9ae6eb5ef02a0e24fd533c 3f769c685a8bc8b183d4f825a20ca2ba 2c59ee263f9ae6eb5ef02a0e24fd533c fpscr=00000000
+aesimc.8 q15, q0 f6d81f33742433f2cc7dd6bb9c2cca19 53ca44aebd31b5254262bdc16b771596 28aed92c4a9e7ab20412430962ed3929 53ca44aebd31b5254262bdc16b771596 fpscr=00000000
+aesimc.8 q15, q0 39df4ba2b0883fa0f57ab3b51afb0c56 fb4f5f827e66bca6095bd91417c2934b a329d536e26924ad55d3a0b9ec047297 fb4f5f827e66bca6095bd91417c2934b fpscr=00000000
+aesimc.8 q15, q0 23c025e6d5d2e99c2ac801d7a6e270f6 02471f026197d9cd943b5e67093fabba d86c759947a02025265fdb34ce7522be 02471f026197d9cd943b5e67093fabba fpscr=00000000
+aesimc.8 q15, q0 95de8b5fc46113474bc49f812043d857 4912638e4626edfac3622c1b224d3e43 059358785d59186bb38e6bc012292a03 4912638e4626edfac3622c1b224d3e43 fpscr=00000000
+aesimc.8 q15, q0 6f975f6b5d959b0038d06f14677d22db b0100d870c73d98e7631228f404d2c47 7d40948319279c8a2f8ab4fb9e043ec2 b0100d870c73d98e7631228f404d2c47 fpscr=00000000
+aesimc.8 q15, q0 914b7f6c80ce6328d14c4ff05df12fe2 17a2fb4c94dd7be88c072223439e5525 2e54c6beadcbed51f12c5007655732ad 17a2fb4c94dd7be88c072223439e5525 fpscr=00000000
+aesimc.8 q15, q0 db5accc20d6d491ef5972073e0fedfcb 5e270e3ebfc4b369e7450a380da0993e f286300de66eb39aa78971cf57f5d77f 5e270e3ebfc4b369e7450a380da0993e fpscr=00000000
+aesimc.8 q15, q0 2c2526cce3d22e428611c200d10412f6 640027bc6b896370654abc2d7db4d8f1 9e48381145a7b4a785aa3baa7d93030d 640027bc6b896370654abc2d7db4d8f1 fpscr=00000000
+aesimc.8 q15, q0 660b6deae45bf2f5621a15f41064a8c4 0b8c2426798b6a5de77616637239f19e 2cc6127dcab648f1989eba58941a5ff5 0b8c2426798b6a5de77616637239f19e fpscr=00000000
+aesimc.8 q15, q0 676d807dee6a75966a13f9b17d7d8194 312ce5ddc92aa7904e2af939ce90c5a5 99e90752f6072b0eb95d5d1de12d9664 312ce5ddc92aa7904e2af939ce90c5a5 fpscr=00000000
+aesimc.8 q15, q0 11aa41e4e25f96857f5b4e96f8b07cc6 b83e4c403ac7fc6a78c5450f6f173567 187ce30db1baa1c16cafa4906d5d6379 b83e4c403ac7fc6a78c5450f6f173567 fpscr=00000000
+aesimc.8 q15, q0 42228e7fa19937237e53f304605c7bbb 7e2538b0aec1474b46a8d94636311f44 ebe010c8a0578f1bd82ea522fc7a6db7 7e2538b0aec1474b46a8d94636311f44 fpscr=00000000
+aesimc.8 q15, q0 db3648af097836cf4a5aca5a97e15cd2 643e888b037969929732973d033b649a 9bcfc5c86ce91f1baf28b1390155f163 643e888b037969929732973d033b649a fpscr=00000000
+aesimc.8 q15, q0 bc4550d3fa5c74eac2d1b1f87b9f006c 4aeb1e341b4e429f4dc35e54b697e4cc 425c1b8ed32981f3fb66fbe250e310aa 4aeb1e341b4e429f4dc35e54b697e4cc fpscr=00000000
+aesimc.8 q15, q0 c5af844c56a6d2d3c616893fedf747e7 0f8bd808d4a0b2d247bb0dec2ea57f37 b260cd4bef2fa175cceabc87c2570e58 0f8bd808d4a0b2d247bb0dec2ea57f37 fpscr=00000000
+aesimc.8 q15, q0 d5d5c579fcb62eea358c328ece4911a6 957f97690fcf998c647b85644dc3143d ca5ce0625b647c968276848e3af93652 957f97690fcf998c647b85644dc3143d fpscr=00000000
+aesimc.8 q15, q0 ce16f2bacbea6990f0908c45fcf43e06 bb263bb7ac3dd62d8563a61df253853d 0cecae5f5153ff97a1d4381061782424 bb263bb7ac3dd62d8563a61df253853d fpscr=00000000
+aesimc.8 q15, q0 8ed3ed6fa5a46224d78477c55858ae69 60e0a4508b474b138ad25076fcb5b098 c4f4e8acd5d726b08f67a93f5bd7d538 60e0a4508b474b138ad25076fcb5b098 fpscr=00000000
+aesimc.8 q15, q0 f76b95fa6844fb06cbc7d36dc1d5402f 650eb2968b4fd6a0532863cf4c4877ad e88fa189e718c588855a00080d8d91cf 650eb2968b4fd6a0532863cf4c4877ad fpscr=00000000
+aesimc.8 q15, q0 e73ec9b8f5291397a9ba7f9e19ccd6b6 aa0f44e98eb45934c0c5bf89c26cb8dc f1a30f55492f300105541c7ecd1d3822 aa0f44e98eb45934c0c5bf89c26cb8dc fpscr=00000000
+randV128: 512 calls, 530 iters
+aesimc.8 q15, q0 3fad6a0b2cb3893654bc5db73e9c4e61 0f443ca873d6b22db10a44033e825486 eb327d7b17b7a2386a1e7cf4d5c2631a 0f443ca873d6b22db10a44033e825486 fpscr=00000000
+aesimc.8 q15, q0 df175852ed423e44ab2d4b1812a6898d 740c78331916c2ee0656d19da0e92b0a 7ab010e98de5105ba60b49f8f0dfabec 740c78331916c2ee0656d19da0e92b0a fpscr=00000000
+aesimc.8 q15, q0 a7dc73ed183713208e6e2a227349679c b9c7d9eb61d469d49e0a48b8c8011cc8 9c91642578ed60fdceaed0d44074ae87 b9c7d9eb61d469d49e0a48b8c8011cc8 fpscr=00000000
+aesimc.8 q15, q0 765d9b3d8cf2e62adcdeda3442e5c8ed bed6402f2b6e86415b8587b3952b0921 6df09d0783a63196360125f8ea2e4113 bed6402f2b6e86415b8587b3952b0921 fpscr=00000000
+aesimc.8 q15, q0 2ef9b0a22bd197c376de3baf5fdb8ce1 62988b5f5746fb941b276fefe9c6d174 d4036198efa2bf8c08791fd29ed0a96d 62988b5f5746fb941b276fefe9c6d174 fpscr=00000000
+aesimc.8 q15, q0 ad11927ad336084a3ccd2df1aa8a93d7 876d9bdcc5bca72ebf51e0cba2325322 f3e2ae12aa013368877f4974f8e1d028 876d9bdcc5bca72ebf51e0cba2325322 fpscr=00000000
+aesimc.8 q15, q0 d50420276581181f0f0b8f5d0353bc2f 0bb64f05552e696e2762baa7a1d0708a 1223a3656272a6cad07ffd0a21fefda9 0bb64f05552e696e2762baa7a1d0708a fpscr=00000000
+aesimc.8 q15, q0 84323c09c110a7a3ccf943504995e94a cfd2893ae6ff22b433bbdde4c7ff080c 94cc9563e461252f5d99cabf9ca92b22 cfd2893ae6ff22b433bbdde4c7ff080c fpscr=00000000
+aesimc.8 q15, q0 9bfcc47ec746943556f6272c5eb0f887 b32227dc5a8cb261c3bb28e1f220fb09 1cd95cf38c8f6e684876da5500004969 b32227dc5a8cb261c3bb28e1f220fb09 fpscr=00000000
+aesimc.8 q15, q0 fac199e95780c0368c621d512005ca47 97050b4a8f37f9d4b7c27dfe029229e0 610c40feabacec7e27df1a14d6dd4f1d 97050b4a8f37f9d4b7c27dfe029229e0 fpscr=00000000
+aesimc.8 q15, q0 81e19ba751200b054e9e031d71f33fe9 5bdb13e5665fd76eee30ba9cd9b572f2 ecd348016805c22f3868872fd8a7b023 5bdb13e5665fd76eee30ba9cd9b572f2 fpscr=00000000
+aesimc.8 q15, q0 0fbdaa1a958555027b09baf22fda37cd de05200cbf652c8e4966c11a56eab69e ba82b17e32de51c5ecc69749281afb5d de05200cbf652c8e4966c11a56eab69e fpscr=00000000
+aesimc.8 q15, q0 86b5a6a102107e8ef40422303b1b9254 02e2121f7aa8d894a9c470d95890d444 bc246d1801e584febed4f45ac8c8f4ac 02e2121f7aa8d894a9c470d95890d444 fpscr=00000000
+aesimc.8 q15, q0 a5d2c97f7788bae1eca9a838c108ae44 748db6b4df58784ca3da435209d5ce30 a8e287361441ce28f23bc26363e6e047 748db6b4df58784ca3da435209d5ce30 fpscr=00000000
+aesimc.8 q15, q0 a936258b9666b4d4f37549976fb022ff c32cf63309e402406e9f5a58ac1a54d1 8df4ecbf172359c2f87d31472bdcb571 c32cf63309e402406e9f5a58ac1a54d1 fpscr=00000000
+aesimc.8 q15, q0 ec6d05a4b6a1a4cd9e88325743eb11d5 0937b3956de6fb929444b197ac07cce3 2c2da6bf55931c380199d3bd613f12c8 0937b3956de6fb929444b197ac07cce3 fpscr=00000000
+aesimc.8 q15, q0 4fd84b29b99a6b2dcd4345d71d165b24 270ecc3cebbd43a2f727286eebfe18c9 dbdf994497154a7fc5e92d97f0b69012 270ecc3cebbd43a2f727286eebfe18c9 fpscr=00000000
+aesimc.8 q15, q0 b2d6d57a7db0e9535f056177dd93e04f fd0f238763c9b9d176aaa13e475e17e0 8c36fe12d16e94e9fa02ab10967d0104 fd0f238763c9b9d176aaa13e475e17e0 fpscr=00000000
+aesimc.8 q15, q0 f4c785f8e443fea0362f659862c280b3 6a9d96d7b56b3f7ef02dfb66a188a88b a72a2c172d9382a39dcd16068b8c969b 6a9d96d7b56b3f7ef02dfb66a188a88b fpscr=00000000
+aesimc.8 q15, q0 f70c3901ccb48a72302032998e011bb2 5015078bc002b309470f1546d9dbad27 610abf1d0b2488dfd563b11c51a62e51 5015078bc002b309470f1546d9dbad27 fpscr=00000000
+aesimc.8 q15, q0 9a04d2f816626c2c2f38a8db40b290ab 8dd9540466eef7d359b0d13fcfb80416 e90c38d9deaa0cd4bc4c3acd07b806dc 8dd9540466eef7d359b0d13fcfb80416 fpscr=00000000
+aesimc.8 q15, q0 bc0f303ba1ad862b11d8a7bd5735c0ff 03485ea08590e93c07700db0637e8eb7 5b3977a0dda62a919a731f3c5652f3d3 03485ea08590e93c07700db0637e8eb7 fpscr=00000000
+aesimc.8 q15, q0 3e8e322a4ff6b6d1b75f0f9fb4e98c0d 90c305c2fe476aa231b0aaf9758d2b6b 0e39a80bf4438244f88edc7825dc1051 90c305c2fe476aa231b0aaf9758d2b6b fpscr=00000000
+aesmc.8 q1, q9 15a929c7b1735a67b7d0887be445bb91 5f3646169d10a4a4a96e8ad5ed65b981 f4dd9f8f629c5a29c963c1f385deaa41 5f3646169d10a4a4a96e8ad5ed65b981 fpscr=00000000
+aesmc.8 q1, q9 725aaa117e7599eb792f879592071e89 e5bad9f874f6c1ee4d9b6cfdcd38bc65 a17a61c4f644021d716bde83aae583e0 e5bad9f874f6c1ee4d9b6cfdcd38bc65 fpscr=00000000
+aesmc.8 q1, q9 db00a0567c2a86afc40047f4038de9d3 1c7493622cfa2597b6855d5cd44e174a 793ddf022529e48c4bd1c46c34a66237 1c7493622cfa2597b6855d5cd44e174a fpscr=00000000
+aesmc.8 q1, q9 216a7d91960bd145f0fdfb1c6ec3212c 2906701b0eb55d7aa1e722b770cb3d54 091cd8897a440aa85eb86055ea741854 2906701b0eb55d7aa1e722b770cb3d54 fpscr=00000000
+aesmc.8 q1, q9 96584f08a2f98312aff067d5f03b44cf 1eb33ae9199674ecd8d7987a5aa2c601 958c4d2aff84bcd06a24eb48d37631ab 1eb33ae9199674ecd8d7987a5aa2c601 fpscr=00000000
+aesmc.8 q1, q9 dadbd02a2efc4a4c3cb79f06723292b0 cb24ce0e442a090e2de0df5a65ba8b51 75ce2ebab99f26698b29e903081aecfb cb24ce0e442a090e2de0df5a65ba8b51 fpscr=00000000
+aesmc.8 q1, q9 3edf14402e48bffaabe616bb98dc80c1 ccbe78e080dd716773220ebf7179adb2 7eb005211e2c79001060b626fb7e099b ccbe78e080dd716773220ebf7179adb2 fpscr=00000000
+aesmc.8 q1, q9 2a1fe48bf7d8b25706c5dff7abfe7295 0d2ba7bfbfefcfc75bab8685a4c94b2d 4c599ab11717d78f0fa32f70a6185fea 0d2ba7bfbfefcfc75bab8685a4c94b2d fpscr=00000000
+aesmc.8 q1, q9 7ef922696a0f05c22cb3d81c8dfb468b 6ecc3a09dfbd048cc8dc260bdc0b4323 31025cfe9393876d6ccdf0688e096454 6ecc3a09dfbd048cc8dc260bdc0b4323 fpscr=00000000
+aesmc.8 q1, q9 1acfac3d674a969cff10e3891c30dd04 cfff1221c2a9f0189914d0b1f99e76f3 0b9cd044eefcc15025f9af9f0fb25f00 cfff1221c2a9f0189914d0b1f99e76f3 fpscr=00000000
+aesmc.8 q1, q9 de016364cdeb46445e3dde9f39ff175e 10271064461273cbadb462d8dde2c3fe bb0a3dcfab565d4ce4257614999e8386 10271064461273cbadb462d8dde2c3fe fpscr=00000000
+aesmc.8 q1, q9 aaee27407e51f51a2899aabdc5c8d4fc 10a112344c596d04e61bbedf67380ca3 cf4ff8efa00f79aa0866738104769416 10a112344c596d04e61bbedf67380ca3 fpscr=00000000
+aesmc.8 q1, q9 5df6d73059dd837e3e8527449ee9f43b b1cff9f1b4ddbe2322a9c22776ff3042 4745e39775fb8cf646ca7a98e50d4e5d b1cff9f1b4ddbe2322a9c22776ff3042 fpscr=00000000
+aesmc.8 q1, q9 d97955953deed0d281603593a4c5577d d111a5f95efe458942bf4f0fec980e3b 1d164add87c9446665e3093218317810 d111a5f95efe458942bf4f0fec980e3b fpscr=00000000
+aesmc.8 q1, q9 51c6f6af2a1de39526bc45f7a76187ef 1378cc285014632fa05bf3836f7b0267 ea213074a69485bf6d3d33e80e2281dc 1378cc285014632fa05bf3836f7b0267 fpscr=00000000
+aesmc.8 q1, q9 124ecb70f79979a7ae01844088bd7bbe 1614be74a19641dae470df8abcc9c0b3 1ad83933fb54585bf9825be1a425d453 1614be74a19641dae470df8abcc9c0b3 fpscr=00000000
+aesmc.8 q1, q9 f209069ea7d3e520baedeb496f09ca07 f17c0c040cee6e4364c5ec2b482151d1 85f869915dfebad69cfad8d8881a5823 f17c0c040cee6e4364c5ec2b482151d1 fpscr=00000000
+aesmc.8 q1, q9 d2588598182a085f2ae05c723ca7542a a73535507f7e7497cdba2d21fe93853b a5fdc26d569e82a8752f6342bc9a7a8f a73535507f7e7497cdba2d21fe93853b fpscr=00000000
+aesmc.8 q1, q9 ae2d4071b09e34d197ade8b4986d6b05 ebd55f51b7352d94362d6f4fc8df6c3a b499607dca1126c6ff20d03476b050d7 ebd55f51b7352d94362d6f4fc8df6c3a fpscr=00000000
+aesmc.8 q1, q9 5136e60ea8b68eb60aff985d1d21b4da 5ff87f8ee1fa4dc734966a0ea86c3d83 b0fb3c213c5da959860f4f008485e59e 5ff87f8ee1fa4dc734966a0ea86c3d83 fpscr=00000000
+aesmc.8 q1, q9 4c8b686f3a23b6d93a12e81f605f5002 e2ff7367dd2bb590a557fdbc7d9bd976 facc7946940f4109241b018d2205147a e2ff7367dd2bb590a557fdbc7d9bd976 fpscr=00000000
+aesmc.8 q1, q9 7e8ba7f547468d9a0543b959418620dc 56491d3d8b2a43506ad00abc28cb1f74 bf488a4294c123c4d1b3a9c7189e242a 56491d3d8b2a43506ad00abc28cb1f74 fpscr=00000000
+aesmc.8 q1, q9 c89783ffad7ef35a4cf4eb6ba0f602c9 99375b70cb57d76663606f6b895df1dc d5f506a3a759e13274617664da174b7f 99375b70cb57d76663606f6b895df1dc fpscr=00000000
+aesmc.8 q1, q9 0a0edcee4d2cc878ef855eb65c10d628 8c280e5e7c11533370670d2b81b02e0f c78fb60aefc6dafef778e85696c2195d 8c280e5e7c11533370670d2b81b02e0f fpscr=00000000
+aesmc.8 q1, q9 2450922107afec54cd54f29957327e59 0f7c166980b896167145c55bed24b56c cf96ce9b0f7072b58f8774d6e4bd9cd5 0f7c166980b896167145c55bed24b56c fpscr=00000000
+aesmc.8 q1, q9 f6be84f8bb673f4fc8c387756fbfd9bd 029353f1b5ac7f6f455b745cb01a6853 cc99fb9d1397cb4641511c3afcc41db4 029353f1b5ac7f6f455b745cb01a6853 fpscr=00000000
+aesmc.8 q1, q9 60b794d448b4a0c8be32fda98514c6b3 44cea455fc4eef9fce09fd8da9f12525 1dba0bd7f8f374bdff2bb9daf219ce7d 44cea455fc4eef9fce09fd8da9f12525 fpscr=00000000
+aesmc.8 q1, q9 419ca01490f7f11f910033957a94279b b78debf535fec705eaad3f4eb709cd41 17dd03ed5c7abc938f15369a725c6c70 b78debf535fec705eaad3f4eb709cd41 fpscr=00000000
+aesmc.8 q1, q9 7bcc8918719010b51f8d0a9a2b9cdad6 392f0631401ae5027aa91a00bcc34007 0822757e7913bd6a47ddaefde90565b1 392f0631401ae5027aa91a00bcc34007 fpscr=00000000
+aesmc.8 q1, q9 eca72f41cddddee9493a63177b8ec0c3 ac14d76afd052af45e5d6d02967e5ed8 3e7a4f0ec9c852758a3761b064db70a1 ac14d76afd052af45e5d6d02967e5ed8 fpscr=00000000
+aesmc.8 q1, q9 768d72ee82413c1cef651c6c49c9b9c3 ee9c3c004c1c753e762819b5269b0613 67362936b3a7bcb319668904e4528f91 ee9c3c004c1c753e762819b5269b0613 fpscr=00000000
+aesmc.8 q1, q9 f7df3280711908adf17116fa75aea535 e02816510cc1a83da26aff774c781a19 162ce5503618225453a18e3cd127e928 e02816510cc1a83da26aff774c781a19 fpscr=00000000
+aesmc.8 q1, q9 50fd4f567ac722fc2fbc3120de9c6479 621745bf1e53a253c283fdabe8787949 4c726edf3875e7160716161011e3db89 621745bf1e53a253c283fdabe8787949 fpscr=00000000
+aesmc.8 q1, q9 6146a9d07daa6c6a88a64c3f65f3d5f0 54caa9aa633342e0b5d4f4aedaf80203 2e70f2318c61524db82d8f21509fce22 54caa9aa633342e0b5d4f4aedaf80203 fpscr=00000000
+aesmc.8 q1, q9 091a1f4f5923c556de8f49b6eb14daf9 95a02171b9c06a425dbdc3e2025a96a7 23af5db405637443f9a7fe613a837cac 95a02171b9c06a425dbdc3e2025a96a7 fpscr=00000000
+aesmc.8 q1, q9 2ad99232f0910c210fd806e54e5e51f5 07f98f74015af8db989c4ca73ffe1597 e41b669cd694dfe5097b188537249bcb 07f98f74015af8db989c4ca73ffe1597 fpscr=00000000
+aesmc.8 q1, q9 a2e4e3d92054232afde0652d6f311b42 8836d1131b62cd0b48d36e5b73435f30 d92d78f0842f3723c050a19faa7c38b1 8836d1131b62cd0b48d36e5b73435f30 fpscr=00000000
+aesmc.8 q1, q9 539af0a5cbcde8d1860844ed2dee1843 fab6c9afe737c9314bc208617c8954d4 7a041d4978a406f2ff2b6753420d80ba fab6c9afe737c9314bc208617c8954d4 fpscr=00000000
+aesmc.8 q1, q9 1b5c9af6cf5b3c778baf84856af42855 3bda55a84439ccad82c8fc163b30d4e2 1a1f4c5591df217311fc3479af1b3ab3 3bda55a84439ccad82c8fc163b30d4e2 fpscr=00000000
+aesmc.8 q1, q9 db89c02a0d5fff7bec35055605a42bda 2c00565c14c9b5e0cc4528dc9099beba ea7edc6e6fe0c5c291318f52c986fdbf 2c00565c14c9b5e0cc4528dc9099beba fpscr=00000000
+aesmc.8 q1, q9 738244a36537113d88fba7bfdd5d0131 ad8bac2d354666290b996d125b24f4bd 11604593319c1a8bd44b7301baec7919 ad8bac2d354666290b996d125b24f4bd fpscr=00000000
+randV128: 768 calls, 793 iters
+aesmc.8 q1, q9 c3a505c0b746521e41604a20d37f8abb 9ed8377b8811bde81e25ab197c2f544b 455ef8e984f4328e99da25ef5ec5ee39 9ed8377b8811bde81e25ab197c2f544b fpscr=00000000
+aesmc.8 q1, q9 df49d6a4ee899c7ee449c250d31cbfc2 5efe32c3f031858af4d4fac6c72a608a 2ef4e06bca66385a8c88baa25aecf342 5efe32c3f031858af4d4fac6c72a608a fpscr=00000000
+aesmc.8 q1, q9 4f3e6b0b450ee14a3f6391173f4b1584 0d727f3f0669eeba87ac92595c78dc16 56b35a805e8cc029c01a0e3426de6177 0d727f3f0669eeba87ac92595c78dc16 fpscr=00000000
+aesmc.8 q1, q9 84629830272617d94573bcb41f7f9ba4 9c342f9777c1646445ad3ed3b57d49d5 9a6f09ece70083d27763061721a275a2 9c342f9777c1646445ad3ed3b57d49d5 fpscr=00000000
+aesmc.8 q1, q9 a42d5ff531c8df470f895737f09f7c94 18551cd87e8cde3ca5964fc72c731ed5 0a4607c2ea636af3da4b5d77515950cc 18551cd87e8cde3ca5964fc72c731ed5 fpscr=00000000
+aesmc.8 q1, q9 4b33b2ee05b02563c54f4243ae396147 d4498d2688b44f1aa9c7aa1cc9fa6eef 1d5e285ddea5cbd900c3a8b3372eef44 d4498d2688b44f1aa9c7aa1cc9fa6eef fpscr=00000000
+aesmc.8 q1, q9 5ad5737c833dcb8d47245f373bad281c b07164e0739a975f319f2e316c731984 55ad0bb60a72acf58069e6be25cf4f27 b07164e0739a975f319f2e316c731984 fpscr=00000000
+aesmc.8 q1, q9 b17280fe8bd0af2574688c737559b274 8c2c7f66219d966a1d7eba67f53dfff3 face60edf7bec0c9570697783d72a42f 8c2c7f66219d966a1d7eba67f53dfff3 fpscr=00000000
+aesmc.8 q1, q9 306bbad4fdc8b28c2e7caa583d9fdfad 48dac018701d2c9c4cc5301c44b8009d a2afbef96e1a933a496964e18c3a0add 48dac018701d2c9c4cc5301c44b8009d fpscr=00000000
+sha1c.32 q11, q10, q2 b61e015fb885b42253bf994574df8f2a c4dc0557417a3954a0d36eb33845fce0 e3dd1fb618cc6becf41055cc5eface6f f9ee102ddf818909278e8bbf23d641d8 c4dc0557417a3954a0d36eb33845fce0 e3dd1fb618cc6becf41055cc5eface6f fpscr=00000000
+sha1c.32 q11, q10, q2 0fcd0f1f38f8c5e5caafbe5b484ac64a 5b3835128dd0355762546ab8a9c9f869 7a591efbe94e37d632c5c500931264b7 95076aeb0dc0c464b55c1553ad9b5989 5b3835128dd0355762546ab8a9c9f869 7a591efbe94e37d632c5c500931264b7 fpscr=00000000
+sha1c.32 q11, q10, q2 7695b11f8084e96131699e77ba13900a 4e81904cd19865f4aac9fdff14522f41 bddc2f3f9e7ca05380906821cd649648 5aaf5c12b5daa2f5bb196c2268128745 4e81904cd19865f4aac9fdff14522f41 bddc2f3f9e7ca05380906821cd649648 fpscr=00000000
+sha1c.32 q11, q10, q2 ac397eba8be1f8ec098322ed80e32fc6 6ebcc4c757e84ee401f6228492d84a9b 0ce747ec936710ebfbe70be75829395f 2d483e7255e82718f5bd5ff4ac0c6f35 6ebcc4c757e84ee401f6228492d84a9b 0ce747ec936710ebfbe70be75829395f fpscr=00000000
+sha1c.32 q11, q10, q2 67bd0a55cfd546a986815ea08ffe7621 70ed65e99385444d9a5ced2c189f1b19 1a7dfb643cd5dcc0d64fc13628a521a2 4a3411f8266dc796657f0404567b6570 70ed65e99385444d9a5ced2c189f1b19 1a7dfb643cd5dcc0d64fc13628a521a2 fpscr=00000000
+sha1c.32 q11, q10, q2 5924e954bf2328bcdbe96673dca83ac0 07170714f9319c52aa8271db98eb7661 9ca3dd0d0d8957f8464ca0f2311c20b5 7e8d2619776bebf0b98cc2a743c3b1d1 07170714f9319c52aa8271db98eb7661 9ca3dd0d0d8957f8464ca0f2311c20b5 fpscr=00000000
+sha1c.32 q11, q10, q2 3773ae1bcf8ff14a3d3c4e4b5a254e46 e93e3eadfdb2aa1765eac47508003017 465c83497b47d5b77f61bafe67d20d3b f0f2f99dfff87ac6d5a27e11bae452ae e93e3eadfdb2aa1765eac47508003017 465c83497b47d5b77f61bafe67d20d3b fpscr=00000000
+sha1c.32 q11, q10, q2 b4adef0e74ddf675df012a0cfeb98757 c8679e1814cac1c1ff19f9de5b231cdd cbad7f7df9d3ab1fb513253fbf0bbad8 99836a6e188eaf6f2dd5bcb15babc892 c8679e1814cac1c1ff19f9de5b231cdd cbad7f7df9d3ab1fb513253fbf0bbad8 fpscr=00000000
+sha1c.32 q11, q10, q2 86d73e9120d28a64f7ba0e9abaa9b898 5896bbb9b13f3573ab9223f985960f59 e19a660dfbf22d561be6f39a2c0cfb30 49ec55e8d478ea4c1828a44a1485ba1a 5896bbb9b13f3573ab9223f985960f59 e19a660dfbf22d561be6f39a2c0cfb30 fpscr=00000000
+sha1c.32 q11, q10, q2 5ff5310749300238b7eb0dd98437b5ac 4fce29f348d45b519eda58ac7a9edc2f 3926cc5df566ae80e65d3af1a217a5e8 7b3859f14bec79fa55ef9350905043c0 4fce29f348d45b519eda58ac7a9edc2f 3926cc5df566ae80e65d3af1a217a5e8 fpscr=00000000
+sha1c.32 q11, q10, q2 f30a5bd562bcb117541a3dac4fa95437 5f147c2c4d4d86800c74abd92f805802 8a5546d05cf482bf4afd0c2915728ba3 c74d77780275f2610bccbb60ec609745 5f147c2c4d4d86800c74abd92f805802 8a5546d05cf482bf4afd0c2915728ba3 fpscr=00000000
+sha1c.32 q11, q10, q2 f81b4f5fe03bec2401c8b1f90f4166de 3d6c48c6356e0b2329e52f65977e5676 862d66cca261fe397b497f257a5f8205 bc89a898262cf1d81b66e2aecf3d81a4 3d6c48c6356e0b2329e52f65977e5676 862d66cca261fe397b497f257a5f8205 fpscr=00000000
+sha1c.32 q11, q10, q2 202ba309356f0684f47c7da2b845c144 9cd9212673fb3d5f28b1fa34a6deaa2f e2b0c1b33d707512acc6a5cbc3245db3 a9caadc01bde0c3a9ffad10bc873635f 9cd9212673fb3d5f28b1fa34a6deaa2f e2b0c1b33d707512acc6a5cbc3245db3 fpscr=00000000
+sha1c.32 q11, q10, q2 203fe937d71e545a5fb8b48c3ff8390d 32609bb07bb871573e5b1e2950e32ad2 52e2ebeaa1e43c6c12f793fde604f150 2b43a6c12b3e52c7381a227ed95c6f17 32609bb07bb871573e5b1e2950e32ad2 52e2ebeaa1e43c6c12f793fde604f150 fpscr=00000000
+sha1c.32 q11, q10, q2 ab5bb74d390b29ca76006c9b969ea1de b10449c8c169fa2f9e68b12a89d1a702 8ac878d54083a66de0615ea0d6431181 fb512c083c7508dc6997e97c90b96b8f b10449c8c169fa2f9e68b12a89d1a702 8ac878d54083a66de0615ea0d6431181 fpscr=00000000
+sha1c.32 q11, q10, q2 76829faecffadaf96fd9b8b3b37bcd5a cdcac1d1b9d32d0b7e5bc51a46ecf763 3d66fcd8901008394c881899882591e9 e799b517dbf5e8b828d63a151bbe9ec0 cdcac1d1b9d32d0b7e5bc51a46ecf763 3d66fcd8901008394c881899882591e9 fpscr=00000000
+sha1c.32 q11, q10, q2 36b936c00eafba0a7cc7abb889d39225 3cb59631d8b85d1010b970dc7a78ee9a 20bf0b56034fb5f388efd6c9eeee462c eca64def48836beb82e6526afd2f9141 3cb59631d8b85d1010b970dc7a78ee9a 20bf0b56034fb5f388efd6c9eeee462c fpscr=00000000
+sha1c.32 q11, q10, q2 9c0311e568ee1d21d24d5b8d0deac3e4 b0cb5c4a91dedf618906c45519b95f4a e6d839b5100402bfc81bac17fee204ee 88907e8644391808ec50d8fbe921a0ed b0cb5c4a91dedf618906c45519b95f4a e6d839b5100402bfc81bac17fee204ee fpscr=00000000
+sha1c.32 q11, q10, q2 5f65c282537b5863a5f0da183204353a de0da681580706231cc5d66918f31e18 45b41a5728f342c14290ae66ab459ed4 180d2f2dc28360edc767200be8a161c2 de0da681580706231cc5d66918f31e18 45b41a5728f342c14290ae66ab459ed4 fpscr=00000000
+sha1c.32 q11, q10, q2 31e2defb421abff328343e3beb65bccc 7a820a3aa1f92679ff7bbbfc696b00a6 ef5942a1c0e0ca1e28d1f199ea5ce881 f494a06d5bc905d301db82f969b419ce 7a820a3aa1f92679ff7bbbfc696b00a6 ef5942a1c0e0ca1e28d1f199ea5ce881 fpscr=00000000
+sha1c.32 q11, q10, q2 c77ffab3aa8fa5f5909c99db2e522a3d 372b1ad9e177948764ab85f10263d89b 99c845f74d90edf9af628796ad69b79a d080aef256f268e9577a9758bdd70221 372b1ad9e177948764ab85f10263d89b 99c845f74d90edf9af628796ad69b79a fpscr=00000000
+sha1c.32 q11, q10, q2 f607b7bd42c5ff760bc9853feab2dfc2 f403d8df0862c9a25fa293bc1e526883 403cff659fbb365906ee33b246f3df12 2c9a7deac34b107e7ac42d171a3dfb8d f403d8df0862c9a25fa293bc1e526883 403cff659fbb365906ee33b246f3df12 fpscr=00000000
+sha1c.32 q11, q10, q2 bb1a2c57134556ba6f87ff79947a339d 6701d88dd7e8dd3f864752b984649c70 a55d087862f1b691959314a4bdd8ff31 592859647e0fdab9fd6508d90b08effb 6701d88dd7e8dd3f864752b984649c70 a55d087862f1b691959314a4bdd8ff31 fpscr=00000000
+sha1c.32 q11, q10, q2 9c05382834d243e8102309289f0487d0 53ec1d197532560d881e8f12092fdd3a 6202838ff120e8c01d40e15790ebbb33 05acb0ea6d1c543abaa54bc0c254efec 53ec1d197532560d881e8f12092fdd3a 6202838ff120e8c01d40e15790ebbb33 fpscr=00000000
+sha1c.32 q11, q10, q2 6ec93ce65404893099ad5ea9a1f60285 2b2e060ec00e220ad279adafb572e8bd 9f559c6abf12bf38fa2e0670b01bcafa 46a4cb344befda4a777dc1383c5de4c2 2b2e060ec00e220ad279adafb572e8bd 9f559c6abf12bf38fa2e0670b01bcafa fpscr=00000000
+sha1c.32 q11, q10, q2 699bc759eb22cacbec75d26340ffddf6 b3e72358437eb891e7c38e901fb05a72 b9ec899de43c17ad36d3ef68f9d79fd4 48cba01abc57469fd342274f5dbacd4a b3e72358437eb891e7c38e901fb05a72 b9ec899de43c17ad36d3ef68f9d79fd4 fpscr=00000000
+sha1c.32 q11, q10, q2 fb6753d7ab516c02b6fd0124da8d4230 af2e6fd3ee34fd7b92a297dec2e9e3f6 a52952a5b063eccba562eeb33a849b04 0e9a85fac5faaae490c21bb811b7db9e af2e6fd3ee34fd7b92a297dec2e9e3f6 a52952a5b063eccba562eeb33a849b04 fpscr=00000000
+sha1c.32 q11, q10, q2 d63174c20953c3f92bc7fed064e506d7 d30a7fe136f446eb0499db7c93615aee 160f8de8964a92b57b5f18366666912d 5cf72276b5c563f8db89cabdee7827ce d30a7fe136f446eb0499db7c93615aee 160f8de8964a92b57b5f18366666912d fpscr=00000000
+sha1c.32 q11, q10, q2 affcbe7f7aee24d57f58dd4bd149fc90 d27de5e78e82e606742d7050845d91fd c1a3cdc80ab45f90eb507fd372c255f4 06d2e8208029e0aa6f6910f7c36f212b d27de5e78e82e606742d7050845d91fd c1a3cdc80ab45f90eb507fd372c255f4 fpscr=00000000
+sha1c.32 q11, q10, q2 3acdc57171e5e3b9e533b17815fef9fe 618b36496aa231ef14e2683c8b1f5cc7 5edc01923794338d74faa2ddf8e1a090 aff3889406b6a7418a1ca6276bb55d60 618b36496aa231ef14e2683c8b1f5cc7 5edc01923794338d74faa2ddf8e1a090 fpscr=00000000
+sha1c.32 q11, q10, q2 3439076a3e197cca193bd9259bec90f1 94e3abf26c26b7a66d1858edf6f399e9 c5dd39d1b0c43bc118fccda3094fb4e2 2e7bb2d2ec5b4e9b836f3a8ae10eb7bc 94e3abf26c26b7a66d1858edf6f399e9 c5dd39d1b0c43bc118fccda3094fb4e2 fpscr=00000000
+sha1c.32 q11, q10, q2 2497720343b5ec2ae5f8f1335752c15f 5f2d600012fbb3181054e0951279a0c3 4d260583d0bcc78de4fa82c3ce0e851d 03260cfbfba3e7acdc5cc678e5933de1 5f2d600012fbb3181054e0951279a0c3 4d260583d0bcc78de4fa82c3ce0e851d fpscr=00000000
+sha1c.32 q11, q10, q2 be098d4378d8962ec9da1823673a0703 e2d10a83d1fbeeb59283ef9809a139d7 bdf57a5acc0300c3685016cac1c13e83 0c5f3b8af43a7fcf3569feeb66fb1992 e2d10a83d1fbeeb59283ef9809a139d7 bdf57a5acc0300c3685016cac1c13e83 fpscr=00000000
+sha1c.32 q11, q10, q2 153d92147f5309d64b43e1a21bee4078 00cccbbd5f8a40bcd0100e90e20d52c2 2631b48f178f1faa4759a8ab53ad8446 44c07e68cce9ee338eaad3ae93355b20 00cccbbd5f8a40bcd0100e90e20d52c2 2631b48f178f1faa4759a8ab53ad8446 fpscr=00000000
+sha1c.32 q11, q10, q2 de3613dbcdea9a46a0b7619465b43f63 6d233614316bfe51ff7d51608f00c029 3ddf478523227766b5984b4c78162e0a a7103b3bf116a55734ccda6d9cdd892b 6d233614316bfe51ff7d51608f00c029 3ddf478523227766b5984b4c78162e0a fpscr=00000000
+sha1c.32 q11, q10, q2 cdf9a5fdd4619ca3fbbaaadd3bcfd967 dfdae1eabc617c99524fccee06be57b0 b602c7a266815d1ae592158f24400e74 1e56c82bb777977045a65c330834e2dc dfdae1eabc617c99524fccee06be57b0 b602c7a266815d1ae592158f24400e74 fpscr=00000000
+sha1c.32 q11, q10, q2 968adddb0a7d641192ced1619083e128 08f45fa473320db7fe09931c398be9f9 459fc848547124ea0cca19594c6ff827 673114f311b7c5409f051d5bf129245d 08f45fa473320db7fe09931c398be9f9 459fc848547124ea0cca19594c6ff827 fpscr=00000000
+randV128: 1024 calls, 1056 iters
+sha1c.32 q11, q10, q2 ecec4ddce20045b2977aea0359142c4b 9d7644a6cba105d03730bacf1eab4daa 9dbadedc5fb420fa5ec66b8ee2e7c1c7 83a737aec098be967559dc1e04fc5034 9d7644a6cba105d03730bacf1eab4daa 9dbadedc5fb420fa5ec66b8ee2e7c1c7 fpscr=00000000
+sha1c.32 q11, q10, q2 84238a63d0b094ac3e4009a988c78d73 516423533673b907314755eca9625466 73569dc2fd10a66f0f071e11dcec3cf9 8bd8f0aef614c4293ad5126ec7f9e1f1 516423533673b907314755eca9625466 73569dc2fd10a66f0f071e11dcec3cf9 fpscr=00000000
+sha1c.32 q11, q10, q2 123429d34950a423bda5423513ded944 d9c192112a6a7c8020d37854ccf5d4d2 7b78995ea148096c521348c82dc23e60 e068d965b8ff266cd9171804f230a9e7 d9c192112a6a7c8020d37854ccf5d4d2 7b78995ea148096c521348c82dc23e60 fpscr=00000000
+sha1c.32 q11, q10, q2 4a23bc92c0a3ca39462ca98ced9ee362 e99123421a4ba360375837ee7da6a191 68236613bf1f9e155c6efc95caae9aa0 1115512a8cb99e2aee61a85fdee96ced e99123421a4ba360375837ee7da6a191 68236613bf1f9e155c6efc95caae9aa0 fpscr=00000000
+sha1c.32 q11, q10, q2 dff4d802a96f59140e5a52920a4c7f72 35d96c4c7ada81c9ab59a59dafba8e47 ef5c9847cb5bb98f619c4d5ea5f2255e 9e4ade7dc289de143c51bad887bce7cf 35d96c4c7ada81c9ab59a59dafba8e47 ef5c9847cb5bb98f619c4d5ea5f2255e fpscr=00000000
+sha1c.32 q11, q10, q2 86aa11887877a6d849b3512b5e2a8218 719d0091bedc6be2af5ad84456767099 c427c35c3abfacfd95205105b4d4b23d 67b5999a1a4535222c7c94e49dc5e4b9 719d0091bedc6be2af5ad84456767099 c427c35c3abfacfd95205105b4d4b23d fpscr=00000000
+sha1c.32 q11, q10, q2 f349fc88a17f04a72ababa3ade7ec0f6 50e072765b13b4cc78e1e3c8661d1a2a 9b877bb77e0fce842b7e1a6eea9715e2 4481e4e2c2ba83271780231bcbd04249 50e072765b13b4cc78e1e3c8661d1a2a 9b877bb77e0fce842b7e1a6eea9715e2 fpscr=00000000
+sha1c.32 q11, q10, q2 d9d62b65994cc8a7e6f3a0a57d8b0bb2 87a7575fc345b1ad3a70d90ed4f3609e 278154bc0d107047583cbe7f3c7e24ef 5705e1f02bb42a8dbe1d6a6b7c0e6b54 87a7575fc345b1ad3a70d90ed4f3609e 278154bc0d107047583cbe7f3c7e24ef fpscr=00000000
+sha1c.32 q11, q10, q2 c9f644af6c35b5a9298bd0f8923b179a 1d18e1cf5b84e96a50db4f1b9ccfb00a e575baa0e0ae972530ded746a0c24cf6 5b5aa14045931c173c7d4dae324d0a87 1d18e1cf5b84e96a50db4f1b9ccfb00a e575baa0e0ae972530ded746a0c24cf6 fpscr=00000000
+sha1c.32 q11, q10, q2 cbd0cbcccaa715e279b7db6a963b13c1 3151b853da308a1146e2e32500cd8fd7 4f455ede773e287b72d7d019b8471c89 12d78d9e224720743aa53d60a6f1c76f 3151b853da308a1146e2e32500cd8fd7 4f455ede773e287b72d7d019b8471c89 fpscr=00000000
+sha1c.32 q11, q10, q2 413a82184f5f247e5d780d4ad43726b8 162f6cae00d9a9616fd38c835bbc95f9 69510c18b200844005914da50633a077 0d8be4f96b0713a63a46fda966988a50 162f6cae00d9a9616fd38c835bbc95f9 69510c18b200844005914da50633a077 fpscr=00000000
+sha1c.32 q11, q10, q2 df37fbf8712136a00a527c7a3e712721 81b790423f419a7dff326017a1e19514 e69d59b105b800971d9062cd7ccaac64 f07db74f5fad078f90f149eef8978e6e 81b790423f419a7dff326017a1e19514 e69d59b105b800971d9062cd7ccaac64 fpscr=00000000
+sha1c.32 q11, q10, q2 59cccbd0a3b1a06db3c73ae0ca2ee8a2 26edba740e2eb08a2a8571c6c67e63cc 7c2dd90fe42aefa4ee582477105015f5 820d5be25e212a9dbd3388f8d19fe146 26edba740e2eb08a2a8571c6c67e63cc 7c2dd90fe42aefa4ee582477105015f5 fpscr=00000000
+sha1h.32 q6, q7 62fd870459d3b6088d5e5d5e6ab23ddf b9d47da8de6340ab234dd474bfd8d4c6 000000000000000000000000aff63531 b9d47da8de6340ab234dd474bfd8d4c6 fpscr=00000000
+sha1h.32 q6, q7 d8308a161f694382213cafb53a36aff3 86ff9089fca3cac10660484e4f7344dc 00000000000000000000000013dcd137 86ff9089fca3cac10660484e4f7344dc fpscr=00000000
+sha1h.32 q6, q7 be27c1a5174d78728950fadd603a4d91 7b86d0bd834973a67642acd16da757d4 0000000000000000000000001b69d5f5 7b86d0bd834973a67642acd16da757d4 fpscr=00000000
+sha1h.32 q6, q7 f3400e1021de3339a4fd1e35bc20f51a 77c81da655b51bba5253e15cf9d4ed0e 000000000000000000000000be753b43 77c81da655b51bba5253e15cf9d4ed0e fpscr=00000000
+sha1h.32 q6, q7 59de50b71e7c553654a0fa1d2d4888ed 5c2557a35145a25b79f3c74fd35be6ea 000000000000000000000000b4d6f9ba 5c2557a35145a25b79f3c74fd35be6ea fpscr=00000000
+sha1h.32 q6, q7 ce5f66faeb88beca779c70f69511e66a 08fe5d15565be8ebcc833e0bdb9b22c9 00000000000000000000000076e6c8b2 08fe5d15565be8ebcc833e0bdb9b22c9 fpscr=00000000
+sha1h.32 q6, q7 3423313a6b614e53ee4e5e1fd2dbeff2 5db2115b4557cdca2b6225eff1f5800b 000000000000000000000000fc7d6002 5db2115b4557cdca2b6225eff1f5800b fpscr=00000000
+sha1h.32 q6, q7 698a92d77d68e5349918a6f9c50683e4 39a151d5fe98305776f15e5cf5c8e20f 000000000000000000000000fd723883 39a151d5fe98305776f15e5cf5c8e20f fpscr=00000000
+sha1h.32 q6, q7 4ef5672f00fb63ca585926e34ef381a1 7d2cfee4617ef2f28d8fc7b1c6742635 000000000000000000000000719d098d 7d2cfee4617ef2f28d8fc7b1c6742635 fpscr=00000000
+sha1h.32 q6, q7 c3c391a4d67da7770a72bf3d4d01cb88 09b2f9e74e69f4fb509d414e46592ddd 00000000000000000000000051964b77 09b2f9e74e69f4fb509d414e46592ddd fpscr=00000000
+sha1h.32 q6, q7 a855ef96dd4b939b91c15167a2913ff9 bd93203fa5ba14d39e79ad9453d8d768 00000000000000000000000014f635da bd93203fa5ba14d39e79ad9453d8d768 fpscr=00000000
+sha1h.32 q6, q7 dc0a6363f6c70594cca9bcc22d02be55 7830544b46d033d95986e9e2ce510435 0000000000000000000000007394410d 7830544b46d033d95986e9e2ce510435 fpscr=00000000
+sha1h.32 q6, q7 4142cc6e0151dec49a87e0aecdb528fb 1ce8746b100c316e5f21d698972394a5 00000000000000000000000065c8e529 1ce8746b100c316e5f21d698972394a5 fpscr=00000000
+sha1h.32 q6, q7 b55e0914de47fe8bdcbe9c8a64085d4b 871c6200e4cdeef191ac53178eae6717 000000000000000000000000e3ab99c5 871c6200e4cdeef191ac53178eae6717 fpscr=00000000
+sha1h.32 q6, q7 1abdfbb76d0b454872abd2b6d05e3da6 9b2bfd69a37449c3cf8642bf93525ceb 000000000000000000000000e4d4973a 9b2bfd69a37449c3cf8642bf93525ceb fpscr=00000000
+sha1h.32 q6, q7 4ec083b68efd935b3cb06092f214a86b 367524062b5f2442f91082ee85705582 000000000000000000000000a15c1560 367524062b5f2442f91082ee85705582 fpscr=00000000
+sha1h.32 q6, q7 32c57f72207cc8251a2c277faa8c7dfb 395bb8385df15dd1efa9f2064667303b 000000000000000000000000d199cc0e 395bb8385df15dd1efa9f2064667303b fpscr=00000000
+sha1h.32 q6, q7 a62fd04a04e8c304ec8007dcd8259eb4 843c995e1987d6cd91b17366b498ce77 000000000000000000000000ed26339d 843c995e1987d6cd91b17366b498ce77 fpscr=00000000
+sha1h.32 q6, q7 8a5b559f1ba1665b910ae00a5c40e9f9 f778a7d83e836d98be89e66fb0620f95 0000000000000000000000006c1883e5 f778a7d83e836d98be89e66fb0620f95 fpscr=00000000
+sha1h.32 q6, q7 beabf0cf43088f88eb2d9268163c4027 7170c207ae440391589029801b25d3f5 00000000000000000000000046c974fd 7170c207ae440391589029801b25d3f5 fpscr=00000000
+sha1h.32 q6, q7 217f803d5d7d1febd846fd56e57981a1 d483ca4b472b78193d271dfad342faf8 00000000000000000000000034d0bebe d483ca4b472b78193d271dfad342faf8 fpscr=00000000
+sha1h.32 q6, q7 9535e446495ef6e439b80035aa588dc4 fe129f02eb97ac8f4eada13cb81864fd 0000000000000000000000006e06193f fe129f02eb97ac8f4eada13cb81864fd fpscr=00000000
+sha1h.32 q6, q7 f82ffe4ce70ef4d4efe07d64463843f2 d17c208e78e97e546b8297a6ac08f064 0000000000000000000000002b023c19 d17c208e78e97e546b8297a6ac08f064 fpscr=00000000
+sha1h.32 q6, q7 2bcdacae16eaf91ad7205244977a858a 2b212f4fcf7fd0c67407de988e71808e 000000000000000000000000a39c6023 2b212f4fcf7fd0c67407de988e71808e fpscr=00000000
+sha1h.32 q6, q7 0e6ecfcdb854e517d4d760347e7d32ec ed62aaa3d0bc8048499b55733db3f2da 0000000000000000000000008f6cfcb6 ed62aaa3d0bc8048499b55733db3f2da fpscr=00000000
+sha1h.32 q6, q7 f79e72ed5bfd7037ca9edd979b2f27a8 375799de8a4e91eaff7c1a9c5cde179e 000000000000000000000000973785e7 375799de8a4e91eaff7c1a9c5cde179e fpscr=00000000
+sha1h.32 q6, q7 2835678a50a47ef5d67157628644ff59 656554181c3df2324141abf98de8837e 000000000000000000000000a37a20df 656554181c3df2324141abf98de8837e fpscr=00000000
+sha1h.32 q6, q7 628869dc8e108be14f73a137df525a4d 54c514de300aca01a8ce04366504e9f9 00000000000000000000000059413a7e 54c514de300aca01a8ce04366504e9f9 fpscr=00000000
+sha1h.32 q6, q7 84f65842f7a2775c13059c7386ba18e2 e3d9b990a614f9b6128306b4c2912b6e 000000000000000000000000b0a44adb e3d9b990a614f9b6128306b4c2912b6e fpscr=00000000
+sha1h.32 q6, q7 6de0141d69b921c5038527785bdb197a f101238f5ebb5eb260be91d286ef273e 000000000000000000000000a1bbc9cf f101238f5ebb5eb260be91d286ef273e fpscr=00000000
+sha1h.32 q6, q7 fea57cccc6b66b7dff5624a53e153c75 5f9c323a3760db5472e185f08f7fbfc8 00000000000000000000000023dfeff2 5f9c323a3760db5472e185f08f7fbfc8 fpscr=00000000
+sha1h.32 q6, q7 18a572afecf734e2e7d5725b0ec96331 0d0ac5f213624efc284cc26fbea1d16c 0000000000000000000000002fa8745b 0d0ac5f213624efc284cc26fbea1d16c fpscr=00000000
+sha1h.32 q6, q7 9941d427bcdf5b579b64f0f9ad576c11 dbabbe15d021980b625e27aef3b33e8b 000000000000000000000000fceccfa2 dbabbe15d021980b625e27aef3b33e8b fpscr=00000000
+sha1h.32 q6, q7 62d8839316cbc139fb637fdff91d3872 c780ad135ca102e2518a639b87f45de2 000000000000000000000000a1fd1778 c780ad135ca102e2518a639b87f45de2 fpscr=00000000
+sha1h.32 q6, q7 57095d22715931dce1f8ededef2ca9b8 600494c229cacbfe9623d042fcc5d21b 000000000000000000000000ff317486 600494c229cacbfe9623d042fcc5d21b fpscr=00000000
+sha1h.32 q6, q7 c584c4cb1490405ee7400ead75536685 707377d5afe882f8b81cfda24fbfb946 00000000000000000000000093efee51 707377d5afe882f8b81cfda24fbfb946 fpscr=00000000
+sha1h.32 q6, q7 d2b310611905a5c6f0afd7ae82ebff4e d92e37acd05b093195d4cc196042f3c4 0000000000000000000000001810bcf1 d92e37acd05b093195d4cc196042f3c4 fpscr=00000000
+sha1h.32 q6, q7 60f6214260184275dda6294ff4555270 7995b5a86a843e090eac1b090faf60f4 00000000000000000000000003ebd83d 7995b5a86a843e090eac1b090faf60f4 fpscr=00000000
+sha1h.32 q6, q7 4dacd7d0c927f5ca8e84e4f0acf0404d 3106cf285ec202de0303cbd23c65e037 000000000000000000000000cf19780d 3106cf285ec202de0303cbd23c65e037 fpscr=00000000
+sha1h.32 q6, q7 7a35126b3395a025e3aae8f28a1caa45 e2e3658d8c7635125439bcd2c6c553ec 00000000000000000000000031b154fb e2e3658d8c7635125439bcd2c6c553ec fpscr=00000000
+sha1h.32 q6, q7 c7f1b17280bf21e7bc7614b46e396eb6 6a8c5936d4ffb705e1afce6c8f2e9973 000000000000000000000000e3cba65c 6a8c5936d4ffb705e1afce6c8f2e9973 fpscr=00000000
+sha1h.32 q6, q7 144196458e07596ff84b4a9638a86d02 a95f8a8316bd671589c4e1fd7500922d 0000000000000000000000005d40248b a95f8a8316bd671589c4e1fd7500922d fpscr=00000000
+sha1h.32 q6, q7 81bfd7d5321027a42ed9d4e7599c1d79 319fe7a3f9dc7ef119c965e9a0f91330 000000000000000000000000283e44cc 319fe7a3f9dc7ef119c965e9a0f91330 fpscr=00000000
+sha1h.32 q6, q7 d109228b0759d612ae4c89891b611cb7 9244ec39401133ed6394a4e0fdd8d922 000000000000000000000000bf763648 9244ec39401133ed6394a4e0fdd8d922 fpscr=00000000
+randV128: 1280 calls, 1316 iters
+sha1h.32 q6, q7 789f490577f853beeb80de449baf6d48 02ccc56c59b42f8021b67dc84f586abe 00000000000000000000000093d61aaf 02ccc56c59b42f8021b67dc84f586abe fpscr=00000000
+sha1h.32 q6, q7 58e02da4604c7f08c3d2b477b9e7f18b 6297549b24245209338fce0077d9a665 0000000000000000000000005df66999 6297549b24245209338fce0077d9a665 fpscr=00000000
+sha1h.32 q6, q7 4f2daec7a3b53a5117a4ec82556888e1 9305772680c27ce8798079e855bb6d76 000000000000000000000000956edb5d 9305772680c27ce8798079e855bb6d76 fpscr=00000000
+sha1h.32 q6, q7 3ee5accf209364f8c75664c64e9312a9 73770f6e4fec8c7dd3e85ce1c95f9e52 000000000000000000000000b257e794 73770f6e4fec8c7dd3e85ce1c95f9e52 fpscr=00000000
+sha1h.32 q6, q7 0568061bb747dd5db246fca286c76f43 e34cfcd26f0564292128584ab3251b58 0000000000000000000000002cc946d6 e34cfcd26f0564292128584ab3251b58 fpscr=00000000
+sha1h.32 q6, q7 84179e0b483085e1bad69677db647e10 c2e51eb2c26ae24c429f4d84f36bc2e8 0000000000000000000000003cdaf0ba c2e51eb2c26ae24c429f4d84f36bc2e8 fpscr=00000000
+sha1m.32 q2, q8, q13 9b515300b3af3be3bd6611a42ecb216f f2a1546f267de84418ad1bee69937563 05e1aeea00847261042b58e9fde83fe3 ff11bdb2d609a9b4a85b4cfa1bd77f7b f2a1546f267de84418ad1bee69937563 05e1aeea00847261042b58e9fde83fe3 fpscr=00000000
+sha1m.32 q2, q8, q13 636051b9d517c3b565ae686f7131c232 9aef684968f127ae939020ea68a8ced4 10e8927696ec55e2380329874e749e64 d09efd50ef057976c72cc5196c09c5a1 9aef684968f127ae939020ea68a8ced4 10e8927696ec55e2380329874e749e64 fpscr=00000000
+sha1m.32 q2, q8, q13 2e04ddb8ce6b79a06fd086fddb7739ba 216936907d86e1f590245a0ecd148c11 5830e0157e2c82888be11b33c34e7036 9b62c45691afd77ca460bbeed73c4c1e 216936907d86e1f590245a0ecd148c11 5830e0157e2c82888be11b33c34e7036 fpscr=00000000
+sha1m.32 q2, q8, q13 ae428a61116fafc610527d3260e35cac 3b1353a6dc026b3f41eddb408b1a83bd 92bf2a292c094c75314a43d24eb988fe 41a0946aec0fa8c7495e375a52d34be9 3b1353a6dc026b3f41eddb408b1a83bd 92bf2a292c094c75314a43d24eb988fe fpscr=00000000
+sha1m.32 q2, q8, q13 981dec1812e8bc4e7cb663f1f5b8fcab 9ef053f0f62818afda6eb8629600867d 72960517154507cf5ec1b548e5f9bb5f 92f5b0ecf2583525b3b6f2d02206acd7 9ef053f0f62818afda6eb8629600867d 72960517154507cf5ec1b548e5f9bb5f fpscr=00000000
+sha1m.32 q2, q8, q13 a09a97414698f159e6804a208c3aef5c fc04cad241bc3d69912b0559e3096af4 acbc0643aca508b945cb85797b52dcfd f314c1b0df5ffa22cc3d1e44e11ecaf7 fc04cad241bc3d69912b0559e3096af4 acbc0643aca508b945cb85797b52dcfd fpscr=00000000
+sha1m.32 q2, q8, q13 7abc1e3f1f44a40d833548a11bad0962 0a554caf30822d9298aad709647a02c6 f333bf1165eea2581becc7480508c07d fe95feecbf5d570b16ae1e7c28ba3f27 0a554caf30822d9298aad709647a02c6 f333bf1165eea2581becc7480508c07d fpscr=00000000
+sha1m.32 q2, q8, q13 d989177813b1298d87596f5995551c61 7ce46ded373e3d4d236c40560f952297 fc00c6e4b4e22acf14a78f9b755f3a82 30cc4629198a355cef035dfca28f298f 7ce46ded373e3d4d236c40560f952297 fc00c6e4b4e22acf14a78f9b755f3a82 fpscr=00000000
+sha1m.32 q2, q8, q13 7202154e95a2d3fe266fd42cef77fffd 05b8c1eecab4c0be68f85523d79f9f0c 7b26ad220e47f3426380f154c19c1eb0 ef97a4b90bc256e6c0f632341326e87e 05b8c1eecab4c0be68f85523d79f9f0c 7b26ad220e47f3426380f154c19c1eb0 fpscr=00000000
+sha1m.32 q2, q8, q13 f82dab261adaf78393fc8cfe1b5584db 5ad3dc175da80b0a99d02a56b0dc4dc8 24aa092de5e052d63dfd0158dc0142ab 24a2b6dfe89970ead453230bfaf371a5 5ad3dc175da80b0a99d02a56b0dc4dc8 24aa092de5e052d63dfd0158dc0142ab fpscr=00000000
+sha1m.32 q2, q8, q13 210e6f64141fe8400383a9b30f347f9e 2f3a52cc65df7154eb79d3d18f91ff70 aa8f6e6aaf719aaed6a0d38bbad47718 4c8194ea4f2ba898dc2a2b02fd435014 2f3a52cc65df7154eb79d3d18f91ff70 aa8f6e6aaf719aaed6a0d38bbad47718 fpscr=00000000
+sha1m.32 q2, q8, q13 37f0b771551b46c192766479670089a7 c2da713ddfbe20ee62ed7bd14f57939b baa1500e5bbebe48a67944424c4eaafc 24ca767d2a2fb0f39a3c181a0f8c4eec c2da713ddfbe20ee62ed7bd14f57939b baa1500e5bbebe48a67944424c4eaafc fpscr=00000000
+sha1m.32 q2, q8, q13 28fa9f6aa122df73c14cf1322c6fc011 208fa40aea8b37bb14690d0d90d069d6 ca394c0019fa007080b4b8335d5a2d2a c7635e257ef091b44c813226d5f6231a 208fa40aea8b37bb14690d0d90d069d6 ca394c0019fa007080b4b8335d5a2d2a fpsc...
[truncated message content] |
|
From: <sv...@va...> - 2016-08-07 23:33:56
|
Author: sewardj
Date: Mon Aug 8 00:33:48 2016
New Revision: 3242
Log:
Implement VMULL.P64.
dis_neon_data_3diff: don't mistakenly recognise VMULL.P64 as a plain
VMUL due to inadequate checking for the VMULL.P64 case.
Fix ARM decoding of SHA1SU1, SHA256SU0, SHA1H introduced in r3241.
Modified:
trunk/priv/guest_arm_defs.h
trunk/priv/guest_arm_helpers.c
trunk/priv/guest_arm_toIR.c
Modified: trunk/priv/guest_arm_defs.h
==============================================================================
--- trunk/priv/guest_arm_defs.h (original)
+++ trunk/priv/guest_arm_defs.h Mon Aug 8 00:33:48 2016
@@ -217,6 +217,12 @@
UInt argM3, UInt argM2, UInt argM1, UInt argM0
);
+extern
+void armg_dirtyhelper_VMULLP64 (
+ /*OUT*/V128* res,
+ UInt argN1, UInt argN0, UInt argM1, UInt argM0
+ );
+
/*---------------------------------------------------------*/
/*--- Condition code stuff ---*/
Modified: trunk/priv/guest_arm_helpers.c
==============================================================================
--- trunk/priv/guest_arm_helpers.c (original)
+++ trunk/priv/guest_arm_helpers.c Mon Aug 8 00:33:48 2016
@@ -780,6 +780,18 @@
arm64g_dirtyhelper_SHA1H(res, argMhi, argMlo);
}
+/* CALLED FROM GENERATED CODE
+   Dirty helper used for the 32-bit ARM VMULL.P64 instruction.  Each
+   64-bit source operand arrives as two 32-bit halves (argN1/argM1 are
+   the upper halves, argN0/argM0 the lower halves).  They are glued back
+   into 64-bit values here and the actual work is delegated to
+   arm64g_dirtyhelper_PMULLQ, with res passed through as the output
+   location.  res must be at least 8-aligned; this is asserted. */
+void armg_dirtyhelper_VMULLP64 (
+ /*OUT*/V128* res,
+ UInt argN1, UInt argN0, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1)));
+ ULong argN = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argM = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_PMULLQ(res, argN, argM);
+}
+
/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers. ---*/
Modified: trunk/priv/guest_arm_toIR.c
==============================================================================
--- trunk/priv/guest_arm_toIR.c (original)
+++ trunk/priv/guest_arm_toIR.c Mon Aug 8 00:33:48 2016
@@ -3041,6 +3041,12 @@
static
Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
{
+ /* In paths where this returns False, indicating a non-decodable
+ instruction, there may still be some IR assignments to temporaries
+ generated. This is inconvenient but harmless, and the post-front-end
+ IR optimisation pass will just remove them anyway. So there's no
+ effort made here to tidy it up.
+ */
UInt Q = (theInstr >> 6) & 1;
UInt dreg = get_neon_d_regno(theInstr);
UInt nreg = get_neon_n_regno(theInstr);
@@ -4834,6 +4840,12 @@
static
Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
{
+ /* In paths where this returns False, indicating a non-decodable
+ instruction, there may still be some IR assignments to temporaries
+ generated. This is inconvenient but harmless, and the post-front-end
+ IR optimisation pass will just remove them anyway. So there's no
+ effort made here to tidy it up.
+ */
UInt A = (theInstr >> 8) & 0xf;
UInt B = (theInstr >> 20) & 3;
UInt U = (theInstr >> 24) & 1;
@@ -5191,11 +5203,15 @@
op = Iop_PolynomialMull8x8;
break;
case 1:
+ if (P) return False;
op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
break;
case 2:
+ if (P) return False;
op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
break;
+ case 3:
+ return False;
default:
vassert(0);
}
@@ -12928,7 +12944,7 @@
{
Bool gate = False;
- UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,0);
+ UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,1,0)
&& INSNA(11,7) == BITS5(0,0,1,1,1) && INSNA(4,4) == 0) {
gate = True;
@@ -12998,7 +13014,7 @@
{
Bool gate = False;
- UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,0);
+ UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,0,1)
&& INSNA(11,6) == BITS6(0,0,1,0,1,1) && INSNA(4,4) == 0) {
gate = True;
@@ -13043,6 +13059,64 @@
/* fall through */
}
+ /* ----------- VMULL.P64 ----------- */
+ /*
+ 31 27 23 21 19 15 11 7 3
+ T2: 1110 1111 1 D 10 n d 1110 N 0 M 0 m
+ A2: 1111 0010 -------------------------
+
+ The ARM documentation is pretty difficult to follow here.
+ Same comments about conditionalisation as for the AES group above apply.
+
+ Operands: the destination is Q register (D:d) >> 1, so D:d must be
+ even; the two sources are the 64-bit D registers N:n and M:m.  The
+ 128-bit result is produced by a dirty helper which is handed each
+ source as a pair of 32-bit halves (upper half first).
+ */
+ {
+    Bool gate = False;
+
+    /* The top 9 bits are what distinguish the T2 and A2 encodings. */
+    UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
+    if (INSNA(31,23) == hi9 && INSNA(21,20) == BITS2(1,0)
+        && INSNA(11,8) == BITS4(1,1,1,0)
+        && INSNA(6,6) == 0 && INSNA(4,4) == 0) {
+       gate = True;
+    }
+
+    /* 5-bit D-register numbers: extension bit goes in bit 4. */
+    UInt regN = (INSNA(7,7) << 4) | INSNA(19,16);
+    UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+    UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+
+    /* The destination is written as Q(regD >> 1), so an odd regD does
+       not name a Q register and the insn is not accepted. */
+    if ((regD & 1) == 1)
+       gate = False;
+
+    if (gate) {
+       const HChar* iname = "vmull";
+       void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
+       const HChar* hname = "armg_dirtyhelper_VMULLP64";
+
+       if (isT) {
+          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+       }
+
+       /* Read both sources as 64-bit D registers. */
+       IRTemp srcN = newTemp(Ity_I64);
+       IRTemp srcM = newTemp(Ity_I64);
+       assign(srcN, getDRegI64(regN));
+       assign(srcM, getDRegI64(regM));
+
+       /* Helper signature is (res, N-hi32, N-lo32, M-hi32, M-lo32). */
+       IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
+                                       unop(Iop_64HIto32, mkexpr(srcN)),
+                                       unop(Iop_64to32, mkexpr(srcN)),
+                                       unop(Iop_64HIto32, mkexpr(srcM)),
+                                       unop(Iop_64to32, mkexpr(srcM)));
+
+       IRTemp res = newTemp(Ity_V128);
+       IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+                                        hname, helper, argvec );
+       stmt(IRStmt_Dirty(di));
+       putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+       /* regN and regM are D-register numbers, so print them as d<n>
+          rather than as q/w registers. */
+       DIP("%s.p64 q%u, d%u, d%u\n", iname, regD >> 1, regN, regM);
+       return True;
+    }
+    /* fall through */
+ }
+
+
/* ---------- Doesn't match anything. ---------- */
return False;
|
|
From: <sv...@va...> - 2016-08-07 16:42:47
|
Author: sewardj
Date: Sun Aug 7 17:42:37 2016
New Revision: 3241
Log:
Implement SHA1C, SHA1M, SHA1P, SHA1SU0, SHA256H2, SHA256H, SHA256SU1,
SHA1H, SHA1SU1, SHA256SU0.
Modified:
trunk/priv/guest_arm_defs.h
trunk/priv/guest_arm_helpers.c
trunk/priv/guest_arm_toIR.c
trunk/priv/host_arm_isel.c
trunk/priv/ir_defs.c
trunk/pub/libvex_ir.h
Modified: trunk/priv/guest_arm_defs.h
==============================================================================
--- trunk/priv/guest_arm_defs.h (original)
+++ trunk/priv/guest_arm_defs.h Sun Aug 7 17:42:37 2016
@@ -113,25 +113,109 @@
/* --- DIRTY HELPERS --- */
+/* Confusingly, for the AES insns, the 32-bit ARM docs refer to the
+ one-and-only source register as 'm' whereas the 64-bit docs refer to
+ it as 'n'. We sidestep that here by just calling it 'arg32_*'. */
+
+extern
+void armg_dirtyhelper_AESE (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ );
+
+extern
+void armg_dirtyhelper_AESD (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ );
+
+extern
+void armg_dirtyhelper_AESMC (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ );
+
+extern
+void armg_dirtyhelper_AESIMC (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ );
+
+extern
+void armg_dirtyhelper_SHA1C (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
+
+extern
+void armg_dirtyhelper_SHA1P (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
+
+extern
+void armg_dirtyhelper_SHA1M (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
+
+extern
+void armg_dirtyhelper_SHA1SU0 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
+
+extern
+void armg_dirtyhelper_SHA256H (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
+
+extern
+void armg_dirtyhelper_SHA256H2 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
+
extern
-void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA256SU1 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
extern
-void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA1SU1 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
extern
-void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA256SU0 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
extern
-void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 );
+void armg_dirtyhelper_SHA1H (
+ /*OUT*/V128* res,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ );
/*---------------------------------------------------------*/
Modified: trunk/priv/guest_arm_helpers.c
==============================================================================
--- trunk/priv/guest_arm_helpers.c (original)
+++ trunk/priv/guest_arm_helpers.c Sun Aug 7 17:42:37 2016
@@ -553,50 +553,234 @@
be at least 8 aligned.
*/
/* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESE (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ )
{
vassert(0 == (((HWord)res) & (8-1)));
- ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
- ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+ ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
arm64g_dirtyhelper_AESE(res, argHi, argLo);
}
/* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESD (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ )
{
vassert(0 == (((HWord)res) & (8-1)));
- ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
- ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+ ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
arm64g_dirtyhelper_AESD(res, argHi, argLo);
}
/* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESMC (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ )
{
vassert(0 == (((HWord)res) & (8-1)));
- ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
- ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+ ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
arm64g_dirtyhelper_AESMC(res, argHi, argLo);
}
/* CALLED FROM GENERATED CODE */
-void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
- UInt argW3, UInt argW2,
- UInt argW1, UInt argW0 )
+void armg_dirtyhelper_AESIMC (
+ /*OUT*/V128* res,
+ UInt arg32_3, UInt arg32_2, UInt arg32_1, UInt arg32_0
+ )
{
vassert(0 == (((HWord)res) & (8-1)));
- ULong argHi = (((ULong)argW3) << 32) | ((ULong)argW2);
- ULong argLo = (((ULong)argW1) << 32) | ((ULong)argW0);
+ ULong argHi = (((ULong)arg32_3) << 32) | ((ULong)arg32_2);
+ ULong argLo = (((ULong)arg32_1) << 32) | ((ULong)arg32_0);
arm64g_dirtyhelper_AESIMC(res, argHi, argLo);
}
+/* DIRTY HELPERS for the SHA instruction family. Same comments
+ as for the AES group above apply.
+*/
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA1C. */
+void armg_dirtyhelper_SHA1C (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA1C(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA1P. */
+void armg_dirtyhelper_SHA1P (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA1P(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA1M. */
+void armg_dirtyhelper_SHA1M (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA1M(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA1SU0. */
+void armg_dirtyhelper_SHA1SU0 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA1SU0(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA256H. */
+void armg_dirtyhelper_SHA256H (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA256H(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA256H2. */
+void armg_dirtyhelper_SHA256H2 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA256H2(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D, N and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA256SU1. */
+void armg_dirtyhelper_SHA256SU1 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argN3, UInt argN2, UInt argN1, UInt argN0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argNhi = (((ULong)argN3) << 32) | ((ULong)argN2);
+ ULong argNlo = (((ULong)argN1) << 32) | ((ULong)argN0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA256SU1(res, argDhi, argDlo,
+ argNhi, argNlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA1SU1. */
+void armg_dirtyhelper_SHA1SU1 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA1SU1(res, argDhi, argDlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds D and M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA256SU0. */
+void armg_dirtyhelper_SHA256SU0 (
+ /*OUT*/V128* res,
+ UInt argD3, UInt argD2, UInt argD1, UInt argD0,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argDhi = (((ULong)argD3) << 32) | ((ULong)argD2);
+ ULong argDlo = (((ULong)argD1) << 32) | ((ULong)argD0);
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA256SU0(res, argDhi, argDlo, argMhi, argMlo);
+}
+
+/* CALLED FROM GENERATED CODE. Rebuilds M as 64-bit halves (words 3:2 = hi, words 1:0 = lo) and forwards to arm64g_dirtyhelper_SHA1H. */
+void armg_dirtyhelper_SHA1H (
+ /*OUT*/V128* res,
+ UInt argM3, UInt argM2, UInt argM1, UInt argM0
+ )
+{
+ vassert(0 == (((HWord)res) & (8-1))); /* res must be 8-byte aligned */
+ ULong argMhi = (((ULong)argM3) << 32) | ((ULong)argM2);
+ ULong argMlo = (((ULong)argM1) << 32) | ((ULong)argM0);
+ arm64g_dirtyhelper_SHA1H(res, argMhi, argMlo);
+}
+
+
/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers. ---*/
/*--- These help iropt specialise calls the above run-time ---*/
Modified: trunk/priv/guest_arm_toIR.c
==============================================================================
--- trunk/priv/guest_arm_toIR.c (original)
+++ trunk/priv/guest_arm_toIR.c Sun Aug 7 17:42:37 2016
@@ -12688,7 +12688,7 @@
vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
}
- /* ----------- AESD.8 q_q ----------- */
+ /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
/* 31 27 23 21 19 17 15 11 7 3
T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
@@ -12703,7 +12703,8 @@
A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
sz must be 00
- ARM encoding is in NV space
+ ARM encoding is in NV space.
+ In Thumb mode, we must not be in an IT block.
*/
{
UInt regD = 99, regM = 99, opc = 4/*invalid*/;
@@ -12727,6 +12728,9 @@
if (isT) {
gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
}
+ /* In ARM mode, this is statically unconditional. In Thumb mode,
+ this must be dynamically unconditional, and we've SIGILLd if not.
+ In either case we can create unconditional IR. */
IRTemp op1 = newTemp(Ity_V128);
IRTemp op2 = newTemp(Ity_V128);
IRTemp src = newTemp(Ity_V128);
@@ -12769,6 +12773,276 @@
/* fall through */
}
+ /* ----------- SHA 3-reg insns q_q_q ----------- */
+ /*
+ 31 27 23 19 15 11 7 3
+ T1: 1110 1111 0 D 00 n d 1100 N Q M 0 m SHA1C Qd, Qn, Qm ix=0
+ A1: 1111 0010 ----------------------------
+
+ T1: 1110 1111 0 D 01 n d 1100 N Q M 0 m SHA1P Qd, Qn, Qm ix=1
+ A1: 1111 0010 ----------------------------
+
+ T1: 1110 1111 0 D 10 n d 1100 N Q M 0 m SHA1M Qd, Qn, Qm ix=2
+ A1: 1111 0010 ----------------------------
+
+ T1: 1110 1111 0 D 11 n d 1100 N Q M 0 m SHA1SU0 Qd, Qn, Qm ix=3
+ A1: 1111 0010 ----------------------------
+ (that's a complete set of 4, based on insn[21,20])
+
+ T1: 1111 1111 0 D 00 n d 1100 N Q M 0 m SHA256H Qd, Qn, Qm ix=4
+ A1: 1111 0011 ----------------------------
+
+ T1: 1111 1111 0 D 01 n d 1100 N Q M 0 m SHA256H2 Qd, Qn, Qm ix=5
+ A1: 1111 0011 ----------------------------
+
+ T1: 1111 1111 0 D 10 n d 1100 N Q M 0 m SHA256SU1 Qd, Qn, Qm ix=6
+ A1: 1111 0011 ----------------------------
+ (3/4 of a complete set of 4, based on insn[21,20])
+
+ Q must be 1. Same comments about conditionalisation as for the AES
+ group above apply.
+ */
+ {
+ UInt ix = 8; /* invalid */
+ Bool gate = False;
+ /* insn[31:23] selects the SHA1 or SHA256 family for the current ISA. */
+ UInt hi9_sha1 = isT ? BITS9(1,1,1,0,1,1,1,1,0)
+ : BITS9(1,1,1,1,0,0,1,0,0);
+ UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
+ : BITS9(1,1,1,1,0,0,1,1,0);
+ if ((INSNA(31,23) == hi9_sha1 || INSNA(31,23) == hi9_sha256)
+ && INSNA(11,8) == BITS4(1,1,0,0)
+ && INSNA(6,6) == 1 && INSNA(4,4) == 0) {
+ ix = INSNA(21,20);
+ if (INSNA(31,23) == hi9_sha256)
+ ix |= 4;
+ if (ix < 7)
+ gate = True;
+ }
+ /* Odd register numbers are rejected; the Q index used below is reg >> 1. */
+ UInt regN = (INSNA(7,7) << 4) | INSNA(19,16);
+ UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+ UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+ if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
+ gate = False;
+
+ if (gate) {
+ vassert(ix < 7); /* ix is unsigned, so a lower-bound check is vacuous */
+ const HChar* inames[7]
+ = { "sha1c", "sha1p", "sha1m", "sha1su0",
+ "sha256h", "sha256h2", "sha256su1" };
+ void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
+ UInt,UInt,UInt,UInt,UInt,UInt)
+ = { &armg_dirtyhelper_SHA1C, &armg_dirtyhelper_SHA1P,
+ &armg_dirtyhelper_SHA1M, &armg_dirtyhelper_SHA1SU0,
+ &armg_dirtyhelper_SHA256H, &armg_dirtyhelper_SHA256H2,
+ &armg_dirtyhelper_SHA256SU1 };
+ const HChar* hnames[7]
+ = { "armg_dirtyhelper_SHA1C", "armg_dirtyhelper_SHA1P",
+ "armg_dirtyhelper_SHA1M", "armg_dirtyhelper_SHA1SU0",
+ "armg_dirtyhelper_SHA256H", "armg_dirtyhelper_SHA256H2",
+ "armg_dirtyhelper_SHA256SU1" };
+
+ /* This is a really lame way to implement this, even worse than
+ the arm64 version. But at least it works. */
+
+ if (isT) {
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ }
+
+ IRTemp vD = newTemp(Ity_V128);
+ IRTemp vN = newTemp(Ity_V128);
+ IRTemp vM = newTemp(Ity_V128);
+ assign(vD, getQReg(regD >> 1));
+ assign(vN, getQReg(regN >> 1));
+ assign(vM, getQReg(regM >> 1));
+
+ IRTemp d32_3, d32_2, d32_1, d32_0;
+ d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
+ breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
+
+ IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
+ n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
+ breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
+
+ IRTemp m32_3, m32_2, m32_1, m32_0;
+ m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
+ breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
+
+ IRTemp n32_3 = newTemp(Ity_I32);
+ IRTemp n32_2 = newTemp(Ity_I32);
+ IRTemp n32_1 = newTemp(Ity_I32);
+ IRTemp n32_0 = newTemp(Ity_I32);
+
+ /* Mask off any bits of the N register operand that aren't actually
+ needed, so that Memcheck doesn't complain unnecessarily. */
+ switch (ix) {
+ case 0: case 1: case 2:
+ assign(n32_3, mkU32(0));
+ assign(n32_2, mkU32(0));
+ assign(n32_1, mkU32(0));
+ assign(n32_0, mkexpr(n32_0_pre));
+ break;
+ case 3: case 4: case 5: case 6:
+ assign(n32_3, mkexpr(n32_3_pre));
+ assign(n32_2, mkexpr(n32_2_pre));
+ assign(n32_1, mkexpr(n32_1_pre));
+ assign(n32_0, mkexpr(n32_0_pre));
+ break;
+ default:
+ vassert(0);
+ }
+
+ IRExpr** argvec
+ = mkIRExprVec_13(
+ IRExpr_VECRET(),
+ mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
+ mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
+ mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
+ );
+
+ IRTemp res = newTemp(Ity_V128);
+ IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ hnames[ix], helpers[ix], argvec );
+ stmt(IRStmt_Dirty(di));
+ putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+ DIP("%s.8 q%u, q%u, q%u\n",
+ inames[ix], regD >> 1, regN >> 1, regM >> 1);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------- SHA1SU1, SHA256SU0 ----------- */
+ /*
+ 31 27 23 21 19 15 11 7 3
+ T1: 1111 1111 1 D 11 1010 d 0011 10 M 0 m SHA1SU1 Qd, Qm
+ A1: 1111 0011 ----------------------------
+
+ T1: 1111 1111 1 D 11 1010 d 0011 11 M 0 m SHA256SU0 Qd, Qm
+ A1: 1111 0011 ----------------------------
+
+ Same comments about conditionalisation as for the AES group above apply.
+ */
+ {
+ Bool gate = False;
+ /* insn[31:23]: bit 23 is 1 in both the T1 and the A1 encodings. */
+ UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
+ if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,1,0)
+ && INSNA(11,7) == BITS5(0,0,1,1,1) && INSNA(4,4) == 0) {
+ gate = True;
+ }
+ /* Odd register numbers are rejected; the Q index used below is reg >> 1. */
+ UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+ UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+ if ((regD & 1) == 1 || (regM & 1) == 1)
+ gate = False;
+ /* insn[6] tells the two instructions apart: 0 = SHA1SU1, 1 = SHA256SU0. */
+ Bool is_1SU1 = INSNA(6,6) == 0;
+
+ if (gate) {
+ const HChar* iname
+ = is_1SU1 ? "sha1su1" : "sha256su0";
+ void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
+ = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
+ : &armg_dirtyhelper_SHA256SU0;
+ const HChar* hname
+ = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
+ : "armg_dirtyhelper_SHA256SU0";
+
+ if (isT) {
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ }
+
+ IRTemp vD = newTemp(Ity_V128);
+ IRTemp vM = newTemp(Ity_V128);
+ assign(vD, getQReg(regD >> 1));
+ assign(vM, getQReg(regM >> 1));
+
+ IRTemp d32_3, d32_2, d32_1, d32_0;
+ d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
+ breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
+
+ IRTemp m32_3, m32_2, m32_1, m32_0;
+ m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
+ breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
+
+ IRExpr** argvec
+ = mkIRExprVec_9(
+ IRExpr_VECRET(),
+ mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
+ mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
+ );
+
+ IRTemp res = newTemp(Ity_V128);
+ IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ hname, helper, argvec );
+ stmt(IRStmt_Dirty(di));
+ putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+ DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ----------- SHA1H ----------- */
+ /*
+ 31 27 23 21 19 15 11 7 3
+ T1: 1111 1111 1 D 11 1001 d 0010 11 M 0 m SHA1H Qd, Qm
+ A1: 1111 0011 ----------------------------
+
+ Same comments about conditionalisation as for the AES group above apply.
+ */
+ {
+ Bool gate = False;
+ /* insn[31:23]: bit 23 is 1 in both the T1 and the A1 encodings. */
+ UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
+ if (INSNA(31,23) == hi9 && INSNA(21,16) == BITS6(1,1,1,0,0,1)
+ && INSNA(11,6) == BITS6(0,0,1,0,1,1) && INSNA(4,4) == 0) {
+ gate = True;
+ }
+ /* Odd register numbers are rejected; the Q index used below is reg >> 1. */
+ UInt regD = (INSNA(22,22) << 4) | INSNA(15,12);
+ UInt regM = (INSNA(5,5) << 4) | INSNA(3,0);
+ if ((regD & 1) == 1 || (regM & 1) == 1)
+ gate = False;
+
+ if (gate) {
+ const HChar* iname = "sha1h";
+ void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
+ const HChar* hname = "armg_dirtyhelper_SHA1H";
+
+ if (isT) {
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ }
+
+ IRTemp vM = newTemp(Ity_V128);
+ assign(vM, getQReg(regM >> 1));
+
+ IRTemp m32_3, m32_2, m32_1, m32_0;
+ m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
+ breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
+ /* m32_3, m32_2, m32_1 are just abandoned. No harm; iropt will
+ remove them. */
+
+ IRExpr* zero = mkU32(0);
+ IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
+ zero, zero, zero, mkexpr(m32_0));
+
+ IRTemp res = newTemp(Ity_V128);
+ IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
+ hname, helper, argvec );
+ stmt(IRStmt_Dirty(di));
+ putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+
+ DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
+ return True;
+ }
+ /* fall through */
+ }
+
/* ---------- Doesn't match anything. ---------- */
return False;
Modified: trunk/priv/host_arm_isel.c
==============================================================================
--- trunk/priv/host_arm_isel.c (original)
+++ trunk/priv/host_arm_isel.c Sun Aug 7 17:42:37 2016
@@ -436,9 +436,9 @@
HReg argVRegs[n_real_args];
but that makes it impossible to do 'goto's forward past.
Hence the following kludge. */
- vassert(n_real_args <= 11);
- HReg argVRegs[11];
- for (i = 0; i < 11; i++)
+ vassert(n_real_args <= 12);
+ HReg argVRegs[12];
+ for (i = 0; i < 12; i++)
argVRegs[i] = INVALID_HREG;
/* Compute args into vregs. */
Modified: trunk/priv/ir_defs.c
==============================================================================
--- trunk/priv/ir_defs.c (original)
+++ trunk/priv/ir_defs.c Sun Aug 7 17:42:37 2016
@@ -1986,6 +1986,45 @@
vec[8] = NULL;
return vec;
}
+IRExpr** mkIRExprVec_9 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5, IRExpr* arg6,
+ IRExpr* arg7, IRExpr* arg8, IRExpr* arg9 ) {
+ IRExpr** vec = LibVEX_Alloc_inline(10 * sizeof(IRExpr*)); /* 9 args + NULL */
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = arg6;
+ vec[6] = arg7;
+ vec[7] = arg8;
+ vec[8] = arg9;
+ vec[9] = NULL; /* the returned vector is NULL-terminated */
+ return vec;
+}
+IRExpr** mkIRExprVec_13 ( IRExpr* arg1, IRExpr* arg2, IRExpr* arg3,
+ IRExpr* arg4, IRExpr* arg5, IRExpr* arg6,
+ IRExpr* arg7, IRExpr* arg8, IRExpr* arg9,
+ IRExpr* arg10, IRExpr* arg11, IRExpr* arg12,
+ IRExpr* arg13
+ ) {
+ IRExpr** vec = LibVEX_Alloc_inline(14 * sizeof(IRExpr*)); /* 13 args + NULL */
+ vec[0] = arg1;
+ vec[1] = arg2;
+ vec[2] = arg3;
+ vec[3] = arg4;
+ vec[4] = arg5;
+ vec[5] = arg6;
+ vec[6] = arg7;
+ vec[7] = arg8;
+ vec[8] = arg9;
+ vec[9] = arg10;
+ vec[10] = arg11;
+ vec[11] = arg12;
+ vec[12] = arg13;
+ vec[13] = NULL; /* the returned vector is NULL-terminated */
+ return vec;
+}
/* Constructors -- IRDirty */
Modified: trunk/pub/libvex_ir.h
==============================================================================
--- trunk/pub/libvex_ir.h (original)
+++ trunk/pub/libvex_ir.h Sun Aug 7 17:42:37 2016
@@ -2197,7 +2197,12 @@
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
- IRExpr*, IRExpr*, IRExpr*, IRExpr*);
+ IRExpr*, IRExpr*, IRExpr*, IRExpr* );
+extern IRExpr** mkIRExprVec_9 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+ IRExpr*, IRExpr*, IRExpr*, IRExpr*, IRExpr* );
+extern IRExpr** mkIRExprVec_13 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+ IRExpr*, IRExpr*, IRExpr*, IRExpr*,
+ IRExpr*, IRExpr*, IRExpr*, IRExpr*, IRExpr* );
/* IRExpr copiers:
- shallowCopy: shallow-copy (ie. create a new vector that shares the
|
|
From: <sv...@va...> - 2016-08-06 13:04:39
|
Author: sewardj
Date: Sat Aug 6 14:04:32 2016
New Revision: 3240
Log:
Fix UBSAN reported complaints about left shifts of signed values
in the arm32 front and back ends.
Modified:
trunk/priv/guest_arm_toIR.c
trunk/priv/host_arm_defs.c
Modified: trunk/priv/guest_arm_toIR.c
==============================================================================
--- trunk/priv/guest_arm_toIR.c (original)
+++ trunk/priv/guest_arm_toIR.c Sat Aug 6 14:04:32 2016
@@ -1598,8 +1598,9 @@
IRTemp regT, /* value to clamp - Ity_I32 */
UInt imm5 ) /* saturation ceiling */
{
- UInt ceil = (1 << imm5) - 1; // (2^imm5)-1
- UInt floor = 0;
+ ULong ceil64 = (1ULL << imm5) - 1; // (2^imm5)-1
+ UInt ceil = (UInt)ceil64;
+ UInt floor = 0;
IRTemp nd0 = newTemp(Ity_I32);
IRTemp nd1 = newTemp(Ity_I32);
@@ -1642,8 +1643,10 @@
IRTemp* res, /* OUT - Ity_I32 */
IRTemp* resQ ) /* OUT - Ity_I32 */
{
- Int ceil = (1 << (imm5-1)) - 1; // (2^(imm5-1))-1
- Int floor = -(1 << (imm5-1)); // -(2^(imm5-1))
+ Long ceil64 = (1LL << (imm5-1)) - 1; // (2^(imm5-1))-1
+ Long floor64 = -(1LL << (imm5-1)); // -(2^(imm5-1))
+ Int ceil = (Int)ceil64;
+ Int floor = (Int)floor64;
IRTemp nd0 = newTemp(Ity_I32);
IRTemp nd1 = newTemp(Ity_I32);
@@ -8874,8 +8877,8 @@
&& INSN(27,24) == BITS4(1,1,1,1)) {
// Thumb, DP
UInt reformatted = INSN(23,0);
- reformatted |= (INSN(28,28) << 24); // U bit
- reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
+ reformatted |= (((UInt)INSN(28,28)) << 24); // U bit
+ reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25);
return dis_neon_data_processing(reformatted, condT);
}
@@ -8889,7 +8892,7 @@
}
if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
UInt reformatted = INSN(23,0);
- reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
+ reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24);
return dis_neon_load_or_store(reformatted, isT, condT);
}
@@ -14542,7 +14545,7 @@
IRExpr* rm = mkU32(Irrm_NEAREST);
IRTemp scale = newTemp(Ity_F64);
- assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
+ assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
&& size == 32) {
@@ -14719,8 +14722,9 @@
// and set CPSR.T = 1, that is, switch to Thumb mode
if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
UInt bitH = INSN(24,24);
- Int uimm24 = INSN(23,0);
- Int simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
+ UInt uimm24 = INSN(23,0); uimm24 <<= 8;
+ Int simm24 = (Int)uimm24; simm24 >>= 8;
+ simm24 = (((UInt)simm24) << 2) + (bitH << 1);
/* Now this is a bit tricky. Since we're decoding an ARM insn,
it is implies that CPSR.T == 0. Hence the current insn's
address is guaranteed to be of the form X--(30)--X00. So, no
@@ -15756,10 +15760,9 @@
//
if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
UInt link = (insn >> 24) & 1;
- UInt uimm24 = insn & ((1<<24)-1);
- Int simm24 = (Int)uimm24;
- UInt dst = guest_R15_curr_instr_notENC + 8
- + (((simm24 << 8) >> 8) << 2);
+ UInt uimm24 = insn & ((1<<24)-1); uimm24 <<= 8;
+ Int simm24 = (Int)uimm24; simm24 >>= 8;
+ UInt dst = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
if (link) {
putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
@@ -16539,7 +16542,7 @@
IRTemp src = newTemp(Ity_I32);
IRTemp olddst = newTemp(Ity_I32);
IRTemp newdst = newTemp(Ity_I32);
- UInt mask = 1 << (msb - lsb);
+ UInt mask = ((UInt)1) << (msb - lsb);
mask = (mask - 1) + mask;
vassert(mask != 0); // guaranteed by "msb < lsb" check above
mask <<= lsb;
@@ -19353,8 +19356,8 @@
case BITS5(1,1,1,0,0): {
/* ---------------- B #simm11 ---------------- */
- Int simm11 = INSN0(10,0);
- simm11 = (simm11 << 21) >> 20;
+ UInt uimm11 = INSN0(10,0); uimm11 <<= 21;
+ Int simm11 = (Int)uimm11; simm11 >>= 20;
UInt dst = simm11 + guest_R15_curr_instr_notENC + 4;
/* Only allowed outside or last-in IT block; SIGILL if not so. */
gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
@@ -19383,8 +19386,8 @@
case BITS4(1,1,0,1): {
/* ---------------- Bcond #simm8 ---------------- */
UInt cond = INSN0(11,8);
- Int simm8 = INSN0(7,0);
- simm8 = (simm8 << 24) >> 23;
+ UInt uimm8 = INSN0(7,0); uimm8 <<= 24;
+ Int simm8 = (Int)uimm8; simm8 >>= 23;
UInt dst = simm8 + guest_R15_curr_instr_notENC + 4;
if (cond != ARMCondAL && cond != ARMCondNV) {
/* Not allowed in an IT block; SIGILL if so. */
@@ -19472,13 +19475,15 @@
UInt bJ2 = INSN1(11,11);
UInt bI1 = 1 ^ (bJ1 ^ bS);
UInt bI2 = 1 ^ (bJ2 ^ bS);
- Int simm25
+ UInt uimm25
= (bS << (1 + 1 + 10 + 11 + 1))
| (bI1 << (1 + 10 + 11 + 1))
| (bI2 << (10 + 11 + 1))
| (INSN0(9,0) << (11 + 1))
| (INSN1(10,0) << 1);
- simm25 = (simm25 << 7) >> 7;
+ uimm25 <<= 7;
+ Int simm25 = (Int)uimm25;
+ simm25 >>= 7;
vassert(0 == (guest_R15_curr_instr_notENC & 1));
UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
@@ -20900,13 +20905,15 @@
&& INSN1(12,12) == 0) {
UInt cond = INSN0(9,6);
if (cond != ARMCondAL && cond != ARMCondNV) {
- Int simm21
+ UInt uimm21
= (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
| (INSN1(11,11) << (1 + 6 + 11 + 1))
| (INSN1(13,13) << (6 + 11 + 1))
| (INSN0(5,0) << (11 + 1))
| (INSN1(10,0) << 1);
- simm21 = (simm21 << 11) >> 11;
+ uimm21 <<= 11;
+ Int simm21 = (Int)uimm21;
+ simm21 >>= 11;
vassert(0 == (guest_R15_curr_instr_notENC & 1));
UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
@@ -20944,13 +20951,15 @@
UInt bJ2 = INSN1(11,11);
UInt bI1 = 1 ^ (bJ1 ^ bS);
UInt bI2 = 1 ^ (bJ2 ^ bS);
- Int simm25
+ UInt uimm25
= (bS << (1 + 1 + 10 + 11 + 1))
| (bI1 << (1 + 10 + 11 + 1))
| (bI2 << (10 + 11 + 1))
| (INSN0(9,0) << (11 + 1))
| (INSN1(10,0) << 1);
- simm25 = (simm25 << 7) >> 7;
+ uimm25 <<= 7;
+ Int simm25 = (Int)uimm25;
+ simm25 >>= 7;
vassert(0 == (guest_R15_curr_instr_notENC & 1));
UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
@@ -21392,7 +21401,7 @@
IRTemp src = newTemp(Ity_I32);
IRTemp olddst = newTemp(Ity_I32);
IRTemp newdst = newTemp(Ity_I32);
- UInt mask = 1 << (msb - lsb);
+ UInt mask = ((UInt)1) << (msb - lsb);
mask = (mask - 1) + mask;
vassert(mask != 0); // guaranteed by "msb < lsb" check above
mask <<= lsb;
Modified: trunk/priv/host_arm_defs.c
==============================================================================
--- trunk/priv/host_arm_defs.c (original)
+++ trunk/priv/host_arm_defs.c Sat Aug 6 14:04:32 2016
@@ -2776,33 +2776,38 @@
#define X1111 BITS4(1,1,1,1)
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
- ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((((UInt)(zzx7)) & 0xF) << 28) | \
+ (((zzx6) & 0xF) << 24) | \
(((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
(((zzx3) & 0xF) << 12))
#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
- ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((((UInt)(zzx7)) & 0xF) << 28) | \
+ (((zzx6) & 0xF) << 24) | \
(((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
(((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
- ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((((UInt)(zzx7)) & 0xF) << 28) | \
+ (((zzx6) & 0xF) << 24) | \
(((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
(((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
- ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((((UInt)(zzx7)) & 0xF) << 28) | \
+ (((zzx6) & 0xF) << 24) | \
(((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
(((zzx0) & 0xF) << 0))
#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
- ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
+ (((((UInt)(zzx7)) & 0xF) << 28) | \
+ (((zzx6) & 0xF) << 24) | \
(((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
(((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
(((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
#define XX______(zzx7,zzx6) \
- ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
+ (((((UInt)(zzx7)) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
/* Generate a skeletal insn that involves an a RI84 shifter operand.
Returns a word which is all zeroes apart from bits 25 and 11..0,
@@ -4838,8 +4843,11 @@
/* And make the modifications. */
if (shortOK) {
- Int simm24 = (Int)(delta >> 2);
- vassert(simm24 == ((simm24 << 8) >> 8));
+ UInt uimm24 = (UInt)(delta >> 2);
+ UInt uimm24_shl8 = uimm24 << 8;
+ Int simm24 = (Int)uimm24_shl8;
+ simm24 >>= 8;
+ vassert(uimm24 == simm24);
p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
p[1] = 0xFF000000;
p[2] = 0xFF000000;
|
|
From: <sv...@va...> - 2016-08-06 12:34:06
|
Author: sewardj
Date: Sat Aug 6 13:33:59 2016
New Revision: 15932
Log:
Fix uses of CPSR in these tests, so that (1) the relevant fields are
initialised properly before the test, and (2) after the test, we don't
print implementation-defined parts of the resulting CPSR.
Modified:
trunk/none/tests/arm/v6intARM.c
trunk/none/tests/arm/v6intThumb.c
trunk/none/tests/arm/v6media.c
Modified: trunk/none/tests/arm/v6intARM.c
==============================================================================
--- trunk/none/tests/arm/v6intARM.c (original)
+++ trunk/none/tests/arm/v6intARM.c Sat Aug 6 13:33:59 2016
@@ -13,8 +13,8 @@
\
__asm__ volatile( \
"movs %3,%3;" \
- "msrne cpsr_f,#(1<<29);" \
- "msreq cpsr_f,#0;" \
+ "msrne cpsr_fs,#(1<<29);" \
+ "msreq cpsr_fs,#0;" \
"mov " #RM ",%2;" \
/* set #RD to 0x55555555 so we can see which parts get overwritten */ \
"mov " #RD ", #0x55" "\n\t" \
@@ -30,7 +30,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -45,8 +45,8 @@
\
__asm__ volatile( \
"movs %4,%4;" \
- "msrne cpsr_f,#(1<<29);" \
- "msreq cpsr_f,#0;" \
+ "msrne cpsr_fs,#(1<<29);" \
+ "msreq cpsr_fs,#0;" \
"mov " #RM ",%2;" \
"mov " #RN ",%3;" \
instruction ";" \
@@ -59,7 +59,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, rn 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, RNval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -74,8 +74,8 @@
\
__asm__ volatile( \
"movs %5,%5;" \
- "msrne cpsr_f,#(1<<29);" \
- "msreq cpsr_f,#0;" \
+ "msrne cpsr_fs,#(1<<29);" \
+ "msreq cpsr_fs,#0;" \
"mov " #RM ",%2;" \
"mov " #RN ",%3;" \
"mov " #RS ",%4;" \
@@ -89,7 +89,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, rn 0x%08x rs 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, RNval, RSval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -105,8 +105,8 @@
\
__asm__ volatile( \
"movs %7,%7;" \
- "msrne cpsr_f,#(1<<29);" \
- "msreq cpsr_f,#0;" \
+ "msrne cpsr_fs,#(1<<29);" \
+ "msreq cpsr_fs,#0;" \
"mov " #RD ",%3;" \
"mov " #RD2 ",%4;" \
"mov " #RM ",%5;" \
@@ -122,7 +122,7 @@
printf("%s :: rd 0x%08x rd2 0x%08x, rm 0x%08x rs 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, out2, RMval, RSval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
Modified: trunk/none/tests/arm/v6intThumb.c
==============================================================================
--- trunk/none/tests/arm/v6intThumb.c (original)
+++ trunk/none/tests/arm/v6intThumb.c Sat Aug 6 13:33:59 2016
@@ -22,7 +22,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %2;" \
+ "msr cpsr_fs, %2;" \
instruction ";" \
"mov %0," #RD ";" \
"mrs %1,cpsr;" \
@@ -33,7 +33,7 @@
printf("%s :: rd 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -50,7 +50,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %2;" \
+ "msr cpsr_fs, %2;" \
"mov " #RD ",%3;" \
instruction ";" \
"mov %0," #RD ";" \
@@ -62,7 +62,7 @@
printf("%s :: rd 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -77,7 +77,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %3;" \
+ "msr cpsr_fs, %3;" \
"mov " #RM ",%2;" \
/* set #RD to 0x55555555 so we can see which parts get overwritten */ \
"mov " #RD ", #0x55" "\n\t" \
@@ -93,7 +93,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -109,7 +109,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %3;" \
+ "msr cpsr_fs, %3;" \
"mov " #RM ",%2;" \
"mov " #RD ",%4;" \
instruction ";" \
@@ -122,7 +122,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -138,7 +138,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %4;" \
+ "msr cpsr_fs, %4;" \
"mov " #RM ",%2;" \
"mov " #RN ",%3;" \
instruction ";" \
@@ -151,7 +151,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, rn 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, RNval, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -165,7 +165,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %5;" \
+ "msr cpsr_fs, %5;" \
"mov " #RM ",%2;" \
"mov " #RN ",%3;" \
"mov " #RS ",%4;" \
@@ -179,7 +179,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, rn 0x%08x rs 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, RMval, RNval, RSval, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -194,7 +194,7 @@
unsigned int cpsr; \
\
__asm__ volatile( \
- "msr cpsr_f, %7;" \
+ "msr cpsr_fs, %7;" \
"mov " #RD ",%3;" \
"mov " #RD2 ",%4;" \
"mov " #RM ",%5;" \
@@ -210,7 +210,7 @@
printf("%s :: rd 0x%08x rd2 0x%08x, rm 0x%08x rs 0x%08x, c:v-in %d, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, out2, RMval, RSval, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -225,7 +225,7 @@
unsigned int cpsr; \
__asm__ volatile(\
".align 4;" \
- "msr cpsr_f, %2;" \
+ "msr cpsr_fs, %2;" \
"mov " #RD ", #0;" \
".align 2;" \
".thumb;" \
@@ -248,7 +248,7 @@
); \
printf("%s :: rd 0x%08x, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -264,7 +264,7 @@
unsigned int cpsr; \
__asm__ volatile(\
".align 4;" \
- "msr cpsr_f, %3;" \
+ "msr cpsr_fs, %3;" \
".align 2;" \
".thumb;" \
".syntax unified;" \
@@ -287,7 +287,7 @@
); \
printf("%s :: s0 0x%08x s1 0x%08x, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, out2, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -302,7 +302,7 @@
unsigned int cpsr; \
__asm__ volatile(\
".align 4;" \
- "msr cpsr_f, %3;" \
+ "msr cpsr_fs, %3;" \
"mov " #RD ", #0;" \
"mov " #RD2 ", #0;" \
".align 2;" \
@@ -327,7 +327,7 @@
); \
printf("%s :: rd 0x%08x rd2 0x%08x, cpsr 0x%08x %c%c%c%c\n", \
instruction, out, out2, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -358,7 +358,7 @@
__asm__ volatile(
".thumb;\n"
".syntax unified ;\n"
- "msr cpsr_f, %3 ;\n"
+ "msr cpsr_fs, %3 ;\n"
"mov r9, %2 ;\n"
"movw r2, #:lower16:.ldrwpclabel1 ;\n"
"movt r2, #:upper16:.ldrwpclabel1 ;\n"
@@ -386,7 +386,7 @@
"cpsr 0x%08x %c%c%c%c\n", \
out, \
cvin, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
Modified: trunk/none/tests/arm/v6media.c
==============================================================================
--- trunk/none/tests/arm/v6media.c (original)
+++ trunk/none/tests/arm/v6media.c Sat Aug 6 13:33:59 2016
@@ -37,7 +37,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c%c ge[3:0]=%d%d%d%d\n", \
instruction, out, RMval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -66,7 +66,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, rn 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c%c ge[3:0]=%d%d%d%d\n", \
instruction, out, RMval, RNval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -96,7 +96,7 @@
printf("%s :: rd 0x%08x rm 0x%08x, rn 0x%08x rs 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c%c ge[3:0]=%d%d%d%d\n", \
instruction, out, RMval, RNval, RSval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
@@ -129,7 +129,7 @@
printf("%s :: rd 0x%08x rd2 0x%08x, rm 0x%08x rs 0x%08x, carryin %d, cpsr 0x%08x %c%c%c%c%c ge[3:0]=%d%d%d%d\n", \
instruction, out, out2, RMval, RSval, \
carryin ? 1 : 0, \
- cpsr & 0xffff0000, \
+ cpsr & 0xff0f0000, \
((1<<31) & cpsr) ? 'N' : ' ', \
((1<<30) & cpsr) ? 'Z' : ' ', \
((1<<29) & cpsr) ? 'C' : ' ', \
|
|
From: <sv...@va...> - 2016-08-06 12:32:44
|
Author: sewardj
Date: Sat Aug 6 13:32:37 2016
New Revision: 15931
Log:
do_vldm_vstm_check: don't use caller saved FP ('s') registers to hold
data that we expect to stay alive over calls to printf. Fixes erratic
failures of this test.
Modified:
trunk/none/tests/arm/vfp.c
Modified: trunk/none/tests/arm/vfp.c
==============================================================================
--- trunk/none/tests/arm/vfp.c (original)
+++ trunk/none/tests/arm/vfp.c Sat Aug 6 13:32:37 2016
@@ -848,55 +848,55 @@
printf("do_vldm_vstm_check:\n");
__asm__ volatile(
"mov r1, %0\n\t"
- "vldmia r1!, {s0, s1, s2, s3}\n\t"
+ "vldmia r1!, {s16, s17, s18, s19}\n\t"
"mov r0, %1\n\t"
"sub r1, r1, %0\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s2\n\t"
+ "vmov r1, s18\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s3\n\t"
+ "vmov r1, s19\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s0\n\t"
+ "vmov r1, s16\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
/* --- */
"add r1, %0, #32\n\t"
- "vldmdb r1!, {s5, s6}\n\t"
+ "vldmdb r1!, {s25, s26}\n\t"
"mov r0, %1\n\t"
"sub r1, r1, %0\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s5\n\t"
+ "vmov r1, s25\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s6\n\t"
+ "vmov r1, s26\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
/* --- */
"add r1, %0, #4\n\t"
- "vldmia r1, {s0, s1, s2, s3}\n\t"
+ "vldmia r1, {s20, s21, s22, s23}\n\t"
"mov r0, %1\n\t"
"sub r1, r1, %0\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s2\n\t"
+ "vmov r1, s22\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s3\n\t"
+ "vmov r1, s23\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
"mov r0, %1\n\t"
- "vmov r1, s0\n\t"
+ "vmov r1, s20\n\t"
"mov r3, r1\n\t"
"bl printf\n\t"
/* --- */
@@ -970,15 +970,15 @@
"bl printf\n\t"
/* --- */
"mov r0, #0x55\n\t"
- "vmov s0, r0\n\t"
+ "vmov s20, r0\n\t"
"mov r0, #0x56\n\t"
- "vmov s1, r0\n\t"
+ "vmov s21, r0\n\t"
"mov r0, #0x57\n\t"
- "vmov s2, r0\n\t"
+ "vmov s22, r0\n\t"
"mov r0, #0x58\n\t"
- "vmov s3, r0\n\t"
+ "vmov s23, r0\n\t"
"add r1, %0, #0\n\t"
- "vstmia r1!, {s0, s1, s2, s3}\n\t"
+ "vstmia r1!, {s20, s21, s22, s23}\n\t"
"mov r0, %1\n\t"
"sub r1, r1, %0\n\t"
"mov r3, r1\n\t"
@@ -1044,7 +1044,8 @@
:
: "r" (data), "r" (format), "r"(&res)
: "r0", "r1", "r2", "r3", "r5", "r12", "r14", "memory",
- "s0", "s1", "s2", "s3", "s5", "s6", "s16", "s17",
+ "s0", "s1", "s2", "s3", "s5", "s6", "s16", "s17", "s18", "s19",
+ "s20", "s21", "s22", "s23", "s25", "s26",
"d10", "d30", "d31"
);
printf("data:\n");
|
|
From: <sv...@va...> - 2016-08-06 07:15:38
|
Author: sewardj
Date: Sat Aug 6 08:15:30 2016
New Revision: 15930
Log:
Fix invalid code caught by Ubsan, in which we compute the address
of "cgs->events[-1]", even though it isn't dereferenced.
Modified:
trunk/cachegrind/cg_main.c
trunk/callgrind/main.c
Modified: trunk/cachegrind/cg_main.c
==============================================================================
--- trunk/cachegrind/cg_main.c (original)
+++ trunk/cachegrind/cg_main.c Sat Aug 6 08:15:30 2016
@@ -914,7 +914,6 @@
static
void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
- Event* lastEvt;
Event* evt;
tl_assert(isIRAtom(ea));
@@ -924,15 +923,16 @@
return;
/* Is it possible to merge this write with the preceding read? */
- lastEvt = &cgs->events[cgs->events_used-1];
- if (cgs->events_used > 0
- && lastEvt->tag == Ev_Dr
- && lastEvt->Ev.Dr.szB == datasize
- && lastEvt->inode == inode
- && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
- {
- lastEvt->tag = Ev_Dm;
- return;
+ if (cgs->events_used > 0) {
+ Event* lastEvt = &cgs->events[cgs->events_used-1];
+ if ( lastEvt->tag == Ev_Dr
+ && lastEvt->Ev.Dr.szB == datasize
+ && lastEvt->inode == inode
+ && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
+ {
+ lastEvt->tag = Ev_Dm;
+ return;
+ }
}
/* No. Add as normal. */
Modified: trunk/callgrind/main.c
==============================================================================
--- trunk/callgrind/main.c (original)
+++ trunk/callgrind/main.c Sat Aug 6 08:15:30 2016
@@ -637,7 +637,6 @@
static
void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
- Event* lastEvt;
Event* evt;
tl_assert(isIRAtom(ea));
tl_assert(datasize >= 1);
@@ -645,15 +644,16 @@
tl_assert(datasize <= CLG_(min_line_size));
/* Is it possible to merge this write with the preceding read? */
- lastEvt = &clgs->events[clgs->events_used-1];
- if (clgs->events_used > 0
- && lastEvt->tag == Ev_Dr
- && lastEvt->Ev.Dr.szB == datasize
- && lastEvt->inode == inode
- && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
- {
- lastEvt->tag = Ev_Dm;
- return;
+ if (clgs->events_used > 0) {
+ Event* lastEvt = &clgs->events[clgs->events_used-1];
+ if ( lastEvt->tag == Ev_Dr
+ && lastEvt->Ev.Dr.szB == datasize
+ && lastEvt->inode == inode
+ && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
+ {
+ lastEvt->tag = Ev_Dm;
+ return;
+ }
}
/* No. Add as normal. */
|
|
From: <sv...@va...> - 2016-08-05 17:22:31
|
Author: sewardj
Date: Fri Aug 5 18:22:21 2016
New Revision: 15929
Log:
Fix completely bogus array indexing introduced in r15927 -- how did this
ever work? Spotted by UBSAN. Yay UBSAN! Also update comments.
Modified:
trunk/memcheck/mc_translate.c
Modified: trunk/memcheck/mc_translate.c
==============================================================================
--- trunk/memcheck/mc_translate.c (original)
+++ trunk/memcheck/mc_translate.c Fri Aug 5 18:22:21 2016
@@ -6612,35 +6612,35 @@
bz2-32
- 1 111,840 -> 1,702,810; ratio 15.2
- 2 111,840 -> 1,656,644; ratio 14.8
- 3 111,840 -> 1,650,457; ratio 14.7
- 4 111,840 -> 1,649,103; ratio 14.7
- 5 111,840 -> 1,648,655; ratio 14.7
- 6 111,840 -> 1,648,435; ratio 14.7
- 7 111,840 -> 1,648,304; ratio 14.7
- 8 111,840 -> 1,648,304; ratio 14.7
- 10 111,840 -> 1,648,171; ratio 14.7
- 12 111,840 -> 1,648,109 ratio 14.7
- 16 111,840 -> 1,647,947; ratio 14.7
- 32 111,840 -> 1,647,881; ratio 14.7
- inf 111,840 -> 1,647,881; ratio 14.7
+ 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
+ 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
+ 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
+ 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
+ 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
+ 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
+ 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
+ 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
+ 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
+ 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
+ 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
+ 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
+ inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
bz2-64
- 1 106,628 -> 1,811,992; ratio 17.0
- 2 106,628 -> 1,797,805; ratio 16.9
- 3 106,628 -> 1,792,429; ratio 16.8
- 4 106,628 -> 1,791,037; ratio 16.8
- 5 106,628 -> 1,790,929; ratio 16.8
- 6 106,628 -> 1,790,810; ratio 16.8
- 7 106,628 -> 1,790,764; ratio 16.8
- 8 106,628 -> 1,790,764; ratio 16.8
- 10 106,628 -> 1,790,764; ratio 16.8
- 12 106,628 -> 1,790,764; ratio 16.8
- 16 106,628 -> 1,790,701; ratio 16.8
- 32 106,628 -> 1,790,671; ratio 16.8
- inf 106,628 -> 1,790,671; ratio 16.8
+ 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
+ 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
+ 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
+ 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
+ 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
+ 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
+ 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
+ 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
+ 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
+ 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
+ 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
+ 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
+ inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
*/
/* Structs for recording which (helper, guard) pairs we have already
@@ -6734,7 +6734,7 @@
tl_assert(i == n);
if (n == N_TIDYING_PAIRS) {
for (i = 1; i < N_TIDYING_PAIRS; i++) {
- tidyingEnv[n-1] = tidyingEnv[n];
+ tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
}
tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
|
|
From: <sv...@va...> - 2016-08-05 15:15:27
|
Author: sewardj
Date: Fri Aug 5 16:15:20 2016
New Revision: 15928
Log:
Update.
Modified:
trunk/NEWS
trunk/docs/internals/3_11_BUGSTATUS.txt
Modified: trunk/NEWS
==============================================================================
--- trunk/NEWS (original)
+++ trunk/NEWS Fri Aug 5 16:15:20 2016
@@ -40,6 +40,10 @@
- zlib ELF gABI format with SHF_COMPRESSED flag (gcc option -gz=zlib)
- zlib GNU format with .zdebug sections (gcc option -gz=zlib-gnu)
+* Modest JIT-cost improvements: the cost of instrumenting code blocks
+ for the most common use case (x86_64-linux, Memcheck) has been
+ reduced by 10%-15%.
+
* ==================== FIXED BUGS ====================
The following bugs have been fixed or resolved. Note that "n-i-bz"
@@ -116,6 +120,7 @@
360425 arm64 unsupported instruction ldpsw
== 364435
360519 none/tests/arm64/memory.vgtest might fail with newer gcc
+360574 Wrong parameter type for an ashmem ioctl() call on Android and ARM64
360749 kludge for multiple .rodata sections on Solaris no longer needed
360752 raise the number of reserved fds in m_main.c from 10 to 12
361207 Valgrind does not support the IBM POWER ISA 3.0 instructions, part 2
@@ -136,6 +141,7 @@
get_otrack_shadow_offset_wrk()
365273 Invalid write to stack location reported after signal handler runs
365912 ppc64BE segfault during jm-insns test (RELRO)
+366344 Multiple unhandled instruction for Aarch64
n-i-bz Fix incorrect (or infinite loop) unwind on RHEL7 x86 and amd64
n-i-bz massif --pages-as-heap=yes does not report peak caused by mmap+munmap
Modified: trunk/docs/internals/3_11_BUGSTATUS.txt
==============================================================================
--- trunk/docs/internals/3_11_BUGSTATUS.txt (original)
+++ trunk/docs/internals/3_11_BUGSTATUS.txt Fri Aug 5 16:15:20 2016
@@ -90,7 +90,6 @@
359705 memcheck causes segfault on a dynamically-linked test from
rustlang's test suite on i686
360429 Warning: noted but unhandled ioctl 0x530d with no size/direction hints.
-360574 Wrong parameter type for an ashmem ioctl() call on Android and ARM64
361615 Inconsistent termination when an instrumented multithreaded process
is terminated by signal
361726 WARNING:unhandled syscall on ppc64
|
|
From: <sv...@va...> - 2016-08-05 15:02:55
|
Author: sewardj
Date: Fri Aug 5 16:02:48 2016
New Revision: 3239
Log:
Reduce the number of IR sanity checks from 4 per block to 2 per block.
Also relax assertion checking in the register allocator.
Together with valgrind r15927 this reduces per-block JITting cost by 10%-15%.
Modified:
trunk/priv/host_generic_reg_alloc2.c
trunk/priv/main_main.c
Modified: trunk/priv/host_generic_reg_alloc2.c
==============================================================================
--- trunk/priv/host_generic_reg_alloc2.c (original)
+++ trunk/priv/host_generic_reg_alloc2.c Fri Aug 5 16:02:48 2016
@@ -993,13 +993,13 @@
/* ------------ Sanity checks ------------ */
/* Sanity checks are expensive. So they are done only once
- every 13 instructions, and just before the last
+ every 17 instructions, and just before the last
instruction. */
do_sanity_check
= toBool(
False /* Set to True for sanity checking of all insns. */
|| ii == instrs_in->arr_used-1
- || (ii > 0 && (ii % 13) == 0)
+ || (ii > 0 && (ii % 17) == 0)
);
if (do_sanity_check) {
Modified: trunk/priv/main_main.c
==============================================================================
--- trunk/priv/main_main.c (original)
+++ trunk/priv/main_main.c Fri Aug 5 16:02:48 2016
@@ -916,8 +916,13 @@
irsb = do_iropt_BB ( irsb, specHelper, preciseMemExnsFn, pxControl,
vta->guest_bytes_addr,
vta->arch_guest );
- sanityCheckIRSB( irsb, "after initial iropt",
- True/*must be flat*/, guest_word_type );
+
+ // JRS 2016 Aug 03: Sanity checking is expensive, we already checked
+ // the output of the front end, and iropt never screws up the IR by
+ // itself, unless it is being hacked on. So remove this post-iropt
+ // check in "production" use.
+ // sanityCheckIRSB( irsb, "after initial iropt",
+ // True/*must be flat*/, guest_word_type );
if (vex_traceflags & VEX_TRACE_OPT1) {
vex_printf("\n------------------------"
@@ -953,9 +958,12 @@
vex_printf("\n");
}
- if (vta->instrument1 || vta->instrument2)
- sanityCheckIRSB( irsb, "after instrumentation",
- True/*must be flat*/, guest_word_type );
+ // JRS 2016 Aug 03: as above, this never actually fails in practice.
+ // And we'll sanity check anyway after the post-instrumentation
+ // cleanup pass. So skip this check in "production" use.
+ // if (vta->instrument1 || vta->instrument2)
+ // sanityCheckIRSB( irsb, "after instrumentation",
+ // True/*must be flat*/, guest_word_type );
/* Do a post-instrumentation cleanup pass. */
if (vta->instrument1 || vta->instrument2) {
|
|
From: <sv...@va...> - 2016-08-05 14:59:58
|
Author: sewardj
Date: Fri Aug 5 15:59:50 2016
New Revision: 15927
Log:
Reimplement MC_(final_tidy) much more efficiently. This reduces its instruction
count by a factor of about 4.
Modified:
trunk/memcheck/mc_include.h
trunk/memcheck/mc_main.c
trunk/memcheck/mc_translate.c
Modified: trunk/memcheck/mc_include.h
==============================================================================
--- trunk/memcheck/mc_include.h (original)
+++ trunk/memcheck/mc_include.h Fri Aug 5 15:59:50 2016
@@ -788,6 +788,9 @@
IRSB* MC_(final_tidy) ( IRSB* );
+/* Check some assertions to do with the instrumentation machinery. */
+void MC_(do_instrumentation_startup_checks)( void );
+
#endif /* ndef __MC_INCLUDE_H */
/*--------------------------------------------------------------------*/
Modified: trunk/memcheck/mc_main.c
==============================================================================
--- trunk/memcheck/mc_main.c (original)
+++ trunk/memcheck/mc_main.c Fri Aug 5 15:59:50 2016
@@ -8142,6 +8142,9 @@
tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
# endif
+
+ /* Check some assertions to do with the instrumentation machinery. */
+ MC_(do_instrumentation_startup_checks)();
}
STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
Modified: trunk/memcheck/mc_translate.c
==============================================================================
--- trunk/memcheck/mc_translate.c (original)
+++ trunk/memcheck/mc_translate.c Fri Aug 5 15:59:50 2016
@@ -6584,6 +6584,7 @@
return sb_out;
}
+
/*------------------------------------------------------------*/
/*--- Post-tree-build final tidying ---*/
/*------------------------------------------------------------*/
@@ -6602,17 +6603,69 @@
reference, which is kinda pointless. MC_(final_tidy) therefore
looks for such repeated calls and removes all but the first. */
-/* A struct for recording which (helper, guard) pairs we have already
+
+/* With some testing on perf/bz2.c, on amd64 and x86, compiled with
+ gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
+ get almost all the benefits of this transformation whilst causing
+ the slide-back case to occur just often enough to be verifiably
+ correct. For posterity, the numbers are:
+
+ bz2-32
+
+ 1 111,840 -> 1,702,810; ratio 15.2
+ 2 111,840 -> 1,656,644; ratio 14.8
+ 3 111,840 -> 1,650,457; ratio 14.7
+ 4 111,840 -> 1,649,103; ratio 14.7
+ 5 111,840 -> 1,648,655; ratio 14.7
+ 6 111,840 -> 1,648,435; ratio 14.7
+ 7 111,840 -> 1,648,304; ratio 14.7
+ 8 111,840 -> 1,648,304; ratio 14.7
+ 10 111,840 -> 1,648,171; ratio 14.7
+ 12 111,840 -> 1,648,109 ratio 14.7
+ 16 111,840 -> 1,647,947; ratio 14.7
+ 32 111,840 -> 1,647,881; ratio 14.7
+ inf 111,840 -> 1,647,881; ratio 14.7
+
+ bz2-64
+
+ 1 106,628 -> 1,811,992; ratio 17.0
+ 2 106,628 -> 1,797,805; ratio 16.9
+ 3 106,628 -> 1,792,429; ratio 16.8
+ 4 106,628 -> 1,791,037; ratio 16.8
+ 5 106,628 -> 1,790,929; ratio 16.8
+ 6 106,628 -> 1,790,810; ratio 16.8
+ 7 106,628 -> 1,790,764; ratio 16.8
+ 8 106,628 -> 1,790,764; ratio 16.8
+ 10 106,628 -> 1,790,764; ratio 16.8
+ 12 106,628 -> 1,790,764; ratio 16.8
+ 16 106,628 -> 1,790,701; ratio 16.8
+ 32 106,628 -> 1,790,671; ratio 16.8
+ inf 106,628 -> 1,790,671; ratio 16.8
+*/
+
+/* Structs for recording which (helper, guard) pairs we have already
seen. */
+
+#define N_TIDYING_PAIRS 16
+
typedef
struct { void* entry; IRExpr* guard; }
Pair;
+typedef
+ struct {
+ Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
+ UInt pairsUsed;
+ }
+ Pairs;
+
+
/* Return True if e1 and e2 definitely denote the same value (used to
compare guards). Return False if unknown; False is the safe
answer. Since guest registers and guest memory do not have the
SSA property we must return False if any Gets or Loads appear in
- the expression. */
+ the expression. This implicitly assumes that e1 and e2 have the
+ same IR type, which is always true here -- the type is Ity_I1. */
static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
{
@@ -6661,45 +6714,98 @@
True if so. If not, add an entry. */
static
-Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
+Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
{
- Pair p;
- Pair* pp;
- Int i, n = VG_(sizeXA)( pairs );
+ UInt i, n = tidyingEnv->pairsUsed;
+ tl_assert(n <= N_TIDYING_PAIRS);
for (i = 0; i < n; i++) {
- pp = VG_(indexXA)( pairs, i );
- if (pp->entry == entry && sameIRValue(pp->guard, guard))
+ if (tidyingEnv->pairs[i].entry == entry
+ && sameIRValue(tidyingEnv->pairs[i].guard, guard))
return True;
}
- p.guard = guard;
- p.entry = entry;
- VG_(addToXA)( pairs, &p );
+ /* (guard, entry) wasn't found in the array. Add it at the end.
+ If the array is already full, slide the entries one slot
+ backwards. This means we will lose the ability to detect
+ duplicates from the pair in slot zero, but that happens so
+ rarely that it's unlikely to have much effect on overall code
+ quality. Also, this strategy loses the check for the oldest
+ tracked exit (memory reference, basically) and so that is (I'd
+ guess) least likely to be re-used after this point. */
+ tl_assert(i == n);
+ if (n == N_TIDYING_PAIRS) {
+ for (i = 1; i < N_TIDYING_PAIRS; i++) {
+ tidyingEnv[n-1] = tidyingEnv[n];
+ }
+ tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
+ tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
+ } else {
+ tl_assert(n < N_TIDYING_PAIRS);
+ tidyingEnv->pairs[n].entry = entry;
+ tidyingEnv->pairs[n].guard = guard;
+ n++;
+ tidyingEnv->pairsUsed = n;
+ }
return False;
}
static Bool is_helperc_value_checkN_fail ( const HChar* name )
{
- return
- 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
- || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
+ /* This is expensive because it happens a lot. We are checking to
+ see whether |name| is one of the following 8 strings:
+
+ MC_(helperc_value_check8_fail_no_o)
+ MC_(helperc_value_check4_fail_no_o)
+ MC_(helperc_value_check0_fail_no_o)
+ MC_(helperc_value_check1_fail_no_o)
+ MC_(helperc_value_check8_fail_w_o)
+ MC_(helperc_value_check0_fail_w_o)
+ MC_(helperc_value_check1_fail_w_o)
+ MC_(helperc_value_check4_fail_w_o)
+
+ To speed it up, check the common prefix just once, rather than
+ all 8 times.
+ */
+ const HChar* prefix = "MC_(helperc_value_check";
+
+ HChar n, p;
+ while (True) {
+ n = *name;
+ p = *prefix;
+ if (p == 0) break; /* ran off the end of the prefix */
+ /* We still have some prefix to use */
+ if (n == 0) return False; /* have prefix, but name ran out */
+ if (n != p) return False; /* have both pfx and name, but no match */
+ name++;
+ prefix++;
+ }
+
+ /* Check the part after the prefix. */
+ tl_assert(*prefix == 0 && *name != 0);
+ return 0==VG_(strcmp)(name, "8_fail_no_o)")
+ || 0==VG_(strcmp)(name, "4_fail_no_o)")
+ || 0==VG_(strcmp)(name, "0_fail_no_o)")
+ || 0==VG_(strcmp)(name, "1_fail_no_o)")
+ || 0==VG_(strcmp)(name, "8_fail_w_o)")
+ || 0==VG_(strcmp)(name, "4_fail_w_o)")
+ || 0==VG_(strcmp)(name, "0_fail_w_o)")
+ || 0==VG_(strcmp)(name, "1_fail_w_o)");
}
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
- Int i;
+ Int i;
IRStmt* st;
IRDirty* di;
IRExpr* guard;
IRCallee* cee;
Bool alreadyPresent;
- XArray* pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
- VG_(free), sizeof(Pair) );
+ Pairs pairs;
+
+ pairs.pairsUsed = 0;
+
+ pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
+ pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;
+
/* Scan forwards through the statements. Each time a call to one
of the relevant helpers is seen, check if we have made a
previous call to the same helper using the same guard
@@ -6720,16 +6826,21 @@
guard 'guard'. Check if we have already seen a call to this
function with the same guard. If so, delete it. If not,
add it to the set of calls we do know about. */
- alreadyPresent = check_or_add( pairs, guard, cee->addr );
+ alreadyPresent = check_or_add( &pairs, guard, cee->addr );
if (alreadyPresent) {
sb_in->stmts[i] = IRStmt_NoOp();
if (0) VG_(printf)("XX\n");
}
}
- VG_(deleteXA)( pairs );
+
+ tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
+ tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);
+
return sb_in;
}
+#undef N_TIDYING_PAIRS
+
/*------------------------------------------------------------*/
/*--- Origin tracking stuff ---*/
@@ -7485,6 +7596,62 @@
}
+/*------------------------------------------------------------*/
+/*--- Startup assertion checking ---*/
+/*------------------------------------------------------------*/
+
+void MC_(do_instrumentation_startup_checks)( void )
+{
+ /* Make a best-effort check to see that is_helperc_value_checkN_fail
+ is working as we expect. */
+
+# define CHECK(_expected, _string) \
+ tl_assert((_expected) == is_helperc_value_checkN_fail(_string))
+
+ /* It should identify these 8, and no others, as targets. */
+ CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
+ CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
+ CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
+ CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
+ CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
+ CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
+ CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
+ CHECK(True, "MC_(helperc_value_check4_fail_w_o)");
+
+ /* Ad-hoc selection of other strings gathered via a quick test. */
+ CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
+ CHECK(False, "amd64g_dirtyhelper_RDTSC");
+ CHECK(False, "MC_(helperc_b_load1)");
+ CHECK(False, "MC_(helperc_b_load2)");
+ CHECK(False, "MC_(helperc_b_load4)");
+ CHECK(False, "MC_(helperc_b_load8)");
+ CHECK(False, "MC_(helperc_b_load16)");
+ CHECK(False, "MC_(helperc_b_load32)");
+ CHECK(False, "MC_(helperc_b_store1)");
+ CHECK(False, "MC_(helperc_b_store2)");
+ CHECK(False, "MC_(helperc_b_store4)");
+ CHECK(False, "MC_(helperc_b_store8)");
+ CHECK(False, "MC_(helperc_b_store16)");
+ CHECK(False, "MC_(helperc_b_store32)");
+ CHECK(False, "MC_(helperc_LOADV8)");
+ CHECK(False, "MC_(helperc_LOADV16le)");
+ CHECK(False, "MC_(helperc_LOADV32le)");
+ CHECK(False, "MC_(helperc_LOADV64le)");
+ CHECK(False, "MC_(helperc_LOADV128le)");
+ CHECK(False, "MC_(helperc_LOADV256le)");
+ CHECK(False, "MC_(helperc_STOREV16le)");
+ CHECK(False, "MC_(helperc_STOREV32le)");
+ CHECK(False, "MC_(helperc_STOREV64le)");
+ CHECK(False, "MC_(helperc_STOREV8)");
+ CHECK(False, "track_die_mem_stack_8");
+ CHECK(False, "track_new_mem_stack_8_w_ECU");
+ CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
+ CHECK(False, "VG_(unknown_SP_update_w_ECU)");
+
+# undef CHECK
+}
+
+
/*--------------------------------------------------------------------*/
/*--- end mc_translate.c ---*/
/*--------------------------------------------------------------------*/
|
|
From: <sv...@va...> - 2016-08-05 14:54:35
|
Author: sewardj
Date: Fri Aug 5 15:54:27 2016
New Revision: 15926
Log:
Reimplement pszB_to_listNo using a binary search rather than a linear search.
Unlikely as it seems, this saves a considerable number of instructions (2% of total)
on very heap-intensive code (perf/heap.c).
Modified:
trunk/coregrind/m_mallocfree.c
Modified: trunk/coregrind/m_mallocfree.c
==============================================================================
--- trunk/coregrind/m_mallocfree.c (original)
+++ trunk/coregrind/m_mallocfree.c Fri Aug 5 15:54:27 2016
@@ -1011,66 +1011,125 @@
// payload size, not block size.
// Convert a payload size in bytes to a freelist number.
-static
+static __attribute__((noinline))
+UInt pszB_to_listNo_SLOW ( SizeT pszB__divided_by__VG_MIN_MALLOC_SZB )
+{
+ SizeT n = pszB__divided_by__VG_MIN_MALLOC_SZB;
+
+ if (n < 299) {
+ if (n < 114) {
+ if (n < 85) {
+ if (n < 74) {
+ /* -- Exponential slope up, factor 1.05 -- */
+ if (n < 67) return 64;
+ if (n < 70) return 65;
+ /* else */ return 66;
+ } else {
+ if (n < 77) return 67;
+ if (n < 81) return 68;
+ /* else */ return 69;
+ }
+ } else {
+ if (n < 99) {
+ if (n < 90) return 70;
+ if (n < 94) return 71;
+ /* else */ return 72;
+ } else {
+ if (n < 104) return 73;
+ if (n < 109) return 74;
+ /* else */ return 75;
+ }
+ }
+ } else {
+ if (n < 169) {
+ if (n < 133) {
+ if (n < 120) return 76;
+ if (n < 126) return 77;
+ /* else */ return 78;
+ } else {
+ if (n < 139) return 79;
+ /* -- Exponential slope up, factor 1.10 -- */
+ if (n < 153) return 80;
+ /* else */ return 81;
+ }
+ } else {
+ if (n < 224) {
+ if (n < 185) return 82;
+ if (n < 204) return 83;
+ /* else */ return 84;
+ } else {
+ if (n < 247) return 85;
+ if (n < 272) return 86;
+ /* else */ return 87;
+ }
+ }
+ }
+ } else {
+ if (n < 1331) {
+ if (n < 530) {
+ if (n < 398) {
+ if (n < 329) return 88;
+ if (n < 362) return 89;
+ /* else */ return 90;
+ } else {
+ if (n < 438) return 91;
+ if (n < 482) return 92;
+ /* else */ return 93;
+ }
+ } else {
+ if (n < 770) {
+ if (n < 583) return 94;
+ if (n < 641) return 95;
+ /* -- Exponential slope up, factor 1.20 -- */
+ /* else */ return 96;
+ } else {
+ if (n < 924) return 97;
+ if (n < 1109) return 98;
+ /* else */ return 99;
+ }
+ }
+ } else {
+ if (n < 3974) {
+ if (n < 2300) {
+ if (n < 1597) return 100;
+ if (n < 1916) return 101;
+ return 102;
+ } else {
+ if (n < 2760) return 103;
+ if (n < 3312) return 104;
+ /* else */ return 105;
+ }
+ } else {
+ if (n < 6868) {
+ if (n < 4769) return 106;
+ if (n < 5723) return 107;
+ /* else */ return 108;
+ } else {
+ if (n < 8241) return 109;
+ if (n < 9890) return 110;
+ /* else */ return 111;
+ }
+ }
+ }
+ }
+ /*NOTREACHED*/
+ vg_assert(0);
+}
+
+static inline
UInt pszB_to_listNo ( SizeT pszB )
{
SizeT n = pszB / VG_MIN_MALLOC_SZB;
- vg_assert(0 == pszB % VG_MIN_MALLOC_SZB);
+ vg_assert(0 == (pszB % VG_MIN_MALLOC_SZB));
// The first 64 lists hold blocks of size VG_MIN_MALLOC_SZB * list_num.
- // The final 48 hold bigger blocks.
- if (n < 64) return (UInt)n;
- /* Exponential slope up, factor 1.05 */
- if (n < 67) return 64;
- if (n < 70) return 65;
- if (n < 74) return 66;
- if (n < 77) return 67;
- if (n < 81) return 68;
- if (n < 85) return 69;
- if (n < 90) return 70;
- if (n < 94) return 71;
- if (n < 99) return 72;
- if (n < 104) return 73;
- if (n < 109) return 74;
- if (n < 114) return 75;
- if (n < 120) return 76;
- if (n < 126) return 77;
- if (n < 133) return 78;
- if (n < 139) return 79;
- /* Exponential slope up, factor 1.10 */
- if (n < 153) return 80;
- if (n < 169) return 81;
- if (n < 185) return 82;
- if (n < 204) return 83;
- if (n < 224) return 84;
- if (n < 247) return 85;
- if (n < 272) return 86;
- if (n < 299) return 87;
- if (n < 329) return 88;
- if (n < 362) return 89;
- if (n < 398) return 90;
- if (n < 438) return 91;
- if (n < 482) return 92;
- if (n < 530) return 93;
- if (n < 583) return 94;
- if (n < 641) return 95;
- /* Exponential slope up, factor 1.20 */
- if (n < 770) return 96;
- if (n < 924) return 97;
- if (n < 1109) return 98;
- if (n < 1331) return 99;
- if (n < 1597) return 100;
- if (n < 1916) return 101;
- if (n < 2300) return 102;
- if (n < 2760) return 103;
- if (n < 3312) return 104;
- if (n < 3974) return 105;
- if (n < 4769) return 106;
- if (n < 5723) return 107;
- if (n < 6868) return 108;
- if (n < 8241) return 109;
- if (n < 9890) return 110;
- return 111;
+ // The final 48 hold bigger blocks and are dealt with by the _SLOW
+ // case.
+ if (LIKELY(n < 64)) {
+ return (UInt)n;
+ } else {
+ return pszB_to_listNo_SLOW(n);
+ }
}
// What is the minimum payload size for a given list?
|
|
From: Ruurd B. <Ruu...@in...> - 2016-08-05 14:39:31
|
Hi,
I am a software developer at Infor, where we maintain a complex application (30+ years old, many millions of lines), most of it written in C/C++.
I have used valgrind with memcheck to find and fix memory related issues and have become a great fan of the product.
However, we use a custom allocator that caused me considerable problems because it has memory pool features not supported by the "loose model" of valgrind.
1. Specifically, it allows me to create a memory pool, allocate many items from that pool and then destroy the pool.
The applications know that all pool items are automatically freed when the pool is destroyed, so it saves time and code by not doing so explicitly.
Valgrind reports all items in such a pool as memory leaks, because that is the model it assumes.
I understand that this is a design choice: Either such application memory pools are considered "auto-freed" or not, and when not, they are considered leaks.
2. Another problem is that our allocator uses itself to allocate large chunks for the memory pools.
Those chunks are used to dole out smaller pieces for the applications.
Valgrind sees that as an error: Overlapping memory blocks because both types of blocks (memory pool and allocations from the pool) are marked as VALGRIND_MALLOCLIKE_BLOCK.
That triggers an error:
Block 0x%lx..0x%lx overlaps with block 0x%lx..0x%lx, this is usually caused by using VALGRIND_MALLOCLIKE_BLOCK in an inappropriate way
plus an assert in memcheck.
3. Our (admittedly ancient) allocator uses sbrk() to get the memory (and not mmap).
Valgrind (on linux) limits this to 8MB. That is not enough for our applications. The 8MB is hardcoded in valgrind.
4. We use Oracle as a database, which executes as setuid-to-oracle on Linux (we have our own database wrapper software layers for Oracle, DB2, MySql, Microsoft SQL and so on).
To be able to valgrind such executables, I've created a setuid-oracle copy of Valgrind.
That works, but the reports valgrind creates are owned by Oracle in such cases and our test framework got "Permission denied" when it wanted to analyze and modify the valgrind reports.
So I have modified valgrind to support our model, address the problems, and tried not to break anything in the process:
1. Added a VALGRIND_CREATE_META_MEMPOOL macro in valgrind.h, modelled after VALGRIND_CREATE_MEMPOOL.
It takes a flag parameter, with 2 (or-able) options: MEMPOOL_AUTO_FREE and MEMPOOL_METABLOCKS.
When AUTO_FREE is set when the pool is created, valgrind will free all allocations in a memory pool block when MEMPOOL_FREE is used on a block.
For a non-auto-free pool, everything is as before. This prevents the false memory-leak reports.
2. When METABLOCKS is used, it will not complain about overlapping blocks as long as the overlap is with a memory-pool chunk from a METABLOCKS pool.
Also, when reporting the location of a problem, the "describe_addr" function favored custom memory pool blocks (our 64 KB chunks for the pool) over all else.
That caused almost all reports to say "Address XXX is many bytes in a block of 64K alloc'd", and the alloc location-stack would be the place where the pool was extended.
Not very useful.
So I've modified the describe_addr function to take the "meta-blocks" into account and report the underlying smaller allocation.
When no such meta-blocks exist, everything is as before.
3. For the sbrk problem, I've added a new command line option, --main-sbrksize, patterned after --main-stacksize. The default is the old (hard-coded) 8MB.
In the initimg modules for Linux and Solaris I have changed the code to use the command line value.
So the behavior is modified only when the new command line option is used. Our test framework passes 1GB and that works well.
That change caused a few of valgrind's regression tests (those that check the "help" output of valgrind) to fail; I've fixed those as well.
4. I've added group-write permissions to the default file-creation mask for the valgrind reports.
BTW: Those reports are altered because I could not figure out how to write suppression rules that are based only on the allocation stack of a problem.
For example, we link against OpenSSL crypto libraries which (intentionally) do all sorts of things with uninitialized memory (for randomness).
Valgrind spots that, but I want to suppress those messages.
The numerous different error-locations all have the same allocation spot, but suppressions insist on using the location of the error (use of uninitialized memory).
Those are far too many and often change when a new OpenSSL version is released.
So I've written a Perl post-processor to delete (suppress) the OpenSSL stuff based on arbitrary patterns in a valgrind error message.
The "permission denied" occurred when it wanted to write the edited report back.
Suggestion: It would be nice to be able to write suppression rules for this kind of problem, with regular expressions on the complete valgrind message.
I've created a new version of valgrind for this: 3.11.1.
I've attached a patch file to alter a 3.11.0 tree to a 3.11.1. Apply to a 3.11.0 tree by going to the root of the tree and do "patch -p0 < metamempool.patch".
I've tried to do all the changes in the style of the existing code.
I've run all the regression tests of valgrind and the results of 3.11.0 and 3.11.1 are identical.
The patch also includes altered manual pages, I've been unable to build those on my system, so I hope they're OK.
I've installed this altered valgrind on various development and test systems at Infor and used it for a few months to make sure I have not broken anything.
This version 3.11.1 is used on both normal programs and ones using our custom allocator (or both). Everything works the way it should.
I'd appreciate it if this patch could be applied to the standard distribution so I will not have to maintain a separate version of valgrind/memcheck for Infor.
Comment / feedback appreciated,
Regards,
Ruurd Beerstra
[Infor]<http://www.infor.com/>
Ruurd Beerstra | Software Engineer, Sr.
office: 0342-427289 | mobile: +31 22 42 7478 | Ruu...@in... | http://www.infor.com
|
|
From: <sv...@va...> - 2016-08-05 10:34:23
|
Author: sewardj
Date: Fri Aug 5 11:34:15 2016
New Revision: 3238
Log:
Fix two invalid signed left shifts picked up by ubsan.
Modified:
trunk/priv/guest_arm64_toIR.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Fri Aug 5 11:34:15 2016
@@ -148,8 +148,9 @@
static ULong sx_to_64 ( ULong x, UInt n )
{
vassert(n > 1 && n < 64);
+ x <<= (64-n);
Long r = (Long)x;
- r = (r << (64-n)) >> (64-n);
+ r >>= (64-n);
return (ULong)r;
}
@@ -2590,7 +2591,7 @@
IRTemp old = newTemp(Ity_I32);
assign(old, getIReg32orZR(dd));
vassert(hw <= 1);
- UInt mask = 0xFFFF << (16 * hw);
+ UInt mask = ((UInt)0xFFFF) << (16 * hw);
IRExpr* res
= binop(Iop_Or32,
binop(Iop_And32, mkexpr(old), mkU32(~mask)),
|
|
From: <sv...@va...> - 2016-08-04 21:41:06
|
Author: sewardj
Date: Thu Aug 4 22:40:59 2016
New Revision: 15925
Log:
Connect up the new CRC32 tests to the build system.
Added:
trunk/none/tests/arm64/crc32.stderr.exp
trunk/none/tests/arm64/crc32.stdout.exp
trunk/none/tests/arm64/crc32.vgtest
Modified:
trunk/none/tests/arm64/Makefile.am
Modified: trunk/none/tests/arm64/Makefile.am
==============================================================================
--- trunk/none/tests/arm64/Makefile.am (original)
+++ trunk/none/tests/arm64/Makefile.am Thu Aug 4 22:40:59 2016
@@ -4,6 +4,7 @@
dist_noinst_SCRIPTS = filter_stderr
EXTRA_DIST = \
+ crc32.stdout.exp crc32.stderr.exp crc32.vgtest \
cvtf_imm.stdout.exp cvtf_imm.stderr.exp cvtf_imm.vgtest \
fp_and_simd.stdout.exp fp_and_simd.stderr.exp fp_and_simd.vgtest \
integer.stdout.exp integer.stderr.exp integer.vgtest \
@@ -11,6 +12,7 @@
check_PROGRAMS = \
allexec \
+ crc32 \
cvtf_imm \
fp_and_simd \
integer \
@@ -22,6 +24,7 @@
allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@
+crc32_CFLAGS = $(AM_CFLAGS) -march=armv8-a+crc
fp_and_simd_CFLAGS = $(AM_CFLAGS) -march=armv8-a+crypto
integer_CFLAGS = $(AM_CFLAGS) -g -O0 -DTEST_BFM=0
Added: trunk/none/tests/arm64/crc32.stderr.exp
==============================================================================
(empty)
Added: trunk/none/tests/arm64/crc32.stdout.exp
==============================================================================
--- trunk/none/tests/arm64/crc32.stdout.exp (added)
+++ trunk/none/tests/arm64/crc32.stdout.exp Thu Aug 4 22:40:59 2016
@@ -0,0 +1,57 @@
+CRC32/CRC32C
+crc32b w21,w20,w19 :: rd 00000000f8957d4c rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32b w21,w20,w19 :: rd 00000000f810b326 rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32b w21,w20,w19 :: rd 00000000ef405c96 rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32b w21,w20,w19 :: rd 00000000a0db523c rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32b w21,w20,w19 :: rd 0000000096de687b rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32b w21,w20,w19 :: rd 000000005b546bd0 rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32b w21,w20,w19 :: rd 000000008f7a8684 rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 00000000862b47a9 rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 000000009a47a305 rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 00000000a788663d rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 00000000b6c6f66f rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 00000000b046464a rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 00000000fb8f180e rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32h w21,w20,w19 :: rd 00000000c758d232 rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 00000000b24959b6 rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 00000000532cb693 rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 00000000ffe2757b rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 00000000c3c8592d rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 00000000e44ccdd5 rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 0000000004826ea7 rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32w w21,w20,w19 :: rd 00000000784b67ea rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 000000009ca1d692 rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 00000000b2eefa0a rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 00000000918ddaac rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 000000000449ce8d rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 000000001f887163 rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 000000003ba77596 rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32x w21,w20,x19 :: rd 00000000b1fd8b1d rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 000000006c0c7e5d rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 000000006c89b037 rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 0000000052bea2f4 rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 00000000e3e1185e rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 000000002a4ef571 rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 00000000ec8c8b20 rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32cb w21,w20,w19 :: rd 000000009e8f3600 rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 00000000318aa9b9 rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 0000000092c475a7 rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 00000000a4056232 rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 00000000eeee7718 rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 0000000071b21af8 rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 00000000fcb40509 rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32ch w21,w20,w19 :: rd 00000000ade3dc67 rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 000000004f6e8750 rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 000000004548949c rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 0000000050fc77a7 rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 000000000b0f3746 rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 0000000092a3acf2 rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 00000000d91fb7ba rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32cw w21,w20,w19 :: rd 000000006b548718 rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 00000000252d3dc0 rm 4b154113f7d32514, rn cce230caafbf9cc9, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 0000000094dc9608 rm 33d5d595721d4f13, rn f4509311f443a7ce, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 0000000004d0ed55 rm 4a3c6de6954cbc17, rn 111b21e39fbd7254, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 000000004c3be587 rm fbb5c64ed1b044c6, rn 33ca4c4fb3960326, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 00000000b914bc44 rm 2b7c5939d7c0f528, rn b73870a5a6630162, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 000000005b10caab rm 02fe41918ac5cdba, rn 48e0815289728f05, cin 0, nzcv 00000000
+crc32cx w21,w20,x19 :: rd 00000000803f9752 rm b60a8f381f187bae, rn 008c208cc413ff72, cin 0, nzcv 00000000
Added: trunk/none/tests/arm64/crc32.vgtest
==============================================================================
--- trunk/none/tests/arm64/crc32.vgtest (added)
+++ trunk/none/tests/arm64/crc32.vgtest Thu Aug 4 22:40:59 2016
@@ -0,0 +1,2 @@
+prog: crc32
+vgopts: -q
|
|
From: <sv...@va...> - 2016-08-04 21:14:32
|
Author: sewardj
Date: Thu Aug 4 22:14:24 2016
New Revision: 15924
Log:
Add test cases for the CRC32* instruction group. Is not yet connected to the
build system.
Added:
trunk/none/tests/arm64/crc32.c
Added: trunk/none/tests/arm64/crc32.c
==============================================================================
--- trunk/none/tests/arm64/crc32.c (added)
+++ trunk/none/tests/arm64/crc32.c Thu Aug 4 22:14:24 2016
@@ -0,0 +1,123 @@
+
+/* To compile:
+ gcc -Wall -g -o crc32 none/tests/arm64/crc32.c -march=armv8-a+crc
+ -march=armv8-a+crc+crypto is also OK
+*/
+
+#include <stdio.h>
+#include <malloc.h> // memalign
+#include <string.h> // memset
+#include <assert.h>
+
+typedef unsigned char UChar;
+typedef unsigned short int UShort;
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef signed long long int Long;
+typedef unsigned long long int ULong;
+
+typedef unsigned char Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+
+#define TESTINST3(instruction, RMval, RNval, RD, RM, RN, carryin) \
+{ \
+ ULong out; \
+ ULong nzcv_out; \
+ ULong nzcv_in = (carryin ? (1<<29) : 0); \
+ __asm__ __volatile__( \
+ "msr nzcv,%4;" \
+ "mov " #RM ",%2;" \
+ "mov " #RN ",%3;" \
+ instruction ";" \
+ "mov %0," #RD ";" \
+ "mrs %1,nzcv;" \
+ : "=&r" (out), "=&r" (nzcv_out) \
+ : "r" (RMval), "r" (RNval), "r" (nzcv_in) \
+ : #RD, #RM, #RN, "cc", "memory" \
+ ); \
+ printf("%s :: rd %016llx rm %016llx, rn %016llx, " \
+ "cin %d, nzcv %08llx %c%c%c%c\n", \
+ instruction, out, ((ULong)RMval), ((ULong)RNval), \
+ carryin ? 1 : 0, \
+ nzcv_out & 0xffff0000, \
+ ((1<<31) & nzcv_out) ? 'N' : ' ', \
+ ((1<<30) & nzcv_out) ? 'Z' : ' ', \
+ ((1<<29) & nzcv_out) ? 'C' : ' ', \
+ ((1<<28) & nzcv_out) ? 'V' : ' ' \
+ ); \
+}
+
+int main ( void )
+{
+////////////////////////////////////////////////////////////////
+printf("CRC32/CRC32C\n");
+
+TESTINST3("crc32b w21,w20,w19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32b w21,w20,w19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32b w21,w20,w19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32b w21,w20,w19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32b w21,w20,w19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32b w21,w20,w19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32b w21,w20,w19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32h w21,w20,w19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32h w21,w20,w19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32h w21,w20,w19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32h w21,w20,w19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32h w21,w20,w19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32h w21,w20,w19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32h w21,w20,w19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32w w21,w20,w19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32w w21,w20,w19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32w w21,w20,w19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32w w21,w20,w19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32w w21,w20,w19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32w w21,w20,w19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32w w21,w20,w19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32x w21,w20,x19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32x w21,w20,x19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32x w21,w20,x19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32x w21,w20,x19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32x w21,w20,x19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32x w21,w20,x19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32x w21,w20,x19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32cb w21,w20,w19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32cb w21,w20,w19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32cb w21,w20,w19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32cb w21,w20,w19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32cb w21,w20,w19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32cb w21,w20,w19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32cb w21,w20,w19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32ch w21,w20,w19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32ch w21,w20,w19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32ch w21,w20,w19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32ch w21,w20,w19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32ch w21,w20,w19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32ch w21,w20,w19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32ch w21,w20,w19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32cw w21,w20,w19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32cw w21,w20,w19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32cw w21,w20,w19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32cw w21,w20,w19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32cw w21,w20,w19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32cw w21,w20,w19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32cw w21,w20,w19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+TESTINST3("crc32cx w21,w20,x19", 0x4b154113f7d32514, 0xcce230caafbf9cc9, x21,x20,x19, 0);
+TESTINST3("crc32cx w21,w20,x19", 0x33d5d595721d4f13, 0xf4509311f443a7ce, x21,x20,x19, 0);
+TESTINST3("crc32cx w21,w20,x19", 0x4a3c6de6954cbc17, 0x111b21e39fbd7254, x21,x20,x19, 0);
+TESTINST3("crc32cx w21,w20,x19", 0xfbb5c64ed1b044c6, 0x33ca4c4fb3960326, x21,x20,x19, 0);
+TESTINST3("crc32cx w21,w20,x19", 0x2b7c5939d7c0f528, 0xb73870a5a6630162, x21,x20,x19, 0);
+TESTINST3("crc32cx w21,w20,x19", 0x02fe41918ac5cdba, 0x48e0815289728f05, x21,x20,x19, 0);
+TESTINST3("crc32cx w21,w20,x19", 0xb60a8f381f187bae, 0x008c208cc413ff72, x21,x20,x19, 0);
+
+return 0;
+}
|
|
From: <sv...@va...> - 2016-08-04 21:11:36
|
Author: sewardj
Date: Thu Aug 4 22:11:28 2016
New Revision: 15923
Log:
Make "ashmem" and "binder" ioctls be handled properly on 64-bit ARM Android,
rather than falling through to the generic handler and being mishandled.
Fixes #360574.
Modified:
trunk/coregrind/m_syswrap/syswrap-linux.c
trunk/include/vki/vki-linux.h
Modified: trunk/coregrind/m_syswrap/syswrap-linux.c
==============================================================================
--- trunk/coregrind/m_syswrap/syswrap-linux.c (original)
+++ trunk/coregrind/m_syswrap/syswrap-linux.c Thu Aug 4 22:11:28 2016
@@ -7082,7 +7082,8 @@
break;
# if defined(VGPV_arm_linux_android) || defined(VGPV_x86_linux_android) \
- || defined(VGPV_mips32_linux_android)
+ || defined(VGPV_mips32_linux_android) \
+ || defined(VGPV_arm64_linux_android)
/* ashmem */
case VKI_ASHMEM_GET_SIZE:
case VKI_ASHMEM_SET_SIZE:
@@ -9574,7 +9575,8 @@
break;
# if defined(VGPV_arm_linux_android) || defined(VGPV_x86_linux_android) \
- || defined(VGPV_mips32_linux_android)
+ || defined(VGPV_mips32_linux_android) \
+ || defined(VGPV_arm64_linux_android)
/* ashmem */
case VKI_ASHMEM_GET_SIZE:
case VKI_ASHMEM_SET_SIZE:
Modified: trunk/include/vki/vki-linux.h
==============================================================================
--- trunk/include/vki/vki-linux.h (original)
+++ trunk/include/vki/vki-linux.h Thu Aug 4 22:11:28 2016
@@ -3009,7 +3009,8 @@
//----------------------------------------------------------------------
#if defined(VGPV_arm_linux_android) || defined(VGPV_x86_linux_android) \
- || defined(VGPV_mips32_linux_android)
+ || defined(VGPV_mips32_linux_android) \
+ || defined(VGPV_arm64_linux_android)
#define VKI_ASHMEM_NAME_LEN 256
|
|
From: <sv...@va...> - 2016-08-04 09:13:20
|
Author: sewardj
Date: Thu Aug 4 10:13:11 2016
New Revision: 3237
Log:
Implement CRC32{B,H,W,X} and CRC32C{B,H,W,X}. Fixes #366344.
Modified:
trunk/priv/guest_arm64_defs.h
trunk/priv/guest_arm64_helpers.c
trunk/priv/guest_arm64_toIR.c
Modified: trunk/priv/guest_arm64_defs.h
==============================================================================
--- trunk/priv/guest_arm64_defs.h (original)
+++ trunk/priv/guest_arm64_defs.h Thu Aug 4 10:13:11 2016
@@ -110,6 +110,15 @@
//ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
//ZZ UInt resR1, UInt resR2 );
+extern ULong arm64g_calc_crc32b ( ULong acc, ULong bits );
+extern ULong arm64g_calc_crc32h ( ULong acc, ULong bits );
+extern ULong arm64g_calc_crc32w ( ULong acc, ULong bits );
+extern ULong arm64g_calc_crc32x ( ULong acc, ULong bits );
+
+extern ULong arm64g_calc_crc32cb ( ULong acc, ULong bits );
+extern ULong arm64g_calc_crc32ch ( ULong acc, ULong bits );
+extern ULong arm64g_calc_crc32cw ( ULong acc, ULong bits );
+extern ULong arm64g_calc_crc32cx ( ULong acc, ULong bits );
/* --- DIRTY HELPERS --- */
Modified: trunk/priv/guest_arm64_helpers.c
==============================================================================
--- trunk/priv/guest_arm64_helpers.c (original)
+++ trunk/priv/guest_arm64_helpers.c Thu Aug 4 10:13:11 2016
@@ -677,6 +677,88 @@
}
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32b ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = (bits & 0xFFULL) ^ acc;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
+ return crc;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32h ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = (bits & 0xFFFFULL) ^ acc;
+ for (i = 0; i < 16; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
+ return crc;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32w ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
+ for (i = 0; i < 32; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
+ return crc;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32x ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = bits ^ acc;
+ for (i = 0; i < 64; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
+ return crc;
+
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32cb ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = (bits & 0xFFULL) ^ acc;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
+ return crc;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32ch ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = (bits & 0xFFFFULL) ^ acc;
+ for (i = 0; i < 16; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
+ return crc;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32cw ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
+ for (i = 0; i < 32; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
+ return crc;
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong arm64g_calc_crc32cx ( ULong acc, ULong bits )
+{
+ UInt i;
+ ULong crc = bits ^ acc;
+ for (i = 0; i < 64; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
+ return crc;
+}
+
+
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack. On non-arm64 platforms, return 0. */
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Thu Aug 4 10:13:11 2016
@@ -3497,6 +3497,75 @@
nameIReg32orZR(mm), nameIReg64orZR(aa));
return True;
}
+
+ /* -------------------- CRC32/CRC32C -------------------- */
+ /* 31 30 20 15 11 9 4
+ sf 00 1101 0110 m 0100 sz n d CRC32<sz> Wd, Wn, Wm|Xm
+ sf 00 1101 0110 m 0101 sz n d CRC32C<sz> Wd, Wn, Wm|Xm
+ */
+ if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
+ && INSN(15,13) == BITS3(0,1,0)) {
+ UInt bitSF = INSN(31,31);
+ UInt mm = INSN(20,16);
+ UInt bitC = INSN(12,12);
+ UInt sz = INSN(11,10);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+ vassert(sz >= 0 && sz <= 3);
+ if ((bitSF == 0 && sz <= BITS2(1,0))
+ || (bitSF == 1 && sz == BITS2(1,1))) {
+ UInt ix = (bitC == 1 ? 4 : 0) | sz;
+ void* helpers[8]
+ = { &arm64g_calc_crc32b, &arm64g_calc_crc32h,
+ &arm64g_calc_crc32w, &arm64g_calc_crc32x,
+ &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
+ &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
+ const HChar* hNames[8]
+ = { "arm64g_calc_crc32b", "arm64g_calc_crc32h",
+ "arm64g_calc_crc32w", "arm64g_calc_crc32x",
+ "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
+ "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
+ const HChar* iNames[8]
+ = { "crc32b", "crc32h", "crc32w", "crc32x",
+ "crc32cb", "crc32ch", "crc32cw", "crc32cx" };
+
+ IRTemp srcN = newTemp(Ity_I64);
+ assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
+
+ IRTemp srcM = newTemp(Ity_I64);
+ IRExpr* at64 = getIReg64orZR(mm);
+ switch (sz) {
+ case BITS2(0,0):
+ assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
+ case BITS2(0,1):
+ assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
+ case BITS2(1,0):
+ assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
+ case BITS2(1,1):
+ assign(srcM, at64); break;
+ default:
+ vassert(0);
+ }
+
+ vassert(ix >= 0 && ix <= 7);
+
+ putIReg64orZR(
+ dd,
+ unop(Iop_32Uto64,
+ unop(Iop_64to32,
+ mkIRExprCCall(Ity_I64, 0/*regparm*/,
+ hNames[ix], helpers[ix],
+ mkIRExprVec_2(mkexpr(srcN),
+ mkexpr(srcM))))));
+
+ DIP("%s %s, %s, %s\n", iNames[ix],
+ nameIReg32orZR(dd),
+ nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
+ return True;
+ }
+ /* fall through */
+ }
+
vex_printf("ARM64 front end: data_processing_register\n");
return False;
# undef INSN
|
|
From: <sv...@va...> - 2016-08-03 11:55:46
|
Author: sewardj
Date: Wed Aug 3 12:55:33 2016
New Revision: 3236
Log:
* Add infrastructure for decoding (32-bit) ARMv8 instructions.
* Use this to implement AESE, AESD, AESMC, AESIMC.
Modified:
trunk/priv/guest_arm_defs.h
trunk/priv/guest_arm_helpers.c
trunk/priv/guest_arm_toIR.c
Modified: trunk/priv/guest_arm_defs.h
==============================================================================
--- trunk/priv/guest_arm_defs.h (original)
+++ trunk/priv/guest_arm_defs.h Wed Aug 3 12:55:33 2016
@@ -111,6 +111,28 @@
UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
UInt resR1, UInt resR2 );
+/* --- DIRTY HELPERS --- */
+
+extern
+void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
+extern
+void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
+extern
+void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
+extern
+void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
+ UInt argW3, UInt argW2,
+ UInt argW1, UInt argW0 );
+
/*---------------------------------------------------------*/
/*--- Condition code stuff ---*/
Modified: trunk/priv/guest_arm_helpers.c
==============================================================================
--- trunk/priv/guest_arm_helpers.c (original)
+++ trunk/priv/guest_arm_helpers.c Wed Aug 3 12:55:33 2016
@@ -38,6 +38,7 @@
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm_defs.h"
+#include "guest_arm64_defs.h" /* for crypto helper functions */
/* This file contains helper functions for arm guest code. Calls to
@@ -535,6 +536,68 @@
/*---------------------------------------------------------------*/
+/*--- Crypto instruction helpers ---*/
+/*---------------------------------------------------------------*/
+
+/* DIRTY HELPERS for doing AES support:
+ * AESE (SubBytes, then ShiftRows)
+ * AESD (InvShiftRows, then InvSubBytes)
+ * AESMC (MixColumns)
+ * AESIMC (InvMixColumns)
+ These don't actually have to be dirty helpers -- they could be
+ clean, but for the fact that they return a V128 and a clean helper
+ can't do that.
+
+ These just call onwards to the implementations of the same in
+ guest_arm64_helpers.c. In all of these cases, we expect |res| to
+ be at least 8 aligned.
+*/
+/* CALLED FROM GENERATED CODE */
+/* AESE for the 32-bit guest.  Checks that |res| is 8-aligned, joins
+   the four 32-bit words into two 64-bit halves (argW3:argW2 is the
+   upper half, argW1:argW0 the lower) and forwards |res| and both
+   halves to arm64g_dirtyhelper_AESE. */
+void armg_dirtyhelper_AESE ( /*OUT*/V128* res,
+                             UInt argW3, UInt argW2,
+                             UInt argW1, UInt argW0 )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong upper = (ULong)argW3;
+   ULong lower = (ULong)argW1;
+   upper = (upper << 32) | (ULong)argW2;
+   lower = (lower << 32) | (ULong)argW0;
+   arm64g_dirtyhelper_AESE(res, upper, lower);
+}
+
+/* CALLED FROM GENERATED CODE */
+/* AESD for the 32-bit guest.  Checks that |res| is 8-aligned, joins
+   the four 32-bit words into two 64-bit halves (argW3:argW2 is the
+   upper half, argW1:argW0 the lower) and forwards |res| and both
+   halves to arm64g_dirtyhelper_AESD. */
+void armg_dirtyhelper_AESD ( /*OUT*/V128* res,
+                             UInt argW3, UInt argW2,
+                             UInt argW1, UInt argW0 )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong upper = (ULong)argW3;
+   ULong lower = (ULong)argW1;
+   upper = (upper << 32) | (ULong)argW2;
+   lower = (lower << 32) | (ULong)argW0;
+   arm64g_dirtyhelper_AESD(res, upper, lower);
+}
+
+/* CALLED FROM GENERATED CODE */
+/* AESMC for the 32-bit guest.  Checks that |res| is 8-aligned, joins
+   the four 32-bit words into two 64-bit halves (argW3:argW2 is the
+   upper half, argW1:argW0 the lower) and forwards |res| and both
+   halves to arm64g_dirtyhelper_AESMC. */
+void armg_dirtyhelper_AESMC ( /*OUT*/V128* res,
+                              UInt argW3, UInt argW2,
+                              UInt argW1, UInt argW0 )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong upper = (ULong)argW3;
+   ULong lower = (ULong)argW1;
+   upper = (upper << 32) | (ULong)argW2;
+   lower = (lower << 32) | (ULong)argW0;
+   arm64g_dirtyhelper_AESMC(res, upper, lower);
+}
+
+/* CALLED FROM GENERATED CODE */
+/* AESIMC for the 32-bit guest.  Checks that |res| is 8-aligned, joins
+   the four 32-bit words into two 64-bit halves (argW3:argW2 is the
+   upper half, argW1:argW0 the lower) and forwards |res| and both
+   halves to arm64g_dirtyhelper_AESIMC. */
+void armg_dirtyhelper_AESIMC ( /*OUT*/V128* res,
+                               UInt argW3, UInt argW2,
+                               UInt argW1, UInt argW0 )
+{
+   vassert(0 == (((HWord)res) & (8-1)));
+   ULong upper = (ULong)argW3;
+   ULong lower = (ULong)argW1;
+   upper = (upper << 32) | (ULong)argW2;
+   lower = (lower << 32) | (ULong)argW0;
+   arm64g_dirtyhelper_AESIMC(res, upper, lower);
+}
+
+
+/*---------------------------------------------------------------*/
/*--- Flag-helpers translation-time function specialisers. ---*/
/*--- These help iropt specialise calls the above run-time ---*/
/*--- flags functions. ---*/
Modified: trunk/priv/guest_arm_toIR.c
==============================================================================
--- trunk/priv/guest_arm_toIR.c (original)
+++ trunk/priv/guest_arm_toIR.c Wed Aug 3 12:55:33 2016
@@ -8837,8 +8837,11 @@
Finally, the caller must indicate whether this occurs in ARM or in
Thumb code.
+
+ This only handles NEON for ARMv7 and below. The NEON extensions
+ for v8 are handled by decode_V8_instruction.
*/
-static Bool decode_NEON_instruction (
+static Bool decode_NEON_instruction_ARMv7_and_below (
/*MOD*/DisResult* dres,
UInt insn32,
IRTemp condT,
@@ -8915,7 +8918,7 @@
Caller must supply an IRTemp 'condT' holding the gating condition,
or IRTemp_INVALID indicating the insn is always executed.
- Caller must also supply an ARMCondcode 'cond'. This is only used
+ Caller must also supply an ARMCondcode 'conq'. This is only used
for debug printing, no other purpose. For ARM, this is simply the
top 4 bits of the original instruction. For Thumb, the condition
is not (really) known until run time, and so ARMCondAL should be
@@ -12581,6 +12584,198 @@
/*------------------------------------------------------------*/
+/*--- V8 instructions ---*/
+/*------------------------------------------------------------*/
+
+/* Split the 128-bit value held in |t128| into four 32-bit pieces.
+   On entry each of *t3 .. *t0 must be IRTemp_INVALID (asserted); on
+   exit they name fresh Ity_I32 temporaries, with *t0 bound to bits
+   31:0 of the value and *t3 to bits 127:96. */
+
+static void breakupV128to32s ( IRTemp t128,
+                               /*OUTs*/
+                               IRTemp* t3, IRTemp* t2,
+                               IRTemp* t1, IRTemp* t0 )
+{
+   /* First carve the vector into its two 64-bit halves. */
+   IRTemp upper = newTemp(Ity_I64);
+   IRTemp lower = newTemp(Ity_I64);
+   assign( upper, unop(Iop_V128HIto64, mkexpr(t128)) );
+   assign( lower, unop(Iop_V128to64,   mkexpr(t128)) );
+
+   /* The caller must hand us four not-yet-allocated temporaries. */
+   vassert(t0 && *t0 == IRTemp_INVALID);
+   vassert(t1 && *t1 == IRTemp_INVALID);
+   vassert(t2 && *t2 == IRTemp_INVALID);
+   vassert(t3 && *t3 == IRTemp_INVALID);
+
+   /* Allocate the outputs ... */
+   *t0 = newTemp(Ity_I32);
+   *t1 = newTemp(Ity_I32);
+   *t2 = newTemp(Ity_I32);
+   *t3 = newTemp(Ity_I32);
+
+   /* ... and bind each one to its half of the matching 64-bit value. */
+   assign( *t0, unop(Iop_64to32,   mkexpr(lower)) );
+   assign( *t1, unop(Iop_64HIto32, mkexpr(lower)) );
+   assign( *t2, unop(Iop_64to32,   mkexpr(upper)) );
+   assign( *t3, unop(Iop_64HIto32, mkexpr(upper)) );
+}
+
+
+/* Both ARM and Thumb */
+
+/* Translate a V8 instruction. If successful, returns True and *dres
+ may or may not be updated. If unsuccessful, returns False and
+ doesn't change *dres nor create any IR.
+
+ The Thumb and ARM encodings are potentially different. In both
+ ARM and Thumb mode, the caller must pass the entire 32 bits of
+ the instruction. Callers may pass any instruction; this function
+ ignores anything it doesn't recognise.
+
+ Caller must supply an IRTemp 'condT' holding the gating condition,
+ or IRTemp_INVALID indicating the insn is always executed.
+
+ If we are decoding an ARM instruction which is in the NV space
+ then it is expected that condT will be IRTemp_INVALID, and that is
+ asserted for. That condition is ensured by the logic near the top
+ of disInstr_ARM_WRK, that sets up condT.
+
+ When decoding for Thumb, the caller must pass the ITState pre/post
+ this instruction, so that we can generate a SIGILL in the cases where
+ the instruction may not be in an IT block. When decoding for ARM,
+ both of these must be IRTemp_INVALID.
+
+ Finally, the caller must indicate whether this occurs in ARM or in
+ Thumb code.
+
+ The only instructions recognised here at present are the
+ Q-register forms of AESE, AESD, AESMC and AESIMC. Each is
+ translated into a dirty call to the matching armg_dirtyhelper_AES*
+ function, with the 128-bit source passed as four 32-bit words and
+ the 128-bit result returned via IRExpr_VECRET().
+*/
+static Bool decode_V8_instruction (
+ /*MOD*/DisResult* dres,
+ UInt insnv8,
+ IRTemp condT,
+ Bool isT,
+ IRTemp old_itstate,
+ IRTemp new_itstate
+ )
+{
+# define INSNA(_bMax,_bMin) SLICE_UInt(insnv8, (_bMax), (_bMin))
+# define INSNT0(_bMax,_bMin) SLICE_UInt( ((insnv8 >> 16) & 0xFFFF), \
+ (_bMax), (_bMin) )
+# define INSNT1(_bMax,_bMin) SLICE_UInt( ((insnv8 >> 0) & 0xFFFF), \
+ (_bMax), (_bMin) )
+ //HChar dis_buf[128];
+ //dis_buf[0] = 0;
+
+ if (isT) {
+ vassert(old_itstate != IRTemp_INVALID);
+ vassert(new_itstate != IRTemp_INVALID);
+ } else {
+ vassert(old_itstate == IRTemp_INVALID);
+ vassert(new_itstate == IRTemp_INVALID);
+ }
+
+ /* ARMCondcode 'conq' is only used for debug printing and for no other
+ purpose. For ARM, this is simply the top 4 bits of the instruction.
+ For Thumb, the condition is not (really) known until run time, and so
+ we set it to ARMCondAL in order that printing of these instructions
+ does not show any condition. */
+ ARMCondcode conq;
+ if (isT) {
+ conq = ARMCondAL;
+ } else {
+ conq = (ARMCondcode)INSNA(31,28);
+ if (conq == ARMCondNV || conq == ARMCondAL) {
+ vassert(condT == IRTemp_INVALID);
+ } else {
+ vassert(condT != IRTemp_INVALID);
+ }
+ vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
+ }
+
+ /* ------- AESE.8 / AESD.8 / AESMC.8 / AESIMC.8 q_q ------- */
+ /* 31 27 23 21 19 17 15 11 7 3
+ T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
+
+ T1: 1111 1111 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
+
+ T1: 1111 1111 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
+
+ T1: 1111 1111 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
+ A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
+
+ sz must be 00
+ ARM encoding is in NV space
+
+ Bits 7:6 select the operation (00 AESE, 01 AESD, 10 AESMC,
+ 11 AESIMC) and are used directly as the index |opc| into the
+ helper tables below.
+
+ regD and regM start out as 99, an odd number, so that when the
+ encoding test does not match, the odd-register check that
+ follows it clears |gate| and nothing is generated.
+ */
+ {
+ UInt regD = 99, regM = 99, opc = 4/*invalid*/;
+ Bool gate = True;
+
+ UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
+ if (INSNA(31,23) == high9 && INSNA(21,16) == BITS6(1,1,0,0,0,0)
+ && INSNA(11,8) == BITS4(0,0,1,1) && INSNA(4,4) == 0) {
+ UInt bitD = INSNA(22,22);
+ UInt fldD = INSNA(15,12);
+ UInt bitM = INSNA(5,5);
+ UInt fldM = INSNA(3,0);
+ opc = INSNA(7,6);
+ regD = (bitD << 4) | fldD;
+ regM = (bitM << 4) | fldM;
+ }
+ if ((regD & 1) == 1 || (regM & 1) == 1)
+ gate = False; /* odd register number, or no match (still 99) */
+
+ if (gate) {
+ if (isT) {
+ gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
+ }
+ IRTemp op1 = newTemp(Ity_V128);
+ IRTemp op2 = newTemp(Ity_V128);
+ IRTemp src = newTemp(Ity_V128);
+ IRTemp res = newTemp(Ity_V128);
+ assign(op1, getQReg(regD >> 1));
+ assign(op2, getQReg(regM >> 1));
+ assign(src, opc == BITS2(0,0) || opc == BITS2(0,1)
+ ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)) /* AESE, AESD: Qd ^ Qm */
+ : mkexpr(op2)); /* AESMC, AESIMC: Qm alone */
+
+ void* helpers[4]
+ = { &armg_dirtyhelper_AESE, &armg_dirtyhelper_AESD,
+ &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
+ const HChar* hNames[4]
+ = { "armg_dirtyhelper_AESE", "armg_dirtyhelper_AESD",
+ "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
+ const HChar* iNames[4]
+ = { "aese", "aesd", "aesmc", "aesimc" };
+
+ vassert(opc >= 0 && opc <= 3);
+ void* helper = helpers[opc];
+ const HChar* hname = hNames[opc];
+
+ IRTemp w32_3, w32_2, w32_1, w32_0;
+ w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
+ breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
+
+ IRDirty* di
+ = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
+ mkIRExprVec_5(
+ IRExpr_VECRET(),
+ mkexpr(w32_3), mkexpr(w32_2),
+ mkexpr(w32_1), mkexpr(w32_0)) );
+ stmt(IRStmt_Dirty(di));
+
+ putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
+ DIP("%s.8 q%d, q%d\n", iNames[opc], regD >> 1, regM >> 1);
+ return True;
+ }
+ /* fall through */
+ }
+
+ /* ---------- Doesn't match anything. ---------- */
+ return False;
+
+# undef INSNA
+# undef INSNT0
+# undef INSNT1
+}
+
+
+/*------------------------------------------------------------*/
/*--- LDMxx/STMxx helper (both ARM and Thumb32) ---*/
/*------------------------------------------------------------*/
@@ -14456,10 +14651,12 @@
*dres may or may not be updated. If failure, returns False and
doesn't change *dres nor create any IR.
- Note that all NEON instructions (in ARM mode) are handled through
- here, since they are all in NV space.
+ Note that all NEON instructions (in ARM mode) up to and including
+ ARMv7, but not later, are handled through here, since they are all
+ in NV space.
*/
-static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
+static Bool decode_NV_instruction_ARMv7_and_below
+ ( /*MOD*/DisResult* dres,
const VexArchInfo* archinfo,
UInt insn )
{
@@ -14585,7 +14782,7 @@
/* ------------------- NEON ------------------- */
if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
- Bool ok_neon = decode_NEON_instruction(
+ Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
dres, insn, IRTemp_INVALID/*unconditional*/,
False/*!isT*/
);
@@ -14627,16 +14824,10 @@
DisResult dres;
UInt insn;
- //Bool allow_VFP = False;
- //UInt hwcaps = archinfo->hwcaps;
IRTemp condT; /* :: Ity_I32 */
UInt summary;
HChar dis_buf[128]; // big enough to hold LDMIA etc text
- /* What insn variants are we supporting today? */
- //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
- // etc etc
-
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 4;
@@ -14751,11 +14942,12 @@
case ARMCondNV: {
// Illegal instruction prior to v5 (see ARM ARM A3-5), but
// some cases are acceptable
- Bool ok = decode_NV_instruction(&dres, archinfo, insn);
+ Bool ok
+ = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
if (ok)
goto decode_success;
else
- goto decode_failure;
+ goto after_v7_decoder;
}
case ARMCondAL: // Always executed
break;
@@ -15685,7 +15877,7 @@
}
/* --- NB: ARM interworking branches are in NV space, hence
- are handled elsewhere by decode_NV_instruction.
+ are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
---
*/
@@ -17341,7 +17533,8 @@
/* ----------------------------------------------------------- */
/* These are all in NV space, and so are taken care of (far) above,
- by a call from this function to decode_NV_instruction(). */
+ by a call from this function to
+ decode_NV_instruction_ARMv7_and_below(). */
/* ----------------------------------------------------------- */
/* -- v6 media instructions (in ARM mode) -- */
@@ -17356,6 +17549,24 @@
}
/* ----------------------------------------------------------- */
+ /* -- v8 instructions (in ARM mode) -- */
+ /* ----------------------------------------------------------- */
+
+ after_v7_decoder:
+
+ /* If we get here, it means that all attempts to decode the
+ instruction as ARMv7 or earlier have failed. So, if we're doing
+ ARMv8 or later, here is the point to try for it. */
+
+ if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
+ Bool ok_v8
+ = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
+ IRTemp_INVALID, IRTemp_INVALID );
+ if (ok_v8)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
/* -- Undecodable -- */
/* ----------------------------------------------------------- */
@@ -17497,18 +17708,12 @@
DisResult dres;
UShort insn0; /* first 16 bits of the insn */
UShort insn1; /* second 16 bits of the insn */
- //Bool allow_VFP = False;
- //UInt hwcaps = archinfo->hwcaps;
HChar dis_buf[128]; // big enough to hold LDMIA etc text
/* Summary result of the ITxxx backwards analysis: False == safe
but suboptimal. */
Bool guaranteedUnconditional = False;
- /* What insn variants are we supporting today? */
- //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
- // etc etc
-
/* Set result defaults. */
dres.whatNext = Dis_Continue;
dres.len = 2;
@@ -21921,12 +22126,12 @@
}
/* ----------------------------------------------------------- */
- /* -- NEON instructions (in Thumb mode) -- */
+ /* -- NEON instructions (only v7 and below, in Thumb mode) -- */
/* ----------------------------------------------------------- */
if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
- Bool ok_neon = decode_NEON_instruction(
+ Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
&dres, insn32, condT, True/*isT*/
);
if (ok_neon)
@@ -21947,6 +22152,23 @@
}
/* ----------------------------------------------------------- */
+ /* -- v8 instructions (in Thumb mode) -- */
+ /* ----------------------------------------------------------- */
+
+ /* If we get here, it means that all attempts to decode the
+ instruction as ARMv7 or earlier have failed. So, if we're doing
+ ARMv8 or later, here is the point to try for it. */
+
+ if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
+ UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
+ Bool ok_v8
+ = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
+ old_itstate, new_itstate );
+ if (ok_v8)
+ goto decode_success;
+ }
+
+ /* ----------------------------------------------------------- */
/* -- Undecodable -- */
/* ----------------------------------------------------------- */
|
|
From: <sv...@va...> - 2016-08-03 11:53:19
|
Author: sewardj
Date: Wed Aug 3 12:53:11 2016
New Revision: 3235
Log:
arm32 backend stuff needed to support IR artefacts resulting from
guest support of 32-bit V8 crypto instructions:
* add new pseudo-instruction ARMin_VXferQ, to move values between
two D regs and a Q reg, in either direction. Use this to implement
Iop_64HLtoV128 much more efficiently than before, and to implement
Iop_V128HIto64 and Iop_V128to64.
* Generate code for helper calls which have four or more
(32-bit) word-sized arguments and a V128 return value.
These require passing arguments on the stack.
Modified:
trunk/priv/host_arm_defs.c
trunk/priv/host_arm_defs.h
trunk/priv/host_arm_isel.c
Modified: trunk/priv/host_arm_defs.c
==============================================================================
--- trunk/priv/host_arm_defs.c (original)
+++ trunk/priv/host_arm_defs.c Wed Aug 3 12:53:11 2016
@@ -1329,6 +1329,15 @@
i->ARMin.VCvtSD.src = src;
return i;
}
+/* Build an ARMin_VXferQ instruction record from the given direction
+   flag |toQ|, Q register |qD| and D registers |dHi| / |dLo|. */
+ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
+   ARMInstr* ins = LibVEX_Alloc_inline(sizeof(ARMInstr));
+   ins->tag              = ARMin_VXferQ;
+   ins->ARMin.VXferQ.dLo = dLo;
+   ins->ARMin.VXferQ.dHi = dHi;
+   ins->ARMin.VXferQ.qD  = qD;
+   ins->ARMin.VXferQ.toQ = toQ;
+   return ins;
+}
ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
i->tag = ARMin_VXferD;
@@ -1800,6 +1809,29 @@
vex_printf(", ");
ppHRegARM(i->ARMin.VCvtSD.src);
return;
+ case ARMin_VXferQ:
+ if (i->ARMin.VXferQ.toQ) {
+ vex_printf("vmov ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-lo64, ");
+ ppHRegARM(i->ARMin.VXferQ.dLo);
+ vex_printf(" ; vmov ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-hi64, ");
+ ppHRegARM(i->ARMin.VXferQ.dHi);
+ } else {
+ vex_printf("vmov ");
+ ppHRegARM(i->ARMin.VXferQ.dLo);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-lo64");
+ vex_printf(" ; vmov ");
+ ppHRegARM(i->ARMin.VXferQ.dHi);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.VXferQ.qD);
+ vex_printf("-hi64");
+ }
+ return;
case ARMin_VXferD:
vex_printf("vmov ");
if (i->ARMin.VXferD.toD) {
@@ -2201,6 +2233,17 @@
addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
return;
+ case ARMin_VXferQ:
+ if (i->ARMin.VXferQ.toQ) {
+ addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
+ addHRegUse(u, HRmRead, i->ARMin.VXferQ.dHi);
+ addHRegUse(u, HRmRead, i->ARMin.VXferQ.dLo);
+ } else {
+ addHRegUse(u, HRmRead, i->ARMin.VXferQ.qD);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
+ addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
+ }
+ return;
case ARMin_VXferD:
if (i->ARMin.VXferD.toD) {
addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
@@ -2422,6 +2465,11 @@
i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
return;
+ case ARMin_VXferQ:
+ i->ARMin.VXferQ.qD = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
+ i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
+ i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
+ return;
case ARMin_VXferD:
i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
@@ -3682,6 +3730,46 @@
goto done;
}
}
+ case ARMin_VXferQ: {
+ UInt insn;
+ UInt qD = qregEnc(i->ARMin.VXferQ.qD);
+ UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
+ UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
+ /* This is a bit tricky. We need to make 2 D-D moves and we rely
+ on the fact that the Q register can be treated as two D registers.
+ We also rely on the fact that the register allocator will allocate
+ the two D's and the Q to disjoint parts of the register file,
+ and so we don't have to worry about the first move's destination
+ being the same as the second move's source, etc. We do have
+ assertions though. */
+ /* The ARM ARM specifies that
+ D<2n> maps to the least significant half of Q<n>
+ D<2n+1> maps to the most significant half of Q<n>
+ So there are no issues with endianness here.
+ */
+ UInt qDlo = 2 * qD + 0;
+ UInt qDhi = 2 * qD + 1;
+ /* Stay sane .. */
+ vassert(qDhi != dHi && qDhi != dLo);
+ vassert(qDlo != dHi && qDlo != dLo);
+ /* vmov dX, dY is
+ F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
+ */
+# define VMOV_D_D(_xx,_yy) \
+ XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
+ ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
+ BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
+ ((_yy) & 0xF) )
+ if (i->ARMin.VXferQ.toQ) {
+ insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
+ insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
+ } else {
+ insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
+ insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
+ }
+# undef VMOV_D_D
+ goto done;
+ }
case ARMin_VXferD: {
UInt dD = dregEnc(i->ARMin.VXferD.dD);
UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
Modified: trunk/priv/host_arm_defs.h
==============================================================================
--- trunk/priv/host_arm_defs.h (original)
+++ trunk/priv/host_arm_defs.h Wed Aug 3 12:53:11 2016
@@ -591,6 +591,7 @@
ARMin_VCMovD,
ARMin_VCMovS,
ARMin_VCvtSD,
+ ARMin_VXferQ,
ARMin_VXferD,
ARMin_VXferS,
ARMin_VCvtID,
@@ -824,6 +825,13 @@
HReg dst;
HReg src;
} VCvtSD;
+ /* Transfer a NEON Q reg to/from two D registers (VMOV x 2) */
+ struct {
+ Bool toQ;
+ HReg qD;
+ HReg dHi;
+ HReg dLo;
+ } VXferQ;
/* Transfer a VFP D reg to/from two integer registers (VMOV) */
struct {
Bool toD;
@@ -994,6 +1002,7 @@
extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
extern ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src );
+extern ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo );
extern ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo );
extern ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo );
extern ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
Modified: trunk/priv/host_arm_isel.c
==============================================================================
--- trunk/priv/host_arm_isel.c (original)
+++ trunk/priv/host_arm_isel.c Wed Aug 3 12:53:11 2016
@@ -368,6 +368,134 @@
}
+/* Generate a helper call for the one argument shape that needs
+   stacked arguments: VECRET followed by between 4 and 12 Ity_I32
+   values.
+
+   Returns True if the call was generated, in which case
+   *stackAdjustAfterCall and *retloc (both of which must be in their
+   initial state on entry; this is asserted) describe how much to add
+   to SP afterwards and where the V128 result is.  Returns False if
+   |args| does not have the supported shape, or if |guard| is anything
+   other than NULL or the constant 1:Bit.  |retTy| is not examined. */
+static
+Bool doHelperCallWithArgsOnStack ( /*OUT*/UInt* stackAdjustAfterCall,
+                                   /*OUT*/RetLoc* retloc,
+                                   ISelEnv* env,
+                                   IRExpr* guard,
+                                   IRCallee* cee, IRType retTy, IRExpr** args )
+{
+   /* Largest number of Ity_I32 args (not counting VECRET) handled
+      here.  The acceptance test, the sanity assertion and the size of
+      argVRegs[] all use this one value so they cannot disagree. */
+   enum { MAX_REAL_ARGS = 12 };
+
+   /* This function deals just with the case where the arg sequence is:
+      VECRET followed by between 4 and 12 Ity_I32 values.  So far no other
+      cases are necessary or supported. */
+
+   /* Check this matches the required format. */
+   if (args[0] == NULL || args[0]->tag != Iex_VECRET)
+      goto no_match;
+
+   UInt i;
+   UInt n_real_args = 0;
+   for (i = 1; args[i]; i++) {
+      IRExpr* arg = args[i];
+      if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(arg)))
+         goto no_match;
+      IRType argTy = typeOfIRExpr(env->type_env, arg);
+      if (UNLIKELY(argTy != Ity_I32))
+         goto no_match;
+      n_real_args++;
+   }
+
+   /* We expect to pass at least some args on the stack. */
+   if (n_real_args <= 3)
+      goto no_match;
+
+   /* But not too many. */
+   if (n_real_args > MAX_REAL_ARGS)
+      goto no_match;
+
+   /* General rules for a call:
+
+      Args 1 .. 4 go in R0 .. R3.  The rest are pushed R to L on the
+      stack; that is, arg 5 is at the lowest address, arg 6 at the
+      next lowest, etc.
+
+      The stack is to be kept 8 aligned.
+
+      It appears (for unclear reasons) that the highest 3 words made
+      available when moving SP downwards are not to be used.  For
+      example, if 5 args are to go on the stack, then SP must be moved
+      down 32 bytes, and the area at SP+20 .. SP+31 is not to be used
+      by the caller.
+   */
+
+   /* For this particular case, we use the following layout:
+
+        ------ original SP
+        112 bytes
+        ------
+        return value
+        ------ original SP - 128
+        space
+        args words, between 1 and 9
+        ------ new SP = original_SP - 256
+
+      Using 256 bytes is overkill, but it is simple and good enough.
+      (R0 carries the address of the return value and R1 .. R3 carry
+      the first three real args, so of at most 12 real args at most 9
+      are stored on the stack.)
+   */
+
+   /* This should really be
+         HReg argVRegs[n_real_args];
+      but that makes it impossible to do 'goto's forward past.
+      Hence the following kludge. */
+   vassert(n_real_args <= MAX_REAL_ARGS);
+   HReg argVRegs[MAX_REAL_ARGS];
+   for (i = 0; i < MAX_REAL_ARGS; i++)
+      argVRegs[i] = INVALID_HREG;
+
+   /* Compute args into vregs. */
+   for (i = 0; i < n_real_args; i++) {
+      argVRegs[i] = iselIntExpr_R(env, args[i+1]);
+   }
+
+   /* Now we can compute the condition.  We can't do it earlier
+      because the argument computations could trash the condition
+      codes.  Be a bit clever to handle the common case where the
+      guard is 1:Bit. */
+   ARMCondCode cc = ARMcc_AL;
+   if (guard) {
+      if (guard->tag == Iex_Const
+          && guard->Iex.Const.con->tag == Ico_U1
+          && guard->Iex.Const.con->Ico.U1 == True) {
+         /* unconditional -- do nothing */
+      } else {
+         /* Conditional calls are deliberately refused for now; the
+            assignment below is unreachable until the goto is removed. */
+         goto no_match; //ATC
+         cc = iselCondCode( env, guard );
+      }
+   }
+
+   HReg r0 = hregARM_R0();
+   HReg sp = hregARM_R13();
+
+   ARMRI84* c256 = ARMRI84_I84(64, 15); // 64 `ror` (15 * 2)
+
+   /* R0 = address of the return value slot, computed before SP is
+      moved: original SP - 128. */
+   addInstr(env, ARMInstr_Alu(ARMalu_SUB, r0, sp, ARMRI84_I84(128, 0)));
+
+   /* First three real args go in R1 .. R3. */
+   addInstr(env, mk_iMOVds_RR(hregARM_R1(), argVRegs[0]));
+   addInstr(env, mk_iMOVds_RR(hregARM_R2(), argVRegs[1]));
+   addInstr(env, mk_iMOVds_RR(hregARM_R3(), argVRegs[2]));
+
+   /* Drop SP by 256 ... */
+   addInstr(env, ARMInstr_Alu(ARMalu_SUB, sp, sp, c256));
+
+   /* ... and store the remaining args at new SP + 0, + 4, + 8, ... */
+   for (i = 3; i < n_real_args; i++) {
+      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/, argVRegs[i],
+                                    ARMAMode1_RI(sp, (i-3) * 4)));
+   }
+
+   vassert(*stackAdjustAfterCall == 0);
+   vassert(is_RetLoc_INVALID(*retloc));
+
+   /* The result is at new SP + 128, which is original SP - 128, the
+      address placed in R0 above. */
+   *stackAdjustAfterCall = 256;
+   *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 128);
+
+   Addr32 target = (Addr)cee->addr;
+   addInstr(env, ARMInstr_Call( cc, target, 4, *retloc ));
+
+   return True; /* success */
+
+  no_match:
+   return False;
+}
+
+
/* Do a complete function call. |guard| is a Ity_Bit expression
indicating whether or not the call happens. If guard==NULL, the
call is unconditional. |retloc| is set to indicate where the
@@ -470,6 +598,21 @@
n_args++;
}
+ /* If there are more than 4 args, we are going to have to pass
+ some via memory. Use a different function to (possibly) deal with
+ that; dealing with it here is too complex. */
+ if (n_args > ARM_N_ARGREGS) {
+ return doHelperCallWithArgsOnStack(stackAdjustAfterCall, retloc,
+ env, guard, cee, retTy, args );
+
+ }
+
+ /* After this point we make no attempt to pass args on the stack,
+ and just give up in that case (which is OK because it never
+ happens). Even if there are for example only 3 args, it might
+ still be necessary to pass some of them on the stack if for example
+ two or more of them are 64-bit integers. */
+
argregs[0] = hregARM_R0();
argregs[1] = hregARM_R1();
argregs[2] = hregARM_R2();
@@ -653,30 +796,30 @@
vassert(*stackAdjustAfterCall == 0);
vassert(is_RetLoc_INVALID(*retloc));
switch (retTy) {
- case Ity_INVALID:
- /* Function doesn't return a value. */
- *retloc = mk_RetLoc_simple(RLPri_None);
- break;
- case Ity_I64:
- *retloc = mk_RetLoc_simple(RLPri_2Int);
- break;
- case Ity_I32: case Ity_I16: case Ity_I8:
- *retloc = mk_RetLoc_simple(RLPri_Int);
- break;
- case Ity_V128:
- vassert(0); // ATC
- *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
- *stackAdjustAfterCall = 16;
- break;
- case Ity_V256:
- vassert(0); // ATC
- *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
- *stackAdjustAfterCall = 32;
- break;
- default:
- /* IR can denote other possible return types, but we don't
- handle those here. */
- vassert(0);
+ case Ity_INVALID:
+ /* Function doesn't return a value. */
+ *retloc = mk_RetLoc_simple(RLPri_None);
+ break;
+ case Ity_I64:
+ *retloc = mk_RetLoc_simple(RLPri_2Int);
+ break;
+ case Ity_I32: case Ity_I16: case Ity_I8:
+ *retloc = mk_RetLoc_simple(RLPri_Int);
+ break;
+ case Ity_V128:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
+ *stackAdjustAfterCall = 16;
+ break;
+ case Ity_V256:
+ vassert(0); // ATC
+ *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
+ *stackAdjustAfterCall = 32;
+ break;
+ default:
+ /* IR can denote other possible return types, but we don't
+ handle those here. */
+ vassert(0);
}
/* Finally, generate the call itself. This needs the *retloc value
@@ -3714,6 +3857,14 @@
res, arg, 0, False));
return res;
}
+ case Iop_V128to64:
+ case Iop_V128HIto64: {
+ HReg src = iselNeonExpr(env, e->Iex.Unop.arg);
+ HReg resLo = newVRegD(env);
+ HReg resHi = newVRegD(env);
+ addInstr(env, ARMInstr_VXferQ(False/*!toQ*/, src, resHi, resLo));
+ return e->Iex.Unop.op == Iop_V128HIto64 ? resHi : resLo;
+ }
default:
break;
}
@@ -4305,7 +4456,7 @@
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
- case Iop_64HLtoV128:
+ case Iop_64HLtoV128: {
/* Try to match into single "VMOV reg, imm" instruction */
if (e->Iex.Binop.arg1->tag == Iex_Const &&
e->Iex.Binop.arg2->tag == Iex_Const &&
@@ -4349,45 +4500,12 @@
}
/* Does not match "VMOV Reg, Imm" form. We'll have to do
it the slow way. */
- {
- /* local scope */
- /* Done via the stack for ease of use. */
- /* FIXME: assumes little endian host */
- HReg w3, w2, w1, w0;
- HReg res = newVRegV(env);
- ARMAMode1* sp_0 = ARMAMode1_RI(hregARM_R13(), 0);
- ARMAMode1* sp_4 = ARMAMode1_RI(hregARM_R13(), 4);
- ARMAMode1* sp_8 = ARMAMode1_RI(hregARM_R13(), 8);
- ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
- ARMRI84* c_16 = ARMRI84_I84(16,0);
- /* Make space for SP */
- addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
- hregARM_R13(), c_16));
-
- /* Store the less significant 64 bits */
- iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w0, sp_0));
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w1, sp_4));
-
- /* Store the more significant 64 bits */
- iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w2, sp_8));
- addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
- w3, sp_12));
-
- /* Load result back from stack. */
- addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
- mkARMAModeN_R(hregARM_R13())));
-
- /* Restore SP */
- addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
- hregARM_R13(), c_16));
- return res;
- } /* local scope */
- goto neon_expr_bad;
+ HReg dHi = iselNeon64Expr(env, e->Iex.Binop.arg1);
+ HReg dLo = iselNeon64Expr(env, e->Iex.Binop.arg2);
+ HReg res = newVRegV(env);
+ addInstr(env, ARMInstr_VXferQ(True/*toQ*/, res, dHi, dLo));
+ return res;
+ }
case Iop_AndV128: {
HReg res = newVRegV(env);
HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
@@ -5359,7 +5477,7 @@
return dst;
}
- neon_expr_bad:
+ /* neon_expr_bad: */
ppIRExpr(e);
vpanic("iselNeonExpr_wrk");
}
@@ -5974,7 +6092,7 @@
switch (retty) {
case Ity_INVALID: /* function doesn't return anything */
case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
- //case Ity_V128: //ATC
+ case Ity_V128:
retty_ok = True; break;
default:
break;
@@ -5987,7 +6105,9 @@
call is skipped. */
UInt addToSp = 0;
RetLoc rloc = mk_RetLoc_INVALID();
- doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
+ Bool ok = doHelperCall( &addToSp, &rloc, env,
+ d->guard, d->cee, retty, d->args );
+ if (!ok) goto stmt_fail;
vassert(is_sane_RetLoc(rloc));
/* Now figure out what to do with the returned value, if any. */
@@ -6026,11 +6146,6 @@
return;
}
case Ity_V128: {
- vassert(0); // ATC. The code that this produces really
- // needs to be looked at, to verify correctness.
- // I don't think this can ever happen though, since the
- // ARM front end never produces 128-bit loads/stores.
- // Hence the following is mostly theoretical.
/* The returned value is on the stack, and *retloc tells
us where. Fish it off the stack and then move the
stack pointer upwards to clear it, as directed by
@@ -6038,16 +6153,26 @@
vassert(rloc.pri == RLPri_V128SpRel);
vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
vassert(addToSp >= 16);
- vassert(addToSp < 256); // ditto reason as for rloc.spOff
+ vassert(addToSp <= 256);
+ /* Both the stack delta and the offset must be at least 8-aligned.
+ If that isn't so, doHelperCall() has generated bad code. */
+ vassert(0 == (rloc.spOff % 8));
+ vassert(0 == (addToSp % 8));
HReg dst = lookupIRTemp(env, d->tmp);
HReg tmp = newVRegI(env);
- HReg r13 = hregARM_R13(); // sp
+ HReg sp = hregARM_R13();
addInstr(env, ARMInstr_Alu(ARMalu_ADD,
- tmp, r13, ARMRI84_I84(rloc.spOff,0)));
+ tmp, sp, ARMRI84_I84(rloc.spOff,0)));
ARMAModeN* am = mkARMAModeN_R(tmp);
+ /* This load could be done with its effective address 0 % 8,
+ because that's the best stack alignment that we can be
+ assured of. */
addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
- addInstr(env, ARMInstr_Alu(ARMalu_ADD,
- r13, r13, ARMRI84_I84(addToSp,0)));
+
+ ARMRI84* spAdj
+ = addToSp == 256 ? ARMRI84_I84(64, 15) // 64 `ror` (15 * 2)
+ : ARMRI84_I84(addToSp, 0);
+ addInstr(env, ARMInstr_Alu(ARMalu_ADD, sp, sp, spAdj));
return;
}
default:
|
|
From: <sv...@va...> - 2016-08-03 11:44:09
|
Author: sewardj
Date: Wed Aug 3 12:44:02 2016
New Revision: 15922
Log:
Add test cases for v8 crypto instructions in 32-bit mode. The test is not yet
connected to the build/test system.
Added:
trunk/none/tests/arm/v8crypto.c
Added: trunk/none/tests/arm/v8crypto.c
==============================================================================
--- trunk/none/tests/arm/v8crypto.c (added)
+++ trunk/none/tests/arm/v8crypto.c Wed Aug 3 12:44:02 2016
@@ -0,0 +1,250 @@
+
+/*
+gcc -o v8crypto v8crypto.c -march=armv8-a -mfpu=crypto-neon-fp-armv8
+gcc -o v8crypto v8crypto.c -mfpu=crypto-neon-fp-armv8
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h> // memalign
+#include <string.h> // memset
+#include "tests/malloc.h"
+#include <math.h> // isnormal
+
+typedef unsigned char UChar;
+typedef unsigned short int UShort;
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef unsigned long long int ULong;
+typedef signed long long int Long;
+typedef double Double;
+typedef float Float;
+
+typedef unsigned char Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+
+#define ITERS 1
+
+typedef
+ enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
+ LaneTy;
+
+union _V128 {
+ UChar u8[16];
+ UShort u16[8];
+ UInt u32[4];
+ ULong u64[2];
+ Float f32[4];
+ Double f64[2];
+};
+typedef union _V128 V128;
+
+static inline UChar randUChar ( void )
+{
+ static UInt seed = 80021;
+ seed = 1103515245 * seed + 12345;
+ return (seed >> 17) & 0xFF;
+}
+
+static ULong randULong ( LaneTy ty )
+{
+ Int i;
+ ULong r = 0;
+ for (i = 0; i < 8; i++) {
+ r = (r << 8) | (ULong)(0xFF & randUChar());
+ }
+ return r;
+}
+
+/* Generates a random V128. Ensures that it contains normalised
+ FP numbers when viewed as either F32x4 or F64x2, so that it is
+ reasonable to use in FP test cases. */
+static void randV128 ( /*OUT*/V128* v, LaneTy ty )
+{
+ static UInt nCalls = 0, nIters = 0;
+ Int i;
+ nCalls++;
+ while (1) {
+ nIters++;
+ for (i = 0; i < 16; i++) {
+ v->u8[i] = randUChar();
+ }
+ if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
+ && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
+ break;
+ }
+ if (0 == (nCalls & 0xFF))
+ printf("randV128: %u calls, %u iters\n", nCalls, nIters);
+}
+
+static void showV128 ( V128* v )
+{
+ Int i;
+ for (i = 15; i >= 0; i--)
+ printf("%02x", (Int)v->u8[i]);
+}
+
+static void showBlock ( const char* msg, V128* block, Int nBlock )
+{
+ Int i;
+ printf("%s\n", msg);
+ for (i = 0; i < nBlock; i++) {
+ printf(" ");
+ showV128(&block[i]);
+ printf("\n");
+ }
+}
+
+
+/* ---------------------------------------------------------------- */
+/* -- Parameterisable test macros -- */
+/* ---------------------------------------------------------------- */
+
+#define DO50(_action) \
+ do { \
+ Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
+ } while (0)
+
+
+/* Generate a test that involves two vector regs,
+ with no bias as to which is input or output.
+ It's OK to use r8 as scratch.*/
+#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( LaneTy ty ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[4+1]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0], ty); \
+ randV128(&block[1], ty); \
+ randV128(&block[2], ty); \
+ randV128(&block[3], ty); \
+ __asm__ __volatile__( \
+ "mov r9, #0 ; vmsr fpscr, r9 ; " \
+ "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
+ INSN " ; " \
+ "add r9, %0, #32 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #48 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
+ "vmrs r9, fpscr ; str r9, [%0, #64] " \
+ : : "r"(&block[0]) \
+ : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "r8", "r9" \
+ ); \
+ printf(INSN " "); \
+ UInt fpscr = 0xFFFFFFFF & block[4].u32[0]; \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf(" "); \
+ showV128(&block[2]); printf(" "); \
+ showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \
+ } \
+ }
+
+
+/* Generate a test that involves three vector regs,
+ with no bias as to which is input or output. It's also OK
+ to use r8 as scratch. */
+#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( LaneTy ty ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[6+1]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0], ty); \
+ randV128(&block[1], ty); \
+ randV128(&block[2], ty); \
+ randV128(&block[3], ty); \
+ randV128(&block[4], ty); \
+ randV128(&block[5], ty); \
+ __asm__ __volatile__( \
+ "mov r9, #0 ; vmsr fpscr, r9 ; " \
+ "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
+ "add r9, %0, #32 ; vld1.8 { q"#VECREG3NO" }, [r9] ; " \
+ INSN " ; " \
+ "add r9, %0, #48 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
+ "add r9, %0, #64 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
+ "add r9, %0, #80 ; vst1.8 { q"#VECREG3NO" }, [r9] ; " \
+ "vmrs r9, fpscr ; str r9, [%0, #96] " \
+ : : "r"(&block[0]) \
+ : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "q"#VECREG3NO, \
+ "r8", "r9" \
+ ); \
+ printf(INSN " "); \
+ UInt fpscr = 0xFFFFFFFF & block[6].u32[0]; \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf(" "); \
+ showV128(&block[2]); printf(" "); \
+ showV128(&block[3]); printf(" "); \
+ showV128(&block[4]); printf(" "); \
+ showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \
+ } \
+ }
+
+// ======================== CRYPTO ========================
+
+GEN_TWOVEC_TEST(aesd_q_q, "aesd.8 q3, q4", 3, 4)
+GEN_TWOVEC_TEST(aese_q_q, "aese.8 q12, q13", 12, 13)
+GEN_TWOVEC_TEST(aesimc_q_q, "aesimc.8 q15, q0", 15, 0)
+GEN_TWOVEC_TEST(aesmc_q_q, "aesmc.8 q1, q9", 1, 9)
+
+GEN_THREEVEC_TEST(sha1c_q_q_q, "sha1c.32 q11, q10, q2", 11, 10, 2)
+GEN_TWOVEC_TEST(sha1h_q_q, "sha1h.32 q6, q7", 6, 7)
+GEN_THREEVEC_TEST(sha1m_q_q_q, "sha1m.32 q2, q8, q13", 2, 8, 13)
+GEN_THREEVEC_TEST(sha1p_q_q_q, "sha1p.32 q3, q9, q14", 3, 9, 14)
+GEN_THREEVEC_TEST(sha1su0_q_q_q, "sha1su0.32 q4, q10, q15", 4, 10, 15)
+GEN_TWOVEC_TEST(sha1su1_q_q, "sha1su1.32 q11, q2", 11, 2)
+
+GEN_THREEVEC_TEST(sha256h2_q_q_q, "sha256h2.32 q9, q8, q7", 9, 8, 7)
+GEN_THREEVEC_TEST(sha256h_q_q_q, "sha256h.32 q10, q9, q8", 10, 9, 8)
+GEN_TWOVEC_TEST(sha256su0_q_q, "sha256su0.32 q11, q10", 11, 10)
+GEN_THREEVEC_TEST(sha256su1_q_q_q, "sha256su1.32 q12, q11, q10", 12, 11, 10)
+
+// This is a bit complex.
+//GEN_THREEVEC_TEST(pmull_q_d_d, 1q, 1d, 1d)
+
+int main ( void )
+{
+ // ======================== CRYPTO ========================
+
+ // aesd.8 q_q (aes single round decryption)
+ // aese.8 q_q (aes single round encryption)
+ // aesimc.8 q_q (aes inverse mix columns)
+ // aesmc.8 q_q (aes mix columns)
+ if (1) DO50( test_aesd_q_q(TyNONE) );
+ if (1) DO50( test_aese_q_q(TyNONE) );
+ if (1) DO50( test_aesimc_q_q(TyNONE) );
+ if (1) DO50( test_aesmc_q_q(TyNONE) );
+
+#if 0
+ // sha1c.32 q_q_q
+ // sha1h.32 q_q
+ // sha1m.32 q_q_q
+ // sha1p.32 q_q_q
+ // sha1su0.32 q_q_q
+ // sha1su1.32 q_q
+ if (1) DO50( test_sha1c_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1h_q_q(TyNONE) );
+ if (1) DO50( test_sha1m_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1p_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1su0_q_q_q(TyNONE) );
+ if (1) DO50( test_sha1su1_q_q(TyNONE) );
+
+ // sha256h2.32 q_q_q
+ // sha256h.32 q_q_q
+ // sha256su0.32 q_q
+ // sha256su1.32 q_q_q
+ if (1) DO50( test_sha256h2_q_q_q(TyNONE) );
+ if (1) DO50( test_sha256h_q_q_q(TyNONE) );
+ if (1) DO50( test_sha256su0_q_q(TyNONE) );
+ if (1) DO50( test_sha256su1_q_q_q(TyNONE) );
+
+ // vmull.64 q_d_d
+ if (1) test_pmull_q_d_d(TyD);
+#endif
+ return 0;
+}
|
|
From: <sv...@va...> - 2016-08-03 11:41:30
|
Author: sewardj
Date: Wed Aug 3 12:41:24 2016
New Revision: 3234
Log:
Add infrastructure for detection of 32-bit ARMv8 capable CPUs (VEX side).
Modified:
trunk/priv/main_main.c
Modified: trunk/priv/main_main.c
==============================================================================
--- trunk/priv/main_main.c (original)
+++ trunk/priv/main_main.c Wed Aug 3 12:41:24 2016
@@ -1933,8 +1933,8 @@
case VexArchARM: {
Bool NEON = ((hwcaps & VEX_HWCAPS_ARM_NEON) != 0);
+ Bool VFP3 = ((hwcaps & VEX_HWCAPS_ARM_VFP3) != 0);
UInt level = VEX_ARM_ARCHLEVEL(hwcaps);
-
switch (level) {
case 5:
if (NEON)
@@ -1948,6 +1948,11 @@
return;
case 7:
return;
+ case 8:
+ if (!NEON || !VFP3)
+ invalid_hwcaps(arch, hwcaps,
+ "NEON and VFP3 are required for ARMv8.\n");
+ return;
default:
invalid_hwcaps(arch, hwcaps,
"ARM architecture level is not supported.\n");
|
|
From: <sv...@va...> - 2016-08-03 11:40:45
|
Author: sewardj
Date: Wed Aug 3 12:40:36 2016
New Revision: 15921
Log:
Add infrastructure for detection of 32-bit ARMv8 capable CPUs (Valgrind side).
Modified:
trunk/coregrind/m_initimg/initimg-linux.c
trunk/coregrind/m_machine.c
Modified: trunk/coregrind/m_initimg/initimg-linux.c
==============================================================================
--- trunk/coregrind/m_initimg/initimg-linux.c (original)
+++ trunk/coregrind/m_initimg/initimg-linux.c Wed Aug 3 12:40:36 2016
@@ -691,8 +691,9 @@
"ARM has-neon from-auxv: %s\n",
has_neon ? "YES" : "NO");
VG_(machine_arm_set_has_NEON)( has_neon );
- #define VKI_HWCAP_TLS 32768
+# define VKI_HWCAP_TLS 32768
Bool has_tls = (auxv->u.a_val & VKI_HWCAP_TLS) > 0;
+# undef VKI_HWCAP_TLS
VG_(debugLog)(2, "initimg",
"ARM has-tls from-auxv: %s\n",
has_tls ? "YES" : "NO");
Modified: trunk/coregrind/m_machine.c
==============================================================================
--- trunk/coregrind/m_machine.c (original)
+++ trunk/coregrind/m_machine.c Wed Aug 3 12:40:36 2016
@@ -1523,7 +1523,7 @@
vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
- volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
+ volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON, have_V8;
volatile Int archlevel;
Int r;
@@ -1602,6 +1602,19 @@
}
}
+ /* ARMv8 insns */
+ have_V8 = True;
+ if (archlevel == 7) {
+ if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
+ have_V8 = False;
+ } else {
+ __asm__ __volatile__(".word 0xF3044F54"); /* VMAXNM.F32 q2,q2,q2 */
+ }
+ if (have_V8 && have_NEON && have_VFP3) {
+ archlevel = 8;
+ }
+ }
+
VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
|