You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
|
1
(7) |
|
2
(13) |
3
(14) |
4
(11) |
5
(10) |
6
|
7
(5) |
8
(12) |
|
9
(14) |
10
(19) |
11
(16) |
12
(13) |
13
(11) |
14
(4) |
15
(5) |
|
16
(11) |
17
(10) |
18
(4) |
19
(15) |
20
(12) |
21
(11) |
22
|
|
23
|
24
|
25
|
26
(1) |
27
(6) |
28
(10) |
29
(10) |
|
30
(1) |
31
|
|
|
|
|
|
|
From: <sv...@va...> - 2014-03-01 11:28:16
|
Author: sewardj
Date: Sat Mar 1 11:27:59 2014
New Revision: 13847
Log:
Update; document missing test cases.
Modified:
trunk/none/tests/arm64/test_arm64_int.c
Modified: trunk/none/tests/arm64/test_arm64_int.c
==============================================================================
--- trunk/none/tests/arm64/test_arm64_int.c (original)
+++ trunk/none/tests/arm64/test_arm64_int.c Sat Mar 1 11:27:59 2014
@@ -1,11 +1,13 @@
-// To compile:
-// aarch64-linux-gnu-gcc -Wall -g -O0 -o test_arm64_int test_arm64_int.c
-
-// The ubfm/sbfm/bfm tests are huge and take ages to compile and run.
-// Set TEST_BFM to 0 to skip them.
-#define TEST_BFM 1
-
+/* To compile:
+ aarch64-linux-gnu-gcc -Wall -g -O0 -o test_arm64_int test_arm64_int.c \
+ -DTEST_BFM=1 # (or 0)
+ The ubfm/sbfm/bfm tests are huge and take ages to compile and run.
+ Set TEST_BFM to 0 to skip them.
+*/
+#ifndef TEST_BFM
+# define TEST_BFM 1
+#endif
#include <stdio.h>
@@ -122,7 +124,8 @@
}
// Same as TESTINST2 except it doesn't print the RN value, since
-// that may differ between runs (it's a stack address).
+// that may differ between runs (it's a stack address). Also,
+// claim it trashes x28 so that x28 can be used as scratch if needed.
#define TESTINST2_hide2(instruction, RNval, RD, RN, carryin) \
{ \
ULong out; \
@@ -136,7 +139,7 @@
"mrs %1,nzcv;" \
: "=&r" (out), "=&r" (nzcv_out) \
: "r" (RNval), "r" (nzcv_in) \
- : #RD, #RN, "cc", "memory" \
+ : #RD, #RN, "cc", "memory", "x28" \
); \
printf("%s :: rd %016llx rn (hidden), " \
"cin %d, nzcv %08llx %c%c%c%c\n", \
@@ -350,11 +353,9 @@
TESTINST2("subs x3, x4, #0xD87, lsl #0", 0x8000000000000d87, x3, x4, 0);
TESTINST2("subs x3, x4, #0xD87, lsl #0", 0x8000000000000d88, x3, x4, 0);
-#if 0
////////////////////////////////////////////////////////////////
-printf("ADR/ADRP\n");
-TESTINST1("adrp x27, #0x987", x27, 0);
-#endif
+printf("ADR/ADRP MISSING (results are PC dependant)\n");
+//TESTINST1("adrp x27, #0x987", x27, 0);
////////////////////////////////////////////////////////////////
printf("AND(imm)\n");
@@ -10276,6 +10277,18 @@
////////////////////////////////////////////////////////////////
+printf("RBIT\n");
+
+TESTINST2("rbit x11,x23", 0xfd79baaee550b488, x11,x23,0);
+TESTINST2("rbit x11,x23", 0xe861540945421773, x11,x23,0);
+TESTINST2("rbit x11,x23", 0x9a1140d0fd1dbf6c, x11,x23,0);
+
+TESTINST2("rbit w11,w23", 0xfd79baaee550b488, x11,x23,0);
+TESTINST2("rbit w11,w23", 0xe861540945421773, x11,x23,0);
+TESTINST2("rbit w11,w23", 0x9a1140d0fd1dbf6c, x11,x23,0);
+
+
+////////////////////////////////////////////////////////////////
printf("CLZ\n");
TESTINST2("clz x17, x22", 0xFFFFFFFFFFFFFFFFULL, x17, x22, 0);
@@ -10512,26 +10525,54 @@
////////////////////////////////////////////////////////////////
-printf("LDR,STR (immediate, uimm12)");
+printf("LDR,STR (immediate, uimm12) (STR cases are MISSING)");
TESTINST2_hide2("ldr x21, [x22, #24]", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldr w21, [x22, #20]", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldrh w21, [x22, #44]", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldrb w21, [x22, #56]", AREA_MID, x21,x22,0);
////////////////////////////////////////////////////////////////
-printf("LDUR,STUR (immediate, simm9) (wb is unchecked)\n");
+printf("LDUR,STUR (immediate, simm9) (STR cases and wb check are MISSING)\n");
TESTINST2_hide2("ldr x21, [x22], #-24", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldr x21, [x22, #-40]!", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldr x21, [x22, #-48]", AREA_MID, x21,x22,0);
+printf("LDUR,STUR (immediate, simm9): STR cases are MISSING");
////////////////////////////////////////////////////////////////
-printf("LDP,STP (immediate, simm7)\n");
+// TESTINST2_hide2 allows use of x28 as scratch
+printf("LDP,STP (immediate, simm7) (STR cases and wb check is MISSING)\n");
+
+TESTINST2_hide2("ldp x21, x28, [x22], #-24 ; add x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp x21, x28, [x22], #-24 ; eor x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp x21, x28, [x22, #-40]! ; add x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp x21, x28, [x22, #-40]! ; eor x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp x21, x28, [x22, #-40] ; add x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp x21, x28, [x22, #-40] ; eor x21,x21,x28", AREA_MID, x21,x22,0);
+
+TESTINST2_hide2("ldp w21, w28, [x22], #-24 ; add x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp w21, w28, [x22], #-24 ; eor x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp w21, w28, [x22, #-40]! ; add x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp w21, w28, [x22, #-40]! ; eor x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp w21, w28, [x22, #-40] ; add x21,x21,x28", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldp w21, w28, [x22, #-40] ; eor x21,x21,x28", AREA_MID, x21,x22,0);
+
+////////////////////////////////////////////////////////////////
+// This is a bit tricky. We load the value from just before and
+// just after the actual instruction.  Because TESTINST2_hide2
+// generates two fixed insns either side of the test insn, these
+// should be constant and hence "safe" to check.
-////////////////////////////////////////////////////////////////
printf("LDR (literal, int reg)\n");
+TESTINST2_hide2("xyzzy00: ldr x21, xyzzy00 - 8", AREA_MID, x21,x22,0);
+TESTINST2_hide2("xyzzy01: ldr x21, xyzzy01 + 0", AREA_MID, x21,x22,0);
+TESTINST2_hide2("xyzzy02: ldr x21, xyzzy02 + 8", AREA_MID, x21,x22,0);
+
+TESTINST2_hide2("xyzzy03: ldr x21, xyzzy03 - 4", AREA_MID, x21,x22,0);
+TESTINST2_hide2("xyzzy04: ldr x21, xyzzy04 + 0", AREA_MID, x21,x22,0);
+TESTINST2_hide2("xyzzy05: ldr x21, xyzzy05 + 4", AREA_MID, x21,x22,0);
////////////////////////////////////////////////////////////////
-printf("{LD,ST}R (integer register)\n");
+printf("{LD,ST}R (integer register) (entirely MISSING)\n");
////////////////////////////////////////////////////////////////
printf("LDRS{B,H,W} (uimm12)\n");
@@ -10542,7 +10583,7 @@
TESTINST2_hide2("ldrsb w21, [x22, #56]", AREA_MID, x21,x22,0);
////////////////////////////////////////////////////////////////
-printf("LDRS{B,H,W} (simm9, upd)\n");
+printf("LDRS{B,H,W} (simm9, upd) (upd check is MISSING)\n");
TESTINST2_hide2("ldrsw x21, [x22, #-24]!", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldrsh x21, [x22, #-20]!", AREA_MID, x21,x22,0);
TESTINST2_hide2("ldrsh w21, [x22, #-44]!", AREA_MID, x21,x22,0);
@@ -10564,6 +10605,12 @@
TESTINST2_hide2("ldrsb w21, [x22, #-56]", AREA_MID, x21,x22,0);
////////////////////////////////////////////////////////////////
+printf("LDP,STP (immediate, simm7) (FP&VEC) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("{LD,ST}R (vector register) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
printf("LDRS{B,H,W} (integer register, SX)\n");
TESTINST3_hide2and3("ldrsw x21, [x22,x23]", AREA_MID, 5, x21,x22,x23,0);
@@ -10601,9 +10648,41 @@
TESTINST3_hide2and3("ldrsb w21, [x22,w23,sxtw #0]", AREA_MID, -5ULL, x21,x22,x23,0);
TESTINST3_hide2and3("ldrsb w21, [x22,w23,sxtw #0]", AREA_MID, -5ULL, x21,x22,x23,0);
+////////////////////////////////////////////////////////////////
+printf("LDR/STR (immediate, SIMD&FP, unsigned offset) (entirely MISSING)\n");
////////////////////////////////////////////////////////////////
-printf("LD{A}XR\n");
+printf("LDR/STR (immediate, SIMD&FP, pre/post index) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("LDUR/STUR (unscaled offset, SIMD&FP) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("LDR (literal, SIMD&FP) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("LD1/ST1 (single structure, no offset) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("LD1/ST1 (single structure, post index) (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("LD{,A}X{R,RH,RB} (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("ST{,L}X{R,RH,RB} (entirely MISSING)\n");
+
+////////////////////////////////////////////////////////////////
+printf("LDA{R,RH,RB}\n");
+TESTINST2_hide2("ldar x21, [x22]", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldar w21, [x22]", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldarh w21, [x22]", AREA_MID, x21,x22,0);
+TESTINST2_hide2("ldarb w21, [x22]", AREA_MID, x21,x22,0);
+
+////////////////////////////////////////////////////////////////
+printf("STL{R,RH,RB} (entirely MISSING)\n");
+
+
return 0;
}
|
|
From: <sv...@va...> - 2014-03-01 11:27:37
|
Author: sewardj
Date: Sat Mar 1 11:27:18 2014
New Revision: 13846
Log:
Add test cases for SIMD and FP instructions.
Added:
trunk/none/tests/arm64/test_arm64_fp_and_simd.c
Added: trunk/none/tests/arm64/test_arm64_fp_and_simd.c
==============================================================================
--- trunk/none/tests/arm64/test_arm64_fp_and_simd.c (added)
+++ trunk/none/tests/arm64/test_arm64_fp_and_simd.c Sat Mar 1 11:27:18 2014
@@ -0,0 +1,1133 @@
+
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h> // memalign
+#include <string.h> // memset
+
+typedef unsigned char UChar;
+typedef unsigned short int UShort;
+typedef unsigned int UInt;
+typedef signed int Int;
+typedef unsigned char UChar;
+typedef unsigned long long int ULong;
+
+typedef unsigned char Bool;
+#define False ((Bool)0)
+#define True ((Bool)1)
+
+
+union _V128 {
+ UChar b[16];
+ UShort h[8];
+ UInt i[4];
+ ULong d[2];
+};
+typedef union _V128 V128;
+
+static UChar randUChar ( void )
+{
+ static UInt seed = 80021;
+ seed = 1103515245 * seed + 12345;
+ return (seed >> 17) & 0xFF;
+}
+
+static ULong randULong ( void )
+{
+ Int i;
+ ULong r = 0;
+ for (i = 0; i < 8; i++) {
+ r = (r << 8) | (ULong)(0xFF & randUChar());
+ }
+ return r;
+}
+
+static void randV128 ( V128* v )
+{
+ Int i;
+ for (i = 0; i < 16; i++)
+ v->b[i] = randUChar();
+}
+
+static void showV128 ( V128* v )
+{
+ Int i;
+ for (i = 15; i >= 0; i--)
+ printf("%02x", (Int)v->b[i]);
+}
+
+__attribute__((unused))
+static void* memalign16(size_t szB)
+{
+ void* x;
+ x = memalign(16, szB);
+ assert(x);
+ assert(0 == ((16-1) & (unsigned long)x));
+ return x;
+}
+
+
+void test_UMINV ( void )
+{
+ int i;
+ V128 block[2];
+
+ /* -- 4s -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "uminv s8, v7.4s ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMINV v8, v7.4s ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "uminv h8, v7.8h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMINV h8, v7.8h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 4h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "uminv h8, v7.4h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMINV h8, v7.4h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 16b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "uminv b8, v7.16b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMINV b8, v7.16b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "uminv b8, v7.8b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMINV b8, v7.8b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+}
+
+
+void test_UMAXV ( void )
+{
+ int i;
+ V128 block[2];
+
+ /* -- 4s -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "umaxv s8, v7.4s ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMAXV v8, v7.4s ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "umaxv h8, v7.8h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMAXV h8, v7.8h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 4h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "umaxv h8, v7.4h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMAXV h8, v7.4h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 16b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "umaxv b8, v7.16b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMAXV b8, v7.16b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "umaxv b8, v7.8b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("UMAXV b8, v7.8b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+}
+
+
+void test_INS_general ( void )
+{
+ V128 block[3];
+
+ /* -- D[0..1] -- */
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.d[0], x19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.d[0],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.d[1], x19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.d[1],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ /* -- S[0..3] -- */
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.s[0], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.s[0],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.s[1], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.s[1],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.s[2], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.s[2],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.s[3], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.s[3],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ /* -- H[0..7] -- */
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[0], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[0],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[1], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[1],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[2], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[2],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[3], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[3],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[4], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[4],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[5], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[5],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[6], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[6],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.h[7], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.h[7],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ /* -- B[0,15] -- */
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.b[0], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.b[0],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+
+ memset(&block, 0x55, sizeof(block));
+ block[1].d[0] = randULong();
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "ldr x19, [%0, #16] ; "
+ "ins v7.b[15], w19 ; "
+ "str q7, [%0, #32] "
+ : : "r"(&block[0]) : "memory", "x19", "v7"
+ );
+ printf("INS v7.b[15],x19 ");
+ showV128(&block[0]); printf(" %016llx ", block[1].d[0]);
+ showV128(&block[2]); printf("\n");
+}
+
+
+
+void test_SMINV ( void )
+{
+ int i;
+ V128 block[2];
+
+ /* -- 4s -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "sminv s8, v7.4s ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMINV v8, v7.4s ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "sminv h8, v7.8h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMINV h8, v7.8h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 4h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "sminv h8, v7.4h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMINV h8, v7.4h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 16b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "sminv b8, v7.16b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMINV b8, v7.16b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "sminv b8, v7.8b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMINV b8, v7.8b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+}
+
+
+void test_SMAXV ( void )
+{
+ int i;
+ V128 block[2];
+
+ /* -- 4s -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "smaxv s8, v7.4s ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMAXV v8, v7.4s ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "smaxv h8, v7.8h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMAXV h8, v7.8h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 4h -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "smaxv h8, v7.4h ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMAXV h8, v7.4h ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 16b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "smaxv b8, v7.16b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMAXV b8, v7.16b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+ /* -- 8b -- */
+
+ for (i = 0; i < 10; i++) {
+ memset(&block, 0x55, sizeof(block));
+ randV128(&block[0]);
+ __asm__ __volatile__(
+ "ldr q7, [%0, #0] ; "
+ "smaxv b8, v7.8b ; "
+ "str q8, [%0, #16] "
+ : : "r"(&block[0]) : "memory", "v7", "v8"
+ );
+ printf("SMAXV b8, v7.8b ");
+ showV128(&block[0]); printf(" ");
+ showV128(&block[1]); printf("\n");
+ }
+
+}
+
+#define ITERS 100
+
+/* Note this also sets the destination register to a known value (0x55..55)
+ since it can sometimes be an input to the instruction too. */
+#define GEN_BINARY_TEST(INSN,SUFFIX) \
+ __attribute__((noinline)) \
+ static void test_##INSN##_##SUFFIX ( void ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[3]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0]); \
+ randV128(&block[1]); \
+ __asm__ __volatile__( \
+ "ldr q7, [%0, #0] ; " \
+ "ldr q8, [%0, #16] ; " \
+ "ldr q9, [%0, #32] ; " \
+ #INSN " v9." #SUFFIX ", v7." #SUFFIX ", v8." #SUFFIX " ; " \
+ "str q9, [%0, #32] " \
+ : : "r"(&block[0]) : "memory", "v7", "v8", "v9" \
+ ); \
+ printf(#INSN " v9." #SUFFIX ", v7." #SUFFIX ", v8." #SUFFIX " "); \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf(" "); \
+ showV128(&block[2]); printf("\n"); \
+ } \
+ }
+
+
+/* Note this also sets the destination register to a known value (0x55..55)
+ since it can sometimes be an input to the instruction too. */
+#define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
+ __attribute__((noinline)) \
+ static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( void ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[2]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0]); \
+ __asm__ __volatile__( \
+ "ldr q7, [%0, #0] ; " \
+ "ldr q8, [%0, #16] ; " \
+ #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
+ "str q8, [%0, #16] " \
+ : : "r"(&block[0]) : "memory", "v7", "v8" \
+ ); \
+ printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " "); \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf("\n"); \
+ } \
+ }
+
+/* Note this also sets the destination register to a known value (0x55..55)
+ since it can sometimes be an input to the instruction too. */
+#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
+ __attribute__((noinline)) \
+ static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( void ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[2]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0]); \
+ __asm__ __volatile__( \
+ "ldr q7, [%0, #0] ; " \
+ "ldr q8, [%0, #16] ; " \
+ #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
+ "str q8, [%0, #16] " \
+ : : "r"(&block[0]) : "memory", "v7", "v8" \
+ ); \
+ printf(#INSN " v8." #SUFFIXD ", v7." #SUFFIXN); \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf("\n"); \
+ } \
+ }
+
+
+GEN_BINARY_TEST(umax, 4s)
+GEN_BINARY_TEST(umax, 8h)
+GEN_BINARY_TEST(umax, 4h)
+GEN_BINARY_TEST(umax, 16b)
+GEN_BINARY_TEST(umax, 8b)
+
+GEN_BINARY_TEST(umin, 4s)
+GEN_BINARY_TEST(umin, 8h)
+GEN_BINARY_TEST(umin, 4h)
+GEN_BINARY_TEST(umin, 16b)
+GEN_BINARY_TEST(umin, 8b)
+
+GEN_BINARY_TEST(smax, 4s)
+GEN_BINARY_TEST(smax, 8h)
+GEN_BINARY_TEST(smax, 4h)
+GEN_BINARY_TEST(smax, 16b)
+GEN_BINARY_TEST(smax, 8b)
+
+GEN_BINARY_TEST(smin, 4s)
+GEN_BINARY_TEST(smin, 8h)
+GEN_BINARY_TEST(smin, 4h)
+GEN_BINARY_TEST(smin, 16b)
+GEN_BINARY_TEST(smin, 8b)
+
+GEN_BINARY_TEST(add, 2d)
+GEN_BINARY_TEST(add, 4s)
+GEN_BINARY_TEST(add, 2s)
+GEN_BINARY_TEST(add, 8h)
+GEN_BINARY_TEST(add, 4h)
+GEN_BINARY_TEST(add, 16b)
+GEN_BINARY_TEST(add, 8b)
+
+GEN_BINARY_TEST(sub, 2d)
+GEN_BINARY_TEST(sub, 4s)
+GEN_BINARY_TEST(sub, 2s)
+GEN_BINARY_TEST(sub, 8h)
+GEN_BINARY_TEST(sub, 4h)
+GEN_BINARY_TEST(sub, 16b)
+GEN_BINARY_TEST(sub, 8b)
+
+GEN_BINARY_TEST(mul, 4s)
+GEN_BINARY_TEST(mul, 2s)
+GEN_BINARY_TEST(mul, 8h)
+GEN_BINARY_TEST(mul, 4h)
+GEN_BINARY_TEST(mul, 16b)
+GEN_BINARY_TEST(mul, 8b)
+
+GEN_BINARY_TEST(mla, 4s)
+GEN_BINARY_TEST(mla, 2s)
+GEN_BINARY_TEST(mla, 8h)
+GEN_BINARY_TEST(mla, 4h)
+GEN_BINARY_TEST(mla, 16b)
+GEN_BINARY_TEST(mla, 8b)
+
+GEN_BINARY_TEST(mls, 4s)
+GEN_BINARY_TEST(mls, 2s)
+GEN_BINARY_TEST(mls, 8h)
+GEN_BINARY_TEST(mls, 4h)
+GEN_BINARY_TEST(mls, 16b)
+GEN_BINARY_TEST(mls, 8b)
+
+GEN_BINARY_TEST(and, 16b)
+GEN_BINARY_TEST(and, 8b)
+
+GEN_BINARY_TEST(bic, 16b)
+GEN_BINARY_TEST(bic, 8b)
+
+GEN_BINARY_TEST(orr, 16b)
+GEN_BINARY_TEST(orr, 8b)
+
+GEN_BINARY_TEST(orn, 16b)
+GEN_BINARY_TEST(orn, 8b)
+
+GEN_BINARY_TEST(eor, 16b)
+GEN_BINARY_TEST(eor, 8b)
+
+GEN_BINARY_TEST(bsl, 16b)
+GEN_BINARY_TEST(bsl, 8b)
+
+GEN_BINARY_TEST(bit, 16b)
+GEN_BINARY_TEST(bit, 8b)
+
+GEN_BINARY_TEST(bif, 16b)
+GEN_BINARY_TEST(bif, 8b)
+
+GEN_BINARY_TEST(cmeq, 2d)
+GEN_BINARY_TEST(cmeq, 4s)
+GEN_BINARY_TEST(cmeq, 2s)
+GEN_BINARY_TEST(cmeq, 8h)
+GEN_BINARY_TEST(cmeq, 4h)
+GEN_BINARY_TEST(cmeq, 16b)
+GEN_BINARY_TEST(cmeq, 8b)
+
+GEN_BINARY_TEST(cmtst, 2d)
+GEN_BINARY_TEST(cmtst, 4s)
+GEN_BINARY_TEST(cmtst, 2s)
+GEN_BINARY_TEST(cmtst, 8h)
+GEN_BINARY_TEST(cmtst, 4h)
+GEN_BINARY_TEST(cmtst, 16b)
+GEN_BINARY_TEST(cmtst, 8b)
+
+GEN_BINARY_TEST(cmhi, 2d)
+GEN_BINARY_TEST(cmhi, 4s)
+GEN_BINARY_TEST(cmhi, 2s)
+GEN_BINARY_TEST(cmhi, 8h)
+GEN_BINARY_TEST(cmhi, 4h)
+GEN_BINARY_TEST(cmhi, 16b)
+GEN_BINARY_TEST(cmhi, 8b)
+
+GEN_BINARY_TEST(cmgt, 2d)
+GEN_BINARY_TEST(cmgt, 4s)
+GEN_BINARY_TEST(cmgt, 2s)
+GEN_BINARY_TEST(cmgt, 8h)
+GEN_BINARY_TEST(cmgt, 4h)
+GEN_BINARY_TEST(cmgt, 16b)
+GEN_BINARY_TEST(cmgt, 8b)
+
+GEN_BINARY_TEST(cmhs, 2d)
+GEN_BINARY_TEST(cmhs, 4s)
+GEN_BINARY_TEST(cmhs, 2s)
+GEN_BINARY_TEST(cmhs, 8h)
+GEN_BINARY_TEST(cmhs, 4h)
+GEN_BINARY_TEST(cmhs, 16b)
+GEN_BINARY_TEST(cmhs, 8b)
+
+GEN_BINARY_TEST(cmge, 2d)
+GEN_BINARY_TEST(cmge, 4s)
+GEN_BINARY_TEST(cmge, 2s)
+GEN_BINARY_TEST(cmge, 8h)
+GEN_BINARY_TEST(cmge, 4h)
+GEN_BINARY_TEST(cmge, 16b)
+GEN_BINARY_TEST(cmge, 8b)
+
+GEN_SHIFT_TEST(ushr, 2d, 2d, 1)
+GEN_SHIFT_TEST(ushr, 2d, 2d, 13)
+GEN_SHIFT_TEST(ushr, 2d, 2d, 63)
+GEN_SHIFT_TEST(sshr, 2d, 2d, 1)
+GEN_SHIFT_TEST(sshr, 2d, 2d, 13)
+GEN_SHIFT_TEST(sshr, 2d, 2d, 63)
+
+GEN_SHIFT_TEST(ushr, 4s, 4s, 1)
+GEN_SHIFT_TEST(ushr, 4s, 4s, 13)
+GEN_SHIFT_TEST(ushr, 4s, 4s, 31)
+GEN_SHIFT_TEST(sshr, 4s, 4s, 1)
+GEN_SHIFT_TEST(sshr, 4s, 4s, 13)
+GEN_SHIFT_TEST(sshr, 4s, 4s, 31)
+
+GEN_SHIFT_TEST(ushr, 2s, 2s, 1)
+GEN_SHIFT_TEST(ushr, 2s, 2s, 13)
+GEN_SHIFT_TEST(ushr, 2s, 2s, 31)
+GEN_SHIFT_TEST(sshr, 2s, 2s, 1)
+GEN_SHIFT_TEST(sshr, 2s, 2s, 13)
+GEN_SHIFT_TEST(sshr, 2s, 2s, 31)
+
+GEN_SHIFT_TEST(ushr, 8h, 8h, 1)
+GEN_SHIFT_TEST(ushr, 8h, 8h, 13)
+GEN_SHIFT_TEST(ushr, 8h, 8h, 15)
+GEN_SHIFT_TEST(sshr, 8h, 8h, 1)
+GEN_SHIFT_TEST(sshr, 8h, 8h, 13)
+GEN_SHIFT_TEST(sshr, 8h, 8h, 15)
+
+GEN_SHIFT_TEST(ushr, 4h, 4h, 1)
+GEN_SHIFT_TEST(ushr, 4h, 4h, 13)
+GEN_SHIFT_TEST(ushr, 4h, 4h, 15)
+GEN_SHIFT_TEST(sshr, 4h, 4h, 1)
+GEN_SHIFT_TEST(sshr, 4h, 4h, 13)
+GEN_SHIFT_TEST(sshr, 4h, 4h, 15)
+
+GEN_SHIFT_TEST(ushr, 16b, 16b, 1)
+GEN_SHIFT_TEST(ushr, 16b, 16b, 7)
+GEN_SHIFT_TEST(sshr, 16b, 16b, 1)
+GEN_SHIFT_TEST(sshr, 16b, 16b, 7)
+
+GEN_SHIFT_TEST(ushr, 8b, 8b, 1)
+GEN_SHIFT_TEST(ushr, 8b, 8b, 7)
+GEN_SHIFT_TEST(sshr, 8b, 8b, 1)
+GEN_SHIFT_TEST(sshr, 8b, 8b, 7)
+
+GEN_SHIFT_TEST(ushll, 2d, 2s, 0)
+GEN_SHIFT_TEST(ushll, 2d, 2s, 15)
+GEN_SHIFT_TEST(ushll, 2d, 2s, 31)
+GEN_SHIFT_TEST(ushll2, 2d, 4s, 0)
+GEN_SHIFT_TEST(ushll2, 2d, 4s, 15)
+GEN_SHIFT_TEST(ushll2, 2d, 4s, 31)
+
+GEN_SHIFT_TEST(sshll, 2d, 2s, 0)
+GEN_SHIFT_TEST(sshll, 2d, 2s, 15)
+GEN_SHIFT_TEST(sshll, 2d, 2s, 31)
+GEN_SHIFT_TEST(sshll2, 2d, 4s, 0)
+GEN_SHIFT_TEST(sshll2, 2d, 4s, 15)
+GEN_SHIFT_TEST(sshll2, 2d, 4s, 31)
+
+GEN_UNARY_TEST(xtn, 2s, 2d)
+GEN_UNARY_TEST(xtn2, 4s, 2d)
+GEN_UNARY_TEST(xtn, 4h, 4s)
+GEN_UNARY_TEST(xtn2, 8h, 4s)
+
+int main ( void )
+{
+ assert(sizeof(V128) == 16);
+
+ printf("FMOV (general) MISSING\n");
+ printf("FMOV (scalar, immediate) MISSING\n");
+ printf("{FMOV,MOVI} (vector, immediate) MISSING\n");
+ printf("{S,U}CVTF (scalar, integer) MISSING\n");
+ printf("F{ADD,SUB,MUL,DIV,NMUL} (scalar) MISSING\n");
+ printf("F{MOV,ABS,NEG,SQRT} D/D or S/S MISSING\n");
+ printf("F{ABS,NEG} (vector) MISSING\n");
+ printf("FCMP,FCMPE MISSING\n");
+ printf("F{N}M{ADD,SUB} MISSING\n");
+ printf("FCVT{N,P,M,Z}{S,U} (scalar, integer) MISSING\n");
+ printf("FRINT{I,M,P,Z} (scalar) MISSING\n");
+ printf("FCVT (scalar) MISSING\n");
+ printf("FABD (scalar) MISSING\n");
+ printf("{S,U}CVTF (vector, integer) MISSING\n");
+ printf("F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) MISSING\n");
+
+ printf("ADD/SUB (vector) MISSING\n");
+ test_add_2d();
+ test_add_4s();
+ test_add_2s();
+ test_add_8h();
+ test_add_4h();
+ //test_add_16b();
+ //test_add_8b();
+ test_sub_2d();
+ test_sub_4s();
+ test_sub_2s();
+ test_sub_8h();
+ test_sub_4h();
+ //test_sub_16b();
+ //test_sub_8b();
+
+ printf("ADD/SUB (scalar) MISSING\n");
+
+ test_mul_4s();
+ test_mul_2s();
+ test_mul_8h();
+ test_mul_4h();
+ //test_mul_16b();
+ //test_mul_8b();
+ test_mla_4s();
+ test_mla_2s();
+ test_mla_8h();
+ test_mla_4h();
+ //test_mla_16b();
+ //test_mla_8b();
+ test_mls_4s();
+ test_mls_2s();
+ test_mls_8h();
+ test_mls_4h();
+ //test_mls_16b();
+ //test_mls_8b();
+ printf("MUL/PMUL/MLA/MLS (vector) (partly MISSING)\n");
+
+ test_umax_4s();
+ test_umax_8h();
+ test_umax_4h();
+ test_umax_16b();
+ test_umax_8b();
+ test_umin_4s();
+ test_umin_8h();
+ test_umin_4h();
+ test_umin_16b();
+ test_umin_8b();
+ test_smax_4s();
+ test_smax_8h();
+ test_smax_4h();
+ test_smax_16b();
+ test_smax_8b();
+ test_smin_4s();
+ test_smin_8h();
+ test_smin_4h();
+ test_smin_16b();
+ test_smin_8b();
+
+ test_UMINV();
+ test_UMAXV();
+ test_SMINV();
+ test_SMAXV();
+
+ test_and_16b();
+ test_and_8b();
+ test_bic_16b();
+ test_bic_8b();
+ test_orr_16b();
+ test_orr_8b();
+ test_orn_16b();
+ test_orn_8b();
+
+ test_cmeq_2d();
+#if 0
+ test_cmeq_4s();
+ test_cmeq_2s();
+ test_cmeq_8h();
+ test_cmeq_4h();
+ test_cmeq_16b();
+ test_cmeq_8b();
+ test_cmtst_2d();
+ test_cmtst_4s();
+ test_cmtst_2s();
+ test_cmtst_8h();
+ test_cmtst_4h();
+ test_cmtst_16b();
+ test_cmtst_8b();
+ test_cmhi_2d();
+ test_cmhi_4s();
+ test_cmhi_2s();
+ test_cmhi_8h();
+ test_cmhi_4h();
+ test_cmhi_16b();
+ test_cmhi_8b();
+ test_cmgt_2d();
+ test_cmgt_4s();
+ test_cmgt_2s();
+ test_cmgt_8h();
+ test_cmgt_4h();
+ test_cmgt_16b();
+ test_cmgt_8b();
+ test_cmhs_2d();
+ test_cmhs_4s();
+ test_cmhs_2s();
+ test_cmhs_8h();
+ test_cmhs_4h();
+ test_cmhs_16b();
+ test_cmhs_8b();
+ test_cmge_2d();
+ test_cmge_4s();
+ test_cmge_2s();
+ test_cmge_8h();
+ test_cmge_4h();
+ test_cmge_16b();
+ test_cmge_8b();
+#endif
+ printf("CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) (w/zero cases MISSING)\n");
+
+ test_eor_16b();
+ test_eor_8b();
+ test_bsl_16b();
+ test_bsl_8b();
+ test_bit_16b();
+ test_bit_8b();
+ test_bif_16b();
+ test_bif_8b();
+
+ test_ushr_2d_2d_1();
+ test_ushr_2d_2d_13();
+ test_ushr_2d_2d_63();
+ test_sshr_2d_2d_1();
+ test_sshr_2d_2d_13();
+ test_sshr_2d_2d_63();
+#if 0
+ test_ushr_4s_4s_1();
+ test_ushr_4s_4s_13();
+ test_ushr_4s_4s_31();
+ test_sshr_4s_4s_1();
+ test_sshr_4s_4s_13();
+ test_sshr_4s_4s_31();
+ test_ushr_2s_2s_1();
+ test_ushr_2s_2s_13();
+ test_ushr_2s_2s_31();
+ test_sshr_2s_2s_1();
+ test_sshr_2s_2s_13();
+ test_sshr_2s_2s_31();
+ test_ushr_8h_8h_1();
+ test_ushr_8h_8h_13();
+ test_ushr_8h_8h_15();
+ test_sshr_8h_8h_1();
+ test_sshr_8h_8h_13();
+ test_sshr_8h_8h_15();
+ test_ushr_4h_4h_1();
+ test_ushr_4h_4h_13();
+ test_ushr_4h_4h_15();
+ test_sshr_4h_4h_1();
+ test_sshr_4h_4h_13();
+ test_sshr_4h_4h_15();
+ test_ushr_16b_16b_1();
+ test_ushr_16b_16b_7();
+ test_sshr_16b_16b_1();
+ test_sshr_16b_16b_7();
+ test_ushr_8b_8b_1();
+ test_ushr_8b_8b_7();
+ test_sshr_8b_8b_1();
+ test_sshr_8b_8b_7();
+#endif
+
+ printf("{U,S}SHLL{,2} (MISSING h_b and s_h versions)\n");
+ test_ushll_2d_2s_0();
+ test_ushll_2d_2s_15();
+ test_ushll_2d_2s_31();
+ test_ushll2_2d_4s_0();
+ test_ushll2_2d_4s_15();
+ test_ushll2_2d_4s_31();
+ test_sshll_2d_2s_0();
+ test_sshll_2d_2s_15();
+ test_sshll_2d_2s_31();
+ test_sshll2_2d_4s_0();
+ test_sshll2_2d_4s_15();
+ test_sshll2_2d_4s_31();
+ printf("{U,S}SHLL{,2} (MISSING h_b and s_h versions)\n");
+
+ test_xtn_2s_2d();
+ test_xtn2_4s_2d();
+ test_xtn_4h_4s();
+ test_xtn2_8h_4s();
+ printf("XTN{,2} (MISSING b_h versions)\n");
+
+ printf("DUP (element, vector) MISSING\n");
+ printf("DUP (general, vector) MISSING\n");
+ printf("{S,U}MOV MISSING\n");
+
+ test_INS_general();
+
+ return 0;
+}
|
|
From: <sv...@va...> - 2014-03-01 11:25:07
|
Author: sewardj
Date: Sat Mar 1 11:24:42 2014
New Revision: 13845
Log:
Enable the following syscalls, which are needed to make regtests
in none/ work:
sys_ftruncate
sys_setitimer
sys_rt_sigsuspend
sys_rt_sigtimedwait
sys_mq_open
sys_mq_unlink
sys_semget
sys_semctl
sys_semtimedop
sys_semop
sys_listen
sys_accept
sys_process_vm_readv
sys_process_vm_writev
Modified:
trunk/coregrind/m_syswrap/syswrap-arm64-linux.c
Modified: trunk/coregrind/m_syswrap/syswrap-arm64-linux.c
==============================================================================
--- trunk/coregrind/m_syswrap/syswrap-arm64-linux.c (original)
+++ trunk/coregrind/m_syswrap/syswrap-arm64-linux.c Sat Mar 1 11:24:42 2014
@@ -894,6 +894,9 @@
// FIXME IS THIS CORRECT? it may well not be.
GENXY(__NR3264_statfs, sys_statfs), // 43
+ // FIXME IS THIS CORRECT? it may well not be.
+ GENX_(__NR3264_ftruncate, sys_ftruncate), // 46
+
LINX_(__NR_faccessat, sys_faccessat), // 48
GENX_(__NR_chdir, sys_chdir), // 49
LINXY(__NR_openat, sys_openat), // 56
@@ -921,11 +924,14 @@
LINXY(__NR_futex, sys_futex), // 98
LINX_(__NR_set_robust_list, sys_set_robust_list), // 99
GENXY(__NR_nanosleep, sys_nanosleep), // 101
+ GENXY(__NR_setitimer, sys_setitimer), // 103
LINXY(__NR_clock_gettime, sys_clock_gettime), // 113
GENX_(__NR_kill, sys_kill), // 129
LINX_(__NR_tgkill, sys_tgkill), // 131
+ LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 133
LINXY(__NR_rt_sigaction, sys_rt_sigaction), // 134
LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask), // 135
+ LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait), // 137
PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn), // 139
GENX_(__NR_setpriority, sys_setpriority), // 140
GENX_(__NR_getpriority, sys_getpriority), // 141
@@ -947,9 +953,17 @@
GENX_(__NR_getgid, sys_getgid), // 176
GENX_(__NR_getegid, sys_getegid), // 177
LINX_(__NR_gettid, sys_gettid), // 178
+ LINXY(__NR_mq_open, sys_mq_open), // 180
+ LINX_(__NR_mq_unlink, sys_mq_unlink), // 181
+ LINX_(__NR_semget, sys_semget), // 190
+ LINXY(__NR_semctl, sys_semctl), // 191
+ LINX_(__NR_semtimedop, sys_semtimedop), // 192
+ LINX_(__NR_semop, sys_semop), // 193
LINXY(__NR_socket, sys_socket), // 198
LINXY(__NR_socketpair, sys_socketpair), // 199
LINX_(__NR_bind, sys_bind), // 200
+ LINX_(__NR_listen, sys_listen), // 201
+ LINXY(__NR_accept, sys_accept), // 202
LINX_(__NR_connect, sys_connect), // 203
LINXY(__NR_getsockname, sys_getsockname), // 204
LINXY(__NR_getpeername, sys_getpeername), // 205
@@ -973,6 +987,9 @@
GENX_(__NR_madvise, sys_madvise), // 233
GENXY(__NR_wait4, sys_wait4), // 260
+ LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 270
+ LINX_(__NR_process_vm_writev, sys_process_vm_writev), // 271
+
// The numbers below are bogus. (See comment further down.)
// When pulling entries above this line, change the numbers
// to be correct.
@@ -1084,7 +1101,6 @@
//ZZ // LINX_(__NR_ioperm, sys_ioperm), // 101
//ZZ LINXY(__NR_socketcall, sys_socketcall), // 102
//ZZ LINXY(__NR_syslog, sys_syslog), // 103
-//ZZ GENXY(__NR_setitimer, sys_setitimer), // 104
//ZZ
//ZZ GENXY(__NR_getitimer, sys_getitimer), // 105
//ZZ GENXY(__NR_stat, sys_newstat), // 106
@@ -1168,7 +1184,6 @@
//ZZ LINXY(__NR_rt_sigpending, sys_rt_sigpending), // 176
//ZZ LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait),// 177
//ZZ LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo),// 178
-//ZZ LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 179
//ZZ
//ZZ GENXY(__NR_pread64, sys_pread64), // 180
//ZZ GENX_(__NR_pwrite64, sys_pwrite64), // 181
@@ -1276,8 +1291,6 @@
//ZZ
//ZZ LINXY(__NR_get_mempolicy, sys_get_mempolicy), // 275 ?/?
//ZZ LINX_(__NR_set_mempolicy, sys_set_mempolicy), // 276 ?/?
-//ZZ LINXY(__NR_mq_open, sys_mq_open), // 277
-//ZZ LINX_(__NR_mq_unlink, sys_mq_unlink), // (mq_open+1)
//ZZ LINX_(__NR_mq_timedsend, sys_mq_timedsend), // (mq_open+2)
//ZZ
//ZZ LINXY(__NR_mq_timedreceive, sys_mq_timedreceive),// (mq_open+3)
@@ -1285,19 +1298,15 @@
//ZZ LINXY(__NR_mq_getsetattr, sys_mq_getsetattr), // (mq_open+5)
//ZZ LINXY(__NR_waitid, sys_waitid), // 280
//ZZ
-//ZZ LINX_(__NR_listen, sys_listen), // 284
-//ZZ LINXY(__NR_accept, sys_accept), // 285
//ZZ LINX_(__NR_send, sys_send),
//ZZ LINXY(__NR_recv, sys_recv),
//ZZ LINXY(__NR_recvfrom, sys_recvfrom), // 292
-//ZZ LINX_(__NR_semop, sys_semop), // 298
//ZZ LINX_(__NR_semget, sys_semget), // 299
//ZZ LINXY(__NR_semctl, sys_semctl), // 300
//ZZ LINX_(__NR_msgget, sys_msgget),
//ZZ LINX_(__NR_msgsnd, sys_msgsnd),
//ZZ LINXY(__NR_msgrcv, sys_msgrcv),
//ZZ LINXY(__NR_msgctl, sys_msgctl), // 304
-//ZZ LINX_(__NR_semtimedop, sys_semtimedop), // 312
//ZZ
//ZZ LINX_(__NR_add_key, sys_add_key), // 286
//ZZ LINX_(__NR_request_key, sys_request_key), // 287
|
|
From: <sv...@va...> - 2014-03-01 11:22:03
|
Author: sewardj
Date: Sat Mar 1 11:21:45 2014
New Revision: 13844
Log:
Fixes for arm64-linux.
Modified:
trunk/none/tests/allexec_prepare_prereq
trunk/none/tests/faultstatus.c
Modified: trunk/none/tests/allexec_prepare_prereq
==============================================================================
--- trunk/none/tests/allexec_prepare_prereq (original)
+++ trunk/none/tests/allexec_prepare_prereq Sat Mar 1 11:21:45 2014
@@ -31,7 +31,7 @@
pair x86 amd64
pair ppc32 ppc64
pair s390x_unexisting_in_32bits s390x
-pair arm arm_unexisting_in_64bits
+pair arm arm64
pair mips32 mips64
exit 0
Modified: trunk/none/tests/faultstatus.c
==============================================================================
--- trunk/none/tests/faultstatus.c (original)
+++ trunk/none/tests/faultstatus.c Sat Mar 1 11:21:45 2014
@@ -18,7 +18,7 @@
Hence we get a SIGFPE but the SI_CODE is different from that on
x86/amd64-linux.
*/
-#if defined(__powerpc__)
+#if defined(__powerpc__) || defined(__aarch64__)
# define DIVISION_BY_ZERO_TRIGGERS_FPE 0
# define DIVISION_BY_ZERO_SI_CODE SI_TKILL
#elif defined(__arm__)
|
|
From: <sv...@va...> - 2014-03-01 11:20:48
|
Author: sewardj
Date: Sat Mar 1 11:20:33 2014
New Revision: 13843
Log:
Support arm64.
Modified:
trunk/tests/arch_test.c
Modified: trunk/tests/arch_test.c
==============================================================================
--- trunk/tests/arch_test.c (original)
+++ trunk/tests/arch_test.c Sat Mar 1 11:20:33 2014
@@ -60,6 +60,9 @@
#elif defined(VGP_arm_linux)
if ( 0 == strcmp( arch, "arm" ) ) return True;
+#elif defined(VGP_arm64_linux)
+ if ( 0 == strcmp( arch, "arm64" ) ) return True;
+
#elif defined(VGP_mips32_linux)
if ( 0 == strcmp( arch, "mips32" ) ) return True;
|
|
From: <sv...@va...> - 2014-03-01 11:20:04
|
Author: sewardj
Date: Sat Mar 1 11:19:45 2014
New Revision: 2829
Log:
Remove redundant FMOV (vector, immediate) case.
Minor comment fixes.
Fix bugs in {U,S}{MIN,MAX}V, {U,S}{MIN,MAX}, {S,U}SHLL
Modified:
trunk/priv/guest_arm64_toIR.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Sat Mar 1 11:19:45 2014
@@ -970,6 +970,8 @@
stupid types. */
UInt laneSzB = 0;
switch (laneTy) {
+ case Ity_I8: laneSzB = 1; break;
+ case Ity_I16: laneSzB = 2; break;
case Ity_F32: case Ity_I32: laneSzB = 4; break;
case Ity_F64: case Ity_I64: laneSzB = 8; break;
case Ity_V128: laneSzB = 16; break;
@@ -1087,6 +1089,9 @@
Int off = offsetQRegLane(qregNo, laneTy, laneNo);
switch (laneTy) {
case Ity_F64: case Ity_I64:
+ case Ity_I32:
+ case Ity_I16:
+ case Ity_I8:
break;
default:
vassert(0); // Other cases are ATC
@@ -3280,7 +3285,6 @@
putIReg64orZR(rT2, loadLE(Ity_I64,
binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
} else if (bL == 1 && bX == 0) {
- vassert(0); //ATC
// 32 bit load
putIReg32orZR(rT1, loadLE(Ity_I32,
binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
@@ -5065,36 +5069,6 @@
return True;
}
-#if 0
- /* -------------- FMOV (vector, immediate) -------------- */
- /* 31 28 18 15 9 4
- 011 01111 00000 abc 111101 defgh d FMOV Vd.2d, #imm
- 0q0 01111 00000 abc 111101 defgh d FMOV Vd.2s, #imm (q=0)
- FMOV Vd.4s, #imm (q=1)
- */
- if (INSN(31,31) == 0
- && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
- && INSN(15,10) == BITS6(1,1,1,1,0,1)
- && INSN(30,29) != BITS2(0,1)) {
- UInt bitQ = INSN(30,30);
- UInt bitOP = INSN(29,29);
- UInt cmode = INSN(15,12);
- UInt imm8 = (INSN(18,16) << 5) | INSN(9,5);
- UInt dd = INSN(4,0);
- ULong imm64lo = 0;
- Bool ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
- vassert(! (bitOP == 1 && bitQ == 0) );
- if (ok) {
- ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
- putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
- const HChar* ar[4] = { "2s", "??", "4s", "2d" };
- DIP("fmov %s.%s, #0x%llx\n",
- nameQReg128(dd), ar[INSN(30,29)], imm64lo);
- return True;
- }
- /* else fall through */
- }
-#else
/* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
/* 31 28 18 15 11 9 4
0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0)
@@ -5137,7 +5111,6 @@
}
/* else fall through */
}
-#endif
/* -------------- {S,U}CVTF (scalar, integer) -------------- */
/* 31 28 23 21 20 18 15 9 4 ix
@@ -5177,7 +5150,7 @@
return True;
}
- /* -------------- F{ADD,SUB,MUL,DIV} (scalar) -------------- */
+ /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
/* 31 23 20 15 11 9 4
---------------- 0000 ------ FMUL --------
000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
@@ -5848,7 +5821,7 @@
Bool isQ = INSN(30,30) == 1;
Bool isU = INSN(29,29) == 1;
UInt szBlg2 = INSN(23,22);
- Bool isMAX = INSN(12,12) == 0;
+ Bool isMAX = INSN(11,11) == 0;
UInt mm = INSN(20,16);
UInt nn = INSN(9,5);
UInt dd = INSN(4,0);
@@ -5923,7 +5896,7 @@
source into the upper half, so we can then treat it the
same as the full width case. */
IRTemp tN2 = newTemp(Ity_V128);
- assign(tN2, zeroHI ? mk_CatOddLanes64x2(tN1,tN1) : mkexpr(tN1));
+ assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
IRTemp res = math_MINMAXV(tN2, op);
if (res == IRTemp_INVALID)
return False; /* means math_MINMAXV
@@ -6097,22 +6070,22 @@
const IROp opNOT = Iop_NotV128;
IRExpr* res = NULL;
switch (op) {
- case BITS2(0,0):
+ case BITS2(0,0): /* EOR */
res = binop(opXOR, mkexpr(argM), mkexpr(argN));
break;
- case BITS2(0,1):
+ case BITS2(0,1): /* BSL */
res = binop(opXOR, mkexpr(argM),
binop(opAND,
binop(opXOR, mkexpr(argM), mkexpr(argN)),
mkexpr(argD)));
break;
- case BITS2(1,0):
+ case BITS2(1,0): /* BIT */
res = binop(opXOR, mkexpr(argD),
binop(opAND,
binop(opXOR, mkexpr(argD), mkexpr(argN)),
mkexpr(argM)));
break;
- case BITS2(1,1):
+ case BITS2(1,1): /* BIF */
res = binop(opXOR, mkexpr(argD),
binop(opAND,
binop(opXOR, mkexpr(argD), mkexpr(argN)),
@@ -6179,9 +6152,11 @@
0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
where Ta,Tb,sh
- = case immh of 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
- 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
+ = case immh of 1xxx -> invalid
01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
+ 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
+ 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
+ 0000 -> AdvSIMD modified immediate (???)
*/
if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
&& INSN(15,10) == BITS6(1,0,1,0,0,1)) {
@@ -6200,14 +6175,17 @@
const HChar* tb = "??";
assign(src, getQReg128(nn));
assign(zero, mkV128(0x0000));
- if (immh & 1) {
- sh = immhb - 8;
- vassert(sh < 8); /* so 8-sh is 1..8 */
- ta = "8h";
- tb = isQ ? "16b" : "8b";
- IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
- : mk_InterleaveLO8x16(src, zero);
- res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
+ if (immh & 8) {
+ /* invalid; don't assign to res */
+ }
+ else if (immh & 4) {
+ sh = immhb - 32;
+ vassert(sh < 32); /* so 32-sh is 1..32 */
+ ta = "2d";
+ tb = isQ ? "4s" : "2s";
+ IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
+ : mk_InterleaveLO32x4(src, zero);
+ res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
}
else if (immh & 2) {
sh = immhb - 16;
@@ -6218,14 +6196,17 @@
: mk_InterleaveLO16x8(src, zero);
res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
}
- else if (immh & 4) {
- sh = immhb - 32;
- vassert(sh < 32); /* so 32-sh is 1..32 */
- ta = "2d";
- tb = isQ ? "4s" : "2s";
- IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
- : mk_InterleaveLO32x4(src, zero);
- res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
+ else if (immh & 1) {
+ sh = immhb - 8;
+ vassert(sh < 8); /* so 8-sh is 1..8 */
+ ta = "8h";
+ tb = isQ ? "16b" : "8b";
+ IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
+ : mk_InterleaveLO8x16(src, zero);
+ res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
+ } else {
+ vassert(immh == 0);
+ /* invalid; don't assign to res */
}
/* */
if (res) {
|
|
From: <sv...@va...> - 2014-03-01 11:17:35
|
Author: sewardj
Date: Sat Mar 1 11:16:57 2014
New Revision: 2828
Log:
Select and emit insns for
Iop_ZeroHI64ofV128 Iop_Max8Sx16 Iop_Min8Sx16
Modified:
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Sat Mar 1 11:16:57 2014
@@ -4916,11 +4916,13 @@
011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
- 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
- 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
-
- 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
- 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
+ 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
+ 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
+ 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
+
+ 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
+ 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
+ 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
@@ -5009,6 +5011,9 @@
case ARM64vecb_SMAX16x8:
*p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
break;
+ case ARM64vecb_SMAX8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
+ break;
case ARM64vecb_SMIN32x4:
*p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
@@ -5016,6 +5021,9 @@
case ARM64vecb_SMIN16x8:
*p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
break;
+ case ARM64vecb_SMIN8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
+ break;
case ARM64vecb_AND:
*p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
@@ -6005,6 +6013,12 @@
*p++ = 0x2F00E5E0 | rQ;
goto done;
}
+ if (imm == 0x00FF) {
+ /* movi rD, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rD */
+ vassert(rQ < 32);
+ *p++ = 0x2F07E7E0 | rQ;
+ goto done;
+ }
goto bad; /* no other handled cases right now */
}
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Sat Mar 1 11:16:57 2014
@@ -4347,6 +4347,7 @@
/* Iop_ZeroHIXXofV128 cases */
UShort imm16 = 0;
switch (e->Iex.Unop.op) {
+ case Iop_ZeroHI64ofV128: imm16 = 0x00FF; break;
case Iop_ZeroHI96ofV128: imm16 = 0x000F; break;
case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
@@ -4867,8 +4868,10 @@
case Iop_Min8Ux16:
case Iop_Max32Sx4:
case Iop_Max16Sx8:
+ case Iop_Max8Sx16:
case Iop_Min32Sx4:
case Iop_Min16Sx8:
+ case Iop_Min8Sx16:
case Iop_Add64x2:
case Iop_Add32x4:
case Iop_Add16x8:
@@ -4894,8 +4897,10 @@
case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break;
case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break;
case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break;
+ case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break;
case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break;
case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break;
+ case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break;
case Iop_Add64x2: op = ARM64vecb_ADD64x2; break;
case Iop_Add32x4: op = ARM64vecb_ADD32x4; break;
case Iop_Add16x8: op = ARM64vecb_ADD16x8; break;
|