You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
1
(5) |
2
(11) |
3
|
4
(9) |
5
(10) |
6
(4) |
7
(14) |
|
8
(15) |
9
(15) |
10
(14) |
11
(13) |
12
(16) |
13
(12) |
14
(9) |
|
15
(21) |
16
(13) |
17
(11) |
18
(13) |
19
(5) |
20
(29) |
21
(20) |
|
22
(13) |
23
(18) |
24
(21) |
25
(17) |
26
(26) |
27
(13) |
28
(17) |
|
29
(10) |
30
(5) |
|
|
|
|
|
|
From: <sv...@va...> - 2014-06-19 22:21:35
|
Author: sewardj
Date: Thu Jun 19 22:21:28 2014
New Revision: 14054
Log:
Enable/enhance test cases for: dup_{d_d[], s_s[], h_h[], b_b[]}, ext
Modified:
trunk/none/tests/arm64/fp_and_simd.c
Modified: trunk/none/tests/arm64/fp_and_simd.c
==============================================================================
--- trunk/none/tests/arm64/fp_and_simd.c (original)
+++ trunk/none/tests/arm64/fp_and_simd.c Thu Jun 19 22:21:28 2014
@@ -1514,19 +1514,48 @@
GEN_THREEVEC_TEST(ext_16b_16b_16b_0x0,
"ext v2.16b, v11.16b, v29.16b, #0", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0x1,
+ "ext v2.16b, v11.16b, v29.16b, #1", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0x2,
+ "ext v2.16b, v11.16b, v29.16b, #2", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0x3,
+ "ext v2.16b, v11.16b, v29.16b, #3", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0x4,
+ "ext v2.16b, v11.16b, v29.16b, #4", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0x5,
+ "ext v2.16b, v11.16b, v29.16b, #5", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0x6,
+ "ext v2.16b, v11.16b, v29.16b, #6", 2, 11, 29)
GEN_THREEVEC_TEST(ext_16b_16b_16b_0x7,
"ext v2.16b, v11.16b, v29.16b, #7", 2, 11, 29)
GEN_THREEVEC_TEST(ext_16b_16b_16b_0x8,
"ext v2.16b, v11.16b, v29.16b, #8", 2, 11, 29)
GEN_THREEVEC_TEST(ext_16b_16b_16b_0x9,
"ext v2.16b, v11.16b, v29.16b, #9", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0xA,
+ "ext v2.16b, v11.16b, v29.16b, #10", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0xB,
+ "ext v2.16b, v11.16b, v29.16b, #11", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0xC,
+ "ext v2.16b, v11.16b, v29.16b, #12", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0xD,
+ "ext v2.16b, v11.16b, v29.16b, #13", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_16b_16b_16b_0xE,
+ "ext v2.16b, v11.16b, v29.16b, #14", 2, 11, 29)
GEN_THREEVEC_TEST(ext_16b_16b_16b_0xF,
"ext v2.16b, v11.16b, v29.16b, #15", 2, 11, 29)
-
GEN_THREEVEC_TEST(ext_8b_8b_8b_0x0,
"ext v2.8b, v11.8b, v29.8b, #0", 2, 11, 29)
GEN_THREEVEC_TEST(ext_8b_8b_8b_0x1,
"ext v2.8b, v11.8b, v29.8b, #1", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_8b_8b_8b_0x2,
+ "ext v2.8b, v11.8b, v29.8b, #2", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_8b_8b_8b_0x3,
+ "ext v2.8b, v11.8b, v29.8b, #3", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_8b_8b_8b_0x4,
+ "ext v2.8b, v11.8b, v29.8b, #4", 2, 11, 29)
+GEN_THREEVEC_TEST(ext_8b_8b_8b_0x5,
+ "ext v2.8b, v11.8b, v29.8b, #5", 2, 11, 29)
GEN_THREEVEC_TEST(ext_8b_8b_8b_0x6,
"ext v2.8b, v11.8b, v29.8b, #6", 2, 11, 29)
GEN_THREEVEC_TEST(ext_8b_8b_8b_0x7,
@@ -3540,14 +3569,14 @@
if (1) test_cnt_8b_8b(TyB);
// dup d,s,h,b (vec elem to scalar)
- if (0) test_dup_d_d0(TyD);
- if (0) test_dup_d_d1(TyD);
- if (0) test_dup_s_s0(TyS);
- if (0) test_dup_s_s3(TyS);
- if (0) test_dup_h_h0(TyH);
- if (0) test_dup_h_h6(TyH);
- if (0) test_dup_b_b0(TyB);
- if (0) test_dup_b_b13(TyB);
+ if (1) test_dup_d_d0(TyD);
+ if (1) test_dup_d_d1(TyD);
+ if (1) test_dup_s_s0(TyS);
+ if (1) test_dup_s_s3(TyS);
+ if (1) test_dup_h_h0(TyH);
+ if (1) test_dup_h_h6(TyH);
+ if (1) test_dup_b_b0(TyB);
+ if (1) test_dup_b_b13(TyB);
// dup 2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
if (1) test_dup_2d_d0(TyD);
@@ -3575,15 +3604,30 @@
if (1) test_dup_8b_w(TyB);
// ext 16b,8b,#imm4 (concat 2 vectors, then slice)
- if (0) test_ext_16b_16b_16b_0x0(TyB);
- if (0) test_ext_16b_16b_16b_0x7(TyB);
- if (0) test_ext_16b_16b_16b_0x8(TyB);
- if (0) test_ext_16b_16b_16b_0x9(TyB);
- if (0) test_ext_16b_16b_16b_0xF(TyB);
- if (0) test_ext_8b_8b_8b_0x0(TyB);
- if (0) test_ext_8b_8b_8b_0x1(TyB);
- if (0) test_ext_8b_8b_8b_0x6(TyB);
- if (0) test_ext_8b_8b_8b_0x7(TyB);
+ if (1) test_ext_16b_16b_16b_0x0(TyB);
+ if (1) test_ext_16b_16b_16b_0x1(TyB);
+ if (1) test_ext_16b_16b_16b_0x2(TyB);
+ if (1) test_ext_16b_16b_16b_0x3(TyB);
+ if (1) test_ext_16b_16b_16b_0x4(TyB);
+ if (1) test_ext_16b_16b_16b_0x5(TyB);
+ if (1) test_ext_16b_16b_16b_0x6(TyB);
+ if (1) test_ext_16b_16b_16b_0x7(TyB);
+ if (1) test_ext_16b_16b_16b_0x8(TyB);
+ if (1) test_ext_16b_16b_16b_0x9(TyB);
+ if (1) test_ext_16b_16b_16b_0xA(TyB);
+ if (1) test_ext_16b_16b_16b_0xB(TyB);
+ if (1) test_ext_16b_16b_16b_0xC(TyB);
+ if (1) test_ext_16b_16b_16b_0xD(TyB);
+ if (1) test_ext_16b_16b_16b_0xE(TyB);
+ if (1) test_ext_16b_16b_16b_0xF(TyB);
+ if (1) test_ext_8b_8b_8b_0x0(TyB);
+ if (1) test_ext_8b_8b_8b_0x1(TyB);
+ if (1) test_ext_8b_8b_8b_0x2(TyB);
+ if (1) test_ext_8b_8b_8b_0x3(TyB);
+ if (1) test_ext_8b_8b_8b_0x4(TyB);
+ if (1) test_ext_8b_8b_8b_0x5(TyB);
+ if (1) test_ext_8b_8b_8b_0x6(TyB);
+ if (1) test_ext_8b_8b_8b_0x7(TyB);
// ins d[]_d[],s[]_s[],h[]_h[],b[]_b[]
if (0) test_ins_d0_d0(TyD);
|
|
From: <sv...@va...> - 2014-06-19 22:21:01
|
Author: sewardj
Date: Thu Jun 19 22:20:47 2014
New Revision: 2880
Log:
Implement: dup_{d_d[], s_s[], h_h[], b_b[]}, ext
Modified:
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Thu Jun 19 22:20:47 2014
@@ -5555,7 +5555,58 @@
static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31 29 23 21 20 15 14 10 9 4
+ 0 q 101110 op2 0 m 0 imm4 0 n d
+ Decode fields: op2
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,31) != 0
+ || INSN(29,24) != BITS6(1,0,1,1,1,0)
+ || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
+ return False;
+ }
+ UInt bitQ = INSN(30,30);
+ UInt op2 = INSN(23,22);
+ UInt mm = INSN(20,16);
+ UInt imm4 = INSN(14,11);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+
+ if (op2 == BITS2(0,0)) {
+ /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
+ IRTemp sHi = newTemp(Ity_V128);
+ IRTemp sLo = newTemp(Ity_V128);
+ IRTemp res = newTemp(Ity_V128);
+ assign(sHi, getQReg128(mm));
+ assign(sLo, getQReg128(nn));
+ if (bitQ == 1) {
+ if (imm4 == 0) {
+ assign(res, mkexpr(sLo));
+ } else {
+ vassert(imm4 <= 15);
+ assign(res,
+ binop(Iop_OrV128,
+ binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
+ binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
+ }
+ putQReg128(dd, mkexpr(res));
+ DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
+ } else {
+ if (imm4 >= 8) return False;
+ if (imm4 == 0) {
+ assign(res, mkexpr(sLo));
+ } else {
+ assign(res,
+ binop(Iop_ShrV128,
+ binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
+ mkU8(8 * imm4)));
+ }
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
+ }
+ return True;
+ }
+
return False;
# undef INSN
}
@@ -6115,7 +6166,66 @@
static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
+ /* 31 28 20 15 14 10 9 4
+ 01 op 11110000 imm5 0 imm4 1 n d
+ Decode fields: op,imm4
+ */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
+ if (INSN(31,30) != BITS2(0,1)
+ || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
+ || INSN(15,15) != 0 || INSN(10,10) != 1) {
+ return False;
+ }
+ UInt bitOP = INSN(29,29);
+ UInt imm5 = INSN(20,16);
+ UInt imm4 = INSN(14,11);
+ UInt nn = INSN(9,5);
+ UInt dd = INSN(4,0);
+
+ if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
+ /* -------- 0,0000 DUP (element, scalar) -------- */
+ IRTemp w0 = newTemp(Ity_I64);
+ const HChar* arTs = "??";
+ IRType laneTy = Ity_INVALID;
+ UInt laneNo = 16; /* invalid */
+ if (imm5 & 1) {
+ arTs = "b";
+ laneNo = (imm5 >> 1) & 15;
+ laneTy = Ity_I8;
+ assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
+ }
+ else if (imm5 & 2) {
+ arTs = "h";
+ laneNo = (imm5 >> 2) & 7;
+ laneTy = Ity_I16;
+ assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
+ }
+ else if (imm5 & 4) {
+ arTs = "s";
+ laneNo = (imm5 >> 3) & 3;
+ laneTy = Ity_I32;
+ assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
+ }
+ else if (imm5 & 8) {
+ arTs = "d";
+ laneNo = (imm5 >> 4) & 1;
+ laneTy = Ity_I64;
+ assign(w0, getQRegLane(nn, laneNo, laneTy));
+ }
+ else {
+ /* invalid; leave laneTy unchanged. */
+ }
+ /* */
+ if (laneTy != Ity_INVALID) {
+ vassert(laneNo < 16);
+ putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
+ DIP("dup %s, %s.%s[%u]\n",
+ nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
+ return True;
+ }
+ /* else fall through */
+ }
+
return False;
# undef INSN
}
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Thu Jun 19 22:20:47 2014
@@ -1690,6 +1690,16 @@
vassert(amt > 0 && amt <= maxSh);
return i;
}
+ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
+ ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+ i->tag = ARM64in_VExtV;
+ i->ARM64in.VExtV.dst = dst;
+ i->ARM64in.VExtV.srcLo = srcLo;
+ i->ARM64in.VExtV.srcHi = srcHi;
+ i->ARM64in.VExtV.amtB = amtB;
+ vassert(amtB >= 1 && amtB <= 15);
+ return i;
+}
//ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
//ZZ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
//ZZ i->tag = ARMin_VAluS;
@@ -2335,6 +2345,16 @@
vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
return;
}
+ case ARM64in_VExtV: {
+ vex_printf("ext ");
+ ppHRegARM64(i->ARM64in.VExtV.dst);
+ vex_printf(".16b, ");
+ ppHRegARM64(i->ARM64in.VExtV.srcLo);
+ vex_printf(".16b, ");
+ ppHRegARM64(i->ARM64in.VExtV.srcHi);
+ vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
+ return;
+ }
//ZZ case ARMin_VAluS:
//ZZ vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
//ZZ ppHRegARM(i->ARMin.VAluS.dst);
@@ -2816,6 +2836,10 @@
addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
return;
+ case ARM64in_VExtV:
+ addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
+ addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
+ addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
//ZZ case ARMin_VAluS:
//ZZ addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
//ZZ addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
@@ -3112,6 +3136,12 @@
i->ARM64in.VShiftImmV.src
= lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
return;
+ case ARM64in_VExtV:
+ i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
+ i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
+ i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
+ return;
+
//ZZ case ARMin_VAluS:
//ZZ i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
//ZZ i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
@@ -5410,8 +5440,8 @@
}
case ARM64in_VShiftImmV: {
/*
- 0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
- 0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
+ 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
+ 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
where immh:immb
= case T of
2d | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
@@ -5419,7 +5449,7 @@
8h | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx
16b | sh in 1..7 -> let xxx = 8-sh in 0001:xxx
- 0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
+ 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
where immh:immb
= case T of
2d | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
@@ -5487,8 +5517,6 @@
goto done;
}
break;
-
-
/* 8x16 cases */
case ARM64vecsh_SSHR8x16: syned = True;
case ARM64vecsh_USHR8x16: /* fallthrough */
@@ -5507,12 +5535,26 @@
goto done;
}
break;
-
default:
break;
}
goto bad;
}
+ case ARM64in_VExtV: {
+ /*
+ 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
+ where imm4 = the shift amount, in bytes,
+ Vn is low operand, Vm is high operand
+ */
+ UInt vD = qregNo(i->ARM64in.VExtV.dst);
+ UInt vN = qregNo(i->ARM64in.VExtV.srcLo);
+ UInt vM = qregNo(i->ARM64in.VExtV.srcHi);
+ UInt imm4 = i->ARM64in.VExtV.amtB;
+ vassert(imm4 >= 1 && imm4 <= 15);
+ *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
+ X000000 | (imm4 << 1), vN, vD);
+ goto done;
+ }
//ZZ case ARMin_VAluS: {
//ZZ UInt dN = fregNo(i->ARMin.VAluS.argL);
//ZZ UInt dD = fregNo(i->ARMin.VAluS.dst);
Modified: trunk/priv/host_arm64_defs.h
==============================================================================
--- trunk/priv/host_arm64_defs.h (original)
+++ trunk/priv/host_arm64_defs.h Thu Jun 19 22:20:47 2014
@@ -561,6 +561,7 @@
ARM64in_VUnaryV,
ARM64in_VNarrowV,
ARM64in_VShiftImmV,
+ ARM64in_VExtV,
//ZZ ARMin_VAluS,
//ZZ ARMin_VCMovD,
//ZZ ARMin_VCMovS,
@@ -855,6 +856,12 @@
HReg src;
UInt amt;
} VShiftImmV;
+ struct {
+ HReg dst;
+ HReg srcLo;
+ HReg srcHi;
+ UInt amtB;
+ } VExtV;
//ZZ /* 32-bit FP binary arithmetic */
//ZZ struct {
//ZZ ARMVfpOp op;
@@ -1051,6 +1058,8 @@
extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
HReg dst, HReg src, UInt amt );
+extern ARM64Instr* ARM64Instr_VExtV ( HReg dst,
+ HReg srcLo, HReg srcHi, UInt amtB );
//ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg );
//ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
//ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Thu Jun 19 22:20:47 2014
@@ -5626,6 +5626,49 @@
/* else fall out; this is unhandled */
break;
}
+
+ case Iop_ShlV128:
+ case Iop_ShrV128: {
+ Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
+ /* This is tricky. Generate an EXT instruction with zeroes in
+ the high operand (shift right) or low operand (shift left).
+ Note that we can only slice in the EXT instruction at a byte
+ level of granularity, so the shift amount needs careful
+ checking. */
+ IRExpr* argL = e->Iex.Binop.arg1;
+ IRExpr* argR = e->Iex.Binop.arg2;
+ if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
+ UInt amt = argR->Iex.Const.con->Ico.U8;
+ Bool amtOK = False;
+ switch (amt) {
+ case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
+ case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
+ case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
+ amtOK = True; break;
+ }
+ /* We could also deal with amt==0 by copying the source to
+ the destination, but there's no need for that so far. */
+ if (amtOK) {
+ HReg src = iselV128Expr(env, argL);
+ HReg srcZ = newVRegV(env);
+ addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
+ UInt immB = amt / 8;
+ vassert(immB >= 1 && immB <= 15);
+ HReg dst = newVRegV(env);
+ if (isSHR) {
+ addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
+ immB));
+ } else {
+ addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
+ 16 - immB));
+ }
+ return dst;
+ }
+ }
+ /* else fall out; this is unhandled */
+ break;
+ }
+
//ZZ case Iop_CmpGT8Ux16:
//ZZ case Iop_CmpGT16Ux8:
//ZZ case Iop_CmpGT32Ux4: {
|
|
From: <sv...@va...> - 2014-06-19 20:33:36
|
Author: philippe
Date: Thu Jun 19 20:33:27 2014
New Revision: 14053
Log:
Improve/fix hash table collision statistics + remove useless space in gdbsrv hostvisibility keyword
Modified:
trunk/coregrind/m_gdbserver/server.c
trunk/coregrind/m_hashtable.c
Modified: trunk/coregrind/m_gdbserver/server.c
==============================================================================
--- trunk/coregrind/m_gdbserver/server.c (original)
+++ trunk/coregrind/m_gdbserver/server.c Thu Jun 19 20:33:27 2014
@@ -257,7 +257,7 @@
wcmd = strtok_r (NULL, " ", &ssaveptr);
switch (kwdid = VG_(keyword_id)
("vgdb-error debuglog merge-recursive-frames"
- " gdb_output log_output mixed_output hostvisibility ",
+ " gdb_output log_output mixed_output hostvisibility",
wcmd, kwd_report_all)) {
case -2:
case -1:
Modified: trunk/coregrind/m_hashtable.c
==============================================================================
--- trunk/coregrind/m_hashtable.c (original)
+++ trunk/coregrind/m_hashtable.c Thu Jun 19 20:33:27 2014
@@ -32,6 +32,7 @@
#include "pub_core_debuglog.h"
#include "pub_core_hashtable.h"
#include "pub_core_libcassert.h"
+#include "pub_core_libcbase.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
@@ -234,23 +235,21 @@
void VG_(HT_print_stats) ( VgHashTable table, HT_Cmp_t cmp )
{
#define MAXOCCUR 20
- UInt elt_occurences[MAXOCCUR];
- UInt key_occurences[MAXOCCUR];
- UInt cno_occurences[MAXOCCUR];
+ UInt elt_occurences[MAXOCCUR+1];
+ UInt key_occurences[MAXOCCUR+1];
+ UInt cno_occurences[MAXOCCUR+1];
/* Key occurence : how many ht elements have the same key.
elt_occurences : how many elements are inserted multiple time.
cno_occurences : how many chains have that length.
- The last entry in these arrays collects all occurences >= MAXOCCUR-1. */
- #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[n-1]++ : occur[n]++)
+ The last entry in these arrays collects all occurences >= MAXOCCUR. */
+ #define INCOCCUR(occur,n) (n >= MAXOCCUR ? occur[MAXOCCUR]++ : occur[n]++)
UInt i;
UInt nkey, nelt, ncno;
VgHashNode *cnode, *node;
- for (i = 0; i < 20; i++) {
- key_occurences[i] = 0;
- elt_occurences[i] = 0;
- cno_occurences[i] = 0;
- }
+ VG_(memset)(key_occurences, 0, sizeof(key_occurences));
+ VG_(memset)(elt_occurences, 0, sizeof(elt_occurences));
+ VG_(memset)(cno_occurences, 0, sizeof(cno_occurences));
// Note that the below algorithm is quadractic in nr of elements in a chain
// but if that happens, the hash table/function is really bad and that
@@ -307,16 +306,20 @@
" N-plicated keys,"
" N-plicated elts\n");
nkey = nelt = ncno = 0;
- for (i = 0; i < MAXOCCUR; i++) {
- if (elt_occurences[i] > 0 || key_occurences[i] > 0 || cno_occurences[i] > 0)
- VG_(message)(Vg_DebugMsg,
- "N:%2d : nr chain %6d, nr keys %6d, nr elts %6d\n",
- i, cno_occurences[i], key_occurences[i], elt_occurences[i]);
+ for (i = 0; i <= MAXOCCUR; i++) {
+ if (elt_occurences[i] > 0
+ || key_occurences[i] > 0
+ || cno_occurences[i] > 0)
+ VG_(message)(Vg_DebugMsg,
+ "%s=%2d : nr chain %6d, nr keys %6d, nr elts %6d\n",
+ i == MAXOCCUR ? ">" : "N", i,
+ cno_occurences[i], key_occurences[i], elt_occurences[i]);
nkey += key_occurences[i];
nelt += elt_occurences[i];
ncno += cno_occurences[i];
}
- VG_(message)(Vg_DebugMsg, "total nr of unique chains: %6d, keys %6d, elts %6d\n",
+ VG_(message)(Vg_DebugMsg,
+ "total nr of unique chains: %6d, keys %6d, elts %6d\n",
ncno, nkey, nelt);
}
|
|
From: <sv...@va...> - 2014-06-19 14:22:28
|
Author: sewardj
Date: Thu Jun 19 14:22:20 2014
New Revision: 14052
Log:
Enable test cases for: orr_{8h,4h}_imm8_shifted,
orr_{4s,2s}_imm8_shifted, bic_{8h,4h}_imm8_shifted,
bic_{4s,2s}_imm8_shifted, cls_std6_std6, cm{eq,ge,gt,hi,hs,tst}_d_d_d,
cm{ge,gt,le,lt}_d_d_zero, cnt_{16,8}b_{16,8}b
Modified:
trunk/none/tests/arm64/fp_and_simd.c
Modified: trunk/none/tests/arm64/fp_and_simd.c
==============================================================================
--- trunk/none/tests/arm64/fp_and_simd.c (original)
+++ trunk/none/tests/arm64/fp_and_simd.c Thu Jun 19 14:22:20 2014
@@ -3368,30 +3368,30 @@
// bic 4s,2s #imm8, LSL #0, 8, 16 or 24
// movi and mvni are very similar, a superset of these.
// Cases are below.
- if (0) test_orr_8h_0x5A_lsl0(TyH);
- if (0) test_orr_8h_0xA5_lsl8(TyH);
- if (0) test_orr_4h_0x5A_lsl0(TyH);
- if (0) test_orr_4h_0xA5_lsl8(TyH);
- if (0) test_orr_4s_0x5A_lsl0(TyS);
- if (0) test_orr_4s_0x6B_lsl8(TyS);
- if (0) test_orr_4s_0x49_lsl16(TyS);
- if (0) test_orr_4s_0x3D_lsl24(TyS);
- if (0) test_orr_2s_0x5A_lsl0(TyS);
- if (0) test_orr_2s_0x6B_lsl8(TyS);
- if (0) test_orr_2s_0x49_lsl16(TyS);
- if (0) test_orr_2s_0x3D_lsl24(TyS);
- if (0) test_bic_8h_0x5A_lsl0(TyH);
- if (0) test_bic_8h_0xA5_lsl8(TyH);
- if (0) test_bic_4h_0x5A_lsl0(TyH);
- if (0) test_bic_4h_0xA5_lsl8(TyH);
- if (0) test_bic_4s_0x5A_lsl0(TyS);
- if (0) test_bic_4s_0x6B_lsl8(TyS);
- if (0) test_bic_4s_0x49_lsl16(TyS);
- if (0) test_bic_4s_0x3D_lsl24(TyS);
- if (0) test_bic_2s_0x5A_lsl0(TyS);
- if (0) test_bic_2s_0x6B_lsl8(TyS);
- if (0) test_bic_2s_0x49_lsl16(TyS);
- if (0) test_bic_2s_0x3D_lsl24(TyS);
+ if (1) test_orr_8h_0x5A_lsl0(TyH);
+ if (1) test_orr_8h_0xA5_lsl8(TyH);
+ if (1) test_orr_4h_0x5A_lsl0(TyH);
+ if (1) test_orr_4h_0xA5_lsl8(TyH);
+ if (1) test_orr_4s_0x5A_lsl0(TyS);
+ if (1) test_orr_4s_0x6B_lsl8(TyS);
+ if (1) test_orr_4s_0x49_lsl16(TyS);
+ if (1) test_orr_4s_0x3D_lsl24(TyS);
+ if (1) test_orr_2s_0x5A_lsl0(TyS);
+ if (1) test_orr_2s_0x6B_lsl8(TyS);
+ if (1) test_orr_2s_0x49_lsl16(TyS);
+ if (1) test_orr_2s_0x3D_lsl24(TyS);
+ if (1) test_bic_8h_0x5A_lsl0(TyH);
+ if (1) test_bic_8h_0xA5_lsl8(TyH);
+ if (1) test_bic_4h_0x5A_lsl0(TyH);
+ if (1) test_bic_4h_0xA5_lsl8(TyH);
+ if (1) test_bic_4s_0x5A_lsl0(TyS);
+ if (1) test_bic_4s_0x6B_lsl8(TyS);
+ if (1) test_bic_4s_0x49_lsl16(TyS);
+ if (1) test_bic_4s_0x3D_lsl24(TyS);
+ if (1) test_bic_2s_0x5A_lsl0(TyS);
+ if (1) test_bic_2s_0x6B_lsl8(TyS);
+ if (1) test_bic_2s_0x49_lsl16(TyS);
+ if (1) test_bic_2s_0x3D_lsl24(TyS);
// bif 16b,8b (vector) (bit insert if false)
// bit 16b,8b (vector) (bit insert if true)
@@ -3408,18 +3408,18 @@
// cls 4s,2s,8h,4h,16b,8b (count leading sign bits)
// clz 4s,2s,8h,4h,16b,8b (count leading zero bits)
- if (0) test_cls_4s_4s(TyS);
- if (0) test_cls_2s_2s(TyS);
- if (0) test_cls_8h_8h(TyH);
- if (0) test_cls_4h_4h(TyH);
- if (0) test_cls_16b_16b(TyB);
- if (0) test_cls_8b_8b(TyB);
- if (0) test_clz_4s_4s(TyS);
- if (0) test_clz_2s_2s(TyS);
- if (0) test_clz_8h_8h(TyH);
- if (0) test_clz_4h_4h(TyH);
- if (0) test_clz_16b_16b(TyB);
- if (0) test_clz_8b_8b(TyB);
+ if (1) test_cls_4s_4s(TyS);
+ if (1) test_cls_2s_2s(TyS);
+ if (1) test_cls_8h_8h(TyH);
+ if (1) test_cls_4h_4h(TyH);
+ if (1) test_cls_16b_16b(TyB);
+ if (1) test_cls_8b_8b(TyB);
+ if (1) test_clz_4s_4s(TyS);
+ if (1) test_clz_2s_2s(TyS);
+ if (1) test_clz_8h_8h(TyH);
+ if (1) test_clz_4h_4h(TyH);
+ if (1) test_clz_16b_16b(TyB);
+ if (1) test_clz_8b_8b(TyB);
// cmeq d
// cmge d
@@ -3427,12 +3427,12 @@
// cmhi d
// cmhs d
// cmtst d
- if (0) test_cmeq_d_d_d(TyD);
- if (0) test_cmge_d_d_d(TyD);
- if (0) test_cmgt_d_d_d(TyD);
- if (0) test_cmhi_d_d_d(TyD);
- if (0) test_cmhs_d_d_d(TyD);
- if (0) test_cmtst_d_d_d(TyD);
+ if (1) test_cmeq_d_d_d(TyD);
+ if (1) test_cmge_d_d_d(TyD);
+ if (1) test_cmgt_d_d_d(TyD);
+ if (1) test_cmhi_d_d_d(TyD);
+ if (1) test_cmhs_d_d_d(TyD);
+ if (1) test_cmtst_d_d_d(TyD);
// cmeq 2d,4s,2s,8h,4h,16b,8b
// cmge 2d,4s,2s,8h,4h,16b,8b
@@ -3489,10 +3489,10 @@
// cmle_z d
// cmlt_z d
if (1) test_cmeq_zero_d_d(TyD);
- if (0) test_cmge_zero_d_d(TyD);
- if (0) test_cmgt_zero_d_d(TyD);
- if (0) test_cmle_zero_d_d(TyD);
- if (0) test_cmlt_zero_d_d(TyD);
+ if (1) test_cmge_zero_d_d(TyD);
+ if (1) test_cmgt_zero_d_d(TyD);
+ if (1) test_cmle_zero_d_d(TyD);
+ if (1) test_cmlt_zero_d_d(TyD);
// cmeq_z 2d,4s,2s,8h,4h,16b,8b
// cmge_z 2d,4s,2s,8h,4h,16b,8b
@@ -3536,8 +3536,8 @@
if (1) test_cmlt_zero_8b_8b(TyB);
// cnt 16b,8b (population count per byte)
- if (0) test_cnt_16b_16b(TyB);
- if (0) test_cnt_8b_8b(TyB);
+ if (1) test_cnt_16b_16b(TyB);
+ if (1) test_cnt_8b_8b(TyB);
// dup d,s,h,b (vec elem to scalar)
if (0) test_dup_d_d0(TyD);
|
|
From: <sv...@va...> - 2014-06-19 14:22:01
|
Author: sewardj
Date: Thu Jun 19 14:21:37 2014
New Revision: 2879
Log:
Implement: orr_{8h,4h}_imm8_shifted, orr_{4s,2s}_imm8_shifted,
bic_{8h,4h}_imm8_shifted, bic_{4s,2s}_imm8_shifted, cls_std6_std6,
cm{eq,ge,gt,hi,hs,tst}_d_d_d, cm{ge,gt,le,lt}_d_d_zero,
cnt_{16,8}b_{16,8}b
Modified:
trunk/priv/guest_arm64_toIR.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
Modified: trunk/priv/guest_arm64_toIR.c
==============================================================================
--- trunk/priv/guest_arm64_toIR.c (original)
+++ trunk/priv/guest_arm64_toIR.c Thu Jun 19 14:21:37 2014
@@ -5993,6 +5993,8 @@
{
/* 31 28 18 15 11 9 4
0q op 01111 00000 abc cmode 01 defgh d
+ Decode fields: q,op,cmode
+ Bit 11 is really "o2", but it is always zero.
*/
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
if (INSN(31,31) != 0
@@ -6006,24 +6008,71 @@
UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
UInt dd = INSN(4,0);
- /* -------- {FMOV,MOVI} (vector, immediate) -------- */
- /* Allowable op:cmode
- FMOV = 1:1111
- MOVI = 0:xx00, 0:0010, 1:0x00, 1:10x0, 1:110x, x:1110,
- */
ULong imm64lo = 0;
UInt op_cmode = (bitOP << 4) | cmode;
Bool ok = False;
+ Bool isORR = False;
+ Bool isBIC = False;
switch (op_cmode) {
+ /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
case BITS5(1,1,1,1,1): // 1:1111
+ ok = bitQ == 1; break;
+
+ /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
+ /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0): // 0:0x00
+
+ /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
case BITS5(0,0,0,1,0): // 1:0010
- case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
+
+ /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
+ /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
+ case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:1x00
+
+ /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
+ /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
+
+ /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
+ /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
+
+ /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
+ /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
+
+ /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
+ /* -------- 1,1,1110 MOVI 64-bit vector -------- */
+ /* -------- x,0,1110 MOVI 8-bit -------- */
case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
ok = True; break;
+
+ /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
+ /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
+ case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
+ ok = True; isORR = True; break;
+
+ /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
+ /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
+ case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
+ ok = True; isBIC = True; break;
+
+ /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
+ /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
+ /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
+ /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
+ case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
+ case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
+ ok = True; isORR = True; break;
+
+ /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
+ /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
+ /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
+ /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
+ case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
+ case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
+ ok = True; isBIC = True; break;
+
default:
break;
}
@@ -6031,9 +6080,29 @@
ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
}
if (ok) {
- ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
- putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
- DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
+ if (isORR || isBIC) {
+ ULong inv
+ = isORR ? 0ULL : ~0ULL;
+ IRExpr* immV128
+ = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
+ IRExpr* res
+ = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
+ putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, res) : res);
+ const HChar* nm = isORR ? "orr" : "bic";
+ if (bitQ == 0) {
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
+ DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
+ } else {
+ putQReg128(dd, res);
+ DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
+ nameQReg128(dd), imm64lo, imm64lo);
+ }
+ } else {
+ ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
+ IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo));
+ putQReg128(dd, immV128);
+ DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
+ }
return True;
}
/* else fall through */
@@ -6168,6 +6237,40 @@
UInt dd = INSN(4,0);
vassert(size < 4);
+ if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
+ /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
+ /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
+ Bool isGT = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = getQReg128(mm);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res,
+ isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
+ : binop(Iop_CmpGT64Ux2, argL, argR));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
+ if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
+ /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
+ /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
+ Bool isGE = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = getQReg128(mm);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res,
+ isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
+ : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
/* -------- 0,11,10000 ADD d_d_d -------- */
/* -------- 1,11,10000 SUB d_d_d -------- */
@@ -6184,6 +6287,25 @@
return True;
}
+ if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
+ /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
+ /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
+ Bool isEQ = bitU == 1;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = getQReg128(mm);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res,
+ isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
+ : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
+ binop(Iop_AndV128, argL, argR),
+ mkV128(0x0000))));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
+ nameQRegLO(dd, Ity_I64),
+ nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+ return True;
+ }
+
if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
/* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
IRType ity = size == X11 ? Ity_F64 : Ity_F32;
@@ -6226,12 +6348,41 @@
UInt dd = INSN(4,0);
vassert(size < 4);
- if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,0,1)) {
- /* -------- 0,11,01001 CMEQ d_d_#0 -------- */
+ if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
+ /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
+ /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
+ Bool isGT = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = mkV128(0x0000);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
+ : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
+ return True;
+ }
+
+ if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
+ /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
+ /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
+ Bool isEQ = bitU == 0;
+ IRExpr* argL = getQReg128(nn);
+ IRExpr* argR = mkV128(0x0000);
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
+ : unop(Iop_NotV128,
+ binop(Iop_CmpGT64Sx2, argL, argR)));
+ putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+ DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
+ return True;
+ }
+
+ if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
+ /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
putQReg128(dd, unop(Iop_ZeroHI64ofV128,
- binop(Iop_CmpEQ64x2, getQReg128(nn),
- mkV128(0x0000))));
- DIP("cmeq d%u, d%u, #0\n", dd, nn);
+ binop(Iop_CmpGT64Sx2, mkV128(0x0000),
+ getQReg128(nn))));
+ DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
return True;
}
@@ -7002,6 +7153,36 @@
UInt dd = INSN(4,0);
vassert(size < 4);
+ if (opcode == BITS5(0,0,1,0,0)) {
+ /* -------- 0,xx,00100: CLS std6_std6 -------- */
+ /* -------- 1,xx,00100: CLZ std6_std6 -------- */
+ if (size == X11) return False; // no 1d or 2d cases
+ const IROp opsCLS[3] = { Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4 };
+ const IROp opsCLZ[3] = { Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4 };
+ Bool isCLZ = bitU == 1;
+ IRTemp res = newTemp(Ity_V128);
+ vassert(size <= 2);
+ assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
+ putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+ : mkexpr(res));
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
+ if (bitU == 0 && size == X00 && opcode == BITS5(0,0,1,0,1)) {
+ /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
+ IRTemp res = newTemp(Ity_V128);
+ assign(res, unop(Iop_Cnt8x16, getQReg128(nn)));
+ putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
+ : mkexpr(res));
+ const HChar* arr = nameArr_Q_SZ(bitQ, size);
+ DIP("%s %s.%s, %s.%s\n", "cnt",
+ nameQReg128(dd), arr, nameQReg128(nn), arr);
+ return True;
+ }
+
if (opcode == BITS5(0,1,0,0,0)) {
/* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
/* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Thu Jun 19 14:21:37 2014
@@ -929,11 +929,18 @@
case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
- case ARM64vecu_ABS64x2: *nm = "abs"; *ar = "2d"; return;
- case ARM64vecu_ABS32x4: *nm = "abs"; *ar = "4s"; return;
- case ARM64vecu_ABS16x8: *nm = "abs"; *ar = "8h"; return;
- case ARM64vecu_ABS8x16: *nm = "abs"; *ar = "16b"; return;
case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
+ case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
+ case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
+ case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
+ case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
+ case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
+ case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
+ case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
+ case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
+ case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
+ case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
+ case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
default: vpanic("showARM64VecUnaryOp");
}
}
@@ -3413,6 +3420,7 @@
#define X001111 BITS8(0,0, 0,0,1,1,1,1)
#define X010000 BITS8(0,0, 0,1,0,0,0,0)
#define X010001 BITS8(0,0, 0,1,0,0,0,1)
+#define X010010 BITS8(0,0, 0,1,0,0,1,0)
#define X010101 BITS8(0,0, 0,1,0,1,0,1)
#define X010110 BITS8(0,0, 0,1,0,1,1,0)
#define X011001 BITS8(0,0, 0,1,1,0,0,1)
@@ -5319,6 +5327,16 @@
010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
+
+ 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
+ 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
+ 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
+
+ 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
+ 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
+ 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
+
+ 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
*/
UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
@@ -5350,6 +5368,27 @@
case ARM64vecu_ABS8x16:
*p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
break;
+ case ARM64vecu_CLS32x4:
+ *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLS16x8:
+ *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLS8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLZ32x4:
+ *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLZ16x8:
+ *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CLZ8x16:
+ *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
+ break;
+ case ARM64vecu_CNT8x16:
+ *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
+ break;
default:
goto bad;
}
Modified: trunk/priv/host_arm64_defs.h
==============================================================================
--- trunk/priv/host_arm64_defs.h (original)
+++ trunk/priv/host_arm64_defs.h Thu Jun 19 14:21:37 2014
@@ -356,6 +356,9 @@
ARM64vecu_NOT,
ARM64vecu_ABS64x2, ARM64vecu_ABS32x4,
ARM64vecu_ABS16x8, ARM64vecu_ABS8x16,
+ ARM64vecu_CLS32x4, ARM64vecu_CLS16x8, ARM64vecu_CLS8x16,
+ ARM64vecu_CLZ32x4, ARM64vecu_CLZ16x8, ARM64vecu_CLZ8x16,
+ ARM64vecu_CNT8x16,
ARM64vecu_INVALID
}
ARM64VecUnaryOp;
Modified: trunk/priv/host_arm64_isel.c
==============================================================================
--- trunk/priv/host_arm64_isel.c (original)
+++ trunk/priv/host_arm64_isel.c Thu Jun 19 14:21:37 2014
@@ -4410,14 +4410,13 @@
/* Other cases */
switch (e->Iex.Unop.op) {
case Iop_NotV128:
- case Iop_Abs64Fx2:
- case Iop_Abs32Fx4:
- case Iop_Neg64Fx2:
- case Iop_Neg32Fx4:
- case Iop_Abs64x2:
- case Iop_Abs32x4:
- case Iop_Abs16x8:
- case Iop_Abs8x16:
+ case Iop_Abs64Fx2: case Iop_Abs32Fx4:
+ case Iop_Neg64Fx2: case Iop_Neg32Fx4:
+ case Iop_Abs64x2: case Iop_Abs32x4:
+ case Iop_Abs16x8: case Iop_Abs8x16:
+ case Iop_Cls32Sx4: case Iop_Cls16Sx8: case Iop_Cls8Sx16:
+ case Iop_Clz32Sx4: case Iop_Clz16Sx8: case Iop_Clz8Sx16:
+ case Iop_Cnt8x16:
{
HReg res = newVRegV(env);
HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
@@ -4432,6 +4431,13 @@
case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
+ case Iop_Cls32Sx4: op = ARM64vecu_CLS32x4; break;
+ case Iop_Cls16Sx8: op = ARM64vecu_CLS16x8; break;
+ case Iop_Cls8Sx16: op = ARM64vecu_CLS8x16; break;
+ case Iop_Clz32Sx4: op = ARM64vecu_CLZ32x4; break;
+ case Iop_Clz16Sx8: op = ARM64vecu_CLZ16x8; break;
+ case Iop_Clz8Sx16: op = ARM64vecu_CLZ8x16; break;
+ case Iop_Cnt8x16: op = ARM64vecu_CNT8x16; break;
default: vassert(0);
}
addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
|