You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
1
|
2
|
3
|
4
(1) |
5
(1) |
6
(3) |
7
|
|
8
(1) |
9
(10) |
10
(11) |
11
|
12
|
13
|
14
|
|
15
(1) |
16
(5) |
17
(1) |
18
|
19
|
20
|
21
(1) |
|
22
|
23
|
24
|
25
|
26
|
27
(1) |
28
|
|
29
|
30
(4) |
|
|
|
|
|
|
From: Julian S. <se...@so...> - 2020-11-10 20:12:03
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=eb82a294573d15c1be663673d55b559a82ca29d3 commit eb82a294573d15c1be663673d55b559a82ca29d3 Author: Julian Seward <js...@ac...> Date: Tue Nov 10 21:10:48 2020 +0100 Add a missing ifdef, whose absence caused build breakage on non-POWER targets. Diff: --- VEX/priv/guest_ppc_helpers.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c index 45dce63512..ac4d7044a8 100644 --- a/VEX/priv/guest_ppc_helpers.c +++ b/VEX/priv/guest_ppc_helpers.c @@ -1112,12 +1112,16 @@ static ULong reinterpret_double_as_long( Double input ) static Double conv_f16_to_double( ULong input ) { - // This all seems to be very alignment sensitive?? - __attribute__ ((aligned (64))) ULong src; - __attribute__ ((aligned (64))) Double result; - src = input; - __asm__ __volatile__ ("xscvhpdp %x0,%x1" : "=wa" (result) : "wa" (src)); - return result; +# if defined(__powerpc__) + // This all seems to be very alignment sensitive?? + __attribute__ ((aligned (64))) ULong src; + __attribute__ ((aligned (64))) Double result; + src = input; + __asm__ __volatile__ ("xscvhpdp %x0,%x1" : "=wa" (result) : "wa" (src)); + return result; +# else + return 0.0; +# endif } |
|
From: Carl L. <ca...@so...> - 2020-11-10 18:37:46
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=d4cfcf14a083dcb54e0fe64ff4ae9b0b635c8e69 commit d4cfcf14a083dcb54e0fe64ff4ae9b0b635c8e69 Author: Carl Love <ce...@us...> Date: Tue Oct 6 12:14:45 2020 -0500 Reduced Precision Outer Product Operation tests Diff: --- NEWS | 1 + none/tests/ppc64/Makefile.am | 10 +- none/tests/ppc64/test_isa_3_1_AT.c | 990 ++++++++++++++ none/tests/ppc64/test_isa_3_1_AT.stderr.exp | 2 + none/tests/ppc64/test_isa_3_1_AT.stdout.exp | 1924 +++++++++++++++++++++++++++ 5 files changed, 2921 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index 563a9ef8c1..8ddcab2f0f 100644 --- a/NEWS +++ b/NEWS @@ -58,6 +58,7 @@ n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 427400 PPC ISA 3.1 support is missing, part 4 427401 PPC ISA 3.1 support is missing, part 5 384729 __libc_freeres inhibits cross-platform valgrind +427404 PPC ISA 3.1 support is missing, part 6 Release 3.16.1 (?? June 2020) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am index 0a7fa77002..a267b727a8 100644 --- a/none/tests/ppc64/Makefile.am +++ b/none/tests/ppc64/Makefile.am @@ -53,10 +53,9 @@ EXTRA_DIST = \ test_isa_3_1_XT.vgtest test_isa_3_1_XT.stderr.exp test_isa_3_1_XT.stdout.exp \ test_isa_3_1_VRT.vgtest test_isa_3_1_VRT.stderr.exp test_isa_3_1_VRT.stdout.exp \ test_isa_3_1_Misc.vgtest test_isa_3_1_Misc.stderr.exp test_isa_3_1_Misc.stdout.exp \ - test_isa_3_1_AT.vgtest \ + test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp \ subnormal_test.stderr.exp subnormal_test.stdout.exp \ subnormal_test.vgtest -# test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp check_PROGRAMS = \ allexec \ @@ -66,12 +65,11 @@ check_PROGRAMS = \ test_isa_2_07_part1 test_isa_2_07_part2 \ test_isa_3_0 \ test_isa_3_1_RT test_isa_3_1_XT test_isa_3_1_VRT \ - test_isa_3_1_Misc \ + test_isa_3_1_Misc test_isa_3_1_AT \ subnormal_test \ test_tm test_touch_tm ldst_multiple 
data-cache-instructions \ power6_mf_gpr std_reg_imm \ twi_tdi tw_td power6_bcmp -# test_isa_3_1_AT AM_CFLAGS += @FLAG_M64@ AM_CXXFLAGS += @FLAG_M64@ @@ -83,8 +81,7 @@ test_isa_3_1_XT_SOURCES = test_isa_3_1_XT.c test_isa_3_1_common.c test_isa_3_1_RT_SOURCES = test_isa_3_1_RT.c test_isa_3_1_common.c test_isa_3_1_VRT_SOURCES = test_isa_3_1_VRT.c test_isa_3_1_common.c test_isa_3_1_Misc_SOURCES = test_isa_3_1_Misc.c test_isa_3_1_common.c - -#test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c +test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c if HAS_ALTIVEC BUILD_FLAG_ALTIVEC = -maltivec @@ -190,6 +187,7 @@ test_isa_3_1_RT_CFLAGS = $(test_isa_3_1_CFLAGS) test_isa_3_1_XT_CFLAGS = $(test_isa_3_1_CFLAGS) test_isa_3_1_VRT_CFLAGS = $(test_isa_3_1_CFLAGS) test_isa_3_1_Misc_CFLAGS = $(test_isa_3_1_CFLAGS) +test_isa_3_1_AT_CFLAGS = $(test_isa_3_1_CFLAGS) subnormal_test_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) $(ISA_2_06_FLAG) \ @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) $(BUILD_FLAGS_ISA_2_06) diff --git a/none/tests/ppc64/test_isa_3_1_AT.c b/none/tests/ppc64/test_isa_3_1_AT.c new file mode 100644 index 0000000000..1d6d42c613 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_AT.c @@ -0,0 +1,990 @@ +/* + * Valgrind testcase for PowerPC ISA 3.1 + * + * Copyright (C) 2019-2020 Will Schmidt <wil...@vn...> + * + * 64bit build: + * gcc -Winline -Wall -g -O -mregnames -maltivec -m64 + */ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdio.h> +#ifdef HAS_ISA_3_1 +#include <stdint.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <altivec.h> +#include <malloc.h> + +#include <string.h> +#include <signal.h> +#include <setjmp.h> + +/* Condition Register fields. + These are used to capture the condition register values immediately after + the instruction under test is executed. This is done to help prevent other + test overhead (switch statements, result compares, etc) from disturbing + the test case results. */ +unsigned long current_cr; +unsigned long current_fpscr; + +struct test_list_t current_test; + +#include "isa_3_1_helpers.h" + +static void test_xxmfacc (void) { + __asm__ __volatile__ ("xxmfacc 4"); +} +static void test_xxmtacc (void) { + __asm__ __volatile__ ("xxmtacc 4"); +} +static void test_xxsetaccz (void) { + __asm__ __volatile__ ("xxsetaccz 4"); +} +static void test_xvi4ger8 (void) { + __asm__ __volatile__ ("xvi4ger8 4, %x0, %x1" :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvi4ger8pp (void) { + __asm__ __volatile__ ("xvi4ger8pp 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM0_YM0_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 0, 0, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM0_YM1_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 0, 1, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM0_YM1_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 0, 1, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void 
test_pmxvi4ger8_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM11_YM0_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 11, 0, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM11_YM1_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 11, 1, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8_XM11_YM1_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8 4, %x0, %x1, 11, 1, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM0_YM0_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 0, 0, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM0_YM1_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 0, 1, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM0_YM1_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 0, 1, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM11_YM0_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 11, 0, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM11_YM1_PM0 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 11, 1, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi4ger8pp_XM11_YM1_PM45 (void) { + __asm__ __volatile__ ("pmxvi4ger8pp 4, %x0, %x1, 11, 1, 45" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvi8ger4 (void) { + __asm__ __volatile__ ("xvi8ger4 4, %x0, %x1" :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvi8ger4pp (void) { + __asm__ 
__volatile__ ("xvi8ger4pp 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM0_YM0_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 0, 0, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM0_YM13_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 0, 13, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM11_YM0_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 11, 0, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4_XM11_YM13_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4 4, %x0, %x1, 11, 13, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM0_YM0_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 0, 0, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM0_YM13_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 0, 13, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 11, 
0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM11_YM0_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 11, 0, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi8ger4pp_XM11_YM13_PM5 (void) { + __asm__ __volatile__ ("pmxvi8ger4pp 4, %x0, %x1, 11, 13, 5" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvi16ger2s (void) { + __asm__ __volatile__ ("xvi16ger2s 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvi16ger2spp (void) { + __asm__ __volatile__ ("xvi16ger2spp 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2s_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2s 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); 
+} +static void test_pmxvi16ger2spp_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvi16ger2spp_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvi16ger2spp 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf16ger2 (void) { + __asm__ __volatile__ ("xvf16ger2 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf16ger2pp (void) { + __asm__ __volatile__ ("xvf16ger2pp 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf16ger2pn (void) { + __asm__ __volatile__ ("xvf16ger2pn 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf16ger2np (void) { + __asm__ __volatile__ ("xvf16ger2np 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf16ger2nn (void) { + __asm__ __volatile__ ("xvf16ger2nn 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, 
%x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, 
%x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pp_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pp 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2pn_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2pn 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM0_YM13_PM0 (void) { + __asm__ 
__volatile__ ("pmxvf16ger2np 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2np_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2np 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM0_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 0, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM0_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 0, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM0_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 0, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM0_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 0, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM11_YM0_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 11, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM11_YM0_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 11, 0, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf16ger2nn_XM11_YM13_PM0 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 11, 13, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void 
test_pmxvf16ger2nn_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvf16ger2nn 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf32ger (void) { + __asm__ __volatile__ ("xvf32ger 4, %x0, %x1" :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf32gerpp (void) { + __asm__ __volatile__ ("xvf32gerpp 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf32gerpn (void) { + __asm__ __volatile__ ("xvf32gerpn 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf32gernp (void) { + __asm__ __volatile__ ("xvf32gernp 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf32gernn (void) { + __asm__ __volatile__ ("xvf32gernn 4, %x0, %x1" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32ger_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf32ger 4, %x0, %x1, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32ger_XM0_YM13 (void) { + __asm__ __volatile__ ("pmxvf32ger 4, %x0, %x1, 0, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32ger_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf32ger 4, %x0, %x1, 11, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32ger_XM11_YM13 (void) { + __asm__ __volatile__ ("pmxvf32ger 4, %x0, %x1, 11, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpp_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gerpp 4, %x0, %x1, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpp_XM0_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gerpp 4, %x0, %x1, 0, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpp_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gerpp 4, %x0, %x1, 11, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpp_XM11_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gerpp 4, %x0, %x1, 11, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpn_XM0_YM0 (void) { + __asm__ 
__volatile__ ("pmxvf32gerpn 4, %x0, %x1, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpn_XM0_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gerpn 4, %x0, %x1, 0, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpn_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gerpn 4, %x0, %x1, 11, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gerpn_XM11_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gerpn 4, %x0, %x1, 11, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernp_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gernp 4, %x0, %x1, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernp_XM0_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gernp 4, %x0, %x1, 0, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernp_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gernp 4, %x0, %x1, 11, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernp_XM11_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gernp 4, %x0, %x1, 11, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernn_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gernn 4, %x0, %x1, 0, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernn_XM0_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gernn 4, %x0, %x1, 0, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernn_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf32gernn 4, %x0, %x1, 11, 0" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_pmxvf32gernn_XM11_YM13 (void) { + __asm__ __volatile__ ("pmxvf32gernn 4, %x0, %x1, 11, 13" + :: "wa" (vec_xa), "wa" (vec_xb) ); +} +static void test_xvf64ger (void) { + __asm__ __volatile__ ("xvf64ger 4, 22, %x0" :: "wa" (vec_xa) ); +} +static void test_xvf64gerpp (void) { + __asm__ __volatile__ ("xvf64gerpp 4, 22, %x0" :: "wa" (vec_xa) ); +} +static void test_xvf64gerpn (void) { + __asm__ __volatile__ ("xvf64gerpn 4, 
22, %x0" :: "wa" (vec_xa) ); +} +static void test_xvf64gernp (void) { + __asm__ __volatile__ ("xvf64gernp 4, 22, %x0" :: "wa" (vec_xa) ); +} +static void test_xvf64gernn (void) { + __asm__ __volatile__ ("xvf64gernn 4, 22, %x0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64ger_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf64ger 4, 22, %x0, 0, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64ger_XM0_YM1 (void) { + __asm__ __volatile__ ("pmxvf64ger 4, 22, %x0, 0, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64ger_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf64ger 4, 22, %x0, 11, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64ger_XM11_YM1 (void) { + __asm__ __volatile__ ("pmxvf64ger 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpp_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gerpp 4, 22, %x0, 0, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpp_XM0_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gerpp 4, 22, %x0, 0, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpp_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gerpp 4, 22, %x0, 11, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpp_XM11_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gerpp 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpn_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gerpn 4, 22, %x0, 0, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpn_XM0_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gerpn 4, 22, %x0, 0, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpn_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gerpn 4, 22, %x0, 11, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gerpn_XM11_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gerpn 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernp_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gernp 4, 22, %x0, 0, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernp_XM0_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gernp 4, 22, %x0, 0, 
1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernp_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gernp 4, 22, %x0, 11, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernp_XM11_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gernp 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernn_XM0_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gernn 4, 22, %x0, 0, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernn_XM0_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gernn 4, 22, %x0, 0, 1" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernn_XM11_YM0 (void) { + __asm__ __volatile__ ("pmxvf64gernn 4, 22, %x0, 11, 0" :: "wa" (vec_xa) ); +} +static void test_pmxvf64gernn_XM11_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gernn 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); +} + +static test_list_t testgroup_generic[] = { + { &test_pmxvf16ger2nn_XM0_YM0_PM0, "pmxvf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM0_YM0_PM1, "pmxvf16ger2nn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM0_YM13_PM0, "pmxvf16ger2nn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM0_YM13_PM1, "pmxvf16ger2nn XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM11_YM0_PM0, "pmxvf16ger2nn XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM11_YM0_PM1, "pmxvf16ger2nn XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM11_YM13_PM0, "pmxvf16ger2nn XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM11_YM13_PM1, "pmxvf16ger2nn XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM0_YM0_PM0, "pmxvf16ger2np XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM0_YM0_PM1, "pmxvf16ger2np XM0_YM0_PM1", 
"AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM0_YM13_PM0, "pmxvf16ger2np XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM0_YM13_PM1, "pmxvf16ger2np XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM11_YM0_PM0, "pmxvf16ger2np XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM11_YM0_PM1, "pmxvf16ger2np XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM11_YM13_PM0, "pmxvf16ger2np XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2np_XM11_YM13_PM1, "pmxvf16ger2np XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM0_YM0_PM0, "pmxvf16ger2pn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM0_YM0_PM1, "pmxvf16ger2pn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM0_YM13_PM0, "pmxvf16ger2pn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM0_YM13_PM1, "pmxvf16ger2pn XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM11_YM0_PM0, "pmxvf16ger2pn XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM11_YM0_PM1, "pmxvf16ger2pn XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM11_YM13_PM0, "pmxvf16ger2pn XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pn_XM11_YM13_PM1, "pmxvf16ger2pn XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM0_YM0_PM0, "pmxvf16ger2pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM0_YM0_PM1, "pmxvf16ger2pp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM0_YM13_PM0, 
"pmxvf16ger2pp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM0_YM13_PM1, "pmxvf16ger2pp XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM11_YM0_PM0, "pmxvf16ger2pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM11_YM0_PM1, "pmxvf16ger2pp XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM11_YM13_PM0, "pmxvf16ger2pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2pp_XM11_YM13_PM1, "pmxvf16ger2pp XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM0_YM0_PM0, "pmxvf16ger2 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM0_YM0_PM1, "pmxvf16ger2 XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM0_YM13_PM0, "pmxvf16ger2 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM0_YM13_PM1, "pmxvf16ger2 XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM11_YM0_PM0, "pmxvf16ger2 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM11_YM0_PM1, "pmxvf16ger2 XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM11_YM13_PM0, "pmxvf16ger2 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2_XM11_YM13_PM1, "pmxvf16ger2 XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernn_XM0_YM0, "pmxvf32gernn XM0_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernn_XM0_YM13, "pmxvf32gernn XM0_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernn_XM11_YM0, "pmxvf32gernn XM11_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernn_XM11_YM13, "pmxvf32gernn XM11_YM13", "AT,XA,XB,XMSK,YMSK", 
0b00001111}, /* bcwp */ + { &test_pmxvf32gernp_XM0_YM0, "pmxvf32gernp XM0_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernp_XM0_YM13, "pmxvf32gernp XM0_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernp_XM11_YM0, "pmxvf32gernp XM11_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gernp_XM11_YM13, "pmxvf32gernp XM11_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpn_XM0_YM0, "pmxvf32gerpn XM0_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpn_XM0_YM13, "pmxvf32gerpn XM0_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpn_XM11_YM0, "pmxvf32gerpn XM11_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpn_XM11_YM13, "pmxvf32gerpn XM11_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpp_XM0_YM0, "pmxvf32gerpp XM0_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpp_XM0_YM13, "pmxvf32gerpp XM0_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpp_XM11_YM0, "pmxvf32gerpp XM11_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32gerpp_XM11_YM13, "pmxvf32gerpp XM11_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32ger_XM0_YM0, "pmxvf32ger XM0_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32ger_XM0_YM13, "pmxvf32ger XM0_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32ger_XM11_YM0, "pmxvf32ger XM11_YM0", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf32ger_XM11_YM13, "pmxvf32ger XM11_YM13", "AT,XA,XB,XMSK,YMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf64gernn_XM0_YM0, "pmxvf64gernn XM0_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gernn_XM0_YM1, "pmxvf64gernn XM0_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gernn_XM11_YM0, "pmxvf64gernn XM11_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { 
&test_pmxvf64gernn_XM11_YM1, "pmxvf64gernn XM11_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gernp_XM0_YM0, "pmxvf64gernp XM0_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gernp_XM0_YM1, "pmxvf64gernp XM0_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gernp_XM11_YM0, "pmxvf64gernp XM11_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gernp_XM11_YM1, "pmxvf64gernp XM11_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpn_XM0_YM0, "pmxvf64gerpn XM0_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpn_XM0_YM1, "pmxvf64gerpn XM0_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpn_XM11_YM0, "pmxvf64gerpn XM11_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpn_XM11_YM1, "pmxvf64gerpn XM11_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpp_XM0_YM0, "pmxvf64gerpp XM0_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpp_XM0_YM1, "pmxvf64gerpp XM0_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpp_XM11_YM0, "pmxvf64gerpp XM11_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64gerpp_XM11_YM1, "pmxvf64gerpp XM11_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64ger_XM0_YM0, "pmxvf64ger XM0_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64ger_XM0_YM1, "pmxvf64ger XM0_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64ger_XM11_YM0, "pmxvf64ger XM11_YM0", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvf64ger_XM11_YM1, "pmxvf64ger XM11_YM1", "AT,XAp,XB,XMSK,YMSK", 0b00110000}, /* bcwp */ + { &test_pmxvi4ger8pp_XM0_YM0_PM0, "pmxvi4ger8pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM0_YM0_PM45, "pmxvi4ger8pp XM0_YM0_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM0_YM1_PM0, 
"pmxvi4ger8pp XM0_YM1_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM0_YM1_PM45, "pmxvi4ger8pp XM0_YM1_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM11_YM0_PM0, "pmxvi4ger8pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM11_YM0_PM45, "pmxvi4ger8pp XM11_YM0_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM11_YM1_PM0, "pmxvi4ger8pp XM11_YM1_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8pp_XM11_YM1_PM45, "pmxvi4ger8pp XM11_YM1_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM0_YM0_PM0, "pmxvi4ger8 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM0_YM0_PM45, "pmxvi4ger8 XM0_YM0_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM0_YM1_PM0, "pmxvi4ger8 XM0_YM1_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM0_YM1_PM45, "pmxvi4ger8 XM0_YM1_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM11_YM0_PM0, "pmxvi4ger8 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM11_YM0_PM45, "pmxvi4ger8 XM11_YM0_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM11_YM1_PM0, "pmxvi4ger8 XM11_YM1_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi4ger8_XM11_YM1_PM45, "pmxvi4ger8 XM11_YM1_PM45", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM0_YM0_PM0, "pmxvi8ger4pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM0_YM0_PM5, "pmxvi8ger4pp XM0_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM0_YM13_PM0, "pmxvi8ger4pp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM0_YM13_PM5, "pmxvi8ger4pp XM0_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM11_YM0_PM0, "pmxvi8ger4pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM11_YM0_PM5, "pmxvi8ger4pp XM11_YM0_PM5", 
"AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM11_YM13_PM0, "pmxvi8ger4pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM11_YM13_PM5, "pmxvi8ger4pp XM11_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM0_PM0, "pmxvi8ger4 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM0_PM5, "pmxvi8ger4 XM0_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM13_PM0, "pmxvi8ger4 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM13_PM5, "pmxvi8ger4 XM0_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM11_YM0_PM0, "pmxvi8ger4 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM11_YM0_PM5, "pmxvi8ger4 XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM11_YM13_PM0, "pmxvi8ger4 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM11_YM13_PM5, "pmxvi8ger4 XM11_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM0_PM0, "pmxvi16ger2spp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM0_PM1, "pmxvi16ger2spp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM13_PM0, "pmxvi16ger2spp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM13_PM1, "pmxvi16ger2spp XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM11_YM0_PM0, "pmxvi16ger2spp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM11_YM0_PM1, "pmxvi16ger2spp XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM11_YM13_PM0, "pmxvi16ger2spp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM11_YM13_PM1, "pmxvi16ger2spp XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM0_YM0_PM0, "pmxvi16ger2s XM0_YM0_PM0", 
"AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM0_YM0_PM1, "pmxvi16ger2s XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM0_YM13_PM0, "pmxvi16ger2s XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM0_YM13_PM1, "pmxvi16ger2s XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM0_PM0, "pmxvi16ger2s XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM0_PM1, "pmxvi16ger2s XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM13_PM0, "pmxvi16ger2s XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM13_PM1, "pmxvi16ger2s XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_xvf16ger2nn, "xvf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2np, "xvf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2pn, "xvf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2pp, "xvf16ger2pp", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2, "xvf16ger2", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf32gernn, "xvf32gernn", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf32gernp, "xvf32gernp", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf32gerpn, "xvf32gerpn", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf32gerpp, "xvf32gerpp", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf32ger, "xvf32ger", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf64gernn, "xvf64gernn", "AT,XAp,XB", 0b00110000}, /* bcs */ + { &test_xvf64gernp, "xvf64gernp", "AT,XAp,XB", 0b00110000}, /* bcs */ + { &test_xvf64gerpn, "xvf64gerpn", "AT,XAp,XB", 0b00110000}, /* bcs */ + { &test_xvf64gerpp, "xvf64gerpp", "AT,XAp,XB", 0b00110000}, /* bcs */ + { &test_xvf64ger, "xvf64ger", "AT,XAp,XB", 0b00110000}, /* bcs */ + { &test_xvi4ger8pp, "xvi4ger8pp", "AT,XA,XB"}, /* bcs */ + { &test_xvi4ger8, "xvi4ger8", "AT,XA,XB"}, /* bcs */ + { &test_xvi8ger4pp, "xvi8ger4pp", 
"AT,XA,XB"}, /* bcs */ + { &test_xvi8ger4, "xvi8ger4", "AT,XA,XB"}, /* bcs */ + { &test_xvi16ger2spp, "xvi16ger2spp", "AT,XA,XB"}, /* bcs */ + { &test_xvi16ger2s, "xvi16ger2s", "AT,XA,XB"}, /* bcs */ + { &test_xxmfacc, "xxmfacc", "AS"}, /* bcs */ + { &test_xxmtacc, "xxmtacc", "AT"}, /* bcs */ + { &test_xxsetaccz, "xxsetaccz", "AT"}, /* bcs */ + { NULL, NULL }, +}; + +/* Allow skipping of tests. */ +unsigned long test_count=0xffff; +unsigned long skip_count=0; +unsigned long setup_only=0; + +/* Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs. */ +static jmp_buf mybuf; + +/* This (testfunction_generic) is meant to handle all of the instruction + variations. The helpers set up the register and iterator values + as is appropriate for the instruction being tested. */ +static void testfunction_generic (const char* instruction_name, + test_func_t test_function, + unsigned int ignore_flags, + char * cur_form) { + + identify_form_components (instruction_name , cur_form); + debug_show_form (instruction_name, cur_form); + set_up_iterators (); + debug_show_iter_ranges (); + initialize_buffer (0); + debug_dump_buffer (); + + for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) { + for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) { + for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) { + for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) { + CHECK_OVERRIDES + debug_show_current_iteration (); + // Be sure to initialize the target registers first. + initialize_target_registers (); + initialize_source_registers (); + printf ("%s", instruction_name); + print_register_header (); + printf( " =>"); fflush (stdout); + if (!setup_only) { + if (enable_setjmp) { + if ( setjmp ( mybuf ) ) { + printf("signal tripped. 
(FIXME)\n"); + continue; + } + } + (*test_function) (); + } + print_register_footer (); + print_result_buffer (); + printf ("\n"); + } + } + } + } +} + +void mykillhandler ( int x ) { longjmp (mybuf, 1); } +void mysegvhandler ( int x ) { longjmp (mybuf, 1); } + +static void do_tests ( void ) +{ + int groupcount; + char * cur_form; + test_group_t group_function = &testfunction_generic; + test_list_t *tests = testgroup_generic; + + struct sigaction kill_action, segv_action; + struct sigaction old_kill_action, old_segv_action; + if (enable_setjmp) { + kill_action.sa_handler = mykillhandler; + segv_action.sa_handler = mysegvhandler; + sigemptyset ( &kill_action.sa_mask ); + sigemptyset ( &segv_action.sa_mask ); + kill_action.sa_flags = SA_NODEFER; + segv_action.sa_flags = SA_NODEFER; + sigaction ( SIGILL, &kill_action, &old_kill_action); + sigaction ( SIGSEGV, &segv_action, &old_segv_action); + } + + for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) { + cur_form = strdup(tests[groupcount].form); + current_test = tests[groupcount]; + if (groupcount < skip_count) continue; + if (verbose) printf("Test #%d ,", groupcount); + if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose); + (*group_function) (current_test.name, current_test.func, 0, cur_form ); + printf ("\n"); + if (groupcount >= (skip_count+test_count)) break; + } + if (debug_show_labels) printf("\n"); + printf ("All done. Tested %d different instruction groups\n", groupcount); +} + +static void usage (void) +{ + fprintf(stderr, + "Usage: test_isa_XXX [OPTIONS]\n" + "\t-h: display this help and exit\n" + "\t-v: increase verbosity\n" + "\t-a <foo> : limit number of a-iterations to <foo>\n" + "\t-b <foo> : limit number of b-iterations to <foo>\n" + "\t-c <foo> : limit number of c-iterations to <foo>\n" + "\t-n <foo> : limit to this number of tests.\n" + "\t-r <foo>: run only test # <foo> \n" + "\t\n" + "\t-j :enable setjmp to recover from illegal insns. 
\n" + "\t-m :(dev only?) lock VRM value to zero.\n" + "\t-z :(dev only?) lock MC value to zero.\n" + "\t-p :(dev only?) disable prefix instructions\n" + "\t-s <foo>: skip <foo> tests \n" + "\t-c <foo>: stop after running <foo> # of tests \n" + "\t-f : Do the test setup but do not actually execute the test instruction. \n" + ); +} + +int main (int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) { + switch (c) { + case 'h': + usage(); + return 0; + + case 'v': + verbose++; + break; + + /* Options related to limiting the test iterations. */ + case 'a': + a_limit=atoi (optarg); + printf ("limiting a-iters to %ld.\n", a_limit); + break; + case 'b': + b_limit=atoi (optarg); + printf ("limiting b-iters to %ld.\n", b_limit); + break; + case 'c': + c_limit=atoi (optarg); + printf ("limiting c-iters to %ld.\n", c_limit); + break; + case 'n': // run this number of tests. + test_count=atoi (optarg); + printf ("limiting to %ld tests\n", test_count); + break; + case 'r': // run just test #<foo>. + skip_count=atoi (optarg); + test_count=0; + if (verbose) printf("Running only test number %ld\n", skip_count); + break; + case 's': // skip this number of tests. + skip_count=atoi (optarg); + printf ("skipping %ld tests\n", skip_count); + break; + + /* debug options. 
*/ + case 'd': + dump_tables=1; + printf("DEBUG:dump_tables.\n"); + break; + case 'f': + setup_only=1; + printf("DEBUG:setup_only.\n"); + break; + case 'j': + enable_setjmp=1; + printf ("DEBUG:setjmp enabled.\n"); + break; + case 'm': + vrm_override=1; + printf ("DEBUG:vrm override enabled.\n"); + break; + case 'p': + prefix_override=1; + printf ("DEBUG:prefix override enabled.\n"); + break; + case 'z': + mc_override=1; + printf ("DEBUG:MC override enabled.\n"); + break; + default: + usage(); + fprintf(stderr, "Unknown argument: '%c'\n", c); + } + } + + generic_prologue (); + build_vsx_table (); + build_args_table (); + build_float_vsx_tables (); + + if (dump_tables) { + dump_float_vsx_tables (); + dump_vsxargs (); + } + + do_tests (); + + return 0; +} + +#else // HAS_ISA_3_1 +int main (int argc, char **argv) +{ + printf("NO ISA 3.1 SUPPORT\n"); + return 0; +} +#endif diff --git a/none/tests/ppc64/test_isa_3_1_AT.stderr.exp b/none/tests/ppc64/test_isa_3_1_AT.stderr.exp new file mode 100644 index 0000000000..139597f9cb --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_AT.stderr.exp @@ -0,0 +1,2 @@ + + diff --git a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp new file mode 100644 index 0000000000..5ea9985630 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp @@ -0,0 +1,1924 @@ +pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 
0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) + +pmxvf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 
0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) + +pmxvf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 
0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) + +pmxvf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +pmxvf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero ... [truncated message content] |
|
From: Carl L. <ca...@so...> - 2020-11-10 18:37:23
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=be7da5401783a32149bd7dc31b49f3d72d4a2b10 commit be7da5401783a32149bd7dc31b49f3d72d4a2b10 Author: Carl Love <ce...@us...> Date: Tue Sep 29 13:29:34 2020 -0500 Fix, add ISA 3.1 check to set ISA 3.1 in Valgrind hwcaps value Diff: --- coregrind/m_machine.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c index 5594923fdb..e7877e6360 100644 --- a/coregrind/m_machine.c +++ b/coregrind/m_machine.c @@ -1234,6 +1234,8 @@ Bool VG_(machine_get_hwcaps)( void ) __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */ } + // ISA 3.1 not supported on 32-bit systems + /* determine dcbz/dcbzl sizes while we still have the signal * handlers registered */ find_ppc_dcbz_sz(&vai); @@ -1271,6 +1273,7 @@ Bool VG_(machine_get_hwcaps)( void ) if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP; if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07; if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA3_0; + /* ISA 3.1 not supported on 32-bit systems. */ VG_(machine_get_cache_info)(&vai); @@ -1287,7 +1290,7 @@ Bool VG_(machine_get_hwcaps)( void ) vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act; volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP; - volatile Bool have_isa_2_07, have_isa_3_0; + volatile Bool have_isa_2_07, have_isa_3_0, have_isa_3_1; Int r; /* This is a kludge. Really we ought to back-convert saved_act @@ -1390,6 +1393,14 @@ Bool VG_(machine_get_hwcaps)( void ) __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */ } + /* Check for ISA 3.1 support. 
*/ + have_isa_3_1 = True; + if (VG_MINIMAL_SETJMP(env_unsup_insn)) { + have_isa_3_1 = False; + } else { + __asm__ __volatile__(".long 0x7f1401b6"); /* brh RA, RS */ + } + /* determine dcbz/dcbzl sizes while we still have the signal * handlers registered */ find_ppc_dcbz_sz(&vai); @@ -1397,10 +1408,10 @@ Bool VG_(machine_get_hwcaps)( void ) VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL); VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL); VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL); - VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d\n", + VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d ISA3.0 %d ISA3.1 %d\n", (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX, (Int)have_VX, (Int)have_DFP, - (Int)have_isa_2_07, (int)have_isa_3_0); + (Int)have_isa_2_07, (int)have_isa_3_0, (int)have_isa_3_1); /* on ppc64be, if we don't even have FP, just give up. */ if (!have_F) return False; @@ -1424,6 +1435,7 @@ Bool VG_(machine_get_hwcaps)( void ) if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP; if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07; if (have_isa_3_0) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_0; + if (have_isa_3_1) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA3_1; VG_(machine_get_cache_info)(&vai); |
|
From: Carl L. <ca...@so...> - 2020-11-10 18:37:02
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=092e5620d40d54bc1ab6a77c895fc18b0c86c6a9 commit 092e5620d40d54bc1ab6a77c895fc18b0c86c6a9 Author: Carl Love <ce...@us...> Date: Fri Sep 25 16:54:12 2020 -0500 ISA 3.1 Reduced-Precision: Outer Product Operations Add support for: pmxvf16ger2 Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) pmxvf16ger2nn Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Negative accumulate) pmxvf16ger2np Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Positive accumulate) pmxvf16ger2pn Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Negative accumulate) pmxvf16ger2pp Prefixed Masked VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Positive accumulate) pmxvf32ger Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) pmxvf32gernn Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate) pmxvf32gernp Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate) pmxvf32gerpn Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate) pmxvf32gerpp Prefixed Masked VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate) pmxvf64ger Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) pmxvf64gernn Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate) pmxvf64gernp Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate) pmxvf64gerpn Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate) pmxvf64gerpp Prefixed Masked VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive 
accumulate) pmxvi16ger2s Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation pmxvi16ger2spp Prefixed Masked VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation (Positive multiply, Positive accumulate) pmxvi4ger8 Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) pmxvi4ger8pp Prefixed Masked VSX Vector 4-bit Signed Integer GER (rank-8 update) (Positive multiply, Positive accumulate) pmxvi8ger4 Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) pmxvi8ger4pp Prefixed Masked VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) (Positive multiply, Positive accumulate) xvf16ger2 VSX Vector 16-bit Floating-Point GER (rank-2 update) xvf16ger2nn VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Positive accumulate) xvf16ger2np VSX Vector 16-bit Floating-Point GER (rank-2 update) (Negative multiply, Positive accumulate) xvf16ger2pn VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Negative accumulate) xvf16ger2pp VSX Vector 16-bit Floating-Point GER (rank-2 update) (Positive multiply, Positive accumulate) xvf32ger VSX Vector 32-bit Floating-Point GER (rank-1 update) xvf32gernn VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate) xvf32gernp VSX Vector 32-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate) xvf32gerpn VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Negative accumulate) xvf32gerpp VSX Vector 32-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate) xvf64ger VSX Vector 64-bit Floating-Point GER (rank-1 update) xvf64gernn VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Negative accumulate) xvf64gernp VSX Vector 64-bit Floating-Point GER (rank-1 update) (Negative multiply, Positive accumulate) xvf64gerpn VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, 
Negative accumulate) xvf64gerpp VSX Vector 64-bit Floating-Point GER (rank-1 update) (Positive multiply, Positive accumulate) xvi16ger2s VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation xvi16ger2spp VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation (Positive multiply, Positive accumulate) xvi4ger8 VSX Vector 4-bit Signed Integer GER (rank-8 update) xvi4ger8pp VSX Vector 4-bit Signed Integer GER (rank-8 update) (Positive multiply, Positive accumulate) xvi8ger4 VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) xvi8ger4pp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) (Positive multiply, Positive accumulate) xxmfacc VSX Move From ACC xxmtacc VSX Move To ACC xxsetaccz VSX Set ACC to Zero Diff: --- VEX/priv/guest_ppc_defs.h | 67 ++- VEX/priv/guest_ppc_helpers.c | 842 ++++++++++++++++++++++++++++++ VEX/priv/guest_ppc_toIR.c | 1159 +++++++++++++++++++++++++++++++++++++++++- VEX/priv/host_ppc_isel.c | 11 +- VEX/pub/libvex_guest_ppc32.h | 45 +- VEX/pub/libvex_guest_ppc64.h | 61 ++- memcheck/mc_main.c | 2 +- 7 files changed, 2143 insertions(+), 44 deletions(-) diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h index 2efa89e102..da228b9f04 100644 --- a/VEX/priv/guest_ppc_defs.h +++ b/VEX/priv/guest_ppc_defs.h @@ -160,7 +160,36 @@ extern ULong deposit_bits_under_mask_helper( ULong src, ULong mask ); extern ULong population_count64_helper( ULong src ); extern ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC, ULong IMM ); - +void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, + UInt reg, UInt *result); +void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, + UInt reg, UInt *result); + +/* 8-bit XO value from instruction description */ +#define XVI4GER8 0b00100011 +#define XVI4GER8PP 0b00100010 +#define XVI8GER4 0b00000011 +#define XVI8GER4PP 0b00000010 +#define XVI8GER4SPP 0b01100011 +#define XVI16GER2 0b01001011 +#define XVI16GER2PP 0b01101011 
+#define XVI16GER2S 0b00101011 +#define XVI16GER2SPP 0b00101010 +#define XVF16GER2 0b00010011 +#define XVF16GER2PP 0b00010010 +#define XVF16GER2PN 0b10010010 +#define XVF16GER2NP 0b01010010 +#define XVF16GER2NN 0b11010010 +#define XVF32GER 0b00011011 +#define XVF32GERPP 0b00011010 +#define XVF32GERPN 0b10011010 +#define XVF32GERNP 0b01011010 +#define XVF32GERNN 0b11011010 +#define XVF64GER 0b00111011 +#define XVF64GERPP 0b00111010 +#define XVF64GERPN 0b10111010 +#define XVF64GERNP 0b01111010 +#define XVF64GERNN 0b11111010 /* --- DIRTY HELPERS --- */ @@ -179,6 +208,42 @@ extern void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst, UInt shift_right, UInt endness ); +extern void vsx_matrix_4bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt inst_mask ); +extern void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt inst_mask ); +extern void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt inst_mask ); +extern void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, + ULong srcA_lo, + ULong srcB_hi, + ULong srcB_lo, + UInt masks_inst ); +extern void vsx_matrix_32bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcA_hi, + ULong srcA_lo, + ULong srcB_hi, + ULong srcB_lo, + UInt masks_inst ); +extern void vsx_matrix_64bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset, + ULong srcX_hi, + ULong srcX_lo, + ULong srcY_hi, + ULong srcY_lo, + UInt masks_inst ); #endif /* ndef __VEX_GUEST_PPC_DEFS_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c index 3bea3f9928..45dce63512 100644 --- a/VEX/priv/guest_ppc_helpers.c +++ 
/* Input is a 4-bit value. Extend bit 3 to bits [31:4] */
This helper calculates the result for one of the four ACC entries.
*/ + UInt i, j, mask, sum, inst, acc_entry, prefix_inst; + + UInt srcA_nibbles[4][8]; /* word, nibble */ + UInt srcB_nibbles[4][8]; /* word, nibble */ + UInt acc_word[4]; + UInt prod0, prod1, prod2, prod3, prod4, prod5, prod6, prod7; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; + UInt ymsk = 0; + + mask = 0xF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + /* LE word numbering */ + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + pmsk = 0b11111111; + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + pmsk = (masks_inst >> 22) & 0xFF; + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + /* Address nibbles using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word); + + // input is in double words + for( j = 0; j< 8; j++) { + srcA_nibbles[3][j] = (srcA_hi >> (60-4*j)) & mask; // hi bits [63:32] + srcA_nibbles[2][j] = (srcA_hi >> (28-4*j)) & mask; // hi bits [31:0] + srcA_nibbles[1][j] = (srcA_lo >> (60-4*j)) & mask; // lo bits [63:32] + srcA_nibbles[0][j] = (srcA_lo >> (28-4*j)) & mask; // lo bits [31:0] + + srcB_nibbles[3][j] = (srcB_hi >> (60-4*j)) & mask; + srcB_nibbles[2][j] = (srcB_hi >> (28-4*j)) & mask; + srcB_nibbles[1][j] = (srcB_lo >> (60-4*j)) & mask; + srcB_nibbles[0][j] = (srcB_lo >> (28-4*j)) & mask; + } + + for( j = 0; j < 4; j++) { + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + if (((pmsk >> 7) & 0x1) == 0) + prod0 = 0; + else + prod0 = exts4( srcA_nibbles[i][0] ) + * exts4( srcB_nibbles[j][0] ); + + if (((pmsk >> 6) & 0x1) == 0) + prod1 = 0; + else + prod1 = exts4( srcA_nibbles[i][1] ) + * exts4( srcB_nibbles[j][1] ); + + if (((pmsk >> 5) & 0x1) == 0) + prod2 = 0; + else + prod2 = exts4( srcA_nibbles[i][2] ) + * exts4( srcB_nibbles[j][2] ); + + if (((pmsk >> 4) & 0x1) == 0) + prod3 = 0; + else + prod3 = exts4( 
/* Address bytes using IBM numbering */
/* sum is UInt so the result is chopped to 32-bits */
static UInt reinterpret_float_as_int( Float input ) +{ + /* Reinterpret the bit pattern of a float as an int.
static ULong reinterpret_double_as_long( Double input ) +{ + /* Reinterpret the bit pattern of a double as a long. */
*/ + __attribute__ ((aligned (128))) union convert_t conv; + + conv.f = input; + + if ( ( ( conv.u32 & I32_EXP_MASK) == I32_EXP_MASK ) + && ( ( conv.u32 & I32_FRACTION_MASK ) != 0 ) ) + return input; + else + return -input; +} + +void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + UInt i, j, mask, inst, acc_entry, prefix_inst; + + UInt srcA_word[4][2]; /* word, hword */ + UInt srcB_word[4][2]; /* word, hword */ + Double src10, src11, src20, src21; + UInt acc_word_input[4]; + Float acc_word[4]; + Double prod; + Double msum; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; + UInt ymsk = 0; + + mask = 0xFFFF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = (masks_inst >> 13) & 0x1; + acc_entry = masks_inst & 0xF; + + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + pmsk = 0b11; + xmsk = 0b1111; + ymsk = 0b1111; + + } else { + /* Use mask supplied with prefix inst */ + pmsk = (masks_inst >> 28) & 0x3; + xmsk = (masks_inst >> 18) & 0xF; + ymsk = (masks_inst >> 14) & 0xF; + } + + /* Address half-words using IBM numbering */ + for( i = 0; i < 4; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input); + + acc_word[3] = reinterpret_int_as_float( acc_word_input[3] ); + acc_word[2] = reinterpret_int_as_float( acc_word_input[2] ); + acc_word[1] = reinterpret_int_as_float( acc_word_input[1] ); + acc_word[0] = reinterpret_int_as_float( acc_word_input[0] ); + + for( j = 0; j < 2; j++) { // input is in double words + srcA_word[3][j] = (UInt)((srcA_hi >> (48-16*j)) & mask); + srcA_word[2][j] = (UInt)((srcA_hi >> (16-16*j)) & mask); + srcA_word[1][j] = (UInt)((srcA_lo >> (48-16*j)) & mask); + srcA_word[0][j] = (UInt)((srcA_lo >> (16-16*j)) & mask); + + srcB_word[3][j] = (UInt)((srcB_hi >> (48-16*j)) & mask); + srcB_word[2][j] = (UInt)((srcB_hi >> 
(16-16*j)) & mask); + srcB_word[1][j] = (UInt)((srcB_lo >> (48-16*j)) & mask); + srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask); + } + + for( j = 0; j < 4; j++) { + if (((pmsk >> 1) & 0x1) == 0) { + src10 = 0; + src20 = 0; + } else { + src10 = conv_f16_to_double((ULong)srcA_word[i][0]); + src20 = conv_f16_to_double((ULong)srcB_word[j][0]); + } + + if ((pmsk & 0x1) == 0) { + src11 = 0; + src21 = 0; + } else { + src11 = conv_f16_to_double((ULong)srcA_word[i][1]); + src21 = conv_f16_to_double((ULong)srcB_word[j][1]); + } + + + prod = src10 * src20; + msum = prod + src11 * src21; + + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + /* Note, we do not track the exception handling bits + ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */ + + if ( inst == XVF16GER2 ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) ); + + else if ( inst == XVF16GER2PP ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) + + acc_word[j] ); + + else if ( inst == XVF16GER2PN ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) + + negate_float( acc_word[j] ) ); + + else if ( inst == XVF16GER2NP ) + result[j] = reinterpret_float_as_int( + conv_double_to_float( negate_double( msum ) ) + + acc_word[j] ); + + else if ( inst == XVF16GER2NN ) + result[j] = reinterpret_float_as_int( + conv_double_to_float( negate_double( msum ) ) + + negate_float( acc_word[j] ) ); + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +void vsx_matrix_32bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) +{ + UInt i, j, mask, inst, acc_entry, prefix_inst; + + Float srcA_word[4]; + Float srcB_word[4]; + UInt acc_word_input[4]; + Float acc_word[4]; + UInt result[4]; + UInt xmsk = 0; + UInt ymsk = 0; + Float src1, src2, acc; + + mask = 0xFFFFFFFF; + inst = (masks_inst >> 5) & 0xFF; + prefix_inst = 
/* Address bytes using IBM numbering */
*/ + + src1 = srcA_word[i]; + src2 = srcB_word[j]; + acc = acc_word[j]; + + if ( inst == XVF32GER ) + result[j] = reinterpret_float_as_int( src1 * src2 ); + + else if ( inst == XVF32GERPP ) + result[j] = reinterpret_float_as_int( ( src1 * src2 ) + acc ); + + else if ( inst == XVF32GERPN ) + result[j] = reinterpret_float_as_int( ( src1 * src2 ) + + negate_float( acc ) ); + + else if ( inst == XVF32GERNP ) + result[j] = reinterpret_float_as_int( + negate_float( src1 * src2 ) + acc ); + + else if ( inst == XVF32GERNN ) + result[j] = reinterpret_float_as_int( + negate_float( src1 * src2 ) + negate_float( acc ) ); + } else { + result[j] = 0; + } + } + write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result); + } +} + +void vsx_matrix_64bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcX_hi, ULong srcX_lo, + ULong srcY_hi, ULong srcY_lo, + UInt masks_inst ) +{ + /* This function just computes the result for one entry in the ACC. */ + UInt i, j, inst, acc_entry, prefix_inst; + + Double srcX_dword[4]; + Double srcY_dword[2]; + Double result[2]; + UInt result_uint[4]; + ULong result_ulong[2]; + Double acc_dword[4]; + ULong acc_word_ulong[2]; + UInt acc_word_input[4]; + UInt xmsk = 0; + UInt ymsk = 0; + UInt start_i; + Double src1, src2, acc; + + inst = (masks_inst >> 8) & 0xFF; + prefix_inst = (masks_inst >> 16) & 0x1; + start_i = (masks_inst >> 4) & 0xF; + acc_entry = masks_inst & 0xF; + + if ( prefix_inst == 0 ) { + /* Set the masks for non-prefix instructions */ + xmsk = 0b1111; + ymsk = 0b11; + + } else { + xmsk = (masks_inst >> 21) & 0xF; + ymsk = (masks_inst >> 19) & 0x3; + } + + /* Need to store the srcX_dword in the correct index for the following + for loop. 
*/ + srcX_dword[1+start_i] = reinterpret_long_as_double( srcX_lo); + srcX_dword[0+start_i] = reinterpret_long_as_double( srcX_hi ); + srcY_dword[1] = reinterpret_long_as_double( srcY_lo ); + srcY_dword[0] = reinterpret_long_as_double( srcY_hi ); + + for( i = start_i; i < start_i+2; i++) { + /* Get the ACC contents directly from the PPC64 state */ + get_ACC_entry (gst, offset_ACC, acc_entry, 3 - i, + acc_word_input); + + acc_word_ulong[1] = acc_word_input[3]; + acc_word_ulong[1] = (acc_word_ulong[1] << 32) | acc_word_input[2]; + acc_word_ulong[0] = acc_word_input[1]; + acc_word_ulong[0] = (acc_word_ulong[0] << 32) | acc_word_input[0]; + acc_dword[0] = reinterpret_long_as_double( acc_word_ulong[0] ); + acc_dword[1] = reinterpret_long_as_double( acc_word_ulong[1]); + + for( j = 0; j < 2; j++) { + + if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) { + /* Note, we do not track the exception handling bits + ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */ + + src1 = srcX_dword[i]; + src2 = srcY_dword[j]; + acc = acc_dword[j]; + + if ( inst == XVF64GER ) + result[j] = src1 * src2; + + else if ( inst == XVF64GERPP ) + result[j] = ( src1 * src2 ) + acc; + + else if ( inst == XVF64GERPN ) + result[j] = ( src1 * src2 ) + negate_double( acc ); + + else if ( inst == XVF64GERNP ) + result[j] = negate_double( src1 * src2 ) + acc; + + else if ( inst == XVF64GERNN ) + result[j] = negate_double( src1 * src2 ) + negate_double( acc ); + + } else { + result[j] = 0; + } + } + + /* Need to store the two double float values as two unsigned ints in + order to store them to the ACC. 
*/ + result_ulong[0] = reinterpret_double_as_long ( result[0] ); + result_ulong[1] = reinterpret_double_as_long ( result[1] ); + + result_uint[0] = result_ulong[0] & 0xFFFFFFFF; + result_uint[1] = (result_ulong[0] >> 32) & 0xFFFFFFFF; + result_uint[2] = result_ulong[1] & 0xFFFFFFFF; + result_uint[3] = (result_ulong[1] >> 32) & 0xFFFFFFFF; + + write_ACC_entry (gst, offset_ACC, acc_entry, 3 - i, + result_uint); + } +} + /*----------------------------------------------*/ /*--- The exported fns .. ---*/ /*----------------------------------------------*/ @@ -915,6 +1723,39 @@ void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state ) VECZERO(vex_state->guest_VSR62); VECZERO(vex_state->guest_VSR63); + VECZERO( vex_state->guest_ACC_0_r0 ); + VECZERO( vex_state->guest_ACC_0_r1 ); + VECZERO( vex_state->guest_ACC_0_r2 ); + VECZERO( vex_state->guest_ACC_0_r3 ); + VECZERO( vex_state->guest_ACC_1_r0 ); + VECZERO( vex_state->guest_ACC_1_r1 ); + VECZERO( vex_state->guest_ACC_1_r2 ); + VECZERO( vex_state->guest_ACC_1_r3 ); + VECZERO( vex_state->guest_ACC_2_r0 ); + VECZERO( vex_state->guest_ACC_2_r1 ); + VECZERO( vex_state->guest_ACC_2_r2 ); + VECZERO( vex_state->guest_ACC_2_r3 ); + VECZERO( vex_state->guest_ACC_3_r0 ); + VECZERO( vex_state->guest_ACC_3_r1 ); + VECZERO( vex_state->guest_ACC_3_r2 ); + VECZERO( vex_state->guest_ACC_3_r3 ); + VECZERO( vex_state->guest_ACC_4_r0 ); + VECZERO( vex_state->guest_ACC_4_r1 ); + VECZERO( vex_state->guest_ACC_4_r2 ); + VECZERO( vex_state->guest_ACC_4_r3 ); + VECZERO( vex_state->guest_ACC_5_r0 ); + VECZERO( vex_state->guest_ACC_5_r1 ); + VECZERO( vex_state->guest_ACC_5_r2 ); + VECZERO( vex_state->guest_ACC_5_r3 ); + VECZERO( vex_state->guest_ACC_6_r0 ); + VECZERO( vex_state->guest_ACC_6_r1 ); + VECZERO( vex_state->guest_ACC_6_r2 ); + VECZERO( vex_state->guest_ACC_6_r3 ); + VECZERO( vex_state->guest_ACC_7_r0 ); + VECZERO( vex_state->guest_ACC_7_r1 ); + VECZERO( vex_state->guest_ACC_7_r2 ); + VECZERO( vex_state->guest_ACC_7_r3 ); 
+ # undef VECZERO vex_state->guest_CIA = 0; @@ -1160,6 +2001,7 @@ void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state ) vex_state->guest_PPR = 0x4ULL << 50; // medium priority vex_state->guest_PSPB = 0x100; // an arbitrary non-zero value to start with vex_state->guest_DSCR = 0; + } diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 9192436924..c9c058a7ab 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -343,6 +343,38 @@ static Bool OV32_CA32_supported = False; #define OFFB_PPR offsetofPPCGuestState(guest_PPR) #define OFFB_PSPB offsetofPPCGuestState(guest_PSPB) #define OFFB_DSCR offsetofPPCGuestState(guest_DSCR) +#define OFFB_ACC_0_r0 offsetofPPCGuestState(guest_ACC_0_r0) +#define OFFB_ACC_0_r1 offsetofPPCGuestState(guest_ACC_0_r1) +#define OFFB_ACC_0_r2 offsetofPPCGuestState(guest_ACC_0_r2) +#define OFFB_ACC_0_r3 offsetofPPCGuestState(guest_ACC_0_r3) +#define OFFB_ACC_1_r0 offsetofPPCGuestState(guest_ACC_1_r0) +#define OFFB_ACC_1_r1 offsetofPPCGuestState(guest_ACC_1_r1) +#define OFFB_ACC_1_r2 offsetofPPCGuestState(guest_ACC_1_r2) +#define OFFB_ACC_1_r3 offsetofPPCGuestState(guest_ACC_1_r3) +#define OFFB_ACC_2_r0 offsetofPPCGuestState(guest_ACC_2_r0) +#define OFFB_ACC_2_r1 offsetofPPCGuestState(guest_ACC_2_r1) +#define OFFB_ACC_2_r2 offsetofPPCGuestState(guest_ACC_2_r2) +#define OFFB_ACC_2_r3 offsetofPPCGuestState(guest_ACC_2_r3) +#define OFFB_ACC_3_r0 offsetofPPCGuestState(guest_ACC_3_r0) +#define OFFB_ACC_3_r1 offsetofPPCGuestState(guest_ACC_3_r1) +#define OFFB_ACC_3_r2 offsetofPPCGuestState(guest_ACC_3_r2) +#define OFFB_ACC_3_r3 offsetofPPCGuestState(guest_ACC_3_r3) +#define OFFB_ACC_4_r0 offsetofPPCGuestState(guest_ACC_4_r0) +#define OFFB_ACC_4_r1 offsetofPPCGuestState(guest_ACC_4_r1) +#define OFFB_ACC_4_r2 offsetofPPCGuestState(guest_ACC_4_r2) +#define OFFB_ACC_4_r3 offsetofPPCGuestState(guest_ACC_4_r3) +#define OFFB_ACC_5_r0 offsetofPPCGuestState(guest_ACC_5_r0) +#define OFFB_ACC_5_r1 
offsetofPPCGuestState(guest_ACC_5_r1) +#define OFFB_ACC_5_r2 offsetofPPCGuestState(guest_ACC_5_r2) +#define OFFB_ACC_5_r3 offsetofPPCGuestState(guest_ACC_5_r3) +#define OFFB_ACC_6_r0 offsetofPPCGuestState(guest_ACC_6_r0) +#define OFFB_ACC_6_r1 offsetofPPCGuestState(guest_ACC_6_r1) +#define OFFB_ACC_6_r2 offsetofPPCGuestState(guest_ACC_6_r2) +#define OFFB_ACC_6_r3 offsetofPPCGuestState(guest_ACC_6_r3) +#define OFFB_ACC_7_r0 offsetofPPCGuestState(guest_ACC_7_r0) +#define OFFB_ACC_7_r1 offsetofPPCGuestState(guest_ACC_7_r1) +#define OFFB_ACC_7_r2 offsetofPPCGuestState(guest_ACC_7_r2) +#define OFFB_ACC_7_r3 offsetofPPCGuestState(guest_ACC_7_r3) /*------------------------------------------------------------*/ @@ -495,6 +527,11 @@ static inline UChar ifieldSHW ( UInt instr ) return ifieldDM ( instr ); } +/* Extract AT field from theInstr 8LS:D form */ +static UChar ifieldAT ( UInt instr ) { + return toUChar( IFIELD( instr, 23, 3 ) ); +} + /*------------------------------------------------------------*/ /*--- Guest-state identifiers ---*/ /*------------------------------------------------------------*/ @@ -526,6 +563,40 @@ typedef enum { * needed. */ PPC_GST_DSCR, // Data Stream Control Register + PPC_GST_ACC_0_r0, /* Accumulator register file. Eight accumulators each + * with four 128-bit registers. 
+ */ + PPC_GST_ACC_0_r1, + PPC_GST_ACC_0_r2, + PPC_GST_ACC_0_r3, + PPC_GST_ACC_1_r0, + PPC_GST_ACC_1_r1, + PPC_GST_ACC_1_r2, + PPC_GST_ACC_1_r3, + PPC_GST_ACC_2_r0, + PPC_GST_ACC_2_r1, + PPC_GST_ACC_2_r2, + PPC_GST_ACC_2_r3, + PPC_GST_ACC_3_r0, + PPC_GST_ACC_3_r1, + PPC_GST_ACC_3_r2, + PPC_GST_ACC_3_r3, + PPC_GST_ACC_4_r0, + PPC_GST_ACC_4_r1, + PPC_GST_ACC_4_r2, + PPC_GST_ACC_4_r3, + PPC_GST_ACC_5_r0, + PPC_GST_ACC_5_r1, + PPC_GST_ACC_5_r2, + PPC_GST_ACC_5_r3, + PPC_GST_ACC_6_r0, + PPC_GST_ACC_6_r1, + PPC_GST_ACC_6_r2, + PPC_GST_ACC_6_r3, + PPC_GST_ACC_7_r0, + PPC_GST_ACC_7_r1, + PPC_GST_ACC_7_r2, + PPC_GST_ACC_7_r3, PPC_GST_MAX } PPC_GST; @@ -3994,6 +4065,264 @@ static IRExpr* /* ::Ity_I32 */ getFPCC ( void ) return mkexpr(val); } +/*-----------------------------------------------------------*/ +/* Helpers to access VSX Accumulator register file + *-----------------------------------------------------------*/ +static void putACC( UInt index, UInt reg, IRExpr* src ) +{ + switch (index) { + case 0: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_0_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_0_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_0_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_0_r3, src ) ); + break; + } + break; + + case 1: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_1_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_1_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_1_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_1_r3, src ) ); + break; + } + break; + + case 2: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_2_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_2_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_2_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_2_r3, src ) ); + break; + } + break; + + case 3: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_3_r0, src ) ); + break; + case 1: + stmt( 
IRStmt_Put( OFFB_ACC_3_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_3_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_3_r3, src ) ); + break; + } + break; + + case 4: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_4_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_4_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_4_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_4_r3, src ) ); + break; + } + break; + + case 5: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_5_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_5_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_5_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_5_r3, src ) ); + break; + } + break; + + case 6: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_6_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_6_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_6_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_6_r3, src ) ); + break; + } + break; + + case 7: + switch (reg) { + case 0: + stmt( IRStmt_Put( OFFB_ACC_7_r0, src ) ); + break; + case 1: + stmt( IRStmt_Put( OFFB_ACC_7_r1, src ) ); + break; + case 2: + stmt( IRStmt_Put( OFFB_ACC_7_r2, src ) ); + break; + case 3: + stmt( IRStmt_Put( OFFB_ACC_7_r3, src ) ); + break; + } + break; + } +} + +static IRExpr* /* :: Ity_V128 */ getACC ( UInt index, UInt reg ) +{ + vassert( (index >= 0) && (index < 8) ); + vassert( (reg >= 0) && (reg < 4) ); + // vex_printf("getACC (%d, %d)) \n", index, reg); + switch (index) { + case 0: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_0_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_0_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_0_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_0_r3, Ity_V128 ); + } + break; + + case 1: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_1_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_1_r1, 
Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_1_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_1_r3, Ity_V128 ); + } + break; + + case 2: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_2_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_2_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_2_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_2_r3, Ity_V128 ); + } + break; + + case 3: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_3_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_3_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_3_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_3_r3, Ity_V128 ); + } + break; + + case 4: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_4_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_4_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_4_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_4_r3, Ity_V128 ); + } + break; + + case 5: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_5_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_5_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_5_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_5_r3, Ity_V128 ); + } + break; + + case 6: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_6_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_6_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_6_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_6_r3, Ity_V128 ); + } + break; + + case 7: + switch (reg) { + case 0: + return IRExpr_Get( OFFB_ACC_7_r0, Ity_V128 ); + case 1: + return IRExpr_Get( OFFB_ACC_7_r1, Ity_V128 ); + case 2: + return IRExpr_Get( OFFB_ACC_7_r2, Ity_V128 ); + case 3: + return IRExpr_Get( OFFB_ACC_7_r3, Ity_V128 ); + } + break; + } + return 0; // error +} + + /*------------------------------------------------------------*/ /* Helpers for VSX instructions that do floating point * operations and need to determine if a src contains a @@ 
-5438,35 +5767,23 @@ static IRExpr * vector_evaluate_inst ( const VexAbiInfo* vbi, IRExpr *srcA, IRExpr *srcB, IRExpr *srcC, IRExpr *IMM ){ /* This function implements the ISA 3.1 instruction xxeval. The - instruction is too complex to do with Iops. An Iop implementation is - expected to exhaust memory and be really complex to write, debug and - understand. The second option would be to just map it to a new Iop. - Unfortunately, I doubt any other architecture will implement it making - the Iop PPC specific which isn't really attractive. It would need - extensive documenation for the Iop definition for anyone else to - understand what it does. That leaves doing it as a clean helper. This - is not the ideal option, but was chosen for now to help document what - the instruction does. Discuss this with Julian before committing to - decide if we really want to use this approach or map the instructioin - to a new IOP. */ - /* FIX ME, CARLL 11/8/2018*/ + instruction is too complex to do with Iops. */ /* The instruction description, note the IBM bit numbering is left to right: - For each integer value i, 0 to 127, do the following. + For each integer value i, 0 to 127, do the following. - Let j be the value of the concatenation of the contents of bit i of - srcA, bit i of srcB, bit i of srcC. (j = srcA[i] | srcB[i] | srcC[i]) + Let j be the value of the concatenation of the contents of bit i of + srcA, bit i of srcB, bit i of srcC. (j = srcA[i] | srcB[i] | srcC[i]) - The value of bit IMM[j] is placed into bit result[i]. + The value of bit IMM[j] is placed into bit result[i]. Basically the instruction lets you set each of the 128 bits in the result by selecting one of the eight bits in the IMM value. */ - /* Calling clean helpers with 128-bit args is currently not supported. It - isn't worth adding the support. We will simply call a 64-bit helper to - do the upper 64-bits of the result and the lower 64-bits of the result. 
- */ + /* Calling clean helpers with 128-bit args is currently not supported. We + will simply call a 64-bit clean helper to do the upper 64-bits of the + result and then call it do do the lower 64-bits of the result. */ IRTemp result_hi = newTemp( Ity_I64 ); IRTemp result_lo = newTemp( Ity_I64 ); @@ -5501,6 +5818,295 @@ static IRExpr * vector_evaluate_inst ( const VexAbiInfo* vbi, return binop( Iop_64HLtoV128, mkexpr( result_hi ), mkexpr( result_lo ) ); } +static void setup_fxstate_struct( IRDirty* d, UInt AT, IREffect AT_fx ) { + /* declare guest state effects, writing to four ACC 128-bit regs. */ + d->nFxState = 4; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = AT_fx; + d->fxState[0].size = sizeof(U128); + d->fxState[1].fx = AT_fx; + d->fxState[1].size = sizeof(U128); + d->fxState[2].fx = AT_fx; + d->fxState[2].size = sizeof(U128); + d->fxState[3].fx = AT_fx; + d->fxState[3].size = sizeof(U128); + + switch (AT) { + case 0: + d->fxState[0].offset = OFFB_ACC_0_r0; + d->fxState[1].offset = OFFB_ACC_0_r1; + d->fxState[2].offset = OFFB_ACC_0_r2; + d->fxState[3].offset = OFFB_ACC_0_r3; + break; + case 1: + d->fxState[0].offset = OFFB_ACC_1_r0; + d->fxState[1].offset = OFFB_ACC_1_r1; + d->fxState[2].offset = OFFB_ACC_1_r2; + d->fxState[3].offset = OFFB_ACC_1_r3; + break; + case 2: + d->fxState[0].offset = OFFB_ACC_2_r0; + d->fxState[1].offset = OFFB_ACC_2_r1; + d->fxState[2].offset = OFFB_ACC_2_r2; + d->fxState[3].offset = OFFB_ACC_2_r3; + break; + case 3: + d->fxState[0].offset = OFFB_ACC_3_r0; + d->fxState[1].offset = OFFB_ACC_3_r1; + d->fxState[2].offset = OFFB_ACC_3_r2; + d->fxState[3].offset = OFFB_ACC_3_r3; + break; + case 4: + d->fxState[0].offset = OFFB_ACC_4_r0; + d->fxState[1].offset = OFFB_ACC_4_r1; + d->fxState[2].offset = OFFB_ACC_4_r2; + d->fxState[3].offset = OFFB_ACC_4_r3; + break; + case 5: + d->fxState[0].offset = OFFB_ACC_5_r0; + d->fxState[1].offset = OFFB_ACC_5_r1; + d->fxState[2].offset = OFFB_ACC_5_r2; + d->fxState[3].offset = 
OFFB_ACC_5_r3; + break; + case 6: + d->fxState[0].offset = OFFB_ACC_6_r0; + d->fxState[1].offset = OFFB_ACC_6_r1; + d->fxState[2].offset = OFFB_ACC_6_r2; + d->fxState[3].offset = OFFB_ACC_6_r3; + break; + case 7: + d->fxState[0].offset = OFFB_ACC_7_r0; + d->fxState[1].offset = OFFB_ACC_7_r1; + d->fxState[2].offset = OFFB_ACC_7_r2; + d->fxState[3].offset = OFFB_ACC_7_r3; + break; + default: + vassert( (AT >= 0) && (AT < 8)); + } + return; +} +#define MATRIX_4BIT_INT_GER 1 +#define MATRIX_8BIT_INT_GER 2 +#define MATRIX_16BIT_INT_GER 3 +#define MATRIX_16BIT_FLOAT_GER 4 +#define MATRIX_32BIT_FLOAT_GER 5 +/* Note, the 64-bit float instructions have their caller. */ + +static void vsx_matrix_ger ( const VexAbiInfo* vbi, + UInt inst_class, + IRExpr *srcA, IRExpr *srcB, + UInt AT, UInt mask_inst ) { + /* This helper function does the VSX Matrix 4-bit Signed Integer GER + (Rank-8 Update) instructions xvi4ger8, xvi4ger8pp, pmxvi4ger8, + pmxvi4ger8pp. The instructions work on four V128 values, and three + 8-bit masks. */ + + IRTemp srcA_hi = newTemp( Ity_I64); + IRTemp srcA_lo = newTemp( Ity_I64); + IRTemp srcB_hi = newTemp( Ity_I64); + IRTemp srcB_lo = newTemp( Ity_I64); + IRDirty* d; + UInt instruction = mask_inst & 0xFF; /* Instruction is lower 8-bits. */ + IREffect AT_fx; + + assign( srcA_hi, unop( Iop_V128HIto64, srcA ) ); + assign( srcA_lo, unop( Iop_V128to64, srcA ) ); + assign( srcB_hi, unop( Iop_V128HIto64, srcB ) ); + assign( srcB_lo, unop( Iop_V128to64, srcB ) ); + + /* Using a dirty helper so we can access the contents of the ACC for use in + by the instruction and then write the result directly back to the ACC. + The dirty helper does not return data. */ + IRExpr** args = mkIRExprVec_7( + IRExpr_GSPTR(), + mkU32(offsetofPPCGuestState(guest_ACC_0_r0)), + mkexpr(srcA_hi), mkexpr(srcA_lo), + mkexpr(srcB_hi), mkexpr(srcB_lo), + mkU32( (mask_inst << 5) | AT )); + + /* Set AT_fx to Write if the instruction only writes the ACC. 
Set + AT_fx to modify if the instruction uses the AT entry and writes + to the ACC entry. */ + switch (instruction) { + case XVI4GER8: + case XVI8GER4: + case XVI16GER2: + case XVI16GER2S: + case XVF16GER2: + case XVF32GER: + AT_fx = Ifx_Write; + break; + case XVI4GER8PP: + case XVI8GER4PP: + case XVI16GER2PP: + case XVI16GER2SPP: + case XVF16GER2PP: + case XVF16GER2PN: + case XVF16GER2NP: + case XVF16GER2NN: + case XVF32GERPP: + case XVF32GERPN: + case XVF32GERNP: + case XVF32GERNN: + AT_fx = Ifx_Modify; + break; + default: + vassert(0); /* Unknown instruction */ + } + + switch(inst_class) { + case MATRIX_4BIT_INT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_4bit_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_4bit_ger_dirty_helper ), + args ); + break; + + case MATRIX_8BIT_INT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_8bit_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_8bit_ger_dirty_helper ), + args ); + break; + + case MATRIX_16BIT_INT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_16bit_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_16bit_ger_dirty_helper ), + args ); + break; + + case MATRIX_16BIT_FLOAT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_16bit_float_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_16bit_float_ger_dirty_helper ), + args ); + break; + + case MATRIX_32BIT_FLOAT_GER: + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "vsx_matrix_32bit_float_ger_dirty_helper", + fnptr_to_fnentry( vbi, &vsx_matrix_32bit_float_ger_dirty_helper ), + args ); + break; + + default: + vex_printf("ERROR: Unkown inst_class = %u in vsx_matrix_ger()\n", + inst_class); + return; + } + + setup_fxstate_struct( d, AT, AT_fx ); + + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); +} + +static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* vbi, + IRExpr *srcA, IRExpr *srcA1, + IRExpr *srcB, + UInt AT, UInt mask_inst ) { + /* 
This helper function does the VSX Matrix 64-bit floating-point GER + (Rank-1 Update) instructions xvf64ger, xvf64gerpp, xvf64gerpn, + xvf64gernp, xvf64gernn, pmxvf64ger, pmxvf64gerpp, pmxvf64gerpn, + pmxvf64gernp, pmxvf64gernn. */ + IRTemp srcX_hi = newTemp( Ity_I64); + IRTemp srcX_lo = newTemp( Ity_I64); + IRTemp srcX1_hi = newTemp( Ity_I64); + IRTemp srcX1_lo = newTemp( Ity_I64); + IRTemp srcY_hi = newTemp( Ity_I64); + IRTemp srcY_lo = newTemp( Ity_I64); + UInt start_i; + IRDirty* d; + ULong combined_args; + UInt instruction = mask_inst & 0xFF; /* Instruction is lower 8-bits. */ + IREffect AT_fx; + + assign( srcX_lo, unop( Iop_V128HIto64, srcA ) ); + assign( srcX_hi, unop( Iop_V128to64, srcA ) ); + assign( srcX1_lo, unop( Iop_V128HIto64, srcA1 ) ); + assign( srcX1_hi, unop( Iop_V128to64, srcA1 ) ); + assign( srcY_lo, unop( Iop_V128HIto64, srcB ) ); + assign( srcY_hi, unop( Iop_V128to64, srcB ) ); + + /* Using a dirty helper so we can access the contents of the ACC for use in + by the instruction and then write the result directly back to the ACC. + The dirty helper does not return data. + + There is a restriction of 8 args in a dirty helper. Can't pass the four + srcX values. So, just do two calls calculating the first two ACC + results then the second two ACC results. */ + + start_i = 0; + combined_args = (mask_inst << 8) | (start_i << 4) | AT; + + IRExpr** args1 = mkIRExprVec_7( + IRExpr_GSPTR(), + mkU32( offsetofPPCGuestState(guest_ACC_0_r0) ), + mkexpr(srcX1_hi), mkexpr(srcX1_lo), + mkexpr(srcY_hi), mkexpr(srcY_lo), + mkU32( combined_args )); + + /* Set AT_fx to Write if the instruction only writes the ACC. Set + AT_fx to modify if the instruction uses the AT entry and writes + to the ACC entry. 
*/ + switch (instruction) { + case XVF64GER: + AT_fx = Ifx_Write; + break; + case XVF64GERPP: + case XVF64GERPN: + case XVF64GERNP: + case XVF64GERNN: + AT_fx = Ifx_Modify; + break; + default: + vassert(0); /* Unknown instruction */ + } + + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + ... [truncated message content] |
|
From: Paul F. <pa...@so...> - 2020-11-10 13:53:01
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=6e5213028c9c37765b1be52bb5185a5bfcdb526d commit 6e5213028c9c37765b1be52bb5185a5bfcdb526d Author: Paul Floyd <pj...@wa...> Date: Tue Nov 10 14:49:27 2020 +0100 Make memcheck/tests/sized_delete conditional upon the compiler having -fsized-deallocators, add 384729 to NEWS Diff: --- NEWS | 1 + configure.ac | 25 +++++++++++++++++++++++++ memcheck/tests/Makefile.am | 6 ++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index f1ced278c4..563a9ef8c1 100644 --- a/NEWS +++ b/NEWS @@ -57,6 +57,7 @@ n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 428648 s390_emit_load_mem panics due to 20-bit offset for vector load 427400 PPC ISA 3.1 support is missing, part 4 427401 PPC ISA 3.1 support is missing, part 5 +384729 __libc_freeres inhibits cross-platform valgrind Release 3.16.1 (?? June 2020) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/configure.ac b/configure.ac index 42b2a71448..f01dbff7fc 100755 --- a/configure.ac +++ b/configure.ac @@ -2287,6 +2287,31 @@ AC_LANG(C) AC_SUBST(FLAG_FALIGNED_NEW) +# does this compiler support -fsized-deallocation ? +AC_MSG_CHECKING([if g++ accepts -fsized-deallocation]) + +safe_CXXFLAGS=$CXXFLAGS +CXXFLAGS="-fsized-deallocation -Werror" + +AC_LANG(C++) +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ + return 0; +]])], [ +FLAG_FSIZED_DEALLOCATION="-fsized-deallocation" +ac_have_sized_deallocation=yes +AC_MSG_RESULT([yes]) +], [ +FLAG_FSIZED_DEALLOCATION="" +ac_have_sized_deallocation=no +AC_MSG_RESULT([no]) +]) +CXXFLAGS=$safe_CXXFLAGS +AC_LANG(C) + +AC_SUBST(FLAG_FSIZED_DEALLOCATION) +AM_CONDITIONAL([HAVE_FSIZED_DEALLOCATION], [test x$ac_have_sized_deallocation = xyes]) + + # does this compiler support -fno-stack-protector ? 
AC_MSG_CHECKING([if gcc accepts -fno-stack-protector]) diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am index 28f26bbebe..e24bcc24d9 100644 --- a/memcheck/tests/Makefile.am +++ b/memcheck/tests/Makefile.am @@ -399,7 +399,6 @@ check_PROGRAMS = \ sendmsg \ sh-mem sh-mem-random \ sigaltstack signal2 sigprocmask static_malloc sigkill \ - sized_delete \ strchr \ str_tester \ supp_unknown supp1 supp2 suppfree \ @@ -552,8 +551,11 @@ sendmsg_CFLAGS += -D_XOPEN_SOURCE=600 sendmsg_LDADD = -lsocket -lnsl endif +if HAVE_FSIZED_DEALLOCATION +check_PROGRAMS += sized_delete sized_delete_SOURCES = sized_delete.cpp -sized_delete_CXXFLAGS = $(AM_CXXFLAGS) -fsized-deallocation +sized_delete_CXXFLAGS = $(AM_CXXFLAGS) @FLAG_FSIZED_DEALLOCATION@ +endif str_tester_CFLAGS = $(AM_CFLAGS) -Wno-shadow \ @FLAG_W_NO_MEMSET_TRANSPOSED_ARGS@ |
|
From: Carl L. <ca...@so...> - 2020-11-10 01:07:12
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=b900ce172ef81fde9c875c70991485be6ea90e75 commit b900ce172ef81fde9c875c70991485be6ea90e75 Author: Carl Love <ce...@us...> Date: Tue Oct 6 12:06:56 2020 -0500 VSX Load/Store rightmost element operation tests Diff: --- NEWS | 1 + none/tests/ppc64/test_isa_3_1_XT.c | 40 ++++++++++++++++++++ none/tests/ppc64/test_isa_3_1_XT.stdout.exp | 58 ++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 7b4cea04a0..f1ced278c4 100644 --- a/NEWS +++ b/NEWS @@ -56,6 +56,7 @@ n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 428035 drd: Unbreak the musl build 428648 s390_emit_load_mem panics due to 20-bit offset for vector load 427400 PPC ISA 3.1 support is missing, part 4 +427401 PPC ISA 3.1 support is missing, part 5 Release 3.16.1 (?? June 2020) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/none/tests/ppc64/test_isa_3_1_XT.c b/none/tests/ppc64/test_isa_3_1_XT.c index 838a75ab12..c16ddedac3 100644 --- a/none/tests/ppc64/test_isa_3_1_XT.c +++ b/none/tests/ppc64/test_isa_3_1_XT.c @@ -297,6 +297,38 @@ static void test_xxeval_imm3 (void) { __asm__ __volatile__ ("xxeval %x0, %x1, %x2, %x3, 3" : "=wa" (vec_xt) : "wa" (vec_xa), "wa" (vec_xb), "wa" (vec_xc) ); } +static void test_lxvrbx (void) { + __asm__ __volatile__ ("lxvrbx %x0, %1, %2" + : "=wa" (vec_xt) : "r" (ra), "r" (rb) ); +} +static void test_lxvrhx (void) { + __asm__ __volatile__ ("lxvrhx %x0, %1, %2" + : "=wa" (vec_xt) : "r" (ra), "r" (rb) ); +} +static void test_lxvrwx (void) { + __asm__ __volatile__ ("lxvrwx %x0, %1, %2" + : "=wa" (vec_xt) : "r" (ra), "r" (rb) ); +} +static void test_lxvrdx (void) { + __asm__ __volatile__ ("lxvrdx %x0, %1, %2" + : "=wa" (vec_xt) : "r" (ra), "r" (rb) ); +} +static void test_stxvrbx (void) { + __asm__ __volatile__ ("stxvrbx %x0, %1, %2" + :: "wa" (vec_xs), "r" (ra), "r" (rb) ); +} +static void test_stxvrhx (void) { + __asm__ __volatile__ ("stxvrhx %x0, %1, %2" + :: "wa" (vec_xs), "r" 
(ra), "r" (rb) ); +} +static void test_stxvrwx (void) { + __asm__ __volatile__ ("stxvrwx %x0, %1, %2" + :: "wa" (vec_xs), "r" (ra), "r" (rb) ); +} +static void test_stxvrdx (void) { + __asm__ __volatile__ ("stxvrdx %x0, %1, %2" + :: "wa" (vec_xs), "r" (ra), "r" (rb) ); +} static void test_plfd_64 (void) { __asm__ __volatile__ ("plfd 28, 64(%0), 0" :: "r" (ra) ); } @@ -483,6 +515,10 @@ static test_list_t testgroup_generic[] = { { &test_lxvp_0, "lxvp 0", "XTp,DQ(RA)"}, /* bcwp */ { &test_lxvp_16, "lxvp 16", "XTp,DQ(RA)"}, /* bcwp */ { &test_lxvp_32, "lxvp 32", "XTp,DQ(RA)"}, /* bcwp */ + { &test_lxvrbx, "lxvrbx", "XT,RA,RB"}, /* bcs */ + { &test_lxvrdx, "lxvrdx", "XT,RA,RB"}, /* bcs */ + { &test_lxvrhx, "lxvrhx", "XT,RA,RB"}, /* bcs */ + { &test_lxvrwx, "lxvrwx", "XT,RA,RB"}, /* bcs */ { &test_plfd_0, "plfd 0", "FRT,D(RA),R"}, /* bcwp */ { &test_plfd_4, "plfd 4", "FRT,D(RA),R"}, /* bcwp */ { &test_plfd_8, "plfd 8", "FRT,D(RA),R"}, /* bcwp */ @@ -546,6 +582,10 @@ static test_list_t testgroup_generic[] = { { &test_stxvp_off16, "stxvp off16", "XSp,DQ(RA)"}, /* bcwp */ { &test_stxvp_off32, "stxvp off32", "XSp,DQ(RA)"}, /* bcwp */ { &test_stxvp_off48, "stxvp off48", "XSp,DQ(RA)"}, /* bcwp */ + { &test_stxvrbx, "stxvrbx", "XS,RA,RB"}, /* bcs */ + { &test_stxvrdx, "stxvrdx", "XS,RA,RB"}, /* bcs */ + { &test_stxvrhx, "stxvrhx", "XS,RA,RB"}, /* bcs */ + { &test_stxvrwx, "stxvrwx", "XS,RA,RB"}, /* bcs */ { &test_xxblendvb, "xxblendvb", "XT,XA,XB,XC"}, /* bcs */ { &test_xxblendvd, "xxblendvd", "XT,XA,XB,XC"}, /* bcs */ { &test_xxblendvh, "xxblendvh", "XT,XA,XB,XC"}, /* bcs */ diff --git a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp index 365fc134eb..efa95884e9 100644 --- a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp +++ b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp @@ -47,6 +47,34 @@ lxvp 16 (&buffer) => 5a05a05a05a07a05 0102030405067708 7ff0000000007000 7f000000 lxvp 32 (&buffer) => fedcba9876547210 0123456789ab7def 
5a05a05a05a07a05 0102030405067708 +lxvrbx 0 (&buffer) => 59 0 +lxvrbx 8 (&buffer) => 7 0 +lxvrbx 10 (&buffer) => 0 0 +lxvrbx 18 (&buffer) => 0 0 +lxvrbx 20 (&buffer) => 5 0 +lxvrbx 28 (&buffer) => 8 0 + +lxvrdx 0 (&buffer) => 3fe00094e0007359 0 +lxvrdx 8 (&buffer) => 7ff7020304057607 0 +lxvrdx 10 (&buffer) => 7ff0000000007000 0 +lxvrdx 18 (&buffer) => 7f0000007f007000 0 +lxvrdx 20 (&buffer) => 5a05a05a05a07a05 0 +lxvrdx 28 (&buffer) => 102030405067708 0 + +lxvrhx 0 (&buffer) => 7359 0 +lxvrhx 8 (&buffer) => 7607 0 +lxvrhx 10 (&buffer) => 7000 0 +lxvrhx 18 (&buffer) => 7000 0 +lxvrhx 20 (&buffer) => 7a05 0 +lxvrhx 28 (&buffer) => 7708 0 + +lxvrwx 0 (&buffer) => e0007359 0 +lxvrwx 8 (&buffer) => 4057607 0 +lxvrwx 10 (&buffer) => 7000 0 +lxvrwx 18 (&buffer) => 7f007000 0 +lxvrwx 20 (&buffer) => 5a07a05 0 +lxvrwx 28 (&buffer) => 5067708 0 + plfd 0 (&buffer) => 5.000710e-01 plfd 4 (&buffer) => 2.752739e-289 @@ -188,6 +216,34 @@ stxvp off32 (&buffer) 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff80000 stxvp off48 (&buffer) 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => [ - - - - - - ff7ffffe7f7ffffe ff8000007f800000] +stxvrbx 0 (&buffer) ff7ffffe7f7ffffe,ff8000007f800000 => [3fe00094e00073fe - - - - - - - ] +stxvrbx 8 (&buffer) 0080000e8080000e,ff7ffffe7f7ffffe => [ - 7ff702030405760e - - - - - - ] +stxvrbx 10 (&buffer) 0180055e0180077e,0080000e8080000e => [ - - 7ff000000000707e - - - - - ] +stxvrbx 18 (&buffer) 0000111e8000222e,0180055e0180077e => [ - - - 7f0000007f00702e - - - - ] +stxvrbx 20 (&buffer) 7ff0000000000000,0000111e8000222e => [ - - - - 5a05a05a05a07a00 - - - ] +stxvrbx 28 (&buffer) fff0000000000000,7ff0000000000000 => [ - - - - - 0102030405067700 - - ] + +stxvrdx 0 (&buffer) ff7ffffe7f7ffffe,ff8000007f800000 => [ff7ffffe7f7ffffe - - - - - - - ] +stxvrdx 8 (&buffer) 0080000e8080000e,ff7ffffe7f7ffffe => [ - 0080000e8080000e - - - - - - ] +stxvrdx 10 (&buffer) 0180055e0180077e,0080000e8080000e => [ - - 0180055e0180077e - - - - - 
] +stxvrdx 18 (&buffer) 0000111e8000222e,0180055e0180077e => [ - - - 0000111e8000222e - - - - ] +stxvrdx 20 (&buffer) 7ff0000000000000,0000111e8000222e => [ - - - - 7ff0000000000000 - - - ] +stxvrdx 28 (&buffer) fff0000000000000,7ff0000000000000 => [ - - - - - fff0000000000000 - - ] + +stxvrhx 0 (&buffer) ff7ffffe7f7ffffe,ff8000007f800000 => [3fe00094e000fffe - - - - - - - ] +stxvrhx 8 (&buffer) 0080000e8080000e,ff7ffffe7f7ffffe => [ - 7ff702030405000e - - - - - - ] +stxvrhx 10 (&buffer) 0180055e0180077e,0080000e8080000e => [ - - 7ff000000000077e - - - - - ] +stxvrhx 18 (&buffer) 0000111e8000222e,0180055e0180077e => [ - - - 7f0000007f00222e - - - - ] +stxvrhx 20 (&buffer) 7ff0000000000000,0000111e8000222e => [ - - - - 5a05a05a05a00000 - - - ] +stxvrhx 28 (&buffer) fff0000000000000,7ff0000000000000 => [ - - - - - 0102030405060000 - - ] + +stxvrwx 0 (&buffer) ff7ffffe7f7ffffe,ff8000007f800000 => [3fe000947f7ffffe - - - - - - - ] +stxvrwx 8 (&buffer) 0080000e8080000e,ff7ffffe7f7ffffe => [ - 7ff702038080000e - - - - - - ] +stxvrwx 10 (&buffer) 0180055e0180077e,0080000e8080000e => [ - - 7ff000000180077e - - - - - ] +stxvrwx 18 (&buffer) 0000111e8000222e,0180055e0180077e => [ - - - 7f0000008000222e - - - - ] +stxvrwx 20 (&buffer) 7ff0000000000000,0000111e8000222e => [ - - - - 5a05a05a00000000 - - - ] +stxvrwx 28 (&buffer) fff0000000000000,7ff0000000000000 => [ - - - - - 0102030400000000 - - ] + xxblendvb 7f800000ff800000,ff8000007f800000 0000000000000000,00000000ffffffff 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 xxblendvb 7f800000ff800000,ff8000007f800000 ffffffff55555555,5555aaaaaaaa5555 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 xxblendvb 7f800000ff800000,ff8000007f800000 aaaa00000000aaaa,0000000000000000 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 @@ -7038,4 +7094,4 @@ xxspltiw imm3 => 300000003 300000003 xxspltiw imm8 => 800000008 800000008 -All done. 
Tested 134 different instruction groups +All done. Tested 142 different instruction groups |
|
From: Carl L. <ca...@so...> - 2020-11-10 01:06:57
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=4e2fee61a090f8239c74e8e0fd6da44a8aaf2dc5 commit 4e2fee61a090f8239c74e8e0fd6da44a8aaf2dc5 Author: Carl Love <ce...@us...> Date: Tue Oct 6 12:05:03 2020 -0500 Test LSB by Byte operation tests Diff: --- none/tests/ppc64/test_isa_3_1_Misc.c | 6 ++++++ none/tests/ppc64/test_isa_3_1_Misc.stdout.exp | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/none/tests/ppc64/test_isa_3_1_Misc.c b/none/tests/ppc64/test_isa_3_1_Misc.c index 54db7e21be..78ca1534ae 100644 --- a/none/tests/ppc64/test_isa_3_1_Misc.c +++ b/none/tests/ppc64/test_isa_3_1_Misc.c @@ -59,10 +59,16 @@ static void test_vcmpuq (void) { __asm__ __volatile__ ("vcmpuq 3, %0, %1" :: "v" (vra), "v" (vrb) ); GET_CR(current_cr); SET_CR_ZERO; } +static void test_xvtlsbb (void) { + SET_CR_ZERO; + __asm__ __volatile__ ("xvtlsbb 3, %x0" :: "wa" (vec_xb) ); + GET_CR(current_cr); SET_CR_ZERO; +} static test_list_t testgroup_generic[] = { { &test_vcmpsq, "vcmpsq", "BF,VRA,VRB"}, /* bcs */ { &test_vcmpuq, "vcmpuq", "BF,VRA,VRB"}, /* bcs */ + { &test_xvtlsbb, "xvtlsbb", "BF,XB"}, /* bcs */ { NULL, NULL }, }; diff --git a/none/tests/ppc64/test_isa_3_1_Misc.stdout.exp b/none/tests/ppc64/test_isa_3_1_Misc.stdout.exp index 47b396aba5..71ab382edf 100644 --- a/none/tests/ppc64/test_isa_3_1_Misc.stdout.exp +++ b/none/tests/ppc64/test_isa_3_1_Misc.stdout.exp @@ -338,4 +338,18 @@ vcmpuq 8000000000000000,7f800000ff800000 ffff000180000001,0000000000000000 => [0 vcmpuq 8000000000000000,7f800000ff800000 0000000000000000,8000000000000000 => [00080000] vcmpuq 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [00020000] -All done. 
Tested 2 different instruction groups +xvtlsbb 7e800000fe800000,fe8000007e800000 => [00020000] +xvtlsbb ff8101017f810101,ff7fffff7f7fffff => [00080000] +xvtlsbb ff7ffffe7f7ffffe,0080000e8080000e => [00000000] +xvtlsbb 0080000e8080000e,0080045e0080067e => [00020000] +xvtlsbb 0181055f0181077f,0101111f8101232f => [00080000] +xvtlsbb 0000111e8000222e,7ff0000000000000 => [00000000] +xvtlsbb 7ef0000000000000,fef0000000000000 => [00020000] +xvtlsbb fff1010101010101,2309410101010101 => [00080000] +xvtlsbb 2208400000000000,0000000000000009 => [00000000] +xvtlsbb 0000000000000008,fefe000080000000 => [00020000] +xvtlsbb ffff010181010101,0101010101010101 => [00080000] +xvtlsbb 0000000000000000,8000000000000000 => [00020000] +xvtlsbb 8000000000000000,7e800000fe800000 => [00020000] + +All done. Tested 3 different instruction groups |
|
From: Carl L. <ca...@so...> - 2020-11-10 01:06:42
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=9695f157f328b7f8baa147331c87a729d0d65183 commit 9695f157f328b7f8baa147331c87a729d0d65183 Author: Carl Love <ce...@us...> Date: Tue Oct 6 12:01:35 2020 -0500 String operation tests Diff: --- none/tests/ppc64/test_isa_3_1_VRT.c | 50 ++++++++ none/tests/ppc64/test_isa_3_1_VRT.stdout.exp | 164 ++++++++++++++++++++++++++- 2 files changed, 213 insertions(+), 1 deletion(-) diff --git a/none/tests/ppc64/test_isa_3_1_VRT.c b/none/tests/ppc64/test_isa_3_1_VRT.c index 955ddd18cd..5f2e7ebd35 100644 --- a/none/tests/ppc64/test_isa_3_1_VRT.c +++ b/none/tests/ppc64/test_isa_3_1_VRT.c @@ -323,6 +323,46 @@ static void test_vpextd (void) { __asm__ __volatile__ ("vpextd %0, %1, %2" : "=v" (vrt) : "v" (vra), "v" (vrb) ); } +static void test_vclrlb (void) { + __asm__ __volatile__ ("vclrlb %0, %1, %2" + : "=v" (vrt) : "v" (vra), "r" (rb) ); +} +static void test_vclrrb (void) { + __asm__ __volatile__ ("vclrrb %0, %1, %2" + : "=v" (vrt) : "v" (vra), "r" (rb) ); +} +static void test_vstribl (void) { + __asm__ __volatile__ ("vstribl %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_dotted_vstribl (void) { + SET_CR_ZERO; + __asm__ __volatile__ ("vstribl. %0, %1 " : "=v" (vrt) : "v" (vrb) ); + GET_CR(current_cr); SET_CR_ZERO; +} +static void test_vstribr (void) { + __asm__ __volatile__ ("vstribr %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_dotted_vstribr (void) { + SET_CR_ZERO; + __asm__ __volatile__ ("vstribr. %0, %1 " : "=v" (vrt) : "v" (vrb) ); + GET_CR(current_cr); SET_CR_ZERO; +} +static void test_vstrihl (void) { + __asm__ __volatile__ ("vstrihl %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_dotted_vstrihl (void) { + SET_CR_ZERO; + __asm__ __volatile__ ("vstrihl. 
%0, %1 " : "=v" (vrt) : "v" (vrb) ); + GET_CR(current_cr); SET_CR_ZERO; +} +static void test_vstrihr (void) { + __asm__ __volatile__ ("vstrihr %0, %1 " : "=v" (vrt) : "v" (vrb) ); +} +static void test_dotted_vstrihr (void) { + SET_CR_ZERO; + __asm__ __volatile__ ("vstrihr. %0, %1 " : "=v" (vrt) : "v" (vrb) ); + GET_CR(current_cr); SET_CR_ZERO; +} static void test_xscmpeqqp (void) { __asm__ __volatile__ ("xscmpeqqp %0, %1, %2" : "=v" (vrt) : "v" (vra), "v" (vrb) ); @@ -348,7 +388,13 @@ static test_list_t testgroup_generic[] = { { &test_dotted_vcmpequq, "vcmpequq.", "VRT,VRA,VRB"}, /* bcs */ { &test_dotted_vcmpgtsq, "vcmpgtsq.", "VRT,VRA,VRB"}, /* bcs */ { &test_dotted_vcmpgtuq, "vcmpgtuq.", "VRT,VRA,VRB"}, /* bcs */ + { &test_dotted_vstribl, "vstribl.", "VRT,VRB"}, /* bcs */ + { &test_dotted_vstribr, "vstribr.", "VRT,VRB"}, /* bcs */ + { &test_dotted_vstrihl, "vstrihl.", "VRT,VRB"}, /* bcs */ + { &test_dotted_vstrihr, "vstrihr.", "VRT,VRB"}, /* bcs */ { &test_vcfuged, "vcfuged", "VRT,VRA,VRB"}, /* bcs */ + { &test_vclrlb, "vclrlb", "VRT,VRA,RB"}, /* bcs */ + { &test_vclrrb, "vclrrb", "VRT,VRA,RB"}, /* bcs */ { &test_vclzdm, "vclzdm", "VRT,VRA,VRB"}, /* bcs */ { &test_vcmpequq, "vcmpequq", "VRT,VRA,VRB"}, /* bcs */ { &test_vcmpgtsq, "vcmpgtsq", "VRT,VRA,VRB"}, /* bcs */ @@ -414,6 +460,10 @@ static test_list_t testgroup_generic[] = { { &test_vsrdbi_0, "vsrdbi 0", "VRT,VRA,VRB,SH"}, /* bcwp */ { &test_vsrdbi_4, "vsrdbi 4", "VRT,VRA,VRB,SH"}, /* bcwp */ { &test_vsrq, "vsrq", "VRT,VRA,VRB"}, /* bcs */ + { &test_vstribl, "vstribl", "VRT,VRB"}, /* bcs */ + { &test_vstribr, "vstribr", "VRT,VRB"}, /* bcs */ + { &test_vstrihl, "vstrihl", "VRT,VRB"}, /* bcs */ + { &test_vstrihr, "vstrihr", "VRT,VRB"}, /* bcs */ { &test_xscmpeqqp, "xscmpeqqp", "VRT,VRA,VRB"}, /* bcs */ { &test_xscmpgeqp, "xscmpgeqp", "VRT,VRA,VRB"}, /* bcs */ { &test_xscmpgtqp, "xscmpgtqp", "VRT,VRA,VRB"}, /* bcs */ diff --git a/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp 
b/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp index 7890dc28ac..430604dab9 100644 --- a/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp +++ b/none/tests/ppc64/test_isa_3_1_VRT.stdout.exp @@ -508,6 +508,62 @@ vcmpgtuq. 8000000000000000,7f800000ff800000 ffff000180000001,0000000000000000 => vcmpgtuq. 8000000000000000,7f800000ff800000 0000000000000000,8000000000000000 => [00000000]6:[0000] 0,0000000000000000 vcmpgtuq. 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [00000000]6:[0000] 0,0000000000000000 +vstribl. 7f800000ff800000,ff8000007f800000 => [00000020]6:[0010] ff80000000000000,0000000000000000 +vstribl. ff8000007f800000,ff7ffffe7f7ffffe => [00000020]6:[0010] ff7ffffe7f7ffffe,ff80000000000000 +vstribl. ff7ffffe7f7ffffe,0080000e8080000e => [00000020]6:[0010] 0,0000000000000000 +vstribl. 0080000e8080000e,0180055e0180077e => [00000020]6:[0010] 180055e0180077e,0000000000000000 +vstribl. 0180055e0180077e,0000111e8000222e => [00000020]6:[0010] 0,0000000000000000 +vstribl. 0000111e8000222e,7ff0000000000000 => [00000020]6:[0010] 7ff0000000000000,0000000000000000 +vstribl. 7ff0000000000000,fff0000000000000 => [00000020]6:[0010] fff0000000000000,0000000000000000 +vstribl. fff0000000000000,2208400000000000 => [00000020]6:[0010] 2208400000000000,0000000000000000 +vstribl. 2208400000000000,0000000000000009 => [00000020]6:[0010] 0,0000000000000000 +vstribl. 0000000000000009,ffff000180000001 => [00000020]6:[0010] ffff000000000000,0000000000000000 +vstribl. ffff000180000001,0000000000000000 => [00000020]6:[0010] 0,0000000000000000 +vstribl. 0000000000000000,8000000000000000 => [00000020]6:[0010] 8000000000000000,0000000000000000 +vstribl. 8000000000000000,7f800000ff800000 => [00000020]6:[0010] 7f80000000000000,0000000000000000 + +vstribr. 7f800000ff800000,ff8000007f800000 => [00000020]6:[0010] 0,0000000000000000 +vstribr. ff8000007f800000,ff7ffffe7f7ffffe => [00000020]6:[0010] 0,0000000000000000 +vstribr. 
ff7ffffe7f7ffffe,0080000e8080000e => [00000020]6:[0010] e,ff7ffffe7f7ffffe +vstribr. 0080000e8080000e,0180055e0180077e => [00000020]6:[0010] 0,000000000000000e +vstribr. 0180055e0180077e,0000111e8000222e => [00000020]6:[0010] 222e,0180055e0180077e +vstribr. 0000111e8000222e,7ff0000000000000 => [00000020]6:[0010] 0,000000000000222e +vstribr. 7ff0000000000000,fff0000000000000 => [00000020]6:[0010] 0,0000000000000000 +vstribr. fff0000000000000,2208400000000000 => [00000020]6:[0010] 0,0000000000000000 +vstribr. 2208400000000000,0000000000000009 => [00000020]6:[0010] 0,0000000000000000 +vstribr. 0000000000000009,ffff000180000001 => [00000020]6:[0010] 0,0000000000000009 +vstribr. ffff000180000001,0000000000000000 => [00000020]6:[0010] 0,0000000000000001 +vstribr. 0000000000000000,8000000000000000 => [00000020]6:[0010] 0,0000000000000000 +vstribr. 8000000000000000,7f800000ff800000 => [00000020]6:[0010] 0,0000000000000000 + +vstrihl. 7f800000ff800000,ff8000007f800000 => [00000020]6:[0010] ff80000000000000,0000000000000000 +vstrihl. ff8000007f800000,ff7ffffe7f7ffffe => [00000020]6:[0010] ff7ffffe7f7ffffe,ff80000000000000 +vstrihl. ff7ffffe7f7ffffe,0080000e8080000e => [00000000]6:[0000] 80000e8080000e,ff7ffffe7f7ffffe +vstrihl. 0080000e8080000e,0180055e0180077e => [00000000]6:[0000] 180055e0180077e,0080000e8080000e +vstrihl. 0180055e0180077e,0000111e8000222e => [00000020]6:[0010] 0,0000000000000000 +vstrihl. 0000111e8000222e,7ff0000000000000 => [00000020]6:[0010] 7ff0000000000000,0000000000000000 +vstrihl. 7ff0000000000000,fff0000000000000 => [00000020]6:[0010] fff0000000000000,0000000000000000 +vstrihl. fff0000000000000,2208400000000000 => [00000020]6:[0010] 2208400000000000,0000000000000000 +vstrihl. 2208400000000000,0000000000000009 => [00000020]6:[0010] 0,0000000000000000 +vstrihl. 0000000000000009,ffff000180000001 => [00000020]6:[0010] ffff000180000001,0000000000000000 +vstrihl. ffff000180000001,0000000000000000 => [00000020]6:[0010] 0,0000000000000000 +vstrihl. 
0000000000000000,8000000000000000 => [00000020]6:[0010] 8000000000000000,0000000000000000 +vstrihl. 8000000000000000,7f800000ff800000 => [00000020]6:[0010] 7f80000000000000,0000000000000000 + +vstrihr. 7f800000ff800000,ff8000007f800000 => [00000020]6:[0010] 0,0000000000000000 +vstrihr. ff8000007f800000,ff7ffffe7f7ffffe => [00000020]6:[0010] 0,0000000000000000 +vstrihr. ff7ffffe7f7ffffe,0080000e8080000e => [00000000]6:[0000] 80000e8080000e,ff7ffffe7f7ffffe +vstrihr. 0080000e8080000e,0180055e0180077e => [00000000]6:[0000] 180055e0180077e,0080000e8080000e +vstrihr. 0180055e0180077e,0000111e8000222e => [00000020]6:[0010] 111e8000222e,0180055e0180077e +vstrihr. 0000111e8000222e,7ff0000000000000 => [00000020]6:[0010] 0,0000111e8000222e +vstrihr. 7ff0000000000000,fff0000000000000 => [00000020]6:[0010] 0,0000000000000000 +vstrihr. fff0000000000000,2208400000000000 => [00000020]6:[0010] 0,0000000000000000 +vstrihr. 2208400000000000,0000000000000009 => [00000020]6:[0010] 0,0000000000000000 +vstrihr. 0000000000000009,ffff000180000001 => [00000020]6:[0010] 0,0000000000000009 +vstrihr. ffff000180000001,0000000000000000 => [00000020]6:[0010] 0,ffff000180000001 +vstrihr. 0000000000000000,8000000000000000 => [00000020]6:[0010] 0,0000000000000000 +vstrihr. 
8000000000000000,7f800000ff800000 => [00000020]6:[0010] 0,0000000000000000 + vcfuged 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => 1ffff,000000000001ffff vcfuged 7f800000ff800000,ff8000007f800000 ff8000007f800000,ff7ffffe7f7ffffe => 97f800001fc00000,000001000000ffff vcfuged 7f800000ff800000,ff8000007f800000 ff7ffffe7f7ffffe,0080000e8080000e => ff00000fe0000108,b3f800001fc00000 @@ -678,6 +734,56 @@ vcfuged 8000000000000000,7f800000ff800000 ffff000180000001,0000000000000000 => 7 vcfuged 8000000000000000,7f800000ff800000 0000000000000000,8000000000000000 => ff000001ff000000,8000000000000000 vcfuged 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 1ffff,0000000000000001 +vclrlb 0 7f800000ff800000,ff8000007f800000 => 0,0000000000000000 +vclrlb 2 7f800000ff800000,ff8000007f800000 => 0,0000000000000000 +vclrlb 4 7f800000ff800000,ff8000007f800000 => 0,00000000ff800000 +vclrlb 6 7f800000ff800000,ff8000007f800000 => 0,00000000ff800000 +vclrlb 8 7f800000ff800000,ff8000007f800000 => 0,7f800000ff800000 +vclrlb a 7f800000ff800000,ff8000007f800000 => 0,7f800000ff800000 +vclrlb 0 ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000000 +vclrlb 2 ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000000 +vclrlb 4 ff8000007f800000,ff7ffffe7f7ffffe => 0,000000007f800000 +vclrlb 6 ff8000007f800000,ff7ffffe7f7ffffe => 0,000000007f800000 +vclrlb 8 ff8000007f800000,ff7ffffe7f7ffffe => 0,ff8000007f800000 +vclrlb a ff8000007f800000,ff7ffffe7f7ffffe => fffe,ff8000007f800000 +vclrlb 0 ff7ffffe7f7ffffe,0080000e8080000e => 0,0000000000000000 +vclrlb 2 ff7ffffe7f7ffffe,0080000e8080000e => 0,000000000000fffe +vclrlb 4 ff7ffffe7f7ffffe,0080000e8080000e => 0,000000007f7ffffe +vclrlb 6 ff7ffffe7f7ffffe,0080000e8080000e => 0,0000fffe7f7ffffe +vclrlb 8 ff7ffffe7f7ffffe,0080000e8080000e => 0,ff7ffffe7f7ffffe +vclrlb a ff7ffffe7f7ffffe,0080000e8080000e => e,ff7ffffe7f7ffffe +vclrlb 0 0080000e8080000e,0180055e0180077e => 0,0000000000000000 +vclrlb 2 
0080000e8080000e,0180055e0180077e => 0,000000000000000e +vclrlb 4 0080000e8080000e,0180055e0180077e => 0,000000008080000e +vclrlb 6 0080000e8080000e,0180055e0180077e => 0,0000000e8080000e +vclrlb 8 0080000e8080000e,0180055e0180077e => 0,0080000e8080000e +vclrlb a 0080000e8080000e,0180055e0180077e => 77e,0080000e8080000e + +vclrrb 0 7f800000ff800000,ff8000007f800000 => 0,0000000000000000 +vclrrb 2 7f800000ff800000,ff8000007f800000 => ff80000000000000,0000000000000000 +vclrrb 4 7f800000ff800000,ff8000007f800000 => ff80000000000000,0000000000000000 +vclrrb 6 7f800000ff800000,ff8000007f800000 => ff8000007f800000,0000000000000000 +vclrrb 8 7f800000ff800000,ff8000007f800000 => ff8000007f800000,0000000000000000 +vclrrb a 7f800000ff800000,ff8000007f800000 => ff8000007f800000,7f80000000000000 +vclrrb 0 ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000000 +vclrrb 2 ff8000007f800000,ff7ffffe7f7ffffe => ff7f000000000000,0000000000000000 +vclrrb 4 ff8000007f800000,ff7ffffe7f7ffffe => ff7ffffe00000000,0000000000000000 +vclrrb 6 ff8000007f800000,ff7ffffe7f7ffffe => ff7ffffe7f7f0000,0000000000000000 +vclrrb 8 ff8000007f800000,ff7ffffe7f7ffffe => ff7ffffe7f7ffffe,0000000000000000 +vclrrb a ff8000007f800000,ff7ffffe7f7ffffe => ff7ffffe7f7ffffe,ff80000000000000 +vclrrb 0 ff7ffffe7f7ffffe,0080000e8080000e => 0,0000000000000000 +vclrrb 2 ff7ffffe7f7ffffe,0080000e8080000e => 80000000000000,0000000000000000 +vclrrb 4 ff7ffffe7f7ffffe,0080000e8080000e => 80000e00000000,0000000000000000 +vclrrb 6 ff7ffffe7f7ffffe,0080000e8080000e => 80000e80800000,0000000000000000 +vclrrb 8 ff7ffffe7f7ffffe,0080000e8080000e => 80000e8080000e,0000000000000000 +vclrrb a ff7ffffe7f7ffffe,0080000e8080000e => 80000e8080000e,ff7f000000000000 +vclrrb 0 0080000e8080000e,0180055e0180077e => 0,0000000000000000 +vclrrb 2 0080000e8080000e,0180055e0180077e => 180000000000000,0000000000000000 +vclrrb 4 0080000e8080000e,0180055e0180077e => 180055e00000000,0000000000000000 +vclrrb 6 0080000e8080000e,0180055e0180077e => 
180055e01800000,0000000000000000 +vclrrb 8 0080000e8080000e,0180055e0180077e => 180055e0180077e,0000000000000000 +vclrrb a 0080000e8080000e,0180055e0180077e => 180055e0180077e,0080000000000000 + vclzdm 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => 0,0000000000000000 vclzdm 7f800000ff800000,ff8000007f800000 ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000001 vclzdm 7f800000ff800000,ff8000007f800000 ff7ffffe7f7ffffe,0080000e8080000e => 0,0000000000000001 @@ -10478,6 +10584,62 @@ vsrq 8000000000000000,7f800000ff800000 ffff000180000001,0000000000000000 => 7f80 vsrq 8000000000000000,7f800000ff800000 0000000000000000,8000000000000000 => 7f800000ff800000,8000000000000000 vsrq 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 7f800000ff800000,8000000000000000 +vstribl 7f800000ff800000,ff8000007f800000 => ff80000000000000,0000000000000000 +vstribl ff8000007f800000,ff7ffffe7f7ffffe => ff7ffffe7f7ffffe,ff80000000000000 +vstribl ff7ffffe7f7ffffe,0080000e8080000e => 0,0000000000000000 +vstribl 0080000e8080000e,0180055e0180077e => 180055e0180077e,0000000000000000 +vstribl 0180055e0180077e,0000111e8000222e => 0,0000000000000000 +vstribl 0000111e8000222e,7ff0000000000000 => 7ff0000000000000,0000000000000000 +vstribl 7ff0000000000000,fff0000000000000 => fff0000000000000,0000000000000000 +vstribl fff0000000000000,2208400000000000 => 2208400000000000,0000000000000000 +vstribl 2208400000000000,0000000000000009 => 0,0000000000000000 +vstribl 0000000000000009,ffff000180000001 => ffff000000000000,0000000000000000 +vstribl ffff000180000001,0000000000000000 => 0,0000000000000000 +vstribl 0000000000000000,8000000000000000 => 8000000000000000,0000000000000000 +vstribl 8000000000000000,7f800000ff800000 => 7f80000000000000,0000000000000000 + +vstribr 7f800000ff800000,ff8000007f800000 => 0,0000000000000000 +vstribr ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000000 +vstribr ff7ffffe7f7ffffe,0080000e8080000e => e,ff7ffffe7f7ffffe +vstribr 
0080000e8080000e,0180055e0180077e => 0,000000000000000e +vstribr 0180055e0180077e,0000111e8000222e => 222e,0180055e0180077e +vstribr 0000111e8000222e,7ff0000000000000 => 0,000000000000222e +vstribr 7ff0000000000000,fff0000000000000 => 0,0000000000000000 +vstribr fff0000000000000,2208400000000000 => 0,0000000000000000 +vstribr 2208400000000000,0000000000000009 => 0,0000000000000000 +vstribr 0000000000000009,ffff000180000001 => 0,0000000000000009 +vstribr ffff000180000001,0000000000000000 => 0,0000000000000001 +vstribr 0000000000000000,8000000000000000 => 0,0000000000000000 +vstribr 8000000000000000,7f800000ff800000 => 0,0000000000000000 + +vstrihl 7f800000ff800000,ff8000007f800000 => ff80000000000000,0000000000000000 +vstrihl ff8000007f800000,ff7ffffe7f7ffffe => ff7ffffe7f7ffffe,ff80000000000000 +vstrihl ff7ffffe7f7ffffe,0080000e8080000e => 80000e8080000e,ff7ffffe7f7ffffe +vstrihl 0080000e8080000e,0180055e0180077e => 180055e0180077e,0080000e8080000e +vstrihl 0180055e0180077e,0000111e8000222e => 0,0000000000000000 +vstrihl 0000111e8000222e,7ff0000000000000 => 7ff0000000000000,0000000000000000 +vstrihl 7ff0000000000000,fff0000000000000 => fff0000000000000,0000000000000000 +vstrihl fff0000000000000,2208400000000000 => 2208400000000000,0000000000000000 +vstrihl 2208400000000000,0000000000000009 => 0,0000000000000000 +vstrihl 0000000000000009,ffff000180000001 => ffff000180000001,0000000000000000 +vstrihl ffff000180000001,0000000000000000 => 0,0000000000000000 +vstrihl 0000000000000000,8000000000000000 => 8000000000000000,0000000000000000 +vstrihl 8000000000000000,7f800000ff800000 => 7f80000000000000,0000000000000000 + +vstrihr 7f800000ff800000,ff8000007f800000 => 0,0000000000000000 +vstrihr ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000000 +vstrihr ff7ffffe7f7ffffe,0080000e8080000e => 80000e8080000e,ff7ffffe7f7ffffe +vstrihr 0080000e8080000e,0180055e0180077e => 180055e0180077e,0080000e8080000e +vstrihr 0180055e0180077e,0000111e8000222e => 
111e8000222e,0180055e0180077e +vstrihr 0000111e8000222e,7ff0000000000000 => 0,0000111e8000222e +vstrihr 7ff0000000000000,fff0000000000000 => 0,0000000000000000 +vstrihr fff0000000000000,2208400000000000 => 0,0000000000000000 +vstrihr 2208400000000000,0000000000000009 => 0,0000000000000000 +vstrihr 0000000000000009,ffff000180000001 => 0,0000000000000009 +vstrihr ffff000180000001,0000000000000000 => 0,ffff000180000001 +vstrihr 0000000000000000,8000000000000000 => 0,0000000000000000 +vstrihr 8000000000000000,7f800000ff800000 => 0,0000000000000000 + xscmpeqqp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffffffffffffffff,ffffffffffffffff xscmpeqqp 7f800000ff800000,ff8000007f800000 ff8000007f800000,ff7ffffe7f7ffffe => 0,0000000000000000 xscmpeqqp 7f800000ff800000,ff8000007f800000 ff7ffffe7f7ffffe,0080000e8080000e => 0,0000000000000000 @@ -11328,4 +11490,4 @@ xsmincqp 8000000000000000,7f800000ff800000 ffff000180000001,0000000000000000 => xsmincqp 8000000000000000,7f800000ff800000 0000000000000000,8000000000000000 => 8000000000000000,0000000000000000 xsmincqp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 7f800000ff800000,8000000000000000 -All done. Tested 74 different instruction groups +All done. Tested 84 different instruction groups |
|
From: Carl L. <ca...@so...> - 2020-11-10 01:06:19
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=207101dfc9fd36fcb4181857bd868eb741ff27d3 commit 207101dfc9fd36fcb4181857bd868eb741ff27d3 Author: Carl Love <ce...@us...> Date: Thu Apr 30 10:54:43 2020 -0500 ISA 3.1 VSX Load/Store Rightmost Element Operations Add support for: lxvrbx Load VSX Vector Rightmost Byte Indexed lxvrdx Load VSX Vector Rightmost Doubleword Indexed lxvrhx Load VSX Vector Rightmost Halfword Indexed lxvrwx Load VSX Vector Rightmost Word Indexed stxvrbx Store VSX Vector Rightmost Byte Indexed stxvrdx Store VSX Vector Rightmost Doubleword Indexed stxvrhx Store VSX Vector Rightmost Halfword Indexed stxvrwx Store VSX Vector Rightmost Word Indexed Diff: --- VEX/priv/guest_ppc_toIR.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index b208cb83d0..9192436924 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -23571,6 +23571,226 @@ dis_vx_load ( UInt prefix, UInt theInstr ) mkU64(0) ) ); break; } + + case 0x00D: // lxvrbx + { + IRExpr * exp; + DIP("lxvrbx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + exp = load( Ity_I64, mkexpr( EA ) ); + + if (host_endness == VexEndnessLE) + putVSReg( XT, binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, mkU64( 0xFF ), exp ) ) ); + else + putVSReg( XT, + binop( Iop_ShrV128, + binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, mkU64( 0xFF ), exp ) ), + mkU8( 15*8 ) ) ); // data is left most byte + break; + } + + case 0x02D: // lxvrhx + { + IRExpr * exp; + + DIP("lxvrhx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + exp = load( Ity_I64, mkexpr( EA ) ); + + if (host_endness == VexEndnessLE) + putVSReg( XT, binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, mkU64( 0xFFFF ), exp ) ) ); + else + putVSReg( XT, + binop( Iop_ShrV128, + binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, mkU64( 0xFFFF ), exp ) ), + mkU8( 7*16 ) ) ); // data is left most half-word + 
break; + } + + case 0x04D: // lxvrwx + { + IRExpr * exp; + + DIP("lxvrwx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + exp = load( Ity_I64, mkexpr( EA ) ); + + if (host_endness == VexEndnessLE) + putVSReg( XT, binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, mkU64( 0xFFFFFFFF ), exp ) ) ); + else + putVSReg( XT, + binop( Iop_ShrV128, + binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, + mkU64( 0xFFFFFFFF ), exp ) ), + mkU8( 3*32 ) ) ); // data is left most word + break; + } + + case 0x06D: // lxvrdx + { + IRExpr * exp; + + DIP("lxvrdx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + exp = load( Ity_I64, mkexpr( EA ) ); + + if (host_endness == VexEndnessLE) + putVSReg( XT, binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, + mkU64( 0xFFFFFFFFFFFFFFFFULL), exp ) ) ); + else + putVSReg( XT, + binop( Iop_ShrV128, + binop( Iop_64HLtoV128, + mkU64( 0x0 ), + binop( Iop_And64, + mkU64( 0xFFFFFFFFFFFFFFFFULL), exp ) ), + mkU8( 1*64 ) ) ); // data is left most double word + break; + } + + case 0x08D: // stxvrbx + { + IRExpr * fetched_exp; + IRExpr * store_exp; + IRTemp vS = newTemp( Ity_V128 ); + + DIP("stxvrbx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + fetched_exp = load( Ity_I64, mkexpr( EA ) ); + assign( vS, getVSReg( XT ) ); + + /* Fetch 64 bits, merge byte element 15 into the fetched value and + * store. 
*/ + if (host_endness == VexEndnessLE) { + store_exp = binop( Iop_Or64, + binop( Iop_And64, + mkU64( 0x00000000000000FF ), + unop( Iop_V128to64, mkexpr( vS ) ) ), + binop( Iop_And64, + mkU64( 0xFFFFFFFFFFFFFF00 ), + fetched_exp ) ); + store( mkexpr( EA ), store_exp ); + } else { + store_exp = binop( Iop_Or64, + binop( Iop_And64, + mkU64( 0xFF00000000000000 ), + unop( Iop_V128HIto64, mkexpr( vS ) ) ), + binop( Iop_And64, + mkU64( 0x00FFFFFFFFFFFFFF ), + fetched_exp ) ); + store( mkexpr( EA ), store_exp ); + } + break; + } + + case 0x0AD: // stxvrhx + { + IRExpr * fetched_exp; + IRExpr * store_exp; + IRTemp vS = newTemp( Ity_V128 ); + + DIP("stxvrhx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + fetched_exp = load( Ity_I64, mkexpr( EA ) ); + assign( vS, getVSReg( XT ) ); + + /* Fetch 64 bits, merge half-word element 7 into the fetched value and + * store. */ + if (host_endness == VexEndnessLE) { + store_exp = binop( Iop_Or64, + binop( Iop_And64, + mkU64( 0x000000000000FFFF ), + unop( Iop_V128to64, mkexpr( vS ) ) ), + binop( Iop_And64, + mkU64( 0xFFFFFFFFFFFF0000 ), + fetched_exp ) ); + store( mkexpr( EA ), store_exp ); + } else { + store_exp = binop( Iop_Or64, + binop( Iop_And64, + mkU64( 0xFFFF000000000000 ), + unop( Iop_V128HIto64, mkexpr( vS ) ) ), + binop( Iop_And64, + mkU64( 0x0000FFFFFFFFFFFF ), + fetched_exp ) ); + store( mkexpr( EA ), store_exp ); + } + break; + } + + case 0x0CD: // stxvrwx + { + IRExpr * fetched_exp; + IRExpr * store_exp; + IRTemp vS = newTemp( Ity_V128 ); + + DIP("stxvrwx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + fetched_exp = load( Ity_I64, mkexpr( EA ) ); + assign( vS, getVSReg( XT ) ); + + /* Fetch 64 bits, merge word element 3 into the fetched value and + * store. 
*/ + if (host_endness == VexEndnessLE) { + store_exp = binop( Iop_Or64, + binop( Iop_And64, + mkU64( 0x00000000FFFFFFFF ), + unop( Iop_V128to64, mkexpr( vS ) ) ), + binop( Iop_And64, + mkU64( 0xFFFFFFFF00000000 ), + fetched_exp ) ); + store( mkexpr( EA ), store_exp ); + } else { + store_exp = binop( Iop_Or64, + binop( Iop_And64, + mkU64( 0xFFFFFFFF00000000 ), + unop( Iop_V128HIto64, mkexpr( vS ) ) ), + binop( Iop_And64, + mkU64( 0x00000000FFFFFFFF ), + fetched_exp ) ); + store( mkexpr( EA ), store_exp ); + } + break; + } + + case 0x0ED: // stxvrdx + { + IRExpr * store_exp; + IRTemp vS = newTemp( Ity_V128 ); + + DIP("stxvrdx v%u,r%u,r%u\n", XT, rA_addr, rB_addr); + + assign( vS, getVSReg( XT ) ); + + /* Fetch 64 bits, merge double word element 1 into the fetched value and + * store. Well, this is just store vS bits[63:0] at EA. */ + if (host_endness == VexEndnessLE) { + store_exp = binop( Iop_And64, + mkU64( 0xFFFFFFFFFFFFFFFF ), + unop( Iop_V128to64, mkexpr( vS ) ) ); + store( mkexpr( EA ), store_exp ); + } else { + store_exp = binop( Iop_And64, + mkU64( 0xFFFFFFFFFFFFFFFF ), + unop( Iop_V128HIto64, mkexpr( vS ) ) ); + store( mkexpr( EA ), store_exp ); + } + break; + } + case 0x04C: // lxsiwax (Load VSX Scalar as Integer Word Algebraic Indexed) { IRExpr * exp; @@ -34666,6 +34886,21 @@ DisResult disInstr_PPC_WRK ( if (dis_vx_load( prefix, theInstr )) goto decode_success; goto decode_failure; + case 0x00D: // lxvrbx + case 0x02D: // lxvrhx + case 0x04D: // lxvrwx + case 0x06D: // lxvrdx + case 0x08D: // stxvrbx + case 0x0AD: // stxvrhx + case 0x0CD: // stxvrwx + case 0x0ED: // stxvrdx + // All of these VSX load instructions use some VMX facilities, so + // if allow_V is not set, we'll skip trying to decode. + if (!allow_V) goto decode_noV; + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_vx_load( prefix, theInstr )) goto decode_success; + goto decode_failure; + /* VSX Store */ case 0x08C: // stxsiwx case 0x18C: // stxvx |
|
From: Carl L. <ca...@so...> - 2020-11-10 01:06:06
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=e3584136cb18baa44cf48a7ec396d57fdbe5ac9b commit e3584136cb18baa44cf48a7ec396d57fdbe5ac9b Author: Carl Love <ce...@us...> Date: Thu Apr 30 10:53:34 2020 -0500 ISA 3.1 Test LSB by Byte Operation Add support for: xvtlsbb Diff: --- VEX/priv/guest_ppc_toIR.c | 87 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index b2ebf3a063..b208cb83d0 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -32955,6 +32955,67 @@ static Bool dis_string_isolate ( UInt prefix, UInt theInstr ) return True; } +static Bool dis_test_LSB_by_bit ( UInt prefix, UInt theInstr ) +{ +#define MAX_FIELDS 16 + UChar vB_addr = ifieldRegXB(theInstr); + IRTemp vB = newTemp( Ity_V128 ); + UChar opc1 = ifieldOPC(theInstr); + UInt opc2 = IFIELD(theInstr, (31-29), 9); // bits[21:29] + UInt inst_select = IFIELD( theInstr, (31-15), 5); // bits[11:15] + UInt BF = IFIELD( theInstr, (31-8), 3); // bits[6:8] + UInt i; + IRTemp all_true[MAX_FIELDS+1]; + IRTemp all_false[MAX_FIELDS+1]; + IRTemp tmp128[MAX_FIELDS]; + IRTemp cc = newTemp(Ity_I32); + + if (!((opc1 == 0x3C) && (opc2 == 0x1DB) && (inst_select == 2))) + return False; + + DIP("xvtlsbb %u,v%u\n", BF, vB_addr); + + assign( vB, getVSReg( vB_addr ) ); + all_true[0] = newTemp( Ity_I1 ); + all_false[0] = newTemp( Ity_I1 ); + assign( all_true[0], mkU1( 1 ) ); + assign( all_false[0], mkU1( 1 ) ); + + for (i = 0; i< MAX_FIELDS; i++) { + tmp128[i] = newTemp( Ity_I64 ); + all_true[i+1] = newTemp( Ity_I1 ); + all_false[i+1] = newTemp( Ity_I1 ); + + assign( tmp128[i], binop( Iop_And64, + mkU64( 0x1 ), + unop( Iop_V128to64, + binop( Iop_ShrV128, + mkexpr( vB ), mkU8( i*8 ) ) ) ) ); + assign( all_true[i+1], mkAND1 ( mkexpr( all_true[i] ), + binop( Iop_CmpEQ64, + mkU64( 1 ), + mkexpr( tmp128[i] ) ) ) ); + assign( all_false[i+1], mkAND1 ( mkexpr( all_false[i] ), + binop( Iop_CmpEQ64, + 
mkU64( 0 ), + mkexpr( tmp128[i] ) ) ) ); + } + + assign( cc, binop( Iop_Or32, + binop( Iop_Shl32, + unop( Iop_1Uto32, + mkexpr( all_true[MAX_FIELDS] ) ), + mkU8( 3 ) ), + binop( Iop_Shl32, + unop( Iop_1Uto32, + mkexpr( all_false[MAX_FIELDS] ) ), + mkU8( 1 ) ) ) ); + + putGST_field( PPC_GST_CR, mkexpr( cc ), BF ); + return True; +#undef MAX_FIELDS +} + static Int dis_nop_prefix ( UInt prefix, UInt theInstr ) { Bool is_prefix = prefix_instruction( prefix ); @@ -33804,19 +33865,37 @@ DisResult disInstr_PPC_WRK ( case 0x0B4: case 0x094: // xsredp, xsrsqrtedp case 0x0D6: case 0x0B2: // xsrdpic, xsrdpiz case 0x092: case 0x232: // xsrdpi, xsrsp - case 0x3B6: // xxbrh, xvxexpdp, xvxexpsp, xvxsigdp - // xvxsigsp, xvcvhpsp case 0x2b6: // xsxexpdp, xsxsigdp case 0x254: case 0x2d4: // xststdcsp, xststdcdp case 0x354: // xvtstdcsp case 0x360:case 0x396: // xviexpsp, xsiexpdp case 0x3D4: case 0x3E0: // xvtstdcdp, xviexpdp - if (dis_vxs_misc( prefix, theInstr, abiinfo, vsxOpc2, allow_isa_3_0 )) + if (dis_vxs_misc( prefix, theInstr, abiinfo, vsxOpc2, + allow_isa_3_0 )) goto decode_success; goto decode_failure; + + case 0x3B6: { + UInt inst_select = IFIELD( theInstr, 16, 5); + + if (inst_select == 2) { //xvtlsbb + if (dis_test_LSB_by_bit( prefix, theInstr)) + goto decode_success; + goto decode_failure; + } + + // xxbrh, xvxexpdp, xvxexpsp, xvxsigdp + // xvxsigsp, xvcvhpsp + if (dis_vxs_misc( prefix, theInstr, abiinfo, vsxOpc2, + allow_isa_3_0 )) + goto decode_success; + goto decode_failure; + } + case 0x08C: case 0x0AC: // xscmpudp, xscmpodp - if (dis_vx_cmp( prefix, theInstr, vsxOpc2 )) goto decode_success; + if (dis_vx_cmp( prefix, theInstr, vsxOpc2 )) goto decode_success; goto decode_failure; + case 0x0: case 0x020: // xsaddsp, xssubsp case 0x080: // xsadddp case 0x060: case 0x0E0: // xsdivsp, xsdivdp |
|
From: Carl L. <ca...@so...> - 2020-11-10 01:05:52
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=ced69e5ea80f647a736c026bacf647c55a8c6c7f commit ced69e5ea80f647a736c026bacf647c55a8c6c7f Author: Carl Love <ce...@us...> Date: Fri May 15 20:57:24 2020 -0500 ISA 3.1 String Operations Add support for: vclrlb Vector Clear Leftmost Bytes vclrrb Vector Clear Rightmost Bytes vstribl[.] Vector String Isolate Byte Left -Justified vstribr[.] Vector String Isolate Byte Right -Justified vstrihl[.] Vector String Isolate Halfword Left -Justified vstrihr[.] Vector String Isolate Halfword Right -Justified Diff: --- VEX/priv/guest_ppc_toIR.c | 324 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 322 insertions(+), 2 deletions(-) diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 62eeade57a..b2ebf3a063 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -3173,6 +3173,76 @@ static IRExpr * absI64( IRTemp src ) binop( Iop_And64, mkexpr( twos_comp ), mkexpr( sign_mask ) ) ); } +static IRExpr * locate_vector_ele_eq ( IRTemp src, IRExpr *value, + UInt dir, IRType size ) +{ +#define MAX_ELE 16 + /* Find the index, 0 to max-1, of the element in 128-bit vector that matches + value. The returned value will be index+1. Return the index as an + Ity_I8. If no match is found, the returned value is equal to the number + of elements in the vector plus one. The argument dir specifies match from + left (dir = 0) or from the right (dir != 0). 
*/ + UInt i, num_bytes; + UInt max = 0; /* number of vector elements */ + UInt mask = 0; + IRTemp cnt[MAX_ELE+1]; + IRTemp flag[MAX_ELE+1]; + IRTemp cmp_result[MAX_ELE]; + UInt byte_index; + + vassert(size == Ity_I8 || size == Ity_I16); + + if (size == Ity_I8) { + mask = 0xFF; + max = 128/8; + num_bytes = 1; + } else { + mask = 0xFFFF; + max = 128/16; + num_bytes = 2; // num bytes in half word + } + + cnt[0] = newTemp(Ity_I8); + assign( cnt[0], mkU8( 1 ) ); + flag[0] = newTemp(Ity_I8); + assign( flag[0], mkU8( 1 ) ); + + for (i = 0; i < max; i++) { + if (dir == 0) { + byte_index = (max - 1 - i)*num_bytes; + } else { + byte_index = i*num_bytes; + } + + cnt[i+1] = newTemp(Ity_I8); + cmp_result[i] = newTemp(Ity_I8); + flag[i+1] = newTemp(Ity_I8); + + assign( cmp_result[i], + unop( Iop_1Uto8, + binop( Iop_CmpEQ64, + binop( Iop_And64, + mkU64( mask ), + value ), + extract_field_from_vector( src, + mkU64( byte_index ), + mask ) ) ) ); + + assign( flag[i+1], binop( Iop_And8, + mkexpr( flag[i] ), + unop( Iop_Not8, + mkexpr( cmp_result[i] ) ) ) ); + + // Once flag[i] becomes zero, it forces the increment to zero + assign( cnt[i+1], + binop( Iop_Add8, + binop( Iop_And8, mkexpr( flag[i+1] ), mkU8( 1 ) ), + mkexpr( cnt[i] ) ) ); + } + return mkexpr( cnt[max] ); +#undef MAX_ELE +} + /*-----------------------------------------------------------*/ /*--- Prefix instruction helpers ---*/ /*-----------------------------------------------------------*/ @@ -32656,6 +32726,235 @@ static Bool dis_vec_extract_insert ( UInt prefix, UInt theInstr ) return True; } +static Bool dis_string_isolate ( UInt prefix, UInt theInstr ) +{ + UChar vT_addr = ifieldRegDS(theInstr); + UChar vA_addr = ifieldRegA(theInstr); + UChar vB_addr = ifieldRegB(theInstr); + + IRTemp vT = newTemp(Ity_V128); + IRTemp index = newTemp(Ity_I32); + IRTemp sh_index = newTemp(Ity_I32); + IRTemp mask = newTemp(Ity_V128); + IRTemp cc = newTemp(Ity_I32); + UInt cc_field = 6; + + UInt Rc = IFIELD( theInstr, (31-21), 1 ); + + 
UInt opc2 = IFIELD( theInstr, 0, 11 ); + Int inst_sel = IFIELD(theInstr, 16, 5); + Int dir = 0; // 0 - index from left, 1 - index from right + IROp shift_first, shift_second; + + assign( mask, binop( Iop_64HLtoV128, + mkU64( 0xFFFFFFFFFFFFFFFF ), + mkU64( 0xFFFFFFFFFFFFFFFF ) ) ); + + if (opc2 == 0x18D) + inst_sel = opc2; + + else if (opc2 == 0x1CD) + inst_sel = opc2; + + switch(inst_sel) { + case 0x0: // vstribl[.] + case 0x1: // vstribr[.] + { + IRTemp vB = newTemp(Ity_V128); + + if (inst_sel == 0) { + DIP("vstribl%s v%u,v%u\n", Rc ? ".":"", vT_addr, vB_addr); + shift_first = Iop_ShlV128; + dir = 0; + + } else { + DIP("vstribr%s v%u,v%u\n", Rc ? ".":"", vT_addr, vB_addr); + shift_first = Iop_ShrV128; + dir = 1; + } + + /* Get index of match of first byte from the left that matches zero. + Index will be equal to max elements in vector if there is no match. + If index is equal to the max, which is 16 in this case, set index + to zero so the data mask will select all of the bits. + */ + assign( vB, getVReg( vB_addr ) ); + assign( index, unop( Iop_8Uto32, + locate_vector_ele_eq( vB, mkU64( 0 ), dir, + Ity_I8 ) ) ); + assign( sh_index, + binop( Iop_And32, + unop( Iop_1Sto32, + binop( Iop_CmpLE32U, + mkexpr( index ), + mkU32( 16 ) ) ), + binop( Iop_Sub32, + mkU32( 16 ), + mkexpr( index ) ) ) ); + + /* Shift mask to select the bytes up to the match with zero */ + assign( vT, binop( Iop_AndV128, + // binop( Iop_ShlV128, + binop( shift_first, + mkexpr( mask ), + unop( Iop_32to8, + binop( Iop_Mul32, + mkU32( 8 ), + mkexpr( sh_index ) ) ) ), + mkexpr( vB ) ) ); + + if (Rc) + /* The returned index was between 1 and 16 if a null was found. */ + assign( cc, binop( Iop_Shl32, + unop( Iop_1Uto32, + binop( Iop_CmpLE32U, + mkexpr( index ), mkU32( 16 ) ) ), + mkU8( 1 ) ) ); + } + break; + + case 0x2: // vstrihl[.] + case 0x3: // vstrihr[.] + { + IRTemp vB = newTemp(Ity_V128); + + if (inst_sel == 2) { + DIP("vstrihl%s v%u,v%u\n", Rc ? 
".":"", vT_addr, vB_addr); + shift_first = Iop_ShlV128; + dir = 0; + + } else { + DIP("vstrihr%s v%u,v%u\n", Rc ? ".":"", vT_addr, vB_addr); + shift_first = Iop_ShrV128; + dir = 1; + } + + assign( vB, getVReg( vB_addr ) ); + assign( index, unop( Iop_8Uto32, + locate_vector_ele_eq( vB, mkU64( 0 ), dir, + Ity_I16 ) ) ); + /* Get index of match of first half word from specified direction + that matches zero. Index will be equal to max elements in vector + if there is no match. If index is equal to the max, which is 8 + in this case, set index to zero so the data mask will select all + of the bits. + */ + assign( sh_index, + binop( Iop_And32, + unop( Iop_1Sto32, + binop( Iop_CmpLE32U, + mkexpr( index ), + mkU32( 8 ) ) ), + binop( Iop_Sub32, + mkU32( 8 ), + mkexpr( index ) ) ) ); + + /* Shift mask left to select the bytes up to the match with zero */ + assign( vT, binop( Iop_AndV128, + // binop( Iop_ShlV128, + binop( shift_first, + mkexpr( mask ), + unop( Iop_32to8, + binop( Iop_Mul32, + mkU32( 16 ), + mkexpr( sh_index ) ) ) ), + mkexpr( vB ) ) ); + + if (Rc) + /* The returned index was between 1 and 16 if a null was found. */ + assign( cc, binop( Iop_Shl32, + unop( Iop_1Uto32, + binop( Iop_CmpLE32U, + mkexpr( index ), mkU32( 8 ) ) ), + mkU8( 1 ) ) ); + } + break; + + case 0x18D: // vclrlb + case 0x1CD: // vclrrb + { + IRTemp rB = newTemp(Ity_I64); + IRTemp vA = newTemp(Ity_V128); + IRTemp shift = newTemp(Ity_I8); + IRTemp clear_result = newTemp(Ity_I64); + + /* Note vB_addr actually refers to a GPR in this inst. */ + if (inst_sel == 0x18D) { + DIP("vclrlb v%u,v%u,%u\n", vT_addr, vA_addr, vB_addr); + shift_first = Iop_ShlV128; + shift_second = Iop_ShrV128; + + } else { + DIP("vclrrb v%u,v%u,%u\n", vT_addr, vA_addr, vB_addr); + shift_first = Iop_ShrV128; + shift_second = Iop_ShlV128; + } + + assign( vA, getVReg( vA_addr ) ); + assign( rB, getIReg( vB_addr ) ); + + /* Clear left 16-rB bytes, if rb > 16, set shift to 0 + and clear_result to all 1's. 
*/ + assign( shift, + unop( Iop_32to8, + binop( Iop_And32, + binop( Iop_Mul32, + mkU32( 8 ), + binop( Iop_Sub32, + mkU32( 16 ), + unop( Iop_64to32, + mkexpr( rB ) ) ) ), + unop( Iop_Not32, + unop( Iop_1Sto32, + binop( Iop_CmpLT32S, + mkU32( 16 ), + unop( Iop_64to32, + mkexpr( rB ) ) ) ) ) + ) ) ); + + /* Clear all bits if rB > 16 */ + assign( clear_result, + binop( Iop_Or64, + unop( Iop_1Sto64, + binop( Iop_CmpLE32S, + unop( Iop_8Uto32, mkexpr( shift ) ), + mkU32( 127 ) ) ), + unop( Iop_1Sto64, + binop( Iop_CmpLT32S, + mkU32( 16 ), + unop( Iop_64to32, + mkexpr( rB ) ) ) ) ) ); + + /* Clear bits by shifting mask, then shifting back by index. If + * shift is >= 127, need to mask out result as underlying shift only + * supports shifts up to 127 bits. + */ + assign( vT, + binop( Iop_AndV128, + binop( Iop_AndV128, + binop( shift_second, + binop( shift_first, + mkexpr( mask ), + mkexpr( shift ) ), + mkexpr( shift ) ), + mkexpr( vA ) ), + binop( Iop_64HLtoV128, mkexpr( clear_result ), + mkexpr( clear_result ) ) ) ); + } + break; + + default: + vex_printf("dis_string_isolate(isnt_sel = %d)\n", inst_sel); + return False; + } + + if (Rc) + putGST_field( PPC_GST_CR, mkexpr( cc ), cc_field ); + + putVReg( vT_addr, mkexpr( vT ) ); + return True; +} + static Int dis_nop_prefix ( UInt prefix, UInt theInstr ) { Bool is_prefix = prefix_instruction( prefix ); @@ -32709,6 +33008,7 @@ DisResult disInstr_PPC_WRK ( { UChar opc1; UInt opc2; + UInt opc3; DisResult dres; UInt theInstr; UInt prefix; @@ -34451,13 +34751,33 @@ DisResult disInstr_PPC_WRK ( } } + opc2 = IFIELD(theInstr, 0, 10); + opc3 = IFIELD(theInstr, 16, 5); + + if ((opc2 == 0x0D) & (opc3 < 4)) { // vstrihr, vstrihl, vstribr vstrib + /* Vector String Isolate instructions */ + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_string_isolate( prefix, theInstr )) + goto decode_success; + goto decode_failure; + } + opc2 = IFIELD(theInstr, 0, 11); + switch (opc2) { + /* Vector String Isolate instructions */ + case 0x18D: // 
vclrlb + case 0x1CD: // vclrrb + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; + if (dis_string_isolate( prefix, theInstr )) + goto decode_success; + goto decode_failure; + /* BCD manipulation */ case 0x341: // bcdcpsgn - if (!allow_isa_2_07) goto decode_noP8; - if (dis_av_bcd_misc( prefix, theInstr, abiinfo )) goto decode_success; + if (dis_av_bcd_misc( prefix, theInstr, abiinfo )) + goto decode_success; goto decode_failure; |