You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
1
|
2
|
3
|
|
4
(1) |
5
(1) |
6
(5) |
7
|
8
|
9
|
10
|
|
11
|
12
|
13
|
14
(3) |
15
(2) |
16
(3) |
17
|
|
18
|
19
(2) |
20
(7) |
21
(2) |
22
(7) |
23
|
24
|
|
25
(2) |
26
|
27
|
28
(1) |
29
|
30
(2) |
|
|
From: Andreas A. <ar...@so...> - 2018-11-30 14:13:10
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=86bd889458883295b73c36696ec64dea9338a7a3 commit 86bd889458883295b73c36696ec64dea9338a7a3 Author: Vadim Barkov <vb...@gm...> Date: Fri Oct 5 13:46:44 2018 +0300 Bug 385411 s390x: Tests and internals for z13 vector FP support Add test cases for the z13 vector FP support. Bring s390-opcodes.csv up-to-date, reflecting that the z13 vector instructions are now supported. Also remove the non-support disclaimer for the vector facility from README.s390. The patch was contributed by Vadim Barkov, with some clean-up and minor adjustments by Andreas Arnez. Diff: --- .gitignore | 10 + README.s390 | 1 - docs/internals/s390-opcodes.csv | 236 ++-- none/tests/s390x/Makefile.am | 5 +- none/tests/s390x/vector.h | 111 +- none/tests/s390x/vector_float.c | 275 +++++ none/tests/s390x/vector_float.stderr.exp | 2 + none/tests/s390x/vector_float.stdout.exp | 1808 ++++++++++++++++++++++++++++++ none/tests/s390x/vector_float.vgtest | 2 + 9 files changed, 2318 insertions(+), 132 deletions(-) diff --git a/.gitignore b/.gitignore index a427659..e88e3b3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ # / /.in_place +/.vs /acinclude.m4 /aclocal.m4 /autom4te-*.cache @@ -1135,6 +1136,10 @@ # /memcheck/tests/s390x/ /memcheck/tests/s390x/.deps +/memcheck/tests/s390x/*.stderr.diff* +/memcheck/tests/s390x/*.stderr.out +/memcheck/tests/s390x/*.stdout.diff* +/memcheck/tests/s390x/*.stdout.out /memcheck/tests/s390x/Makefile /memcheck/tests/s390x/Makefile.in /memcheck/tests/s390x/cs @@ -1750,6 +1755,10 @@ # /none/tests/s390x/ /none/tests/s390x/.deps +/none/tests/s390x/*.stderr.diff* +/none/tests/s390x/*.stderr.out +/none/tests/s390x/*.stdout.diff* +/none/tests/s390x/*.stdout.out /none/tests/s390x/add /none/tests/s390x/add_EI /none/tests/s390x/add_GE @@ -1883,6 +1892,7 @@ /none/tests/s390x/vector_string /none/tests/s390x/vector_integer /none/tests/s390x/high-word +/none/tests/s390x/vector_float # /none/tests/scripts/ /none/tests/scripts/*.dSYM diff --git a/README.s390 b/README.s390 index 96dac1b..4679bb3 100644 --- a/README.s390 +++ b/README.s390 @@ -24,7 +24,6 @@ Limitations 4 one-byte reads/writes instead of just a single read/write. - The transactional-execution facility is not supported; it is masked off from HWCAP. -- The vector facility is not supported; it is masked off from HWCAP. Hardware facilities diff --git a/docs/internals/s390-opcodes.csv b/docs/internals/s390-opcodes.csv index 0dfce68..d7056ff 100644 --- a/docs/internals/s390-opcodes.csv +++ b/docs/internals/s390-opcodes.csv @@ -980,15 +980,15 @@ cdzt,"convert from zoned long","not implemented",zEC12, cxzt,"convert from zoned extended","not implemented",zEC12, czdt,"convert to zoned long","not implemented",zEC12, czxt,"convert to zoned extended","not implemented",zEC12, -vfsdb,"vector fp subtract long","not implemented",z13 +vfsdb,"vector fp subtract long",implemented,z13 vlpf,"vector load positive word",implemented,z13 verllh,"vector element rotate left logical mem halfword",implemented,z13 -vzero,"vector set to zero","not implemented",z13 +vzero,"vector set to zero",implemented,z13 vmalof,"vector multiply and add logical odd word",implemented,z13 vleif,"vector load word element immediate",implemented,z13 vlpb,"vector load positive byte",implemented,z13 vmxlh,"vector maximum logical halfword",implemented,z13 -vpksfs,"vector pack saturate word","not implemented",z13 +vpksfs,"vector pack saturate word",implemented,z13 vfenezh,"vector find element not equal halfword","implemented",z13 vecl,"vector element compare logical",implemented,z13 verimb,"vector element rotate and insert under mask byte",implemented,z13 @@ -998,25 +998,25 @@ vst,"vector store","implemented",z13 vsteg,"vector store double word element","implemented",z13 vmnf,"vector minimum word",implemented,z13 vavgl,"vector average logical",implemented,z13 -vfpsodb,"vector fp perform sign operation","not implemented",z13 -llzrgf,"load logical and zero rightmost bytes 32->64","not implemented",z13 -vledb,"vector fp load rounded","not implemented",z13 -vldeb,"vector fp load lengthened","not implemented",z13 +vfpsodb,"vector fp perform sign operation",implemented,z13 +llzrgf,"load logical and zero rightmost bytes 32->64",implemented,z13 +vledb,"vector fp load rounded",implemented,z13 +vldeb,"vector fp load lengthened",implemented,z13 vclzg,"vector count leading zeros doubleword",implemented,z13 vecg,"vector element compare double word",implemented,z13 -vpksgs,"vector pack saturate double word","not implemented",z13 +vpksgs,"vector pack saturate double word",implemented,z13 vsel,"vector select","implemented",z13 vllezb,"vector load logical byte element and zero","implemented",z13 vfaezh,"vector find any element equal","implemented",z13 -vftci,"vector fp test data class immediate","not implemented",z13 +vftci,"vector fp test data class immediate",implemented,z13 veclb,"vector element compare logical byte",implemented,z13 -vuplhw,"vector unpack low halfword","not implemented",z13 +vuplhw,"vector unpack low halfword",implemented,z13 veslvb,"vector element shift left reg byte",implemented,z13 vuplh,"vector unpack logical high","implemented",z13 -vlde,"vector fp load lengthened","not implemented",z13 +vlde,"vector fp load lengthened",implemented,z13 vmoh,"vector multiply odd halfword",implemented,z13 vfaehs,"vector find any element equal","implemented",z13 -vftcidb,"vector fp test data class immediate","not implemented",z13 +vftcidb,"vector fp test data class immediate",implemented,z13 vaq,"vector add quad word",implemented,z13 vlgvh,"vector load gr from vr halfword element","implemented",z13 vchlg,"vector compare high logical double word",implemented,z13 @@ -1034,47 +1034,47 @@ vsbiq,"vector subtract with borrow indication quadword",implemented,z13 vuphb,"vector unpack high byte","implemented",z13 vgfmb,"vector galois field multiply sum byte",implemented,z13 vrepih,"vector replicate immediate halfword","implemented",z13 -vcdlg,"vector fp convert from logical 64 bit","not implemented",z13 +vcdlg,"vector fp convert from logical 64 bit",implemented,z13 cxpt,"convert from packed to extended dfp","not implemented",z13 vceqb,"vector compare equal byte",implemented,z13 vstrczfs,"vector string range compare word",implemented,z13 -vpklshs,"vector pack logical saturate halfword","not implemented",z13 +vpklshs,"vector pack logical saturate halfword",implemented,z13 vlvgb,"vector load VR byte element from GR","implemented",z13 -lcbb,"load count to block boundary","not implemented",z13 +lcbb,"load count to block boundary",implemented,z13 vlcf,"vector load complement word",implemented,z13 vlvg,"vector load VR element from GR","implemented",z13 vmalef,"vector multiply and add logical even word",implemented,z13 vn,"vector and","implemented",z13 vmae,"vector multiply and add even",implemented,z13 vstrc,"vector string range compare",implemented,z13 -vfcedb,"vector fp compare equal","not implemented",z13 +vfcedb,"vector fp compare equal",implemented,z13 vgfm,"vector galois field multiply sum",implemented,z13 vlrepb,"vector load and replicate byte elements","implemented",z13 vgfmag,"vector galois field multiply sum and accumulate doubleword",implemented,z13 -vflndb,"vector fp perform sign operation","not implemented",z13 +vflndb,"vector fp perform sign operation",implemented,z13 vmaeb,"vector multiply and add even byte",implemented,z13 -vpkg,"vector pack double word","not implemented",z13 +vpkg,"vector pack double word",implemented,z13 vsb,"vector subtract byte",implemented,z13 vchl,"vector compare high logical",implemented,z13 vlvgh,"vector load VR halfword element from GR","implemented",z13 -locghi,"load halfword immediate on condition into 64 bit gpr","not implemented",z13 +locghi,"load halfword immediate on condition into 64 bit gpr",implemented,z13 vmalb,"vector multiply and add low byte",implemented,z13 -vchlgs,"vector compare high logical double word","not implemented",z13 +vchlgs,"vector compare high logical double word",implemented,z13 vstef,"vector store word element","implemented",z13 -lzrf,"load and zero rightmost byte 32->32","not implemented",z13 +lzrf,"load and zero rightmost byte 32->32",implemented,z13 vmrlh,"vector merge low halfword","implemented",z13 -vchbs,"vector compare high byte","not implemented",z13 +vchbs,"vector compare high byte",implemented,z13 vesrlf,"vector element shift right logical mem word",implemented,z13 vmxf,"vector maximum word",implemented,z13 vgmh,"vector generate mask halfword","implemented",z13 vfenezb,"vector find element not equal byte","implemented",z13 -vpklsgs,"vector pack logical saturate double word","not implemented",z13 +vpklsgs,"vector pack logical saturate double word",implemented,z13 vpksg,"vector pack saturate double word","implemented",z13 vfaeh,"vector find any element equal halfword","implemented",z13 vmlof,"vector multiply logical odd word",implemented,z13 vmahh,"vector multiply and add high halfword",implemented,z13 vx,"vector exclusive or","implemented",z13 -vchlfs,"vector compare high logical word","not implemented",z13 +vchlfs,"vector compare high logical word",implemented,z13 vacccq,"vector add with carry compute carry quadword",implemented,z13 vchb,"vector compare high byte",implemented,z13 vmaloh,"vector multiply and add logical odd halfword",implemented,z13 @@ -1090,22 +1090,22 @@ vmxb,"vector maximum byte",implemented,z13 vmnl,"vector minimum logical",implemented,z13 vmng,"vector minimum doubleword",implemented,z13 vchlb,"vector compare high logical byte",implemented,z13 -wfadb,"vector fp add","not implemented",z13 +wfadb,"vector fp add",implemented,z13 vmrl,"vector merge low","implemented",z13 -wfk,"vector fp compare and signal scalar","not implemented",z13 +wfk,"vector fp compare and signal scalar",implemented,z13 vno,"vector nor","implemented",z13 vstrcf,"vector string range compare word",implemented,z13 -vfmsdb,"vector fp multiply and subtract","not implemented",z13 +vfmsdb,"vector fp multiply and subtract",implemented,z13 vavgh,"vector average half word",implemented,z13 -vchlhs,"vector compare high logical half word","not implemented",z13 +vchlhs,"vector compare high logical half word",implemented,z13 vah,"vector add halfword",implemented,z13 vmalhh,"vector multiply and add logical high halfword",implemented,z13 -wldeb,"vector fp load lengthened","not implemented",z13 +wldeb,"vector fp load lengthened",implemented,z13 vmrh,"vector merge high","implemented",z13 -vclgdb,"vector fp convert to logical 64 bit","not implemented",z13 -wfsqdb,"vector fp square root","not implemented",z13 +vclgdb,"vector fp convert to logical 64 bit",implemented,z13 +wfsqdb,"vector fp square root",implemented,z13 vpopct,"vector population count",implemented,z13 -vfenef,"vector find element not equal word","not implemented",z13 +vfenef,"vector find element not equal word",implemented,z13 vgfmf,"vector galois field multiply sum word",implemented,z13 vgmf,"vector generate mask word","implemented",z13 vleg,"vector load double word element","implemented",z13 @@ -1113,8 +1113,8 @@ vmn,"vector minimum",implemented,z13 vrepi,"vector replicate immediate","implemented",z13 vsegb,"vector sign extend byte to double word","implemented",z13 cpxt,"convert from extended dfp to packed","not implemented",z13 -wftcidb,"vector fp test data class immediate","not implemented",z13 -wfchedbs,"vector fp compare high or equal","not implemented",z13 +wftcidb,"vector fp test data class immediate",implemented,z13 +wfchedbs,"vector fp compare high or equal",implemented,z13 vpks,"vector pack saturate","implemented",z13 veslg,"vector element shift left mem doubleword",implemented,z13 vupllb,"vector unpack logical low byte","implemented",z13 @@ -1122,50 +1122,50 @@ vscbig,"vector subtract compute borrow indication doubleword",implemented,z13 vsegh,"vector sign extend halfword to double word","implemented",z13 vsumb,"vector sum across word - byte elements",implemented,z13 vgeg,"vector gather element 8 byte elements","implemented",z13 -vcgd,"vector fp convert to fixed 64 bit","not implemented",z13 +vcgd,"vector fp convert to fixed 64 bit",implemented,z13 vuplhb,"vector unpack logical high byte","implemented",z13 verllv,"vector element rotate left logical reg",implemented,z13 vavgb,"vector average byte",implemented,z13 veclh,"vector element compare logical half word",implemented,z13 -vfmadb,"vector fp multiply and add","not implemented",z13 +vfmadb,"vector fp multiply and add",implemented,z13 vesravb,"vector element shift right arithmetic reg byte",implemented,z13 vmaleb,"vector multiply and add logical even byte",implemented,z13 -vuplf,"vector unpack low word","not implemented",z13 +vuplf,"vector unpack low word",implemented,z13 vsbi,"vector subtract with borrow indication",implemented,z13 vupll,"vector unpack logical low","implemented",z13 -vmrhh,"vector merge high halfword","not implemented",z13 +vmrhh,"vector merge high halfword",implemented,z13 vfenezbs,"vector find element not equal byte",implemented,z13 vmhb,"vector multiply high byte",implemented,z13 -vfmdb,"vector fp multiply","not implemented",z13 +vfmdb,"vector fp multiply",implemented,z13 vesrlg,"vector element shift right logical mem doubleword",implemented,z13 vmahb,"vector multiply and add high byte",implemented,z13 vstrczf,"vector string range compare word",implemented,z13 -wfcedb,"vector fp compare equal","not implemented",z13 +wfcedb,"vector fp compare equal",implemented,z13 vscbih,"vector subtract compute borrow indication halfword",implemented,z13 vlch,"vector load complement halfword",implemented,z13 vfenebs,"vector find element not equal byte",implemented,z13 vpklsh,"vector pack logical saturate halfword","implemented",z13 vlgv,"vector load gr from vr element","implemented",z13 -vchfs,"vector compare high word","not implemented",z13 +vchfs,"vector compare high word",implemented,z13 vctzb,"vector count trailing zeros byte",implemented,z13 vfaef,"vector find any element equal word",implemented,z13 vstrch,"vector string range compare halfword",implemented,z13 -wfidb,"vector load fp integer","not implemented",z13 -vmrhb,"vector merge high byte","not implemented",z13 +wfidb,"vector load fp integer",implemented,z13 +vmrhb,"vector merge high byte",implemented,z13 vuph,"vector unpack high","implemented",z13 vperm,"vector permute","implemented",z13 vrep,"vector replicate","implemented",z13 vmalhb,"vector multiply and add logical high byte",implemented,z13 vleib,"vector load byte element immediate","implemented",z13 vavg,"vector average",implemented,z13 -vfenefs,"vector find element not equal word","not implemented",z13 +vfenefs,"vector find element not equal word",implemented,z13 vsumh,"vector sum across word - halfword elements",implemented,z13 vchh,"vector compare high half word",implemented,z13 -wcdgb,"vector fp convert from fixed 64 bit","not implemented",z13 +wcdgb,"vector fp convert from fixed 64 bit",implemented,z13 verllvb,"vector element rotate left logical reg byte",implemented,z13 vec,"vector element compare",implemented,z13 vpdi,"vector permute double word immediate",implemented,z13 -vfchedb,"vector fp compare high or equal long","not implemented",z13 +vfchedb,"vector fp compare high or equal long",implemented,z13 vchlh,"vector compare high logical half word",implemented,z13 vmaleh,"vector multiply and add logical even halfword",implemented,z13 vstrcb,"vector string range compare byte",implemented,z13 @@ -1177,15 +1177,15 @@ vmo,"vector multiply odd",implemented,z13 vmxg,"vector maximum doubleword",implemented,z13 vsrab,"vector shift right arithmetic by byte",implemented,z13 vsbcbiq,"vector subtract with borrow compute borrow indication quadword",implemented,z13 -wfchdb,"vector fp compare high long","not implemented",z13 +wfchdb,"vector fp compare high long",implemented,z13 vmlhf,"vector multiply logical high word",implemented,z13 vesra,"vector element shift right arithmetic mem",implemented,z13 vmnh,"vector minimum halfword",implemented,z13 -vled,"vector fp load rounded","not implemented",z13 +vled,"vector fp load rounded",implemented,z13 vstrczbs,"vector string range compare byte",implemented,z13 vaccb,"vector add compute carry byte",implemented,z13 vmahf,"vector multiply and add high word",implemented,z13 -wfcedbs,"vector fp compare equal long","not implemented",z13 +wfcedbs,"vector fp compare equal long",implemented,z13 vmeh,"vector multiply even halfword",implemented,z13 vclzb,"vector count leading zeros byte",implemented,z13 vmh,"vector multiply high",implemented,z13 @@ -1193,14 +1193,14 @@ vllez,"vector load logical element and zero",implemented,z13 vnc,"vector and with complement",implemented,z13 vesrlvg,"vector element shift right logical reg doubleword",implemented,z13 vrepif,"vector replicate immediate word",implemented,z13 -vfd,"vector fp divide","not implemented",z13 +vfd,"vector fp divide",implemented,z13 vesrlb,"vector element shift right logical mem byte",implemented,z13 vavglg,"vector average logical double word",implemented,z13 vpksh,"vector pack saturate halfword",implemented,z13 veslv,"vector element shift left reg",implemented,z13 -vone,"vector set to ones","not implemented",z13 +vone,"vector set to ones",implemented,z13 vsrl,"vector shift right logical",implemented,z13 -vcdg,"vector fp convert from fixed 64 bit","not implemented",z13 +vcdg,"vector fp convert from fixed 64 bit",implemented,z13 vmlhw,"vector multiply low halfword",implemented,z13 vscbib,"vector subtract compute borrow indication byte",implemented,z13 vrepib,"vector replicate immediate byte",implemented,z13 @@ -1220,8 +1220,8 @@ verllvf,"vector element rotate left logical reg word",implemented,z13 vsra,"vector shift right arithmetic",implemented,z13 vslb,"vector shift left by byte",implemented,z13 vesravf,"vector element shift right arithmetic reg word",implemented,z13 -vfcedbs,"vector fp compare equal long","not implemented",z13 -vceqbs,"vector compare equal byte","not implemented",z13 +vfcedbs,"vector fp compare equal long",implemented,z13 +vceqbs,"vector compare equal byte",implemented,z13 vsbcbi,"vector subtract with borrow compute borrow indication",implemented,z13 vmle,"vector multiply logical even",implemented,z13 vfaezfs,"vector find any element equal",implemented,z13 @@ -1233,17 +1233,17 @@ vllezh,"vector load logical halfword element and zero",implemented,z13 vmalo,"vector multiply and add logical odd",implemented,z13 vclzh,"vector count leading zeros halfword",implemented,z13 vesravh,"vector element shift right arithmetic reg halfword",implemented,z13 -vceqfs,"vector compare equal word","not implemented",z13 +vceqfs,"vector compare equal word",implemented,z13 vlp,"vector load positive",implemented,z13 -wfmsdb,"vector fp multiply and subtract long","not implemented",z13 +wfmsdb,"vector fp multiply and subtract long",implemented,z13 vstrcbs,"vector string range compare byte",implemented,z13 vaccg,"vector add compute carry doubleword",implemented,z13 -wfsdb,"vector fp subtract long","not implemented",z13 +wfsdb,"vector fp subtract long",implemented,z13 vfee,"vector find element equal","implemented",z13 vmxh,"vector maximum halfword",implemented,z13 vtm,"vector test under mask",implemented,z13 vctzf,"vector count trailing zeros word",implemented,z13 -vfms,"vector fp multiply and subtract","not implemented",z13 +vfms,"vector fp multiply and subtract",implemented,z13 vavgg,"vector average double word",implemented,z13 vistr,"vector isolate string",implemented,z13 vesrlvb,"vector element shift right logical reg byte",implemented,z13 @@ -1252,11 +1252,11 @@ vmah,"vector multiply and add high",implemented,z13 vesrlvh,"vector element shift right logical reg halfword",implemented,z13 vesrah,"vector element shift right arithmetic mem halfword",implemented,z13 vrepig,"vector replicate immediate double word",implemented,z13 -wfddb,"vector fp divide long","not implemented",z13 +wfddb,"vector fp divide long",implemented,z13 vmhf,"vector multiply high word",implemented,z13 vupllf,"vector unpack logical low word",implemented,z13 veslf,"vector element shift left mem word",implemented,z13 -wflpdb,"vector fp perform sign operation long","not implemented",z13 +wflpdb,"vector fp perform sign operation long",implemented,z13 vscbi,"vector subtract compute borrow indication",implemented,z13 vmnlb,"vector minimum logical byte",implemented,z13 veslh,"vector element shift left mem halfword",implemented,z13 @@ -1264,7 +1264,7 @@ vfaebs,"vector find any element equal","implemented",z13 vleb,"vector load byte element",implemented,z13 vfaezb,"vector find any element equal","implemented",z13 vlbb,"vector load to block boundary",implemented,z13 -vflcdb,"vector fp perform sign operation long","not implemented",z13 +vflcdb,"vector fp perform sign operation long",implemented,z13 vmlo,"vector multiply logical odd",implemented,z13 vlgvf,"vector load gr from vr word element",implemented,z13 vavgf,"vector average word",implemented,z13 @@ -1274,37 +1274,37 @@ vsumgh,"vector sum across doubleword - halfword",implemented,z13 vmaeh,"vector multiply and add even halfword",implemented,z13 vmnlh,"vector minimum logical halfword",implemented,z13 vstl,"vector store with length",implemented,z13 -wfmadb,"vector fp multiply and add long","not implemented",z13 +wfmadb,"vector fp multiply and add long",implemented,z13 vme,"vector multiply even",implemented,z13 -wfmdb,"vector fp multiply long","not implemented",z13 -wflcdb,"vector fp perform sign operation long","not implemented",z13 +wfmdb,"vector fp multiply long",implemented,z13 +wflcdb,"vector fp perform sign operation long",implemented,z13 vreph,"vector replicate halfword",implemented,z13 -vclgd,"vector fp convert to logical 64 bit","not implemented",z13 +vclgd,"vector fp convert to logical 64 bit",implemented,z13 vpkls,"vector pack logical saturate",implemented,z13 vsf,"vector subtract word",implemented,z13 -vflpdb,"vector fp perform sign operation long","not implemented",z13 +vflpdb,"vector fp perform sign operation long",implemented,z13 vesrlv,"vector element shift right logical reg",implemented,z13 -vpklsfs,"vector pack logical saturate word","not implemented",z13 -vcdgb,"vector fp convert from fixed 64 bit","not implemented",z13 +vpklsfs,"vector pack logical saturate word",implemented,z13 +vcdgb,"vector fp convert from fixed 64 bit",implemented,z13 verll,"vector element rotate left logical mem",implemented,z13 vfeezf,"vector find element equal word","implemented",z13 -wclgdb,"vector fp convert to logical 64 bit","not implemented",z13 +wclgdb,"vector fp convert to logical 64 bit",implemented,z13 vgfma,"vector galois field multiply sum and accumulate",implemented,z13 vmob,"vector multiply odd byte",implemented,z13 vfeneb,"vector find element not equal byte","implemented",z13 vfene,"vector find element not equal","implemented",z13 vfenezfs,"vector find element not equal word","implemented",z13 vmal,"vector multiply and add low",implemented,z13 -vfchdb,"vector fp compare high long","not implemented",z13 +vfchdb,"vector fp compare high long",implemented,z13 vfeezb,"vector find element equal byte","implemented",z13 vfae,"vector find any element equal","implemented",z13 -vfchdbs,"vector fp compare high long","not implemented long",z13 +vfchdbs,"vector fp compare high long",implemented,z13 vsceg,"vector scatter element 8 byte",implemented,z13 vfeezfs,"vector find element equal word","implemented",z13 vsumgf,"vector sum across doubleword - word",implemented,z13 vmnb,"vector minimum byte",implemented,z13 vlef,"vector load word element",implemented,z13 -vceqgs,"vector compare equal double word","not implemented",z13 +vceqgs,"vector compare equal double word",implemented,z13 vech,"vector element compare half word",implemented,z13 vctz,"vector count trailing zeros",implemented,z13 vmloh,"vector multiply logical odd halfword",implemented,z13 @@ -1312,22 +1312,22 @@ vaccc,"vector add with carry compute carry",implemented,z13 vmale,"vector multiply and add logical even",implemented,z13 vsteh,"vector store halfword element",implemented,z13 vceq,"vector compare equal",implemented,z13 -vfchedbs,"vector fp compare high or equal long","not implemented",z13 +vfchedbs,"vector fp compare high or equal long",implemented,z13 vesl,"vector element shift left mem",implemented,z13 vesrav,"vector element shift right arithmetic reg",implemented,z13 -vfma,"vector fp multiply and add","not implemented",z13 +vfma,"vector fp multiply and add",implemented,z13 vmnlg,"vector minimum logical doubleword",implemented,z13 vclz,"vector count leading zeros",implemented,z13 vmrlf,"vector merge low word",implemented,z13 vistrh,"vector isolate string halfword",implemented,z13 vmxlb,"vector maximum logical byte",implemented,z13 -vfs,"vector fp subtract","not implemented",z13 -vfm,"vector fp multiply","not implemented",z13 +vfs,"vector fp subtract",implemented,z13 +vfm,"vector fp multiply",implemented,z13 vll,"vector load with length",implemented,z13 vleig,"vector load double word element immediate",implemented,z13 vfaezbs,"vector find any element equal","implemented",z13 veslvg,"vector element shift left reg doubleword",implemented,z13 -locfh,"load high on condition from memory","not implemented",z13 +locfh,"load high on condition from memory",implemented,z13 vfeeb,"vector find element equal byte","implemented",z13 vsumq,"vector sum across quadword",implemented,z13 vmleb,"vector multiply logical even byte",implemented,z13 @@ -1335,25 +1335,25 @@ vesrag,"vector element shift right arithmetic mem doubleword",implemented,z13 vceqh,"vector compare equal half word",implemented,z13 vmalf,"vector multiply and add low word",implemented,z13 vstrchs,"vector string range compare halfword",implemented,z13 -vcgdb,"vector fp convert to fixed 64 bit","not implemented",z13 +vcgdb,"vector fp convert to fixed 64 bit",implemented,z13 vsq,"vector subtract quadword",implemented,z13 -vnot,"vector not","not implemented",z13 -vfch,"vector fp compare high","not implemented",z13 -lochi,"load halfword immediate on condition into 32 bit gpr","not implemented",z13 +vnot,"vector not",implemented,z13 +vfch,"vector fp compare high",implemented,z13 +lochi,"load halfword immediate on condition into 32 bit gpr",implemented,z13 verllvh,"vector element rotate left logical reg halfword",implemented,z13 cpdt,"convert from long dfp to packed","not implemented",z13 vrepb,"vector replicate byte","implemented",z13 -ppno,"perform pseudorandom number operation","not implemented",z13 +ppno,"perform pseudorandom number operation",implemented,z13 vfeef,"vector find element equal word","implemented",z13 vac,"vector add with carry",implemented,z13 verimf,"vector element rotate and insert under mask word",implemented,z13 -vfi,"vector load fp integer","not implemented",z13 +vfi,"vector load fp integer",implemented,z13 vistrfs,"vector isolate string word",implemented,z13 vecf,"vector element compare word",implemented,z13 vfeezbs,"vector find element equal byte","implemented",z13 -wflndb,"vector fp perform sign operation long","not implemented",z13 +wflndb,"vector fp perform sign operation long",implemented,z13 vscbif,"vector subtract compute borrow indication word",implemented,z13 -vchhs,"vector compare high half word","not implemented",z13 +vchhs,"vector compare high half word",implemented,z13 vmlb,"vector multiply low byte",implemented,z13 veslvf,"vector element shift left reg word",implemented,z13 vfaefs,"vector find any element equal","implemented",z13 @@ -1370,22 +1370,22 @@ vsh,"vector subtract halfword",implemented,z13 vuplb,"vector unpack low byte",implemented,z13 vsegf,"vector sign extend word to double word",implemented,z13 vmxlf,"vector maximum logical word",implemented,z13 -wcdlgb,"vector fp convert from logical 64 bit","not implemented",z13 +wcdlgb,"vector fp convert from logical 64 bit",implemented,z13 vstrczb,"vector string range compare byte",implemented,z13 vsldb,"vector shift left double by byte",implemented,z13 vesrlh,"vector element shift right logical mem halfword",implemented,z13 cdpt,"convert from packed to long dfp","not implemented",z13 vlcb,"vector load complement byte",implemented,z13 -wfpsodb,"vector fp perform sign operation long","not implemented",z13 +wfpsodb,"vector fp perform sign operation long",implemented,z13 vsum,"vector sum across word",implemented,z13 vfeehs,"vector find element equal halfword",implemented,z13 vml,"vector multiply low",implemented,z13 vuphh,"vector unpack high halfword",implemented,z13 vavglb,"vector average logical byte",implemented,z13 vmlf,"vector multiply low word",implemented,z13 -wledb,"vector fp load rounded long to short","not implemented",z13 +wledb,"vector fp load rounded long to short",implemented,z13 vstrcfs,"vector string range compare word",implemented,z13 -wcgdb,"vector fp convert to fixed 64 bit","not implemented",z13 +wcgdb,"vector fp convert to fixed 64 bit",implemented,z13 vlph,"vector load positive halfword",implemented,z13 vfenezf,"vector find element not equal word",implemented,z13 vseg,"vector sign extend to double word",implemented,z13 @@ -1402,32 +1402,32 @@ vmnlf,"vector minimum logical word",implemented,z13 vlm,"vector load multiple","implemented",z13 vmrlb,"vector merge low byte","implemented",z13 vavglh,"vector average logical half word",implemented,z13 -wfkdb,"vector fp compare and signal scalar","not implemented",z13 +wfkdb,"vector fp compare and signal scalar",implemented,z13 veslb,"vector element shift left mem byte",implemented,z13 -wfchedb,"vector fp compare high or equal","not implemented",z13 +wfchedb,"vector fp compare high or equal",implemented,z13 vllezg,"vector load logical double word element and zero","implemented",z13 vmaob,"vector multiply and add odd byte",implemented,z13 -vmrhf,"vector merge high word","not implemented",z13 +vmrhf,"vector merge high word",implemented,z13 vchg,"vector compare high double word",implemented,z13 -locfhr,"load high on condition from gpr","not implemented",z13 +locfhr,"load high on condition from gpr",implemented,z13 vlpg,"vector load positive doubleword",implemented,z13 -vcdlgb,"vector fp convert from logical 64 bit","not implemented",z13 +vcdlgb,"vector fp convert from logical 64 bit",implemented,z13 vstrczhs,"vector string range compare halfword",implemented,z13 vecb,"vector element compare byte",implemented,z13 vmxlg,"vector maximum logical doubleword",implemented,z13 -vfpso,"vector fp perform sign operation","not implemented",z13 +vfpso,"vector fp perform sign operation",implemented,z13 verim,"vector element rotate and insert under mask",implemented,z13 vsumqf,"vector sum across quadword - word elements",implemented,z13 vfeefs,"vector find element equal word","implemented",z13 -vfche,"vector fp compare high or equal","not implemented",z13 +vfche,"vector fp compare high or equal",implemented,z13 vistrhs,"vector isolate string halfword",implemented,z13 vsl,"vector shift left",implemented,z13 vfenezhs,"vector find element not equal halfword",implemented,z13 vsg,"vector subtract doubleword",implemented,z13 vclzf,"vector count leading zeros word",implemented,z13 -wfcdb,"vector fp compare scalar long","not implemented",z13 +wfcdb,"vector fp compare scalar long",implemented,z13 vmaoh,"vector multiply and add odd halfword",implemented,z13 -vchgs,"vector compare high double word","not implemented",z13 +vchgs,"vector compare high double word",implemented,z13 vchlf,"vector compare high logical word",implemented,z13 va,"vector add",implemented,z13 vmrlg,"vector merge low double word",implemented,z13 @@ -1435,36 +1435,36 @@ vlcg,"vector load complement doubleword",implemented,z13 vceqf,"vector compare equal word",implemented,z13 vacq,"vector add with carry quadword",implemented,z13 vmaof,"vector multiply and add odd word",implemented,z13 -vfadb,"vector fp add long","not implemented",z13 +vfadb,"vector fp add long",implemented,z13 vmlef,"vector multiply logical even word",implemented,z13 -wfc,"vector fp compare scalar","not implemented",z13 +wfc,"vector fp compare scalar",implemented,z13 vmx,"vector maximum",implemented,z13 vmlh,"vector multiply logical high",implemented,z13 vmeb,"vector multiply even byte",implemented,z13 -vfddb,"vector fp divide long","not implemented",z13 -vpkshs,"vector pack saturate halfword","not implemented",z13 -vpkf,"vector pack word","not implemented",z13 +vfddb,"vector fp divide long",implemented,z13 +vpkshs,"vector pack saturate halfword",implemented,z13 +vpkf,"vector pack word",implemented,z13 vlrepg,"vector load and replicate double word elements",implemented,z13 vmaef,"vector multiply and add even word",implemented,z13 vfeneh,"vector find element not equal halfword","implemented",z13 vgfmaf,"vector galois field multiply sum and accumulate word",implemented,z13 vctzg,"vector count trailing zeros doubleword",implemented,z13 -lzrg,"load and zero rightmost byte 64->64","not implemented",z13 +lzrg,"load and zero rightmost byte 64->64",implemented,z13 vmof,"vector multiply odd word",implemented,z13 -vfsqdb,"vector fp square root long","not implemented",z13 +vfsqdb,"vector fp square root long",implemented,z13 vlgvg,"vector load gr from vr double word element",implemented,z13 verllf,"vector element rotate left logical mem word",implemented,z13 verllg,"vector element rotate left logical mem doubleword",implemented,z13 vrepf,"vector replicate word",implemented,z13 vfeezhs,"vector find element equal halfword","implemented",z13 -wfchdbs,"vector fp compare high long","not implemented",z13 -lochhi,"load halfword high immediate on condition","not implemented",z13 -vmalhw,"vector multiply and add low halfword","not implemented",z13 +wfchdbs,"vector fp compare high long",implemented,z13 +lochhi,"load halfword high immediate on condition",implemented,z13 +vmalhw,"vector multiply and add low halfword",implemented,z13 vmlhb,"vector multiply logical high byte",implemented,z13 vfeeh,"vector find element equal halfword",implemented,z13 vgm,"vector generate mask",implemented,z13 vgfmab,"vector galois field multiply sum and accumulate byte",implemented,z13 -vmrhg,"vector merge high double word","not implemented",z13 +vmrhg,"vector merge high double word",implemented,z13 veclg,"vector element compare logical double word",implemented,z13 vl,"vector memory load",implemented,z13 vctzh,"vector count trailing zeros halfword",implemented,z13 @@ -1477,23 +1477,23 @@ vch,"vector compare high",implemented,z13 veclf,"vector element compare logical word",implemented,z13 vgef,"vector gather element 4 byte elements",implemented,z13 vscbiq,"vector subtract compute borrow indication quadword",implemented,z13 -cdgtr,"convert from fixed long dfp","not implemented",z13 +cdgtr,"convert from fixed long dfp",implemented,z13 vesrab,"vector element shift right arithmetic mem byte",implemented,z13 -vfsq,"vector fp square root","not implemented",z13 +vfsq,"vector fp square root",implemented,z13 vscef,"vector scatter element 4 byte",implemented,z13 -vpkh,"vector pack halfword","not implemented",z13 -vfa,"vector fp add","not implemented",z13 +vpkh,"vector pack halfword",implemented,z13 +vfa,"vector fp add",implemented,z13 vo,"vector or",implemented,z13 verllb,"vector element rotate left logical mem byte",implemented,z13 -stocfh,"store high on condition","not implemented",z13 -vchlbs,"vector compare high logical byte","not implemented",z13 +stocfh,"store high on condition",implemented,z13 +vchlbs,"vector compare high logical byte",implemented,z13 vuphf,"vector unpack high word",implemented,z13 vacc,"vector add compute carry",implemented,z13 vistrf,"vector isolate string word",implemented,z13 -vceqhs,"vector compare equal half word","not implemented",z13 -vfidb,"vector load fp integer long","not implemented",z13 +vceqhs,"vector compare equal half word",implemented,z13 +vfidb,"vector load fp integer long",implemented,z13 vupllh,"vector unpack logical low halfword",implemented,z13 -vfce,"vector fp compare equal","not implemented",z13 +vfce,"vector fp compare equal",implemented,z13 vs,"vector subtract",implemented,z13 vfeebs,"vector find element equal byte",implemented,z13 vlvgg,"vector load VR double word element from GR",implemented,z13 diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am index 77c00ba..097c85a 100644 --- a/none/tests/s390x/Makefile.am +++ b/none/tests/s390x/Makefile.am @@ -18,7 +18,8 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ spechelper-cr spechelper-clr \ spechelper-ltr spechelper-or \ spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ - spechelper-tm laa vector lsc2 ppno vector_string vector_integer + spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ + vector_float if BUILD_DFP_TESTS INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 dfptest dfpext dfpconv srnmt pfpo @@ -71,4 +72,4 @@ vector_CFLAGS = $(AM_CFLAGS) -march=z13 lsc2_CFLAGS = -march=z13 -DS390_TESTS_NOCOLOR vector_string_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=5 vector_integer_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 - +vector_float_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 diff --git a/none/tests/s390x/vector.h b/none/tests/s390x/vector.h index adefbcd..de23914 100644 --- a/none/tests/s390x/vector.h +++ b/none/tests/s390x/vector.h @@ -12,17 +12,21 @@ #endif /* Test the instruction exactly one time. */ -#define test_once(insn) test_##insn() +#define test_once(insn) test_##insn () /* Test the instruction exactly S390_TEST_COUNT times. "..." arguments specifies code which must be executed after each tests */ #define test(insn, ...) \ for(iteration = 0; iteration < S390_TEST_COUNT; iteration++) \ - { test_##insn(); \ + { test_once(insn); \ __VA_ARGS__; \ } +#define test_with_selective_printing(insn, info) \ + for(iteration = 0; iteration < S390_TEST_COUNT; iteration++) \ + { test_ ## insn ## _selective(info); } + #ifdef __GNUC__ /* GCC complains about __int128 with -pedantic */ /* Hope that we will have int128_t in C standard someday. */ @@ -38,18 +42,67 @@ typedef union { uint32_t u32[4]; int32_t s32[4]; + float f32[4]; uint64_t u64[2]; int64_t s64[2]; + double f64[2]; unsigned __int128 u128[1]; __int128 s128[1]; } V128; +typedef enum { + V128_NO_PRINTING = 0, + V128_V_RES_AS_INT = 1 << 0, + V128_V_ARG1_AS_INT = 1 << 1, + V128_V_ARG2_AS_INT = 1 << 2, + V128_V_ARG3_AS_INT = 1 << 3, + V128_V_RES_AS_FLOAT64 = 1 << 4, + V128_V_ARG1_AS_FLOAT64 = 1 << 5, + V128_V_ARG2_AS_FLOAT64 = 1 << 6, + V128_V_ARG3_AS_FLOAT64 = 1 << 7, + V128_V_RES_AS_FLOAT32 = 1 << 8, + V128_V_ARG1_AS_FLOAT32 = 1 << 9, + V128_V_ARG2_AS_FLOAT32 = 1 << 10, + V128_V_ARG3_AS_FLOAT32 = 1 << 11, + V128_R_RES = 1 << 12, + V128_R_ARG1 = 1 << 13, + V128_R_ARG2 = 1 << 14, + V128_R_ARG3 = 1 << 15, + V128_V_RES_EVEN_ONLY = 1 << 16, + V128_V_RES_ZERO_ONLY = 1 << 17, + V128_PRINT_ALL = (V128_V_RES_AS_INT | + V128_V_ARG1_AS_INT | + V128_V_ARG2_AS_INT | + V128_V_ARG3_AS_INT | + V128_R_RES | + V128_R_ARG1 | + V128_R_ARG2 | + V128_R_ARG3), +} s390x_test_usageInfo; + void print_hex(const V128 value) { printf("%016lx | %016lx\n", value.u64[0], value.u64[1]); } +void print_f32(const V128 value, int even_only, int zero_only) { + if (zero_only) + printf("%a | -- | -- | --\n", value.f32[0]); + else if (even_only) + printf("%a | -- | %a | --\n", value.f32[0], value.f32[2]); + else + printf("%a | %a | %a | %a\n", + value.f32[0], value.f32[1], value.f32[2], value.f32[3]); +} + +void print_f64(const V128 value, int zero_only) { + if (zero_only) + printf("%a | --\n", value.f64[0]); + else + printf("%a | %a\n", value.f64[0], value.f64[1]); +} + void print_uint64_t(const uint64_t value) { printf("%016lx\n", value); } @@ -118,7 +171,7 @@ void randomize_memory_pool() */ #define s390_test_generate(insn, asm_string) \ -static void test_##insn() \ +static void test_##insn##_selective(const s390x_test_usageInfo info) \ { \ V128 v_result = { .u64 = {0ULL, 0ULL} }; \ V128 v_arg1; \ @@ -138,6 +191,7 @@ static void test_##insn() \ "vl %%v2, %[v_arg2]\n" \ "vl %%v3, %[v_arg3]\n" \ "vone %%v5\n" \ + "srnmb 1(0)\n " \ asm_string "\n"\ "vst %%v5, %[v_result]\n" \ "vst %%v1, %[v_arg1]\n" \ @@ -162,14 +216,49 @@ static void test_##insn() \ "v1", "v2", "v3", "v5"); \ \ printf("insn %s:\n", #insn); \ - printf(" v_arg1 = "); print_hex(v_arg1); \ - printf(" v_arg2 = "); print_hex(v_arg2); \ - printf(" v_arg3 = "); print_hex(v_arg3); \ - printf(" v_result = "); print_hex(v_result); \ - printf(" r_arg1 = "); print_uint64_t(r_arg1); \ - printf(" r_arg2 = "); print_uint64_t(r_arg2); \ - printf(" r_arg3 = "); print_uint64_t(r_arg3); \ - printf(" r_result = "); print_uint64_t(r_result); \ + if (info & V128_V_ARG1_AS_INT) \ + {printf(" v_arg1 = "); print_hex(v_arg1);} \ + if (info & V128_V_ARG2_AS_INT) \ + {printf(" v_arg2 = "); print_hex(v_arg2);} \ + if (info & V128_V_ARG3_AS_INT) \ + {printf(" v_arg3 = "); print_hex(v_arg3);} \ + if (info & V128_V_RES_AS_INT) \ + {printf(" v_result = "); print_hex(v_result);} \ + \ + if (info & V128_V_ARG1_AS_FLOAT64) \ + {printf(" v_arg1 = "); print_f64(v_arg1, 0);} \ + if (info & V128_V_ARG2_AS_FLOAT64) \ + {printf(" v_arg2 = "); print_f64(v_arg2, 0);} \ + if (info & V128_V_ARG3_AS_FLOAT64) \ + {printf(" v_arg3 = "); print_f64(v_arg3, 0);} \ + if (info & V128_V_RES_AS_FLOAT64) { \ + printf(" v_result = "); \ + print_f64(v_result, info & V128_V_RES_ZERO_ONLY); \ + } \ + \ + if (info & V128_V_ARG1_AS_FLOAT32) \ + {printf(" v_arg1 = "); print_f32(v_arg1, 0, 0);} \ + if (info & V128_V_ARG2_AS_FLOAT32) \ + {printf(" v_arg2 = "); print_f32(v_arg2, 0, 0);} \ + if (info & V128_V_ARG3_AS_FLOAT32) \ + {printf(" v_arg3 = "); print_f32(v_arg3, 0, 0);} \ + if (info & V128_V_RES_AS_FLOAT32) { \ + printf(" v_result = "); \ + print_f32(v_result, info & V128_V_RES_EVEN_ONLY, \ + info & V128_V_RES_ZERO_ONLY); \ + } \ + if (info & V128_R_ARG1) \ + {printf(" r_arg1 = "); print_uint64_t(r_arg1);} \ + if (info & V128_R_ARG2) \ + {printf(" r_arg2 = "); print_uint64_t(r_arg2);} \ + if (info & V128_R_ARG3) \ + {printf(" r_arg3 = "); print_uint64_t(r_arg3);} \ + if (info & V128_R_RES) \ + {printf(" r_result = "); print_uint64_t(r_result);} \ +} \ +__attribute__((unused)) static void test_##insn() \ +{ \ + test_##insn##_selective (V128_PRINT_ALL); \ } /* Stores CC to %[r_result]. diff --git a/none/tests/s390x/vector_float.c b/none/tests/s390x/vector_float.c new file mode 100644 index 0000000..52f3a29 --- /dev/null +++ b/none/tests/s390x/vector_float.c @@ -0,0 +1,275 @@ +#include "vector.h" + +#define s390_generate_float_test(insn, asm_string) \ + s390_test_generate(v##insn##00, "v" #insn " " asm_string ",0, 0") \ + s390_test_generate(v##insn##01, "v" #insn " " asm_string ",0, 1") \ + s390_test_generate(v##insn##03, "v" #insn " " asm_string ",0, 3") \ + s390_test_generate(v##insn##04, "v" #insn " " asm_string ",0, 4") \ + s390_test_generate(v##insn##05, "v" #insn " " asm_string ",0, 5") \ + s390_test_generate(v##insn##06, "v" #insn " " asm_string ",0, 6") \ + s390_test_generate(v##insn##07, "v" #insn " " asm_string ",0, 7") \ + s390_test_generate(w##insn##00, "w" #insn " " asm_string ",0, 0") \ + s390_test_generate(w##insn##01, "w" #insn " " asm_string ",0, 1") \ + s390_test_generate(w##insn##03, "w" #insn " " asm_string ",0, 3") \ + s390_test_generate(w##insn##04, "w" #insn " " asm_string ",0, 4") \ + s390_test_generate(w##insn##05, "w" #insn " " asm_string ",0, 5") \ + s390_test_generate(w##insn##06, "w" #insn " " asm_string ",0, 6") \ + s390_test_generate(w##insn##07, "w" #insn " " asm_string ",0, 7") \ + +#define s390_call_float_test(insn, info) \ + test_with_selective_printing(v ##insn ## 00, info); \ + test_with_selective_printing(v ##insn ## 01, info); \ + test_with_selective_printing(v ##insn ## 03, info); \ + test_with_selective_printing(v ##insn ## 04, info); \ + test_with_selective_printing(v ##insn ## 05, info); \ + test_with_selective_printing(v ##insn ## 06, info); \ + test_with_selective_printing(v ##insn ## 07, info); \ + test_with_selective_printing(w ##insn ## 00, info | V128_V_RES_ZERO_ONLY); \ + test_with_selective_printing(w ##insn ## 01, info | V128_V_RES_ZERO_ONLY); \ + test_with_selective_printing(w ##insn ## 03, info | V128_V_RES_ZERO_ONLY); \ + test_with_selective_printing(w ##insn ## 04, info | V128_V_RES_ZERO_ONLY); \ + test_with_selective_printing(w ##insn ## 05, info | V128_V_RES_ZERO_ONLY); \ + test_with_selective_printing(w ##insn ## 06, info | V128_V_RES_ZERO_ONLY); \ + test_with_selective_printing(w ##insn ## 07, info | V128_V_RES_ZERO_ONLY); \ + +s390_generate_float_test(cdgb, " %%v5, %%v1") +s390_generate_float_test(cdlgb, " %%v5, %%v1") +s390_generate_float_test(cgdb, " %%v5, %%v1") +s390_generate_float_test(clgdb, " %%v5, %%v1") +s390_generate_float_test(fidb, " %%v5, %%v1") +s390_generate_float_test(ledb, " %%v5, %%v1") + +s390_test_generate(vldeb, "vldeb %%v5, %%v1") +s390_test_generate(wldeb, "wldeb %%v5, %%v1") + +s390_test_generate(vflcdb, "vflcdb %%v5, %%v1") +s390_test_generate(wflcdb, "wflcdb %%v5, %%v1") +s390_test_generate(vflndb, "vflndb %%v5, %%v1") +s390_test_generate(wflndb, "wflndb %%v5, %%v1") +s390_test_generate(vflpdb, "vflpdb %%v5, %%v1") +s390_test_generate(wflpdb, "wflpdb %%v5, %%v1") + +s390_test_generate(vfadb, "vfadb %%v5, %%v1, %%v2") +s390_test_generate(wfadb, "wfadb %%v5, %%v1, %%v2") +s390_test_generate(vfsdb, "vfsdb %%v5, %%v1, %%v2") +s390_test_generate(wfsdb, "wfsdb %%v5, %%v1, %%v2") +s390_test_generate(vfmdb, "vfmdb %%v5, %%v1, %%v2") +s390_test_generate(wfmdb, "wfmdb %%v5, %%v1, %%v2") +s390_test_generate(vfddb, "vfddb %%v5, %%v1, %%v2") +s390_test_generate(wfddb, "wfddb %%v5, %%v1, %%v2") + +s390_test_generate(vfsqdb, "vfsqdb %%v5, %%v1") +s390_test_generate(wfsqdb, "wfsqdb %%v5, %%v1") + +s390_test_generate(vfmadb, "vfmadb %%v5, %%v1, %%v2, %%v3") +s390_test_generate(wfmadb, "wfmadb %%v5, %%v1, %%v2, %%v3") +s390_test_generate(vfmsdb, "vfmsdb %%v5, %%v1, %%v2, %%v3") +s390_test_generate(wfmsdb, "wfmsdb %%v5, %%v1, %%v2, %%v3") + +s390_test_generate(wfcdb, "wfcdb %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(wfkdb, "wfkdb %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) + +s390_test_generate(vfcedb, "vfcedb %%v5, %%v1, %%v2") +s390_test_generate(wfcedb, "wfcedb %%v5, %%v1, %%v2") +s390_test_generate(vfcedbs, "vfcedbs %%v5, %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(wfcedbs, "wfcedbs %%v5, %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) + +s390_test_generate(vfchdb, "vfchdb %%v5, %%v1, %%v2") +s390_test_generate(wfchdb, "wfchdb %%v5, %%v1, %%v2") +s390_test_generate(vfchdbs, "vfchdbs %%v5, %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(wfchdbs, "wfchdbs %%v5, %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) + +s390_test_generate(vfchedb, "vfchedb %%v5, %%v1, %%v2") +s390_test_generate(wfchedb, "wfchedb %%v5, %%v1, %%v2") +s390_test_generate(vfchedbs, "vfchedbs %%v5, %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(wfchedbs, "wfchedbs %%v5, %%v1, %%v2\n" S390_TEST_PUT_CC_TO_RESULT) + +s390_test_generate(vftcidb0, "vftcidb %%v5, %%v1, 0 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb1, "vftcidb %%v5, %%v1, 1 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb2, "vftcidb %%v5, %%v1, 2 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb3, "vftcidb %%v5, %%v1, 0 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb4, "vftcidb %%v5, %%v1, 4 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb8, "vftcidb %%v5, %%v1, 8 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb16, "vftcidb %%v5, %%v1, 16 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb32, "vftcidb %%v5, %%v1, 32 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb64, "vftcidb %%v5, %%v1, 64 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb128, "vftcidb %%v5, %%v1, 128 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb256, "vftcidb %%v5, %%v1, 256 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb512, "vftcidb %%v5, %%v1, 512 \n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb1024, "vftcidb %%v5, %%v1, 1024\n" S390_TEST_PUT_CC_TO_RESULT) +s390_test_generate(vftcidb2048, "vftcidb %%v5, %%v1, 2048\n" S390_TEST_PUT_CC_TO_RESULT) + +int main() +{ + size_t iteration = 0; + + s390_call_float_test(cdgb, (V128_V_RES_AS_FLOAT64 | V128_V_ARG1_AS_INT)); + s390_call_float_test(cdlgb, (V128_V_RES_AS_FLOAT64 | V128_V_ARG1_AS_INT)); + s390_call_float_test(cgdb, (V128_V_RES_AS_INT | V128_V_ARG1_AS_FLOAT64)); + s390_call_float_test(clgdb, (V128_V_RES_AS_INT | V128_V_ARG1_AS_FLOAT64)); + s390_call_float_test(fidb, (V128_V_RES_AS_FLOAT64 | V128_V_ARG1_AS_FLOAT64)); + s390_call_float_test(ledb, (V128_V_RES_AS_FLOAT32 | V128_V_RES_EVEN_ONLY | + V128_V_ARG1_AS_FLOAT64)); + + test_with_selective_printing(vldeb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wldeb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + + test_with_selective_printing(vflcdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wflcdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(vflndb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wflndb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(vflpdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wflpdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + + test_with_selective_printing(vfadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(vfsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(vfmdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfmdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(vfddb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfddb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + + test_with_selective_printing(vfsqdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wfsqdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + + test_with_selective_printing(vfmadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_V_ARG3_AS_FLOAT64)); + test_with_selective_printing(wfmadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_V_ARG3_AS_FLOAT64)); + test_with_selective_printing(vfmsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_V_ARG3_AS_FLOAT64)); + test_with_selective_printing(wfmsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_V_ARG3_AS_FLOAT64)); + + test_with_selective_printing(wfcdb, (V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(wfkdb, (V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + + test_with_selective_printing(vfcedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfcedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(vfcedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(wfcedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + + test_with_selective_printing(vfchdb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfchdb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(vfchdbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(wfchdbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + + test_with_selective_printing(vfchedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfchedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(vfchedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(wfchedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | + V128_R_RES)); + + test_with_selective_printing(vftcidb0, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb1, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb2, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb3, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb4, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb8, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb16, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb32, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb64, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb128, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb256, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb512, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb1024, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + test_with_selective_printing(vftcidb2048, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_R_RES)); + + return 0; +} diff --git a/none/tests/s390x/vector_float.stderr.exp b/none/tests/s390x/vector_float.stderr.exp new file mode 100644 index 0000000..139597f --- /dev/null +++ b/none/tests/s390x/vector_float.stderr.exp @@ -0,0 +1,2 @@ + + diff --git a/none/tests/s390x/vector_float.stdout.exp b/none/tests/s390x/vector_float.stdout.exp new file mode 100644 index 0000000..eac5250 --- /dev/null +++ b/none/tests/s390x/vector_float.stdout.exp @@ -0,0 +1,1808 @@ +insn vcdgb00: + v_arg1 = 0d6a95fac528657d | 501eefeec0d8b847 + v_result = 0x1.ad52bf58a50cap+59 | 0x1.407bbfbb0362ep+62 +insn vcdgb00: + v_arg1 = e540bc6839c44b4a | 36ed3550df9899d8 + v_result = -0x1.abf4397c63bb4p+60 | 0x1.b769aa86fcc4cp+61 +insn vcdgb00: + v_arg1 = 979569ee6d5cbcd8 | 966cf73d98a42d54 + v_result = -0x1.a1... [truncated message content] |
|
From: Andreas A. <ar...@so...> - 2018-11-30 14:13:04
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=600a0099a1eb2335a3f9563534c112e11817002b commit 600a0099a1eb2335a3f9563534c112e11817002b Author: Vadim Barkov <vb...@gm...> Date: Fri Oct 5 13:51:49 2018 +0300 Bug 385411 s390x: Add z13 vector floating point support This adds support for the z/Architecture vector FP instructions that were introduced with z13. The patch was contributed by Vadim Barkov, with some clean-up and minor adjustments by Andreas Arnez. Diff: --- NEWS | 1 + VEX/priv/guest_s390_defs.h | 10 +- VEX/priv/guest_s390_helpers.c | 47 +++ VEX/priv/guest_s390_toIR.c | 875 ++++++++++++++++++++++++++++++++++++++++-- VEX/priv/host_s390_defs.c | 240 +++++++++++- VEX/priv/host_s390_defs.h | 16 +- VEX/priv/host_s390_isel.c | 82 +++- 7 files changed, 1231 insertions(+), 40 deletions(-) diff --git a/NEWS b/NEWS index bfa7162..ffaabd7 100644 --- a/NEWS +++ b/NEWS @@ -58,6 +58,7 @@ where XXXXXX is the bug number as listed below. 397187 z13 vector register support for vgdb gdbserver 401277 More bugs in z13 support 401112 LLVM 5.0 generates comparison against partially initialized data +385411 s390x: z13 vector floating-point instructions not implemented Release 3.14.0 (9 October 2018) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h index 3bfecbe..d72cc9f 100644 --- a/VEX/priv/guest_s390_defs.h +++ b/VEX/priv/guest_s390_defs.h @@ -281,7 +281,11 @@ enum { S390_VEC_OP_VMALH = 13, S390_VEC_OP_VCH = 14, S390_VEC_OP_VCHL = 15, - S390_VEC_OP_LAST = 16 // supposed to be the last element in enum + S390_VEC_OP_VFCE = 16, + S390_VEC_OP_VFCH = 17, + S390_VEC_OP_VFCHE = 18, + S390_VEC_OP_VFTCI = 19, + S390_VEC_OP_LAST = 20 // supposed to be the last element in enum } s390x_vec_op_t; /* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one @@ -300,8 +304,10 @@ typedef union { unsigned int m4 : 4; // field m4 of insn or zero if it's missing unsigned int m5 : 4; // field m5 of insn or zero if it's missing + unsigned int m6 : 4; // field m6 of insn or zero if it's missing + unsigned int i3 : 12; // field i3 of insn or zero if it's missing unsigned int read_only: 1; // don't write result to Guest State - unsigned int reserved : 27; // reserved for future + unsigned int reserved : 11; // reserved for future }; ULong serialized; } s390x_vec_op_details_t; diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c index d9773e7..5877743 100644 --- a/VEX/priv/guest_s390_helpers.c +++ b/VEX/priv/guest_s390_helpers.c @@ -2498,6 +2498,10 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, {0xe7, 0xa9}, /* VMALH */ {0xe7, 0xfb}, /* VCH */ {0xe7, 0xf9}, /* VCHL */ + {0xe7, 0xe8}, /* VFCE */ + {0xe7, 0xeb}, /* VFCH */ + {0xe7, 0xea}, /* VFCHE */ + {0xe7, 0x4a} /* VFTCI */ }; union { @@ -2525,6 +2529,28 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, unsigned int rxb : 4; unsigned int op2 : 8; } VRRd; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt v3 : 4; + UInt : 4; + UInt m6 : 4; + UInt m5 : 4; + UInt m4 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRRc; + struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt i3 : 12; + UInt m5 : 4; + UInt m4 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRIe; UChar bytes[6]; } the_insn; @@ -2578,6 +2604,27 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, the_insn.VRRd.m6 = d->m5; break; + case S390_VEC_OP_VFCE: + case S390_VEC_OP_VFCH: + case S390_VEC_OP_VFCHE: + the_insn.VRRc.v1 = 1; + the_insn.VRRc.v2 = 2; + the_insn.VRRc.v3 = 3; + the_insn.VRRc.rxb = 0b1110; + the_insn.VRRc.m4 = d->m4; + the_insn.VRRc.m5 = d->m5; + the_insn.VRRc.m6 = d->m6; + break; + + case S390_VEC_OP_VFTCI: + the_insn.VRIe.v1 = 1; + the_insn.VRIe.v2 = 2; + the_insn.VRIe.rxb = 0b1100; + the_insn.VRIe.i3 = d->i3; + the_insn.VRIe.m4 = d->m4; + the_insn.VRIe.m5 = d->m5; + break; + default: vex_printf("operation = %d\n", d->op); vpanic("s390x_dirtyhelper_vec_op: unknown operation"); diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 50a5a41..1c4ac39 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -86,6 +86,7 @@ typedef enum { S390_DECODE_UNKNOWN_INSN, S390_DECODE_UNIMPLEMENTED_INSN, S390_DECODE_UNKNOWN_SPECIAL_INSN, + S390_DECODE_SPECIFICATION_EXCEPTION, S390_DECODE_ERROR } s390_decode_t; @@ -421,6 +422,26 @@ yield_if(IRExpr *condition) S390X_GUEST_OFFSET(guest_IA))); } +/* Convenience macro to yield a specification exception if the given condition + is not met. Used to pass this type of decoding error up through the call + chain. */ +#define s390_insn_assert(mnm, cond) \ + do { \ + if (!(cond)) { \ + dis_res->whatNext = Dis_StopHere; \ + dis_res->jk_StopHere = Ijk_NoDecode; \ + return (mnm); \ + } \ + } while (0) + +/* Convenience function to check for a specification exception. */ +static Bool +is_specification_exception(void) +{ + return (dis_res->whatNext == Dis_StopHere && + dis_res->jk_StopHere == Ijk_NoDecode); +} + static __inline__ IRExpr *get_fpr_dw0(UInt); static __inline__ void put_fpr_dw0(UInt, IRExpr *); static __inline__ IRExpr *get_dpr_dw0(UInt); @@ -1770,6 +1791,11 @@ s390_vr_get_type(const UChar m) /* Determine if Zero Search (ZS) flag is set in m field */ #define s390_vr_is_zs_set(m) (((m) & 0b0010) != 0) +/* Check if the "Single-Element-Control" bit is set. + Used in vector FP instructions. + */ +#define s390_vr_is_single_element_control_set(m) (((m) & 0x8) != 0) + /* Generates arg1 < arg2 (or arg1 <= arg2 if allow_equal == True) expression. Arguments must have V128 type and are treated as unsigned 128-bit numbers. */ @@ -2001,12 +2027,14 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index) return vr_offset(archreg) + sizeof(UShort) * index; case Ity_I32: + case Ity_F32: if(index > 3) { goto invalidIndex; } return vr_offset(archreg) + sizeof(UInt) * index; case Ity_I64: + case Ity_F64: if(index > 1) { goto invalidIndex; } @@ -2237,8 +2265,8 @@ encode_bfp_rounding_mode(UChar mode) case S390_BFP_ROUND_PER_FPC: rm = get_bfp_rounding_mode_from_fpc(); break; - case S390_BFP_ROUND_NEAREST_AWAY: /* not supported */ - case S390_BFP_ROUND_PREPARE_SHORT: /* not supported */ + case S390_BFP_ROUND_NEAREST_AWAY: rm = mkU32(Irrm_NEAREST_TIE_AWAY_0); break; + case S390_BFP_ROUND_PREPARE_SHORT: rm = mkU32(Irrm_PREPARE_SHORTER); break; case S390_BFP_ROUND_NEAREST_EVEN: rm = mkU32(Irrm_NEAREST); break; case S390_BFP_ROUND_ZERO: rm = mkU32(Irrm_ZERO); break; case S390_BFP_ROUND_POSINF: rm = mkU32(Irrm_PosINF); break; @@ -3524,6 +3552,26 @@ s390_format_VRI_VVIM(const HChar *(*irgen)(UChar v1, UChar v3, UShort i2, UChar s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v3, i2, m4); } +static void +s390_format_VRI_VVIMM(const HChar *(*irgen)(UChar v1, UChar v2, UShort i3, + UChar m4, UChar m5), + UChar v1, UChar v2, UShort i3, UChar m4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, i3, m4, m5); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, i3, m4, m5); +} static void s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, @@ -3680,7 +3728,7 @@ s390_format_VRV_VVRDMT(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar m3) static void -s390_format_VRRd_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, +s390_format_VRR_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6), UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6, UChar rxb) @@ -3794,6 +3842,92 @@ s390_format_VRRd_VVVVM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, } +static void +s390_format_VRRa_VVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, + UChar m4, UChar m5), + UChar v1, UChar v2, UChar m3, UChar m4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m4, m5); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, m3, m4, m5); +} + +static void +s390_format_VRRa_VVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar m4, UChar m5), + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, m4, m5); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), mnm, v1, v2, v3, m4, m5); +} + +static void +s390_format_VRRa_VVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, + UChar m4), + UChar v1, UChar v2, UChar m3, UChar m4, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m4); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v2, m3, m4); +} + +static void +s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + UChar m4, UChar m5, UChar m6), + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, m4, m5, m6); + + if (vex_traceflags & VEX_TRACE_FE) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), + mnm, v1, v2, v3, m4, m5, m6); +} + /*------------------------------------------------------------*/ /*--- Build IR for opcodes ---*/ /*------------------------------------------------------------*/ @@ -17895,6 +18029,575 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) return "vmalh"; } +static void +s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, + UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4); + UChar maxIndex = isSingleElementOp ? 0 : 1; + + /* For Iop_F32toF64 we do this: + f32[0] -> f64[0] + f32[2] -> f64[1] + + For Iop_F64toF32 we do this: + f64[0] -> f32[0] + f64[1] -> f32[2] + + The magic below with scaling factors is used to achieve the logic + described above. + */ + const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1; + const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 2 : 1; + + const Bool isUnary = (op == Iop_F32toF64); + for (UChar i = 0; i <= maxIndex; i++) { + IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor); + IRExpr* result; + if (!isUnary) { + result = binop(op, + mkexpr(encode_bfp_rounding_mode(m5)), + argument); + } else { + result = unop(op, argument); + } + put_vr(v1, toType, i * destinationIndexScaleFactor, result); + } + + if (isSingleElementOp) { + put_vr_dw1(v1, mkU64(0)); + } +} + +static const HChar * +s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vcdg", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + + return "vcdg"; +} + +static const HChar * +s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vcdlg", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + + return "vcdlg"; +} + +static const HChar * +s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vcgd", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + + return "vcgd"; +} + +static const HChar * +s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vclgd", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + + return "vclgd"; +} + +static const HChar * +s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vfi", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, + v1, v2, m3, m4, m5); + + return "vcgld"; +} + +static const HChar * +s390_irgen_VLDE(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vlde", m3 == 2); + + s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5); + + return "vlde"; +} + +static const HChar * +s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vled", m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5); + + return "vled"; +} + +static const HChar * +s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + s390_insn_assert("vfpso", m3 == 3); + + IRExpr* result; + switch (m5) { + case 0: { + /* Invert sign */ + if (!s390_vr_is_single_element_control_set(m4)) { + result = unop(Iop_Neg64Fx2, get_vr_qw(v2)); + } + else { + result = binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))), + mkU64(0)); + } + break; + } + + case 1: { + /* Set sign to negative */ + IRExpr* highHalf = mkU64(0x8000000000000000ULL); + if (!s390_vr_is_single_element_control_set(m4)) { + IRExpr* lowHalf = highHalf; + IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf); + result = binop(Iop_OrV128, get_vr_qw(v2), mask); + } + else { + result = binop(Iop_64HLtoV128, + binop(Iop_Or64, get_vr_dw0(v2), highHalf), + mkU64(0ULL)); + } + + break; + } + + case 2: { + /* Set sign to positive */ + if (!s390_vr_is_single_element_control_set(m4)) { + result = unop(Iop_Abs64Fx2, get_vr_qw(v2)); + } + else { + result = binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))), + mkU64(0)); + } + + break; + } + + default: + vpanic("s390_irgen_VFPSO: Invalid m5 value"); + } + + put_vr_qw(v1, result); + if (s390_vr_is_single_element_control_set(m4)) { + put_vr_dw1(v1, mkU64(0ULL)); + } + + return "vfpso"; +} + +static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp, + UChar v1, UChar v2, UChar v3, UChar m4, + UChar m5) +{ + IRExpr* result; + if (!s390_vr_is_single_element_control_set(m5)) { + result = triop(generalOp, get_bfp_rounding_mode_from_fpc(), + get_vr_qw(v2), get_vr_qw(v3)); + } else { + IRExpr* highHalf = triop(singleElementOp, + get_bfp_rounding_mode_from_fpc(), + get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + +static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp, + UChar v1, UChar v2, UChar m3, UChar m4) +{ + IRExpr* result; + if (!s390_vr_is_single_element_control_set(m4)) { + result = binop(generalOp, get_bfp_rounding_mode_from_fpc(), + get_vr_qw(v2)); + } + else { + IRExpr* highHalf = binop(singleElementOp, + get_bfp_rounding_mode_from_fpc(), + get_vr(v2, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + + +static void +s390_vector_fp_mulAddOrSub(IROp singleElementOp, + UChar v1, UChar v2, UChar v3, UChar v4, + UChar m5, UChar m6) +{ + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + IRTemp irrm_temp = newTemp(Ity_I32); + assign(irrm_temp, get_bfp_rounding_mode_from_fpc()); + IRExpr* irrm = mkexpr(irrm_temp); + IRExpr* result; + IRExpr* highHalf = qop(singleElementOp, + irrm, + get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0), + get_vr(v4, Ity_F64, 0)); + + if (isSingleElementOp) { + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + mkU64(0ULL)); + } else { + IRExpr* lowHalf = qop(singleElementOp, + irrm, + get_vr(v2, Ity_F64, 1), + get_vr(v3, Ity_F64, 1), + get_vr(v4, Ity_F64, 1)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), + unop(Iop_ReinterpF64asI64, lowHalf)); + } + + put_vr_qw(v1, result); +} + +static const HChar * +s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfa", m4 == 3); + s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5); + return "vfa"; +} + +static const HChar * +s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfs", m4 == 3); + s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5); + return "vfs"; +} + +static const HChar * +s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfm", m4 == 3); + s390x_vec_fp_binary_op(Iop_Mul64Fx2, Iop_MulF64, v1, v2, v3, m4, m5); + return "vfm"; +} + +static const HChar * +s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + s390_insn_assert("vfd", m4 == 3); + s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5); + return "vfd"; +} + +static const HChar * +s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_insn_assert("vfsq", m3 == 3); + s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4); + + return "vfsq"; +} + +static const HChar * +s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + s390_insn_assert("vfma", m6 == 3); + s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6); + return "vfma"; +} + +static const HChar * +s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + s390_insn_assert("vfms", m6 == 3); + s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6); + return "vfms"; +} + +static const HChar * +s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_insn_assert("wfc", m3 == 3); + s390_insn_assert("wfc", m4 == 0); + + IRTemp cc_vex = newTemp(Ity_I32); + assign(cc_vex, binop(Iop_CmpF64, + get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0))); + + IRTemp cc_s390 = newTemp(Ity_I32); + assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex)); + s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False); + + return "wfc"; +} + +static const HChar * +s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_irgen_WFC(v1, v2, m3, m4); + + return "wfk"; +} + +static const HChar * +s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + s390_insn_assert("vfce", m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (!isSingleElementOp) { + put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3))); + } else { + IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, + mkU32(Ircr_EQ)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfce"; +} + +static const HChar * +s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + vassert(m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (!isSingleElementOp) { + put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2))); + } else { + IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, + mkU32(Ircr_GT)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } + else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCH; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfch"; +} + +static const HChar * +s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + s390_insn_assert("vfche", m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (!isSingleElementOp) { + put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2))); + } + else { + IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0), + get_vr(v2, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, + mkU32(Ircr_LT)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } + else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCHE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = elementSize; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfche"; +} + +static const HChar * +s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) +{ + s390_insn_assert("vftci", m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFTCI; + details.v1 = v1; + details.v2 = v2; + details.i3 = i3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + + return "vftci"; +} + /* New insns are added here. If an insn is contingent on a facility being installed also check whether the list of supported facilities in function @@ -19358,6 +20061,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int op2 : 8; } VRR; struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt v3 : 4; + UInt : 4; + UInt m5 : 4; + UInt m4 : 4; + UInt m3 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRRa; + struct { unsigned int op1 : 8; unsigned int v1 : 4; unsigned int v2 : 4; @@ -19370,6 +20085,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int op2 : 8; } VRRd; struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int m6 : 4; + unsigned int : 4; + unsigned int m5 : 4; + unsigned int v4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRe; + struct { unsigned int op1 : 8; unsigned int v1 : 4; unsigned int v3 : 4; @@ -19390,6 +20117,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int op2 : 8; } VRId; struct { + UInt op1 : 8; + UInt v1 : 4; + UInt v2 : 4; + UInt i3 : 12; + UInt m5 : 4; + UInt m4 : 4; + UInt rxb : 4; + UInt op2 : 8; + } VRIe; + struct { unsigned int op1 : 8; unsigned int v1 : 4; unsigned int v3 : 4; @@ -19974,7 +20711,10 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe70000000046ULL: s390_format_VRI_VIM(s390_irgen_VGM, ovl.fmt.VRI.v1, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; - case 0xe7000000004aULL: /* VFTCI */ goto unimplemented; + case 0xe7000000004aULL: s390_format_VRI_VVIMM(s390_irgen_VFTCI, ovl.fmt.VRIe.v1, + ovl.fmt.VRIe.v2, ovl.fmt.VRIe.i3, + ovl.fmt.VRIe.m4, ovl.fmt.VRIe.m5, + ovl.fmt.VRIe.rxb); goto ok; case 0xe7000000004dULL: s390_format_VRI_VVIM(s390_irgen_VREP, ovl.fmt.VRI.v1, ovl.fmt.VRI.v3, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; @@ -20087,19 +20827,27 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe70000000085ULL: /* VBPERM */ goto unimplemented; - case 0xe7000000008aULL: s390_format_VRRd_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, - ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, - ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, - ovl.fmt.VRRd.m6, - ovl.fmt.VRRd.rxb); goto ok; + case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.m6, + ovl.fmt.VRRd.rxb); goto ok; case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe7000000008dULL: s390_format_VRR_VVVV(s390_irgen_VSEL, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe7000000008eULL: /* VFMS */ goto unimplemented; - case 0xe7000000008fULL: /* VFMA */ goto unimplemented; + case 0xe7000000008eULL: s390_format_VRR_VVVVMM(s390_irgen_VFMS, ovl.fmt.VRRe.v1, + ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3, + ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5, + ovl.fmt.VRRe.m6, + ovl.fmt.VRRe.rxb); goto ok; + case 0xe7000000008fULL: s390_format_VRR_VVVVMM(s390_irgen_VFMA, ovl.fmt.VRRe.v1, + ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3, + ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5, + ovl.fmt.VRRe.m6, + ovl.fmt.VRRe.rxb); goto ok; case 0xe70000000094ULL: s390_format_VRR_VVVM(s390_irgen_VPK, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -20184,17 +20932,50 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, ovl.fmt.VRRd.rxb); goto ok; - case 0xe700000000c0ULL: /* VCLGD */ goto unimplemented; - case 0xe700000000c1ULL: /* VCDLG */ goto unimplemented; - case 0xe700000000c2ULL: /* VCGD */ goto unimplemented; - case 0xe700000000c3ULL: /* VCDG */ goto unimplemented; - case 0xe700000000c4ULL: /* VLDE */ goto unimplemented; - case 0xe700000000c5ULL: /* VLED */ goto unimplemented; - case 0xe700000000c7ULL: /* VFI */ goto unimplemented; - case 0xe700000000caULL: /* WFK */ goto unimplemented; - case 0xe700000000cbULL: /* WFC */ goto unimplemented; - case 0xe700000000ccULL: /* VFPSO */ goto unimplemented; - case 0xe700000000ceULL: /* VFSQ */ goto unimplemented; + case 0xe700000000c0ULL: s390_format_VRRa_VVMMM(s390_irgen_VCLGD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c1ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDLG, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c2ULL: s390_format_VRRa_VVMMM(s390_irgen_VCGD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c3ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDG, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c7ULL: s390_format_VRRa_VVMMM(s390_irgen_VFI, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000caULL: s390_format_VRRa_VVMM(s390_irgen_WFK, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000cbULL: s390_format_VRRa_VVMM(s390_irgen_WFC, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ccULL: s390_format_VRRa_VVMMM(s390_irgen_VFPSO, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ceULL: s390_format_VRRa_VVMM(s390_irgen_VFSQ, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; case 0xe700000000d4ULL: s390_format_VRR_VVM(s390_irgen_VUPLL, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -20221,13 +21002,37 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe700000000dfULL: s390_format_VRR_VVM(s390_irgen_VLP, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe700000000e2ULL: /* VFS */ goto unimplemented; - case 0xe700000000e3ULL: /* VFA */ goto unimplemented; - case 0xe700000000e5ULL: /* VFD */ goto unimplemented; - case 0xe700000000e7ULL: /* VFM */ goto unimplemented; - case 0xe700000000e8ULL: /* VFCE */ goto unimplemented; - case 0xe700000000eaULL: /* VFCHE */ goto unimplemented; - case 0xe700000000ebULL: /* VFCH */ goto unimplemented; + case 0xe700000000e2ULL: s390_format_VRRa_VVVMM(s390_irgen_VFS, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e3ULL: s390_format_VRRa_VVVMM(s390_irgen_VFA, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e5ULL: s390_format_VRRa_VVVMM(s390_irgen_VFD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e7ULL: s390_format_VRRa_VVVMM(s390_irgen_VFM, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e8ULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000eaULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCHE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ebULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCH, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; case 0xe700000000eeULL: /* VFMIN */ goto unimplemented; case 0xe700000000efULL: /* VFMAX */ goto unimplemented; case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, ovl.fmt.VRR.v1, @@ -21148,7 +21953,13 @@ s390_decode_and_irgen(const UChar *bytes, UInt insn_length, DisResult *dres) dis_res->jk_StopHere = Ijk_Boring; } - if (status == S390_DECODE_OK) return insn_length; /* OK */ + if (status == S390_DECODE_OK) { + /* Adjust status if a specification exception was indicated. */ + if (is_specification_exception()) + status = S390_DECODE_SPECIFICATION_EXCEPTION; + else + return insn_length; /* OK */ + } /* Decoding failed somehow */ if (sigill_diag) { @@ -21166,6 +21977,10 @@ s390_decode_and_irgen(const UChar *bytes, UInt insn_length, DisResult *dres) vex_printf("unimplemented special insn: "); break; + case S390_DECODE_SPECIFICATION_EXCEPTION: + vex_printf("specification exception: "); + break; + case S390_DECODE_ERROR: vex_printf("decoding error: "); break; diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 98ac938..22cdd04 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -1711,6 +1711,23 @@ emit_VRR_VVM(UChar *p, ULong op, UChar v1, UChar v2, UChar m4) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRR_VVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar m3, UChar m4, + UChar m5) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)m5) << 20; + the_insn |= ((ULong)m4) << 16; + the_insn |= ((ULong)m3) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} static UChar * emit_VRR_VVVM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4) @@ -1762,6 +1779,26 @@ emit_VRR_VVVV(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRRe_VVVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4, + UChar m5, UChar m6) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + rxb = s390_update_rxb(rxb, 3, &v3); + rxb = s390_update_rxb(rxb, 4, &v4); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)v3) << 28; + the_insn |= ((ULong)m6) << 24; + the_insn |= ((ULong)m5) << 16; + the_insn |= ((ULong)v4) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} static UChar * emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) @@ -1777,6 +1814,33 @@ emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRR_VVVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4, + UChar m5, UChar m6) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + rxb = s390_update_rxb(rxb, 3, &v3); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)v3) << 28; + the_insn |= ((ULong)m6) << 20; + the_insn |= ((ULong)m5) << 16; + the_insn |= ((ULong)m4) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} + +static UChar* +emit_VRR_VVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4, + UChar m5) +{ + return emit_VRR_VVVMMM(p, op, v1, v2, v3, m4, m5, 0); +} + /*------------------------------------------------------------*/ /*--- Functions to emit particular instructions ---*/ /*------------------------------------------------------------*/ @@ -6057,6 +6121,116 @@ s390_emit_VLVGP(UChar *p, UChar v1, UChar r2, UChar r3) return emit_VRR_VRR(p, 0xE70000000062ULL, v1, r2, r3); } +static UChar * +s390_emit_VFPSO(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), "vfpso", v1, v2, m3, m4, + m5); + + return emit_VRR_VVMMM(p, 0xE700000000CCULL, v1, v2, m3, m4, m5); +} + +static UChar * +s390_emit_VFA(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfa", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e3ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfs", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e2ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFM(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfm", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e7ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFD(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfd", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e5ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFSQ(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vfsq", v1, v2, m3, m4); + + return emit_VRR_VVMMM(p, 0xE700000000CEULL, v1, v2, m3, m4, 0); +} + +static UChar * +s390_emit_VFMA(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfma", + v1, v2, v3, v4, m5, m6); + + return emit_VRRe_VVVVMM(p, 0xE7000000008fULL, v1, v2, v3, v4, m5, m6); +} + +static UChar * +s390_emit_VFMS(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfms", + v1, v2, v3, v4, m5, m6); + + return emit_VRRe_VVVVMM(p, 0xE7000000008eULL, v1, v2, v3, v4, m5, m6); +} + +static UChar * +s390_emit_VFCE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfce", + v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000e8ULL, v1, v2, v3, m4, m5, m6); +} + +static UChar * +s390_emit_VFCH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfch", + v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000ebULL, v1, v2, v3, m4, m5, m6); +} + +static UChar * +s390_emit_VFCHE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, + UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfche", + v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000eaULL, v1, v2, v3, m4, m5, m6); +} + /*---------------------------------------------------------------*/ /*--- Constructors for the various s390_insn kinds ---*/ /*---------------------------------------------------------------*/ @@ -7201,7 +7375,6 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst, { s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn)); - vassert(size == 16); insn->tag = S390_INSN_VEC_TRIOP; insn->size = size; @@ -7508,6 +7681,18 @@ s390_insn_as_string(const s390_insn *insn) op = "v-vunpacku"; break; + case S390_VEC_FLOAT_NEG: + op = "v-vfloatneg"; + break; + + case S390_VEC_FLOAT_SQRT: + op = "v-vfloatsqrt"; + break; + + case S390_VEC_FLOAT_ABS: + op = "v-vfloatabs"; + break; + default: goto fail; } @@ -7880,6 +8065,13 @@ s390_insn_as_string(const s390_insn *insn) case S390_VEC_PWSUM_DW: op = "v-vpwsumdw"; break; case S390_VEC_PWSUM_QW: op = "v-vpwsumqw"; break; case S390_VEC_INIT_FROM_GPRS: op = "v-vinitfromgprs"; break; + case S390_VEC_FLOAT_ADD: op = "v-vfloatadd"; break; + case S390_VEC_FLOAT_SUB: op = "v-vfloatsub"; break; + case S390_VEC_FLOAT_MUL: op = "v-vfloatmul"; break; + case S390_VEC_FLOAT_DIV: op = "v-vfloatdiv"; break; + case S390_VEC_FLOAT_COMPARE_EQUAL: op = "v-vfloatcmpeq"; break; + case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL: op = "v-vfloatcmple"; break; + case S390_VEC_FLOAT_COMPARE_LESS: op = "v-vfloatcmpl"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R", op, insn->variant.vec_binop.dst, @@ -7889,6 +8081,8 @@ s390_insn_as_string(const s390_insn *insn) case S390_INSN_VEC_TRIOP: switch (insn->variant.vec_triop.tag) { case S390_VEC_PERM: op = "v-vperm"; break; + case S390_VEC_FLOAT_MADD: op = "v-vfloatmadd"; break; + case S390_VEC_FLOAT_MSUB: op = "v-vfloatmsub"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R, %R", op, insn->variant.vec_triop.dst, @@ -9036,6 +9230,27 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VPOPCT(buf, v1, v2, s390_getM_from_size(insn->size)); } + case S390_VEC_FLOAT_NEG: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0); + } + case S390_VEC_FLOAT_ABS: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2); + } + case S390_VEC_FLOAT_SQRT: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0); + } default: vpanic("s390_insn_unop_emit"); } @@ -11049,6 +11264,21 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size)); case S390_VEC_INIT_FROM_GPRS: return s390_emit_VLVGP(buf, v1, v2, v3); + case S390_VEC_FLOAT_ADD: + return s390_emit_VFA(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_SUB: + return s390_emit_VFS(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_MUL: + return s390_emit_VFM(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_DIV: + return s390_emit_VFD(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_COMPARE_EQUAL: + return s390_emit_VFCE(buf, v1, v2, v3, s390_getM_from_size(size), 0, 0); + case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL: + return s390_emit_VFCH(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0); + case S390_VEC_FLOAT_COMPARE_LESS: + return s390_emit_VFCHE(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0); + default: goto fail; } @@ -11070,8 +11300,14 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) UChar v4 = hregNumber(insn->variant.vec_triop.op3); switch (tag) { - case S390_VEC_PERM: + case S390_VEC_PERM: { + vassert(insn->size == 16); return s390_emit_VPERM(buf, v1, v2, v3, v4); + } + case S390_VEC_FLOAT_MADD: + return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, 3); + case S390_VEC_FLOAT_MSUB: + return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, 3); default: goto fail; } diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 7ea0101..40f0472 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -202,7 +202,10 @@ typedef enum { S390_VEC_ABS, S390_VEC_COUNT_LEADING_ZEROES, S390_VEC_COUNT_TRAILING_ZEROES, - S390_VEC_COUNT_ONES + S390_VEC_COUNT_ONES, + S390_VEC_FLOAT_NEG, + S390_VEC_FLOAT_ABS, + S390_VEC_FLOAT_SQRT } s390_unop_t; /* The kind of ternary BFP operations */ @@ -394,11 +397,20 @@ typedef enum { S390_VEC_PWSUM_QW, S390_VEC_INIT_FROM_GPRS, + S390_VEC_FLOAT_ADD, + S390_VEC_FLOAT_SUB, + S390_VEC_FLOAT_MUL, + S390_VEC_FLOAT_DIV, + S390_VEC_FLOAT_COMPARE_EQUAL, + S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL, + S390_VEC_FLOAT_COMPARE_LESS } s390_vec_binop_t; /* The vector operations with three operands */ typedef enum { - S390_VEC_PERM + S390_VEC_PERM, + S390_VEC_FLOAT_MADD, + S390_VEC_FLOAT_MSUB } s390_vec_triop_t; /* The details of a CDAS insn. Carved out to keep the size of diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index bc34f90..79581ff 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -787,10 +787,12 @@ get_bfp_rounding_mode(ISelEnv *env, IRExpr *irrm) IRRoundingMode mode = irrm->Iex.Const.con->Ico.U32; switch (mode) { - case Irrm_NEAREST: return S390_BFP_ROUND_NEAREST_EVEN; - case Irrm_ZERO: return S390_BFP_ROUND_ZERO; - case Irrm_PosINF: return S390_BFP_ROUND_POSINF; - case Irrm_NegINF: return S390_BFP_ROUND_NEGINF; + case Irrm_NEAREST_TIE_AWAY_0: return S390_BFP_ROUND_NEAREST_AWAY; + case Irrm_PREPARE_SHORTER: return S390_BFP_ROUND_PREPARE_SHORT; + case Irrm_NEAREST: return S390_BFP_ROUND_NEAREST_EVEN; + case Irrm_ZERO: return S390_BFP_ROUND_ZERO; + case Irrm_PosINF: return S390_BFP_ROUND_POSINF; + case Irrm_NegINF: return S390_BFP_ROUND_NEGINF; default: vpanic("get_bfp_rounding_mode"); } @@ -3871,6 +3873,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_COUNT_ONES; goto Iop_V_wrk; + case Iop_Neg64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_NEG; + goto Iop_V_wrk; + + case Iop_Abs64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_ABS; + goto Iop_V_wrk; + + Iop_V_wrk: { dst = newVRegV(env); reg1 = s390_isel_vec_expr(env, arg); @@ -4388,6 +4401,28 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_ELEM_ROLL_V; goto Iop_VV_wrk; + case Iop_CmpEQ64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_EQUAL; + goto Iop_VV_wrk; + + case Iop_CmpLE64Fx2: { + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; + goto Iop_VV_wrk; + } + + case Iop_CmpLT64Fx2: { + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_LESS; + goto Iop_VV_wrk; + } + + case Iop_Sqrt64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_SQRT; + goto Iop_irrm_V_wrk; + case Iop_ShlN8x16: size = 1; shift_op = S390_VEC_ELEM_SHL_INT; @@ -4493,6 +4528,14 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) return dst; } + Iop_irrm_V_wrk: { + set_bfp_rounding_mode_in_fpc(env, arg1); + reg1 = s390_isel_vec_expr(env, arg2); + + addInstr(env, s390_insn_unop(size, vec_op, dst, s390_opnd_reg(reg1))); + return dst; + } + case Iop_64HLtoV128: reg1 = s390_isel_int_expr(env, arg1); reg2 = s390_isel_int_expr(env, arg2); @@ -4516,6 +4559,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) IRExpr* arg1 = expr->Iex.Triop.details->arg1; IRExpr* arg2 = expr->Iex.Triop.details->arg2; IRExpr* arg3 = expr->Iex.Triop.details->arg3; + IROp vec_op; switch (op) { case Iop_SetElem8x16: size = 1; @@ -4551,6 +4595,36 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) dst, reg1, reg2, reg3)); return dst; + case Iop_Add64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_ADD; + goto Iop_irrm_VV... [truncated message content] |