You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(32) |
Oct
|
Nov
|
Dec
|
|
From: Wu, F. <fe...@in...> - 2023-05-26 14:09:08
|
On 5/26/2023 9:59 PM, Fei Wu wrote: > I'm from Intel RISC-V team and working on a RISC-V International > development partner project to add RISC-V vector (RVV) support on > Valgrind, the target tool is memcheck. My work bases on commit > 71272b252977 of Petr's riscv64-linux branch, many thanks to Petr for his > great work first. > https://github.com/petrpavlu/valgrind-riscv64 > > This RFC is a starting point of RVV support on Valgrind, It's far from > complete, which will take huge time, but I do think it's more effective > to have some real code for discussion, so this series adds the RVV > support to run memcpy/strcmp/strcpy/strlen/strncpy in: > https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/examples > In case the intrinsic version is built with extra RVV instructions which are not supported yet, here is an assembly version. All C code is from the above link with a small tweak, and the asm code is copied from: https://github.com/riscv/riscv-v-spec/tree/master/example diff --git a/rvv-examples/Makefile b/rvv-examples/Makefile new file mode 100644 index 000000000..dfae4ac31 --- /dev/null +++ b/rvv-examples/Makefile @@ -0,0 +1,23 @@ +CC := clang +CFLAGS := -g -march=rv64gcv -mllvm -riscv-v-vector-bits-min=128 -O2 +ASFLAGS := -g -march=rv64gcv -mllvm -riscv-v-vector-bits-min=128 -O2 + +BINARY = rvv_strcmp rvv_memcpy rvv_strcpy rvv_strlen rvv_strncpy + +.PHONY: all clean test + +all: $(BINARY) + +clean: + rm -f $(BINARY) + +test: $(BINARY) + for t in $(BINARY); do \ + valgrind ./$$t; \ + done + +rvv_strcmp: rvv_strcmp.c strcmp.s +rvv_memcpy: rvv_memcpy.c memcpy.s +rvv_strcpy: rvv_strcpy.c strcpy.s +rvv_strlen: rvv_strlen.c strlen.s +rvv_strncpy: rvv_strncpy.c strncpy.s diff --git a/rvv-examples/common.h b/rvv-examples/common.h new file mode 100644 index 000000000..cec96ed2b --- /dev/null +++ b/rvv-examples/common.h @@ -0,0 +1,112 @@ +// common.h +// common utilites for the test code under exmaples/ + +#include <math.h> +#include <stdbool.h> +#include 
<stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +extern void *memcpy_vec(void *dst, void *src, size_t n); +extern int strcmp_vec(const char *src1, const char *src2); +extern char *strcpy_vec(char *dst, const char *src); +extern size_t strlen_vec(char *src); +extern char *strncpy_vec(char *dst, char *src, size_t count); + +void gen_rand_1d(double *a, int n) { + for (int i = 0; i < n; ++i) + a[i] = (double)rand() / (double)RAND_MAX + (double)(rand() % 1000); +} + +void gen_string(char *s, int n) { + // char value range: -128 ~ 127 + for (int i = 0; i < n - 1; ++i) + s[i] = (char)(rand() % 127) + 1; + s[n - 1] = '\0'; +} + +void gen_rand_2d(double **ar, int n, int m) { + for (int i = 0; i < n; ++i) + for (int j = 0; j < m; ++j) + ar[i][j] = (double)rand() / (double)RAND_MAX + (double)(rand() % 1000); +} + +void print_string(const char *a, const char *name) { + printf("const char *%s = \"", name); + int i = 0; + while (a[i] != 0) + putchar(a[i++]); + printf("\"\n"); + puts(""); +} + +void print_array_1d(double *a, int n, const char *type, const char *name) { + printf("%s %s[%d] = {\n", type, name, n); + for (int i = 0; i < n; ++i) { + printf("%06.2f%s", a[i], i != n - 1 ? "," : "};\n"); + if (i % 10 == 9) + puts(""); + } + puts(""); +} + +void print_array_2d(double **a, int n, int m, const char *type, + const char *name) { + printf("%s %s[%d][%d] = {\n", type, name, n, m); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < m; ++j) { + printf("%06.2f", a[i][j]); + if (j == m - 1) + puts(i == n - 1 ? 
"};" : ","); + else + putchar(','); + } + } + puts(""); +} + +bool double_eq(double golden, double actual, double relErr) { + return (fabs(actual - golden) < relErr); +} + +bool compare_1d(double *golden, double *actual, int n) { + for (int i = 0; i < n; ++i) + if (!double_eq(golden[i], actual[i], 1e-6)) + return false; + return true; +} + +bool compare_string(const char *golden, const char *actual, int n) { + for (int i = 0; i < n; ++i) + if (golden[i] != actual[i]) + return false; + return true; +} + +bool compare_2d(double **golden, double **actual, int n, int m) { + for (int i = 0; i < n; ++i) + for (int j = 0; j < m; ++j) + if (!double_eq(golden[i][j], actual[i][j], 1e-6)) + return false; + return true; +} + +double **alloc_array_2d(int n, int m) { + double **ret; + ret = (double **)malloc(sizeof(double *) * n); + for (int i = 0; i < n; ++i) + ret[i] = (double *)malloc(sizeof(double) * m); + return ret; +} + +void init_array_one_1d(double *ar, int n) { + for (int i = 0; i < n; ++i) + ar[i] = 1; +} + +void init_array_one_2d(double **ar, int n, int m) { + for (int i = 0; i < n; ++i) + for (int j = 0; j < m; ++j) + ar[i][j] = 1; +} diff --git a/rvv-examples/memcpy.s b/rvv-examples/memcpy.s new file mode 100644 index 000000000..1b50ab670 --- /dev/null +++ b/rvv-examples/memcpy.s @@ -0,0 +1,17 @@ + .text + .balign 4 + .global memcpy_vec + # void *memcpy_vec(void* dest, const void* src, size_t n) + # a0=dest, a1=src, a2=n + # + memcpy_vec: + mv a3, a0 # Copy destination + loop: + vsetvli t0, a2, e8, m8, ta, ma # Vectors of 8b + vle8.v v0, (a1) # Load bytes + add a1, a1, t0 # Bump pointer + sub a2, a2, t0 # Decrement count + vse8.v v0, (a3) # Store bytes + add a3, a3, t0 # Bump pointer + bnez a2, loop # Any more? 
+ ret # Return diff --git a/rvv-examples/rvv_memcpy.c b/rvv-examples/rvv_memcpy.c new file mode 100644 index 000000000..d78b9b604 --- /dev/null +++ b/rvv-examples/rvv_memcpy.c @@ -0,0 +1,21 @@ +#include "common.h" +#include <riscv_vector.h> +#include <string.h> + +int main() { + const int N = 127; + const uint32_t seed = 0xdeadbeef; + srand(seed); + + // data gen + double A[N]; + gen_rand_1d(A, N); + + // compute + double golden[N], actual[N]; + memcpy(golden, A, sizeof(A)); + memcpy_vec(actual, A, sizeof(A)); + + // compare + puts(compare_1d(golden, actual, N) ? "pass" : "fail"); +} diff --git a/rvv-examples/rvv_strcmp.c b/rvv-examples/rvv_strcmp.c new file mode 100644 index 000000000..d10cac133 --- /dev/null +++ b/rvv-examples/rvv_strcmp.c @@ -0,0 +1,25 @@ +#include "common.h" +#include <riscv_vector.h> +#include <string.h> + +int main() { + const int N = 1023; + const uint32_t seed = 0xdeadbeef; + srand(seed); + + // data gen + char s0[N], s1[N]; + gen_string(s0, N); + gen_string(s1, N); + + // compute + int golden, actual; + golden = strcmp(s0, s1); + actual = strcmp_vec(s0, s1); + + golden = (golden == 0) ? 0 : (golden > 0) ? 1 : -1; + actual = (golden == 0) ? 0 : (golden > 0) ? 1 : -1; + + // compare + puts(golden == actual ? "pass" : "fail"); +} diff --git a/rvv-examples/rvv_strcpy.c b/rvv-examples/rvv_strcpy.c new file mode 100644 index 000000000..7e5af8673 --- /dev/null +++ b/rvv-examples/rvv_strcpy.c @@ -0,0 +1,22 @@ +#include "common.h" +#include <assert.h> +#include <riscv_vector.h> +#include <string.h> + +int main() { + const int N = 2000; + const uint32_t seed = 0xdeadbeef; + srand(seed); + + // data gen + char s0[N]; + gen_string(s0, N); + + // compute + char golden[N], actual[N]; + strcpy(golden, s0); + strcpy_vec(actual, s0); + + // compare + puts(strcmp(golden, actual) == 0 ? 
"pass" : "fail"); +} diff --git a/rvv-examples/rvv_strlen.c b/rvv-examples/rvv_strlen.c new file mode 100644 index 000000000..e1142f883 --- /dev/null +++ b/rvv-examples/rvv_strlen.c @@ -0,0 +1,22 @@ +#include "common.h" +#include <riscv_vector.h> +#include <string.h> + +int main() { + const uint32_t seed = 0xdeadbeef; + srand(seed); + + int N = rand() % 2000; + + // data gen + char s0[N]; + gen_string(s0, N); + + // compute + size_t golden, actual; + golden = strlen(s0); + actual = strlen_vec(s0); + + // compare + puts(golden == actual ? "pass" : "fail"); +} diff --git a/rvv-examples/rvv_strncpy.c b/rvv-examples/rvv_strncpy.c new file mode 100644 index 000000000..f1d14ac52 --- /dev/null +++ b/rvv-examples/rvv_strncpy.c @@ -0,0 +1,25 @@ +#include "common.h" +#include <riscv_vector.h> +#include <string.h> + +int main() { + const int N = 1320; + const uint32_t seed = 0xdeadbeef; + srand(seed); + + // data gen + char s0[N]; + gen_string(s0, N); + char s1[] = "the quick brown fox jumps over the lazy dog"; + size_t count = strlen(s1) + rand() % 500; + + // compute + char golden[N], actual[N]; + strcpy(golden, s0); + strcpy(actual, s0); + strncpy(golden, s1, count); + strncpy_vec(actual, s1, count); + + // compare + puts(compare_string(golden, actual, N) ? 
"pass" : "fail"); +} diff --git a/rvv-examples/strcmp.s b/rvv-examples/strcmp.s new file mode 100644 index 000000000..85d32c96d --- /dev/null +++ b/rvv-examples/strcmp.s @@ -0,0 +1,34 @@ + .text + .balign 4 + .global strcmp_vec + # int strcmp_vec(const char *src1, const char* src2) +strcmp_vec: + ## Using LMUL=2, but same register names work for larger LMULs + li t1, 0 # Initial pointer bump +loop: + vsetvli t0, x0, e8, m2, ta, ma # Max length vectors of bytes + add a0, a0, t1 # Bump src1 pointer + vle8ff.v v8, (a0) # Get src1 bytes + add a1, a1, t1 # Bump src2 pointer + vle8ff.v v16, (a1) # Get src2 bytes + + vmseq.vi v0, v8, 0 # Flag zero bytes in src1 + vmsne.vv v1, v8, v16 # Flag if src1 != src2 + vmor.mm v0, v0, v1 # Combine exit conditions + + vfirst.m a2, v0 # ==0 or != ? + csrr t1, vl # Get number of bytes fetched + + bltz a2, loop # Loop if all same and no zero byte + + add a0, a0, a2 # Get src1 element address + lbu a3, (a0) # Get src1 byte from memory + + add a1, a1, a2 # Get src2 element address + lbu a4, (a1) # Get src2 byte from memory + + sub a0, a3, a4 # Return value. + + ret + + diff --git a/rvv-examples/strcpy.s b/rvv-examples/strcpy.s new file mode 100644 index 000000000..292df25ac --- /dev/null +++ b/rvv-examples/strcpy.s @@ -0,0 +1,20 @@ + .text + .balign 4 + .global strcpy_vec + # char* strcpy_vec(char *dst, const char* src) +strcpy_vec: + mv a2, a0 # Copy dst + li t0, -1 # Infinite AVL +loop: + vsetvli x0, t0, e8, m8, ta, ma # Max length vectors of bytes + vle8ff.v v8, (a1) # Get src bytes + csrr t1, vl # Get number of bytes fetched + vmseq.vi v1, v8, 0 # Flag zero bytes + vfirst.m a3, v1 # Zero found? + add a1, a1, t1 # Bump pointer + vmsif.m v0, v1 # Set mask up to and including zero byte. 
+ vse8.v v8, (a2), v0.t # Write out bytes + add a2, a2, t1 # Bump pointer + bltz a3, loop # Zero byte not found, so loop + + ret diff --git a/rvv-examples/strlen.s b/rvv-examples/strlen.s new file mode 100644 index 000000000..721c0257e --- /dev/null +++ b/rvv-examples/strlen.s @@ -0,0 +1,22 @@ + .text + .balign 4 + .global strlen_vec +# size_t strlen_vec(const char *str) +# a0 holds *str + +strlen_vec: + mv a3, a0 # Save start +loop: + vsetvli a1, x0, e8, m8, ta, ma # Vector of bytes of maximum length + vle8ff.v v8, (a3) # Load bytes + csrr a1, vl # Get bytes read + vmseq.vi v0, v8, 0 # Set v0[i] where v8[i] = 0 + vfirst.m a2, v0 # Find first set bit + add a3, a3, a1 # Bump pointer + bltz a2, loop # Not found? + + add a0, a0, a1 # Sum start + bump + add a3, a3, a2 # Add index + sub a0, a3, a0 # Subtract start address+bump + + ret diff --git a/rvv-examples/strncpy.s b/rvv-examples/strncpy.s new file mode 100644 index 000000000..f7114c5ca --- /dev/null +++ b/rvv-examples/strncpy.s @@ -0,0 +1,36 @@ + .text + .balign 4 + .global strncpy_vec + # char* strncpy_vec(char *dst, const char* src, size_t n) +strncpy_vec: + mv a3, a0 # Copy dst +loop: + vsetvli x0, a2, e8, m8, ta, ma # Vectors of bytes. + vle8ff.v v8, (a1) # Get src bytes + vmseq.vi v1, v8, 0 # Flag zero bytes + csrr t1, vl # Get number of bytes fetched + vfirst.m a4, v1 # Zero found? + vmsbf.m v0, v1 # Set mask up to before zero byte. + vse8.v v8, (a3), v0.t # Write out non-zero bytes + bgez a4, zero_tail # Zero remaining bytes. + sub a2, a2, t1 # Decrement count. + add a3, a3, t1 # Bump dest pointer + add a1, a1, t1 # Bump src pointer + bnez a2, loop # Anymore? + + ret + +zero_tail: + sub a2, a2, a4 # Subtract count on non-zero bytes. + add a3, a3, a4 # Advance past non-zero bytes. + vsetvli t1, a2, e8, m8, ta, ma # Vectors of bytes. + vmv.v.i v0, 0 # Splat zero. + +zero_loop: + vse8.v v0, (a3) # Store zero. + sub a2, a2, t1 # Decrement count. 
+ add a3, a3, t1 # Bump pointer + vsetvli t1, a2, e8, m8, ta, ma # Vectors of bytes. + bnez a2, zero_loop # Anymore? + + ret Thanks, Fei. > The whole idea is splitting the vector instructions into scalar > instructions which have already been well supported on Petr's branch, > the correctness of binary translation (tool=none) is simple to ensure, > but the logic of tool=memcheck should not be broken, one of the keys is > to deal with the instructions with mask: > > * for load/store with mask, LoadG/StoreG are enabled, the same semantics > as other architectures > > * for other instructions such as vadd, if the vector mask agnostic (vma) > is set to undisturbed, the masked original value is read first then > write back, the V bit won't change even after write back, it's not > necessary to have another guard type like LoadG/StoreG. > > Pros > ---- > * by leveraging the existing scalar instructions support on Valgrind, > usually adding a new instruction involves only the frontend in > guest_riscv64_toIR, other parts are rare touched, so effort is much > reduced to enable new instructions. > > * As the backend only sees the scalar IRs and generates scalar > instructions, it's possible to run valgrind ./vec-test on non-RVV host. > > Cons > ---- > * as this method splits RVV instruction at frontend, there is less > chance to optimize at other stages, e.g. the vbits tracking. > > * with larger vlen such as 1K, at most 1 RVV instruction will split into > 1K ops, besides the performance penalty, it causes pressure to other > components such as tmp space too. Some of this can be relieved by > grouping multiple elements together. > > > There are some alternatives, but none seems perfect: > * helper function. It's much easier to make tool=none work, but how good > is it to handle the V+A tracking and other tools? Generally speaking, it > should not be a general solution for too many instructions. > > * define and pass the RVV IR to backend, instead of splitting it too > early. 
This introduces much effort, we should evaluate what level of > profit can be attained. > > At last, if the performance is tolerable, is this the right way to go? > > > Fei Wu (12): > riscv64: Starting Vector support, registers added > riscv64: Pass riscv guest_state for translation > riscv64: Add SyncupEnv & TooManyIR jump kinds > riscv64: Add LoadG/StoreG support > riscv64: Shift guest_state -2048 on calling helper > riscv64: Add cpu_state to TB > riscv64: Introduce dis_RV64V and add vsetvl > riscv64: Add load/store > riscv64: Add csrr vl > riscv64: add vfirst > riscv64: Add vmsgtu/vmseq/vmsne/vmsbf/vmsif/vmor/vmv/vid > riscv64: Add vadd > > VEX/priv/guest_riscv64_toIR.c | 974 +++++++++++++++++++++++++++++- > VEX/priv/host_riscv64_defs.c | 133 ++++ > VEX/priv/host_riscv64_defs.h | 23 + > VEX/priv/host_riscv64_isel.c | 89 ++- > VEX/priv/ir_defs.c | 8 + > VEX/priv/ir_opt.c | 4 +- > VEX/pub/libvex.h | 4 + > VEX/pub/libvex_guest_riscv64.h | 47 +- > VEX/pub/libvex_ir.h | 9 +- > coregrind/m_scheduler/scheduler.c | 17 +- > coregrind/m_translate.c | 5 + > coregrind/m_transtab.c | 26 +- > coregrind/pub_core_transtab.h | 5 + > memcheck/mc_machine.c | 35 ++ > memcheck/mc_translate.c | 4 + > 15 files changed, 1368 insertions(+), 15 deletions(-) > |
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:59
|
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 51 +++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index aaa906f1b..13be0d01d 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -3569,6 +3569,17 @@ static IRExpr* widen_Sto64(IRExpr* e, IRType ty)
}
}
+static IRExpr* narrow_64to(IRExpr* e, UInt bits)
+{
+ switch (bits) {
+ case 8: return unop(Iop_64to8, e);
+ case 16: return unop(Iop_64to16, e);
+ case 32: return unop(Iop_64to32, e);
+ case 64: return e;
+ default: vassert(0);
+ }
+}
+
static Bool dis_vmsgtu_vx(/*MB_OUT*/ DisResult* dres,
/*OUT*/ IRSB* irsb,
UInt insn,
@@ -3686,6 +3697,44 @@ static Bool dis_vmseq_vi(/*MB_OUT*/ DisResult* dres,
return True;
}
+static Bool dis_vadd_vv(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ UInt vs1 = INSN(19, 15);
+ UInt vd = INSN(11, 7);
+
+ UInt sew = get_sew(guest);
+ UInt sew_b = sew / 8;
+ IRType ty = integerIRTypeOfSize(sew_b);
+
+ for (UInt i = 0; i < guest->guest_vl; ++i) {
+ UInt offset = i * sew_b;
+ IRExpr* res = narrow_64to(
+ binop(Iop_Add64,
+ widen_Sto64(getVReg(vs2, offset, ty), ty),
+ widen_Sto64(getVReg(vs1, offset, ty), ty)),
+ sew);
+ if (vm == 0) {
+ UInt mask_outer_offset = i / 64 * 8;
+ UInt mask_inner_offset = i % 64;
+ IRExpr* guard = binop(Iop_CmpNE64,
+ mkU64(0),
+ binop(Iop_And64,
+ getVReg(0 /* v0 */, mask_outer_offset, Ity_I64),
+ mkU64(1UL << mask_inner_offset)));
+ res = IRExpr_ITE(guard, res, getVReg(vd, offset, ty));
+ }
+ putVReg(irsb, vd, offset, res);
+ }
+
+ return True;
+}
+
static Bool dis_vmsne_vv(/*MB_OUT*/ DisResult* dres,
/*OUT*/ IRSB* irsb,
UInt insn,
@@ -3977,6 +4026,8 @@ static Bool dis_opivv(/*MB_OUT*/ DisResult* dres,
UInt funct6 = INSN(31, 26);
switch (funct6) {
+ case 0b000000:
+ return dis_vadd_vv(dres, irsb, insn, guest_pc_curr_instr, guest);
case 0b011001:
return dis_vmsne_vv(dres, irsb, insn, guest_pc_curr_instr, guest);
default:
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:58
|
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 454 ++++++++++++++++++++++++++++++++++
VEX/priv/host_riscv64_isel.c | 20 +-
2 files changed, 471 insertions(+), 3 deletions(-)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index 3ef0aeb77..aaa906f1b 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -3534,6 +3534,320 @@ static Bool dis_vsetvl(/*MB_OUT*/ DisResult* dres,
return True;
}
+// return sew in bits
+static UInt get_sew(VexGuestRISCV64State* guest)
+{
+ UInt raw_sew = SLICE_UInt(guest->guest_vtype, 5, 3);
+ switch (raw_sew) {
+ case 0b000: return 8;
+ case 0b001: return 16;
+ case 0b010: return 32;
+ case 0b011: return 64;
+ default: vassert(0);
+ }
+}
+
+static IRExpr* widen_Uto64(IRExpr* e, IRType ty)
+{
+ switch (ty) {
+ case Ity_I8: return unop(Iop_8Uto64, e);
+ case Ity_I16: return unop(Iop_16Uto64, e);
+ case Ity_I32: return unop(Iop_32Uto64, e);
+ case Ity_I64: return e;
+ default: vassert(0);
+ }
+}
+
+static IRExpr* widen_Sto64(IRExpr* e, IRType ty)
+{
+ switch (ty) {
+ case Ity_I8: return unop(Iop_8Sto64, e);
+ case Ity_I16: return unop(Iop_16Sto64, e);
+ case Ity_I32: return unop(Iop_32Sto64, e);
+ case Ity_I64: return e;
+ default: vassert(0);
+ }
+}
+
+static Bool dis_vmsgtu_vx(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ UInt rs1 = INSN(19, 15);
+ UInt vd = INSN(11, 7);
+
+ UInt vma = SLICE_UInt(guest->guest_vtype, 7, 7);
+ UInt sew_b = get_sew(guest) / 8;
+ IRType ty = integerIRTypeOfSize(sew_b);
+
+ UInt offset = 0;
+ for (UInt o = 0; o < guest->guest_vl; o += 64) {
+ // generate res w/o mask
+ UInt remain = guest->guest_vl - o;
+ UInt step = (remain > 64) ? 64 : remain;
+ IRExpr* res = mkU64(0);
+ for (UInt i = 0; i < step; ++i) {
+ IRExpr* bit = binop(Iop_Shl64,
+ unop(Iop_1Uto64,
+ binop(Iop_CmpLT64U,
+ getIReg64(rs1),
+ widen_Uto64(getVReg(vs2, offset, ty), ty))),
+ mkU8(i));
+
+ res = binop(Iop_Or64, res, bit);
+ offset += sew_b;
+ }
+
+ // modify res according to mask
+ UInt mask_offset = o / 8;
+ if (vm == 0) {
+ IRExpr* v0_step = getVReg(0, mask_offset, Ity_I64);
+
+ IRExpr* inactive;
+ if (vma == 0) { // undisturbed, read it first
+ IRExpr* vd_step = getVReg(vd, mask_offset, Ity_I64);
+ inactive = binop(Iop_And64, unop(Iop_Not64, v0_step), vd_step);
+ } else { // agnostic, set to 1
+ inactive = binop(Iop_And64, unop(Iop_Not64, v0_step), mkU64(-1UL));
+ }
+ IRExpr* active = binop(Iop_And64, v0_step, res);
+ res = binop(Iop_Or64, active, inactive);
+ }
+ putVReg(irsb, vd, mask_offset, res);
+ }
+
+ putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_TooManyIR;
+
+ return True;
+}
+
+static Bool dis_vmseq_vi(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ ULong imm = sext_slice_ulong(insn, 19, 15);
+ UInt vd = INSN(11, 7);
+
+ UInt vma = SLICE_UInt(guest->guest_vtype, 7, 7);
+ UInt sew_b = get_sew(guest) / 8;
+ IRType ty = integerIRTypeOfSize(sew_b);
+
+ UInt offset = 0;
+ for (UInt o = 0; o < guest->guest_vl; o += 64) {
+ // generate res w/o mask
+ UInt remain = guest->guest_vl - o;
+ UInt step = (remain > 64) ? 64 : remain;
+ IRExpr* res = mkU64(0);
+ for (UInt i = 0; i < step; ++i) {
+ IRExpr* bit = binop(Iop_Shl64,
+ unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,
+ mkU64(imm),
+ widen_Sto64(getVReg(vs2, offset, ty), ty))),
+ mkU8(i));
+
+ res = binop(Iop_Or64, res, bit);
+ offset += sew_b;
+ }
+
+ // modify res according to mask
+ UInt mask_offset = o / 8;
+ if (vm == 0) {
+ IRExpr* v0_step = getVReg(0, mask_offset, Ity_I64);
+
+ IRExpr* inactive;
+ if (vma == 0) { // undisturbed, read it first
+ IRExpr* vd_step = getVReg(vd, mask_offset, Ity_I64);
+ inactive = binop(Iop_And64, unop(Iop_Not64, v0_step), vd_step);
+ } else { // agnostic, set to 1
+ inactive = binop(Iop_And64, unop(Iop_Not64, v0_step), mkU64(-1UL));
+ }
+ IRExpr* active = binop(Iop_And64, v0_step, res);
+ res = binop(Iop_Or64, active, inactive);
+ }
+
+ putVReg(irsb, vd, mask_offset, res);
+ }
+
+ putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_TooManyIR;
+
+ return True;
+}
+
+static Bool dis_vmsne_vv(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ UInt vs1 = INSN(19, 15);
+ UInt vd = INSN(11, 7);
+
+ UInt vma = SLICE_UInt(guest->guest_vtype, 7, 7);
+ UInt sew_b = get_sew(guest) / 8;
+ IRType ty = integerIRTypeOfSize(sew_b);
+
+ UInt offset = 0;
+ for (UInt o = 0; o < guest->guest_vl; o += 64) {
+ // generate res w/o mask
+ UInt remain = guest->guest_vl - o;
+ UInt step = (remain > 64) ? 64 : remain;
+ IRExpr* res = mkU64(0);
+ for (UInt i = 0; i < step; ++i) {
+ IRExpr* bit = binop(Iop_Shl64,
+ unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,
+ widen_Sto64(getVReg(vs1, offset, ty), ty),
+ widen_Sto64(getVReg(vs2, offset, ty), ty))),
+ mkU8(i));
+
+ res = binop(Iop_Or64, res, bit);
+ offset += sew_b;
+ }
+
+ // modify res according to mask
+ UInt mask_offset = o / 8;
+ if (vm == 0) {
+ IRExpr* v0_step = getVReg(0, mask_offset, Ity_I64);
+
+ IRExpr* inactive;
+ if (vma == 0) { // undisturbed, read it first
+ IRExpr* vd_step = getVReg(vd, mask_offset, Ity_I64);
+ inactive = binop(Iop_And64, unop(Iop_Not64, v0_step), vd_step);
+ } else { // agnostic, set to 1
+ inactive = binop(Iop_And64, unop(Iop_Not64, v0_step), mkU64(-1UL));
+ }
+ IRExpr* active = binop(Iop_And64, v0_step, res);
+ res = binop(Iop_Or64, active, inactive);
+ }
+
+ putVReg(irsb, vd, mask_offset, res);
+ }
+
+ putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_TooManyIR;
+
+ return True;
+}
+
+static Bool dis_vmsbf_m(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ UInt vd = INSN(11, 7);
+
+ vassert(vm == 1); // mask not supported yet
+
+ IRExpr* not_found = mkU64(-1UL);
+ IRExpr* prev = not_found;
+ for (UInt i = 0; i < guest->guest_vl; i += 64) {
+ UInt mask_offset = i / 8;
+ // x = n - (n & n - 1) with only the rightmost set bit
+ // y = (x - 1) for vmsbf
+ IRExpr* n = getVReg(vs2, mask_offset, Ity_I64);
+ IRExpr* x = binop(Iop_Sub64,
+ n,
+ binop(Iop_And64,
+ n,
+ binop(Iop_Sub64,
+ n,
+ mkU64(1))));
+ IRExpr* y = binop(Iop_Sub64,
+ x,
+ mkU64(1));
+
+ IRExpr* cond = binop(Iop_CmpEQ64, prev, not_found);
+ IRExpr* res = IRExpr_ITE(cond, y, mkU64(0));
+
+ putVReg(irsb, vd, mask_offset, res);
+ prev = res;
+ }
+
+ return True;
+}
+
+static Bool dis_vmsif_m(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ UInt vd = INSN(11, 7);
+
+ vassert(vm == 1); // mask not supported yet
+
+ IRExpr* not_found = mkU64(-1UL);
+ IRExpr* prev = not_found;
+ for (UInt i = 0; i < guest->guest_vl; i += 64) {
+ UInt mask_offset = i / 8;
+ // x = n - (n & n - 1) with only the rightmost set bit
+ // y = x + (x - 1) for vmsif
+ IRExpr* n = getVReg(vs2, mask_offset, Ity_I64);
+ IRExpr* x = binop(Iop_Sub64,
+ n,
+ binop(Iop_And64,
+ n,
+ binop(Iop_Sub64,
+ n,
+ mkU64(1))));
+ IRExpr* y = binop(Iop_Add64,
+ x,
+ binop(Iop_Sub64,
+ x,
+ mkU64(1)));
+
+ IRExpr* cond = binop(Iop_CmpEQ64, prev, not_found);
+ IRExpr* res = IRExpr_ITE(cond, y, mkU64(0));
+
+ putVReg(irsb, vd, mask_offset, res);
+ prev = res;
+ }
+
+ return True;
+}
+
+static Bool dis_vmor_mm(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vs2 = INSN(24, 20);
+ UInt vs1 = INSN(19, 15);
+ UInt vd = INSN(11, 7);
+
+ for (UInt i = 0; i < guest->guest_vl; i += 64) {
+ UInt mask_offset = i / 8;
+ IRExpr* mask = binop(Iop_Or64,
+ getVReg(vs1, mask_offset, Ity_I64),
+ getVReg(vs2, mask_offset, Ity_I64));
+ putVReg(irsb, vd, mask_offset, mask);
+ }
+
+ return True;
+}
+
static ULong riscv_vfirst(VexGuestRISCV64State* guest, UInt vs2, UInt vm)
{
ULong index = -1UL;
@@ -3600,6 +3914,77 @@ static Bool dis_vfirst_m(/*MB_OUT*/ DisResult* dres,
return True;
}
+static Bool dis_vid_v(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vs2 = INSN(24, 20);
+ UInt vd = INSN(11, 7);
+
+ vassert(vs2 == 0);
+
+ UInt sew_b = get_sew(guest) / 8;
+ IRType ty = integerIRTypeOfSize(sew_b);
+
+ UInt offset = 0;
+ for (UInt i = 0; i < guest->guest_vl; ++i) {
+ putVReg(irsb, vd, offset, mkU(ty, i));
+ offset += sew_b;
+ }
+
+ return True;
+}
+
+static Bool dis_vmv_vi(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vs2 = INSN(24, 20);
+ ULong imm = sext_slice_ulong(insn, 19, 15);
+ UInt vd = INSN(11, 7);
+
+ vassert(vs2 == 0);
+
+ UInt sew_b = get_sew(guest) / 8;
+ IRExpr* e_imm;
+ switch (sew_b) {
+ case 1: e_imm = mkU8((UChar)imm); break;
+ case 2: e_imm = mkU16((UShort)imm); break;
+ case 4: e_imm = mkU32((UInt)imm); break;
+ case 8: e_imm = mkU64(imm); break;
+ default: vassert(0);
+ }
+
+ UInt offset = 0;
+ for (UInt i = 0; i < guest->guest_vl; ++i) {
+ putVReg(irsb, vd, offset, e_imm);
+ offset += sew_b;
+ }
+
+ return True;
+}
+
+static Bool dis_opivv(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt funct6 = INSN(31, 26);
+
+ switch (funct6) {
+ case 0b011001:
+ return dis_vmsne_vv(dres, irsb, insn, guest_pc_curr_instr, guest);
+ default:
+ return False;
+ }
+ return False;
+}
+
static Bool dis_opmvv(/*MB_OUT*/ DisResult* dres,
/*OUT*/ IRSB* irsb,
UInt insn,
@@ -3609,6 +3994,8 @@ static Bool dis_opmvv(/*MB_OUT*/ DisResult* dres,
UInt funct6 = INSN(31, 26);
switch (funct6) {
+ case 0b011010:
+ return dis_vmor_mm(dres, irsb, insn, guest_pc_curr_instr, guest);
case 0b010000:
switch (INSN(19, 15)) {
case 0b10001:
@@ -3617,12 +4004,71 @@ static Bool dis_opmvv(/*MB_OUT*/ DisResult* dres,
return False;
}
return False;
+ case 0b010100:
+ switch (INSN(19, 15)) {
+ case 0b00001:
+ return dis_vmsbf_m(dres, irsb, insn, guest_pc_curr_instr, guest);
+ case 0b00011:
+ return dis_vmsif_m(dres, irsb, insn, guest_pc_curr_instr, guest);
+ case 0b10001:
+ return dis_vid_v(dres, irsb, insn, guest_pc_curr_instr, guest);
+ default:
+ return False;
+ }
+ return False;
default:
return False;
}
return False;
}
+static Bool dis_opivi(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt funct6 = INSN(31, 26);
+ UInt vm = INSN(25, 25);
+
+ switch (funct6) {
+ case 0b011000:
+ return dis_vmseq_vi(dres, irsb, insn, guest_pc_curr_instr, guest);
+ case 0b010111:
+ if (vm == 1) {
+ return dis_vmv_vi(dres, irsb, insn, guest_pc_curr_instr, guest);
+ }
+ return False;
+ default:
+ return False;
+ }
+}
+
+static Bool dis_opivx(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt funct6 = INSN(31, 26);
+
+ switch (funct6) {
+ case 0b011110:
+ return dis_vmsgtu_vx(dres, irsb, insn, guest_pc_curr_instr, guest);
+ default:
+ return False;
+ }
+}
+
+static Bool dis_opmvx(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ return False;
+}
+
static UInt decode_eew(UInt raw_eew)
{
switch (raw_eew) {
@@ -3728,8 +4174,16 @@ static Bool dis_RV64V(/*MB_OUT*/ DisResult* dres,
switch (INSN(6, 0)) {
case 0b1010111:
switch (INSN(14, 12)) {
+ case 0b000: // OPIVV
+ return dis_opivv(dres, irsb, insn, guest_pc_curr_instr, guest);
case 0b010: // OPMVV
return dis_opmvv(dres, irsb, insn, guest_pc_curr_instr, guest);
+ case 0b011: // OPIVI
+ return dis_opivi(dres, irsb, insn, guest_pc_curr_instr, guest);
+ case 0b100: // OPIVX
+ return dis_opivx(dres, irsb, insn, guest_pc_curr_instr, guest);
+ case 0b110: // OPMVX
+ return dis_opmvx(dres, irsb, insn, guest_pc_curr_instr, guest);
case 0b111: // vsetvl
return dis_vsetvl(dres, irsb, insn, guest_pc_curr_instr);
default:
diff --git a/VEX/priv/host_riscv64_isel.c b/VEX/priv/host_riscv64_isel.c
index 127200d8e..06e08ca8d 100644
--- a/VEX/priv/host_riscv64_isel.c
+++ b/VEX/priv/host_riscv64_isel.c
@@ -634,6 +634,8 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
case Iop_Xor32:
case Iop_Or64:
case Iop_Or32:
+ case Iop_Or16:
+ case Iop_Or8:
case Iop_Or1:
case Iop_And64:
case Iop_And32:
@@ -670,6 +672,8 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
break;
case Iop_Or64:
case Iop_Or32:
+ case Iop_Or16:
+ case Iop_Or8:
case Iop_Or1:
op = RISCV64op_OR;
break;
@@ -982,11 +986,12 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, src, 1));
return dst;
}
+ case Iop_1Uto32:
case Iop_8Uto32:
case Iop_8Uto64:
case Iop_16Uto64:
case Iop_32Uto64: {
- UInt shift =
+ UInt shift = (e->Iex.Unop.op == Iop_1Uto32) ? 63 :
64 - 8 * sizeofIRType(typeOfIRExpr(env->type_env, e->Iex.Unop.arg));
HReg tmp = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
@@ -995,6 +1000,8 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRLI, dst, tmp, shift));
return dst;
}
+ case Iop_1Sto8:
+ case Iop_1Sto16:
case Iop_1Sto32:
case Iop_1Sto64: {
HReg tmp = newVRegI(env);
@@ -1010,12 +1017,14 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
case Iop_32Sto64:
/* These are no-ops. */
return iselIntExpr_R(env, e->Iex.Unop.arg);
+ case Iop_32to1:
case Iop_32to8:
case Iop_32to16:
case Iop_64to8:
case Iop_64to16:
case Iop_64to32: {
- UInt shift = 64 - 8 * sizeofIRType(ty);
+ UInt shift = (e->Iex.Unop.op == Iop_32to1) ? 63 :
+ 64 - 8 * sizeofIRType(ty);
HReg tmp = newVRegI(env);
HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, shift));
@@ -1047,6 +1056,7 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
return dst;
}
case Iop_CmpNEZ8:
+ case Iop_CmpNEZ16:
case Iop_CmpNEZ32:
case Iop_CmpNEZ64: {
HReg dst = newVRegI(env);
@@ -1166,6 +1176,10 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
vassert(ty == Ity_I8);
u = vex_sx_to_64(e->Iex.Const.con->Ico.U8, 8);
break;
+ case Ico_U1:
+ vassert(ty == Ity_I1);
+ u = vex_sx_to_64(e->Iex.Const.con->Ico.U1, 1);
+ break;
default:
goto irreducible;
}
@@ -1176,7 +1190,7 @@ static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e)
/* ---------------------- MULTIPLEX ---------------------- */
case Iex_ITE: {
/* ITE(ccexpr, iftrue, iffalse) */
- if (ty == Ity_I64 || ty == Ity_I32) {
+ if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
HReg dst = newVRegI(env);
HReg iftrue = iselIntExpr_R(env, e->Iex.ITE.iftrue);
HReg iffalse = iselIntExpr_R(env, e->Iex.ITE.iffalse);
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:55
|
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 91 +++++++++++++++++++++++++++++++++++
1 file changed, 91 insertions(+)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index ccad384d4..3ef0aeb77 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -3534,6 +3534,95 @@ static Bool dis_vsetvl(/*MB_OUT*/ DisResult* dres,
return True;
}
+static ULong riscv_vfirst(VexGuestRISCV64State* guest, UInt vs2, UInt vm)
+{
+ ULong index = -1UL;
+ ULong* p0 = (ULong *)((char *)guest + OFFB_V0);
+ ULong* p = (ULong *)((char *)guest + OFFB_V0 + vs2 * sizeof(guest->guest_v0));
+
+ for (UInt o = 0; o < guest->guest_vl && index == -1; o += 64) {
+ UInt remain = guest->guest_vl - o;
+ UInt step = (remain > 64) ? 64 : remain;
+
+ ULong v = *p++;
+ ULong v0 = (vm == 1) ? -1UL : *p0++;
+ v &= v0;
+ for (ULong i = 0; i < step; ++i) {
+ if (v & (1UL << i)) {
+ index = i + o;
+ break;
+ }
+ }
+ }
+
+ return index;
+}
+
+// From Hacker's Delight
+static UInt round_down_to_pow2(UInt x)
+{
+ x = x | (x >> 1);
+ x = x | (x >> 2);
+ x = x | (x >> 4);
+ x = x | (x >> 8);
+ x = x | (x >> 16);
+ return x - (x >> 1);
+}
+
+static Bool dis_vfirst_m(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vm = INSN(25, 25);
+ UInt vs2 = INSN(24, 20);
+ UInt rd = INSN(11, 7);
+
+ // lack ctz (count trailing zeros) like instruction in the backend, so use
+ // helper function
+ IRTemp index = newTemp(irsb, Ity_I64);
+ IRDirty *d = unsafeIRDirty_1_N(index,
+ 0,
+ "riscv_vfirst",
+ &riscv_vfirst,
+ mkIRExprVec_3(IRExpr_GSPTR(), mkU32(vs2), mkU32(vm)));
+ d->nFxState = 1;
+ vex_bzero(&d->fxState, sizeof(d->fxState));
+ d->fxState[0].fx = Ifx_Read;
+ d->fxState[0].offset = offsetVReg(vs2);
+ // do_shadow_Dirty doesn't accept non-power-2 size yet
+ d->fxState[0].size = round_down_to_pow2(guest->guest_vl / 8);
+
+ stmt(irsb, IRStmt_Dirty(d));
+ putIReg64(irsb, rd, mkexpr(index));
+
+ return True;
+}
+
+static Bool dis_opmvv(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt funct6 = INSN(31, 26);
+
+ switch (funct6) {
+ case 0b010000:
+ switch (INSN(19, 15)) {
+ case 0b10001:
+ return dis_vfirst_m(dres, irsb, insn, guest_pc_curr_instr, guest);
+ default:
+ return False;
+ }
+ return False;
+ default:
+ return False;
+ }
+ return False;
+}
+
static UInt decode_eew(UInt raw_eew)
{
switch (raw_eew) {
@@ -3639,6 +3728,8 @@ static Bool dis_RV64V(/*MB_OUT*/ DisResult* dres,
switch (INSN(6, 0)) {
case 0b1010111:
switch (INSN(14, 12)) {
+ case 0b010: // OPMVV
+ return dis_opmvv(dres, irsb, insn, guest_pc_curr_instr, guest);
case 0b111: // vsetvl
return dis_vsetvl(dres, irsb, insn, guest_pc_curr_instr);
default:
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:53
|
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index 30644e171..ccad384d4 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -826,6 +826,8 @@ static const HChar* nameCSR(UInt csr)
return "frm";
case 0x003:
return "fcsr";
+ case 0xc20:
+ return "vl";
default:
vpanic("nameCSR(riscv64)");
}
@@ -3376,7 +3378,7 @@ static Bool dis_RV64Zicsr(/*MB_OUT*/ DisResult* dres,
UInt rd = INSN(11, 7);
UInt rs1 = INSN(19, 15);
UInt csr = INSN(31, 20);
- if (csr != 0x001 && csr != 0x002 && csr != 0x003) {
+ if (csr != 0x001 && csr != 0x002 && csr != 0x003 && csr != 0xc20) {
/* Invalid CSRRS, fall through. */
} else {
switch (csr) {
@@ -3419,6 +3421,15 @@ static Bool dis_RV64Zicsr(/*MB_OUT*/ DisResult* dres,
binop(Iop_And32, getIReg32(rs1), mkU32(0xff))));
break;
}
+ case 0xc20: {
+ /* vl */
+ IRTemp vl = newTemp(irsb, Ity_I64);
+ assign(irsb, vl, IRExpr_Get(OFFB_VL, Ity_I64));
+ if (rd != 0)
+ putIReg64(irsb, rd, mkexpr(vl));
+ vassert(rs1 == 0);
+ break;
+ }
default:
vassert(0);
}
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:51
|
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 110 ++++++++++++++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index 6407692f9..30644e171 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -123,6 +123,8 @@ static IRExpr* mkU64(ULong i) { return IRExpr_Const(IRConst_U64(i)); }
/* Create an expression to produce a 32-bit constant. */
static IRExpr* mkU32(UInt i) { return IRExpr_Const(IRConst_U32(i)); }
+static IRExpr* mkU16(UInt i) { return IRExpr_Const(IRConst_U16((UShort)i)); }
+
/* Create an expression to produce an 8-bit constant. */
static IRExpr* mkU8(UInt i)
{
@@ -130,6 +132,17 @@ static IRExpr* mkU8(UInt i)
return IRExpr_Const(IRConst_U8((UChar)i));
}
+static IRExpr* mkU(IRType ty, ULong i)
+{
+ switch (ty) {
+ case Ity_I8: return mkU8((UChar)i);
+ case Ity_I16: return mkU16((UShort)i);
+ case Ity_I32: return mkU32((UInt)i);
+ case Ity_I64: return mkU64(i);
+ default: vassert(0);
+ }
+}
+
/* Create an expression to read a temporary. */
static IRExpr* mkexpr(IRTemp tmp) { return IRExpr_RdTmp(tmp); }
@@ -3510,6 +3523,98 @@ static Bool dis_vsetvl(/*MB_OUT*/ DisResult* dres,
return True;
}
+static UInt decode_eew(UInt raw_eew)
+{
+ switch (raw_eew) {
+ case 0b000: return 8;
+ case 0b101: return 16;
+ case 0b110: return 32;
+ case 0b111: return 64;
+ default: vassert(0);
+ }
+}
+
+static Bool dis_ldst(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ VexGuestRISCV64State* guest)
+{
+ UInt vd = INSN(11, 7);
+ UInt width = INSN(14, 12);
+ UInt rs1 = INSN(19, 15);
+ UInt umop = INSN(24, 20);
+ UInt vm = INSN(25, 25);
+ UInt mew_mop = INSN(28, 26);
+
+ // TODO: only part of all ld/st instructions are handled
+ if (!(mew_mop == 0b000 &&
+ (umop == 0b00000 || umop == 0b10000))) { // ignore fault-only-first
+ return False;
+ }
+
+ Bool is_load = INSN(6, 0) == 0b0000111;
+
+ DIP("%s - vl: %llu, insn: %x, vtype: %llx, vreg: %s\n",
+ is_load ? "vload" : "vstore",
+ guest->guest_vl, insn, guest->guest_vtype, nameVReg(vd));
+
+ UInt eew_b = decode_eew(width) / 8;
+ IRType ty = integerIRTypeOfSize(eew_b);
+ UInt offset = 0;
+ if (vm == 1) { // disabled
+ // It's possible to use a larger ty than the elem size
+ for (UInt i = 0; i < guest->guest_vl; ++i) {
+ IRExpr* addr = binop(Iop_Add64, getIReg64(rs1), mkU64(offset));
+
+ if (is_load) {
+ putVReg(irsb, vd, offset, loadLE(ty, addr));
+ } else {
+ storeLE(irsb, addr, getVReg(vd, offset, ty));
+ }
+
+ offset += eew_b;
+ }
+ } else { // enabled
+ for (UInt i = 0; i < guest->guest_vl; ++i) {
+ IRExpr* addr = binop(Iop_Add64, getIReg64(rs1), mkU64(offset));
+ UInt mask_offset = i / 64 * 8;
+ IRExpr* guard = binop(Iop_CmpNE64,
+ mkU64(0),
+ binop(Iop_And64,
+ getVReg(0 /* v0 */, mask_offset, Ity_I64),
+ mkU64(1UL << (i % 64))));
+
+ if (is_load) {
+ IRLoadGOp no_cvt = ILGop_INVALID;
+ switch (ty) {
+ case Ity_I8: no_cvt = ILGop_Ident8; break;
+ case Ity_I16: no_cvt = ILGop_Ident16; break;
+ case Ity_I32: no_cvt = ILGop_Ident32; break;
+ case Ity_I64: no_cvt = ILGop_Ident64; break;
+ default: vassert(0);
+ }
+
+ UInt vma = SLICE_UInt(guest->guest_vtype, 7, 7);
+ IRExpr* alt = (vma == 0) ? getVReg(vd, offset, ty) : mkU(ty, -1UL);
+ IRTemp res = newTemp(irsb, ty);
+ stmt(irsb, IRStmt_LoadG(Iend_LE, no_cvt, res, addr, alt, guard));
+ putVReg(irsb, vd, offset, mkexpr(res));
+ } else {
+ stmt(irsb, IRStmt_StoreG(Iend_LE, addr, getVReg(vd, offset, ty), guard));
+ }
+
+ offset += eew_b;
+ }
+ }
+
+ putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_TooManyIR;
+
+ return True;
+}
+
static Bool dis_RV64V(/*MB_OUT*/ DisResult* dres,
/*OUT*/ IRSB* irsb,
UInt insn,
@@ -3517,6 +3622,8 @@ static Bool dis_RV64V(/*MB_OUT*/ DisResult* dres,
const VexAbiInfo* abiinfo)
{
+ VexGuestRISCV64State* guest = abiinfo->riscv64_guest_state;
+
// spec - 10. Vector Arithmetic Instruction Formats
switch (INSN(6, 0)) {
case 0b1010111:
@@ -3526,6 +3633,9 @@ static Bool dis_RV64V(/*MB_OUT*/ DisResult* dres,
default:
return False;
}
+ case 0b0000111: // load
+ case 0b0100111: // store
+ return dis_ldst(dres, irsb, insn, guest_pc_curr_instr, guest);
default:
return False;
}
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:49
|
There is no conditional move instruction on RISC-V so far, so support
LoadG/StoreG with branches in the host isel.
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/host_riscv64_defs.c | 133 +++++++++++++++++++++++++++++++++++
VEX/priv/host_riscv64_defs.h | 23 ++++++
VEX/priv/host_riscv64_isel.c | 58 +++++++++++++++
VEX/priv/ir_defs.c | 6 ++
VEX/priv/ir_opt.c | 4 +-
VEX/pub/libvex_ir.h | 2 +
memcheck/mc_translate.c | 4 ++
7 files changed, 229 insertions(+), 1 deletion(-)
diff --git a/VEX/priv/host_riscv64_defs.c b/VEX/priv/host_riscv64_defs.c
index f6137b55b..457b2fde4 100644
--- a/VEX/priv/host_riscv64_defs.c
+++ b/VEX/priv/host_riscv64_defs.c
@@ -440,6 +440,20 @@ RISCV64Instr_Load(RISCV64LoadOp op, HReg dst, HReg base, Int soff12)
return i;
}
+RISCV64Instr*
+RISCV64Instr_LoadG(RISCV64LoadOp op, HReg dst, HReg base, Int soff12, HReg guard, HReg alt)
+{
+ RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr));
+ i->tag = RISCV64in_LoadG;
+ i->RISCV64in.LoadG.op = op;
+ i->RISCV64in.LoadG.dst = dst;
+ i->RISCV64in.LoadG.base = base;
+ i->RISCV64in.LoadG.soff12 = soff12;
+ i->RISCV64in.LoadG.guard = guard;
+ i->RISCV64in.LoadG.alt = alt;
+ return i;
+}
+
RISCV64Instr*
RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12)
{
@@ -452,6 +466,19 @@ RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12)
return i;
}
+RISCV64Instr*
+RISCV64Instr_StoreG(RISCV64StoreOp op, HReg src, HReg base, Int soff12, HReg guard)
+{
+ RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr));
+ i->tag = RISCV64in_StoreG;
+ i->RISCV64in.StoreG.op = op;
+ i->RISCV64in.StoreG.src = src;
+ i->RISCV64in.StoreG.base = base;
+ i->RISCV64in.StoreG.soff12 = soff12;
+ i->RISCV64in.StoreG.guard = guard;
+ return i;
+}
+
RISCV64Instr* RISCV64Instr_LoadR(RISCV64LoadROp op, HReg dst, HReg addr)
{
RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr));
@@ -703,6 +730,16 @@ void ppRISCV64Instr(const RISCV64Instr* i, Bool mode64)
ppHRegRISCV64(i->RISCV64in.Load.base);
vex_printf(")");
return;
+ case RISCV64in_LoadG:
+ vex_printf("%-7s ", showRISCV64LoadOp(i->RISCV64in.LoadG.op));
+ ppHRegRISCV64(i->RISCV64in.LoadG.dst);
+ vex_printf(", %d(", i->RISCV64in.LoadG.soff12);
+ ppHRegRISCV64(i->RISCV64in.LoadG.base);
+ vex_printf("), ");
+ ppHRegRISCV64(i->RISCV64in.LoadG.guard);
+ vex_printf(", ");
+ ppHRegRISCV64(i->RISCV64in.LoadG.alt);
+ return;
case RISCV64in_Store:
vex_printf("%-7s ", showRISCV64StoreOp(i->RISCV64in.Store.op));
ppHRegRISCV64(i->RISCV64in.Store.src);
@@ -710,6 +747,14 @@ void ppRISCV64Instr(const RISCV64Instr* i, Bool mode64)
ppHRegRISCV64(i->RISCV64in.Store.base);
vex_printf(")");
return;
+ case RISCV64in_StoreG:
+ vex_printf("%-7s ", showRISCV64StoreOp(i->RISCV64in.StoreG.op));
+ ppHRegRISCV64(i->RISCV64in.StoreG.src);
+ vex_printf(", %d(", i->RISCV64in.StoreG.soff12);
+ ppHRegRISCV64(i->RISCV64in.StoreG.base);
+ vex_printf("), ");
+ ppHRegRISCV64(i->RISCV64in.StoreG.guard);
+ return;
case RISCV64in_LoadR:
vex_printf("%-7s ", showRISCV64LoadROp(i->RISCV64in.LoadR.op));
ppHRegRISCV64(i->RISCV64in.LoadR.dst);
@@ -997,10 +1042,21 @@ void getRegUsage_RISCV64Instr(HRegUsage* u, const RISCV64Instr* i, Bool mode64)
addHRegUse(u, HRmWrite, i->RISCV64in.Load.dst);
addHRegUse(u, HRmRead, i->RISCV64in.Load.base);
return;
+ case RISCV64in_LoadG:
+ addHRegUse(u, HRmWrite, i->RISCV64in.LoadG.dst);
+ addHRegUse(u, HRmRead, i->RISCV64in.LoadG.base);
+ addHRegUse(u, HRmRead, i->RISCV64in.LoadG.guard);
+ addHRegUse(u, HRmRead, i->RISCV64in.LoadG.alt);
+ return;
case RISCV64in_Store:
addHRegUse(u, HRmRead, i->RISCV64in.Store.src);
addHRegUse(u, HRmRead, i->RISCV64in.Store.base);
return;
+ case RISCV64in_StoreG:
+ addHRegUse(u, HRmRead, i->RISCV64in.StoreG.src);
+ addHRegUse(u, HRmRead, i->RISCV64in.StoreG.base);
+ addHRegUse(u, HRmRead, i->RISCV64in.StoreG.guard);
+ return;
case RISCV64in_LoadR:
addHRegUse(u, HRmWrite, i->RISCV64in.LoadR.dst);
addHRegUse(u, HRmRead, i->RISCV64in.LoadR.addr);
@@ -1218,10 +1274,21 @@ void mapRegs_RISCV64Instr(HRegRemap* m, RISCV64Instr* i, Bool mode64)
mapReg(m, &i->RISCV64in.Load.dst);
mapReg(m, &i->RISCV64in.Load.base);
return;
+ case RISCV64in_LoadG:
+ mapReg(m, &i->RISCV64in.LoadG.dst);
+ mapReg(m, &i->RISCV64in.LoadG.base);
+ mapReg(m, &i->RISCV64in.LoadG.guard);
+ mapReg(m, &i->RISCV64in.LoadG.alt);
+ return;
case RISCV64in_Store:
mapReg(m, &i->RISCV64in.Store.src);
mapReg(m, &i->RISCV64in.Store.base);
return;
+ case RISCV64in_StoreG:
+ mapReg(m, &i->RISCV64in.StoreG.src);
+ mapReg(m, &i->RISCV64in.StoreG.base);
+ mapReg(m, &i->RISCV64in.StoreG.guard);
+ return;
case RISCV64in_LoadR:
mapReg(m, &i->RISCV64in.LoadR.dst);
mapReg(m, &i->RISCV64in.LoadR.addr);
@@ -1914,6 +1981,43 @@ Int emit_RISCV64Instr(/*MB_MOD*/ Bool* is_profInc,
}
break;
}
+ case RISCV64in_LoadG: {
+ /* beq guard, zero, 1f
+ * l<size> dst, soff12(base)
+ * c.j 2f
+ * 1: c.mv dst, alt
+ * 2:
+ */
+ UInt guard = iregEnc(i->RISCV64in.LoadG.guard);
+ p = emit_B(p, 0b1100011, (8 >> 1) & 0xfff, 0b000, guard, 0 /*x0/zero*/);
+
+ UInt dst = iregEnc(i->RISCV64in.LoadG.dst);
+ UInt base = iregEnc(i->RISCV64in.LoadG.base);
+ Int soff12 = i->RISCV64in.LoadG.soff12;
+ vassert(soff12 >= -2048 && soff12 < 2048);
+ UInt imm11_0 = soff12 & 0xfff;
+ switch (i->RISCV64in.LoadG.op) {
+ case RISCV64op_LD:
+ p = emit_I(p, 0b0000011, dst, 0b011, base, imm11_0);
+ goto done;
+ case RISCV64op_LW:
+ p = emit_I(p, 0b0000011, dst, 0b010, base, imm11_0);
+ goto done;
+ case RISCV64op_LH:
+ p = emit_I(p, 0b0000011, dst, 0b001, base, imm11_0);
+ goto done;
+ case RISCV64op_LB:
+ p = emit_I(p, 0b0000011, dst, 0b000, base, imm11_0);
+ goto done;
+ }
+
+ p = emit_CJ(p, 0b01, (4 >> 1) & 0x7ff, 0b101);
+
+ UInt alt = iregEnc(i->RISCV64in.LoadG.alt);
+ p = emit_CR(p, 0b10, alt, dst, 0b1000);
+
+ break;
+ }
case RISCV64in_Store: {
/* s<size> src, soff12(base) */
UInt src = iregEnc(i->RISCV64in.Store.src);
@@ -1937,6 +2041,35 @@ Int emit_RISCV64Instr(/*MB_MOD*/ Bool* is_profInc,
}
goto done;
}
+ case RISCV64in_StoreG: {
+ /* beq guard, zero, 1f
+ * s<size> src, soff12(base)
+ * 1:
+ */
+ UInt guard = iregEnc(i->RISCV64in.StoreG.guard);
+ p = emit_B(p, 0b1100011, (8 >> 1) & 0xfff, 0b000, guard, 0 /*x0/zero*/);
+
+ UInt src = iregEnc(i->RISCV64in.StoreG.src);
+ UInt base = iregEnc(i->RISCV64in.StoreG.base);
+ Int soff12 = i->RISCV64in.StoreG.soff12;
+ vassert(soff12 >= -2048 && soff12 < 2048);
+ UInt imm11_0 = soff12 & 0xfff;
+ switch (i->RISCV64in.StoreG.op) {
+ case RISCV64op_SD:
+ p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src);
+ goto done;
+ case RISCV64op_SW:
+ p = emit_S(p, 0b0100011, imm11_0, 0b010, base, src);
+ goto done;
+ case RISCV64op_SH:
+ p = emit_S(p, 0b0100011, imm11_0, 0b001, base, src);
+ goto done;
+ case RISCV64op_SB:
+ p = emit_S(p, 0b0100011, imm11_0, 0b000, base, src);
+ goto done;
+ }
+ goto done;
+ }
case RISCV64in_LoadR: {
/* lr.<size> dst, (addr) */
UInt dst = iregEnc(i->RISCV64in.LoadR.dst);
diff --git a/VEX/priv/host_riscv64_defs.h b/VEX/priv/host_riscv64_defs.h
index 1990fe3f5..45fadeb6c 100644
--- a/VEX/priv/host_riscv64_defs.h
+++ b/VEX/priv/host_riscv64_defs.h
@@ -324,7 +324,9 @@ typedef enum {
RISCV64in_ALUImm, /* Computational binary instruction, with
an immediate as the second input. */
RISCV64in_Load, /* Load from memory (sign-extended). */
+ RISCV64in_LoadG, /* Load from memory (sign-extended) with guard. */
RISCV64in_Store, /* Store to memory. */
+ RISCV64in_StoreG, /* Store to memory with guard. */
RISCV64in_LoadR, /* Load-reserved from memory (sign-extended). */
RISCV64in_StoreC, /* Store-conditional to memory. */
RISCV64in_CSRRW, /* Atomic swap of values in a CSR and an integer
@@ -382,6 +384,15 @@ typedef struct {
HReg base;
Int soff12; /* -2048 .. +2047 */
} Load;
+ /* Load from memory (sign-extended) with guard. */
+ struct {
+ RISCV64LoadOp op;
+ HReg dst;
+ HReg base;
+ Int soff12; /* -2048 .. +2047 */
+ HReg guard;
+ HReg alt;
+ } LoadG;
/* Store to memory. */
struct {
RISCV64StoreOp op;
@@ -389,6 +400,14 @@ typedef struct {
HReg base;
Int soff12; /* -2048 .. +2047 */
} Store;
+ /* Store to memory with guard. */
+ struct {
+ RISCV64StoreOp op;
+ HReg src;
+ HReg base;
+ Int soff12; /* -2048 .. +2047 */
+ HReg guard;
+ } StoreG;
/* Load-reserved from memory (sign-extended). */
struct {
RISCV64LoadROp op;
@@ -536,7 +555,11 @@ RISCV64Instr_ALUImm(RISCV64ALUImmOp op, HReg dst, HReg src, Int imm12);
RISCV64Instr*
RISCV64Instr_Load(RISCV64LoadOp op, HReg dst, HReg base, Int soff12);
RISCV64Instr*
+RISCV64Instr_LoadG(RISCV64LoadOp op, HReg dst, HReg base, Int soff12, HReg guard, HReg alt);
+RISCV64Instr*
RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12);
+RISCV64Instr*
+RISCV64Instr_StoreG(RISCV64StoreOp op, HReg src, HReg base, Int soff12, HReg guard);
RISCV64Instr* RISCV64Instr_LoadR(RISCV64LoadROp op, HReg dst, HReg addr);
RISCV64Instr*
RISCV64Instr_StoreC(RISCV64StoreCOp op, HReg res, HReg src, HReg addr);
diff --git a/VEX/priv/host_riscv64_isel.c b/VEX/priv/host_riscv64_isel.c
index 87213fb86..355f559bd 100644
--- a/VEX/priv/host_riscv64_isel.c
+++ b/VEX/priv/host_riscv64_isel.c
@@ -1587,6 +1587,37 @@ static void iselStmt(ISelEnv* env, IRStmt* stmt)
}
switch (stmt->tag) {
+ /* ----------------------- LoadG ------------------------ */
+ case Ist_LoadG: {
+ IRLoadG* lg = stmt->Ist.LoadG.details;
+ if (lg->end != Iend_LE)
+ goto stmt_fail;
+
+ IRType tyd = typeOfIRExpr(env->type_env, lg->alt);
+ if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
+ HReg dst = lookupIRTemp(env, lg->dst);
+ HReg addr = iselIntExpr_R(env, lg->addr);
+ HReg guard = iselIntExpr_R(env, lg->guard);
+ HReg alt = iselIntExpr_R(env, lg->alt);
+
+ vassert(lg->cvt == ILGop_Ident8 || lg->cvt == ILGop_Ident16 ||
+ lg->cvt == ILGop_Ident32 || lg->cvt == ILGop_Ident64);
+
+ if (tyd == Ity_I64)
+ addInstr(env, RISCV64Instr_LoadG(RISCV64op_LD, dst, addr, 0, guard, alt));
+ else if (tyd == Ity_I32)
+ addInstr(env, RISCV64Instr_LoadG(RISCV64op_LW, dst, addr, 0, guard, alt));
+ else if (tyd == Ity_I16)
+ addInstr(env, RISCV64Instr_LoadG(RISCV64op_LH, dst, addr, 0, guard, alt));
+ else if (tyd == Ity_I8)
+ addInstr(env, RISCV64Instr_LoadG(RISCV64op_LB, dst, addr, 0, guard, alt));
+ else
+ vassert(0);
+ return;
+ }
+ return;
+ }
+
/* ------------------------ STORE ------------------------ */
/* Little-endian write to memory. */
case Ist_Store: {
@@ -1623,6 +1654,33 @@ static void iselStmt(ISelEnv* env, IRStmt* stmt)
break;
}
+ /* ----------------------- StoreG ------------------------ */
+ case Ist_StoreG: {
+ IRStoreG* sg = stmt->Ist.StoreG.details;
+ if (sg->end != Iend_LE)
+ goto stmt_fail;
+
+ IRType tyd = typeOfIRExpr(env->type_env, sg->data);
+ if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
+ HReg src = iselIntExpr_R(env, sg->data);
+ HReg addr = iselIntExpr_R(env, sg->addr);
+ HReg guard = iselIntExpr_R(env, sg->guard);
+
+ if (tyd == Ity_I64)
+ addInstr(env, RISCV64Instr_StoreG(RISCV64op_SD, src, addr, 0, guard));
+ else if (tyd == Ity_I32)
+ addInstr(env, RISCV64Instr_StoreG(RISCV64op_SW, src, addr, 0, guard));
+ else if (tyd == Ity_I16)
+ addInstr(env, RISCV64Instr_StoreG(RISCV64op_SH, src, addr, 0, guard));
+ else if (tyd == Ity_I8)
+ addInstr(env, RISCV64Instr_StoreG(RISCV64op_SB, src, addr, 0, guard));
+ else
+ vassert(0);
+ return;
+ }
+ return;
+ }
+
/* ------------------------- PUT ------------------------- */
/* Write guest state, fixed offset. */
case Ist_Put: {
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 875816c78..697e34313 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -2032,6 +2032,8 @@ void ppIRLoadGOp ( IRLoadGOp cvt )
case ILGop_IdentV128: vex_printf("IdentV128"); break;
case ILGop_Ident64: vex_printf("Ident64"); break;
case ILGop_Ident32: vex_printf("Ident32"); break;
+ case ILGop_Ident16: vex_printf("Ident16"); break;
+ case ILGop_Ident8: vex_printf("Ident8"); break;
case ILGop_16Uto32: vex_printf("16Uto32"); break;
case ILGop_16Sto32: vex_printf("16Sto32"); break;
case ILGop_8Uto32: vex_printf("8Uto32"); break;
@@ -4261,6 +4263,10 @@ void typeOfIRLoadGOp ( IRLoadGOp cvt,
*t_res = Ity_I64; *t_arg = Ity_I64; break;
case ILGop_Ident32:
*t_res = Ity_I32; *t_arg = Ity_I32; break;
+ case ILGop_Ident16:
+ *t_res = Ity_I16; *t_arg = Ity_I16; break;
+ case ILGop_Ident8:
+ *t_res = Ity_I8; *t_arg = Ity_I8; break;
case ILGop_16Uto32: case ILGop_16Sto32:
*t_res = Ity_I32; *t_arg = Ity_I16; break;
case ILGop_8Uto32: case ILGop_8Sto32:
diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
index 93dd6188e..e790acb5b 100644
--- a/VEX/priv/ir_opt.c
+++ b/VEX/priv/ir_opt.c
@@ -2996,7 +2996,9 @@ static IRSB* cprop_BB_WRK ( IRSB* in, Bool mustRetainNoOps, Bool doFolding )
switch (lg->cvt) {
case ILGop_IdentV128:
case ILGop_Ident64:
- case ILGop_Ident32: break;
+ case ILGop_Ident32:
+ case ILGop_Ident16:
+ case ILGop_Ident8: break;
case ILGop_8Uto32: cvtOp = Iop_8Uto32; break;
case ILGop_8Sto32: cvtOp = Iop_8Sto32; break;
case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index b4b1e9d6e..c7b97c11d 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -2829,6 +2829,8 @@ typedef
ILGop_IdentV128, /* 128 bit vector, no conversion */
ILGop_Ident64, /* 64 bit, no conversion */
ILGop_Ident32, /* 32 bit, no conversion */
+ ILGop_Ident16, /* 16 bit, no conversion */
+ ILGop_Ident8, /* 8 bit, no conversion */
ILGop_16Uto32, /* 16 bit load, Z-widen to 32 */
ILGop_16Sto32, /* 16 bit load, S-widen to 32 */
ILGop_8Uto32, /* 8 bit load, Z-widen to 32 */
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 72ccb3c8c..b6c63aa05 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -6987,6 +6987,8 @@ static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break;
case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
+ case ILGop_Ident16: loadedTy = Ity_I16; vwiden = Iop_INVALID; break;
+ case ILGop_Ident8: loadedTy = Ity_I8; vwiden = Iop_INVALID; break;
case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
@@ -7619,6 +7621,8 @@ static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
case ILGop_IdentV128: loadedTy = Ity_V128; break;
case ILGop_Ident64: loadedTy = Ity_I64; break;
case ILGop_Ident32: loadedTy = Ity_I32; break;
+ case ILGop_Ident16: loadedTy = Ity_I16; break;
+ case ILGop_Ident8: loadedTy = Ity_I8; break;
case ILGop_16Uto32: loadedTy = Ity_I16; break;
case ILGop_16Sto32: loadedTy = Ity_I16; break;
case ILGop_8Uto32: loadedTy = Ity_I8; break;
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:49
|
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 120 ++++++++++++++++++++++++++++++++++
1 file changed, 120 insertions(+)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index 423260679..6407692f9 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -58,6 +58,8 @@
#include "main_globals.h"
#include "main_util.h"
+#include "coregrind/pub_core_transtab.h"
+
/*------------------------------------------------------------*/
/*--- Debugging output ---*/
/*------------------------------------------------------------*/
@@ -3415,6 +3417,122 @@ static Bool dis_RV64Zicsr(/*MB_OUT*/ DisResult* dres,
return False;
}
+static inline Long sext_slice_ulong(ULong value, UInt bmax, UInt bmin)
+{
+ return ((Long)value) << (63 - bmax) >> (63 - (bmax - bmin));
+}
+
+#define MAX_VL (-1UL)
+#define KEEP_VL (-2UL)
+
+static ULong helper_vsetvl(VexGuestRISCV64State* guest, ULong avl, ULong vtype)
+{
+ UInt sew = SLICE_UInt(vtype, 5, 3);
+ Int lmul = sext_slice_ulong(vtype, 3, 0);
+
+ ULong vlmax = VLEN >> (sew + 3 - lmul);
+ ULong vl = guest->guest_vl;
+ if (avl != KEEP_VL)
+ vl = (avl < vlmax) ? avl : vlmax;
+
+ guest->guest_vl = vl;
+ guest->guest_vtype = vtype;
+
+ invalidateFastCache();
+
+ DIP("vsetvl - vl: %llu, sew: 0x%x, lmul: %d, avl: %llu, vtype: %llx\n",
+ vl, sew, lmul, avl, vtype);
+
+ return vl;
+}
+
+static Bool dis_vsetvl(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr)
+{
+ UInt rd = INSN(11, 7);
+ IRExpr* avl;
+ IRExpr* vtype;
+
+ if (INSN(31, 30) == 0b11) { // vsetivli
+ UInt uimm = INSN(19, 15);
+ Int zimm = INSN(29, 20);
+ avl = mkU64(uimm);
+ vtype = mkU64(zimm);
+ } else if (INSN(31, 31) == 0b0 || INSN(31, 25) == 0b1000000) {
+ UInt rs1 = INSN(19, 15);
+ if (rs1 != 0) {
+ avl = getIReg64(rs1);
+ } else if (rd == 0) {
+ avl = mkU64(KEEP_VL);
+ } else {
+ avl = mkU64(MAX_VL);
+ }
+
+ if (INSN(31, 31) == 0b0) { // vsetvli
+ Int zimm = INSN(30, 20);
+ vtype = mkU64(zimm);
+ } else { // vsetvl
+ UInt rs2 = INSN(24, 20);
+ vtype = getIReg64(rs2);
+ }
+ } else {
+ vassert(0);
+ }
+
+ IRTemp vl = newTemp(irsb, Ity_I64);
+ IRDirty *d = unsafeIRDirty_1_N(vl,
+ 0,
+ "helper_vsetvl",
+ &helper_vsetvl,
+ mkIRExprVec_3(IRExpr_GSPTR(), avl, vtype));
+
+ d->nFxState = 2;
+ vex_bzero(&d->fxState, sizeof(d->fxState));
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_VL;
+ d->fxState[0].size = sizeof(ULong);
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_VTYPE;
+ d->fxState[1].size = sizeof(ULong);
+
+ stmt(irsb, IRStmt_Dirty(d));
+
+ if (rd != 0) {
+ putIReg64(irsb, rd, mkexpr(vl));
+ }
+
+ putPC(irsb, mkU64(guest_pc_curr_instr + 4));
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_SyncupEnv;
+
+ return True;
+}
+
+static Bool dis_RV64V(/*MB_OUT*/ DisResult* dres,
+ /*OUT*/ IRSB* irsb,
+ UInt insn,
+ Addr guest_pc_curr_instr,
+ const VexAbiInfo* abiinfo)
+
+{
+ // spec - 10. Vector Arithmetic Instruction Formats
+ switch (INSN(6, 0)) {
+ case 0b1010111:
+ switch (INSN(14, 12)) {
+ case 0b111: // vsetvl
+ return dis_vsetvl(dres, irsb, insn, guest_pc_curr_instr);
+ default:
+ return False;
+ }
+ default:
+ return False;
+ }
+
+ return False;
+}
+
static Bool dis_RISCV64_standard(/*MB_OUT*/ DisResult* dres,
/*OUT*/ IRSB* irsb,
UInt insn,
@@ -3437,6 +3555,8 @@ static Bool dis_RISCV64_standard(/*MB_OUT*/ DisResult* dres,
ok = dis_RV64D(dres, irsb, insn);
if (!ok)
ok = dis_RV64Zicsr(dres, irsb, insn);
+ if (!ok)
+ ok = dis_RV64V(dres, irsb, insn, guest_pc_curr_instr, abiinfo);
if (ok)
return True;
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:49
|
disp_run_translations in dispatch-riscv64-linux.S shifts guest_state by
2048; the helper call needs to adjust for this accordingly.
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/host_riscv64_isel.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/VEX/priv/host_riscv64_isel.c b/VEX/priv/host_riscv64_isel.c
index 355f559bd..127200d8e 100644
--- a/VEX/priv/host_riscv64_isel.c
+++ b/VEX/priv/host_riscv64_isel.c
@@ -425,8 +425,10 @@ static Bool doHelperCall(/*OUT*/ UInt* stackAdjustAfterCall,
} else if (arg->tag == Iex_GSPTR) {
if (nextArgReg >= RISCV64_N_ARGREGS)
return False; /* Out of argregs. */
+ /* See dispatch-riscv64-linux.S for -2048 */
addInstr(env,
- RISCV64Instr_MV(argregs[nextArgReg], hregRISCV64_x8()));
+ RISCV64Instr_ALUImm(RISCV64op_ADDI, argregs[nextArgReg],
+ hregRISCV64_x8(), -2048));
nextArgReg++;
} else if (arg->tag == Iex_VECRET) {
/* Because of the go_fast logic above, we can't get here, since
@@ -461,7 +463,10 @@ static Bool doHelperCall(/*OUT*/ UInt* stackAdjustAfterCall,
} else if (arg->tag == Iex_GSPTR) {
if (nextArgReg >= RISCV64_N_ARGREGS)
return False; /* Out of argregs. */
- tmpregs[nextArgReg] = hregRISCV64_x8();
+
+ addInstr(env,
+ RISCV64Instr_ALUImm(RISCV64op_ADDI, tmpregs[nextArgReg],
+ hregRISCV64_x8(), -2048));
nextArgReg++;
} else if (arg->tag == Iex_VECRET) {
vassert(!hregIsInvalid(r_vecRetAddr));
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:49
|
During translation to IR, states such as guest vl and vtype are
referenced directly; add them to cpu_state to differentiate the same
guest code translated under different CPU states.
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/pub/libvex_guest_riscv64.h | 9 +++++++++
coregrind/m_scheduler/scheduler.c | 17 +++++++++++++----
coregrind/m_translate.c | 3 +++
coregrind/m_transtab.c | 26 ++++++++++++++++++++++++--
coregrind/pub_core_transtab.h | 5 +++++
5 files changed, 54 insertions(+), 6 deletions(-)
diff --git a/VEX/pub/libvex_guest_riscv64.h b/VEX/pub/libvex_guest_riscv64.h
index 50bec58bd..36149bbf2 100644
--- a/VEX/pub/libvex_guest_riscv64.h
+++ b/VEX/pub/libvex_guest_riscv64.h
@@ -177,6 +177,15 @@ typedef struct {
/* Initialise all guest riscv64 state. */
void LibVEX_GuestRISCV64_initialise(/*OUT*/ VexGuestRISCV64State* vex_state);
+static inline ULong get_cpu_state(VexGuestRISCV64State* guest)
+{
+#if defined(VGA_riscv64)
+ return guest->guest_vl | (guest->guest_vtype << 16);
+#else
+ return 0;
+#endif
+}
+
#endif /* ndef __LIBVEX_PUB_GUEST_RISCV64_H */
/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 4e18c80fa..6d9a721c0 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -948,6 +948,8 @@ void run_thread_for_a_while ( /*OUT*/HWord* two_words,
do_pre_run_checks( tst );
/* end Paranoia */
+ ULong cpu_state = get_cpu_state(&tst->arch.vex);
+
/* Futz with the XIndir stats counters. */
vg_assert(VG_(stats__n_xIndirs_32) == 0);
vg_assert(VG_(stats__n_xIndir_hits1_32) == 0);
@@ -977,6 +979,7 @@ void run_thread_for_a_while ( /*OUT*/HWord* two_words,
to the scheduler. */
Bool found = VG_(search_transtab)(&res, NULL, NULL,
(Addr)tst->arch.vex.VG_INSTR_PTR,
+ cpu_state,
True/*upd cache*/
);
if (LIKELY(found)) {
@@ -1133,16 +1136,19 @@ static void handle_tt_miss ( ThreadId tid )
Bool found;
Addr ip = VG_(get_IP)(tid);
+ volatile ThreadState* tst = VG_(get_ThreadState)(tid);
+ ULong cpu_state = get_cpu_state(&tst->arch.vex);
+
/* Trivial event. Miss in the fast-cache. Do a full
lookup for it. */
found = VG_(search_transtab)( NULL, NULL, NULL,
- ip, True/*upd_fast_cache*/ );
+ ip, cpu_state, True/*upd_fast_cache*/ );
if (UNLIKELY(!found)) {
/* Not found; we need to request a translation. */
if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
bbs_done, True/*allow redirection*/ )) {
found = VG_(search_transtab)( NULL, NULL, NULL,
- ip, True );
+ ip, cpu_state, True );
vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
} else {
@@ -1163,14 +1169,17 @@ void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
SECno to_sNo = INV_SNO;
TTEno to_tteNo = INV_TTE;
+ volatile ThreadState* tst = VG_(get_ThreadState)(tid);
+ ULong cpu_state = get_cpu_state(&tst->arch.vex);
+
found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
- ip, False/*dont_upd_fast_cache*/ );
+ ip, cpu_state, False/*dont_upd_fast_cache*/ );
if (!found) {
/* Not found; we need to request a translation. */
if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
bbs_done, True/*allow redirection*/ )) {
found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
- ip, False );
+ ip, cpu_state, False );
vg_assert2(found, "handle_chain_me: missing tt_fast entry");
} else {
// If VG_(translate)() fails, it's because it had to throw a
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index dc3c65814..cad9184b9 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -1510,6 +1510,7 @@ Bool VG_(translate) ( ThreadId tid,
VexTranslateArgs vta;
VexTranslateResult tres;
VgCallbackClosure closure;
+ ULong cpu_state = 0;
/* Make sure Vex is initialised right. */
@@ -1754,6 +1755,7 @@ Bool VG_(translate) ( ThreadId tid,
vex_abiinfo.guest__use_fallback_LLSC = True;
ThreadState *tst = VG_(get_ThreadState)(tid);
vex_abiinfo.riscv64_guest_state = &tst->arch.vex;
+ cpu_state = get_cpu_state(&tst->arch.vex);
# endif
/* Set up closure args. */
@@ -1868,6 +1870,7 @@ Bool VG_(translate) ( ThreadId tid,
// addr, which might have been changed by the redirection
VG_(add_to_transtab)( &vge,
nraddr,
+ cpu_state,
(Addr)(&tmpbuf[0]),
tmpbuf_used,
tres.n_sc_extents > 0,
diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c
index 102108a35..06019efa1 100644
--- a/coregrind/m_transtab.c
+++ b/coregrind/m_transtab.c
@@ -192,6 +192,9 @@ typedef
may not be a lie, depending on whether or not we're doing
redirection. */
Addr entry;
+#ifdef VGA_riscv64
+ ULong cpu_state;
+#endif
/* Address range summary info: these are pointers back to
eclass[] entries in the containing Sector. Those entries in
@@ -1461,7 +1464,7 @@ static inline HTTno HASH_TT ( Addr key )
}
/* Invalidate the fast cache VG_(tt_fast). */
-static void invalidateFastCache ( void )
+void invalidateFastCache ( void )
{
for (UWord j = 0; j < VG_TT_FAST_SETS; j++) {
FastCacheSet* set = &VG_(tt_fast)[j];
@@ -1734,6 +1737,7 @@ static void initialiseSector ( SECno sno )
*/
void VG_(add_to_transtab)( const VexGuestExtents* vge,
Addr entry,
+ ULong cpu_state,
Addr code,
UInt code_len,
Bool is_self_checking,
@@ -1845,6 +1849,9 @@ void VG_(add_to_transtab)( const VexGuestExtents* vge,
(code_len == 0 ? 1 : (code_len / 4));
sectors[y].ttC[tteix].entry = entry;
+#ifdef VGA_riscv64
+ sectors[y].ttC[tteix].cpu_state = cpu_state;
+#endif
TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
sectors[y].ttH[tteix].status = InUse;
@@ -1905,6 +1912,14 @@ void VG_(add_to_transtab)( const VexGuestExtents* vge,
upd_eclasses_after_add( &sectors[y], tteix );
}
+static inline Bool cpu_state_match(TTEntryC* ttC, ULong cpu_state)
+{
+#ifdef VGA_riscv64
+ return ttC->cpu_state == cpu_state;
+#else
+ return True;
+#endif
+}
/* Search for the translation of the given guest address. If
requested, a successful search can also cause the fast-caches to be
@@ -1914,6 +1929,7 @@ Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode,
/*OUT*/SECno* res_sNo,
/*OUT*/TTEno* res_tteNo,
Addr guest_addr,
+ ULong cpu_state,
Bool upd_cache )
{
SECno i, sno;
@@ -1940,7 +1956,9 @@ Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode,
n_lookup_probes++;
tti = sectors[sno].htt[k];
if (tti < N_TTES_PER_SECTOR
- && sectors[sno].ttC[tti].entry == guest_addr) {
+ && sectors[sno].ttC[tti].entry == guest_addr
+ && cpu_state_match(&sectors[sno].ttC[tti], cpu_state)
+ ) {
/* found it */
if (upd_cache)
setFastCacheEntry(
@@ -2553,7 +2571,11 @@ void VG_(init_tt_tc) ( void )
have a lot of TTEntryCs so let's check that too. */
if (sizeof(HWord) == 8) {
vg_assert(sizeof(TTEntryH) <= 32);
+#ifdef VGA_riscv64
+ vg_assert(sizeof(TTEntryC) <= 120);
+#else
vg_assert(sizeof(TTEntryC) <= 112);
+#endif
}
else if (sizeof(HWord) == 4) {
vg_assert(sizeof(TTEntryH) <= 20);
diff --git a/coregrind/pub_core_transtab.h b/coregrind/pub_core_transtab.h
index cc70a2944..b352891cf 100644
--- a/coregrind/pub_core_transtab.h
+++ b/coregrind/pub_core_transtab.h
@@ -171,6 +171,7 @@ extern void VG_(init_tt_tc) ( void );
extern
void VG_(add_to_transtab)( const VexGuestExtents* vge,
Addr entry,
+ ULong cpu_state,
Addr code,
UInt code_len,
Bool is_self_checking,
@@ -194,6 +195,7 @@ extern Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode,
/*OUT*/SECno* res_sNo,
/*OUT*/TTEno* res_tteNo,
Addr guest_addr,
+ ULong cpu_state,
Bool upd_cache );
extern void VG_(discard_translations) ( Addr start, ULong range,
@@ -216,6 +218,9 @@ extern
Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
Addr guest_addr );
+extern
+void invalidateFastCache ( void );
+
// SB profiling stuff
typedef struct _SBProfEntry {
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:48
|
SyncupEnv is added for syncing up the environment so that the following
instructions can see the updates made by the previous instructions, e.g.
vl set by vsetvl.
TooManyIR is added for the cases where one guest instruction is translated
into many IRs; this stops the TB from including more guest instructions
and is used to avoid too many IRs in one TB.
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/host_riscv64_isel.c | 2 ++
VEX/priv/ir_defs.c | 2 ++
VEX/pub/libvex_ir.h | 7 ++++++-
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/VEX/priv/host_riscv64_isel.c b/VEX/priv/host_riscv64_isel.c
index 76fc3fd5c..87213fb86 100644
--- a/VEX/priv/host_riscv64_isel.c
+++ b/VEX/priv/host_riscv64_isel.c
@@ -1942,6 +1942,8 @@ static void iselNext(ISelEnv* env, IRExpr* next, IRJumpKind jk, Int offsIP)
/* Case: call/return (==boring) transfer to any address. */
switch (jk) {
case Ijk_Boring:
+ case Ijk_SyncupEnv:
+ case Ijk_TooManyIR:
case Ijk_Ret:
case Ijk_Call: {
HReg r = iselIntExpr_R(env, next);
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 2d82c41a1..875816c78 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -2083,6 +2083,8 @@ void ppIRJumpKind ( IRJumpKind kind )
case Ijk_Sys_int145: vex_printf("Sys_int145"); break;
case Ijk_Sys_int210: vex_printf("Sys_int210"); break;
case Ijk_Sys_sysenter: vex_printf("Sys_sysenter"); break;
+ case Ijk_SyncupEnv: vex_printf("SyncupEnv"); break;
+ case Ijk_TooManyIR: vex_printf("TooManyIR"); break;
default: vpanic("ppIRJumpKind");
}
}
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 8c47be090..b4b1e9d6e 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -2513,8 +2513,13 @@ typedef
Ijk_Sys_int130, /* amd64/x86 'int $0x82' */
Ijk_Sys_int145, /* amd64/x86 'int $0x91' */
Ijk_Sys_int210, /* amd64/x86 'int $0xD2' */
- Ijk_Sys_sysenter /* x86 'sysenter'. guest_EIP becomes
+ Ijk_Sys_sysenter, /* x86 'sysenter'. guest_EIP becomes
invalid at the point this happens. */
+ Ijk_SyncupEnv, /* rvv syncup so that following instructions can read
+ the env set here */
+ Ijk_TooManyIR /* some rvv instructions generate too many IRs to
+ exhaust storage, break out early to reduce the
+ risk */
}
IRJumpKind;
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:46
|
Vector instruction needs this info in guest_riscv64_toIR.c
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/pub/libvex.h | 4 ++++
coregrind/m_translate.c | 2 ++
2 files changed, 6 insertions(+)
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
index 2c54a8d8f..7cabc36aa 100644
--- a/VEX/pub/libvex.h
+++ b/VEX/pub/libvex.h
@@ -37,6 +37,7 @@
#include "libvex_basictypes.h"
#include "libvex_ir.h"
+#include "pub_tool_guest.h"
/*---------------------------------------------------------------*/
@@ -470,6 +471,9 @@ typedef
/* MIPS32/MIPS64 GUESTS only: emulated FPU mode. */
UInt guest_mips_fp_mode;
+
+ /* RISC-V vector needs guest state on translation */
+ VexGuestArchState* riscv64_guest_state;
}
VexAbiInfo;
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 75dca062d..dc3c65814 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -1752,6 +1752,8 @@ Bool VG_(translate) ( ThreadId tid,
# if defined(VGP_riscv64_linux)
vex_abiinfo.guest__use_fallback_LLSC = True;
+ ThreadState *tst = VG_(get_ThreadState)(tid);
+ vex_abiinfo.riscv64_guest_state = &tst->arch.vex;
# endif
/* Set up closure args. */
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:46
|
Add v0-v31, vl and vtype
Signed-off-by: Fei Wu <fe...@in...>
---
VEX/priv/guest_riscv64_toIR.c | 135 +++++++++++++++++++++++++++++++++
VEX/pub/libvex_guest_riscv64.h | 38 +++++++++-
memcheck/mc_machine.c | 35 +++++++++
3 files changed, 207 insertions(+), 1 deletion(-)
diff --git a/VEX/priv/guest_riscv64_toIR.c b/VEX/priv/guest_riscv64_toIR.c
index 93ea5a173..423260679 100644
--- a/VEX/priv/guest_riscv64_toIR.c
+++ b/VEX/priv/guest_riscv64_toIR.c
@@ -289,6 +289,42 @@ static IRExpr* narrowFrom64(IRType dstTy, IRExpr* e)
#define OFFB_LLSC_ADDR offsetof(VexGuestRISCV64State, guest_LLSC_ADDR)
#define OFFB_LLSC_DATA offsetof(VexGuestRISCV64State, guest_LLSC_DATA)
+#define OFFB_V0 offsetof(VexGuestRISCV64State, guest_v0)
+#define OFFB_V1 offsetof(VexGuestRISCV64State, guest_v1)
+#define OFFB_V2 offsetof(VexGuestRISCV64State, guest_v2)
+#define OFFB_V3 offsetof(VexGuestRISCV64State, guest_v3)
+#define OFFB_V4 offsetof(VexGuestRISCV64State, guest_v4)
+#define OFFB_V5 offsetof(VexGuestRISCV64State, guest_v5)
+#define OFFB_V6 offsetof(VexGuestRISCV64State, guest_v6)
+#define OFFB_V7 offsetof(VexGuestRISCV64State, guest_v7)
+#define OFFB_V8 offsetof(VexGuestRISCV64State, guest_v8)
+#define OFFB_V9 offsetof(VexGuestRISCV64State, guest_v9)
+#define OFFB_V10 offsetof(VexGuestRISCV64State, guest_v10)
+#define OFFB_V11 offsetof(VexGuestRISCV64State, guest_v11)
+#define OFFB_V12 offsetof(VexGuestRISCV64State, guest_v12)
+#define OFFB_V13 offsetof(VexGuestRISCV64State, guest_v13)
+#define OFFB_V14 offsetof(VexGuestRISCV64State, guest_v14)
+#define OFFB_V15 offsetof(VexGuestRISCV64State, guest_v15)
+#define OFFB_V16 offsetof(VexGuestRISCV64State, guest_v16)
+#define OFFB_V17 offsetof(VexGuestRISCV64State, guest_v17)
+#define OFFB_V18 offsetof(VexGuestRISCV64State, guest_v18)
+#define OFFB_V19 offsetof(VexGuestRISCV64State, guest_v19)
+#define OFFB_V20 offsetof(VexGuestRISCV64State, guest_v20)
+#define OFFB_V21 offsetof(VexGuestRISCV64State, guest_v21)
+#define OFFB_V22 offsetof(VexGuestRISCV64State, guest_v22)
+#define OFFB_V23 offsetof(VexGuestRISCV64State, guest_v23)
+#define OFFB_V24 offsetof(VexGuestRISCV64State, guest_v24)
+#define OFFB_V25 offsetof(VexGuestRISCV64State, guest_v25)
+#define OFFB_V26 offsetof(VexGuestRISCV64State, guest_v26)
+#define OFFB_V27 offsetof(VexGuestRISCV64State, guest_v27)
+#define OFFB_V28 offsetof(VexGuestRISCV64State, guest_v28)
+#define OFFB_V29 offsetof(VexGuestRISCV64State, guest_v29)
+#define OFFB_V30 offsetof(VexGuestRISCV64State, guest_v30)
+#define OFFB_V31 offsetof(VexGuestRISCV64State, guest_v31)
+
+#define OFFB_VL offsetof(VexGuestRISCV64State, guest_vl)
+#define OFFB_VTYPE offsetof(VexGuestRISCV64State, guest_vtype)
+
/*------------------------------------------------------------*/
/*--- Integer registers ---*/
/*------------------------------------------------------------*/
@@ -413,6 +449,105 @@ static void putPC(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e)
stmt(irsb, IRStmt_Put(OFFB_PC, e));
}
+/*------------------------------------------------------------*/
+/*--- Vector registers ---*/
+/*------------------------------------------------------------*/
+static Int offsetVReg(UInt vregNo)
+{
+ switch (vregNo) {
+ case 0:
+ return OFFB_V0;
+ case 1:
+ return OFFB_V1;
+ case 2:
+ return OFFB_V2;
+ case 3:
+ return OFFB_V3;
+ case 4:
+ return OFFB_V4;
+ case 5:
+ return OFFB_V5;
+ case 6:
+ return OFFB_V6;
+ case 7:
+ return OFFB_V7;
+ case 8:
+ return OFFB_V8;
+ case 9:
+ return OFFB_V9;
+ case 10:
+ return OFFB_V10;
+ case 11:
+ return OFFB_V11;
+ case 12:
+ return OFFB_V12;
+ case 13:
+ return OFFB_V13;
+ case 14:
+ return OFFB_V14;
+ case 15:
+ return OFFB_V15;
+ case 16:
+ return OFFB_V16;
+ case 17:
+ return OFFB_V17;
+ case 18:
+ return OFFB_V18;
+ case 19:
+ return OFFB_V19;
+ case 20:
+ return OFFB_V20;
+ case 21:
+ return OFFB_V21;
+ case 22:
+ return OFFB_V22;
+ case 23:
+ return OFFB_V23;
+ case 24:
+ return OFFB_V24;
+ case 25:
+ return OFFB_V25;
+ case 26:
+ return OFFB_V26;
+ case 27:
+ return OFFB_V27;
+ case 28:
+ return OFFB_V28;
+ case 29:
+ return OFFB_V29;
+ case 30:
+ return OFFB_V30;
+ case 31:
+ return OFFB_V31;
+ default:
+ vassert(0);
+ }
+}
+
+static const HChar* nameVReg(UInt iregNo)
+{
+ vassert(iregNo < 32);
+ static const HChar* names[32] = {
+ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"};
+ return names[iregNo];
+}
+
+static IRExpr* getVReg(UInt vregNo, UInt offset, IRType ty)
+{
+ vassert(vregNo < 32);
+ return IRExpr_Get(offsetVReg(vregNo) + offset, ty);
+}
+
+static void putVReg(/*OUT*/ IRSB* irsb, UInt vregNo, UInt offset, /*IN*/ IRExpr* e)
+{
+ vassert(vregNo < 32);
+ stmt(irsb, IRStmt_Put(offsetVReg(vregNo) + offset, e));
+}
+
+
/*------------------------------------------------------------*/
/*--- Floating-point registers ---*/
/*------------------------------------------------------------*/
diff --git a/VEX/pub/libvex_guest_riscv64.h b/VEX/pub/libvex_guest_riscv64.h
index 31264b124..50bec58bd 100644
--- a/VEX/pub/libvex_guest_riscv64.h
+++ b/VEX/pub/libvex_guest_riscv64.h
@@ -128,8 +128,44 @@ typedef struct {
/* 576 */ ULong guest_LLSC_ADDR; /* Address of the transaction. */
/* 584 */ ULong guest_LLSC_DATA; /* Original value at ADDR, sign-extended. */
+#define VLEN 128
+ /* 592 */ V128 guest_v0;
+ V128 guest_v1;
+ V128 guest_v2;
+ V128 guest_v3;
+ V128 guest_v4;
+ V128 guest_v5;
+ V128 guest_v6;
+ V128 guest_v7;
+ V128 guest_v8;
+ V128 guest_v9;
+ V128 guest_v10;
+ V128 guest_v11;
+ V128 guest_v12;
+ V128 guest_v13;
+ V128 guest_v14;
+ V128 guest_v15;
+ V128 guest_v16;
+ V128 guest_v17;
+ V128 guest_v18;
+ V128 guest_v19;
+ V128 guest_v20;
+ V128 guest_v21;
+ V128 guest_v22;
+ V128 guest_v23;
+ V128 guest_v24;
+ V128 guest_v25;
+ V128 guest_v26;
+ V128 guest_v27;
+ V128 guest_v28;
+ V128 guest_v29;
+ V128 guest_v30;
+ V128 guest_v31;
+
+ /* 1104 */ ULong guest_vl;
+ /* 1112 */ ULong guest_vtype;
+
/* Padding to 16 bytes. */
- /* 592 */
} VexGuestRISCV64State;
/*------------------------------------------------------------*/
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 34df0011a..acda0bd95 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -1489,6 +1489,41 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(LLSC_ADDR) && sz == 8) return o;
if (o == GOF(LLSC_DATA) && sz == 8) return o;
+ if (o >= GOF(v0) && o+sz <= GOF(v0) +SZB(v0)) return GOF(v0);
+ if (o >= GOF(v1) && o+sz <= GOF(v1) +SZB(v1)) return GOF(v1);
+ if (o >= GOF(v2) && o+sz <= GOF(v2) +SZB(v2)) return GOF(v2);
+ if (o >= GOF(v3) && o+sz <= GOF(v3) +SZB(v3)) return GOF(v3);
+ if (o >= GOF(v4) && o+sz <= GOF(v4) +SZB(v4)) return GOF(v4);
+ if (o >= GOF(v5) && o+sz <= GOF(v5) +SZB(v5)) return GOF(v5);
+ if (o >= GOF(v6) && o+sz <= GOF(v6) +SZB(v6)) return GOF(v6);
+ if (o >= GOF(v7) && o+sz <= GOF(v7) +SZB(v7)) return GOF(v7);
+ if (o >= GOF(v8) && o+sz <= GOF(v8) +SZB(v8)) return GOF(v8);
+ if (o >= GOF(v9) && o+sz <= GOF(v9) +SZB(v9)) return GOF(v9);
+ if (o >= GOF(v10) && o+sz <= GOF(v10)+SZB(v10)) return GOF(v10);
+ if (o >= GOF(v11) && o+sz <= GOF(v11)+SZB(v11)) return GOF(v11);
+ if (o >= GOF(v12) && o+sz <= GOF(v12)+SZB(v12)) return GOF(v12);
+ if (o >= GOF(v13) && o+sz <= GOF(v13)+SZB(v13)) return GOF(v13);
+ if (o >= GOF(v14) && o+sz <= GOF(v14)+SZB(v14)) return GOF(v14);
+ if (o >= GOF(v15) && o+sz <= GOF(v15)+SZB(v15)) return GOF(v15);
+ if (o >= GOF(v16) && o+sz <= GOF(v16)+SZB(v16)) return GOF(v16);
+ if (o >= GOF(v17) && o+sz <= GOF(v17)+SZB(v17)) return GOF(v17);
+ if (o >= GOF(v18) && o+sz <= GOF(v18)+SZB(v18)) return GOF(v18);
+ if (o >= GOF(v19) && o+sz <= GOF(v19)+SZB(v19)) return GOF(v19);
+ if (o >= GOF(v20) && o+sz <= GOF(v20)+SZB(v20)) return GOF(v20);
+ if (o >= GOF(v21) && o+sz <= GOF(v21)+SZB(v21)) return GOF(v21);
+ if (o >= GOF(v22) && o+sz <= GOF(v22)+SZB(v22)) return GOF(v22);
+ if (o >= GOF(v23) && o+sz <= GOF(v23)+SZB(v23)) return GOF(v23);
+ if (o >= GOF(v24) && o+sz <= GOF(v24)+SZB(v24)) return GOF(v24);
+ if (o >= GOF(v25) && o+sz <= GOF(v25)+SZB(v25)) return GOF(v25);
+ if (o >= GOF(v26) && o+sz <= GOF(v26)+SZB(v26)) return GOF(v26);
+ if (o >= GOF(v27) && o+sz <= GOF(v27)+SZB(v27)) return GOF(v27);
+ if (o >= GOF(v28) && o+sz <= GOF(v28)+SZB(v28)) return GOF(v28);
+ if (o >= GOF(v29) && o+sz <= GOF(v29)+SZB(v29)) return GOF(v29);
+ if (o >= GOF(v30) && o+sz <= GOF(v30)+SZB(v30)) return GOF(v30);
+ if (o >= GOF(v31) && o+sz <= GOF(v31)+SZB(v31)) return GOF(v31);
+ if (o >= GOF(vl) && o+sz <= GOF(vl)+SZB(vl)) return GOF(vl);
+ if (o >= GOF(vtype) && o+sz <= GOF(vtype)+SZB(vtype)) return GOF(vtype);
+
VG_(printf)("MC_(get_otrack_shadow_offset)(riscv64)(off=%d,sz=%d)\n",
offset,szB);
tl_assert(0);
--
2.25.1
|
|
From: Fei Wu <fe...@in...> - 2023-05-26 13:57:45
|
I'm from Intel RISC-V team and working on a RISC-V International
development partner project to add RISC-V vector (RVV) support on
Valgrind, the target tool is memcheck. My work bases on commit
71272b252977 of Petr's riscv64-linux branch, many thanks to Petr for his
great work first.
https://github.com/petrpavlu/valgrind-riscv64
This RFC is a starting point for RVV support on Valgrind. It's far from
complete and will take a huge amount of time, but I do think it's more
effective to have some real code for discussion, so this series adds RVV
support to run memcpy/strcmp/strcpy/strlen/strncpy in:
https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/examples
The whole idea is splitting the vector instructions into scalar
instructions which have already been well supported on Petr's branch,
the correctness of binary translation (tool=none) is simple to ensure,
but the logic of tool=memcheck should not be broken, one of the keys is
to deal with the instructions with mask:
* for load/store with mask, LoadG/StoreG are enabled, the same semantics
as other architectures
* for other instructions such as vadd, if the vector mask agnostic (vma)
is set to undisturbed, the masked original value is read first then
write back, the V bit won't change even after write back, it's not
necessary to have another guard type like LoadG/StoreG.
Pros
----
* by leveraging the existing scalar instructions support on Valgrind,
usually adding a new instruction involves only the frontend in
guest_riscv64_toIR, other parts are rare touched, so effort is much
reduced to enable new instructions.
* As the backend only sees the scalar IRs and generates scalar
instructions, it's possible to run valgrind ./vec-test on non-RVV host.
Cons
----
* as this method splits RVV instruction at frontend, there is less
chance to optimize at other stages, e.g. the vbits tracking.
* with larger vlen such as 1K, at most 1 RVV instruction will split into
1K ops, besides the performance penalty, it causes pressure to other
components such as tmp space too. Some of this can be relieved by
grouping multiple elements together.
There are some alternatives, but none seems perfect:
* helper function. It's much easier to make tool=none work, but how good
is it to handle the V+A tracking and other tools? Generally speaking, it
should not be a general solution for too many instructions.
* define and pass the RVV IR to backend, instead of splitting it too
early. This introduces much effort, we should evaluate what level of
profit can be attained.
At last, if the performance is tolerable, is this the right way to go?
Fei Wu (12):
riscv64: Starting Vector support, registers added
riscv64: Pass riscv guest_state for translation
riscv64: Add SyncupEnv & TooManyIR jump kinds
riscv64: Add LoadG/StoreG support
riscv64: Shift guest_state -2048 on calling helper
riscv64: Add cpu_state to TB
riscv64: Introduce dis_RV64V and add vsetvl
riscv64: Add load/store
riscv64: Add csrr vl
riscv64: add vfirst
riscv64: Add vmsgtu/vmseq/vmsne/vmsbf/vmsif/vmor/vmv/vid
riscv64: Add vadd
VEX/priv/guest_riscv64_toIR.c | 974 +++++++++++++++++++++++++++++-
VEX/priv/host_riscv64_defs.c | 133 ++++
VEX/priv/host_riscv64_defs.h | 23 +
VEX/priv/host_riscv64_isel.c | 89 ++-
VEX/priv/ir_defs.c | 8 +
VEX/priv/ir_opt.c | 4 +-
VEX/pub/libvex.h | 4 +
VEX/pub/libvex_guest_riscv64.h | 47 +-
VEX/pub/libvex_ir.h | 9 +-
coregrind/m_scheduler/scheduler.c | 17 +-
coregrind/m_translate.c | 5 +
coregrind/m_transtab.c | 26 +-
coregrind/pub_core_transtab.h | 5 +
memcheck/mc_machine.c | 35 ++
memcheck/mc_translate.c | 4 +
15 files changed, 1368 insertions(+), 15 deletions(-)
--
2.25.1
|
|
From: Jojo R <rj...@gm...> - 2023-05-22 11:46:42
|
Hi,
Any feedback or suggestion about this RFC ?
在 2023/4/21 17:25, Jojo R 写道:
>
> Hi,
>
> We consider to add RVV/Vector [1] feature in valgrind, there are some
> challenges.
> RVV like ARM's SVE [2] programming model, it's scalable/VLA, that
> means the vector length is agnostic.
> ARM's SVE is not supported in valgrind :(
>
> There are three major issues in implementing RVV instruction set in
> Valgrind as following:
>
> 1. Scalable vector register width VLENB
> 2. Runtime changing property of LMUL and SEW
> 3. Lack of proper VEX IR to represent all vector operations
>
> We propose applicable methods to solve 1 and 2. As for 3, we explore
> several possible but maybe imperfect approaches to handle different cases.
>
> We start from 1. As each guest register should be described in
> VEXGuestState struct, the vector registers with scalable width of
> VLENB can be added into VEXGuestState as arrays using an allowable
> maximum length like 2048/4096.
>
> The actual available access range can be determined at Valgrind
> startup time by querying the CPU for its vector capability or some
> suitable setup steps.
>
>
> To solve problem 2, we are inspired by already-proven techniques in
> QEMU, where translation blocks are broken up when certain critical
> CSRs are set. Because the guest code to IR translation relies on the
> precise value of LMUL/SEW and they may change within a basic block, we
> can break up the basic block each time encountering a vsetvl{i}
> instruction and return to the scheduler to execute the translated code
> and update LMUL/SEW. Accordingly, translation cache management should
> be refactored to detect the changing of LMUL/SEW to invalidate
> outdated code cache. Without losing the generality, the LMUL/SEW
> should be encoded into an ULong flag such that other architectures can
> leverage this flag to store their arch-dependent information. The
> TTentry struct should also take the flag into account no matter
> insertion or deletion. By doing this, the flag carries the newest
> LMUL/SEW throughout the simulation and can be passed to disassemble
> functions using the VEXArchInfo struct such that we can get the real
> and newest value of LMUL and SEW to facilitate our translation.
>
> Also, some architecture-related code should be taken care of. Like
> m_dispatch part, disp_cp_xindir function looks up code cache using
> hardcoded assembly by checking the requested guest state IP and
> translation cache entry address with no more constraints. Many other
> modules should be checked to ensure the in-time update of LMUL/SEW is
> instantly visible to essential parts in Valgrind.
>
>
> The last remaining big issue is 3, which we introduce some ad-hoc
> approaches to deal with. We summarize these approaches into three
> types as following:
>
> 1. Break down a vector instruction to scalar VEX IR ops.
> 2. Break down a vector instruction to fixed-length VEX IR ops.
> 3. Use dirty helpers to realize vector instructions.
>
> The very first method theoretically exists but is probably not
> applicable as the number of IR ops explodes when a large VLENB is
> adopted. Imaging a configuration of VLENB=512, SEW=8, LMUL=8, the VL
> is 512 * 8 / 8 = 512, meaning that a single vector instruction turns
> into 512 scalar instructions and each scalar instruction would be
> expanded to multiple IRs. To make things worse, the tool
> instrumentation will insert more IRs between adjacent scalar IR ops.
> As a result, the performance is likely to be slowed down thousand
> times during running a real-world application with lots of vector
> instructions. Therefore, the other two methods are more promising and
> we will discuss them below.
>
> 2 and 3 are not mutually exclusive as we may choose a suitable method
> from them to implement a vector instruction regarding its concrete
> behavior. To explain these methods in detail, we present some
> instances to illustrate their pros and cons.
>
> In terms of method 2, we have real values of VLENB/LMUL/SEW. The
> simple case is VLENB <= 256 and LMUL=1, where many SIMD IR ops are
> available and can be directly applied to represent vector operations.
> However, even when VLENB is restricted to 128, it still exceeds the
> maximum SIMD width of 256 supported by VEX IR if LMUL>2. Hence, here
> are two variants of method 2 to deal with long vectors:
>
>
> *2.1*Add more SIMD IR ops such as 1024/2048/4096, and translate vector
> instructions in the granularity of VLENB. Accordingly, VLENB=4096 with
> LMUL=2 is fulfilled by two 4096 SIMD VEX IR ops.
>
> * *pros*: it encourages VEX backend to generate more compact and
> efficient SIMD code (maybe). Particularly,it accommodatesmask and
> gather/scatter (indexed) instructions by delivering more
> information in IR itself.
> * *cons*: too many new IR ops need to be introduced in VEX as each
> op of different length should implement its add/sub/mul variants.
> New data types to denote long vectors are necessary too, causing
> difficulties in both VEX backend register allocation and tool
> instrumentation.
>
> *2.2*Break down long vectors to multiple repeated SIMD ops. For
> instance, a vadd.vv vector instruction with VLENB=256/LMUL=2/SEW=8 is
> composed of four operators of Iop_Add8x16 type.
>
> * *pros:*less efforts are required in register allocation and tool
> instrumentation. The VEX frontend is able to notify the backend to
> generate efficient vector instructions by existing Iops. It better
> trades off the complexity of adding many long vector IR ops and
> the benefit of generating high-efficiency host code.
> * *cons:*it is hard to describe a mask operation given that the mask
> is pretty flexible (the least significant bit of each segment of
> v0). Additionally, gather/scatter instructions may have similar
> problems in appropriately dividing index registers. There are
> various corner cases left here such as widening arithmetic
> operations (widening SIMD IR ops are currently not compatible) and
> vstart CSR register. When using fixed-length IR ops to comprise a
> vector instruction, we will inevitably tell each IR op which
> position encoded in vstart you can start to process the data. We
> can use vstart as a normal guest state virtual register to
> calculate each op's start position as a guard IRExpr or obtain the
> value of vstart like what we do in LMUL/SEW. Nevertheless, it is
> non-trivial to decompose a vector instruction concisely.
>
> In short, both 2.1 and 2.2 confront a dilemma in reducing engineering
> efforts of refactoring Valgrind elegantly as well as implementing the
> vector instruction set efficiently. Same obstacles exist in ARM SVE as
> they are scalable vector instructions and flexible in many ways.
>
> The final solution is the dirty helper. It is undoubtedly practical
> and requires possibly the least engineering efforts in dealing with so
> many details in Valgrind. In this design, each instruction is
> completed using an inline assembly running the same instruction on the
> host. Moreover, tool instrumentation already handles IRDirty except
> that new fields should be added in _IRDirty struct to indicate
> strided/indexed/masked memory accesses and arithmetic operations.
>
> * *pros:*it supports all instructions without bothering to build
> complicated IR expressions and statements. It executes vector
> instructions using host CPU to get acceleration to some extent.
> Besides, we do not need to add VEX backend to translate new IRs to
> vector instructions.
> * *cons:*the dirty helper always keeps its operations in a black box
> such that tools can never see what happens in a dirty helper. Like
> memcheck, the bit precision merit is missing once it meets a dirty
> helper as the V-bit propagation chain adopts a pretty coarse
> determination strategy. On the other hand, it is also not an
> elegant way to implement the entire ISA extension in dirty helpers.
>
> In summary, it is far to reach a truly applicable solution in adding
> vector extensions in Valgrind. We need to do detailed and
> comprehensive estimations on different vector instruction categories.
>
> Any feedback is welcome in github [3] also.
>
>
> [1] https://github.com/riscv/riscv-v-spec
>
> [2]
> https://community.arm.com/arm-research/b/articles/posts/the-arm-scalable-vector-extension-sve
>
> [3] https://github.com/petrpavlu/valgrind-riscv64/issues/17
>
>
> Thanks.
>
> Jojo
>
>
>
> _______________________________________________
> Valgrind-developers mailing list
> Val...@li...
> https://lists.sourceforge.net/lists/listinfo/valgrind-developers |
|
From: Mark W. <ma...@so...> - 2023-05-19 14:09:14
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=e3602b3eec0696b183722ea21da14588cf205c74 commit e3602b3eec0696b183722ea21da14588cf205c74 Author: Mark Wielaard <ma...@kl...> Date: Sun May 14 23:34:05 2023 +0200 Add --with-gdbscripts-dir=PATH configure option Currently the gdb valgrind scripts are installed under VG_LIBDIR which is normally pkglibexecdir which is likely not in the default gdb safe-path (a list of directories from which it is safe to auto-load files). So users will have to add the directory to their .gdbinit file. This patch adds a --with-gdbscripts-dir=PATH configure option that sets VG_GDBSCRIPTS_DIR to the given PATH (${libexecdir}/valgrind if not given). A user can also configure --without-gdbscripts-dir to disable adding a .debug_gdb_scripts section to the vgpreload library and installing the valgrind-monitor python scripts completely. Use VG_GDBSCRIPTS_DIR as gdbscriptsdir to install the valgrind-monitor python files and pass it with CPPFLAGS when building vg_preloaded.c and vgdb.c to use instead of VG_LIBDIR. https://bugs.kde.org/show_bug.cgi?id=469768 Diff: --- NEWS | 8 ++++++++ configure.ac | 17 +++++++++++++++++ coregrind/Makefile.am | 20 +++++++++++++++----- coregrind/vg_preloaded.c | 4 +++- coregrind/vgdb.c | 9 +++++++-- 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index 87bfae5960..ea9fc7c868 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,13 @@ AMD64/macOS 10.13 and nanoMIPS/Linux. * ==================== CORE CHANGES =================== +* A new configure option --with-gdbscripts-dir lets you install + the gdb valgrind python monitor scripts in a specific location. + For example an distro could use it to install the scripts in a + safe load location --with-gdbscripts-dir=%{_datadir}/gdb/auto-load + It is also possible to configure --without-gdb-scripts-dir so no + .debug_gdb_scripts section is added to the vgpreload library and + no valgrind-monitor python scripts are installed at all. 
* ================== PLATFORM CHANGES ================= @@ -29,6 +36,7 @@ are not entered into bugzilla tend to get forgotten about or ignored. 460192 Add epoll_pwait2 469049 link failure on ppc64 (big endian) valgrind 3.20 469146 massif --ignore-fn does not ignore inlined functions +469768 Make it possible to install gdb scripts in a different location To see details of a given bug, visit https://bugs.kde.org/show_bug.cgi?id=XXXXXX diff --git a/configure.ac b/configure.ac index 15fbf5ea20..0cf84a1c00 100755 --- a/configure.ac +++ b/configure.ac @@ -1198,6 +1198,23 @@ AC_MSG_RESULT([$xcodedir]) AC_DEFINE_UNQUOTED(XCODE_DIR, "$xcodedir", [xcode sdk include directory]) AC_SUBST(XCODE_DIR, [$xcodedir])]) +#---------------------------------------------------------------------------- +# Where to install gdb scripts, defaults to VG_LIBDIR (pkglibexecdir) +#---------------------------------------------------------------------------- +AC_MSG_CHECKING([where gdb scripts are installed]) +AC_ARG_WITH(gdbscripts-dir, + [ --with-gdbscripts-dir=PATH Specify path to install gdb scripts], + [gdbscriptsdir=${withval}], + [gdbscriptsdir=${libexecdir}/valgrind]) +AC_MSG_RESULT([$gdbscriptsdir]) +if test "x$gdbscriptsdir" != "xno"; then + AC_SUBST(VG_GDBSCRIPTS_DIR, [$gdbscriptsdir]) + AM_CONDITIONAL(GDBSCRIPTS, true) +else + AC_SUBST(VG_GDBSCRIPTS_DIR, []) + AM_CONDITIONAL(GDBSCRIPTS, false) +fi + #---------------------------------------------------------------------------- # Libc and suppressions #---------------------------------------------------------------------------- diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am index 553211782f..8a7f753a6e 100644 --- a/coregrind/Makefile.am +++ b/coregrind/Makefile.am @@ -101,7 +101,7 @@ if VGCONF_OS_IS_FREEBSD vgdb_SOURCES += vgdb-invoker-freebsd.c endif -vgdb_CPPFLAGS = $(AM_CPPFLAGS_PRI) +vgdb_CPPFLAGS = $(AM_CPPFLAGS_PRI) $(GDB_SCRIPTS_DIR) vgdb_CFLAGS = $(AM_CFLAGS_PRI) $(LTO_CFLAGS) vgdb_CCASFLAGS = $(AM_CCASFLAGS_PRI) 
vgdb_LDFLAGS = $(AM_CFLAGS_PRI) @LIB_UBSAN@ @@ -624,9 +624,15 @@ if VGCONF_OS_IS_DARWIN noinst_DSYMS = $(noinst_PROGRAMS) endif +if GDBSCRIPTS + GDB_SCRIPTS_DIR=-DVG_GDBSCRIPTS_DIR="\"@VG_GDBSCRIPTS_DIR@\"" +else + GDB_SCRIPTS_DIR= +endif + vgpreload_core_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_SOURCES = vg_preloaded.c vgpreload_core_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_CPPFLAGS = \ - $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) + $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(GDB_SCRIPTS_DIR) vgpreload_core_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_CFLAGS = \ $(AM_CFLAGS_PSO_@VGCONF_PLATFORM_PRI_CAPS@) vgpreload_core_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_LDFLAGS = \ @@ -634,7 +640,7 @@ vgpreload_core_@VGCONF_ARCH_PRI@_@VGCONF_OS@_so_LDFLAGS = \ if VGCONF_HAVE_PLATFORM_SEC vgpreload_core_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_SOURCES = vg_preloaded.c vgpreload_core_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_CPPFLAGS = \ - $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) + $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) $(GDBSCRIPTS_DIR) vgpreload_core_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_CFLAGS = \ $(AM_CFLAGS_PSO_@VGCONF_PLATFORM_SEC_CAPS@) vgpreload_core_@VGCONF_ARCH_SEC@_@VGCONF_OS@_so_LDFLAGS = \ @@ -766,8 +772,12 @@ GDBSERVER_XML_FILES = \ # so as to make sure these get copied into the install tree vglibdir = $(pkglibexecdir) vglib_DATA = $(GDBSERVER_XML_FILES) -vglib_DATA += m_gdbserver/valgrind-monitor.py -vglib_DATA += m_gdbserver/valgrind-monitor-def.py + +if GDBSCRIPTS +gdbscriptsdir = @VG_GDBSCRIPTS_DIR@ +gdbscripts_DATA = m_gdbserver/valgrind-monitor.py +gdbscripts_DATA += m_gdbserver/valgrind-monitor-def.py +endif # so as to make sure these get copied into the tarball EXTRA_DIST += $(GDBSERVER_XML_FILES) diff --git a/coregrind/vg_preloaded.c b/coregrind/vg_preloaded.c index d6e05898c9..86f6ac5a26 100644 --- a/coregrind/vg_preloaded.c +++ b/coregrind/vg_preloaded.c @@ -61,7 +61,9 @@ .popsection \n\ "); -DEFINE_GDB_PY_SCRIPT(VG_LIBDIR "/valgrind-monitor.py") +#ifdef VG_GDBSCRIPTS_DIR 
+DEFINE_GDB_PY_SCRIPT(VG_GDBSCRIPTS_DIR "/valgrind-monitor.py") +#endif #endif #if defined(VGO_linux) || defined(VGO_solaris) || defined(VGO_freebsd) diff --git a/coregrind/vgdb.c b/coregrind/vgdb.c index 8ec4240770..56a969de78 100644 --- a/coregrind/vgdb.c +++ b/coregrind/vgdb.c @@ -1982,10 +1982,15 @@ void usage(void) " -d arg tells to show debug info. Multiple -d args for more debug info\n" "\n" " -h --help shows this message\n" +#ifdef VG_GDBSCRIPTS_DIR " The GDB python code defining GDB front end valgrind commands is:\n %s\n" +#endif " To get help from the Valgrind gdbserver, use vgdb help\n" -"\n", vgdb_prefix_default(), VG_LIBDIR "/valgrind-monitor.py" - ); +"\n", vgdb_prefix_default() +#ifdef VG_GDBSCRIPTS_DIR + , VG_GDBSCRIPTS_DIR "/valgrind-monitor.py" +#endif + ); invoker_restrictions_msg(); } |
|
From: Mark W. <ma...@so...> - 2023-05-17 11:20:50
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=17f56ab4c4fce7b4778251e1fab2bc50e5c6dbda commit 17f56ab4c4fce7b4778251e1fab2bc50e5c6dbda Author: Olli Vanhoja <oll...@gm...> Date: Mon Oct 10 11:44:22 2022 +0200 Add epoll_pwait2 The only difference between epoll_pwait and epoll_pwait2 is the timeout argument. For epoll_pwait2 this is a timespec (which is always 64bit). https://bugs.kde.org/show_bug.cgi?id=460192 Diff: --- NEWS | 1 + coregrind/m_syswrap/priv_syswrap-linux.h | 1 + coregrind/m_syswrap/syswrap-amd64-linux.c | 2 ++ coregrind/m_syswrap/syswrap-arm-linux.c | 2 ++ coregrind/m_syswrap/syswrap-arm64-linux.c | 2 ++ coregrind/m_syswrap/syswrap-linux.c | 23 +++++++++++++++++++++++ coregrind/m_syswrap/syswrap-mips32-linux.c | 2 ++ coregrind/m_syswrap/syswrap-mips64-linux.c | 1 + coregrind/m_syswrap/syswrap-nanomips-linux.c | 1 + coregrind/m_syswrap/syswrap-ppc32-linux.c | 2 ++ coregrind/m_syswrap/syswrap-ppc64-linux.c | 2 ++ coregrind/m_syswrap/syswrap-s390x-linux.c | 2 ++ coregrind/m_syswrap/syswrap-x86-linux.c | 2 ++ include/vki/vki-scnums-shared-linux.h | 2 ++ 14 files changed, 45 insertions(+) diff --git a/NEWS b/NEWS index 0b54c9cb7b..87bfae5960 100644 --- a/NEWS +++ b/NEWS @@ -26,6 +26,7 @@ bugzilla (https://bugs.kde.org/enter_bug.cgi?product=valgrind) rather than mailing the developers (or mailing lists) directly -- bugs that are not entered into bugzilla tend to get forgotten about or ignored. 
+460192 Add epoll_pwait2 469049 link failure on ppc64 (big endian) valgrind 3.20 469146 massif --ignore-fn does not ignore inlined functions diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h index a73b6247e7..7c9decf5aa 100644 --- a/coregrind/m_syswrap/priv_syswrap-linux.h +++ b/coregrind/m_syswrap/priv_syswrap-linux.h @@ -105,6 +105,7 @@ DECL_TEMPLATE(linux, sys_epoll_create1); DECL_TEMPLATE(linux, sys_epoll_ctl); DECL_TEMPLATE(linux, sys_epoll_wait); DECL_TEMPLATE(linux, sys_epoll_pwait); +DECL_TEMPLATE(linux, sys_epoll_pwait2); DECL_TEMPLATE(linux, sys_eventfd); DECL_TEMPLATE(linux, sys_eventfd2); diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c index 1aeebd274b..008600798a 100644 --- a/coregrind/m_syswrap/syswrap-amd64-linux.c +++ b/coregrind/m_syswrap/syswrap-amd64-linux.c @@ -883,6 +883,8 @@ static SyscallTableEntry syscall_table[] = { LINX_(__NR_faccessat2, sys_faccessat2), // 439 + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 }; diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c index bca5095893..9a7a1e0d27 100644 --- a/coregrind/m_syswrap/syswrap-arm-linux.c +++ b/coregrind/m_syswrap/syswrap-arm-linux.c @@ -1057,6 +1057,8 @@ static SyscallTableEntry syscall_main_table[] = { LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 + + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 }; diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c index 9532360007..6af7bab831 100644 --- a/coregrind/m_syswrap/syswrap-arm64-linux.c +++ b/coregrind/m_syswrap/syswrap-arm64-linux.c @@ -837,6 +837,8 @@ static SyscallTableEntry syscall_main_table[] = { LINX_(__NR_faccessat2, sys_faccessat2), // 439 + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 + LINXY(__NR_memfd_secret, 
sys_memfd_secret), // 447 }; diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c index 26f1fbee3c..f8621f8f0d 100644 --- a/coregrind/m_syswrap/syswrap-linux.c +++ b/coregrind/m_syswrap/syswrap-linux.c @@ -2165,6 +2165,29 @@ POST(sys_epoll_pwait) epoll_post_helper (tid, arrghs, status); } +PRE(sys_epoll_pwait2) +{ + *flags |= SfMayBlock; + PRINT("sys_epoll_pwait2 ( %ld, %#" FMT_REGWORD "x, %ld, %#" + FMT_REGWORD "x, %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", + SARG1, ARG2, SARG3, ARG4, ARG5, ARG6); + PRE_REG_READ6(long, "epoll_pwait2", + int, epfd, struct vki_epoll_event *, events, + int, maxevents, const struct timespec64 *, timeout, + vki_sigset_t *, sigmask, vki_size_t, sigsetsize); + /* Assume all (maxevents) events records should be (fully) writable. */ + PRE_MEM_WRITE( "epoll_pwait2(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); + /* epoll_pwait2 only supports 64bit timespec. */ + if (ARG4) + pre_read_timespec64(tid, "epoll_pwait2(timeout)", ARG4); + if (ARG5) + PRE_MEM_READ( "epoll_pwait2(sigmask)", ARG5, sizeof(vki_sigset_t) ); +} +POST(sys_epoll_pwait2) +{ + epoll_post_helper (tid, arrghs, status); +} + PRE(sys_eventfd) { PRINT("sys_eventfd ( %" FMT_REGWORD "u )", ARG1); diff --git a/coregrind/m_syswrap/syswrap-mips32-linux.c b/coregrind/m_syswrap/syswrap-mips32-linux.c index de27998b3f..6268a00ddf 100644 --- a/coregrind/m_syswrap/syswrap-mips32-linux.c +++ b/coregrind/m_syswrap/syswrap-mips32-linux.c @@ -1141,6 +1141,8 @@ static SyscallTableEntry syscall_main_table[] = { LINXY(__NR_close_range, sys_close_range), // 436 LINX_ (__NR_faccessat2, sys_faccessat2), // 439 + + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 }; SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) diff --git a/coregrind/m_syswrap/syswrap-mips64-linux.c b/coregrind/m_syswrap/syswrap-mips64-linux.c index 67e7c2c2f6..6cdf25893b 100644 --- a/coregrind/m_syswrap/syswrap-mips64-linux.c +++ b/coregrind/m_syswrap/syswrap-mips64-linux.c @@ 
-819,6 +819,7 @@ static SyscallTableEntry syscall_main_table[] = { GENX_ (__NR_clone3, sys_ni_syscall), LINXY (__NR_close_range, sys_close_range), LINX_ (__NR_faccessat2, sys_faccessat2), + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), }; SyscallTableEntry * ML_(get_linux_syscall_entry) ( UInt sysno ) diff --git a/coregrind/m_syswrap/syswrap-nanomips-linux.c b/coregrind/m_syswrap/syswrap-nanomips-linux.c index 9c535c68ea..d724cde748 100644 --- a/coregrind/m_syswrap/syswrap-nanomips-linux.c +++ b/coregrind/m_syswrap/syswrap-nanomips-linux.c @@ -828,6 +828,7 @@ static SyscallTableEntry syscall_main_table[] = { GENX_ (__NR_clone3, sys_ni_syscall), LINXY (__NR_close_range, sys_close_range), LINX_ (__NR_faccessat2, sys_faccessat2), + LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), }; SyscallTableEntry* ML_(get_linux_syscall_entry) (UInt sysno) diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c index 12c0730271..c0cfef2354 100644 --- a/coregrind/m_syswrap/syswrap-ppc32-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c @@ -1061,6 +1061,8 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 + + LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 }; SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c index 3c33d1267e..f5976f30c8 100644 --- a/coregrind/m_syswrap/syswrap-ppc64-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c @@ -1030,6 +1030,8 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 + + LINXY (__NR_epoll_pwait2, sys_epoll_pwait2), // 441 }; SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c index 
a377cb7315..afba154e78 100644 --- a/coregrind/m_syswrap/syswrap-s390x-linux.c +++ b/coregrind/m_syswrap/syswrap-s390x-linux.c @@ -871,6 +871,8 @@ static SyscallTableEntry syscall_table[] = { LINXY(__NR_close_range, sys_close_range), // 436 LINX_(__NR_faccessat2, sys_faccessat2), // 439 + + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 }; SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c index a9ba15dfe6..da4fd8fa20 100644 --- a/coregrind/m_syswrap/syswrap-x86-linux.c +++ b/coregrind/m_syswrap/syswrap-x86-linux.c @@ -1655,6 +1655,8 @@ static SyscallTableEntry syscall_table[] = { LINX_(__NR_faccessat2, sys_faccessat2), // 439 + LINXY(__NR_epoll_pwait2, sys_epoll_pwait2), // 441 + LINXY(__NR_memfd_secret, sys_memfd_secret), // 447 }; diff --git a/include/vki/vki-scnums-shared-linux.h b/include/vki/vki-scnums-shared-linux.h index d90cdd3124..542382b533 100644 --- a/include/vki/vki-scnums-shared-linux.h +++ b/include/vki/vki-scnums-shared-linux.h @@ -46,6 +46,8 @@ #define __NR_faccessat2 439 +#define __NR_epoll_pwait2 441 + #define __NR_memfd_secret 447 #endif |
|
From: Paul F. <pj...@wa...> - 2023-05-17 06:45:33
|
On 31-03-23 09:32, Julian Seward wrote: > On 31/03/2023 01:55, Nicholas Nethercote wrote: >> I'd do the simple thing. > > I agree. We already use so much memory that I imagine the extra overhead > is close to being in the noise. If an application has 1 million blocks on > the go, this is only going to add 8 MB to the overall space use -- perhaps > a bit more or bit less, per Nick's comments about 40 vs 48 etc. But > this is > insignificant considering that (eg) the default translated code cache size > is already several hundred megabytes. > > J > Hi I did a quick test, 10 million ints allocated. Without this patch 1636M, with this patch 1708M, or about 4.4% more. A bit less than I expected. A+ Paul |
|
From: Paul F. <pa...@so...> - 2023-05-17 06:10:46
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=ea2cb0479719f724d98833460a24b08926ce2e2d commit ea2cb0479719f724d98833460a24b08926ce2e2d Author: Paul Floyd <pj...@wa...> Date: Wed May 17 08:09:39 2023 +0200 Typo in massif tests Makefile.am Diff: --- massif/tests/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/massif/tests/Makefile.am b/massif/tests/Makefile.am index 132e633124..cc79beceb4 100644 --- a/massif/tests/Makefile.am +++ b/massif/tests/Makefile.am @@ -90,7 +90,7 @@ AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) # C++ tests bug469146_SOURCES = bug469146.cpp # -fno-optimize-sibling-calls because otherwise some platforms will have -# tail call optimization which meeses up --ignore-fn +# tail call optimization which messes up --ignore-fn bug469146_CXXFLAGS = $(AM_CXXFLAGS) -O2 -fno-optimize-sibling-calls new_cpp_SOURCES = new-cpp.cpp overloaded_new_SOURCES = overloaded-new.cpp |
|
From: Paul F. <pa...@so...> - 2023-05-16 06:21:52
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=5810a06d54c46ffe87052cc1c1c29252e1e6f29d commit 5810a06d54c46ffe87052cc1c1c29252e1e6f29d Author: Paul Floyd <pj...@wa...> Date: Tue May 16 08:20:59 2023 +0200 Linux regtest: fixes for building on musl x86 Diff: --- memcheck/tests/x86-linux/scalar.c | 4 ++-- none/tests/x86-linux/sigcontext.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/memcheck/tests/x86-linux/scalar.c b/memcheck/tests/x86-linux/scalar.c index 54d0e0443a..83ed38c4d9 100644 --- a/memcheck/tests/x86-linux/scalar.c +++ b/memcheck/tests/x86-linux/scalar.c @@ -5,7 +5,7 @@ #include <unistd.h> #include <sched.h> #include <signal.h> -#include <linux/mman.h> // MREMAP_FIXED +#include <sys/mman.h> // MREMAP_FIXED #include <sys/prctl.h> // Here we are trying to trigger every syscall error (scalar errors and @@ -269,7 +269,7 @@ int main(void) SY(__NR_lock); FAIL; // __NR_ioctl 54 - #include <asm/ioctls.h> + #include <sys/ioctl.h> GO(__NR_ioctl, "3s 1m"); SY(__NR_ioctl, x0, x0+TCSETS, x0); FAIL; diff --git a/none/tests/x86-linux/sigcontext.c b/none/tests/x86-linux/sigcontext.c index dec1c8f60e..9dd8d34023 100644 --- a/none/tests/x86-linux/sigcontext.c +++ b/none/tests/x86-linux/sigcontext.c @@ -4,7 +4,7 @@ #include <signal.h> #include <unistd.h> #include <sys/ucontext.h> -#include <asm/unistd.h> +#include <syscall.h> #define VAL1 0x11223344 #define VAL2 0x44332211 |
|
From: Nicholas N. <n.n...@gm...> - 2023-05-16 04:43:21
|
Hi, Are there any consequences of note for Valgrind? Judging by this paragraph, not particularly: > Sourceware will continue its long standing mission of providing free software infrastructure to the projects it supports, and this will not change moving forward. The affiliation with SFC will be transparent to the projects hosted on Sourceware. Project admins will keep being in charge of how they utilize the services Sourceware provides. Is that right? I have been thinking a bit recently about the fact that Valgrind doesn't have any explicit governance structure or decision-making processes, and how it would be good to have some. Nick On Tue, 16 May 2023 at 04:28, Mark Wielaard <ma...@kl...> wrote: > https://sfconservancy.org/news/2023/may/15/sourceware-joins-sfc/ > > After various discussions and lots of positive feedback [1] [2] [3] [4] > Software Freedom Conservancy and Sourceware proudly announce that > Sourceware today joins SFC as a member project! > > As the fiscal host of Sourceware, Software Freedom Conservancy will > provide a home for fundraising, legal protection and governance that > will benefit all projects under Sourceware's care. We share one mission: > developing, distributing and advocactingfor Software Freedom. Together > we will offer a worry-free, friendly home for core toolchain and > developer tool projects. > > We are happy to discuss this in #overseers on irc.libera.chat now > 18:00-19:00 UTC. And we will also start regular Overseers Open Office > Hours every second Friday of the month on irc at the same time. 
> > Of course you are welcome to drop into the #overseers channel at any > time and we can also be reached through email and bugzilla: > https://sourceware.org/mission.html#organization > > To support the Software Freedom Conservancy, please become a Sustainer > https://sfconservancy.org/sustainer > > You can also donate directly to Sourceware: > https://sfconservancy.org/members/current/#Sourceware > as a directed donation (mention Sourceware in the comment or memo line) > > See https://sfconservancy.org/donate/ for other ways to donate. > > [1] https://sourceware.org/pipermail/overseers/2022q3/018802.html > [2] https://sourceware.org/pipermail/overseers/2022q3/018804.html > [3] https://sourceware.org/pipermail/overseers/2022q3/018834.html > [4] > https://www.fsf.org/events/sourceware-infrastructure-a-presentation-and-community-q-a > > https://sfconservancy.org/news/2023/may/15/sourceware-joins-sfc/ > > Sourceware, one of the longest standing Free Software hosting platforms, > joins SFC > > Important Free Software infrastructure project finds non-profit home > > May 15, 2023 > > As a home for Free Software projects since 1998, Sourceware is a > keystone in Free Software infrastructure. For almost 25 years > Sourceware has been the long-time home of various core toolchain > project communities. Projects like Cygwin, a UNIX API for Win32 > systems, the GNU Toolchain, including GCC, the GNU Compiler Colection, > two C libraries, glibc and newlib, binary tools, binutils and elfutils, > debuggers and profilers, GDB, systemtap and valgrind. Sourceware also > hosts standard groups like gnu-gabi and the DWARF Debugging Standard. > See the full list project hosted and services provided on the > [1]Sourceware projects page. > > Becoming an SFC member project will improve future operations carried > out by dedicated volunteers to and furthering the mission of Free > Software hosting. 
This will accelerate the Sourceware [2]technical > roadmap to improve and modernize the infrastructure. > > As the fiscal host of Sourceware, Software Freedom Conservancy will > provide a home for fundraising, legal assistance and governance that > will benefit all projects under Sourceware's care. We share one > mission: developing, distributing and advocating for Software Freedom. > And to offer a worry-free, friendly home for Free Software communities. > We see a bright future working together. With Conservancy as fiscal > sponsor, Sourceware will also be able to fundraise and have the > community of volunteers work together with paid contractors and enter > into contracts for managed infrastructure where appropriate. > > SFC looks to Sourceware's years of experience in providing outstanding > infrastructure as an inspiration for improving the Free Software > ecosystem both for other SFC projects, and also in furthering SFC's > mission around campaigns to promote Software Freedom Infrastructure. > For decades, Sourceware has shown that hosting Free Software projects > with Free Software infrastructure is not only possible, but helps > create and fosters the growth of relationships and networks within the > Free Software communities. SFC is thrilled to join the powerful history > of demonstrable experience to grow hosting options that are 100% free > software, in the future to bring in new ideas, communities, and > projects! > > Projects hosted by Sourceware are part of the core toolchain for > GNU/Linux distros, embedded systems, the cloud and, through Cygwin, > Windows. Back in 1984 Ken Thompson's Reflections on Trusting Trust > already described how making the source code for these tools available > is essential to create what today we call secure software supply > chains. Sourceware provides robust infrastructure and services for > projects to adopt secure collaboration and release policies. 
We forsee > future cooperation with other Conservancy member projects, such as the > [3]Reproducible Builds project which provides an > independently-verifiable path to supply chain security. Additionally, > Sourceware will leverage Conservancy advisory role in how community > projects are impacted by and can comply with regulations like NIST, > CISA, USA Cyber Security Directives and the EU Cyber Resilience act. > > Each SFC member project is led by a Project Leadership Committee (PLC). > Each individual member of the PLC participates in their own capacity, > but nevertheless the majority of the PLC never includes a majority of > people affiliated with the same organization. Sourceware's PLC includes > various volunteers, past and present, from the Sourceware community. > The founding PLC is: Frank Ch. Eigler, Christopher Faylor, Ian Kelling, > Ian Lance Taylor, Tom Tromey, Jon Turney, and Mark J. Wielaard. > > Recent discussions have inspired the Sourceware volunteers to think > carefully about the future and succession of the leadership for this > important hosting project. By joining SFC, Sourceware gains access to > strategic advice and governance expertise to recruit new volunteers and > raise funds to support work on Sourceware infrastructure. As part of > this governance improvement, Sourceware also announces today regular > irc office hours for guest project admins to advise and discuss any > needs and issues in hosting. The [4]Sourceware mission page lists > various other ways to contact and participate in the community. > > Sourceware will continue its long standing mission of providing free > software infrastructure to the projects it supports, and this will not > change moving forward. The affiliation with SFC will be transparent to > the projects hosted on Sourceware. Project admins will keep being in > charge of how they utilize the services Sourceware provides. > > To support the Software Freedom Conservancy, please become a > [5]Sustainer. 
> > You can also [6]donate directly to Sourceware (mention Sourceware in > the comment or memo line). > > See [7]the donation page for other ways to donate. > > Sourceware may be volunteer managed, but wouldn't be possible without > the hardware, network resources and services provided by Red Hat and > OSUOSL. Additionally [8]build/CI testing machines are provided by > various individuals and the Brno University, Marist College, IBM, the > Works on Arm initiative and the Gentoo Foundation. > > References > > 1. https://sourceware.org/projects.html > 2. > https://inbox.sourceware.org/overseers/YrL...@wi.../ > 3. https://reproducible-builds.org/ > 4. https://sourceware.org/mission.html > 5. https://sfconservancy.org/sustainer/ > 6. https://sfconservancy.org/donate/#wire > 7. https://sfconservancy.org/donate/ > 8. https://builder.sourceware.org/ > > > _______________________________________________ > Valgrind-developers mailing list > Val...@li... > https://lists.sourceforge.net/lists/listinfo/valgrind-developers > |
|
From: Mark W. <ma...@kl...> - 2023-05-15 18:27:42
|
https://sfconservancy.org/news/2023/may/15/sourceware-joins-sfc/ After various discussions and lots of positive feedback [1] [2] [3] [4] Software Freedom Conservancy and Sourceware proudly announce that Sourceware today joins SFC as a member project! As the fiscal host of Sourceware, Software Freedom Conservancy will provide a home for fundraising, legal protection and governance that will benefit all projects under Sourceware's care. We share one mission: developing, distributing and advocating for Software Freedom. Together we will offer a worry-free, friendly home for core toolchain and developer tool projects. We are happy to discuss this in #overseers on irc.libera.chat now 18:00-19:00 UTC. And we will also start regular Overseers Open Office Hours every second Friday of the month on irc at the same time. Of course you are welcome to drop into the #overseers channel at any time and we can also be reached through email and bugzilla: https://sourceware.org/mission.html#organization To support the Software Freedom Conservancy, please become a Sustainer https://sfconservancy.org/sustainer You can also donate directly to Sourceware: https://sfconservancy.org/members/current/#Sourceware as a directed donation (mention Sourceware in the comment or memo line) See https://sfconservancy.org/donate/ for other ways to donate. [1] https://sourceware.org/pipermail/overseers/2022q3/018802.html [2] https://sourceware.org/pipermail/overseers/2022q3/018804.html [3] https://sourceware.org/pipermail/overseers/2022q3/018834.html [4] https://www.fsf.org/events/sourceware-infrastructure-a-presentation-and-community-q-a https://sfconservancy.org/news/2023/may/15/sourceware-joins-sfc/ Sourceware, one of the longest standing Free Software hosting platforms, joins SFC Important Free Software infrastructure project finds non-profit home May 15, 2023 As a home for Free Software projects since 1998, Sourceware is a keystone in Free Software infrastructure. 
For almost 25 years Sourceware has been the long-time home of various core toolchain project communities. Projects like Cygwin, a UNIX API for Win32 systems, the GNU Toolchain, including GCC, the GNU Compiler Collection, two C libraries, glibc and newlib, binary tools, binutils and elfutils, debuggers and profilers, GDB, systemtap and valgrind. Sourceware also hosts standard groups like gnu-gabi and the DWARF Debugging Standard. See the full list of projects hosted and services provided on the [1]Sourceware projects page. Becoming an SFC member project will improve future operations carried out by dedicated volunteers, furthering the mission of Free Software hosting. This will accelerate the Sourceware [2]technical roadmap to improve and modernize the infrastructure. As the fiscal host of Sourceware, Software Freedom Conservancy will provide a home for fundraising, legal assistance and governance that will benefit all projects under Sourceware's care. We share one mission: developing, distributing and advocating for Software Freedom. And to offer a worry-free, friendly home for Free Software communities. We see a bright future working together. With Conservancy as fiscal sponsor, Sourceware will also be able to fundraise and have the community of volunteers work together with paid contractors and enter into contracts for managed infrastructure where appropriate. SFC looks to Sourceware's years of experience in providing outstanding infrastructure as an inspiration for improving the Free Software ecosystem both for other SFC projects, and also in furthering SFC's mission around campaigns to promote Software Freedom Infrastructure. For decades, Sourceware has shown that hosting Free Software projects with Free Software infrastructure is not only possible, but helps create and fosters the growth of relationships and networks within the Free Software communities. 
SFC is thrilled to join the powerful history of demonstrable experience to grow hosting options that are 100% free software, in the future to bring in new ideas, communities, and projects! Projects hosted by Sourceware are part of the core toolchain for GNU/Linux distros, embedded systems, the cloud and, through Cygwin, Windows. Back in 1984 Ken Thompson's Reflections on Trusting Trust already described how making the source code for these tools available is essential to create what today we call secure software supply chains. Sourceware provides robust infrastructure and services for projects to adopt secure collaboration and release policies. We foresee future cooperation with other Conservancy member projects, such as the [3]Reproducible Builds project which provides an independently-verifiable path to supply chain security. Additionally, Sourceware will leverage Conservancy's advisory role in how community projects are impacted by and can comply with regulations like NIST, CISA, USA Cyber Security Directives and the EU Cyber Resilience act. Each SFC member project is led by a Project Leadership Committee (PLC). Each individual member of the PLC participates in their own capacity, but nevertheless the majority of the PLC never includes a majority of people affiliated with the same organization. Sourceware's PLC includes various volunteers, past and present, from the Sourceware community. The founding PLC is: Frank Ch. Eigler, Christopher Faylor, Ian Kelling, Ian Lance Taylor, Tom Tromey, Jon Turney, and Mark J. Wielaard. Recent discussions have inspired the Sourceware volunteers to think carefully about the future and succession of the leadership for this important hosting project. By joining SFC, Sourceware gains access to strategic advice and governance expertise to recruit new volunteers and raise funds to support work on Sourceware infrastructure. 
As part of this governance improvement, Sourceware also announces today regular irc office hours for guest project admins to advise and discuss any needs and issues in hosting. The [4]Sourceware mission page lists various other ways to contact and participate in the community. Sourceware will continue its long standing mission of providing free software infrastructure to the projects it supports, and this will not change moving forward. The affiliation with SFC will be transparent to the projects hosted on Sourceware. Project admins will keep being in charge of how they utilize the services Sourceware provides. To support the Software Freedom Conservancy, please become a [5]Sustainer. You can also [6]donate directly to Sourceware (mention Sourceware in the comment or memo line). See [7]the donation page for other ways to donate. Sourceware may be volunteer managed, but wouldn't be possible without the hardware, network resources and services provided by Red Hat and OSUOSL. Additionally [8]build/CI testing machines are provided by various individuals and the Brno University, Marist College, IBM, the Works on Arm initiative and the Gentoo Foundation. References 1. https://sourceware.org/projects.html 2. https://inbox.sourceware.org/overseers/YrL...@wi.../ 3. https://reproducible-builds.org/ 4. https://sourceware.org/mission.html 5. https://sfconservancy.org/sustainer/ 6. https://sfconservancy.org/donate/#wire 7. https://sfconservancy.org/donate/ 8. https://builder.sourceware.org/ |
|
From: Paul F. <pa...@so...> - 2023-05-13 18:49:31
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=2d340608dbe785c33cdbf1e9f070eeb69b3255dd commit 2d340608dbe785c33cdbf1e9f070eeb69b3255dd Author: Paul Floyd <pj...@wa...> Date: Sat May 13 20:44:46 2023 +0200 DRD: Make libgomp suppression work for any destdir Diff: --- glibc-2.X-drd.supp.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/glibc-2.X-drd.supp.in b/glibc-2.X-drd.supp.in index 9f8fda9f4f..dca0ac9e31 100644 --- a/glibc-2.X-drd.supp.in +++ b/glibc-2.X-drd.supp.in @@ -193,7 +193,7 @@ { drd-libgomp drd:ConflictingAccess - obj:/usr/lib*/libgomp.so* + obj:*/lib*/libgomp.so* } # |
|
From: Andreas A. <ar...@so...> - 2023-05-11 16:06:07
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=96a830df537e2ebd02f0b96603c986351ac5d6f8 commit 96a830df537e2ebd02f0b96603c986351ac5d6f8 Author: Andreas Arnez <ar...@li...> Date: Thu Jan 26 17:41:18 2023 +0100 s390x: XC instruction: clear in 8-byte increments if possible The XC instruction is frequently executed in many programs, mainly for clearing memory. It can target from 1 to 256 bytes. If the size is constant and XC is actually used for clearing memory, Valgrind implements it as a byte-wise loop and rolls out the loop for <= 8 bytes. Instead of clearing byte-wise, it is more efficient to clear in 64-bit increments, so do this for sizes >= 8 bytes. Roll out the loop for up to 32 bytes. Overall, this reduces the number of insns by a few percent and provides a slight performance improvement for some programs. Diff: --- VEX/priv/guest_s390_toIR.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 39356e088f..11dda41ef5 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -13615,36 +13615,36 @@ s390_irgen_XC(UChar length, IRTemp start1, IRTemp start2) static void s390_irgen_XC_sameloc(UChar length, UChar b, UShort d) { - IRTemp counter = newTemp(Ity_I32); IRTemp start = newTemp(Ity_I64); - IRTemp addr = newTemp(Ity_I64); - assign(start, binop(Iop_Add64, mkU64(d), b != 0 ? 
get_gpr_dw0(b) : mkU64(0))); - if (length < 8) { - UInt i; - - for (i = 0; i <= length; ++i) { + if (length < 7) { + for (UInt i = 0; i <= length; ++i) { store(binop(Iop_Add64, mkexpr(start), mkU64(i)), mkU8(0)); } } else { - assign(counter, get_counter_w0()); - - assign(addr, binop(Iop_Add64, mkexpr(start), - unop(Iop_32Uto64, mkexpr(counter)))); - - store(mkexpr(addr), mkU8(0)); - - /* Check for end of field */ - put_counter_w0(binop(Iop_Add32, mkexpr(counter), mkU32(1))); - iterate_if(binop(Iop_CmpNE32, mkexpr(counter), mkU32(length))); - - /* Reset counter */ - put_counter_dw0(mkU64(0)); + if (length < 32) { + for (UInt i = 0; i <= length - 7; i += 8) { + store(binop(Iop_Add64, mkexpr(start), mkU64(i)), mkU64(0)); + } + } else { + IRTemp counter = newTemp(Ity_I64); + assign(counter, get_counter_dw0()); + store(binop(Iop_Add64, mkexpr(start), mkexpr(counter)), mkU64(0)); + put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(8))); + iterate_if(binop(Iop_CmpLE64U, mkexpr(counter), mkU64(length - 15))); + + /* Reset counter */ + put_counter_dw0(mkU64(0)); + } + /* Clear the remaining bytes with backward overlap */ + if ((length + 1) % 8 != 0) { + store(binop(Iop_Add64, mkexpr(start), mkU64(length - 7)), mkU64(0)); + } } - s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, mkU32(0)), False); + s390_cc_set_val(0); if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) s390_disasm(ENC3(MNM, UDLB, UDXB), "xc", d, length, b, d, 0, b); |
|
From: Andreas A. <ar...@so...> - 2023-05-11 16:06:02
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=97d335621a60a3c29ded56ea4a29ae1968ed7c8a commit 97d335621a60a3c29ded56ea4a29ae1968ed7c8a Author: Andreas Arnez <ar...@li...> Date: Fri May 5 17:48:31 2023 +0200 s390x: Optimize CLC for 1, 2, 4, and 8 bytes The CLC instruction compares two memory areas with sizes from 1 up to 256 bytes. Currently Valgrind always implements it with a bytewise loop. Add special handling for the sizes 1, 2, 4, and 8. Realize CLC with an 8-, 16-, 32-, and 64-bit integer comparison, respectively, in those cases. Apart from a slight optimization this also improves the diagnostics for uninitialized values since it avoids the manufactured conditional jump that breaks out of the loop over the individual bytes. Diff: --- VEX/priv/guest_s390_toIR.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 250daeca13..39356e088f 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -12849,11 +12849,28 @@ s390_irgen_TDGXT(UChar r1, IRTemp op2addr) static const HChar * s390_irgen_CLC(UChar length, IRTemp start1, IRTemp start2) { - IRTemp len = newTemp(Ity_I64); - - assign(len, mkU64(length)); - s390_irgen_CLC_EX(len, start1, start2); + IRType ty; + + switch (length) { + case 0: ty = Ity_I8; break; + case 1: ty = Ity_I16; break; + case 3: ty = Ity_I32; break; + case 7: ty = Ity_I64; break; + default: ty = Ity_INVALID; + } + if (ty != Ity_INVALID) { + IRTemp a = newTemp(ty); + IRTemp b = newTemp(ty); + + assign(a, load(ty, mkexpr(start1))); + assign(b, load(ty, mkexpr(start2))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, a, b); + } else { + IRTemp len = newTemp(Ity_I64); + assign(len, mkU64(length)); + s390_irgen_CLC_EX(len, start1, start2); + } return "clc"; } |