|
From: Carl L. <ca...@so...> - 2020-09-22 16:49:51
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=2a88a98f5b69ac7cdd06e682b2158fd8a31399c9 commit 2a88a98f5b69ac7cdd06e682b2158fd8a31399c9 Author: Carl Love <ce...@us...> Date: Mon Sep 21 15:56:22 2020 -0500 valgrind isa 3.1 foundation header files and other common parts associated with the initial isa v3.1 support Diff: --- configure.ac | 22 +- none/tests/ppc64/Makefile.am | 27 +- none/tests/ppc64/isa_3_1_helpers.h | 112 ++ none/tests/ppc64/isa_3_1_register_defines.h | 50 + none/tests/ppc64/test_isa_3_1_AT.vgtest | 3 + none/tests/ppc64/test_isa_3_1_Misc.vgtest | 2 + none/tests/ppc64/test_isa_3_1_RT.vgtest | 2 + none/tests/ppc64/test_isa_3_1_VRT.vgtest | 2 + none/tests/ppc64/test_isa_3_1_XT.vgtest | 2 + none/tests/ppc64/test_isa_3_1_common.c | 2188 +++++++++++++++++++++++++++ tests/check_ppc64_auxv_cap | 4 +- 11 files changed, 2410 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 1a89d05e5e..085c98993e 100755 --- a/configure.ac +++ b/configure.ac @@ -1455,7 +1455,9 @@ AC_HWCAP_CONTAINS_FLAG([arch_2_05],[HWCAP_HAS_ISA_2_05]) AC_HWCAP_CONTAINS_FLAG([arch_2_06],[HWCAP_HAS_ISA_2_06]) AC_HWCAP_CONTAINS_FLAG([arch_2_07],[HWCAP_HAS_ISA_2_07]) AC_HWCAP_CONTAINS_FLAG([arch_3_00],[HWCAP_HAS_ISA_3_00]) +AC_HWCAP_CONTAINS_FLAG([arch_3_01],[HWCAP_HAS_ISA_3_1]) AC_HWCAP_CONTAINS_FLAG([htm],[HWCAP_HAS_HTM]) +AC_HWCAP_CONTAINS_FLAG([mma],[HWCAP_HAS_MMA]) # ISA Levels AM_CONDITIONAL(HAS_ISA_2_05, [test x$HWCAP_HAS_ISA_2_05 = xyes]) @@ -1624,7 +1626,7 @@ AM_CONDITIONAL(SUPPORTS_HTM, test x$ac_compiler_supports_htm = xyes \ -a x$ac_compiler_sees_htm_builtins = xyes \ -a x$HWCAP_HAS_HTM = xyes ) -# isa 3.0 checking +# isa 3.0 checking. 
(actually 3.0 or newer) AC_MSG_CHECKING([that assembler knows ISA 3.00 ]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ @@ -1638,9 +1640,27 @@ ac_asm_have_isa_3_00=no AC_MSG_RESULT([no]) ]) +# isa 3.01 checking +AC_MSG_CHECKING([that assembler knows ISA 3.1 ]) + +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +]], [[ + __asm__ __volatile__("brh 1,2 "); +]])], [ +ac_asm_have_isa_3_1=yes +AC_MSG_RESULT([yes]) +], [ +ac_asm_have_isa_3_1=no +AC_MSG_RESULT([no]) +]) + + AM_CONDITIONAL(HAS_ISA_3_00, [test x$ac_asm_have_isa_3_00 = xyes \ -a x$HWCAP_HAS_ISA_3_00 = xyes]) +AM_CONDITIONAL(HAS_ISA_3_1, [test x$ac_asm_have_isa_3_1 = xyes \ + -a x$HWCAP_HAS_ISA_3_1 = xyes]) + # Check for pthread_create@GLIBC2.0 AC_MSG_CHECKING([for pthread_create@GLIBC2.0()]) diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am index 9bc0d0a764..a43e527fe6 100644 --- a/none/tests/ppc64/Makefile.am +++ b/none/tests/ppc64/Makefile.am @@ -3,7 +3,7 @@ include $(top_srcdir)/Makefile.tool-tests.am dist_noinst_SCRIPTS = filter_stderr -noinst_HEADERS = ppc64_helpers.h +noinst_HEADERS = ppc64_helpers.h isa_3_1_helpers.h EXTRA_DIST = \ jm-int.stderr.exp jm-int.stdout.exp jm-int.vgtest jm-int.stdout.exp-LE \ @@ -51,6 +51,11 @@ EXTRA_DIST = \ test_isa_3_0_other.stdout.exp-LE test_isa_3_0_other.vgtest \ subnormal_test.stderr.exp subnormal_test.stdout.exp \ subnormal_test.vgtest +# test_isa_3_1_RT.vgtest test_isa_3_1_RT.stderr.exp test_isa_3_1_RT.stdout.exp +# test_isa_3_1_XT.vgtest test_isa_3_1_XT.stderr.exp test_isa_3_1_XT.stdout.exp +# test_isa_3_1_VRT.vgtest test_isa_3_1_VRT.stderr.exp test_isa_3_1_VRT.stdout.exp +# test_isa_3_1_Misc.vgtest test_isa_3_1_Misc.stderr.exp test_isa_3_1_Misc.stdout.exp +# test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp check_PROGRAMS = \ allexec \ @@ -63,7 +68,8 @@ check_PROGRAMS = \ test_tm test_touch_tm ldst_multiple data-cache-instructions \ power6_mf_gpr std_reg_imm \ twi_tdi tw_td power6_bcmp - +# test_isa_3_1_RT test_isa_3_1_XT +# 
test_isa_3_1_Misc test_isa_3_1_VRT test_isa_3_1_AT AM_CFLAGS += @FLAG_M64@ AM_CXXFLAGS += @FLAG_M64@ @@ -71,6 +77,12 @@ AM_CCASFLAGS += @FLAG_M64@ allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@ +#test_isa_3_1_XT_SOURCES = test_isa_3_1_XT.c test_isa_3_1_common.c +#test_isa_3_1_RT_SOURCES = test_isa_3_1_RT.c test_isa_3_1_common.c +#test_isa_3_1_VRT_SOURCES = test_isa_3_1_VRT.c test_isa_3_1_common.c +#test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c +#test_isa_3_1_Misc_SOURCES = test_isa_3_1_Misc.c test_isa_3_1_common.c + if HAS_ALTIVEC BUILD_FLAG_ALTIVEC = -maltivec ALTIVEC_FLAG = -DHAS_ALTIVEC @@ -128,6 +140,14 @@ BUILD_FLAGS_ISA_3_00 = ISA_3_00_FLAG = endif +if HAS_ISA_3_1 +BUILD_FLAGS_ISA_3_1 = -mcpu=power10 +ISA_3_1_FLAG = -DHAS_ISA_3_1 +else +BUILD_FLAGS_ISA_3_1 = +ISA_3_1_FLAG = +endif + test_isa_2_06_part1_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) \ @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) @@ -161,6 +181,9 @@ test_touch_tm_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(HTM_FLAG) test_isa_3_0_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(HTM_FLAG) $(ISA_3_00_FLAG) \ @FLAG_M64@ $(BUILD_FLAGS_ISA_3_00) +test_isa_3_1_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(ISA_3_1_FLAG) \ + @FLAG_M64@ $(BUILD_FLAGS_ISA_3_1) + subnormal_test_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) $(ISA_2_06_FLAG) \ @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) $(BUILD_FLAGS_ISA_2_06) diff --git a/none/tests/ppc64/isa_3_1_helpers.h b/none/tests/ppc64/isa_3_1_helpers.h new file mode 100644 index 0000000000..dfc0422cbc --- /dev/null +++ b/none/tests/ppc64/isa_3_1_helpers.h @@ -0,0 +1,112 @@ +/* isa_3_1_helpers.h */ + +#include "isa_3_1_register_defines.h" + +extern unsigned long a_iters,b_iters,c_iters, m_iters; +extern unsigned long vrai,vrbi,vrci,vrmi; +extern unsigned long a_inc, b_inc, c_inc, m_inc; +extern unsigned long a_limit,b_limit,c_limit; +extern vector unsigned long long vrt, vra, 
vrb, vrc; +extern vector unsigned long long vrm; +extern vector unsigned long long vec_xa; +extern vector unsigned long long vec_xb; +extern vector unsigned long long vec_xc; +extern vector unsigned long long vec_xs; +extern vector unsigned long long vec_xt; +extern unsigned long long dcmx; + +extern unsigned long current_cr; +extern unsigned long current_fpscr; + +typedef void (*test_func_t) (void); +struct test_list_t { + test_func_t func; + const char *name; + const char *form; + unsigned long mask; /* holds SP or DP indicators. */ +}; +typedef struct test_list_t test_list_t; +extern struct test_list_t current_test; +typedef void (*test_group_t) (const char *name, test_func_t func, + unsigned int unused, char * cur_form); + +/* Misc options for debug. */ +/* setup_only indicates to do all of the register initializations, + but skip the instruction test. */ +extern unsigned long setup_only; +extern int verbose; +extern unsigned long prefix_override; +extern unsigned long vrm_override; +extern unsigned long mc_override; +extern unsigned long enable_setjmp; +extern unsigned long dump_tables; +extern void debug_show_form(const char *, char *); +extern void debug_show_current_iteration(); +extern void debug_dump_buffer(); + +extern void identify_form_components(const char *, const char *); +extern void dump_vsxargs(); +extern void generic_prologue(); +extern void build_args_table(); +extern void build_vsx_table(); +extern void print_register_header(); +extern void print_register_footer(); +extern void debug_show_iter_ranges(); +extern void print_result_buffer(); +extern void dump_float_vsx_tables(); +extern void build_float_vsx_tables(); +extern void initialize_target_registers(); +extern void initialize_source_registers(); +extern void set_up_iterators(); +extern void initialize_buffer(int); + +extern int verbose; +#define debug_printf(X) if (verbose>0) printf(X); +#define debug_show_labels (verbose>0) +#define debug_show_iters (verbose>1) +#define 
debug_show_raw_values (verbose>2) +#define debug_show_all_regs (verbose>5) +#define debug_show_tables (verbose>6) + + +#define CHECK_OVERRIDES { \ + if (vrm_override && vrmi > 0) continue; \ + if (prefix_override && strncmp("p", instruction_name, 1) == 0) { \ + if (verbose) printf("Skipping prefix insn test %s\n",instruction_name); \ + continue; \ + } \ +} + +/* CR helpers. */ + +#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" + +#define SET_CR(_arg) \ + __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); + +#define SET_CR0_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x80,%0 " : : "b" (_arg):"cr0"); +#define SET_CR1_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x40,%0 " : : "b" (_arg):"cr1"); +#define SET_CR2_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x20,%0 " : : "b" (_arg):"cr2"); +#define SET_CR3_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x10,%0 " : : "b" (_arg):"cr3"); +#define SET_CR4_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x08,%0 " : : "r" (_arg):"cr4"); +#define SET_CR5_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x04,%0 " : : "r" (_arg):"cr5"); +#define SET_CR6_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x02,%0 " : : "r" (_arg):"cr6"); +#define SET_CR7_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x01,%0 " : : "r" (_arg):"cr7"); + +#define SET_XER(_arg) __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); +#define GET_CR(_lval) __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) +#define GET_XER(_lval) __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) +#define SET_CR_ZERO SET_CR(0) + +/* ************** */ +/* FPSCR helpers. 
*/ +#define SET_FPSCR_ZERO \ + do { \ + double _d = 0.0; \ + __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ + } while (0); + +#define GET_FPSCR(_arg) \ + __asm__ __volatile__ ("mffs %0" : "=f"(_arg) ); + + diff --git a/none/tests/ppc64/isa_3_1_register_defines.h b/none/tests/ppc64/isa_3_1_register_defines.h new file mode 100644 index 0000000000..e4d021e5c0 --- /dev/null +++ b/none/tests/ppc64/isa_3_1_register_defines.h @@ -0,0 +1,50 @@ +/* register definitions used in tests for isa_3_1. */ + +/* ACC / Accumulator. + An ACC is associated with a set of four VSR registers. + Each ACC contains four 128-bit rows. + Each row of each ACC is aliased to a specific VSR in the following manner. + ACC[0][0] == VSR[0]; ACC[0][1] == VSR[1]; ACC[0][2] == VSR[2]; ACC[0][3] == VSR[3] + ... + ACC[7][0] == VSR[28]; ACC[7][1] == VSR[29]; ACC[7][2] == VSR[30]; ACC[7][3] == VSR[31] +*/ +#define ACCNUM 4 +register vector long long TEST_ACC0 __asm__ ("vs16"); +register vector long long TEST_ACC1 __asm__ ("vs17"); +register vector long long TEST_ACC2 __asm__ ("vs18"); +register vector long long TEST_ACC3 __asm__ ("vs19"); + +/* XSp and XTp use the same register pair, defined here as 20 and 21. + { also XSp,XTp in scripts } */ +register vector long long XTp0 __asm__ ("vs20"); // XTp[0];XSp[0]; +register vector long long XTp1 __asm__ ("vs21"); // XTp[1];XSp[1]; + +// xa,xb,xc references are mapped to a specific vector register. +// out of order to allow xap mapped over xa and xc. +register vector long long xa __asm__ ("vs22"); // also xap. +register vector long long xc __asm__ ("vs23"); // also 2nd half of xap. +register vector long long xb __asm__ ("vs24"); +register vector long long xt __asm__ ("vs25"); + +/* frs,frb (variable named frsb) both use the same register pair. + (top half of vs26,vs27) */ +register double frsb __asm__ ("fr26"); +register double frsbp __asm__ ("fr27"); +/* frt,frtp register pair. 
(top half of vs28,vs29) */ +register double frt __asm__ ("vs28"); +register double frtp __asm__ ("vs29"); + +register uint64_t ra __asm__ ("r20"); +register uint64_t rb __asm__ ("r21"); +register uint64_t rc __asm__ ("r22"); +register uint64_t rs __asm__ ("r24"); /* rsp part 1 */ +register uint64_t rsp __asm__ ("r25"); /* rsp part 2 */ +register uint64_t rt __asm__ ("r26"); /* rtp part 1 */ +register uint64_t rtp __asm__ ("r27"); /* rtp part 2 */ + +extern unsigned long long vsrd; +extern unsigned long get_vsrhd_vs26(); +extern unsigned long get_vsrhd_vs27(); +extern unsigned long get_vsrhd_vs28(); +extern unsigned long get_vsrhd_vs29(); + diff --git a/none/tests/ppc64/test_isa_3_1_AT.vgtest b/none/tests/ppc64/test_isa_3_1_AT.vgtest new file mode 100644 index 0000000000..e39ffd22a5 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_AT.vgtest @@ -0,0 +1,3 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prereq: ../../../tests/check_ppc64_auxv_cap mma +prog: test_isa_3_1_AT diff --git a/none/tests/ppc64/test_isa_3_1_Misc.vgtest b/none/tests/ppc64/test_isa_3_1_Misc.vgtest new file mode 100644 index 0000000000..3802934054 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_Misc.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_Misc diff --git a/none/tests/ppc64/test_isa_3_1_RT.vgtest b/none/tests/ppc64/test_isa_3_1_RT.vgtest new file mode 100644 index 0000000000..5aad9de09f --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_RT.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_RT diff --git a/none/tests/ppc64/test_isa_3_1_VRT.vgtest b/none/tests/ppc64/test_isa_3_1_VRT.vgtest new file mode 100644 index 0000000000..96d57b2c28 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_VRT.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_VRT diff --git a/none/tests/ppc64/test_isa_3_1_XT.vgtest b/none/tests/ppc64/test_isa_3_1_XT.vgtest new 
file mode 100644 index 0000000000..cc717eab14 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_XT.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_XT diff --git a/none/tests/ppc64/test_isa_3_1_common.c b/none/tests/ppc64/test_isa_3_1_common.c new file mode 100644 index 0000000000..585fd0c90f --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_common.c @@ -0,0 +1,2188 @@ +/* test_isa_3_1_common.c */ + +/* Copyright (C) 2020, IBM + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + The GNU General Public License is contained in the file COPYING. + */ + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +#include "isa_3_1_register_defines.h" +#include "isa_3_1_helpers.h" +#include "tests/malloc.h" // memalign + +/* post_test indicates to the printf helpers if we are pre- or post- + instruction execution, subsequently used to suppress register + output when those register contents are not useful. */ +unsigned long post_test; +/* increase verbosity for increasing amounts of debug output. */ +int verbose = 0; +#define DEADBEEF 0x1111111111111111ULL + +vector unsigned long long vec_xa; +vector unsigned long long vec_xb; +vector unsigned long long vec_xc; +vector unsigned long long vec_xs; +vector unsigned long long vec_xt; +unsigned long long dcmx; + +/* Iterator controls. 
These are adjusted as appropriate for the tests + being exercised. See set_up_iterators () below. +*/ +unsigned long a_iters, b_iters, c_iters, m_iters; +unsigned long a_inc, b_inc, c_inc, m_inc; +unsigned long vrai, vrbi, vrci, vrmi; +unsigned long a_limit = 0xffff, b_limit = 0xffff, c_limit = 0xffff; + +vector unsigned long long vrt, vra, vrb, vrc; +vector unsigned long long vrm; + +/* Debug: Set these to allow skipping of test subsets that + have nonzero vrm or mc values. +*/ +unsigned long prefix_override = 0; +unsigned long vrm_override = 0; +unsigned long mc_override = 0; +unsigned long enable_setjmp = 0; +unsigned long dump_tables = 0; + +/* condition register misc. */ +extern unsigned long current_cr; +extern unsigned long current_fpscr; + +/* Helpers to manage when our output fields require special handling. + This includes scenarios including: + - some parts of the output fields are Undefined. + - some parts of the output field contain *estimated* data that needs to be + truncated when printed. + - Some parts of the output need to be reported as INF or NAN. + - The contents need to be interpreted as single or double precision. +*/ +// Double precision indicators. +#define DP0 0b00100000 +#define DP1 0b00010000 +#define DOUBLE_MASK 0b00110000 +// Single precision indicators. +#define SP0 0b00001000 +#define SP1 0b00000100 +#define SP2 0b00000010 +#define SP3 0b00000001 +#define SINGLE_MASK 0b00001111 +// Estimated output indicators. +#define SINGLE_EST_MASK 0b01000000 +#define DOUBLE_EST_MASK 0b10000000 +// bfloat16 indicators. +#define B16_MASK 0b1111111100000000 +#define B16_0 0b1000000000000000 +#define B16_1 0b0100000000000000 +#define B16_2 0b0010000000000000 +#define B16_3 0b0001000000000000 +#define B16_4 0b0000100000000000 +#define B16_5 0b0000010000000000 +#define B16_6 0b0000001000000000 +#define B16_7 0b0000000100000000 + +/* Instruction Form indicators. + These are set based on the instruction name and the associated + instruction form. 
These are subsequently used to help initialize + the incoming register contents when testing the specific instruction. +*/ +bool has_ra, has_rb, has_rc, has_rs, has_rt; +bool has_rtp, has_rsp; +bool has_vra, has_vrb, has_vrc, has_vrm, has_vrt; +bool has_xa, has_xb, has_xc, has_xs, has_xt; +bool has_xap; +bool uses_xc_as_blend_mask; +bool has_xsp, has_xtp; +bool has_frb, has_frbp; // frb* uses same regs as frsp. +bool has_frs, has_frsp; +bool has_frt, has_frtp; +bool uses_CRBIT, uses_RC, uses_MC; +bool uses_cr; +bool is_divide_or_modulo; +bool is_insert_double; +bool is_testlsb; +bool has_rs_as_value_source; +bool has_dcmx; +unsigned long is_clear_or_insert_insns; +unsigned long is_mtvsr_insn; +unsigned long is_cmp_insn; +bool has_ra_target; +bool uses_dfp128_input; +bool uses_dfp128_output; +bool uses_acc; // Accumulator related. +bool uses_acc_src; +bool uses_acc_dest; +bool uses_acc_vsrs; +bool uses_buffer; // Buffer related. +bool uses_load_buffer, uses_store_buffer, uses_any_buffer; +bool uses_quad; +unsigned long output_mask; // Output field special handling. +bool instruction_is_sp, instruction_is_sp_estimate; +bool instruction_is_dp, instruction_is_dp_estimate; +bool instruction_is_b16; + +unsigned long long min (unsigned long long a, unsigned long long b) { + if ( a < b ) + return a; + return b; +} + +/* Parse the 'form' field to mark and identify arguments to the instruction. 
*/ +void identify_form_components (const char *instruction_name, + const char *cur_form) +{ + has_ra = ((strstr (cur_form, ",RA") != NULL) || + (strstr (cur_form, "(RA)") != NULL)); + has_ra_target = (strncmp (cur_form, "RA,", 3) == 0); + has_rb = strstr (cur_form, ",RB") != NULL; + has_rc = strstr (cur_form, ",RC") != NULL; + has_rs = ((strstr (cur_form, ",RS") != NULL) || + (strncmp (cur_form, "RS", 2) == 0)); + has_rsp = (strncmp (cur_form, "RSp", 3) == 0); + has_rt = (strncmp (cur_form, "RT", 2) == 0); + has_rtp = (strncmp (cur_form, "RTp", 3) == 0); + + has_vra = strstr (cur_form, "VRA") != NULL; + has_vrb = strstr (cur_form, "VRB") != NULL; + has_vrc = strstr (cur_form, "VRC") != NULL; + has_vrm = strstr (cur_form, "VRM") != NULL; + has_vrt = (strncmp (cur_form, "VRT", 3) == 0); + + has_frb = strstr (cur_form, "FRB") != NULL; + has_frbp = strstr (cur_form, "FRBp") != NULL; + has_frs = strstr (cur_form, "FRS") != NULL; + has_frsp = strstr (cur_form, "FRSp") != NULL; + has_frt = strstr (cur_form, "FRT") != NULL; + has_frtp = strstr (cur_form, "FRTp") != NULL; + + has_xa = strstr (cur_form, ",XA") != NULL; + has_xap = strstr (cur_form, ",XAp") != NULL; + has_xb = strstr (cur_form, ",XB") != NULL; + has_xc = strstr (cur_form, ",XC") != NULL; + has_xs = (strncmp (cur_form, "XS", 2) == 0); + has_xsp = (strncmp (cur_form, "XSp", 3) == 0); + has_xt = (strncmp (cur_form, "XT", 2) == 0); + has_xtp = (strncmp (cur_form, "XTp", 3) == 0); + + uses_acc_src = (strstr (cur_form, "AS") != NULL); + uses_acc_dest = (strstr (cur_form, "AT") != NULL); +/* These (xxm*acc) are special cases where the acc_src is used, but we + need to read the associated _vsrs on the way out. 
+*/ + uses_acc_vsrs = ( + (strstr (instruction_name, "xxmfacc") != NULL) || + (strstr (instruction_name, "xxmtacc") != NULL) ); + uses_acc = uses_acc_src || uses_acc_dest || uses_acc_vsrs; + + uses_dfp128_input = ( + (strncmp (instruction_name, "dctf", 4) == 0)); + uses_dfp128_output = ( + (strncmp (instruction_name, "dcff", 4) == 0)); + is_divide_or_modulo = ( + (strncmp (instruction_name, "vdiv", 4) == 0) || + (strncmp (instruction_name, "pmvdiv", 6) == 0) || + (strncmp (instruction_name, "vmod", 4) == 0) || + (strncmp (instruction_name, "pmvmod", 6) == 0) ); + is_insert_double = ( + (strncmp (instruction_name, "vinsd", 5) == 0) ); + is_testlsb = ( + (strncmp (instruction_name, "xvtlsbb", 7) == 0) ); + uses_xc_as_blend_mask = ( + (strncmp (instruction_name, "xxblend", 7) == 0) ); + has_dcmx = strstr (cur_form, "DCMX") != NULL; + uses_CRBIT = ( + (strncmp (cur_form, "BF", 2) == 0) || + (strstr (cur_form, ",BI") != 0)); + uses_RC = ( + (strstr (instruction_name, ".") != NULL )); + uses_MC = ( + (strstr (instruction_name, ",MC") != NULL )); + uses_cr = ( + (strstr (instruction_name, "setbcr") != 0) || + (strstr (instruction_name, "setnbcr") != 0)); +/* The lxvkq instruction loads special values into a VSX vector, so although + this looks like a load, it is excluded from the uses_load_buffer set + because it does not load a value from a buffer. 
*/ + uses_load_buffer = ( + (strncmp (instruction_name, "ld", 2) == 0) || + (strncmp (instruction_name, "lq", 2) == 0) || + (strncmp (instruction_name, "plq", 3) == 0) || + (strncmp (instruction_name, "plx", 3) == 0) || + (strncmp (instruction_name, "pmlx", 4) == 0) || + (strncmp (instruction_name, "lxv", 3) == 0) || + ( (strncmp (instruction_name, "lxva", 4) == 0) && + (strncmp (instruction_name, "lxvkq", 5) != 0)) ); + uses_store_buffer = ( + (strncmp (instruction_name, "pmst", 4) == 0) || + (strncmp (instruction_name, "pst", 3) == 0) || + (strncmp (instruction_name, "st", 2) == 0)); + uses_any_buffer = (strstr (cur_form, "(RA)") != NULL); + uses_buffer = uses_any_buffer||uses_load_buffer||uses_store_buffer; + + uses_quad = (uses_buffer && (strstr (instruction_name, "q") != NULL)); + + has_rs_as_value_source = ( + (strcmp (cur_form, "RA,RS,RB") == 0) || + (strcmp (cur_form, "RA,RS") == 0) ); + + is_clear_or_insert_insns = ( + (strncmp (instruction_name, "vclr", 4) == 0) || + (strncmp (instruction_name, "vins", 4) == 0) ); + + /* This is used by a helper function to control the CR field output when + the instruction is a compare, otherwise it is likely a bitfield check. */ + is_cmp_insn = ( (strstr (cur_form, "cmp") != NULL)); + + is_mtvsr_insn = ( (strncmp (instruction_name, "mtvsr", 5) == 0)); + + /* If the instruction output needs to be something other than a hex dump, + a mask will have been defined as part of the test_list_t structure. + This includes instructions that return estimated values, as well as + those that return NAN results which contain sign bits that need to be + filtered out. 
*/ + output_mask = ( current_test.mask ); + instruction_is_dp = ( current_test.mask & DOUBLE_MASK ); + instruction_is_dp_estimate = ( current_test.mask & DOUBLE_EST_MASK ); + instruction_is_sp = ( current_test.mask & SINGLE_MASK ); + instruction_is_sp_estimate = ( current_test.mask & SINGLE_EST_MASK ); + instruction_is_b16 = ( current_test.mask & B16_MASK ); +} + +void display_form_components (char * cur_form) { + printf (" %s\n", cur_form); + printf ("Instruction form elements: "); + if (has_ra) printf ("ra "); + if (has_rb) printf ("rb "); + if (has_rc) printf ("rc "); + if (has_rs) printf ("rs "); + if (has_rsp) printf ("rsp "); + if (has_rt) printf ("rt "); + if (has_rtp) printf ("rtp "); + if (has_vra) printf ("vra "); + if (has_vrb) printf ("vrb "); + if (has_vrc) printf ("vrc "); + if (has_vrm) printf ("vrm "); + if (has_vrt) printf ("vrt "); + + if (has_frb) printf ("frb "); + if (has_frbp) printf ("frbp "); + if (has_frs) printf ("frs "); + if (has_frsp) printf ("frsp "); + if (has_frt) printf ("frt "); + if (has_frtp) printf ("frtp "); + if (has_xa) printf ("xa "); + if (has_xap) printf ("xap "); + if (has_xb) printf ("xb "); + if (has_xc) printf ("xc "); + if (has_xs) printf ("xs "); + if (has_xsp) printf ("xsp "); + if (has_xt) printf ("xt "); + if (has_xtp) printf ("xtp "); + if (uses_acc_src) printf ("AS "); + if (uses_acc_dest) printf ("AT "); + printf ("\n"); + if (uses_dfp128_input) + printf ("uses dfp128 input.\n"); + if (uses_dfp128_output) + printf ("uses dfp128 output.\n"); + if (has_ra_target) + printf ("ra is a target register.\n"); + if (has_rs_as_value_source) + printf ("rs is a value source.\n"); + if (uses_xc_as_blend_mask) + printf ("uses xc as a blend mask.\n"); + if (is_clear_or_insert_insns) + printf ("is a clear or insert insn.\n"); + if (is_insert_double) + printf ("is an insert doubleword.\n"); + if (is_testlsb) + printf ("tests lsb.\n"); + if (uses_buffer) + printf ("uses_buffer: (l:%d s:%d ?:%d)\n", + uses_load_buffer, 
uses_store_buffer, uses_any_buffer); + if (uses_quad) + printf ("is a quad load or store.\n"); + if (is_cmp_insn) + printf ("is a compare instruction.\n"); + if (uses_CRBIT) + printf ("instruction references a CR.\n"); + if (uses_cr) + printf ("instruction reads CR bits.\n"); + if (uses_MC) + printf ("Instruction uses MC.\n"); + if (uses_RC) + printf ("Instruction uses Record Bit (cr6).\n"); + if (uses_acc) + printf ("Instruction uses ACC: (src:%d, dst:%d, vsrs:%d).\n", + uses_acc_src, uses_acc_dest, uses_acc_vsrs); + if (output_mask) { + printf ("Instruction results are masked: "); + printf (" (%lx) ", output_mask); + printf ("%s ", instruction_is_sp?"SP ":""); + printf ("%s ", instruction_is_sp_estimate?"SP Estimate ":""); + printf ("%s ", instruction_is_dp?"DP ":""); + printf ("%s ", instruction_is_dp_estimate?"DP Estimate ":""); + printf ("%s ", instruction_is_b16?"bfloat16 ":""); + } + printf ("\n"); +} + +long long mask64[] = { 0x0, 0x00000000ffffffff, 0xffffffff55555555, + 0x5555aaaaaaaa5555, 0xaaaa00000000aaaa }; +#define MASK64SIZE 5 +unsigned long long vrm_mask[] = { 0x0, 0x8000000000000000, + 0x8000000000000000, 0x0 }; +#define VRMMASK_SIZE 4 + +// Helpers to print double/float values. +// Union to help handle referencing hex/float/double values. 
+union rosetta_t { + unsigned long long ull; + unsigned long long ullp[2]; + float flt; + float fltp[2]; + uint16_t uint16s[4]; + double dbl; +}; + +void generic_print_float_as_hex (float f) { + union rosetta_t stone; + stone.ullp[0] = stone.ullp[1] = 0; //init + stone.flt = f; + printf (" %016llx", stone.ull); +} + +void generic_print_ull_as_float (unsigned long long ull) { + union rosetta_t stone; + stone.ullp[0] = stone.ullp[1] = 0; //init + stone.ull = ull; + printf (" %f", stone.flt); +} + +void generic_print_ull_as_double (unsigned long long ull) { + union rosetta_t stone; + stone.ullp[0] = stone.ullp[1] = 0; //init + stone.ull = ull; + printf (" %e", stone.dbl); +} + +void generic_print_double_as_hex (double d) { + union rosetta_t stone; + stone.ullp[0] = stone.ullp[1] = 0; //init + stone.dbl = d; + printf (" %016llx", stone.ull); +} + +// SP in a 32-bit field. +#define SP_SIGNBIT_MASK 0x80000000 +#define SP_EXPONENT_MASK 0x7f800000 +#define SP_FRACTION_MASK 0x007fffff + +// DP (64-bit). +#define DP_SIGNBIT_MASK 0x8000000000000000UL +#define DP_EXPONENT_MASK 0x7ff0000000000000UL +#define DP_FRACTION_MASK 0x000fffffffffffffUL + +// B16 bfloat16. +#define BF16_SIGNBIT_MASK 0x8000 +#define BF16_EXPONENT_MASK 0x7f80 +#define BF16_FRACTION_MASK 0x007f + +/* + - NAN and Zero values need the sign bit display suppressed. (See comments + in jm-insns.c, approx line 7203). + - Some instructions return estimated values, which are calculated + to a different level of precision within valgrind. Those + instructions need their outputs limited to a specific number of + digits as seen below. */ + +// NAN - Maximum biased exponent and a nonzero mantissa (fraction). +#define PRINT_SP_NAN printf (" NaN"); +// DEN - Exp == 0 and Frac != 0 +#define PRINT_SP_PLUS_DEN printf (" +Den"); +#define PRINT_SP_MINUS_DEN printf (" -Den"); +// INF - Maximum biased exponent and a zero mantissa. 
+#define PRINT_SP_INF printf (" Inf"); +#define PRINT_SP_PLUS_INF printf (" +Inf"); +#define PRINT_SP_MINUS_INF printf (" -Inf"); +#define PRINT_SP_FLOAT(x) printf ("%13.05e", x); +#define PRINT_SP_FLOAT_EST(x) printf ("%13.03e", x); +#define PRINT_SP_FLOAT_PLUS_ZERO printf (" +Zero"); +#define PRINT_SP_FLOAT_MINUS_ZERO printf (" -Zero"); + +/* Print a single (32-bit) SP value that is passed in as a 32-bit field. */ +void special_print_sp_value (uint32_t value) { + int signbit; + int exponent; + unsigned long long fraction; + union rosetta_t stone; + + stone.ull = value; + signbit = value & SP_SIGNBIT_MASK; + exponent = (value & SP_EXPONENT_MASK); + fraction = value & SP_FRACTION_MASK; + + if (debug_show_raw_values) { + printf ("\nsp_debug: v:%08x s: %d %3x %8llx %f , ", + value, signbit?1:0, exponent, fraction, stone.flt); + } + if (exponent == SP_EXPONENT_MASK && fraction == 0 ) { + if (signbit) + PRINT_SP_MINUS_INF + else + PRINT_SP_PLUS_INF + } else if (exponent == SP_EXPONENT_MASK && fraction != 0 ) { + PRINT_SP_NAN + } else if (exponent == 0 && fraction == 0 ) { + if (signbit) + PRINT_SP_FLOAT_MINUS_ZERO + else + PRINT_SP_FLOAT_PLUS_ZERO + } else if (exponent == 0 && fraction != 0 ) { + if (signbit) + PRINT_SP_MINUS_DEN + else + PRINT_SP_PLUS_DEN + } else if (instruction_is_sp_estimate) { + PRINT_SP_FLOAT_EST (stone.flt); + } else { + PRINT_SP_FLOAT (stone.flt); + } +} + +void dissect_sp_value (unsigned long long foo) { + if (debug_show_raw_values) { + printf ("RAW sp::%4llx ", foo); + printf (" [s:"); + printf ("%x", (foo & SP_SIGNBIT_MASK)>0); + printf (" e:"); + printf ("%4llx", foo & SP_EXPONENT_MASK); + printf (" f:"); + printf ("%4llx", foo & SP_FRACTION_MASK); + printf ("] "); + } + special_print_sp_value (foo); + printf (" "); +} + +/* Print one DP value out of our vec_ field. 
*/ +#define PRINT_DP_NAN printf (" NaN"); +#define PRINT_DP_MINUS_DEN printf (" -Den"); +#define PRINT_DP_PLUS_DEN printf (" +Den"); +#define PRINT_DP_MINUS_INF printf (" -Inf"); +#define PRINT_DP_PLUS_INF printf (" +InF"); +#define PRINT_DP_FLOAT(x) printf (" %15.08e", x); +#define PRINT_DP_FLOAT_EST(x) printf (" %15.02e", x); +#define PRINT_DP_FLOAT_PLUS_ZERO printf (" +Zero"); +#define PRINT_DP_FLOAT_MINUS_ZERO printf (" -Zero"); +#define PRINT_DP_FLOAT_ZERO printf (" 0.000000e+000"); +void special_print_dp_value (unsigned long long value) { + unsigned long long signbit; + unsigned long long exponent; + unsigned long long fraction; + union rosetta_t stone; + + stone.ull = value; + signbit = (value & DP_SIGNBIT_MASK) > 0; + exponent = value & DP_EXPONENT_MASK; // >> double_exponent_shift; + fraction = value & DP_FRACTION_MASK; + if (verbose>2) + printf ("\ndb_debug: %16llx s:%d %3llx %8llx %llx , ", + value, signbit?1:0, exponent, fraction, stone.ull); + if (exponent == DP_EXPONENT_MASK /* MAX */ && fraction == 0 ) { + if (signbit) + PRINT_DP_MINUS_INF + else + PRINT_DP_PLUS_INF + } else if (exponent == DP_EXPONENT_MASK && fraction != 0 ) { + PRINT_DP_NAN + } else if (exponent == 0 && fraction == 0 ) { + if (signbit) + PRINT_DP_FLOAT_MINUS_ZERO + else + PRINT_DP_FLOAT_PLUS_ZERO + } else if (exponent == 0 && fraction != 0 ) { + if (signbit) + PRINT_DP_MINUS_DEN + else + PRINT_DP_PLUS_DEN + } else if (instruction_is_dp_estimate) { + PRINT_DP_FLOAT_EST (stone.dbl); + } else { + PRINT_DP_FLOAT (stone.dbl); + } +} + +void dissect_dp_value (unsigned long long foo) { + if (debug_show_raw_values) { + printf ("RAW dp::%llx", (foo)); + printf (" [sign:"); + printf ("%x ", (foo & DP_SIGNBIT_MASK) > 0); + printf (" expbits:"); + printf ("%3llx", foo & DP_EXPONENT_MASK ); + printf (" frac:"); + printf ("%16llx", foo & DP_FRACTION_MASK); + printf ("] "); + } + special_print_dp_value (foo); + printf (" "); +} + +// NAN - Maximum biased exponent and a nonzero mantissa 
(fraction). +#define PRINT_BF16_NAN printf (" NaN"); +// DEN - Exp == 0 and Frac != 0 +#define PRINT_BF16_PLUS_DEN printf (" +Den"); +#define PRINT_BF16_MINUS_DEN printf (" -Den"); +// INF - Maximum biased exponent and a zero mantissa. +#define PRINT_BF16_INF printf (" Inf"); +#define PRINT_BF16_PLUS_INF printf (" +Inf"); +#define PRINT_BF16_MINUS_INF printf (" -Inf"); +#define PRINT_BF16_FLOAT(x) printf (" 0x%04x", x); +#define PRINT_BF16_FLOAT_PLUS_ZERO printf (" +Zero"); +#define PRINT_BF16_FLOAT_MINUS_ZERO printf (" -Zero"); +/* print a single bfloat16 value. */ +void special_print_bf16_value (uint16_t value) { + int signbit; + int exponent; + unsigned long long fraction; + union rosetta_t stone; + signbit = value & BF16_SIGNBIT_MASK; + exponent = (value & BF16_EXPONENT_MASK); + fraction = (value & BF16_FRACTION_MASK); + stone.ull = value; + if (debug_show_raw_values) { + printf ("\nbf16_debug: v:%08x s: %d %3x %8llx %f , ", + value, signbit?1:0, exponent, fraction, stone.flt); + } else if (verbose > 0) { + printf (" v:%08x", value); + } + if (exponent == BF16_EXPONENT_MASK && fraction == 0 ) { + if (signbit) + PRINT_BF16_MINUS_INF + else + PRINT_BF16_PLUS_INF + } else if (exponent == BF16_EXPONENT_MASK && fraction != 0 ) { + PRINT_BF16_NAN + } else if (exponent == 0 && fraction == 0 ) { + if (signbit) + PRINT_BF16_FLOAT_MINUS_ZERO + else + PRINT_BF16_FLOAT_PLUS_ZERO + } else if (exponent == 0 && fraction != 0 ) { + if (signbit) + PRINT_BF16_MINUS_DEN + else + PRINT_BF16_PLUS_DEN + } else + PRINT_BF16_FLOAT (value); +} + +/* ******************** */ +/* Accumulator related. */ +/* Note that our tests to set and clear the acc both read and write + from and to the associated VSRs, so some tests may be + self-fulfilling. 
*/ +void push_vsrs_to_acc () { + if (!setup_only) + __asm__ __volatile__ ("xxmtacc 4 "); // $ACCNUM +} + +void push_acc_to_vsrs () { + if (!setup_only) + __asm__ __volatile__ ("xxmfacc 4 "); // $ACCNUM +} + + +void __print_splat_or_sp(long long vv) { + if (vv == DEADBEEF) + printf (" * "); + else { + special_print_sp_value (0xffffffff & (vv>>32)); + special_print_sp_value (0xffffffff & (vv)); + } +} + +void __print_splat_or_dp(long long vv) { + if (vv == DEADBEEF) + printf (" * "); + else { + special_print_dp_value (vv); + } +} + +void __print_splat_or_raw(long long vv) { + if (vv == DEADBEEF) + printf (" * "); + else + printf ("%llx ", vv); +} + +void print_accumulator () { + if (uses_acc || debug_show_all_regs) { + push_acc_to_vsrs (); + if (debug_show_labels) printf (" Acc[]:"); + if (instruction_is_sp) { + printf (" ("); + __print_splat_or_sp(TEST_ACC0[0]); + __print_splat_or_sp(TEST_ACC0[1]); + __print_splat_or_sp(TEST_ACC1[0]); + __print_splat_or_sp(TEST_ACC1[1]); + __print_splat_or_sp(TEST_ACC2[0]); + __print_splat_or_sp(TEST_ACC2[1]); + __print_splat_or_sp(TEST_ACC3[0]); + __print_splat_or_sp(TEST_ACC3[1]); + printf (")"); + } else if (instruction_is_dp) { + printf (" {"); + __print_splat_or_dp(TEST_ACC0[0]); + __print_splat_or_sp(TEST_ACC0[1]); + __print_splat_or_dp(TEST_ACC1[0]); + __print_splat_or_sp(TEST_ACC1[1]); + __print_splat_or_dp(TEST_ACC2[0]); + __print_splat_or_sp(TEST_ACC2[1]); + __print_splat_or_dp(TEST_ACC3[0]); + __print_splat_or_sp(TEST_ACC3[1]); + printf ("}"); + } else { + printf (" ["); + __print_splat_or_raw(TEST_ACC0[0]); + __print_splat_or_raw(TEST_ACC0[1]); + __print_splat_or_raw(TEST_ACC1[0]); + __print_splat_or_raw(TEST_ACC1[1]); + __print_splat_or_raw(TEST_ACC2[0]); + __print_splat_or_raw(TEST_ACC2[1]); + __print_splat_or_raw(TEST_ACC3[0]); + __print_splat_or_raw(TEST_ACC3[1]); + printf ("]"); + } + } +} + + +/* ************** */ +/* The bit definitions for the FPSCR are as follows. 
Bit(s)  Description
0:31    Reserved
32      Floating-Point Exception Summary (FX)
33      Floating-Point Enabled Exception Summary (FEX)
34      Floating-Point Invalid Operation Exception Summary (VX)
35      Floating-Point Overflow Exception (OX)
36      Floating-Point Underflow Exception (UX)
37      Floating-Point Zero Divide Exception (ZX)
38      Floating-Point Inexact Exception (XX)
39      Floating-Point Invalid Operation Exception (SNaN) (VXSNAN)
40      Floating-Point Invalid Operation Exception (∞ - ∞) (VXISI)
41      Floating-Point Invalid Operation Exception (∞ ÷ ∞) (VXIDI)
42      Floating-Point Invalid Operation Exception (0 ÷ 0) (VXZDZ)
43      Floating-Point Invalid Operation Exception (∞ × 0) (VXIMZ)
44      Floating-Point Invalid Operation Exception (Invalid Compare) (VXVC)
45      Floating-Point Fraction Rounded (FR)
46      Floating-Point Fraction Inexact (FI)
47:51   Floating-Point Result Flags (FPRF)
47        Floating-Point Result Class Descriptor (C)
48:51     Floating-Point Condition Code (FPCC)
   48       Floating-Point Less Than or Negative (FL or <)
   49       Floating-Point Greater Than or Positive (FG or >)
   50       Floating-Point Equal or Zero (FE or = )
   51       Floating-Point Unordered or NaN (FU or ?)
52      Reserved
53      Floating-Point Invalid Operation Exception (Software-Defined Condition) (VXSOFT)
54      Floating-Point Invalid Operation Exception (Invalid Square Root) (VXSQRT)
55      Floating-Point Invalid Operation Exception (Invalid Integer Convert) (VXCVI)
56      Floating-Point Invalid Operation Exception Enable (VE)
57      Floating-Point Overflow Exception Enable (OE)
58      Floating-Point Underflow Exception Enable (UE)
59      Floating-Point Zero Divide Exception Enable (ZE)
60      Floating-Point Inexact Exception Enable (XE)
61      Floating-Point Non-IEEE Mode (NI)
62:63   Floating-Point Rounding Control (RN)
   00     Round to Nearest
   01     Round toward Zero
   10     Round toward +Infinity
   11     Round toward -Infinity
*/
/* Valgrind currently tracks the rounding mode, C and FPCC fields
   of the FPSCR.
Additional checking in the testcase is not + necessary or beneficial. */ + +#define FPCC_C_BIT (0x1 << (63-47)) +#define FPCC_FL_BIT (0x1 << (63-48)) +#define FPCC_FG_BIT (0x1 << (63-49)) +#define FPCC_FE_BIT (0x1 << (63-50)) +#define FPCC_FU_BIT (0x1 << (63-51)) +#define FPCC_FPRF_MASK \ + FPCC_C_BIT | FPCC_FL_BIT | FPCC_FG_BIT | FPCC_FE_BIT | FPCC_FU_BIT + +#define FPSCR_RN_BIT62 (0x1 << (63-62)) +#define FPSCR_RN_BIT63 (0x1 << (63-63)) + +#define CRFIELD_BIT0 0x8 +#define CRFIELD_BIT1 0x4 +#define CRFIELD_BIT2 0x2 +#define CRFIELD_BIT3 0x1 + +/* Display the condition register bits. */ +int cr_overflow_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT3); +} + +int cr_zero_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT2); +} + +int cr_positive_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT1); +} + +int cr_negative_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT0); +} + +/* This function (__dissect_cr) takes a bitfield directly. */ +static void __dissect_cr (unsigned this_cr) { + extern unsigned long is_cmp_insn; + printf ("["); + if (cr_negative_set (this_cr)) + printf ("%s", is_cmp_insn ? " (LT) 0x1 = Negative 0b1 " : "1"); + else + printf ("%s", verbose ? "0" : "0"); + + if (cr_positive_set (this_cr)) + printf ("%s", is_cmp_insn ? " (GT) 0x2 = Positive fg_flag (zero/inf/denorm) " : "1"); + else + printf ("%s", verbose ? "0" : "0"); + + if (cr_zero_set (this_cr)) + printf ("%s", is_cmp_insn ? " (EQ) 0x4 = Zero fe_flag (zero/nan/inf/neg/e_b<-970" : "1"); + else + printf ("%s", verbose ? "0" : "0"); + + if (cr_overflow_set (this_cr)) + printf ("%s", is_cmp_insn ? " (SO) 0x8 = Overflow 0b0" : "1"); + else + printf ("%s", verbose ? 
"0" : "0"); + printf ("]"); +} + +/* Extract one CR field */ +int extract_cr_rn (unsigned long chosen_cr, unsigned long rn) { + unsigned int masked_cr; + unsigned long shifted_value; + shifted_value = chosen_cr >> ( ( (7 - rn) * 4 ) ); + masked_cr = shifted_value & 0xf; + return masked_cr; +} + +/* Display one CR field */ +void dissect_cr_rn (unsigned long chosen_cr, unsigned long rn) { + unsigned int masked_cr; + if (debug_show_labels) printf (" RC/CR (%ld):", rn ); + masked_cr = extract_cr_rn (chosen_cr, rn); + printf ("%ld:", rn); + __dissect_cr (masked_cr); +} + +char * fpscr_strings[] = { +" 0-RSVD", " 1-RSVD", " 2-RSVD", " 3-RSVD", " 4-RSVD", " 5-RSVD", " 6-RSVD", +" 7-RSVD", " 8-RSVD", " 9-RSVD", "10-RSVD", "11-RSVD", "12-RSVD", "13-RSVD", +"14-RSVD", "15-RSVD", "16-RSVD", "17-RSVD", "18-RSVD", "19-RSVD", "20-RSVD", +"21-RSVD", "22-RSVD", "23-RSVD", "24-RSVD", "25-RSVD", "26-RSVD", "27-RSVD", +"28-RSVD", "29-DRN0", "30-DRN1", "31-DRN2", +/* 32 */ "FX", "FEX", "VX", +/* 35 */ "OX", "UX", "ZX", "XX", "VXSNAN", +/* 40 */ "VXISI (inf-inf)", "VXIDI (inf/inf)", "VXZDZ (0/0)", +/* 43 */ "VXIMZ (inf*0)", "VXVC", +/* 45 */ "FR", "FI", +/* 47 */ "FPRF-C", "FPCC-FL", "FPCC-FG", +/* 50 */ "FPCC-FE", "FPCC-FU", +/* 52 */ "52-RSVD", "FXSOFT", "VXSQRT", +/* 55 */ "VXCVI", "VE", "OE", "UE", "ZE", +/* 60 */ "XE", "NI", "RN-bit62", "RN-bit63" +}; +/* Display only the fpscr bits that are valid under valgrind. + * Valgrind tracks the C (FPSCR[47]), FPCC (FPSCR[48:51) + * DRN (FPSCR[29:31]) and RN (FPSCR[62:63]). 
*/ +void dissect_fpscr_valgrind (unsigned long local_fpscr) { + int i; + long mybit; + + /* Print DRN fields */ + for (i = 29; i < 32; i++) { + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } + + /* Print C and FPCC fields */ + for (i = 47; i < 52; i++) { + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } + + /* Print RN field */ + for (i = 62; i < 64; i++) { + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } +} + +/* + * This prints the entire FPSCR field. This is only called under higher + * verbosities, as valgrind does not track most of these bits. + */ +void dissect_fpscr_raw (unsigned long local_fpscr) { +/* Due to the additional involved logic, the rounding mode (RN) bits 61-62 + * are handled within dissect_fpscr_rounding_mode (). */ + int i; + long mybit; + for (i = 0; i < 61; i++) { + /* also note that the bit numbering is backwards. */ + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } +} + +void dissect_fpscr (unsigned long local_fpscr) { + if (verbose > 2) { + printf (" [[ fpscr:%lx ]] ", local_fpscr); + dissect_fpscr_raw (local_fpscr); + } else { + dissect_fpscr_valgrind (local_fpscr); + } +} + + +/* *************** */ +/* Buffer Helpers. +Define both a base and a reference buffer. When printing results, only print +the values when there is a difference between the two. */ +#define BUFFER_SIZE 12 +/* Note: Watch the alignment of the buffer, some loads/stores may require +stronger alignments. 
*/ +__attribute__ ( (aligned (16))) unsigned long long buffer[2*BUFFER_SIZE]; +__attribute__ ( (aligned (16))) unsigned long long reference_buffer[2*BUFFER_SIZE]; +unsigned long changed_index[2*BUFFER_SIZE]; +void initialize_buffer (int t) +{ + int x; + for (x = 0; x < BUFFER_SIZE; x++) + /* We don't want each of the 32-bit chunks to be identical since loads + * of a byte from the wrong 32-bit chuck may be difficult to spot. + * Load these up with values that are also interesting if SP/DP, etc. + */ + switch ( (t+x)%BUFFER_SIZE) { + case 0: buffer[x] = 0x3fe00094e0007359; break; // sp + case 1: buffer[x] = 0x7ff7020304057607; break; // nan + case 2: buffer[x] = 0x7ff0000000007000; break; // inf + case 3: buffer[x] = 0x7f0000007f007000; break; // sp pair. + case 4: buffer[x] = 0x5a05a05a05a07a05; break; + case 5: buffer[x] = 0x0102030405067708; break; + case 6: buffer[x] = 0xfedcba9876547210; break; + case 7: buffer[x] = 0x0123456789ab7def; break; + case 8: buffer[x] = 0xffeeddccbbaa7988; break; + default: buffer[x] = 0x1112111211127112* (x-8); break; + } + for (x = 0; x < BUFFER_SIZE; x++) + reference_buffer[x] = buffer[x]; +} + +/* Buffer printing helper. This only displays the contents if they have + changed with respect to the reference buffer, or if running under + high verbosity. 
*/ +void dump_changed_buffer (unsigned long range) { + int x; + int buffer_changed = 0; + + for (x = 0; (x < BUFFER_SIZE) && (x<range) ; x++) { + changed_index[x] = 0; + if (buffer[x] !=reference_buffer[x]) { + buffer_changed = 1; + changed_index[x] = 1; + if (verbose>2) + printf (" {idx %d %016llx %016llx}", + x, reference_buffer[x] , buffer[x] ); + } + } + if (verbose>2 || buffer_changed) { + printf (" ["); + for (x = 0; x < BUFFER_SIZE && (x<range); x++) { + if (x) printf (" "); + if (verbose > 0) + printf ("%s%016llx", changed_index[x] == 1?"*":" ", buffer[x] ); + if (changed_index[x]) { + if (instruction_is_sp) { + printf (" ("); + special_print_sp_value (0xffffffff & buffer[x] >> 32 ); + printf (" "); + special_print_sp_value (0xffffffff & buffer[x]); + printf (") "); + } else if (instruction_is_dp) { + printf (" {"); + special_print_dp_value (buffer[x]); + printf ("} "); + } + printf ("%016llx", buffer[x]); + } else + printf (" - "); + } + printf ("]"); + } +} + +void dump_raw_buffer () { + int x; + printf ("buffer:["); + for (x = 0; x < BUFFER_SIZE ; x++) { + if (x%4 == 0) printf (" (%d)", x); + printf ("%016llx ", buffer[x]); + } + printf ("]"); +} + +void dump_small_buffer (void) { + dump_changed_buffer (8); +} + +void dump_large_buffer (void) { + dump_changed_buffer (8); +} + +void dump_buffer () { +if (verbose>1) printf (" buffer:"); + if (uses_quad) { + dump_large_buffer (); + } else { + dump_small_buffer (); + } +} + +void print_undefined () { + if (verbose>1) + printf (" [Undef]"); + else + printf (" "); +} + +/* print the input 64-bit vector as 32-bit SP lumps. */ +void print_vec_as_sp (unsigned long long ull64) { + printf (" %08llx", ull64 >> 32 ); + printf (" %08llx", ull64 & 0xffff ); +} + +/*------------------------------------------------------------------*/ +/* Decimal Floating Point (DFP) helper functions */ +/*------------------------------------------------------------------*/ +#define NOT(x) ( ( ( x ) == 0) ? 
1 : 0) +#define GET(x,y) ( ( ( x ) & ( 0x1UL << ( y ) ) ) >> ( y ) ) +#define PUT(x,y) ( ( x )<< ( y ) ) + +unsigned long dpb_to_bcd ( unsigned long chunk ) +{ + int a, b, c, d, e, f, g, h, i, j, k, m; + int p, q, r, s, t, u, v, w, x, y; + unsigned long value; + + /* convert 10 bit densely packed BCD to BCD */ + p = GET ( chunk, 9 ); + q = GET ( chunk, 8 ); + r = GET ( chunk, 7 ); + s = GET ( chunk, 6 ); + t = GET ( chunk, 5 ); + u = GET ( chunk, 4 ); + v = GET ( chunk, 3 ); + w = GET ( chunk, 2 ); + x = GET ( chunk, 1 ); + y = GET ( chunk, 0 ); + + /* The BCD bit values are given by the following boolean equations.*/ + a = ( NOT (s) & v & w ) | ( t & v & w & s ) | ( v & w & NOT (x) ); + b = ( p & s & x & NOT (t) ) | ( p & NOT (w) ) | ( p & NOT (v) ); + c = ( q & s & x & NOT (t) ) | ( q & NOT (w) ) | ( q & NOT (v) ); + d =r; + e = ( v & NOT (w) & x ) | ( s & v & w & x ) | ( NOT (t) & v & x & w ); + f = ( p & t & v & w & x & NOT (s) ) | ( s & NOT (x) & v ) | ( s & NOT (v) ); + g = ( q & t & w & v & x & NOT (s) ) | ( t & NOT (x) & v ) | ( t & NOT (v) ); + h = u; + i = ( t & v & w & x ) | ( s & v & w & x ) | ( v & NOT (w) & NOT (x) ); + j = ( p & NOT (s) & NOT (t) & w & v ) | ( s & v & NOT (w) & x ) + | ( p & w & NOT (x) & v ) | ( w & NOT (v) ); + k = ( q & NOT (s) & NOT (t) & v & w ) | ( t & v & NOT (w) & x ) + | ( q & v & w & NOT (x) ) | ( x & NOT (v) ); + m = y; + + value = PUT (a, 11) | PUT (b, 10) | PUT (c, 9) | PUT (d, 8) | PUT (e, 7) + | PUT (f, 6) | PUT (g, 5) | PUT (h, 4) | PUT (i, 3) | PUT (j, 2) + | PUT (k, 1) | PUT (m, 0); + return value; +} +#undef NOT +#undef GET +#undef PUT + +/* get_declet (). Return a 10-bit declet, beginning at the 'start' + * offset. 
+ * + * | dword1 | dword0 | + * | 0 63|64 127| + */ +#define TEN_BITS 0x03ffULL + +int get_declet (int start, uint64_t dword1, uint64_t dword0) { + unsigned long local_declet; + unsigned int dword0_shift; + unsigned int dword1_shift; + + dword1_shift = 63 - (start + 9); + dword0_shift = 127 - (start + 9); + + if (verbose>5) printf ("\n%s (%d) %016lx %016lx", + __FUNCTION__, start, dword1, dword0); + + if ( (start + 9) < 63) { /* fully within dword1 */ + local_declet = (dword1 >> dword1_shift) & TEN_BITS; + + } else if (start >= 65) {/* fully within dword0 */ + local_declet = (dword0 >> dword0_shift) & TEN_BITS; + + } else { /* straddling the two dwords*/ + unsigned long mask_dword0; + unsigned long mask_dword1; + + mask_dword1 = TEN_BITS >> (64 - dword0_shift); + mask_dword0 = TEN_BITS << (dword0_shift); + local_declet = + ( (dword1 & mask_dword1) << (64-dword0_shift)) + + ( (dword0 & mask_dword0) >> dword0_shift); + } + return local_declet; +} + +int get_bcd_digit_from_dpd (int start, uint64_t dword1, + uint64_t dword0) { + long bcd_digit; + long declet; + + declet = get_declet (start, dword1, dword0); + bcd_digit = dpb_to_bcd (declet); + return bcd_digit; +} + +/* For DFP finite numbers, the combination field (G field) is a + * combination of the exponent and the LMD (Left Most Digit) of the + * significand. The fields are encoded/decoded as described in the + * table here. + * 00 01 10 -< Exponent bits. + * 0: 00000 01000 10000 + * ... + * 7: 00111 01111 10111 + * 8: 11000 11010 11100 + * 9: 11001 11011 11101 (encoded special field). + * | + * ^ LMD value. +*/ +#define DFP_GFIELD_MASK 0x7c00000000000000UL +#define DFP_GFIELD_SHIFT 58 +//The exponent bias value is 101 for DFP Short, 398 +//for DFP Long, and 6176 for DFP Extended. 
+#define DFP128_EXPONENT_BIAS 6176 +#define DFP64_EXPONENT_BIAS 398 + +unsigned int special_field_LMD (uint64_t dword1) { + unsigned long g_field_specials; + int left_two_bits; + int right_three_bits; + + g_field_specials = (dword1 & DFP_GFIELD_MASK) >> DFP_GFIELD_SHIFT; + left_two_bits = (g_field_specials & 0x18) >> 3; + right_three_bits = g_field_specials & 0x07; + + /* The LMD result maps directly to the right_three_bits value as + * long as the left two bits are 0b00, 0b01, 0b10. So a compare + * against 3 is sufficient to determine if we can return the right + * three bits directly. (LMD values 0..7). + */ + if (left_two_bits < 3) { + return (right_three_bits); + } + + /* LMD values of 8 or 9 require a bit of swizzle, but a check of + * the right-most bit is sufficient to determine whether LMD value + * is 8 or 9. + */ + if (right_three_bits & 0x1) + return 9; + else + return 8; +} + +/* Returns the exponent bits, as decoded from the G field. */ +int special_field_exponent_bits (unsigned long dword1) { + unsigned long g_field_specials; + int left_two_bits; + int right_three_bits; + + g_field_specials = (dword1 & DFP_GFIELD_MASK) >> DFP_GFIELD_SHIFT; + left_two_bits = (g_field_specials & 0x18) >> 3; + right_three_bits = g_field_specials & 0x07; + + /* The special field exponent bits maps directly to the left_two_bits + * value as long as the left two bits are 0b00, 0b01, 0b10. So a compare + * against 3 is sufficient for those values. + */ + if (left_two_bits < 3) { + return (left_two_bits); + } + + switch (right_three_bits) { + case 0: + case 1: return 0x0; + case 2: + case 3: return 0x1; + case 4: + case 5: return 0x2; + case 6: /* Infinity */ return 0x0; + case 7: /* NaN */ return 0x0; + } + return -1; /* should never hit this */ +} + +/* The 'exponent left' shift is for moving the leftmost two bits + * of the exponent down to where they can be easily merged with the + * rest of the exponent. 
+ */ +#define DFP128_EXPONENT_RIGHT_MASK 0x03ffc00000000000 +#define DFP64_EXPONENT_RIGHT_MASK 0x03fc000000000000 +#define DFP128_EXPONENT_RIGHT_MASK_SHIFT 46 +#define DFP64_EXPONENT_RIGHT_MASK_SHIFT 50 +#define DFP128_EXPONENT_LEFT_SHIFT 12 +#define DFP64_EXPONENT_LEFT_SHIFT 8 + +#define DFP_NAN 0x1f +#define DFP_INF 0x1e +#define DFP_SIGNALING_NAN_BIT 0x0200000000000000 + +/* return the dfp exponent from the leading dword. */ +signed long dfp128_exponent (unsigned long dword1) { + unsigned long exponent_left; + unsigned long exponent_right; + unsigned long biased_exponent; + signed long exponent; + + exponent_left = special_field_exponent_bits (dword1); + exponent_right = (dword1 & DFP128_EXPONENT_RIGHT_MASK); + biased_exponent = (exponent_left << DFP128_EXPONENT_LEFT_SHIFT) + + (exponent_right >> DFP128_EXPONENT_RIGHT_MASK_SHIFT); + + /* Unbias the exponent. */ + exponent = biased_exponent - DFP128_EXPONENT_BIAS; + return exponent; +} + +/* Interpret the paired 64-bit values as a extended (quad) 128 bit DFP. + * + * | Significand | Combination Field/ | | + * | sign bit | Encoded Exponent | remainder of significand | + * |0 |1 17|18 127| + * ^ (bit0) Significand sign bit. + * ^ (bit 1:17) Combination field. Contains high bits of + * exponent (encoded), LMD of significand (encoded), + * and the remainder of the exponent. First five bits + * will indicate special cases NAN or INF. + * ^ (bit 18:127) Remainder of the + * significand. 
+ */ + +#define DFP128_COMBINATION_MASK 0x7fffc +#define DFP64_COMBINATION_MASK 0x7ffc +#define DFP128_COMBINATION_SHIFT 46 +#define DFP64_COMBINATION_SHIFT 50 +#define DFP_SPECIAL_SYMBOLS_MASK 0x1f +#define DFP_SPECIAL_SYMBOLS_SHIFT 58 + +#define DFP_NAN 0x1f +#define DFP_INF 0x1e +#define DFP_SIGNALING_NAN_BIT 0x0200000000000000 + +#define DFP128_T_START 18 + +void dissect_dfp128_float (uint64_t dword1, uint64_t dword0) { + long signbit; + signed long exponent; + unsigned long gfield_special_symbols; + unsigned long lmd_digit; + unsigned long bcd_digits[13]; + int i; + int silent = 0; // suppress leading zeros from the output. + + if (debug_show_raw_values) + printf ("DFP128R:%016lx, %016lx", dword1, dword0); + + signbit = (dword1 >> 63); + + if (signbit) printf (" -"); + else printf (" "); + + gfield_special_symbols = + ((dword1 >> DFP_SPECIAL_SYMBOLS_SHIFT) & DFP_SPECIAL_SYMBOLS_MASK); + + switch (gfield_special_symbols) { + case DFP_INF: + printf ( "inf "); + break; + + case DFP_NAN: + if (dword1 & DFP_SIGNALING_NAN_BIT) + printf ("SNaN "); + else + printf ("QNaN "); + break; + + default: + // printf ( "Finite "); + exponent = dfp128_exponent (dword1); + // printf ("Exponent: %d Bias: %d ", exponent, DFP128_EXPONENT_BIAS ); + + lmd_digit = special_field_LMD (dword1); + for (i = 0; i < 11; i++) { + bcd_digits[i] = get_bcd_digit_from_dpd ( (DFP128_T_START + + 10 * i), dword1, dword0); + } + if (lmd_digit) { + silent++; + printf ("%01lx", lmd_digit); + } else { + printf (" "); + } + for (i = 0; i < 11; i++) { + if (bcd_digits[i] || silent ) { + silent++; + printf ("%01lx", bcd_digits[i]); + } else { + /* always print at least the last zero */ + if (i == 10) + printf ("0"); + else + printf (" "); + } + } + printf (" * 10^"); + printf ("%ld", exponent); + } +} + +void print_vsr (int vsr_to_print) { +unsigned long long blob1 = 0, blob2 = 0; + switch (vsr_to_print) { + case 26: + __asm__ __volatile__ ("mfvsrd %0, 26":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld 
%0, 26":"=r" (blob2)); + break; + case 27: + __asm__ __volatile__ ("mfvsrd %0, 27":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld %0, 27":"=r" (blob2)); + break; + case 28: + __asm__ __volatile__ ("mfvsrd %0, 28":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld %0, 28":"=r" (blob2)); + break; + case 29: + __asm__ __volatile__ ("mfvsrd %0, 29":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld %0, 29":"=r" (blob2)); + break; + default: + printf ("Add entry for VSR %d to %s in %s.\n", vsr_to_print, __FUNCTION__, __FILE__); + } +if (debug_show_labels) + printf (" VSR (%d):", vsr_to_print); +printf (" %llx, %llx ", blob1, blob2); +} + +void print_frt () { + unsigned long long value1, value3; + if (has_frt || debug_show_all_regs ) { + if (debug_show_labels) printf (" frt%s:", has_frtp?"p":"" ); + /* If the result is a dfp128 value, the dfp128 value is + contained in the frt, frtp values which are split across + a pair of VSRs. */ + if (uses_dfp128_output) { + if (verbose) print_vsr (28); + if (verbose) print_vsr (29); + value1 = get_vsrhd_vs28 (); + value3 = get_vsrhd_vs29 (); + dissect_dfp128_float (value1, value3); + } else { + if (debug_show_raw_values) generic_print_float_as_hex (frt); + printf (" %e", frt); + if (has_frtp) { + if (debug_show_raw_values) generic_print_float_as_hex (frtp); + printf (" %e", frtp); + } + } + } +} + +/* implementation detail.. FRS and FRB use the same set of regs. */ +void print_frs_or_frb () { + unsigned long long vsrvalue1, vsrvalue3; + if (debug_show_labels) { + if (has_frs) printf (" frs%s:", has_frsp?"p":"" ); + if (has_frb) printf (" frb%s:", has_frbp?"p":"" ); + } + if (uses_dfp128_input) { + if (verbose) print_vsr (26); + if (verbose) print_vsr (27); + vsrvalue1 = get_vsrhd_vs26 (); vsrvalue3 = get_vsrhd_vs27 (); + dissect_dfp128_float (vsrvalue1, vsrvalu... [truncated message content] |