|
From: <sv...@va...> - 2014-11-23 17:51:45
|
Author: sewardj
Date: Sun Nov 23 17:51:34 2014
New Revision: 14764
Log:
Merge, from trunk, r14684
14684 arm64 Add tests for all SIMD FP instructions [..]
Modified:
branches/VALGRIND_3_10_BRANCH/ (props changed)
branches/VALGRIND_3_10_BRANCH/none/tests/arm64/fp_and_simd.c
branches/VALGRIND_3_10_BRANCH/none/tests/arm64/fp_and_simd.stdout.exp
Modified: branches/VALGRIND_3_10_BRANCH/none/tests/arm64/fp_and_simd.c
==============================================================================
--- branches/VALGRIND_3_10_BRANCH/none/tests/arm64/fp_and_simd.c (original)
+++ branches/VALGRIND_3_10_BRANCH/none/tests/arm64/fp_and_simd.c Sun Nov 23 17:51:34 2014
@@ -11,6 +11,9 @@
typedef signed int Int;
typedef unsigned char UChar;
typedef unsigned long long int ULong;
+/* Signed 64-bit and floating-point aliases, used by the FP value
+   generators (randDouble, randFloat) and by union _V128. */
+typedef signed long long int Long;
+typedef double Double;
+typedef float Float;
typedef unsigned char Bool;
#define False ((Bool)0)
@@ -28,8 +31,8 @@
UShort u16[8];
UInt u32[4];
ULong u64[2];
- float f32[4];
- double f64[2];
+ Float f32[4];
+ Double f64[2];
};
typedef union _V128 V128;
@@ -78,6 +81,17 @@
printf("%02x", (Int)v->u8[i]);
}
+/* Print MSG on a line of its own, then each of the first NBLOCK vectors
+   of BLOCK on its own line, prefixed by a space and formatted by
+   showV128. */
+static void showBlock ( const char* msg, V128* block, Int nBlock )
+{
+   Int   remaining;
+   V128* cur = block;
+   printf("%s\n", msg);
+   for (remaining = nBlock; remaining > 0; remaining--, cur++) {
+      printf(" ");
+      showV128(cur);
+      printf("\n");
+   }
+}
+
__attribute__((unused))
static void* memalign16(size_t szB)
{
@@ -88,11 +102,146 @@
return x;
}
+/* Replicate the low 4 bits of X into all sixteen nibbles of a 64-bit
+   value, e.g. 0x5 -> 0x5555555555555555. */
+static ULong dup4x16 ( UInt x )
+{
+   const ULong nibble = (ULong)(x & 0xF);
+   /* Multiplying by 0x1111...1 drops a copy of the nibble into every
+      4-bit slot; since nibble <= 0xF no slot carries into the next. */
+   return nibble * 0x1111111111111111ULL;
+}
+
+// Support for randDouble/randFloat (below), which generate a random
+// floating-point number but, about 1 time in 2, instead return a
+// special value (+/- 0, +/- 1, +/- Inf, +/- NaN, +/- denorm).
+// This ensures that many of the groups of 4 calls there will
+// return a special value.
+
+// Table of the ten special values, filled in on first use by
+// ensure_special_values_initted.
+static Double special_values[10];
+static Bool special_values_initted = False;
+
+// Helpers that manufacture the individual special values.
+// NOTE(review): they are all marked noinline, presumably so that the
+// values are produced by real FP operations at run time rather than
+// being folded away by the compiler -- confirm.
+static __attribute__((noinline))
+Double negate ( Double d ) { return -d; }
+static __attribute__((noinline))
+Double divf64 ( Double x, Double y ) { return x/y; }
+
+static __attribute__((noinline))
+Double plusZero ( void ) { return 0.0; }
+static __attribute__((noinline))
+Double minusZero ( void ) { return negate(plusZero()); }
+
+static __attribute__((noinline))
+Double plusOne ( void ) { return 1.0; }
+static __attribute__((noinline))
+Double minusOne ( void ) { return negate(plusOne()); }
+
+static __attribute__((noinline))
+Double plusInf ( void ) { return 1.0 / 0.0; }
+static __attribute__((noinline))
+Double minusInf ( void ) { return negate(plusInf()); }
+
+// NaN is obtained as Inf / Inf.
+static __attribute__((noinline))
+Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); }
+static __attribute__((noinline))
+Double minusNaN ( void ) { return negate(plusNaN()); }
+
+// 1.23e-318: below the smallest normal double, hence a denormal.
+static __attribute__((noinline))
+Double plusDenorm ( void ) { return 1.23e-315 / 1e3; }
+static __attribute__((noinline))
+Double minusDenorm ( void ) { return negate(plusDenorm()); }
+
+
+/* Fill special_values[] on the first call; later calls return at once.
+   As a side effect the first call also prints the ten values (with %e)
+   to stdout, framed by blank lines, so that text appears in the test
+   output at the point where the first random FP value is requested. */
+static void ensure_special_values_initted ( void )
+{
+ if (special_values_initted) return;
+ special_values[0] = plusZero();
+ special_values[1] = minusZero();
+ special_values[2] = plusOne();
+ special_values[3] = minusOne();
+ special_values[4] = plusInf();
+ special_values[5] = minusInf();
+ special_values[6] = plusNaN();
+ special_values[7] = minusNaN();
+ special_values[8] = plusDenorm();
+ special_values[9] = minusDenorm();
+ special_values_initted = True;
+ // Dump the table once so it is recorded in the test output.
+ int i;
+ printf("\n");
+ for (i = 0; i < 10; i++) {
+ printf("special value %d = %e\n", i, special_values[i]);
+ }
+ printf("\n");
+}
+
+/* Return a pseudo-random Double.  One randUChar draw picks the kind of
+   result: values >= 128 yield an integer-valued double built from
+   randULong(TyDF); anything else yields one of the ten special_values,
+   chosen by a second randUChar draw.  The first call also triggers the
+   one-time special-value setup (and its printout). */
+static Double randDouble ( void )
+{
+ ensure_special_values_initted();
+ UChar c = randUChar();
+ if (c >= 128) {
+ // Ordinary (non-special) number.
+ // NOTE(review): the value ranges quoted below are the original
+ // author's; they depend on what randULong(TyDF) returns, which is
+ // not visible here -- confirm.
+ // 0 .. 2^63-1
+ ULong u64 = randULong(TyDF);
+ // -2^62 .. 2^62-1
+ Long s64 = (Long)u64;
+ // -2^55 .. 2^55-1
+ // (right-shifting a negative signed value is implementation-defined
+ // in C; an arithmetic shift is presumably intended)
+ s64 >>= (62-55);
+ // and now as a double
+ return (Double)s64;
+ }
+ // Otherwise pick one of the ten special values.
+ c = randUChar() % 10;
+ return special_values[c];
+}
+
+/* Return a pseudo-random Float.  Same scheme as randDouble: one
+   randUChar draw picks the kind of result; values >= 128 yield an
+   integer-valued float built from randULong(TyDF), anything else yields
+   one of the ten special_values narrowed from Double to Float.
+   NOTE(review): the denormal entries (about +/-1.23e-318) are far below
+   the smallest float denormal, so narrowing them should give +/-0 and
+   single-precision denormals never appear -- confirm this is intended. */
+static Float randFloat ( void )
+{
+ ensure_special_values_initted();
+ UChar c = randUChar();
+ if (c >= 128) {
+ // Ordinary (non-special) number.
+ // NOTE(review): the value ranges quoted below are the original
+ // author's; they depend on what randULong(TyDF) returns, which is
+ // not visible here -- confirm.
+ // 0 .. 2^63-1
+ ULong u64 = randULong(TyDF);
+ // -2^62 .. 2^62-1
+ Long s64 = (Long)u64;
+ // -2^25 .. 2^25-1
+ // (right-shifting a negative signed value is implementation-defined
+ // in C; an arithmetic shift is presumably intended)
+ s64 >>= (62-25);
+ // and now as a float
+ return (Float)s64;
+ }
+ // Otherwise pick one of the ten special values (implicitly converted
+ // from Double to Float on return).
+ c = randUChar() % 10;
+ return special_values[c];
+}
+
+/* Fill both double-precision lanes of each of the first NBLOCK vectors
+   of BLOCK with values from randDouble, lane 0 before lane 1. */
+void randBlock_Doubles ( V128* block, Int nBlock )
+{
+   Int vec, lane;
+   for (vec = 0; vec < nBlock; vec++) {
+      for (lane = 0; lane < 2; lane++) {
+         block[vec].f64[lane] = randDouble();
+      }
+   }
+}
+
+/* Fill all four single-precision lanes of each of the first NBLOCK
+   vectors of BLOCK with values from randFloat, in ascending lane order. */
+void randBlock_Floats ( V128* block, Int nBlock )
+{
+   Int vec, lane;
+   for (vec = 0; vec < nBlock; vec++) {
+      for (lane = 0; lane < 4; lane++) {
+         block[vec].f32[lane] = randFloat();
+      }
+   }
+}
+
/* ---------------------------------------------------------------- */
-/* -- Test macros -- */
+/* -- Parameterisable test macros -- */
/* ---------------------------------------------------------------- */
+/* Execute _action 50 times in a row, as a single statement.  The loop
+   counter is an Int named _qq and is visible to _action. */
+#define DO50(_action) \
+ do { \
+ Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
+ } while (0)
+
+
/* Note this also sets the destination register to a known value (0x55..55)
since it can sometimes be an input to the instruction too. */
#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
@@ -292,8 +441,58 @@
}
+/* Generate a test that involves four vector regs, with no bias as to
+   which of them are inputs and which are outputs.  INSN may also use
+   v16, v17 and v18 as scratch (they are listed as clobbered).
+
+   Defines test_<TESTNAME>(ty), which ITERS times:
+   - fills block[0..7] with random vectors of lane type ty,
+   - zeroes FPSR, loads block[0..3] into the four registers, runs INSN,
+   - stores the four registers to block[4..7] (overwriting the random
+     data put there) and FPSR to block[8],
+   - prints INSN, the four input vectors, the four output vectors and
+     FPSR masked with 0xFFFFFF60. */
+#define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
+ VECREG3NO,VECREG4NO) \
+ __attribute__((noinline)) \
+ static void test_##TESTNAME ( LaneTy ty ) { \
+ Int i; \
+ for (i = 0; i < ITERS; i++) { \
+ V128 block[8+1]; \
+ memset(block, 0x55, sizeof(block)); \
+ randV128(&block[0], ty); \
+ randV128(&block[1], ty); \
+ randV128(&block[2], ty); \
+ randV128(&block[3], ty); \
+ randV128(&block[4], ty); \
+ randV128(&block[5], ty); \
+ randV128(&block[6], ty); \
+ randV128(&block[7], ty); \
+ __asm__ __volatile__( \
+ "mov x30, #0 ; msr fpsr, x30 ; " \
+ "ldr q"#VECREG1NO", [%0, #0] ; " \
+ "ldr q"#VECREG2NO", [%0, #16] ; " \
+ "ldr q"#VECREG3NO", [%0, #32] ; " \
+ "ldr q"#VECREG4NO", [%0, #48] ; " \
+ INSN " ; " \
+ "str q"#VECREG1NO", [%0, #64] ; " \
+ "str q"#VECREG2NO", [%0, #80] ; " \
+ "str q"#VECREG3NO", [%0, #96] ; " \
+ "str q"#VECREG4NO", [%0, #112] ; " \
+ "mrs x30, fpsr ; str x30, [%0, #128] " \
+ : : "r"(&block[0]) \
+ : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
+ "v"#VECREG3NO, "v"#VECREG4NO, \
+ "v16", "v17", "v18", "x30" \
+ ); \
+ printf(INSN " "); \
+ UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
+ showV128(&block[0]); printf(" "); \
+ showV128(&block[1]); printf(" "); \
+ showV128(&block[2]); printf(" "); \
+ showV128(&block[3]); printf(" "); \
+ showV128(&block[4]); printf(" "); \
+ showV128(&block[5]); printf(" "); \
+ showV128(&block[6]); printf(" "); \
+ showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
+ } \
+ }
+
+
/* ---------------------------------------------------------------- */
-/* -- Test functions -- */
+/* -- Test functions and non-parameterisable test macros -- */
/* ---------------------------------------------------------------- */
void test_UMINV ( void )
@@ -891,6 +1090,991 @@
}
+//======== FCCMP_D ========//
+
+/* Defines test_FCCMP_D_D_0xF_EQ, a single run of
+   "fccmp d29, d11, #0xf, eq".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMP_D_D_0xF_EQ \
+ __attribute__((noinline)) static void test_FCCMP_D_D_0xF_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_D_D_0xF_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp d29, d11, #0xf, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_D_D_0xF_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMP_D_D_0xF_NE, a single run of
+   "fccmp d29, d11, #0xf, ne".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0xf -- confirm. */
+#define GEN_test_FCCMP_D_D_0xF_NE \
+ __attribute__((noinline)) static void test_FCCMP_D_D_0xF_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_D_D_0xF_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp d29, d11, #0xf, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_D_D_0xF_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMP_D_D_0x0_EQ, a single run of
+   "fccmp d29, d11, #0x0, eq".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMP_D_D_0x0_EQ \
+ __attribute__((noinline)) static void test_FCCMP_D_D_0x0_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_D_D_0x0_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp d29, d11, #0x0, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_D_D_0x0_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMP_D_D_0x0_NE, a single run of
+   "fccmp d29, d11, #0x0, ne".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0x0 -- confirm. */
+#define GEN_test_FCCMP_D_D_0x0_NE \
+ __attribute__((noinline)) static void test_FCCMP_D_D_0x0_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_D_D_0x0_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp d29, d11, #0x0, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_D_D_0x0_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCCMP_S ========//
+
+/* Defines test_FCCMP_S_S_0xF_EQ, a single run of
+   "fccmp s29, s11, #0xf, eq".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMP_S_S_0xF_EQ \
+ __attribute__((noinline)) static void test_FCCMP_S_S_0xF_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp s29, s11, #0xf, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_S_S_0xF_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMP_S_S_0xF_NE, a single run of
+   "fccmp s29, s11, #0xf, ne".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0xf -- confirm. */
+#define GEN_test_FCCMP_S_S_0xF_NE \
+ __attribute__((noinline)) static void test_FCCMP_S_S_0xF_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0xF_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp s29, s11, #0xf, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_S_S_0xF_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMP_S_S_0x0_EQ, a single run of
+   "fccmp s29, s11, #0x0, eq".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMP_S_S_0x0_EQ \
+ __attribute__((noinline)) static void test_FCCMP_S_S_0x0_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp s29, s11, #0x0, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_S_S_0x0_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMP_S_S_0x0_NE, a single run of
+   "fccmp s29, s11, #0x0, ne".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0x0 -- confirm. */
+#define GEN_test_FCCMP_S_S_0x0_NE \
+ __attribute__((noinline)) static void test_FCCMP_S_S_0x0_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmp s29, s11, #0x0, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMP_S_S_0x0_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCCMPE_D ========//
+
+/* Defines test_FCCMPE_D_D_0xF_EQ, a single run of
+   "fccmpe d29, d11, #0xf, eq".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMPE_D_D_0xF_EQ \
+ __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMPE_D_D_0xF_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe d29, d11, #0xf, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_D_D_0xF_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMPE_D_D_0xF_NE, a single run of
+   "fccmpe d29, d11, #0xf, ne".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0xf -- confirm. */
+#define GEN_test_FCCMPE_D_D_0xF_NE \
+ __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMPE_D_D_0xF_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe d29, d11, #0xf, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_D_D_0xF_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMPE_D_D_0x0_EQ, a single run of
+   "fccmpe d29, d11, #0x0, eq".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMPE_D_D_0x0_EQ \
+ __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMPE_D_D_0x0_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe d29, d11, #0x0, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_D_D_0x0_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMPE_D_D_0x0_NE, a single run of
+   "fccmpe d29, d11, #0x0, ne".  block[0..2] get random doubles and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0x0 -- confirm. */
+#define GEN_test_FCCMPE_D_D_0x0_NE \
+ __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMPE_D_D_0x0_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe d29, d11, #0x0, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_D_D_0x0_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCCMPE_S ========//
+
+/* Defines test_FCCMPE_S_S_0xF_EQ, a single run of
+   "fccmpe s29, s11, #0xf, eq".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the "before" label below reads FCCMP_S_S_0xF_EQ, not
+   FCCMPE_S_S_0xF_EQ -- looks like a copy-paste slip.  The expected
+   output file records this text, so fix both together if at all.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMPE_S_S_0xF_EQ \
+ __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe s29, s11, #0xf, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_S_S_0xF_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMPE_S_S_0xF_NE, a single run of
+   "fccmpe s29, s11, #0xf, ne".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0xf -- confirm. */
+#define GEN_test_FCCMPE_S_S_0xF_NE \
+ __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMPE_S_S_0xF_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe s29, s11, #0xf, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_S_S_0xF_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMPE_S_S_0x0_EQ, a single run of
+   "fccmpe s29, s11, #0x0, eq".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the "before" label below reads FCCMP_S_S_0x0_EQ, not
+   FCCMPE_S_S_0x0_EQ -- looks like a copy-paste slip.  The expected
+   output file records this text, so fix both together if at all.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so EQ
+   should hold and the flags come from the comparison -- confirm. */
+#define GEN_test_FCCMPE_S_S_0x0_EQ \
+ __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_EQ ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe s29, s11, #0x0, eq; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_S_S_0x0_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+/* Defines test_FCCMPE_S_S_0x0_NE, a single run of
+   "fccmpe s29, s11, #0x0, ne".  block[0..2] get random floats and are
+   loaded into q29/q11/q9 (q9 is not an operand); block[3] seeds NZCV
+   via x9 and receives the NZCV value read back afterwards.  The block
+   is shown before and after.
+   NOTE(review): the "before" label below reads FCCMP_S_S_0x0_NE, not
+   FCCMPE_S_S_0x0_NE -- looks like a copy-paste slip.  The expected
+   output file records this text, so fix both together if at all.
+   NOTE(review): the seed 0x5555.. puts 0b0101 in NZCV (Z set), so NE
+   should fail and the flags become the immediate #0x0 -- confirm. */
+#define GEN_test_FCCMPE_S_S_0x0_NE \
+ __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_NE ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fccmpe s29, s11, #0x0, ne; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCCMPE_S_S_0x0_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMEQ_D_D ========//
+
+/* Defines test_FCMEQ_D_D, a single run of "fcmeq d29, d11, d9".
+   block[0..2] get random doubles and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FCMEQ_D_D \
+ __attribute__((noinline)) static void test_FCMEQ_D_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMEQ_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmeq d29, d11, d9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMEQ_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMEQ_S_S ========//
+
+/* Defines test_FCMEQ_S_S, a single run of "fcmeq s29, s11, s9".
+   block[0..2] get random floats and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FCMEQ_S_S \
+ __attribute__((noinline)) static void test_FCMEQ_S_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMEQ_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmeq s29, s11, s9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMEQ_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGE_D_D ========//
+
+/* Defines test_FCMGE_D_D, a single run of "fcmge d29, d11, d9".
+   block[0..2] get random doubles and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FCMGE_D_D \
+ __attribute__((noinline)) static void test_FCMGE_D_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGE_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmge d29, d11, d9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGE_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGE_S_S ========//
+
+/* Defines test_FCMGE_S_S, a single run of "fcmge s29, s11, s9".
+   block[0..2] get random floats and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FCMGE_S_S \
+ __attribute__((noinline)) static void test_FCMGE_S_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGE_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmge s29, s11, s9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGE_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGT_D_D ========//
+
+/* Defines test_FCMGT_D_D, a single run of "fcmgt d29, d11, d9".
+   block[0..2] get random doubles and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FCMGT_D_D \
+ __attribute__((noinline)) static void test_FCMGT_D_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGT_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmgt d29, d11, d9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGT_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGT_S_S ========//
+
+/* Defines test_FCMGT_S_S, a single run of "fcmgt s29, s11, s9".
+   block[0..2] get random floats and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FCMGT_S_S \
+ __attribute__((noinline)) static void test_FCMGT_S_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGT_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmgt s29, s11, s9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGT_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FACGT_D_D ========//
+
+/* Defines test_FACGT_D_D, a single run of "facgt d29, d11, d9".
+   block[0..2] get random doubles and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FACGT_D_D \
+ __attribute__((noinline)) static void test_FACGT_D_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FACGT_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "facgt d29, d11, d9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FACGT_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FACGT_S_S ========//
+
+/* Defines test_FACGT_S_S, a single run of "facgt s29, s11, s9".
+   block[0..2] get random floats and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FACGT_S_S \
+ __attribute__((noinline)) static void test_FACGT_S_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FACGT_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "facgt s29, s11, s9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FACGT_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FACGE_D_D ========//
+
+/* Defines test_FACGE_D_D, a single run of "facge d29, d11, d9".
+   block[0..2] get random doubles and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FACGE_D_D \
+ __attribute__((noinline)) static void test_FACGE_D_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FACGE_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "facge d29, d11, d9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FACGE_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FACGE_S_S ========//
+
+/* Defines test_FACGE_S_S, a single run of "facge s29, s11, s9".
+   block[0..2] get random floats and are loaded into q29/q11/q9;
+   block[3] seeds NZCV via x9 and receives the NZCV value read back
+   afterwards.  All four vectors are shown before and after. */
+#define GEN_test_FACGE_S_S \
+ __attribute__((noinline)) static void test_FACGE_S_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FACGE_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "facge s29, s11, s9; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FACGE_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMEQ_Z_D ========//
+
+/* Defines test_FCMEQ_Z_D, a single run of "fcmeq d29, d11, #0".
+   block[0..2] get random doubles and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMEQ_Z_D \
+ __attribute__((noinline)) static void test_FCMEQ_Z_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMEQ_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmeq d29, d11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMEQ_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMEQ_Z_S ========//
+
+/* Defines test_FCMEQ_Z_S, a single run of "fcmeq s29, s11, #0".
+   block[0..2] get random floats and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMEQ_Z_S \
+ __attribute__((noinline)) static void test_FCMEQ_Z_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMEQ_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmeq s29, s11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMEQ_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGE_Z_D ========//
+
+/* Defines test_FCMGE_Z_D, a single run of "fcmge d29, d11, #0".
+   block[0..2] get random doubles and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMGE_Z_D \
+ __attribute__((noinline)) static void test_FCMGE_Z_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGE_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmge d29, d11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGE_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGE_Z_S ========//
+
+/* Defines test_FCMGE_Z_S, a single run of "fcmge s29, s11, #0".
+   block[0..2] get random floats and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMGE_Z_S \
+ __attribute__((noinline)) static void test_FCMGE_Z_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGE_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmge s29, s11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGE_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGT_Z_D ========//
+
+/* Defines test_FCMGT_Z_D, a single run of "fcmgt d29, d11, #0".
+   block[0..2] get random doubles and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMGT_Z_D \
+ __attribute__((noinline)) static void test_FCMGT_Z_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGT_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmgt d29, d11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGT_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMGT_Z_S ========//
+
+/* Defines test_FCMGT_Z_S, a single run of "fcmgt s29, s11, #0".
+   block[0..2] get random floats and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMGT_Z_S \
+ __attribute__((noinline)) static void test_FCMGT_Z_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMGT_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmgt s29, s11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMGT_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMLE_Z_D ========//
+
+/* Defines test_FCMLE_Z_D, a single run of "fcmle d29, d11, #0".
+   block[0..2] get random doubles and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMLE_Z_D \
+ __attribute__((noinline)) static void test_FCMLE_Z_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMLE_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmle d29, d11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMLE_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMLE_Z_S ========//
+
+/* Defines test_FCMLE_Z_S, a single run of "fcmle s29, s11, #0".
+   block[0..2] get random floats and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMLE_Z_S \
+ __attribute__((noinline)) static void test_FCMLE_Z_S ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Floats(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMLE_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmle s29, s11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMLE_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMLT_Z_D ========//
+
+/* Defines test_FCMLT_Z_D, a single run of "fcmlt d29, d11, #0".
+   block[0..2] get random doubles and are loaded into q29/q11/q9 (q9 is
+   not an operand); block[3] seeds NZCV via x9 and receives the NZCV
+   value read back afterwards.  All four vectors are shown before and
+   after. */
+#define GEN_test_FCMLT_Z_D \
+ __attribute__((noinline)) static void test_FCMLT_Z_D ( void ) \
+ { \
+ V128 block[4]; \
+ randBlock_Doubles(&block[0], 3); \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
+ showBlock("FCMLT_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " \
+ "fcmlt d29, d11, #0; " \
+ "mrs x9, nzcv; str x9, [%0, 48]; " \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
+ ); \
+ showBlock("FCMLT_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMLT_Z_S ========//
+// Expands to test_FCMLT_Z_S(): runs scalar "fcmlt s29, s11, #0" once and prints block[0..3] before and after.
+#define GEN_test_FCMLT_Z_S \
+ __attribute__((noinline)) static void test_FCMLT_Z_S ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Floats(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMLT_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcmlt s29, s11, #0; " /* instruction under test: s11 compared with zero, result written to s29 */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMLT_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMP_D_D ========//
+// Expands to test_FCMP_D_D(): runs "fcmp d29, d11" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMP_D_D \
+ __attribute__((noinline)) static void test_FCMP_D_D ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMP_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcmp d29, d11; " /* instruction under test: quiet compare of d29 with d11; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMP_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMP_S_S ========//
+// Expands to test_FCMP_S_S(): runs "fcmp s29, s11" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMP_S_S \
+ __attribute__((noinline)) static void test_FCMP_S_S ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Floats(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMP_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcmp s29, s11; " /* instruction under test: quiet compare of s29 with s11; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMP_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMPE_D_D ========//
+// Expands to test_FCMPE_D_D(): runs "fcmpe d29, d11" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMPE_D_D \
+ __attribute__((noinline)) static void test_FCMPE_D_D ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMPE_D_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcmpe d29, d11; " /* instruction under test: signaling compare of d29 with d11; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMPE_D_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMPE_S_S ========//
+// Expands to test_FCMPE_S_S(): runs "fcmpe s29, s11" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMPE_S_S \
+ __attribute__((noinline)) static void test_FCMPE_S_S ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Floats(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMPE_S_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcmpe s29, s11; " /* instruction under test: signaling compare of s29 with s11; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMPE_S_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMP_Z_D ========//
+// Expands to test_FCMP_Z_D(): runs "fcmp d29, #0" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMP_Z_D \
+ __attribute__((noinline)) static void test_FCMP_Z_D ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMP_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2]; only d29 is an operand here */ \
+ "fcmp d29, #0; " /* instruction under test: quiet compare of d29 with zero; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMP_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMP_Z_S ========//
+// Expands to test_FCMP_Z_S(): runs "fcmp s29, #0" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMP_Z_S \
+ __attribute__((noinline)) static void test_FCMP_Z_S ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Floats(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMP_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2]; only s29 is an operand here */ \
+ "fcmp s29, #0; " /* instruction under test: quiet compare of s29 with zero; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMP_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMPE_Z_D ========//
+// Expands to test_FCMPE_Z_D(): runs "fcmpe d29, #0" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMPE_Z_D \
+ __attribute__((noinline)) static void test_FCMPE_Z_D ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMPE_Z_D before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2]; only d29 is an operand here */ \
+ "fcmpe d29, #0; " /* instruction under test: signaling compare of d29 with zero; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMPE_Z_D after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCMPE_Z_S ========//
+// Expands to test_FCMPE_Z_S(): runs "fcmpe s29, #0" once and prints block[0..3] (registers plus NZCV image) before and after.
+#define GEN_test_FCMPE_Z_S \
+ __attribute__((noinline)) static void test_FCMPE_Z_S ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Floats(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCMPE_Z_S before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3] (byte offset 48) */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2]; only s29 is an operand here */ \
+ "fcmpe s29, #0; " /* instruction under test: signaling compare of s29 with zero; outcome lands in NZCV */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV, so the compare result gets printed */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCMPE_Z_S after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCSEL_D_D_D_EQ ========//
+// Expands to test_FCSEL_D_D_D_EQ(): runs "fcsel d29, d11, d9, eq" once and prints block[0..3] before and after.
+#define GEN_test_FCSEL_D_D_D_EQ \
+ __attribute__((noinline)) static void test_FCSEL_D_D_D_EQ ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCSEL_D_D_D_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3]; with NZCV in bits [31:28] the 0x5 nibble should give Z=1 -- confirm against the Arm ARM */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcsel d29, d11, d9, eq; " /* instruction under test: d29 <- d11 if EQ holds, else d9 */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCSEL_D_D_D_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCSEL_D_D_D_NE ========//
+// Expands to test_FCSEL_D_D_D_NE(): runs "fcsel d29, d11, d9, ne" once and prints block[0..3] before and after.
+#define GEN_test_FCSEL_D_D_D_NE \
+ __attribute__((noinline)) static void test_FCSEL_D_D_D_NE ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* helper defined outside this excerpt; presumably randomises block[0..2] -- confirm */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCSEL_D_D_D_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3]; with NZCV in bits [31:28] the 0x5 nibble should give Z=1 -- confirm against the Arm ARM */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcsel d29, d11, d9, ne; " /* instruction under test: d29 <- d11 if NE holds, else d9 */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCSEL_D_D_D_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCSEL_S_S_S_EQ ========//
+// Expands to test_FCSEL_S_S_S_EQ(): runs "fcsel s29, s11, s9, eq" once and prints block[0..3] before and after.
+#define GEN_test_FCSEL_S_S_S_EQ \
+ __attribute__((noinline)) static void test_FCSEL_S_S_S_EQ ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* NOTE(review): Doubles generator although the operands are S registers; other S-form tests here use randBlock_Floats. Switching would alter the recorded expected output -- confirm intent before touching */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCSEL_S_S_S_EQ before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3]; with NZCV in bits [31:28] the 0x5 nibble should give Z=1 -- confirm against the Arm ARM */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcsel s29, s11, s9, eq; " /* instruction under test: s29 <- s11 if EQ holds, else s9 */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCSEL_S_S_S_EQ after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+//======== FCSEL_S_S_S_NE ========//
+// Expands to test_FCSEL_S_S_S_NE(): runs "fcsel s29, s11, s9, ne" once and prints block[0..3] before and after.
+#define GEN_test_FCSEL_S_S_S_NE \
+ __attribute__((noinline)) static void test_FCSEL_S_S_S_NE ( void ) \
+ { \
+ V128 block[4]; /* [0],[1],[2]: memory images of q29, q11, q9; [3]: NZCV image */ \
+ randBlock_Doubles(&block[0], 3); /* NOTE(review): Doubles generator although the operands are S registers; other S-form tests here use randBlock_Floats. Switching would alter the recorded expected output -- confirm intent before touching */ \
+ block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* nibble 0x5 / 0xA replicated across each 64-bit half */ \
+ showBlock("FCSEL_S_S_S_NE before", &block[0], 4); \
+ __asm__ __volatile__( \
+ "ldr x9, [%0, 48]; msr nzcv, x9; " /* NZCV <- low half of block[3]; with NZCV in bits [31:28] the 0x5 nibble should give Z=1 -- confirm against the Arm ARM */ \
+ "ldr q29, [%0, #0]; ldr q11, [%0, #16]; ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
+ "fcsel s29, s11, s9, ne; " /* instruction under test: s29 <- s11 if NE holds, else s9 */ \
+ "mrs x9, nzcv; str x9, [%0, 48]; " /* low half of block[3] <- NZCV */ \
+ "str q29, [%0, #0]; str q11, [%0, #16]; str q9, [%0, #32]; " /* block[0..2] <- q29, q11, q9 */ \
+ ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = block base; all registers used above are declared clobbered */ \
+ ); \
+ showBlock("FCSEL_S_S_S_NE after", &block[0], 4); \
+ printf("\n"); \
+ }
+
+
/* ---------------------------------------------------------------- */
/* -- Tests, in the same order that they appear in main() -- */
/* ---------------------------------------------------------------- */
@@ -939,49 +2123,90 @@
GEN_THREEVEC_TEST(faddp_4s_4s_4s, "faddp v2.4s, v23.4s, v11.4s", 2, 23, 11)
GEN_THREEVEC_TEST(faddp_2s_2s_2s, "faddp v2.2s, v23.2s, v11.2s", 2, 23, 11)
-// fccmp d,s
-// fccmpe d,s
-
-// fcmeq d,s
-// fcmge d,s
-// fcmgt d,s
-// facgt d,s (floating abs compare GE)
-// facge d,s (floating abs compare GE)
-
-GEN_BINARY_TEST(fcmeq, 2d, 2d, 2d)
-GEN_BINARY_TEST(fcmeq, 4s, 4s, 4s)
-GEN_BINARY_TEST(fcmeq, 2s, 2s, 2s)
-GEN_BINARY_TEST(fcmge, 2d, 2d, 2d)
-GEN_BINARY_TEST(fcmge, 4s, 4s, 4s)
-GEN_BINARY_TEST(fcmge, 2s, 2s, 2s)
-GEN_BINARY_TEST(fcmgt, 2d, 2d, 2d)
-GEN_BINARY_TEST(fcmgt, 4s, 4s, 4s)
-GEN_BINARY_TEST(fcmgt, 2s, 2s, 2s)
-GEN_BINARY_TEST(facge, 2d, 2d, 2d)
-GEN_BINARY_TEST(facge, 4s, 4s, 4s)
-GEN_BINARY_TEST(facge, 2s, 2s, 2s)
-GEN_BINARY_TEST(facgt, 2d, 2d, 2d)
-GEN_BINARY_TEST(facgt, 4s, 4s, 4s)
-GEN_BINARY_TEST(facgt, 2s, 2s, 2s)
-
-// fcmeq_z d,s
-// fcmge_z d,s
-// fcmgt_z d,s
-// fcmle_z d,s
-// fcmlt_z d,s
-
-// fcmeq_z 2d,4s,2s
-// fcmge_z 2d,4s,2s
-// fcmgt_z 2d,4s,2s
-// fcmle_z 2d,4s,2s
-// fcmlt_z 2d,4s,2s
-
-// fcmp_z d,s
-// fcmpe_z d,s
-// fcmp d,s (floating point quiet, set flags)
-// fcmpe d,s (floating point signaling, set flags)
-
-// fcsel d,s (fp cond select)
+GEN_test_FCCMP_D_D_0xF_EQ /* fccmp/fccmpe instantiations; going by the names: operand size {D,S} x immediate {0xF,0x0} x condition {EQ,NE}. The GEN_test_FCCMP* macros are defined outside this excerpt. */
+GEN_test_FCCMP_D_D_0xF_NE
+GEN_test_FCCMP_D_D_0x0_EQ
+GEN_test_FCCMP_D_D_0x0_NE
+GEN_test_FCCMP_S_S_0xF_EQ
+GEN_test_FCCMP_S_S_0xF_NE
+GEN_test_FCCMP_S_S_0x0_EQ
+GEN_test_FCCMP_S_S_0x0_NE
+GEN_test_FCCMPE_D_D_0xF_EQ
+GEN_test_FCCMPE_D_D_0xF_NE
+GEN_test_FCCMPE_D_D_0x0_EQ
+GEN_test_FCCMPE_D_D_0x0_NE
+GEN_test_FCCMPE_S_S_0xF_EQ
+GEN_test_FCCMPE_S_S_0xF_NE
+GEN_test_FCCMPE_S_S_0x0_EQ
+GEN_test_FCCMPE_S_S_0x0_NE
+
+GEN_test_FCMEQ_D_D /* scalar register-register compares (fcmeq, fcmge, fcmgt), D and S forms; macros defined outside this excerpt */
+GEN_test_FCMEQ_S_S
+GEN_test_FCMGE_D_D
+GEN_test_FCMGE_S_S
+GEN_test_FCMGT_D_D
+GEN_test_FCMGT_S_S
+GEN_test_FACGT_D_D /* scalar floating-point absolute compares (facgt, facge), D and S forms */
+GEN_test_FACGT_S_S
+GEN_test_FACGE_D_D
+GEN_test_FACGE_S_S
+
+GEN_THREEVEC_TEST(fcmeq_2d_2d_2d, "fcmeq v2.2d, v23.2d, v11.2d", 2, 23, 11) /* vector compares in 2d/4s/2s arrangements; the trailing numbers repeat the register numbers named in the instruction string */
+GEN_THREEVEC_TEST(fcmeq_4s_4s_4s, "fcmeq v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmeq_2s_2s_2s, "fcmeq v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmge_2d_2d_2d, "fcmge v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmge_4s_4s_4s, "fcmge v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmge_2s_2s_2s, "fcmge v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmgt_2d_2d_2d, "fcmgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmgt_4s_4s_4s, "fcmgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fcmgt_2s_2s_2s, "fcmgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(facge_2d_2d_2d, "facge v2.2d, v23.2d, v11.2d", 2, 23, 11) /* vector absolute compares */
+GEN_THREEVEC_TEST(facge_4s_4s_4s, "facge v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(facge_2s_2s_2s, "facge v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(facgt_2d_2d_2d, "facgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(facgt_4s_4s_4s, "facgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(facgt_2s_2s_2s, "facgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
+
+GEN_test_FCMEQ_Z_D /* scalar compares against #0, D and S forms; the FCMEQ/FCMGE/FCMGT macros are defined outside this excerpt */
+GEN_test_FCMEQ_Z_S
+GEN_test_FCMGE_Z_D
+GEN_test_FCMGE_Z_S
+GEN_test_FCMGT_Z_D
+GEN_test_FCMGT_Z_S
+GEN_test_FCMLE_Z_D /* each GEN_test_<name> line expands to a static, noinline test_<name>() */
+GEN_test_FCMLE_Z_S
+GEN_test_FCMLT_Z_D
+GEN_test_FCMLT_Z_S
+
+GEN_TWOVEC_TEST(fcmeq_z_2d_2d, "fcmeq v2.2d, v23.2d, #0", 2, 23) /* vector compares against #0 in 2d/4s/2s arrangements; the trailing numbers repeat the two register numbers in the instruction string */
+GEN_TWOVEC_TEST(fcmeq_z_4s_4s, "fcmeq v2.4s, v23.4s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmeq_z_2s_2s, "fcmeq v2.2s, v23.2s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmge_z_2d_2d, "fcmge v2.2d, v23.2d, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmge_z_4s_4s, "fcmge v2.4s, v23.4s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmge_z_2s_2s, "fcmge v2.2s, v23.2s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmgt_z_2d_2d, "fcmgt v2.2d, v23.2d, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmgt_z_4s_4s, "fcmgt v2.4s, v23.4s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmgt_z_2s_2s, "fcmgt v2.2s, v23.2s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmle_z_2d_2d, "fcmle v2.2d, v23.2d, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmle_z_4s_4s, "fcmle v2.4s, v23.4s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmle_z_2s_2s, "fcmle v2.2s, v23.2s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmlt_z_2d_2d, "fcmlt v2.2d, v23.2d, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmlt_z_4s_4s, "fcmlt v2.4s, v23.4s, #0", 2, 23)
+GEN_TWOVEC_TEST(fcmlt_z_2s_2s, "fcmlt v2.2s, v23.2s, #0", 2, 23)
+
+GEN_test_FCMP_Z_D /* flag-setting compares against #0: fcmp (quiet) and fcmpe (signaling); each line expands to a static test_<name>() */
+GEN_test_FCMP_Z_S
+GEN_test_FCMPE_Z_D
+GEN_test_FCMPE_Z_S
+GEN_test_FCMP_D_D /* flag-setting register-register compares, quiet then signaling */
+GEN_test_FCMP_S_S
+GEN_test_FCMPE_D_D
+GEN_test_FCMPE_S_S
+
+GEN_test_FCSEL_D_D_D_EQ /* fcsel (fp conditional select): D and S forms, conditions EQ and NE; each line expands to a static test_<name>() */
+GEN_test_FCSEL_D_D_D_NE
+GEN_test_FCSEL_S_S_S_EQ
+GEN_test_FCSEL_S_S_S_NE
GEN_THREEVEC_TEST(fdiv_d_d_d, "fdiv d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fdiv_s_s_s, "fdiv s2, s11, s29", 2, 11, 29)
@@ -989,52 +2214,85 @@
GEN_BINARY_TEST(fdiv, 4s, 4s, 4s)
GEN_BINARY_TEST(fdiv, 2s, 2s, 2s)
-// fmadd d,s
-// fnmadd d,s
-// fmsub d,s
-// fnmsub d,s
+GEN_FOURVEC_TEST(fmadd_d_d_d_d, "fmadd d2, d11, d29, d3", 2, 11, 29, 3) /* scalar fmadd/fnmadd/fmsub/fnmsub, D and S forms; the four trailing numbers repeat the register numbers in the instruction string */
+GEN_FOURVEC_TEST(fmadd_s_s_s_s, "fmadd s2, s11, s29, s3", 2, 11, 29, 3)
+GEN_FOURVEC_TEST(fnmadd_d_d_d_d, "fnmadd d2, d11, d29, d3", 2, 11, 29, 3)
+GEN_FOURVEC_TEST(fnmadd_s_s_s_s, "fnmadd s2, s11, s29, s3", 2, 11, 29, 3)
+GEN_FOURVEC_TEST(fmsub_d_d_d_d, "fmsub d2, d11, d29, d3", 2, 11, 29, 3)
+GEN_FOURVEC_TEST(fmsub_s_s_s_s, "fmsub s2, s11, s29, s3", 2, 11, 29, 3)
+GEN_FOURVEC_TEST(fnmsub_d_d_d_d, "fnmsub d2, d11, d29, d3", 2, 11, 29, 3)
+GEN_FOURVEC_TEST(fnmsub_s_s_s_s, "fnmsub s2, s11, s29, s3", 2, 11, 29, 3)
GEN_THREEVEC_TEST(fnmul_d_d_d, "fnmul d2, d11, d29", 2, 11, 29)
GEN_THREEVEC_TEST(fnmul_s_s_s, "fnmul s2, s11, s29", 2, 11, 29)
-// fmax d,s
-// fmin d,s
-// fmaxnm d,s ("max number")
-// fminnm d,s
-
-// fmax 2d,4s,2s
-// fmin 2d,4s,2s
-// fmaxnm 2d,4s,2s
-// fminnm 2d,4s,2s
-
-// fmaxnmp d_2d,s_2s ("max number pairwise")
-// fminnmp d_2d,s_2s
-
-// fmaxnmp 2d,4s,2s
-// fminnmp 2d,4s,2s
-
-// fmaxnmv s_4s (maxnum across vector)
-// fminnmv s_4s
-
-// fmaxp d_2d,s_2s (max of a pair)
-// fminp d_2d,s_2s (max of a pair)
-
-// fmaxp 2d,4s,2s (max pairwise)
-// fminp 2d,4s,2s
-
-// fmaxv s_4s (max across vector)
-// fminv s_4s
-
-// FIXME these need to be THREEVEC
-GEN_BINARY_TEST(fmla, 2d, 2d, 2d)
-GEN_BINARY_TEST(fmla, 4s, 4s, 4s)
-GEN_BINARY_TEST(fmla, 2s, 2s, 2s)
-GEN_BINARY_TEST(fmls, 2d, 2d, 2d)
-GEN_BINARY_TEST(fmls, 4s, 4s, 4s)
-GEN_BINARY_TEST(fmls, 2s, 2s, 2s)
-
-// fmla d_d_d[],s_s_s[] (by element)
-// fmls d_d_d[],s_s_s[] (by element)
+GEN_THREEVEC_TEST(fmax_d_d_d, "fmax d2, d11, d29", 2, 11, 29) /* scalar fmax/fmin and fmaxnm/fminnm ("max/min number"), D and S forms */
+GEN_THREEVEC_TEST(fmax_s_s_s, "fmax s2, s11, s29", 2, 11, 29)
+GEN_THREEVEC_TEST(fmin_d_d_d, "fmin d2, d11, d29", 2, 11, 29)
+GEN_THREEVEC_TEST(fmin_s_s_s, "fmin s2, s11, s29", 2, 11, 29)
+GEN_THREEVEC_TEST(fmaxnm_d_d_d, "fmaxnm d2, d11, d29", 2, 11, 29)
+GEN_THREEVEC_TEST(fmaxnm_s_s_s, "fmaxnm s2, s11, s29", 2, 11, 29)
+GEN_THREEVEC_TEST(fminnm_d_d_d, "fminnm d2, d11, d29", 2, 11, 29)
+GEN_THREEVEC_TEST(fminnm_s_s_s, "fminnm s2, s11, s29", 2, 11, 29)
+
+GEN_THREEVEC_TEST(fmax_2d_2d_2d, "fmax v2.2d, v23.2d, v11.2d", 2, 23, 11) /* the same four operations in vector 2d/4s/2s arrangements */
+GEN_THREEVEC_TEST(fmax_4s_4s_4s, "fmax v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmax_2s_2s_2s, "fmax v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmin_2d_2d_2d, "fmin v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fmin_4s_4s_4s, "fmin v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmin_2s_2s_2s, "fmin v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmaxnm_2d_2d_2d, "fmaxnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fmaxnm_4s_4s_4s, "fmaxnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmaxnm_2s_2s_2s, "fmaxnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fminnm_2d_2d_2d, "fminnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fminnm_4s_4s_4s, "fminnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fminnm_2s_2s_2s, "fminnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
+
+GEN_TWOVEC_TEST(fmaxnmp_d_2d, "fmaxnmp d2, v23.2d", 2, 23) /* "max/min number pairwise", scalar destination from a two-element vector */
+GEN_TWOVEC_TEST(fmaxnmp_s_2s, "fmaxnmp s2, v23.2s", 2, 23)
+GEN_TWOVEC_TEST(fminnmp_d_2d, "fminnmp d2, v23.2d", 2, 23)
+GEN_TWOVEC_TEST(fminnmp_s_2s, "fminnmp s2, v23.2s", 2, 23)
+
+GEN_THREEVEC_TEST(fmaxnmp_2d_2d_2d, "fmaxnmp v2.2d, v23.2d, v11.2d", 2, 23, 11) /* "max/min number pairwise", vector forms */
+GEN_THREEVEC_TEST(fmaxnmp_4s_4s_4s, "fmaxnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmaxnmp_2s_2s_2s, "fmaxnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fminnmp_2d_2d_2d, "fminnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fminnmp_4s_4s_4s, "fminnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fminnmp_2s_2s_2s, "fminnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
+
+GEN_TWOVEC_TEST(fmaxnmv_s_4s, "fmaxnmv s2, v23.4s", 2, 23) /* maxnum/minnum across vector */
+GEN_TWOVEC_TEST(fminnmv_s_4s, "fminnmv s2, v23.4s", 2, 23)
+
+GEN_TWOVEC_TEST(fmaxp_d_2d, "fmaxp d2, v23.2d", 2, 23) /* max/min of a pair, scalar destination */
+GEN_TWOVEC_TEST(fmaxp_s_2s, "fmaxp s2, v23.2s", 2, 23)
+GEN_TWOVEC_TEST(fminp_d_2d, "fminp d2, v23.2d", 2, 23)
+GEN_TWOVEC_TEST(fminp_s_2s, "fminp s2, v23.2s", 2, 23)
+
+GEN_THREEVEC_TEST(fmaxp_2d_2d_2d, "fmaxp v2.2d, v23.2d, v11.2d", 2, 23, 11) /* max/min pairwise, vector forms */
+GEN_THREEVEC_TEST(fmaxp_4s_4s_4s, "fmaxp v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmaxp_2s_2s_2s, "fmaxp v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fminp_2d_2d_2d, "fminp v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fminp_4s_4s_4s, "fminp v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fminp_2s_2s_2s, "fminp v2.2s, v23.2s, v11.2s", 2, 23, 11)
+
+GEN_TWOVEC_TEST(fmaxv_s_4s, "fmaxv s2, v23.4s", 2, 23) /* max/min across vector */
+GEN_TWOVEC_TEST(fminv_s_4s, "fminv s2, v23.4s", 2, 23)
+
+GEN_THREEVEC_TEST(fmla_2d_2d_2d, "fmla v2.2d, v23.2d, v11.2d", 2, 23, 11) /* fmla/fmls, vector-by-vector forms in 2d/4s/2s arrangements */
+GEN_THREEVEC_TEST(fmla_4s_4s_4s, "fmla v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmla_2s_2s_2s, "fmla v2.2s, v23.2s, v11.2s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmls_2d_2d_2d, "fmls v2.2d, v23.2d, v11.2d", 2, 23, 11)
+GEN_THREEVEC_TEST(fmls_4s_4s_4s, "fmls v2.4s, v23.4s, v11.4s", 2, 23, 11)
+GEN_THREEVEC_TEST(fmls_2s_2s_2s, "fmls v2.2s, v23.2s, v11.2s", 2, 23, 11)
+
+GEN_THREEVEC_TEST(fmla_d_d_d0, "fmla d2, d11, v29.d[0]", 2, 11, 29) /* fmla/fmls scalar by element, using lanes d[0], d[1], s[0] and s[3] */
+GEN_THREEVEC_TEST(fmla_d_d_d1, "fmla d2, d11, v29.d[1]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmla_s_s_s0, "fmla s2, s11, v29.s[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmla_s_s_s3, "fmla s2, s11, v29.s[3]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmls_d_d_d0, "fmls d2, d11, v29.d[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmls_d_d_d1, "fmls d2, d11, v29.d[1]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmls_s_s_s0, "fmls s2, s11, v29.s[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmls_s_s_s3, "fmls s2, s11, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_2d_2d_d0, "fmla v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmla_2d_2d_d1, "fmla v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
@@ -1078,7 +2336,10 @@
GEN_TWOVEC_TEST(fmov_s_imm_02, "fmov s22, #-4.0", 22, 23)
GEN_TWOVEC_TEST(fmov_s_imm_03, "fmov s22, #-1.0", 22, 23)
-// fmul d_d_d[],s_s_s[]
+GEN_THREEVEC_TEST(fmul_d_d_d0, "fmul d2, d11, v29.d[0]", 2, 11, 29) /* fmul scalar by element, using lanes d[0], d[1], s[0] and s[3] */
+GEN_THREEVEC_TEST(fmul_d_d_d1, "fmul d2, d11, v29.d[1]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmul_s_s_s0, "fmul s2, s11, v29.s[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmul_s_s_s3, "fmul s2, s11, v29.s[3]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2d_2d_d0, "fmul v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2d_2d_d1, "fmul v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
@@ -1087,47 +2348,92 @@
GEN_THREEVEC_TEST(fmul_2s_2s_s0, "fmul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
GEN_THREEVEC_TEST(fmul_2s_2s_s3, "fmul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
-GEN_THREEVEC_TEST(fmul_d_d_d, "fmul d2, d11, d29", 2, 11, 29)
-GEN_THREEVEC_TEST(fmul_s_s_s, "fmul s2, s11, s29", 2, 11, 29)
-GEN_BINARY_TEST(fmul, 2d, 2d, 2d)
-GEN_BINARY_TEST(fmul, 4s, 4s, 4s)
-GEN_BINARY_TEST(fmul, 2s, 2s, 2s)
-
-// fmulx d_d_d[],s_s_s[]
-// fmulx 2d_2d_d[],4s_4s_s[],2s_2s_s[]
-
-// fmulx d,s
-// fmulx 2d,4s,2s
-
-// frecpe d,s (recip estimate)
-// frecpe 2d,4s,2s
-
-// frecps d,s (recip step)
-// frecps 2d,4s,2s
-
-// frecpx d,s (recip exponent)
-
-// frinta d,s
-// frinti d,s
-// frintm d,s
-// frintn d,s
-// frintp d,s
-// frintx d,s
-// frintz d,s
-
-// frinta 2d,4s,2s (round to integral, nearest away)
-// frinti 2d,4s,2s (round to integral, per FPCR)
-// frintm 2d,4s,2s (round to integral, minus inf)
-// frintn 2d,4s,2s (round to integral, nearest, to even)
-// frintp 2d,4s,2s (round to integral, plus inf)
-// frintx 2d,4s,2s (round to integral exact, per FPCR)
-// frintz 2d,4s,2s (round to integral, zero)
-
-// frsqrte d,s (est)
-// frsqrte 2d,4s,2s
-
-// frsqrts d,s (step)
-// frsqrts 2d,4s,2s
+GEN_THREEVEC_TEST(fmul_d_d_d, "fmul d2, d11, d29", 2, 11, 29) /* fmul register-register: scalar D/S, then vector 2d/4s/2s */
+GEN_THREEVEC_TEST(fmul_s_s_s, "fmul s2, s11, s29", 2, 11, 29)
+GEN_THREEVEC_TEST(fmul_2d_2d_2d, "fmul v2.2d, v11.2d, v29.2d", 2, 11, 29)
+GEN_THREEVEC_TEST(fmul_4s_4s_4s, "fmul v2.4s, v11.4s, v29.4s", 2, 11, 29)
+GEN_THREEVEC_TEST(fmul_2s_2s_2s, "fmul v2.2s, v11.2s, v29.2s", 2, 11, 29)
+
+GEN_THREEVEC_TEST(fmulx_d_d_d0, "fmulx d2, d11, v29.d[0]", 2, 11, 29) /* fmulx by element: scalar forms first, then vector 2d/4s/2s */
+GEN_THREEVEC_TEST(fmulx_d_d_d1, "fmulx d2, d11, v29.d[1]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_s_s_s0, "fmulx s2, s11, v29.s[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_s_s_s3, "fmulx s2, s11, v29.s[3]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_2d_2d_d0, "fmulx v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_2d_2d_d1, "fmulx v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_4s_4s_s0, "fmulx v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_4s_4s_s3, "fmulx v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_2s_2s_s0, "fmulx v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_2s_2s_s3, "fmulx v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
+
+GEN_THREEVEC_TEST(fmulx_d_d_d, "fmulx d2, d11, d29", 2, 11, 29) /* fmulx register-register: scalar D/S, then vector 2d/4s/2s */
+GEN_THREEVEC_TEST(fmulx_s_s_s, "fmulx s2, s11, s29", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_2d_2d_2d, "fmulx v2.2d, v11.2d, v29.2d", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_4s_4s_4s, "fmulx v2.4s, v11.4s, v29.4s", 2, 11, 29)
+GEN_THREEVEC_TEST(fmulx_2s_2s_2s, "fmulx v2.2s, v11.2s, v29.2s", 2, 11, 29)
+
+GEN_TWO...
[truncated message content] |