|
From: <sv...@va...> - 2005-11-18 20:45:32
|
Author: cerion
Date: 2005-11-18 20:45:18 +0000 (Fri, 18 Nov 2005)
New Revision: 5196
Log:
Changed altivec floating point setup to Java/IEEE mode
- Non-Java mode is the system default, but was causing some accuracy problems
  by rounding off intermediate denormalised results to zero.
We now have some small errors (lowest bit only) due to using greater accuracy
than the system default, but this is better overall.
Also expanded dispatcher check of FPSCR to include all control bits
Modified:
trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
trunk/none/tests/ppc32/jm-insns.c
Modified: trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-11-18 18:43:56=
UTC (rev 5195)
+++ trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-11-18 20:45:18=
UTC (rev 5196)
@@ -164,12 +164,10 @@
=20
/* 24(sp) used later to stop ctr reg being clobbered */
=20
+ /* 8:20(sp) free */
+=09
/* Linkage Area (reserved)
- 20(sp) : TOC save area
- 16(sp) : link editor word
- 12(sp) : compiler word
- 8(sp) : LR
- 4(sp) : CR
+ 4(sp) : LR
0(sp) : back-chain
*/
=20
@@ -191,6 +189,7 @@
lwz 3,VG_(machine_ppc32_has_FP)@l(3)
cmplwi 3,0
beq LafterFP2
+
fsub 3,3,3 /* generate zero */
mtfsf 0xFF,3
LafterFP2:
@@ -201,10 +200,8 @@
lwz 3,VG_(machine_ppc32_has_VMX)@l(3)
cmplwi 3,0
beq LafterVMX2
- /* generate vector {0x0,0x0,0x0,0x00010000} */
- vspltisw 3,0x1 /* 4x 0x00000001 */
- vspltisw 4,0x0 /* generate zero */
- vsldoi 3,4,3,0x6 /* v3 =3D v3 >> 10 bytes */
+
+ vspltisw 3,0x0 /* generate zero */
mtvscr 3
LafterVMX2:
=20
@@ -298,12 +295,12 @@
cmplwi 10,0
beq LafterFP8
=20
- /* Check FPSCR[RM] =3D=3D 0 */
+ /* Check FPSCR & 0xFF =3D=3D 0 (lowest 8bits are controls) */
mffs 4 /* fpscr -> fpr */
li 5,48
stfiwx 4,5,1 /* fpr to stack */
lwzx 6,5,1 /* load to gpr */
- andi. 6,6,0x3 /* mask wanted bits */
+ andi. 6,6,0xFF /* mask wanted bits */
cmplwi 6,0x0 /* cmp with zero */
bne invariant_violation /* branch if not zero */
LafterFP8:
@@ -318,13 +315,13 @@
/* first generate 4x 0x00010000 */
vspltisw 4,0x1 /* 4x 0x00000001 */
vspltisw 5,0x0 /* zero */
- vsldoi 6,4,5,0x2 /* << 2bytes =3D> 4x 0x0001000=
0 */
+ vsldoi 6,4,5,0x2 /* <<2*8 =3D> 4x 0x00010000 */
/* retrieve VSCR and mask wanted bits */
mfvscr 7
- vand 7,7,6 /* gives SAT flag */
+ vand 7,7,6 /* gives NJ flag */
vspltw 7,7,0x3 /* flags-word to all lanes */
- vcmpequw. 8,6,7 /* CR[24] =3D 1 if equal */
- bt 26,invariant_violation /* branch if bit 26 of CR is t=
rue */
+ vcmpequw. 8,6,7 /* CR[24] =3D 1 if v6 =3D=3D v=
7 */
+ bt 24,invariant_violation /* branch if all_equal */
LafterVMX8:
=20
/* otherwise we're OK */
Modified: trunk/none/tests/ppc32/jm-insns.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/none/tests/ppc32/jm-insns.c 2005-11-18 18:43:56 UTC (rev 5195)
+++ trunk/none/tests/ppc32/jm-insns.c 2005-11-18 20:45:18 UTC (rev 5196)
@@ -5425,7 +5425,8 @@
__asm__ __volatile__ ("vsubsbs 31,%0,%1" : : "vr" (v1), "vr" (v=
2)); // sets VSCR[SAT]
*/
=20
-#define DEFAULT_VSCR 0x00010000
+//#define DEFAULT_VSCR 0x00010000
+#define DEFAULT_VSCR 0x0
=20
static void test_av_int_one_arg (const char* name, test_func_t func,
unused uint32_t test_flags)
|