|
From: <sv...@va...> - 2005-11-18 20:45:32
|
Author: cerion
Date: 2005-11-18 20:45:18 +0000 (Fri, 18 Nov 2005)
New Revision: 5196
Log:
Changed altivec floating point setup to Java/IEEE mode
- Non-Java mode is the system default, but was causing some accuracy problems by rounding off intermediate denormalised results to zero.
We now have some small errors (lowest bit only) due to using greater accuracy than the system default, but this is better overall.
Also expanded dispatcher check of FPSCR to include all control bits
Modified:
trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
trunk/none/tests/ppc32/jm-insns.c
Modified: trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-11-18 18:43:56=
UTC (rev 5195)
+++ trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-11-18 20:45:18=
UTC (rev 5196)
@@ -164,12 +164,10 @@
=20
/* 24(sp) used later to stop ctr reg being clobbered */
=20
+ /* 8:20(sp) free */
+=09
/* Linkage Area (reserved)
- 20(sp) : TOC save area
- 16(sp) : link editor word
- 12(sp) : compiler word
- 8(sp) : LR
- 4(sp) : CR
+ 4(sp) : LR
0(sp) : back-chain
*/
=20
@@ -191,6 +189,7 @@
lwz 3,VG_(machine_ppc32_has_FP)@l(3)
cmplwi 3,0
beq LafterFP2
+
fsub 3,3,3 /* generate zero */
mtfsf 0xFF,3
LafterFP2:
@@ -201,10 +200,8 @@
lwz 3,VG_(machine_ppc32_has_VMX)@l(3)
cmplwi 3,0
beq LafterVMX2
- /* generate vector {0x0,0x0,0x0,0x00010000} */
- vspltisw 3,0x1 /* 4x 0x00000001 */
- vspltisw 4,0x0 /* generate zero */
- vsldoi 3,4,3,0x6 /* v3 =3D v3 >> 10 bytes */
+
+ vspltisw 3,0x0 /* generate zero */
mtvscr 3
LafterVMX2:
=20
@@ -298,12 +295,12 @@
cmplwi 10,0
beq LafterFP8
=20
- /* Check FPSCR[RM] =3D=3D 0 */
+ /* Check FPSCR & 0xFF =3D=3D 0 (lowest 8bits are controls) */
mffs 4 /* fpscr -> fpr */
li 5,48
stfiwx 4,5,1 /* fpr to stack */
lwzx 6,5,1 /* load to gpr */
- andi. 6,6,0x3 /* mask wanted bits */
+ andi. 6,6,0xFF /* mask wanted bits */
cmplwi 6,0x0 /* cmp with zero */
bne invariant_violation /* branch if not zero */
LafterFP8:
@@ -318,13 +315,13 @@
/* first generate 4x 0x00010000 */
vspltisw 4,0x1 /* 4x 0x00000001 */
vspltisw 5,0x0 /* zero */
- vsldoi 6,4,5,0x2 /* << 2bytes =3D> 4x 0x0001000=
0 */
+ vsldoi 6,4,5,0x2 /* <<2*8 =3D> 4x 0x00010000 */
/* retrieve VSCR and mask wanted bits */
mfvscr 7
- vand 7,7,6 /* gives SAT flag */
+ vand 7,7,6 /* gives NJ flag */
vspltw 7,7,0x3 /* flags-word to all lanes */
- vcmpequw. 8,6,7 /* CR[24] =3D 1 if equal */
- bt 26,invariant_violation /* branch if bit 26 of CR is t=
rue */
+ vcmpequw. 8,6,7 /* CR[24] =3D 1 if v6 =3D=3D v=
7 */
+ bt 24,invariant_violation /* branch if all_equal */
LafterVMX8:
=20
/* otherwise we're OK */
Modified: trunk/none/tests/ppc32/jm-insns.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/none/tests/ppc32/jm-insns.c 2005-11-18 18:43:56 UTC (rev 5195)
+++ trunk/none/tests/ppc32/jm-insns.c 2005-11-18 20:45:18 UTC (rev 5196)
@@ -5425,7 +5425,8 @@
__asm__ __volatile__ ("vsubsbs 31,%0,%1" : : "vr" (v1), "vr" (v=
2)); // sets VSCR[SAT]
*/
=20
-#define DEFAULT_VSCR 0x00010000
+//#define DEFAULT_VSCR 0x00010000
+#define DEFAULT_VSCR 0x0
=20
static void test_av_int_one_arg (const char* name, test_func_t func,
unused uint32_t test_flags)
|
|
From: Nicholas N. <nj...@cs...> - 2005-11-18 21:02:03
|
On Fri, 18 Nov 2005, sv...@va... wrote: > Log: > Changed altivec floating point setup to Java/IEEE mode > - Non-Java mode is the system default, but was causing some accuracy problems by rounding off intermediate denormalised results to zero. > We now have some small errors (lowest bit only) due to using greater accuracy than the system default, but is better overall. This should be mentioned in the Limitations section of the manual. Nick |