Update of /cvsroot/gc-linux/libgx/src
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14482/src
Modified Files:
gu.c gu_asm.S gx_asm.S ps_cast.S
Log Message:
Modify instructions to use the underscored style.
That seems to be the common consensus out there.
Index: gx_asm.S
===================================================================
RCS file: /cvsroot/gc-linux/libgx/src/gx_asm.S,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -d -r1.1.1.1 -r1.2
--- gx_asm.S 22 Oct 2004 20:13:53 -0000 1.1.1.1
+++ gx_asm.S 11 Apr 2006 18:25:03 -0000 1.2
@@ -3,61 +3,61 @@
#include "asm.h"
_GLOBAL(WriteMtxPS4x3)
- psql f0,0(r3),0,0
- psql f1,8(r3),0,0
- psql f2,16(r3),0,0
- psql f3,24(r3),0,0
- psql f4,32(r3),0,0
- psql f5,40(r3),0,0
- psqst f0,0(r4),0,0
- psqst f1,0(r4),0,0
- psqst f2,0(r4),0,0
- psqst f3,0(r4),0,0
- psqst f4,0(r4),0,0
- psqst f5,0(r4),0,0
+ psq_l f0,0(r3),0,0
+ psq_l f1,8(r3),0,0
+ psq_l f2,16(r3),0,0
+ psq_l f3,24(r3),0,0
+ psq_l f4,32(r3),0,0
+ psq_l f5,40(r3),0,0
+ psq_st f0,0(r4),0,0
+ psq_st f1,0(r4),0,0
+ psq_st f2,0(r4),0,0
+ psq_st f3,0(r4),0,0
+ psq_st f4,0(r4),0,0
+ psq_st f5,0(r4),0,0
blr
# static void WriteMtxPS3x3from4x3(register Mtx mt,register void *wgpipe)
_GLOBAL(WriteMtxPS3x3from4x3)
- psql f0,0(r3),0,0
+ psq_l f0,0(r3),0,0
lfs f1,8(r3)
- psql f2,16(r3),0,0
+ psq_l f2,16(r3),0,0
lfs f3,24(r3)
- psql f4,32(r3),0,0
+ psq_l f4,32(r3),0,0
lfs f5,40(r3)
- psqst f0,0(r4),0,0
+ psq_st f0,0(r4),0,0
stfs f1,0(r4)
- psqst f2,0(r4),0,0
+ psq_st f2,0(r4),0,0
stfs f3,0(r4)
- psqst f4,0(r4),0,0
+ psq_st f4,0(r4),0,0
stfs f5,0(r4)
blr
#static void WriteMtxPS3x3(register Mtx33 mt,register void *wgpipe)
_GLOBAL(WriteMtxPS3x3)
- psql f0,0(r3),0,0
- psql f1,8(r3),0,0
- psql f2,16(r3),0,0
- psql f3,24(r3),0,0
+ psq_l f0,0(r3),0,0
+ psq_l f1,8(r3),0,0
+ psq_l f2,16(r3),0,0
+ psq_l f3,24(r3),0,0
lfs f4,32(r3)
- psqst f0,0(r4),0,0
- psqst f1,0(r4),0,0
- psqst f2,0(r4),0,0
- psqst f3,0(r4),0,0
+ psq_st f0,0(r4),0,0
+ psq_st f1,0(r4),0,0
+ psq_st f2,0(r4),0,0
+ psq_st f3,0(r4),0,0
stfs f4,0(r4)
blr
#static void WriteMtxPS4x2(register Mtx mt,register void *wgpipe)
_GLOBAL(WriteMtxPS4x2)
- psql f0,0(r3),0,0
- psql f1,8(r3),0,0
- psql f2,16(r3),0,0
- psql f3,24(r3),0,0
- psqst f0,0(r4),0,0
- psqst f1,0(r4),0,0
- psqst f2,0(r4),0,0
- psqst f3,0(r4),0,0
+ psq_l f0,0(r3),0,0
+ psq_l f1,8(r3),0,0
+ psq_l f2,16(r3),0,0
+ psq_l f3,24(r3),0,0
+ psq_st f0,0(r4),0,0
+ psq_st f1,0(r4),0,0
+ psq_st f2,0(r4),0,0
+ psq_st f3,0(r4),0,0
blr
_GLOBAL(dcache_flush)
Index: gu_asm.S
===================================================================
RCS file: /cvsroot/gc-linux/libgx/src/gu_asm.S,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- gu_asm.S 2 Dec 2004 19:56:37 -0000 1.2
+++ gu_asm.S 11 Apr 2006 18:25:03 -0000 1.3
@@ -39,53 +39,53 @@
_GLOBAL(ps_guMtxConcat)
//r3 = mtxA, r4 = mtxB, r5 = mtxAB
stwu r1,-64(r1)
- psql A00_A01,0(r3),0,0
+ psq_l A00_A01,0(r3),0,0
stfd f14,8(r1)
- psql B00_B01,0(r4),0,0
+ psq_l B00_B01,0(r4),0,0
lis r6,Unit01@ha
- psql B02_B03,8(r4),0,0
+ psq_l B02_B03,8(r4),0,0
stfd f15,16(r1)
addi 6,6,Unit01@l
stfd f31,40(r1)
- psql B10_B11,16(r4),0,0
- psmuls0 D00_D01,B00_B01,A00_A01
- psql A10_A11,16(r3),0,0
- psmuls0 D02_D03,B02_B03,A00_A01
- psql UNIT01,0(r6),0,0
- psmuls0 D10_D11,B00_B01,A10_A11
- psql B12_B13,24(r4),0,0
- psmuls0 D12_D13,B02_B03,A10_A11
- psql A02_A03,8(r3),0,0
- psmadds1 D00_D01,B10_B11,A00_A01,D00_D01
- psql A12_A13,24(r3),0,0
- psmadds1 D10_D11,B10_B11,A10_A11,D10_D11
- psql B20_B21,32(r4),0,0
- psmadds1 D02_D03,B12_B13,A00_A01,D02_D03
- psql B22_B23,40(r4),0,0
- psmadds1 D12_D13,B12_B13,A10_A11,D12_D13
- psql A20_A21,32(r3),0,0
- psql A22_A23,40(r3),0,0
- psmadds0 D00_D01,B20_B21,A02_A03,D00_D01
- psmadds0 D02_D03,B22_B23,A02_A03,D02_D03
- psmadds0 D10_D11,B20_B21,A12_A13,D10_D11
- psmadds0 D12_D13,B22_B23,A12_A13,D12_D13
- psqst D00_D01,0(r5),0,0
- psmuls0 D20_D21,B00_B01,A20_A21
- psmadds1 D02_D03,UNIT01,A02_A03,D02_D03
- psmuls0 D22_D23,B02_B03,A20_A21
- psqst D10_D11,16(r5),0,0
- psmadds1 D12_D13,UNIT01,A12_A13,D12_D13
- psqst D02_D03,8(r5),0,0
- psmadds1 D20_D21,B10_B11,A20_A21,D20_D21
- psmadds1 D22_D23,B12_B13,A20_A21,D22_D23
- psmadds0 D20_D21,B20_B21,A22_A23,D20_D21
+ psq_l B10_B11,16(r4),0,0
+ ps_muls0 D00_D01,B00_B01,A00_A01
+ psq_l A10_A11,16(r3),0,0
+ ps_muls0 D02_D03,B02_B03,A00_A01
+ psq_l UNIT01,0(r6),0,0
+ ps_muls0 D10_D11,B00_B01,A10_A11
+ psq_l B12_B13,24(r4),0,0
+ ps_muls0 D12_D13,B02_B03,A10_A11
+ psq_l A02_A03,8(r3),0,0
+ ps_madds1 D00_D01,B10_B11,A00_A01,D00_D01
+ psq_l A12_A13,24(r3),0,0
+ ps_madds1 D10_D11,B10_B11,A10_A11,D10_D11
+ psq_l B20_B21,32(r4),0,0
+ ps_madds1 D02_D03,B12_B13,A00_A01,D02_D03
+ psq_l B22_B23,40(r4),0,0
+ ps_madds1 D12_D13,B12_B13,A10_A11,D12_D13
+ psq_l A20_A21,32(r3),0,0
+ psq_l A22_A23,40(r3),0,0
+ ps_madds0 D00_D01,B20_B21,A02_A03,D00_D01
+ ps_madds0 D02_D03,B22_B23,A02_A03,D02_D03
+ ps_madds0 D10_D11,B20_B21,A12_A13,D10_D11
+ ps_madds0 D12_D13,B22_B23,A12_A13,D12_D13
+ psq_st D00_D01,0(r5),0,0
+ ps_muls0 D20_D21,B00_B01,A20_A21
+ ps_madds1 D02_D03,UNIT01,A02_A03,D02_D03
+ ps_muls0 D22_D23,B02_B03,A20_A21
+ psq_st D10_D11,16(r5),0,0
+ ps_madds1 D12_D13,UNIT01,A12_A13,D12_D13
+ psq_st D02_D03,8(r5),0,0
+ ps_madds1 D20_D21,B10_B11,A20_A21,D20_D21
+ ps_madds1 D22_D23,B12_B13,A20_A21,D22_D23
+ ps_madds0 D20_D21,B20_B21,A22_A23,D20_D21
lfd f14,8(r1)
- psqst D12_D13,24(r5),0,0
- psmadds0 D22_D23,B22_B23,A22_A23,D22_D23
- psqst D20_D21,32(r5),0,0
- psmadds1 D22_D23,UNIT01,A22_A23,D22_D23
+ psq_st D12_D13,24(r5),0,0
+ ps_madds0 D22_D23,B22_B23,A22_A23,D22_D23
+ psq_st D20_D21,32(r5),0,0
+ ps_madds1 D22_D23,UNIT01,A22_A23,D22_D23
lfd f15,16(r1)
- psqst D22_D23,40(r5),0,0
+ psq_st D22_D23,40(r5),0,0
lfd f31,40(r1)
addi r1,r1,64
blr
@@ -96,30 +96,30 @@
addi r9,r9,Unit01@l
lfs f0,0(r9)
lfs f1,4(r9)
- psqst f0,8(r3),0,0
- psmerge01 f2,f0,f1
- psqst f0,24(r3),0,0
- psmerge10 f3,f1,f0
- psqst f0,32(r3),0,0
- psqst f2,16(r3),0,0
- psqst f3,0(r3),0,0
- psqst f3,40(r3),0,0
+ psq_st f0,8(r3),0,0
+ ps_merge01 f2,f0,f1
+ psq_st f0,24(r3),0,0
+ ps_merge10 f3,f1,f0
+ psq_st f0,32(r3),0,0
+ psq_st f2,16(r3),0,0
+ psq_st f3,0(r3),0,0
+ psq_st f3,40(r3),0,0
blr
_GLOBAL(ps_guMtxCopy)
//r3 = src, r4 = dst
- psql f0,0(r3),0,0
- psqst f0,0(r4),0,0
- psql f1,8(r3),0,0
- psqst f1,8(r4),0,0
- psql f2,16(r3),0,0
- psqst f2,16(r4),0,0
- psql f3,24(r3),0,0
- psqst f3,24(r4),0,0
- psql f4,32(r3),0,0
- psqst f4,32(r4),0,0
- psql f5,40(r3),0,0
- psqst f5,40(r4),0,0
+ psq_l f0,0(r3),0,0
+ psq_st f0,0(r4),0,0
+ psq_l f1,8(r3),0,0
+ psq_st f1,8(r4),0,0
+ psq_l f2,16(r3),0,0
+ psq_st f2,16(r4),0,0
+ psq_l f3,24(r3),0,0
+ psq_st f3,24(r4),0,0
+ psq_l f4,32(r3),0,0
+ psq_st f4,32(r4),0,0
+ psq_l f5,40(r3),0,0
+ psq_st f5,40(r4),0,0
blr
_GLOBAL(ps_guMtxTranspose)
@@ -127,144 +127,144 @@
lis r9,Unit01@ha
addi r9,r9,Unit01@l
lfs f0,0(r9)
- psql f1,0(r3),0,0
+ psq_l f1,0(r3),0,0
stfs f0,44(r4)
- psql f2,16(r3),0,0
- psmerge00 f5,f1,f2
- psql f3,8(r3),1,0
- psmerge11 f6,f1,f2
- psql f4,24(r3),1,0
- psqst f5,0(r4),0,0
- psql f1,32(r3),0,0
- psmerge00 f7,f3,f4
- psqst f6,16(r4),0,0
- psmerge00 f5,f1,f0
- psqst f7,32(r4),0,0
- psmerge10 f6,f1,f0
- psqst f5,8(r4),0,0
+ psq_l f2,16(r3),0,0
+ ps_merge00 f5,f1,f2
+ psq_l f3,8(r3),1,0
+ ps_merge11 f6,f1,f2
+ psq_l f4,24(r3),1,0
+ psq_st f5,0(r4),0,0
+ psq_l f1,32(r3),0,0
+ ps_merge00 f7,f3,f4
+ psq_st f6,16(r4),0,0
+ ps_merge00 f5,f1,f0
+ psq_st f7,32(r4),0,0
+ ps_merge10 f6,f1,f0
+ psq_st f5,8(r4),0,0
lfs f3,40(r3)
- psqst f6,24(r4),0,0
+ psq_st f6,24(r4),0,0
stfs f3,40(r4)
blr
_GLOBAL(ps_guMtxInverse)
//r3 = src, r4 = inv
- psql f0,0(r3),1,0
- psql f1,4(r3),0,0
- psql f2,16(r3),1,0
- psmerge10 f6,f1,f0
- psql f3,20(r3),0,0
- psql f4,32(r3),1,0
- psmerge10 f7,f3,f2
- psql f5,36(r3),0,0
- psmul f11,f3,f6
- psmul f13,f5,f7
- psmerge10 f8,f5,f4
- psmsub f11,f1,f7,f11
- psmul f12,f1,f8
- psmsub f13,f3,f8,f13
- psmul f10,f3,f4
- psmsub f12,f5,f6,f12
- psmul f9,f0,f5
- psmul f8,f1,f2
- pssub f6,f6,f6
- psmsub f10,f2,f5,f10
- psmul f7,f0,f13
- psmsub f9,f1,f4,f9
- psmadd f7,f2,f12,f7
- psmsub f8,f0,f3,f8
- psmadd f7,f4,f11,f7
- pscmpo0 cr0,f7,f6
+ psq_l f0,0(r3),1,0
+ psq_l f1,4(r3),0,0
+ psq_l f2,16(r3),1,0
+ ps_merge10 f6,f1,f0
+ psq_l f3,20(r3),0,0
+ psq_l f4,32(r3),1,0
+ ps_merge10 f7,f3,f2
+ psq_l f5,36(r3),0,0
+ ps_mul f11,f3,f6
+ ps_mul f13,f5,f7
+ ps_merge10 f8,f5,f4
+ ps_msub f11,f1,f7,f11
+ ps_mul f12,f1,f8
+ ps_msub f13,f3,f8,f13
+ ps_mul f10,f3,f4
+ ps_msub f12,f5,f6,f12
+ ps_mul f9,f0,f5
+ ps_mul f8,f1,f2
+ ps_sub f6,f6,f6
+ ps_msub f10,f2,f5,f10
+ ps_mul f7,f0,f13
+ ps_msub f9,f1,f4,f9
+ ps_madd f7,f2,f12,f7
+ ps_msub f8,f0,f3,f8
+ ps_madd f7,f4,f11,f7
+ ps_cmpo0 cr0,f7,f6
bne 0f
li r3,0
blr
0: fres f0,f7
- psadd f6,f0,f0
- psmul f5,f0,f0
- psnmsub f0,f7,f5,f6
+ ps_add f6,f0,f0
+ ps_mul f5,f0,f0
+ ps_nmsub f0,f7,f5,f6
lfs f1,12(r3)
- psmuls0 f13,f13,f0
+ ps_muls0 f13,f13,f0
lfs f2,28(r3)
- psmuls0 f12,f12,f0
+ ps_muls0 f12,f12,f0
lfs f3,44(r3)
- psmuls0 f11,f11,f0
- psmerge00 f5,f13,f12
- psmuls0 f10,f10,f0
- psmerge11 f4,f13,f12
- psmuls0 f9,f9,f0
- psqst f5,0(r4),0,0
- psmul f6,f13,f1
- psqst f4,16(r4),0,0
- psmuls0 f8,f8,f0
- psmadd f6,f12,f2,f6
- psqst f10,32(r4),1,0
- psnmadd f6,f11,f3,f6
- psqst f9,36(r4),1,0
- psmul f7,f10,f1
- psmerge00 f5,f11,f6
- psqst f8,40(r4),1,0
- psmerge11 f4,f11,f6
- psqst f5,8(r4),0,0
- psmadd f7,f9,f2,f7
- psqst f4,24(r4),0,0
- psnmadd f7,f8,f3,f7
+ ps_muls0 f11,f11,f0
+ ps_merge00 f5,f13,f12
+ ps_muls0 f10,f10,f0
+ ps_merge11 f4,f13,f12
+ ps_muls0 f9,f9,f0
+ psq_st f5,0(r4),0,0
+ ps_mul f6,f13,f1
+ psq_st f4,16(r4),0,0
+ ps_muls0 f8,f8,f0
+ ps_madd f6,f12,f2,f6
+ psq_st f10,32(r4),1,0
+ ps_nmadd f6,f11,f3,f6
+ psq_st f9,36(r4),1,0
+ ps_mul f7,f10,f1
+ ps_merge00 f5,f11,f6
+ psq_st f8,40(r4),1,0
+ ps_merge11 f4,f11,f6
+ psq_st f5,8(r4),0,0
+ ps_madd f7,f9,f2,f7
+ psq_st f4,24(r4),0,0
+ ps_nmadd f7,f8,f3,f7
li r3,1
- psqst f7,44(r4),1,0
+ psq_st f7,44(r4),1,0
blr
_GLOBAL(ps_guMtxInvXpos)
//r3 = src, r4 = invx
- psql f0, 0(r3), 1, 0
- psql f1, 4(r3), 0, 0
- psql f2, 16(r3), 1, 0
- psmerge10 f6, f1, f0
- psql f3, 20(r3), 0, 0
- psql f4, 32(r3), 1, 0
- psmerge10 f7, f3, f2
- psql f5, 36(r3), 0, 0
- psmul f11, f3, f6
- psmerge10 f8, f5, f4
- psmul f13, f5, f7
- psmsub f11, f1, f7, f11
- psmul f12, f1, f8
- psmsub f13, f3, f8, f13
- psmsub f12, f5, f6, f12
- psmul f10, f3, f4
- psmul f9, f0, f5
- psmul f8, f1, f2
- psmsub f10, f2, f5, f10
- psmsub f9, f1, f4, f9
- psmsub f8, f0, f3, f8
- psmul f7, f0, f13
- pssub f1, f1, f1
- psmadd f7, f2, f12, f7
- psmadd f7, f4, f11, f7
- pscmpo0 cr0, f7, f1
+ psq_l f0, 0(r3), 1, 0
+ psq_l f1, 4(r3), 0, 0
+ psq_l f2, 16(r3), 1, 0
+ ps_merge10 f6, f1, f0
+ psq_l f3, 20(r3), 0, 0
+ psq_l f4, 32(r3), 1, 0
+ ps_merge10 f7, f3, f2
+ psq_l f5, 36(r3), 0, 0
+ ps_mul f11, f3, f6
+ ps_merge10 f8, f5, f4
+ ps_mul f13, f5, f7
+ ps_msub f11, f1, f7, f11
+ ps_mul f12, f1, f8
+ ps_msub f13, f3, f8, f13
+ ps_msub f12, f5, f6, f12
+ ps_mul f10, f3, f4
+ ps_mul f9, f0, f5
+ ps_mul f8, f1, f2
+ ps_msub f10, f2, f5, f10
+ ps_msub f9, f1, f4, f9
+ ps_msub f8, f0, f3, f8
+ ps_mul f7, f0, f13
+ ps_sub f1, f1, f1
+ ps_madd f7, f2, f12, f7
+ ps_madd f7, f4, f11, f7
+ ps_cmpo0 cr0, f7, f1
bne 0f
addi r3, 0, 0
blr
0: fres f0, f7
- psqst f1, 12(r4), 1, 0
- psadd f6, f0, f0
- psmul f5, f0, f0
- psqst f1, 28(r4), 1, 0
- psnmsub f0, f7, f5, f6
- psqst f1, 44(r4), 1, 0
- psmuls0 f13, f13, f0
- psmuls0 f12, f12, f0
- psmuls0 f11, f11, f0
- psqst f13, 0(r4), 0, 0
- psqst f12, 16(r4), 0, 0
- psmuls0 f10, f10, f0
- psmuls0 f9, f9, f0
- psqst f11, 32(r4), 0, 0
- psqst f10, 8(r4), 1, 0
- psmuls0 f8, f8, f0
+ psq_st f1, 12(r4), 1, 0
+ ps_add f6, f0, f0
+ ps_mul f5, f0, f0
+ psq_st f1, 28(r4), 1, 0
+ ps_nmsub f0, f7, f5, f6
+ psq_st f1, 44(r4), 1, 0
+ ps_muls0 f13, f13, f0
+ ps_muls0 f12, f12, f0
+ ps_muls0 f11, f11, f0
+ psq_st f13, 0(r4), 0, 0
+ psq_st f12, 16(r4), 0, 0
+ ps_muls0 f10, f10, f0
+ ps_muls0 f9, f9, f0
+ psq_st f11, 32(r4), 0, 0
+ psq_st f10, 8(r4), 1, 0
+ ps_muls0 f8, f8, f0
addi r3, 0, 1
- psqst f9, 24(r4), 1, 0
- psqst f8, 40(r4), 1, 0
+ psq_st f9, 24(r4), 1, 0
+ psq_st f8, 40(r4), 1, 0
blr
_GLOBAL(ps_guMtxScale)
@@ -273,35 +273,35 @@
addi r9,r9,Unit01@l
lfs f0,0(r9)
stfs f1,0(r3)
- psqst f0,4(r3),0,0
- psqst f0,12(r3),0,0
+ psq_st f0,4(r3),0,0
+ psq_st f0,12(r3),0,0
stfs f2,20(r3)
- psqst f0,24(r3),0,0
- psqst f0,32(r3),0,0
+ psq_st f0,24(r3),0,0
+ psq_st f0,32(r3),0,0
stfs f3,40(r3)
stfs f0,44(r3)
blr
_GLOBAL(ps_guMtxScaleApply)
//r3 = src,r4 = dst,f1 = xS,f2 = yS,f3 = zS
- psql f4,0(r3),0,0
- psql f5,8(r3),0,0
- psmuls0 f4,f4,f1
- psql f6,16(r3),0,0
- psmuls0 f5,f5,f1
- psql f7,24(r3),0,0
- psmuls0 f6,f6,f2
- psql f8,32(r3),0,0
- psqst f4,0(r4),0,0
- psmuls0 f7,f7,f2
- psql f2,40(r3),0,0
- psqst f5,8(r4),0,0
- psmuls0 f8,f8,f3
- psqst f6,16(r4),0,0
- psmuls0 f2,f2,f3
- psqst f7,24(r4),0,0
- psqst f8,32(r4),0,0
- psqst f2,40(r4),0,0
+ psq_l f4,0(r3),0,0
+ psq_l f5,8(r3),0,0
+ ps_muls0 f4,f4,f1
+ psq_l f6,16(r3),0,0
+ ps_muls0 f5,f5,f1
+ psq_l f7,24(r3),0,0
+ ps_muls0 f6,f6,f2
+ psq_l f8,32(r3),0,0
+ psq_st f4,0(r4),0,0
+ ps_muls0 f7,f7,f2
+ psq_l f2,40(r3),0,0
+ psq_st f5,8(r4),0,0
+ ps_muls0 f8,f8,f3
+ psq_st f6,16(r4),0,0
+ ps_muls0 f2,f2,f3
+ psq_st f7,24(r4),0,0
+ psq_st f8,32(r4),0,0
+ psq_st f2,40(r4),0,0
blr
_GLOBAL(ps_guMtxTrans)
@@ -312,33 +312,32 @@
lfs f5,4(r9)
stfs f1,12(r3)
stfs f2,28(r3)
- stfs f3,44(r3)
- psqst f4,4(r3),0,0
- psqst f4,32(r3),0,0
- stfs f5,0(r3)
+ psq_st f4,4(r3),0,0
+ psq_st f4,32(r3),0,0
stfs f5,20(r3)
- stfs f5,40(r3)
- stfs f4,16(r3)
stfs f4,24(r3)
+ stfs f5,40(r3)
+ stfs f3,44(r3)
+ stfs f5,0(r3)
blr
_GLOBAL(ps_guMtxTransApply)
//r3 = src,r4 = dst,f1 = xT,f2 = yT,f3 = zT
- psql f4,0(r3),0,0
- psql f5,8(r3),0,0
- psql f7,24(r3),0,0
- psql f8,40(r3),0,0
- pssum1 f5,f1,f5,f5
- psql f6,16(r3),0,0
- pssum1 f7,f2,f7,f7
- psql f9,32(r3),0,0
- pssum1 f8,f3,f8,f8
- psqst f4,0(r4),0,0
- psqst f5,8(r4),0,0
- psqst f6,16(r4),0,0
- psqst f7,24(r4),0,0
- psqst f9,32(r4),0,0
- psqst f8,40(r4),0,0
+ psq_l f4,0(r3),0,0
+ psq_l f5,8(r3),0,0
+ psq_l f7,24(r3),0,0
+ psq_l f8,40(r3),0,0
+ ps_sum1 f5,f1,f5,f5
+ psq_l f6,16(r3),0,0
+ ps_sum1 f7,f2,f7,f7
+ psq_l f9,32(r3),0,0
+ ps_sum1 f8,f3,f8,f8
+ psq_st f4,0(r4),0,0
+ psq_st f5,8(r4),0,0
+ psq_st f6,16(r4),0,0
+ psq_st f7,24(r4),0,0
+ psq_st f9,32(r4),0,0
+ psq_st f8,40(r4),0,0
blr
_GLOBAL(ps_guMtxRotTrig)
@@ -348,7 +347,7 @@
lfs f3,0(r9)
lfs f4,4(r9)
ori r4,r4,0x20
- psneg f5,f1
+ ps_neg f5,f1
cmplwi r4,'x'
beq 0f
cmplwi r4,'y'
@@ -357,38 +356,38 @@
beq 2f
b 3f
0:
- psqst f4,0(r3),1,0
- psqst f3,4(r3),0,0
- psmerge00 f6,f1,f2
- psqst f3,12(r3),0,0
- psmerge00 f7,f2,f5
- psqst f3,28(r3),0,0
- psqst f3,44(r3),1,0
- psqst f6,36(r3),0,0
- psqst f7,20(r3),0,0
+ psq_st f4,0(r3),1,0
+ psq_st f3,4(r3),0,0
+ ps_merge00 f6,f1,f2
+ psq_st f3,12(r3),0,0
+ ps_merge00 f7,f2,f5
+ psq_st f3,28(r3),0,0
+ psq_st f3,44(r3),1,0
+ psq_st f6,36(r3),0,0
+ psq_st f7,20(r3),0,0
b 3f
1:
- psmerge00 f6,f2,f3
- psmerge00 f7,f3,f4
- psqst f3,24(r3),0,0
- psqst f6,0(r3),0,0
- psmerge00 f8,f5,f3
- psmerge00 f9,f1,f3
- psqst f6,40(r3),0,0
- psqst f7,16(r3),0,0
- psqst f9,8(r3),0,0
- psqst f8,32(r3),0,0
+ ps_merge00 f6,f2,f3
+ ps_merge00 f7,f3,f4
+ psq_st f3,24(r3),0,0
+ psq_st f6,0(r3),0,0
+ ps_merge00 f8,f5,f3
+ ps_merge00 f9,f1,f3
+ psq_st f6,40(r3),0,0
+ psq_st f7,16(r3),0,0
+ psq_st f9,8(r3),0,0
+ psq_st f8,32(r3),0,0
b 3f
2:
- psqst f3,8(r3),0,0
- psmerge00 f6,f1,f2
- psmerge00 f8,f2,f5
- psqst f3,24(r3),0,0
- psqst f3,32(r3),0,0
- psmerge00 f7,f4,f3
- psqst f6,16(r3),0,0
- psqst f8,0(r3),0,0
- psqst f7,40(r3),0,0
+ psq_st f3,8(r3),0,0
+ ps_merge00 f6,f1,f2
+ ps_merge00 f8,f2,f5
+ psq_st f3,24(r3),0,0
+ psq_st f3,32(r3),0,0
+ ps_merge00 f7,f4,f3
+ psq_st f6,16(r3),0,0
+ psq_st f8,0(r3),0,0
+ psq_st f7,40(r3),0,0
3:
blr
@@ -397,66 +396,66 @@
lis r9,Unit01@ha
addi r9,r9,Unit01@l
lfs f0,4(r9)
- psql f1,8(r5),1,0
- psql f2,0(r5),0,0
- psql f3,0(r4),0,0
- psnmadd f5,f1,f0,f1
- psql f4,8(r4),1,0
- psnmadd f6,f2,f0,f2
- psmuls0 f7,f2,f5
- psmul f8,f6,f3
- psmuls0 f9,f2,f6
- pssum0 f8,f8,f8,f8
- psmuls1 f10,f2,f6
- psqst f7,32(r3),0,0
- pssum0 f2,f2,f2,f0
- psnmadd f8,f5,f4,f8
- pssum1 f10,f0,f10,f10
- psqst f9,0(r3),0,0
- psmuls0 f11,f2,f8
- psmerge00 f12,f5,f8
- psqst f10,16(r3),0,0
- psmerge00 f13,f7,f11
- psmuls0 f12,f12,f1
- psmerge11 f11,f7,f11
- psqst f13,8(r3),0,0
- pssum0 f12,f12,f12,f0
- psqst f11,24(r3),0,0
- psqst f12,40(r3),0,0
+ psq_l f1,8(r5),1,0
+ psq_l f2,0(r5),0,0
+ psq_l f3,0(r4),0,0
+ ps_nmadd f5,f1,f0,f1
+ psq_l f4,8(r4),1,0
+ ps_nmadd f6,f2,f0,f2
+ ps_muls0 f7,f2,f5
+ ps_mul f8,f6,f3
+ ps_muls0 f9,f2,f6
+ ps_sum0 f8,f8,f8,f8
+ ps_muls1 f10,f2,f6
+ psq_st f7,32(r3),0,0
+ ps_sum0 f2,f2,f2,f0
+ ps_nmadd f8,f5,f4,f8
+ ps_sum1 f10,f0,f10,f10
+ psq_st f9,0(r3),0,0
+ ps_muls0 f11,f2,f8
+ ps_merge00 f12,f5,f8
+ psq_st f10,16(r3),0,0
+ ps_merge00 f13,f7,f11
+ ps_muls0 f12,f12,f1
+ ps_merge11 f11,f7,f11
+ psq_st f13,8(r3),0,0
+ ps_sum0 f12,f12,f12,f0
+ psq_st f11,24(r3),0,0
+ psq_st f12,40(r3),0,0
blr
_GLOBAL(ps_guVecAdd)
//r3 = v1,r4 = v2,r5 = dst
- psql V1_XY,0(r3),0,0
- psql V2_XY,0(r4),0,0
- psadd D1_XY,V1_XY,V2_XY
- psqst D1_XY,0(r5),0,0
- psql V1_Z,8(r3),1,0
- psql V2_Z,8(r4),1,0
- psadd D1_Z,V1_Z,V2_Z
- psqst D1_Z,8(r5),1,0
+ psq_l V1_XY,0(r3),0,0
+ psq_l V2_XY,0(r4),0,0
+ ps_add D1_XY,V1_XY,V2_XY
+ psq_st D1_XY,0(r5),0,0
+ psq_l V1_Z,8(r3),1,0
+ psq_l V2_Z,8(r4),1,0
+ ps_add D1_Z,V1_Z,V2_Z
+ psq_st D1_Z,8(r5),1,0
blr
_GLOBAL(ps_guVecSub)
//r3 = v1,r4 = v2,r5 = dst
- psql V1_XY,0(r3),0,0
- psql V2_XY,0(r4),0,0
- pssub D1_XY,V1_XY,V2_XY
- psqst D1_XY,0(r5),0,0
- psql V1_Z,8(r3),1,0
- psql V2_Z,8(r4),1,0
- pssub D1_Z,V1_Z,V2_Z
- psqst D1_Z,8(r5),1,0
+ psq_l V1_XY,0(r3),0,0
+ psq_l V2_XY,0(r4),0,0
+ ps_sub D1_XY,V1_XY,V2_XY
+ psq_st D1_XY,0(r5),0,0
+ psq_l V1_Z,8(r3),1,0
+ psq_l V2_Z,8(r4),1,0
+ ps_sub D1_Z,V1_Z,V2_Z
+ psq_st D1_Z,8(r5),1,0
blr
_GLOBAL(ps_guVecScale)
//r3 = src,r4 = dst,f1 = S
- psql f2,0(r3),0,0
- psql f3,8(r3),1,0
- psmuls0 f4,f2,f1
- psqst f4,0(r4),0,0
- psmuls0 f4,f3,f1
- psqst f4,8(r4),1,0
+ psq_l f2,0(r3),0,0
+ psq_l f3,8(r3),1,0
+ ps_muls0 f4,f2,f1
+ psq_st f4,0(r4),0,0
+ ps_muls0 f4,f3,f1
+ psq_st f4,8(r4),1,0
blr
_GLOBAL(ps_guVecNormalize)
@@ -465,100 +464,100 @@
addi r9,r9,NrmData@l
lfs f0,0(r9)
lfs f1,4(r9)
- psql f2,0(r3),0,0
- psmul f4,f2,f2
- psql f3,8(r3),1,0
- psmadd f5,f3,f3,f4
- pssum0 f6,f5,f3,f4
+ psq_l f2,0(r3),0,0
+ ps_mul f4,f2,f2
+ psq_l f3,8(r3),1,0
+ ps_madd f5,f3,f3,f4
+ ps_sum0 f6,f5,f3,f4
frsqrte f7,f6
fmuls f8,f7,f7
fmuls f9,f7,f0
fnmsubs f8,f8,f6,f1
fmuls f7,f8,f9
- psmuls0 f2,f2,f7
- psqst f2,0(r3),0,0
- psmuls0 f3,f3,f7
- psqst f3,8(r3),1,0
+ ps_muls0 f2,f2,f7
+ psq_st f2,0(r3),0,0
+ ps_muls0 f3,f3,f7
+ psq_st f3,8(r3),1,0
blr
_GLOBAL(ps_guVecCross)
//r3 = v1,r4 = v2,r5 = v12
- psql f1,0(r4),0,0
+ psq_l f1,0(r4),0,0
lfs f2,8(r3)
- psql f0,0(r3),0,0
- psmerge10 f6,f1,f1
+ psq_l f0,0(r3),0,0
+ ps_merge10 f6,f1,f1
lfs f3,8(r4)
- psmul f4,f1,f2
- psmuls0 f7,f1,f0
- psmsub f5,f0,f3,f4
- psmsub f8,f0,f6,f7
- psmerge11 f9,f5,f5
- psmerge01 f10,f5,f8
- psqst f9,0(r5),1,0
- psneg f10,f10
- psqst f10,4(r5),0,0
+ ps_mul f4,f1,f2
+ ps_muls0 f7,f1,f0
+ ps_msub f5,f0,f3,f4
+ ps_msub f8,f0,f6,f7
+ ps_merge11 f9,f5,f5
+ ps_merge01 f10,f5,f8
+ psq_st f9,0(r5),1,0
+ ps_neg f10,f10
+ psq_st f10,4(r5),0,0
blr
_GLOBAL(ps_guVecDotProduct)
//r3 = vec1,r4 = vec2
- psql f2,4(r3),0,0
- psql f3,4(r4),0,0
- psmul f2,f2,f3
- psql f5,0(r3),0,0
- psql f4,0(r4),0,0
- psmadd f3,f5,f4,f2
- pssum0 f1,f3,f2,f2
+ psq_l f2,4(r3),0,0
+ psq_l f3,4(r4),0,0
+ ps_mul f2,f2,f3
+ psq_l f5,0(r3),0,0
+ psq_l f4,0(r4),0,0
+ ps_madd f3,f5,f4,f2
+ ps_sum0 f1,f3,f2,f2
blr
_GLOBAL(ps_guVecMultiply)
- psql f0,0(r4),0,0
- psql f2,0(r3),0,0
- psql f1,8(r4),1,0
- psmul f4,f2,f0
- psql f3,8(r3),0,0
- psmadd f5,f3,f1,f4
- psql f8,16(r3),0,0
- pssum0 f6,f5,f6,f5
- psql f9,24(r3),0,0
- psmul f10,f8,f0
- psqst f6,0(r5),1,0
- psmadd f11,f9,f1,f10
- psql f2,32(r3),0,0
- pssum0 f12,f11,f12,f11
- psql f3,40(r3),0,0
- psmul f4,f2,f0
- psqst f12,4(r5),1,0
- psmadd f5,f3,f1,f4
- pssum0 f6,f5,f6,f5
- psqst f6,8(r5),1,0
+ psq_l f0,0(r4),0,0
+ psq_l f2,0(r3),0,0
+ psq_l f1,8(r4),1,0
+ ps_mul f4,f2,f0
+ psq_l f3,8(r3),0,0
+ ps_madd f5,f3,f1,f4
+ psq_l f8,16(r3),0,0
+ ps_sum0 f6,f5,f6,f5
+ psq_l f9,24(r3),0,0
+ ps_mul f10,f8,f0
+ psq_st f6,0(r5),1,0
+ ps_madd f11,f9,f1,f10
+ psq_l f2,32(r3),0,0
+ ps_sum0 f12,f11,f12,f11
+ psq_l f3,40(r3),0,0
+ ps_mul f4,f2,f0
+ psq_st f12,4(r5),1,0
+ ps_madd f5,f3,f1,f4
+ ps_sum0 f6,f5,f6,f5
+ psq_st f6,8(r5),1,0
blr
_GLOBAL(ps_guVecMultiplySR)
// r3 = mt, r4 = src, r5 = dst
- psql f0,0(r3),0,0 // m[0][0], m[0][1] GQR0 = 0
+ psq_l f0,0(r3),0,0 // m[0][0], m[0][1] GQR0 = 0
// fp6 - x y
- psql f6,0(r4),0,0
- psql f2,16(r3),0,0 // m[1][0], m[1][1]
+ psq_l f6,0(r4),0,0
+ psq_l f2,16(r3),0,0 // m[1][0], m[1][1]
// fp8 = m00x m01y // next X
- psmul f8,f0,f6
- psql f4,32(r3),0,0 // m[2][0], m[2][1]
+ ps_mul f8,f0,f6
+ psq_l f4,32(r3),0,0 // m[2][0], m[2][1]
// fp10 = m10x m11y // next Y
- psmul f10,f2,f6
- psql f7,8(r4),1,0 // fp7 - z,1.0
+ ps_mul f10,f2,f6
+ psq_l f7,8(r4),1,0 // fp7 - z,1.0
// fp12 = m20x m21y // next Z
- psmul f12,f4,f6 // YYY last FP6 usage
- psql f3,24(r3),0,0 // m[1][2], m[1][3]
- pssum0 f8,f8,f8,f8
- psql f5,40(r3),0,0 // m[2][2], m[2][3]
- pssum0 f10,f10,f10,f10
- psql f1,8(r3),0,0 // m[0][2], m[0][3]
- pssum0 f12,f12,f12,f12
- psmadd f9,f1,f7,f8
- psqst f9,0(r5),1,0 // store X
- psmadd f11,f3,f7,f10
- psqst f11,4(r5),1,0 // store Y
- psmadd f13,f5,f7,f12
- psqst f13,8(r5),1,0 // sore Z
+ ps_mul f12,f4,f6 // YYY last FP6 usage
+ psq_l f3,24(r3),0,0 // m[1][2], m[1][3]
+ ps_sum0 f8,f8,f8,f8
+ psq_l f5,40(r3),0,0 // m[2][2], m[2][3]
+ ps_sum0 f10,f10,f10,f10
+ psq_l f1,8(r3),0,0 // m[0][2], m[0][3]
+ ps_sum0 f12,f12,f12,f12
+ ps_madd f9,f1,f7,f8
+ psq_st f9,0(r5),1,0 // store X
+ ps_madd f11,f3,f7,f10
+ psq_st f11,4(r5),1,0 // store Y
+ ps_madd f13,f5,f7,f12
+ psq_st f13,8(r5),1,0 // sore Z
blr
.section .data
Index: gu.c
===================================================================
RCS file: /cvsroot/gc-linux/libgx/src/gu.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- gu.c 31 Oct 2004 22:35:00 -0000 1.2
+++ gu.c 11 Apr 2006 18:25:03 -0000 1.3
@@ -244,32 +244,32 @@
ps_guVecNormalize(axis);
__asm__ __volatile__(
- "psql %%f1,0(%1),0,0\n"
+ "psq_l %%f1,0(%1),0,0\n"
"lfs %%f2,8(%1)\n"
- "psmerge00 %2,%2,%2\n"
- "psmuls0 %%f5,%%f1,%3\n"
- "psmuls0 %%f6,%%f2,%3\n"
- "psmuls1 %%f4,%%f5,%%f1\n"
- "psmuls0 %%f3,%%f5,%%f1\n"
- "psmuls0 %%f1,%%f1,%4\n"
- "psmuls0 %%f5,%%f5,%%f2\n"
+ "ps_merge00 %2,%2,%2\n"
+ "ps_muls0 %%f5,%%f1,%3\n"
+ "ps_muls0 %%f6,%%f2,%3\n"
+ "ps_muls1 %%f4,%%f5,%%f1\n"
+ "ps_muls0 %%f3,%%f5,%%f1\n"
+ "ps_muls0 %%f1,%%f1,%4\n"
+ "ps_muls0 %%f5,%%f5,%%f2\n"
"fnmsubs %%f7,%%f2,%4,%%f4\n"
"fmadds %%f8,%%f2,%4,%%f4\n"
- "psneg %%f10,%%f1\n"
- "pssum0 %%f9,%%f5,%5,%%f1\n"
- "pssum0 %%f3,%%f3,%%f7,%2\n"
- "pssum1 %%f4,%2,%%f8,%%f4\n"
- "pssum0 %%f7,%%f10,%5,%%f5\n"
- "pssum0 %%f10,%%f5,%%f5,%%f10\n"
- "psqst %%f9,8(%0),0,0\n"
- "psmuls0 %%f6,%%f6,%%f2\n"
- "psqst %%f3,0(%0),0,0\n"
- "pssum1 %%f5,%%f1,%%f10,%%f5\n"
- "psqst %%f4,16(%0),0,0\n"
- "pssum0 %%f6,%%f6,%5,%2\n"
- "psqst %%f7,24(%0),0,0\n"
- "psqst %%f5,32(%0),0,0\n"
- "psqst %%f6,40(%0),0,0\n"
+ "ps_neg %%f10,%%f1\n"
+ "ps_sum0 %%f9,%%f5,%5,%%f1\n"
+ "ps_sum0 %%f3,%%f3,%%f7,%2\n"
+ "ps_sum1 %%f4,%2,%%f8,%%f4\n"
+ "ps_sum0 %%f7,%%f10,%5,%%f5\n"
+ "ps_sum0 %%f10,%%f5,%%f5,%%f10\n"
+ "psq_st %%f9,8(%0),0,0\n"
+ "ps_muls0 %%f6,%%f6,%%f2\n"
+ "psq_st %%f3,0(%0),0,0\n"
+ "ps_sum1 %%f5,%%f1,%%f10,%%f5\n"
+ "psq_st %%f4,16(%0),0,0\n"
+ "ps_sum0 %%f6,%%f6,%5,%2\n"
+ "psq_st %%f7,24(%0),0,0\n"
+ "psq_st %%f5,32(%0),0,0\n"
+ "psq_st %%f6,40(%0),0,0\n"
: "=r"(mt) : "r"(axis), "f"(cT), "f"(tT), "f"(sT), "f"(fc0)
);
}
Index: ps_cast.S
===================================================================
RCS file: /cvsroot/gc-linux/libgx/src/ps_cast.S,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- ps_cast.S 6 Dec 2004 18:35:15 -0000 1.1
+++ ps_cast.S 11 Apr 2006 18:25:03 -0000 1.2
@@ -3,12 +3,12 @@
#define DECLARES(prefix,float,int,gqr_arg,gqr) \
_GLOBAL(PSCast##prefix##float##to##int) \
- psql f0,0(r3),gqr_arg,gqr0; \
- psqst f0,0(r4),gqr_arg,gqr; \
+ psq_l f0,0(r3),gqr_arg,gqr0; \
+ psq_st f0,0(r4),gqr_arg,gqr; \
blr; \
_GLOBAL(PSCast##prefix##int##to##float) \
- psql f0,0(r3),gqr_arg,gqr; \
- psqst f0,0(r4),gqr_arg,gqr0; \
+ psq_l f0,0(r3),gqr_arg,gqr; \
+ psq_st f0,0(r4),gqr_arg,gqr0; \
blr;
#define DECLARE(a,b,c) DECLARES(Single,a,b,1,c) DECLARES(Double,a,b,0,c)
@@ -19,8 +19,8 @@
DECLARE(F32,S16,gqr5)
_GLOBAL(PSCopy2Floats)
- psql f0,0(r4),0,gqr0
- psqst f0,0(r3),0,gqr0
+ psq_l f0,0(r4),0,gqr0
+ psq_st f0,0(r3),0,gqr0
blr
|