From: Albert H. <he...@us...> - 2006-04-11 18:26:43
|
Update of /cvsroot/gc-linux/libgx/src In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14482/src Modified Files: gu.c gu_asm.S gx_asm.S ps_cast.S Log Message: Modify instructions to use the underscored style. That seems to be the common consensus out there. Index: gx_asm.S =================================================================== RCS file: /cvsroot/gc-linux/libgx/src/gx_asm.S,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -d -r1.1.1.1 -r1.2 --- gx_asm.S 22 Oct 2004 20:13:53 -0000 1.1.1.1 +++ gx_asm.S 11 Apr 2006 18:25:03 -0000 1.2 @@ -3,61 +3,61 @@ #include "asm.h" _GLOBAL(WriteMtxPS4x3) - psql f0,0(r3),0,0 - psql f1,8(r3),0,0 - psql f2,16(r3),0,0 - psql f3,24(r3),0,0 - psql f4,32(r3),0,0 - psql f5,40(r3),0,0 - psqst f0,0(r4),0,0 - psqst f1,0(r4),0,0 - psqst f2,0(r4),0,0 - psqst f3,0(r4),0,0 - psqst f4,0(r4),0,0 - psqst f5,0(r4),0,0 + psq_l f0,0(r3),0,0 + psq_l f1,8(r3),0,0 + psq_l f2,16(r3),0,0 + psq_l f3,24(r3),0,0 + psq_l f4,32(r3),0,0 + psq_l f5,40(r3),0,0 + psq_st f0,0(r4),0,0 + psq_st f1,0(r4),0,0 + psq_st f2,0(r4),0,0 + psq_st f3,0(r4),0,0 + psq_st f4,0(r4),0,0 + psq_st f5,0(r4),0,0 blr # static void WriteMtxPS3x3from4x3(register Mtx mt,register void *wgpipe) _GLOBAL(WriteMtxPS3x3from4x3) - psql f0,0(r3),0,0 + psq_l f0,0(r3),0,0 lfs f1,8(r3) - psql f2,16(r3),0,0 + psq_l f2,16(r3),0,0 lfs f3,24(r3) - psql f4,32(r3),0,0 + psq_l f4,32(r3),0,0 lfs f5,40(r3) - psqst f0,0(r4),0,0 + psq_st f0,0(r4),0,0 stfs f1,0(r4) - psqst f2,0(r4),0,0 + psq_st f2,0(r4),0,0 stfs f3,0(r4) - psqst f4,0(r4),0,0 + psq_st f4,0(r4),0,0 stfs f5,0(r4) blr #static void WriteMtxPS3x3(register Mtx33 mt,register void *wgpipe) _GLOBAL(WriteMtxPS3x3) - psql f0,0(r3),0,0 - psql f1,8(r3),0,0 - psql f2,16(r3),0,0 - psql f3,24(r3),0,0 + psq_l f0,0(r3),0,0 + psq_l f1,8(r3),0,0 + psq_l f2,16(r3),0,0 + psq_l f3,24(r3),0,0 lfs f4,32(r3) - psqst f0,0(r4),0,0 - psqst f1,0(r4),0,0 - psqst f2,0(r4),0,0 - psqst f3,0(r4),0,0 + psq_st f0,0(r4),0,0 + psq_st f1,0(r4),0,0 + psq_st f2,0(r4),0,0 + psq_st f3,0(r4),0,0 stfs f4,0(r4) blr #static void WriteMtxPS4x2(register Mtx mt,register void *wgpipe) _GLOBAL(WriteMtxPS4x2) - psql f0,0(r3),0,0 - psql f1,8(r3),0,0 - psql f2,16(r3),0,0 - psql f3,24(r3),0,0 - psqst f0,0(r4),0,0 - psqst f1,0(r4),0,0 - psqst f2,0(r4),0,0 - psqst f3,0(r4),0,0 + psq_l f0,0(r3),0,0 + psq_l f1,8(r3),0,0 + psq_l f2,16(r3),0,0 + psq_l f3,24(r3),0,0 + psq_st f0,0(r4),0,0 + psq_st f1,0(r4),0,0 + psq_st f2,0(r4),0,0 + psq_st f3,0(r4),0,0 blr _GLOBAL(dcache_flush) Index: gu_asm.S =================================================================== RCS file: /cvsroot/gc-linux/libgx/src/gu_asm.S,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- gu_asm.S 2 Dec 2004 19:56:37 -0000 1.2 +++ gu_asm.S 11 Apr 2006 18:25:03 -0000 1.3 @@ -39,53 +39,53 @@ _GLOBAL(ps_guMtxConcat) //r3 = mtxA, r4 = mtxB, r5 = mtxAB stwu r1,-64(r1) - psql A00_A01,0(r3),0,0 + psq_l A00_A01,0(r3),0,0 stfd f14,8(r1) - psql B00_B01,0(r4),0,0 + psq_l B00_B01,0(r4),0,0 lis r6,Unit01@ha - psql B02_B03,8(r4),0,0 + psq_l B02_B03,8(r4),0,0 stfd f15,16(r1) addi 6,6,Unit01@l stfd f31,40(r1) - psql B10_B11,16(r4),0,0 - psmuls0 D00_D01,B00_B01,A00_A01 - psql A10_A11,16(r3),0,0 - psmuls0 D02_D03,B02_B03,A00_A01 - psql UNIT01,0(r6),0,0 - psmuls0 D10_D11,B00_B01,A10_A11 - psql B12_B13,24(r4),0,0 - psmuls0 D12_D13,B02_B03,A10_A11 - psql A02_A03,8(r3),0,0 - psmadds1 D00_D01,B10_B11,A00_A01,D00_D01 - psql A12_A13,24(r3),0,0 - psmadds1 D10_D11,B10_B11,A10_A11,D10_D11 - psql B20_B21,32(r4),0,0 - psmadds1 D02_D03,B12_B13,A00_A01,D02_D03 - psql B22_B23,40(r4),0,0 - psmadds1 D12_D13,B12_B13,A10_A11,D12_D13 - psql A20_A21,32(r3),0,0 - psql A22_A23,40(r3),0,0 - psmadds0 D00_D01,B20_B21,A02_A03,D00_D01 - psmadds0 D02_D03,B22_B23,A02_A03,D02_D03 - psmadds0 D10_D11,B20_B21,A12_A13,D10_D11 - psmadds0 D12_D13,B22_B23,A12_A13,D12_D13 - psqst D00_D01,0(r5),0,0 - psmuls0 D20_D21,B00_B01,A20_A21 - psmadds1 D02_D03,UNIT01,A02_A03,D02_D03 - psmuls0 D22_D23,B02_B03,A20_A21 - psqst D10_D11,16(r5),0,0 - psmadds1 D12_D13,UNIT01,A12_A13,D12_D13 - psqst D02_D03,8(r5),0,0 - psmadds1 D20_D21,B10_B11,A20_A21,D20_D21 - psmadds1 D22_D23,B12_B13,A20_A21,D22_D23 - psmadds0 D20_D21,B20_B21,A22_A23,D20_D21 + psq_l B10_B11,16(r4),0,0 + ps_muls0 D00_D01,B00_B01,A00_A01 + psq_l A10_A11,16(r3),0,0 + ps_muls0 D02_D03,B02_B03,A00_A01 + psq_l UNIT01,0(r6),0,0 + ps_muls0 D10_D11,B00_B01,A10_A11 + psq_l B12_B13,24(r4),0,0 + ps_muls0 D12_D13,B02_B03,A10_A11 + psq_l A02_A03,8(r3),0,0 + ps_madds1 D00_D01,B10_B11,A00_A01,D00_D01 + psq_l A12_A13,24(r3),0,0 + ps_madds1 D10_D11,B10_B11,A10_A11,D10_D11 + psq_l B20_B21,32(r4),0,0 + ps_madds1 D02_D03,B12_B13,A00_A01,D02_D03 + psq_l B22_B23,40(r4),0,0 + ps_madds1 D12_D13,B12_B13,A10_A11,D12_D13 + psq_l A20_A21,32(r3),0,0 + psq_l A22_A23,40(r3),0,0 + ps_madds0 D00_D01,B20_B21,A02_A03,D00_D01 + ps_madds0 D02_D03,B22_B23,A02_A03,D02_D03 + ps_madds0 D10_D11,B20_B21,A12_A13,D10_D11 + ps_madds0 D12_D13,B22_B23,A12_A13,D12_D13 + psq_st D00_D01,0(r5),0,0 + ps_muls0 D20_D21,B00_B01,A20_A21 + ps_madds1 D02_D03,UNIT01,A02_A03,D02_D03 + ps_muls0 D22_D23,B02_B03,A20_A21 + psq_st D10_D11,16(r5),0,0 + ps_madds1 D12_D13,UNIT01,A12_A13,D12_D13 + psq_st D02_D03,8(r5),0,0 + ps_madds1 D20_D21,B10_B11,A20_A21,D20_D21 + ps_madds1 D22_D23,B12_B13,A20_A21,D22_D23 + ps_madds0 D20_D21,B20_B21,A22_A23,D20_D21 lfd f14,8(r1) - psqst D12_D13,24(r5),0,0 - psmadds0 D22_D23,B22_B23,A22_A23,D22_D23 - psqst D20_D21,32(r5),0,0 - psmadds1 D22_D23,UNIT01,A22_A23,D22_D23 + psq_st D12_D13,24(r5),0,0 + ps_madds0 D22_D23,B22_B23,A22_A23,D22_D23 + psq_st D20_D21,32(r5),0,0 + ps_madds1 D22_D23,UNIT01,A22_A23,D22_D23 lfd f15,16(r1) - psqst D22_D23,40(r5),0,0 + psq_st D22_D23,40(r5),0,0 lfd f31,40(r1) addi r1,r1,64 blr @@ -96,30 +96,30 @@ addi r9,r9,Unit01@l lfs f0,0(r9) lfs f1,4(r9) - psqst f0,8(r3),0,0 - psmerge01 f2,f0,f1 - psqst f0,24(r3),0,0 - psmerge10 f3,f1,f0 - psqst f0,32(r3),0,0 - psqst f2,16(r3),0,0 - psqst f3,0(r3),0,0 - psqst f3,40(r3),0,0 + psq_st f0,8(r3),0,0 + ps_merge01 f2,f0,f1 + psq_st f0,24(r3),0,0 + ps_merge10 f3,f1,f0 + psq_st f0,32(r3),0,0 + psq_st f2,16(r3),0,0 + psq_st f3,0(r3),0,0 + psq_st f3,40(r3),0,0 blr _GLOBAL(ps_guMtxCopy) //r3 = src, r4 = dst - psql f0,0(r3),0,0 - psqst f0,0(r4),0,0 - psql f1,8(r3),0,0 - psqst f1,8(r4),0,0 - psql f2,16(r3),0,0 - psqst f2,16(r4),0,0 - psql f3,24(r3),0,0 - psqst f3,24(r4),0,0 - psql f4,32(r3),0,0 - psqst f4,32(r4),0,0 - psql f5,40(r3),0,0 - psqst f5,40(r4),0,0 + psq_l f0,0(r3),0,0 + psq_st f0,0(r4),0,0 + psq_l f1,8(r3),0,0 + psq_st f1,8(r4),0,0 + psq_l f2,16(r3),0,0 + psq_st f2,16(r4),0,0 + psq_l f3,24(r3),0,0 + psq_st f3,24(r4),0,0 + psq_l f4,32(r3),0,0 + psq_st f4,32(r4),0,0 + psq_l f5,40(r3),0,0 + psq_st f5,40(r4),0,0 blr _GLOBAL(ps_guMtxTranspose) @@ -127,144 +127,144 @@ lis r9,Unit01@ha addi r9,r9,Unit01@l lfs f0,0(r9) - psql f1,0(r3),0,0 + psq_l f1,0(r3),0,0 stfs f0,44(r4) - psql f2,16(r3),0,0 - psmerge00 f5,f1,f2 - psql f3,8(r3),1,0 - psmerge11 f6,f1,f2 - psql f4,24(r3),1,0 - psqst f5,0(r4),0,0 - psql f1,32(r3),0,0 - psmerge00 f7,f3,f4 - psqst f6,16(r4),0,0 - psmerge00 f5,f1,f0 - psqst f7,32(r4),0,0 - psmerge10 f6,f1,f0 - psqst f5,8(r4),0,0 + psq_l f2,16(r3),0,0 + ps_merge00 f5,f1,f2 + psq_l f3,8(r3),1,0 + ps_merge11 f6,f1,f2 + psq_l f4,24(r3),1,0 + psq_st f5,0(r4),0,0 + psq_l f1,32(r3),0,0 + ps_merge00 f7,f3,f4 + psq_st f6,16(r4),0,0 + ps_merge00 f5,f1,f0 + psq_st f7,32(r4),0,0 + ps_merge10 f6,f1,f0 + psq_st f5,8(r4),0,0 lfs f3,40(r3) - psqst f6,24(r4),0,0 + psq_st f6,24(r4),0,0 stfs f3,40(r4) blr _GLOBAL(ps_guMtxInverse) //r3 = src, r4 = inv - psql f0,0(r3),1,0 - psql f1,4(r3),0,0 - psql f2,16(r3),1,0 - psmerge10 f6,f1,f0 - psql f3,20(r3),0,0 - psql f4,32(r3),1,0 - psmerge10 f7,f3,f2 - psql f5,36(r3),0,0 - psmul f11,f3,f6 - psmul f13,f5,f7 - psmerge10 f8,f5,f4 - psmsub f11,f1,f7,f11 - psmul f12,f1,f8 - psmsub f13,f3,f8,f13 - psmul f10,f3,f4 - psmsub f12,f5,f6,f12 - psmul f9,f0,f5 - psmul f8,f1,f2 - pssub f6,f6,f6 - psmsub f10,f2,f5,f10 - psmul f7,f0,f13 - psmsub f9,f1,f4,f9 - psmadd f7,f2,f12,f7 - psmsub f8,f0,f3,f8 - psmadd f7,f4,f11,f7 - pscmpo0 cr0,f7,f6 + psq_l f0,0(r3),1,0 + psq_l f1,4(r3),0,0 + psq_l f2,16(r3),1,0 + ps_merge10 f6,f1,f0 + psq_l f3,20(r3),0,0 + psq_l f4,32(r3),1,0 + ps_merge10 f7,f3,f2 + psq_l f5,36(r3),0,0 + ps_mul f11,f3,f6 + ps_mul f13,f5,f7 + ps_merge10 f8,f5,f4 + ps_msub f11,f1,f7,f11 + ps_mul f12,f1,f8 + ps_msub f13,f3,f8,f13 + ps_mul f10,f3,f4 + ps_msub f12,f5,f6,f12 + ps_mul f9,f0,f5 + ps_mul f8,f1,f2 + ps_sub f6,f6,f6 + ps_msub f10,f2,f5,f10 + ps_mul f7,f0,f13 + ps_msub f9,f1,f4,f9 + ps_madd f7,f2,f12,f7 + ps_msub f8,f0,f3,f8 + ps_madd f7,f4,f11,f7 + ps_cmpo0 cr0,f7,f6 bne 0f li r3,0 blr 0: fres f0,f7 - psadd f6,f0,f0 - psmul f5,f0,f0 - psnmsub f0,f7,f5,f6 + ps_add f6,f0,f0 + ps_mul f5,f0,f0 + ps_nmsub f0,f7,f5,f6 lfs f1,12(r3) - psmuls0 f13,f13,f0 + ps_muls0 f13,f13,f0 lfs f2,28(r3) - psmuls0 f12,f12,f0 + ps_muls0 f12,f12,f0 lfs f3,44(r3) - psmuls0 f11,f11,f0 - psmerge00 f5,f13,f12 - psmuls0 f10,f10,f0 - psmerge11 f4,f13,f12 - psmuls0 f9,f9,f0 - psqst f5,0(r4),0,0 - psmul f6,f13,f1 - psqst f4,16(r4),0,0 - psmuls0 f8,f8,f0 - psmadd f6,f12,f2,f6 - psqst f10,32(r4),1,0 - psnmadd f6,f11,f3,f6 - psqst f9,36(r4),1,0 - psmul f7,f10,f1 - psmerge00 f5,f11,f6 - psqst f8,40(r4),1,0 - psmerge11 f4,f11,f6 - psqst f5,8(r4),0,0 - psmadd f7,f9,f2,f7 - psqst f4,24(r4),0,0 - psnmadd f7,f8,f3,f7 + ps_muls0 f11,f11,f0 + ps_merge00 f5,f13,f12 + ps_muls0 f10,f10,f0 + ps_merge11 f4,f13,f12 + ps_muls0 f9,f9,f0 + psq_st f5,0(r4),0,0 + ps_mul f6,f13,f1 + psq_st f4,16(r4),0,0 + ps_muls0 f8,f8,f0 + ps_madd f6,f12,f2,f6 + psq_st f10,32(r4),1,0 + ps_nmadd f6,f11,f3,f6 + psq_st f9,36(r4),1,0 + ps_mul f7,f10,f1 + ps_merge00 f5,f11,f6 + psq_st f8,40(r4),1,0 + ps_merge11 f4,f11,f6 + psq_st f5,8(r4),0,0 + ps_madd f7,f9,f2,f7 + psq_st f4,24(r4),0,0 + ps_nmadd f7,f8,f3,f7 li r3,1 - psqst f7,44(r4),1,0 + psq_st f7,44(r4),1,0 blr _GLOBAL(ps_guMtxInvXpos) //r3 = src, r4 = invx - psql f0, 0(r3), 1, 0 - psql f1, 4(r3), 0, 0 - psql f2, 16(r3), 1, 0 - psmerge10 f6, f1, f0 - psql f3, 20(r3), 0, 0 - psql f4, 32(r3), 1, 0 - psmerge10 f7, f3, f2 - psql f5, 36(r3), 0, 0 - psmul f11, f3, f6 - psmerge10 f8, f5, f4 - psmul f13, f5, f7 - psmsub f11, f1, f7, f11 - psmul f12, f1, f8 - psmsub f13, f3, f8, f13 - psmsub f12, f5, f6, f12 - psmul f10, f3, f4 - psmul f9, f0, f5 - psmul f8, f1, f2 - psmsub f10, f2, f5, f10 - psmsub f9, f1, f4, f9 - psmsub f8, f0, f3, f8 - psmul f7, f0, f13 - pssub f1, f1, f1 - psmadd f7, f2, f12, f7 - psmadd f7, f4, f11, f7 - pscmpo0 cr0, f7, f1 + psq_l f0, 0(r3), 1, 0 + psq_l f1, 4(r3), 0, 0 + psq_l f2, 16(r3), 1, 0 + ps_merge10 f6, f1, f0 + psq_l f3, 20(r3), 0, 0 + psq_l f4, 32(r3), 1, 0 + ps_merge10 f7, f3, f2 + psq_l f5, 36(r3), 0, 0 + ps_mul f11, f3, f6 + ps_merge10 f8, f5, f4 + ps_mul f13, f5, f7 + ps_msub f11, f1, f7, f11 + ps_mul f12, f1, f8 + ps_msub f13, f3, f8, f13 + ps_msub f12, f5, f6, f12 + ps_mul f10, f3, f4 + ps_mul f9, f0, f5 + ps_mul f8, f1, f2 + ps_msub f10, f2, f5, f10 + ps_msub f9, f1, f4, f9 + ps_msub f8, f0, f3, f8 + ps_mul f7, f0, f13 + ps_sub f1, f1, f1 + ps_madd f7, f2, f12, f7 + ps_madd f7, f4, f11, f7 + ps_cmpo0 cr0, f7, f1 bne 0f addi r3, 0, 0 blr 0: fres f0, f7 - psqst f1, 12(r4), 1, 0 - psadd f6, f0, f0 - psmul f5, f0, f0 - psqst f1, 28(r4), 1, 0 - psnmsub f0, f7, f5, f6 - psqst f1, 44(r4), 1, 0 - psmuls0 f13, f13, f0 - psmuls0 f12, f12, f0 - psmuls0 f11, f11, f0 - psqst f13, 0(r4), 0, 0 - psqst f12, 16(r4), 0, 0 - psmuls0 f10, f10, f0 - psmuls0 f9, f9, f0 - psqst f11, 32(r4), 0, 0 - psqst f10, 8(r4), 1, 0 - psmuls0 f8, f8, f0 + psq_st f1, 12(r4), 1, 0 + ps_add f6, f0, f0 + ps_mul f5, f0, f0 + psq_st f1, 28(r4), 1, 0 + ps_nmsub f0, f7, f5, f6 + psq_st f1, 44(r4), 1, 0 + ps_muls0 f13, f13, f0 + ps_muls0 f12, f12, f0 + ps_muls0 f11, f11, f0 + psq_st f13, 0(r4), 0, 0 + psq_st f12, 16(r4), 0, 0 + ps_muls0 f10, f10, f0 + ps_muls0 f9, f9, f0 + psq_st f11, 32(r4), 0, 0 + psq_st f10, 8(r4), 1, 0 + ps_muls0 f8, f8, f0 addi r3, 0, 1 - psqst f9, 24(r4), 1, 0 - psqst f8, 40(r4), 1, 0 + psq_st f9, 24(r4), 1, 0 + psq_st f8, 40(r4), 1, 0 blr _GLOBAL(ps_guMtxScale) @@ -273,35 +273,35 @@ addi r9,r9,Unit01@l lfs f0,0(r9) stfs f1,0(r3) - psqst f0,4(r3),0,0 - psqst f0,12(r3),0,0 + psq_st f0,4(r3),0,0 + psq_st f0,12(r3),0,0 stfs f2,20(r3) - psqst f0,24(r3),0,0 - psqst f0,32(r3),0,0 + psq_st f0,24(r3),0,0 + psq_st f0,32(r3),0,0 stfs f3,40(r3) stfs f0,44(r3) blr _GLOBAL(ps_guMtxScaleApply) //r3 = src,r4 = dst,f1 = xS,f2 = yS,f3 = zS - psql f4,0(r3),0,0 - psql f5,8(r3),0,0 - psmuls0 f4,f4,f1 - psql f6,16(r3),0,0 - psmuls0 f5,f5,f1 - psql f7,24(r3),0,0 - psmuls0 f6,f6,f2 - psql f8,32(r3),0,0 - psqst f4,0(r4),0,0 - psmuls0 f7,f7,f2 - psql f2,40(r3),0,0 - psqst f5,8(r4),0,0 - psmuls0 f8,f8,f3 - psqst f6,16(r4),0,0 - psmuls0 f2,f2,f3 - psqst f7,24(r4),0,0 - psqst f8,32(r4),0,0 - psqst f2,40(r4),0,0 + psq_l f4,0(r3),0,0 + psq_l f5,8(r3),0,0 + ps_muls0 f4,f4,f1 + psq_l f6,16(r3),0,0 + ps_muls0 f5,f5,f1 + psq_l f7,24(r3),0,0 + ps_muls0 f6,f6,f2 + psq_l f8,32(r3),0,0 + psq_st f4,0(r4),0,0 + ps_muls0 f7,f7,f2 + psq_l f2,40(r3),0,0 + psq_st f5,8(r4),0,0 + ps_muls0 f8,f8,f3 + psq_st f6,16(r4),0,0 + ps_muls0 f2,f2,f3 + psq_st f7,24(r4),0,0 + psq_st f8,32(r4),0,0 + psq_st f2,40(r4),0,0 blr _GLOBAL(ps_guMtxTrans) @@ -312,33 +312,32 @@ lfs f5,4(r9) stfs f1,12(r3) stfs f2,28(r3) - stfs f3,44(r3) - psqst f4,4(r3),0,0 - psqst f4,32(r3),0,0 - stfs f5,0(r3) + psq_st f4,4(r3),0,0 + psq_st f4,32(r3),0,0 stfs f5,20(r3) - stfs f5,40(r3) - stfs f4,16(r3) stfs f4,24(r3) + stfs f5,40(r3) + stfs f3,44(r3) + stfs f5,0(r3) blr _GLOBAL(ps_guMtxTransApply) //r3 = src,r4 = dst,f1 = xT,f2 = yT,f3 = zT - psql f4,0(r3),0,0 - psql f5,8(r3),0,0 - psql f7,24(r3),0,0 - psql f8,40(r3),0,0 - pssum1 f5,f1,f5,f5 - psql f6,16(r3),0,0 - pssum1 f7,f2,f7,f7 - psql f9,32(r3),0,0 - pssum1 f8,f3,f8,f8 - psqst f4,0(r4),0,0 - psqst f5,8(r4),0,0 - psqst f6,16(r4),0,0 - psqst f7,24(r4),0,0 - psqst f9,32(r4),0,0 - psqst f8,40(r4),0,0 + psq_l f4,0(r3),0,0 + psq_l f5,8(r3),0,0 + psq_l f7,24(r3),0,0 + psq_l f8,40(r3),0,0 + ps_sum1 f5,f1,f5,f5 + psq_l f6,16(r3),0,0 + ps_sum1 f7,f2,f7,f7 + psq_l f9,32(r3),0,0 + ps_sum1 f8,f3,f8,f8 + psq_st f4,0(r4),0,0 + psq_st f5,8(r4),0,0 + psq_st f6,16(r4),0,0 + psq_st f7,24(r4),0,0 + psq_st f9,32(r4),0,0 + psq_st f8,40(r4),0,0 blr _GLOBAL(ps_guMtxRotTrig) @@ -348,7 +347,7 @@ lfs f3,0(r9) lfs f4,4(r9) ori r4,r4,0x20 - psneg f5,f1 + ps_neg f5,f1 cmplwi r4,'x' beq 0f cmplwi r4,'y' @@ -357,38 +356,38 @@ beq 2f b 3f 0: - psqst f4,0(r3),1,0 - psqst f3,4(r3),0,0 - psmerge00 f6,f1,f2 - psqst f3,12(r3),0,0 - psmerge00 f7,f2,f5 - psqst f3,28(r3),0,0 - psqst f3,44(r3),1,0 - psqst f6,36(r3),0,0 - psqst f7,20(r3),0,0 + psq_st f4,0(r3),1,0 + psq_st f3,4(r3),0,0 + ps_merge00 f6,f1,f2 + psq_st f3,12(r3),0,0 + ps_merge00 f7,f2,f5 + psq_st f3,28(r3),0,0 + psq_st f3,44(r3),1,0 + psq_st f6,36(r3),0,0 + psq_st f7,20(r3),0,0 b 3f 1: - psmerge00 f6,f2,f3 - psmerge00 f7,f3,f4 - psqst f3,24(r3),0,0 - psqst f6,0(r3),0,0 - psmerge00 f8,f5,f3 - psmerge00 f9,f1,f3 - psqst f6,40(r3),0,0 - psqst f7,16(r3),0,0 - psqst f9,8(r3),0,0 - psqst f8,32(r3),0,0 + ps_merge00 f6,f2,f3 + ps_merge00 f7,f3,f4 + psq_st f3,24(r3),0,0 + psq_st f6,0(r3),0,0 + ps_merge00 f8,f5,f3 + ps_merge00 f9,f1,f3 + psq_st f6,40(r3),0,0 + psq_st f7,16(r3),0,0 + psq_st f9,8(r3),0,0 + psq_st f8,32(r3),0,0 b 3f 2: - psqst f3,8(r3),0,0 - psmerge00 f6,f1,f2 - psmerge00 f8,f2,f5 - psqst f3,24(r3),0,0 - psqst f3,32(r3),0,0 - psmerge00 f7,f4,f3 - psqst f6,16(r3),0,0 - psqst f8,0(r3),0,0 - psqst f7,40(r3),0,0 + psq_st f3,8(r3),0,0 + ps_merge00 f6,f1,f2 + ps_merge00 f8,f2,f5 + psq_st f3,24(r3),0,0 + psq_st f3,32(r3),0,0 + ps_merge00 f7,f4,f3 + psq_st f6,16(r3),0,0 + psq_st f8,0(r3),0,0 + psq_st f7,40(r3),0,0 3: blr @@ -397,66 +396,66 @@ lis r9,Unit01@ha addi r9,r9,Unit01@l lfs f0,4(r9) - psql f1,8(r5),1,0 - psql f2,0(r5),0,0 - psql f3,0(r4),0,0 - psnmadd f5,f1,f0,f1 - psql f4,8(r4),1,0 - psnmadd f6,f2,f0,f2 - psmuls0 f7,f2,f5 - psmul f8,f6,f3 - psmuls0 f9,f2,f6 - pssum0 f8,f8,f8,f8 - psmuls1 f10,f2,f6 - psqst f7,32(r3),0,0 - pssum0 f2,f2,f2,f0 - psnmadd f8,f5,f4,f8 - pssum1 f10,f0,f10,f10 - psqst f9,0(r3),0,0 - psmuls0 f11,f2,f8 - psmerge00 f12,f5,f8 - psqst f10,16(r3),0,0 - psmerge00 f13,f7,f11 - psmuls0 f12,f12,f1 - psmerge11 f11,f7,f11 - psqst f13,8(r3),0,0 - pssum0 f12,f12,f12,f0 - psqst f11,24(r3),0,0 - psqst f12,40(r3),0,0 + psq_l f1,8(r5),1,0 + psq_l f2,0(r5),0,0 + psq_l f3,0(r4),0,0 + ps_nmadd f5,f1,f0,f1 + psq_l f4,8(r4),1,0 + ps_nmadd f6,f2,f0,f2 + ps_muls0 f7,f2,f5 + ps_mul f8,f6,f3 + ps_muls0 f9,f2,f6 + ps_sum0 f8,f8,f8,f8 + ps_muls1 f10,f2,f6 + psq_st f7,32(r3),0,0 + ps_sum0 f2,f2,f2,f0 + ps_nmadd f8,f5,f4,f8 + ps_sum1 f10,f0,f10,f10 + psq_st f9,0(r3),0,0 + ps_muls0 f11,f2,f8 + ps_merge00 f12,f5,f8 + psq_st f10,16(r3),0,0 + ps_merge00 f13,f7,f11 + ps_muls0 f12,f12,f1 + ps_merge11 f11,f7,f11 + psq_st f13,8(r3),0,0 + ps_sum0 f12,f12,f12,f0 + psq_st f11,24(r3),0,0 + psq_st f12,40(r3),0,0 blr _GLOBAL(ps_guVecAdd) //r3 = v1,r4 = v2,r5 = dst - psql V1_XY,0(r3),0,0 - psql V2_XY,0(r4),0,0 - psadd D1_XY,V1_XY,V2_XY - psqst D1_XY,0(r5),0,0 - psql V1_Z,8(r3),1,0 - psql V2_Z,8(r4),1,0 - psadd D1_Z,V1_Z,V2_Z - psqst D1_Z,8(r5),1,0 + psq_l V1_XY,0(r3),0,0 + psq_l V2_XY,0(r4),0,0 + ps_add D1_XY,V1_XY,V2_XY + psq_st D1_XY,0(r5),0,0 + psq_l V1_Z,8(r3),1,0 + psq_l V2_Z,8(r4),1,0 + ps_add D1_Z,V1_Z,V2_Z + psq_st D1_Z,8(r5),1,0 blr _GLOBAL(ps_guVecSub) //r3 = v1,r4 = v2,r5 = dst - psql V1_XY,0(r3),0,0 - psql V2_XY,0(r4),0,0 - pssub D1_XY,V1_XY,V2_XY - psqst D1_XY,0(r5),0,0 - psql V1_Z,8(r3),1,0 - psql V2_Z,8(r4),1,0 - pssub D1_Z,V1_Z,V2_Z - psqst D1_Z,8(r5),1,0 + psq_l V1_XY,0(r3),0,0 + psq_l V2_XY,0(r4),0,0 + ps_sub D1_XY,V1_XY,V2_XY + psq_st D1_XY,0(r5),0,0 + psq_l V1_Z,8(r3),1,0 + psq_l V2_Z,8(r4),1,0 + ps_sub D1_Z,V1_Z,V2_Z + psq_st D1_Z,8(r5),1,0 blr _GLOBAL(ps_guVecScale) //r3 = src,r4 = dst,f1 = S - psql f2,0(r3),0,0 - psql f3,8(r3),1,0 - psmuls0 f4,f2,f1 - psqst f4,0(r4),0,0 - psmuls0 f4,f3,f1 - psqst f4,8(r4),1,0 + psq_l f2,0(r3),0,0 + psq_l f3,8(r3),1,0 + ps_muls0 f4,f2,f1 + psq_st f4,0(r4),0,0 + ps_muls0 f4,f3,f1 + psq_st f4,8(r4),1,0 blr _GLOBAL(ps_guVecNormalize) @@ -465,100 +464,100 @@ addi r9,r9,NrmData@l lfs f0,0(r9) lfs f1,4(r9) - psql f2,0(r3),0,0 - psmul f4,f2,f2 - psql f3,8(r3),1,0 - psmadd f5,f3,f3,f4 - pssum0 f6,f5,f3,f4 + psq_l f2,0(r3),0,0 + ps_mul f4,f2,f2 + psq_l f3,8(r3),1,0 + ps_madd f5,f3,f3,f4 + ps_sum0 f6,f5,f3,f4 frsqrte f7,f6 fmuls f8,f7,f7 fmuls f9,f7,f0 fnmsubs f8,f8,f6,f1 fmuls f7,f8,f9 - psmuls0 f2,f2,f7 - psqst f2,0(r3),0,0 - psmuls0 f3,f3,f7 - psqst f3,8(r3),1,0 + ps_muls0 f2,f2,f7 + psq_st f2,0(r3),0,0 + ps_muls0 f3,f3,f7 + psq_st f3,8(r3),1,0 blr _GLOBAL(ps_guVecCross) //r3 = v1,r4 = v2,r5 = v12 - psql f1,0(r4),0,0 + psq_l f1,0(r4),0,0 lfs f2,8(r3) - psql f0,0(r3),0,0 - psmerge10 f6,f1,f1 + psq_l f0,0(r3),0,0 + ps_merge10 f6,f1,f1 lfs f3,8(r4) - psmul f4,f1,f2 - psmuls0 f7,f1,f0 - psmsub f5,f0,f3,f4 - psmsub f8,f0,f6,f7 - psmerge11 f9,f5,f5 - psmerge01 f10,f5,f8 - psqst f9,0(r5),1,0 - psneg f10,f10 - psqst f10,4(r5),0,0 + ps_mul f4,f1,f2 + ps_muls0 f7,f1,f0 + ps_msub f5,f0,f3,f4 + ps_msub f8,f0,f6,f7 + ps_merge11 f9,f5,f5 + ps_merge01 f10,f5,f8 + psq_st f9,0(r5),1,0 + ps_neg f10,f10 + psq_st f10,4(r5),0,0 blr _GLOBAL(ps_guVecDotProduct) //r3 = vec1,r4 = vec2 - psql f2,4(r3),0,0 - psql f3,4(r4),0,0 - psmul f2,f2,f3 - psql f5,0(r3),0,0 - psql f4,0(r4),0,0 - psmadd f3,f5,f4,f2 - pssum0 f1,f3,f2,f2 + psq_l f2,4(r3),0,0 + psq_l f3,4(r4),0,0 + ps_mul f2,f2,f3 + psq_l f5,0(r3),0,0 + psq_l f4,0(r4),0,0 + ps_madd f3,f5,f4,f2 + ps_sum0 f1,f3,f2,f2 blr _GLOBAL(ps_guVecMultiply) - psql f0,0(r4),0,0 - psql f2,0(r3),0,0 - psql f1,8(r4),1,0 - psmul f4,f2,f0 - psql f3,8(r3),0,0 - psmadd f5,f3,f1,f4 - psql f8,16(r3),0,0 - pssum0 f6,f5,f6,f5 - psql f9,24(r3),0,0 - psmul f10,f8,f0 - psqst f6,0(r5),1,0 - psmadd f11,f9,f1,f10 - psql f2,32(r3),0,0 - pssum0 f12,f11,f12,f11 - psql f3,40(r3),0,0 - psmul f4,f2,f0 - psqst f12,4(r5),1,0 - psmadd f5,f3,f1,f4 - pssum0 f6,f5,f6,f5 - psqst f6,8(r5),1,0 + psq_l f0,0(r4),0,0 + psq_l f2,0(r3),0,0 + psq_l f1,8(r4),1,0 + ps_mul f4,f2,f0 + psq_l f3,8(r3),0,0 + ps_madd f5,f3,f1,f4 + psq_l f8,16(r3),0,0 + ps_sum0 f6,f5,f6,f5 + psq_l f9,24(r3),0,0 + ps_mul f10,f8,f0 + psq_st f6,0(r5),1,0 + ps_madd f11,f9,f1,f10 + psq_l f2,32(r3),0,0 + ps_sum0 f12,f11,f12,f11 + psq_l f3,40(r3),0,0 + ps_mul f4,f2,f0 + psq_st f12,4(r5),1,0 + ps_madd f5,f3,f1,f4 + ps_sum0 f6,f5,f6,f5 + psq_st f6,8(r5),1,0 blr _GLOBAL(ps_guVecMultiplySR) // r3 = mt, r4 = src, r5 = dst - psql f0,0(r3),0,0 // m[0][0], m[0][1] GQR0 = 0 + psq_l f0,0(r3),0,0 // m[0][0], m[0][1] GQR0 = 0 // fp6 - x y - psql f6,0(r4),0,0 - psql f2,16(r3),0,0 // m[1][0], m[1][1] + psq_l f6,0(r4),0,0 + psq_l f2,16(r3),0,0 // m[1][0], m[1][1] // fp8 = m00x m01y // next X - psmul f8,f0,f6 - psql f4,32(r3),0,0 // m[2][0], m[2][1] + ps_mul f8,f0,f6 + psq_l f4,32(r3),0,0 // m[2][0], m[2][1] // fp10 = m10x m11y // next Y - psmul f10,f2,f6 - psql f7,8(r4),1,0 // fp7 - z,1.0 + ps_mul f10,f2,f6 + psq_l f7,8(r4),1,0 // fp7 - z,1.0 // fp12 = m20x m21y // next Z - psmul f12,f4,f6 // YYY last FP6 usage - psql f3,24(r3),0,0 // m[1][2], m[1][3] - pssum0 f8,f8,f8,f8 - psql f5,40(r3),0,0 // m[2][2], m[2][3] - pssum0 f10,f10,f10,f10 - psql f1,8(r3),0,0 // m[0][2], m[0][3] - pssum0 f12,f12,f12,f12 - psmadd f9,f1,f7,f8 - psqst f9,0(r5),1,0 // store X - psmadd f11,f3,f7,f10 - psqst f11,4(r5),1,0 // store Y - psmadd f13,f5,f7,f12 - psqst f13,8(r5),1,0 // sore Z + ps_mul f12,f4,f6 // YYY last FP6 usage + psq_l f3,24(r3),0,0 // m[1][2], m[1][3] + ps_sum0 f8,f8,f8,f8 + psq_l f5,40(r3),0,0 // m[2][2], m[2][3] + ps_sum0 f10,f10,f10,f10 + psq_l f1,8(r3),0,0 // m[0][2], m[0][3] + ps_sum0 f12,f12,f12,f12 + ps_madd f9,f1,f7,f8 + psq_st f9,0(r5),1,0 // store X + ps_madd f11,f3,f7,f10 + psq_st f11,4(r5),1,0 // store Y + ps_madd f13,f5,f7,f12 + psq_st f13,8(r5),1,0 // sore Z blr .section .data Index: gu.c =================================================================== RCS file: /cvsroot/gc-linux/libgx/src/gu.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- gu.c 31 Oct 2004 22:35:00 -0000 1.2 +++ gu.c 11 Apr 2006 18:25:03 -0000 1.3 @@ -244,32 +244,32 @@ ps_guVecNormalize(axis); __asm__ __volatile__( - "psql %%f1,0(%1),0,0\n" + "psq_l %%f1,0(%1),0,0\n" "lfs %%f2,8(%1)\n" - "psmerge00 %2,%2,%2\n" - "psmuls0 %%f5,%%f1,%3\n" - "psmuls0 %%f6,%%f2,%3\n" - "psmuls1 %%f4,%%f5,%%f1\n" - "psmuls0 %%f3,%%f5,%%f1\n" - "psmuls0 %%f1,%%f1,%4\n" - "psmuls0 %%f5,%%f5,%%f2\n" + "ps_merge00 %2,%2,%2\n" + "ps_muls0 %%f5,%%f1,%3\n" + "ps_muls0 %%f6,%%f2,%3\n" + "ps_muls1 %%f4,%%f5,%%f1\n" + "ps_muls0 %%f3,%%f5,%%f1\n" + "ps_muls0 %%f1,%%f1,%4\n" + "ps_muls0 %%f5,%%f5,%%f2\n" "fnmsubs %%f7,%%f2,%4,%%f4\n" "fmadds %%f8,%%f2,%4,%%f4\n" - "psneg %%f10,%%f1\n" - "pssum0 %%f9,%%f5,%5,%%f1\n" - "pssum0 %%f3,%%f3,%%f7,%2\n" - "pssum1 %%f4,%2,%%f8,%%f4\n" - "pssum0 %%f7,%%f10,%5,%%f5\n" - "pssum0 %%f10,%%f5,%%f5,%%f10\n" - "psqst %%f9,8(%0),0,0\n" - "psmuls0 %%f6,%%f6,%%f2\n" - "psqst %%f3,0(%0),0,0\n" - "pssum1 %%f5,%%f1,%%f10,%%f5\n" - "psqst %%f4,16(%0),0,0\n" - "pssum0 %%f6,%%f6,%5,%2\n" - "psqst %%f7,24(%0),0,0\n" - "psqst %%f5,32(%0),0,0\n" - "psqst %%f6,40(%0),0,0\n" + "ps_neg %%f10,%%f1\n" + "ps_sum0 %%f9,%%f5,%5,%%f1\n" + "ps_sum0 %%f3,%%f3,%%f7,%2\n" + "ps_sum1 %%f4,%2,%%f8,%%f4\n" + "ps_sum0 %%f7,%%f10,%5,%%f5\n" + "ps_sum0 %%f10,%%f5,%%f5,%%f10\n" + "psq_st %%f9,8(%0),0,0\n" + "ps_muls0 %%f6,%%f6,%%f2\n" + "psq_st %%f3,0(%0),0,0\n" + "ps_sum1 %%f5,%%f1,%%f10,%%f5\n" + "psq_st %%f4,16(%0),0,0\n" + "ps_sum0 %%f6,%%f6,%5,%2\n" + "psq_st %%f7,24(%0),0,0\n" + "psq_st %%f5,32(%0),0,0\n" + "psq_st %%f6,40(%0),0,0\n" : "=r"(mt) : "r"(axis), "f"(cT), "f"(tT), "f"(sT), "f"(fc0) ); } Index: ps_cast.S =================================================================== RCS file: /cvsroot/gc-linux/libgx/src/ps_cast.S,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- ps_cast.S 6 Dec 2004 18:35:15 -0000 1.1 +++ ps_cast.S 11 Apr 2006 18:25:03 -0000 1.2 @@ -3,12 +3,12 @@ #define DECLARES(prefix,float,int,gqr_arg,gqr) \ _GLOBAL(PSCast##prefix##float##to##int) \ - psql f0,0(r3),gqr_arg,gqr0; \ - psqst f0,0(r4),gqr_arg,gqr; \ + psq_l f0,0(r3),gqr_arg,gqr0; \ + psq_st f0,0(r4),gqr_arg,gqr; \ blr; \ _GLOBAL(PSCast##prefix##int##to##float) \ - psql f0,0(r3),gqr_arg,gqr; \ - psqst f0,0(r4),gqr_arg,gqr0; \ + psq_l f0,0(r3),gqr_arg,gqr; \ + psq_st f0,0(r4),gqr_arg,gqr0; \ blr; #define DECLARE(a,b,c) DECLARES(Single,a,b,1,c) DECLARES(Double,a,b,0,c) @@ -19,8 +19,8 @@ DECLARE(F32,S16,gqr5) _GLOBAL(PSCopy2Floats) - psql f0,0(r4),0,gqr0 - psqst f0,0(r3),0,gqr0 + psq_l f0,0(r4),0,gqr0 + psq_st f0,0(r3),0,gqr0 blr |