[Mplayerxp-cvslog] CVS: mplayerxp/codecs/mp3lib dct36_3dnow.c,NONE,1.1 decode_3dnow.c,NONE,1.1 decod
Brought to you by:
olov
From: Nick K. <nic...@us...> - 2002-11-23 19:09:59
|
Update of /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib In directory sc8-pr-cvs1:/tmp/cvs-serv8150 Modified Files: dct64_3dnow.c dct64_k7.c Makefile mpg123.h sr1.c test2.c test.c Added Files: dct36_3dnow.c decode_3dnow.c decode_3dnow.h Removed Files: dct36_3dnow.s dct36_k7.s dct64_sse.s decode_i586.c Log Message: Use standard decoder for 3dnow, enable MMX optimization for MMX-only cpus if no mp3 scaling specified --- NEW FILE: dct36_3dnow.c --- /* dct36_3dnow.c - 3DNow! optimized dct36() This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama <sq...@mb...>; only two types of changes have been made: - remove PREFETCH instruction for speedup - change function name to support 3DNow! automatic detection You can find Kashiyama's original 3dnow! support patch (for mpg123-0.59o) at http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). by KIMURA Takuhiro <ki...@ha...> - until 31.Mar.1999 <ki...@co...> - after 1.Apr.1999 Replacement of dct36() with AMD's 3DNow! SIMD operations support Syuuhei Kashiyama <sq...@mb...> The author of this program disclaim whole expressed or implied warranties with regard to this program, and in no event shall the author of this program liable to whatever resulted from the use of this program. Use it at your own risk. 
*/ #include "mpg123.h" void dct36_3dnow(real *a,real *b,real *c,real *d,real *e) { __asm __volatile( " femms\n\t" " movq (%0),%%mm0\n\t" " movq 4(%0),%%mm1\n\t" " pfadd %%mm1,%%mm0\n\t" " movq %%mm0,4(%0)\n\t" " psrlq $32,%%mm1\n\t" " movq 12(%0),%%mm2\n\t" " punpckldq %%mm2,%%mm1\n\t" " pfadd %%mm2,%%mm1\n\t" " movq %%mm1,12(%0)\n\t" " psrlq $32,%%mm2\n\t" " movq 20(%0),%%mm3\n\t" " punpckldq %%mm3,%%mm2\n\t" " pfadd %%mm3,%%mm2\n\t" " movq %%mm2,20(%0)\n\t" " psrlq $32,%%mm3\n\t" " movq 28(%0),%%mm4\n\t" " punpckldq %%mm4,%%mm3\n\t" " pfadd %%mm4,%%mm3\n\t" " movq %%mm3,28(%0)\n\t" " psrlq $32,%%mm4\n\t" " movq 36(%0),%%mm5\n\t" " punpckldq %%mm5,%%mm4\n\t" " pfadd %%mm5,%%mm4\n\t" " movq %%mm4,36(%0)\n\t" " psrlq $32,%%mm5\n\t" " movq 44(%0),%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfadd %%mm6,%%mm5\n\t" " movq %%mm5,44(%0)\n\t" " psrlq $32,%%mm6\n\t" " movq 52(%0),%%mm7\n\t" " punpckldq %%mm7,%%mm6\n\t" " pfadd %%mm7,%%mm6\n\t" " movq %%mm6,52(%0)\n\t" " psrlq $32,%%mm7\n\t" " movq 60(%0),%%mm0\n\t" " punpckldq %%mm0,%%mm7\n\t" " pfadd %%mm0,%%mm7\n\t" " movq %%mm7,60(%0)\n\t" " psrlq $32,%%mm0\n\t" " movd 68(%0),%%mm1\n\t" " pfadd %%mm1,%%mm0\n\t" " movd %%mm0,68(%0)\n\t" " movd 4(%0),%%mm0\n\t" " movd 12(%0),%%mm1\n\t" " punpckldq %%mm1,%%mm0\n\t" " punpckldq 20(%0),%%mm1\n\t" " pfadd %%mm1,%%mm0\n\t" " movd %%mm0,12(%0)\n\t" " psrlq $32,%%mm0\n\t" " movd %%mm0,20(%0)\n\t" " psrlq $32,%%mm1\n\t" " movd 28(%0),%%mm2\n\t" " punpckldq %%mm2,%%mm1\n\t" " punpckldq 36(%0),%%mm2\n\t" " pfadd %%mm2,%%mm1\n\t" " movd %%mm1,28(%0)\n\t" " psrlq $32,%%mm1\n\t" " movd %%mm1,36(%0)\n\t" " psrlq $32,%%mm2\n\t" " movd 44(%0),%%mm3\n\t" " punpckldq %%mm3,%%mm2\n\t" " punpckldq 52(%0),%%mm3\n\t" " pfadd %%mm3,%%mm2\n\t" " movd %%mm2,44(%0)\n\t" " psrlq $32,%%mm2\n\t" " movd %%mm2,52(%0)\n\t" " psrlq $32,%%mm3\n\t" " movd 60(%0),%%mm4\n\t" " punpckldq %%mm4,%%mm3\n\t" " punpckldq 68(%0),%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " movd %%mm3,60(%0)\n\t" " psrlq $32,%%mm3\n\t" " movd 
%%mm3,68(%0)\n\t" " movq 24(%0),%%mm0\n\t" " movq 48(%0),%%mm1\n\t" " movd COS9+12,%%mm2\n\t" " punpckldq %%mm2,%%mm2\n\t" " movd COS9+24,%%mm3\n\t" " punpckldq %%mm3,%%mm3\n\t" " pfmul %%mm2,%%mm0\n\t" " pfmul %%mm3,%%mm1\n\t" " pushl %%eax\n\t" " movl $1,%%eax\n\t" " movd %%eax,%%mm7\n\t" " pi2fd %%mm7,%%mm7\n\t" " popl %%eax\n\t" " movq 8(%0),%%mm2\n\t" " movd COS9+4,%%mm3\n\t" " punpckldq %%mm3,%%mm3\n\t" " pfmul %%mm3,%%mm2\n\t" " pfadd %%mm0,%%mm2\n\t" " movq 40(%0),%%mm3\n\t" " movd COS9+20,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " pfadd %%mm3,%%mm2\n\t" " movq 56(%0),%%mm3\n\t" " movd COS9+28,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " pfadd %%mm3,%%mm2\n\t" " movq (%0),%%mm3\n\t" " movq 16(%0),%%mm4\n\t" " movd COS9+8,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " movq 32(%0),%%mm4\n\t" " movd COS9+16,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " pfadd %%mm1,%%mm3\n\t" " movq 64(%0),%%mm4\n\t" " movd COS9+32,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " movq %%mm2,%%mm4\n\t" " pfadd %%mm3,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+0,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 108(%3),%%mm6\n\t" " punpckldq 104(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,36(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,32(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 32(%3),%%mm6\n\t" " punpckldq 36(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 32(%1),%%mm6\n\t" " punpckldq 36(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,1024(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1152(%4)\n\t" " movq %%mm3,%%mm4\n\t" " pfsub %%mm2,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+32,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " 
pfacc %%mm5,%%mm5\n\t" " movd 140(%3),%%mm6\n\t" " punpckldq 72(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,68(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,0(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 0(%3),%%mm6\n\t" " punpckldq 68(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 0(%1),%%mm6\n\t" " punpckldq 68(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,0(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,2176(%4)\n\t" " movq 8(%0),%%mm2\n\t" " movq 40(%0),%%mm3\n\t" " pfsub %%mm3,%%mm2\n\t" " movq 56(%0),%%mm3\n\t" " pfsub %%mm3,%%mm2\n\t" " movd COS9+12,%%mm3\n\t" " punpckldq %%mm3,%%mm3\n\t" " pfmul %%mm3,%%mm2\n\t" " movq 16(%0),%%mm3\n\t" " movq 32(%0),%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " movq 64(%0),%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " movd COS9+24,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " movq 48(%0),%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " movq (%0),%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " movq %%mm2,%%mm4\n\t" " pfadd %%mm3,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+4,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 112(%3),%%mm6\n\t" " punpckldq 100(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,40(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,28(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 28(%3),%%mm6\n\t" " punpckldq 40(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 28(%1),%%mm6\n\t" " punpckldq 40(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,896(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1280(%4)\n\t" " movq %%mm3,%%mm4\n\t" " pfsub %%mm2,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+28,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 136(%3),%%mm6\n\t" " punpckldq 76(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,64(%2)\n\t" " psrlq 
$32,%%mm5\n\t" " movd %%mm5,4(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 4(%3),%%mm6\n\t" " punpckldq 64(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 4(%1),%%mm6\n\t" " punpckldq 64(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,128(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,2048(%4)\n\t" " movq 8(%0),%%mm2\n\t" " movd COS9+20,%%mm3\n\t" " punpckldq %%mm3,%%mm3\n\t" " pfmul %%mm3,%%mm2\n\t" " pfsub %%mm0,%%mm2\n\t" " movq 40(%0),%%mm3\n\t" " movd COS9+28,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " pfsub %%mm3,%%mm2\n\t" " movq 56(%0),%%mm3\n\t" " movd COS9+4,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " pfadd %%mm3,%%mm2\n\t" " movq (%0),%%mm3\n\t" " movq 16(%0),%%mm4\n\t" " movd COS9+32,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " movq 32(%0),%%mm4\n\t" " movd COS9+8,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " pfadd %%mm1,%%mm3\n\t" " movq 64(%0),%%mm4\n\t" " movd COS9+16,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " movq %%mm2,%%mm4\n\t" " pfadd %%mm3,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+8,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 116(%3),%%mm6\n\t" " punpckldq 96(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,44(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,24(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 24(%3),%%mm6\n\t" " punpckldq 44(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 24(%1),%%mm6\n\t" " punpckldq 44(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,768(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1408(%4)\n\t" " movq %%mm3,%%mm4\n\t" " pfsub %%mm2,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+24,%%mm5\n\t" " pfmul 
%%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 132(%3),%%mm6\n\t" " punpckldq 80(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,60(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,8(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 8(%3),%%mm6\n\t" " punpckldq 60(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 8(%1),%%mm6\n\t" " punpckldq 60(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,256(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1920(%4)\n\t" " movq 8(%0),%%mm2\n\t" " movd COS9+28,%%mm3\n\t" " punpckldq %%mm3,%%mm3\n\t" " pfmul %%mm3,%%mm2\n\t" " pfsub %%mm0,%%mm2\n\t" " movq 40(%0),%%mm3\n\t" " movd COS9+4,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " pfadd %%mm3,%%mm2\n\t" " movq 56(%0),%%mm3\n\t" " movd COS9+20,%%mm4\n\t" " punpckldq %%mm4,%%mm4\n\t" " pfmul %%mm4,%%mm3\n\t" " pfsub %%mm3,%%mm2\n\t" " movq (%0),%%mm3\n\t" " movq 16(%0),%%mm4\n\t" " movd COS9+16,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " movq 32(%0),%%mm4\n\t" " movd COS9+32,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfadd %%mm4,%%mm3\n\t" " pfadd %%mm1,%%mm3\n\t" " movq 64(%0),%%mm4\n\t" " movd COS9+8,%%mm5\n\t" " punpckldq %%mm5,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " pfsub %%mm4,%%mm3\n\t" " movq %%mm2,%%mm4\n\t" " pfadd %%mm3,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+12,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 120(%3),%%mm6\n\t" " punpckldq 92(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,48(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,20(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 20(%3),%%mm6\n\t" " punpckldq 48(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 20(%1),%%mm6\n\t" " punpckldq 48(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd 
%%mm5,640(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1536(%4)\n\t" " movq %%mm3,%%mm4\n\t" " pfsub %%mm2,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+20,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 128(%3),%%mm6\n\t" " punpckldq 84(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,56(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,12(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 12(%3),%%mm6\n\t" " punpckldq 56(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 12(%1),%%mm6\n\t" " punpckldq 56(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,384(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1792(%4)\n\t" " movq (%0),%%mm4\n\t" " movq 16(%0),%%mm3\n\t" " pfsub %%mm3,%%mm4\n\t" " movq 32(%0),%%mm3\n\t" " pfadd %%mm3,%%mm4\n\t" " movq 48(%0),%%mm3\n\t" " pfsub %%mm3,%%mm4\n\t" " movq 64(%0),%%mm3\n\t" " pfadd %%mm3,%%mm4\n\t" " movq %%mm7,%%mm5\n\t" " punpckldq tfcos36+16,%%mm5\n\t" " pfmul %%mm5,%%mm4\n\t" " movq %%mm4,%%mm5\n\t" " pfacc %%mm5,%%mm5\n\t" " movd 124(%3),%%mm6\n\t" " punpckldq 88(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd %%mm5,52(%2)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,16(%2)\n\t" " movq %%mm4,%%mm6\n\t" " punpckldq %%mm6,%%mm5\n\t" " pfsub %%mm6,%%mm5\n\t" " punpckhdq %%mm5,%%mm5\n\t" " movd 16(%3),%%mm6\n\t" " punpckldq 52(%3),%%mm6\n\t" " pfmul %%mm6,%%mm5\n\t" " movd 16(%1),%%mm6\n\t" " punpckldq 52(%1),%%mm6\n\t" " pfadd %%mm6,%%mm5\n\t" " movd %%mm5,512(%4)\n\t" " psrlq $32,%%mm5\n\t" " movd %%mm5,1664(%4)\n\t" " femms" : :"g"(a),"g"(b),"g"(c),"g"(d),"g"(e) :"memory"); } --- NEW FILE: decode_3dnow.c --- /* * Mpeg Layer-1,2,3 audio decoder * ------------------------------ * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved. * See also 'README' * * slighlty optimized for machines without autoincrement/decrement. * The performance is highly compiler dependend. 
Maybe * the decode.c version for 'normal' processor may be faster * even for Intel processors. */ #include "../config.h" #include "mpg123.h" #ifndef CAN_COMPILE_X86_ASM #ifdef ARCH_X86 #define CAN_COMPILE_X86_ASM #endif #endif #undef HAVE_3DNOW #undef HAVE_3DNOWEX #define HAVE_3DNOW #include "decode_3dnow.h" #define HAVE_3DNOWEX #include "decode_3dnow.h" --- NEW FILE: decode_3dnow.h --- /* * Mpeg Layer-1,2,3 audio decoder * ------------------------------ * copyright (c) 1995,1996,1997 by Michael Hipp, All rights reserved. * See also 'README' * * slighlty optimized for machines without autoincrement/decrement. * The performance is highly compiler dependend. Maybe * the decode.c version for 'normal' processor may be faster * even for Intel processors. */ #ifdef HAVE_3DNOWEX int synth_1to1_3dnowex(real *bandPtr,int channel,short *samples) #else int synth_1to1_3dnow(real *bandPtr,int channel,short *samples) #endif { static real buffs[2][2][0x110]; static const int step = 2; static int bo = 1; #if 0 short *samples = (short *) (out + *pnt); #endif real *b0,(*buf)[0x110]; int clip = 0; int bo1; if(!channel) { /* channel=0 */ bo--; bo &= 0xf; buf = buffs[0]; } else { samples++; buf = buffs[1]; } if(bo & 0x1) { b0 = buf[0]; bo1 = bo; #ifdef HAVE_3DNOWEX dct64_3dnowex(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr); #else dct64_3dnow(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr); #endif } else { b0 = buf[1]; bo1 = bo+1; #ifdef HAVE_3DNOWEX dct64_3dnowex(buf[0]+bo,buf[1]+bo+1,bandPtr); #else dct64_3dnow(buf[0]+bo,buf[1]+bo+1,bandPtr); #endif } { register int j; real _tmp[2]; register int res; real *window = decwin + 16 - bo1; __asm __volatile("femms":::"memory"); for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step) { __asm __volatile( "movq (%2), %%mm0\n\t" "movq (%3), %%mm1\n\t" "pfmul %%mm1, %%mm0\n\t" "movq 8(%2), %%mm2\n\t" "movq 8(%3), %%mm3\n\t" "pfmul %%mm3, %%mm2\n\t" "movq 16(%2), %%mm4\n\t" "movq 16(%3), %%mm5\n\t" "pfmul %%mm5, %%mm4\n\t" "movq 24(%2), %%mm6\n\t" "movq 24(%3), 
%%mm7\n\t" "pfmul %%mm7, %%mm6\n\t" "pfadd %%mm6, %%mm4\n\t" "pfadd %%mm2, %%mm0\n\t" "pfadd %%mm4, %%mm0\n\t" "movq %%mm0, (%1)\n\t" "movq 32(%2), %%mm0\n\t" "movq 32(%3), %%mm1\n\t" "pfmul %%mm1, %%mm0\n\t" "movq 40(%2), %%mm2\n\t" "movq 40(%3), %%mm3\n\t" "pfmul %%mm3, %%mm2\n\t" "movq 48(%2), %%mm4\n\t" "movq 48(%3), %%mm5\n\t" "pfmul %%mm5, %%mm4\n\t" "movq 56(%2), %%mm6\n\t" "movq 56(%3), %%mm7\n\t" "pfmul %%mm7, %%mm6\n\t" "pfadd %%mm6, %%mm4\n\t" "pfadd %%mm2, %%mm0\n\t" "pfadd %%mm4, %%mm0\n\t" "pfadd (%1), %%mm0\n\t" #ifdef HAVE_3DNOWEX "pfpnacc %%mm0, %%mm0\n\t" #else "movq %%mm0, %%mm1\n\t" "psrlq $32, %%mm1\n\t" "pfsub %%mm1, %%mm0\n\t" #endif "movd %4, %%mm6\n\t" "pfmul %%mm6, %%mm0\n\t" #ifdef HAVE_3DNOWEX "pf2iw %%mm0, %%mm0\n\t" #else "pf2id %%mm0, %%mm0\n\t" "packssdw %%mm0, %%mm0\n\t" #endif "movd %%mm0, %0" :"=r"(res) :"r"(_tmp), "r"(window), "r"(b0), "m"(mp3_scaler) :"memory"); *samples = res; } __asm __volatile( "movd (%1), %%mm0\n\t" "movd (%2), %%mm1\n\t" "punpckldq 8(%1), %%mm0\n\t" "punpckldq 8(%2), %%mm1\n\t" "pfmul %%mm1, %%mm0\n\t" "movd 16(%1), %%mm2\n\t" "movd 16(%2), %%mm3\n\t" "punpckldq 24(%1), %%mm2\n\t" "punpckldq 24(%2), %%mm3\n\t" "pfmul %%mm3, %%mm2\n\t" "movd 32(%1), %%mm4\n\t" "movd 32(%2), %%mm5\n\t" "punpckldq 40(%1), %%mm4\n\t" "punpckldq 40(%2), %%mm5\n\t" "pfmul %%mm5, %%mm4\n\t" "movd 48(%1), %%mm6\n\t" "movd 48(%2), %%mm7\n\t" "punpckldq 56(%1), %%mm6\n\t" "punpckldq 56(%2), %%mm7\n\t" "pfmul %%mm7, %%mm6\n\t" "pfadd %%mm2, %%mm0\n\t" "movd %3, %%mm7\n\t" "pfadd %%mm6, %%mm4\n\t" "pfadd %%mm4, %%mm0\n\t" "pfacc %%mm0, %%mm0\n\t" "pfmul %%mm7, %%mm0\n\t" #ifdef HAVE_3DNOWEX "pf2iw %%mm0, %%mm0\n\t" #else "pf2id %%mm0, %%mm0\n\t" "packssdw %%mm0,%%mm0\n\t" #endif "movd %%mm0, %0\n\t" :"=r"(res) :"g"(window),"g"(b0),"m"(mp3_scaler) ); *samples = res; b0-=0x10,window-=0x20,samples+=step; window += bo1<<1; for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step) { __asm __volatile( #ifdef HAVE_3DNOWEX "pswapd -8(%2), 
%%mm0\n\t" "movq (%3), %%mm1\n\t" #else "movd -4(%2), %%mm0\n\t" "movq (%3), %%mm1\n\t" "punpckldq -8(%2), %%mm0\n\t" #endif "pfmul %%mm1, %%mm0\n\t" #ifdef HAVE_3DNOWEX "pswapd -16(%2),%%mm2\n\t" "movq 8(%3), %%mm3\n\t" #else "movd -12(%2),%%mm2\n\t" "movq 8(%3), %%mm3\n\t" "punpckldq -16(%2),%%mm2\n\t" #endif "pfmul %%mm3, %%mm2\n\t" #ifdef HAVE_3DNOWEX "pswapd -24(%2),%%mm4\n\t" "movq 16(%3), %%mm5\n\t" #else "movd -20(%2),%%mm4\n\t" "movq 16(%3), %%mm5\n\t" "punpckldq -24(%2),%%mm4\n\t" #endif "pfmul %%mm5, %%mm4\n\t" #ifdef HAVE_3DNOWEX "pswapd -32(%2),%%mm6\n\t" "movq 24(%3), %%mm7\n\t" #else "movd -28(%2),%%mm6\n\t" "movq 24(%3), %%mm7\n\t" "punpckldq -32(%2),%%mm6\n\t" #endif "pfmul %%mm7, %%mm6\n\t" "pfadd %%mm2, %%mm0\n\t" "pfadd %%mm6, %%mm4\n\t" "pfadd %%mm4, %%mm0\n\t" "movq %%mm0, (%1)\n\t" #ifdef HAVE_3DNOWEX "pswapd -40(%2),%%mm0\n\t" "movq 32(%3), %%mm1\n\t" #else "movd -36(%2),%%mm0\n\t" "movq 32(%3), %%mm1\n\t" "punpckldq -40(%2), %%mm0\n\t" #endif "pfmul %%mm1, %%mm0\n\t" #ifdef HAVE_3DNOWEX "pswapd -48(%2),%%mm2\n\t" "movq 40(%3), %%mm3\n\t" #else "movd -44(%2),%%mm2\n\t" "movq 40(%3), %%mm3\n\t" "punpckldq -48(%2),%%mm2\n\t" #endif "pfmul %%mm3, %%mm2\n\t" #ifdef HAVE_3DNOWEX "pswapd -56(%2),%%mm4\n\t" "movq 48(%3), %%mm5\n\t" #else "movd -52(%2),%%mm4\n\t" "movq 48(%3), %%mm5\n\t" "punpckldq -56(%2),%%mm4\n\t" #endif "pfmul %%mm5, %%mm4\n\t" "movd -60(%2),%%mm6\n\t" "movq 56(%3), %%mm7\n\t" "punpckldq (%2),%%mm6\n\t" "pfmul %%mm7, %%mm6\n\t" "pfadd %%mm2, %%mm0\n\t" "pfadd %%mm6, %%mm4\n\t" "pfadd %%mm4, %%mm0\n\t" "pfadd (%1), %%mm0\n\t" "pxor %%mm7, %%mm7\n\t" "pfsub %%mm0, %%mm7\n\t" "movd %4, %%mm6\n\t" "pfacc %%mm7, %%mm7\n\t" "pfmul %%mm6, %%mm7\n\t" #ifdef HAVE_3DNOWEX "pf2iw %%mm7, %%mm7\n\t" #else "pf2id %%mm7, %%mm7\n\t" "packssdw %%mm7,%%mm7\n\t" #endif "movd %%mm7, %0" :"=r"(res) :"r"(_tmp), "r"(window), "r"(b0), "m"(mp3_scaler) :"memory"); *samples=res; } } __asm __volatile("femms":::"memory"); #if 0 *pnt += 128; #endif return 
clip; } Index: dct64_3dnow.c =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/dct64_3dnow.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -d -r1.1.1.1 -r1.2 --- dct64_3dnow.c 23 Mar 2002 11:09:19 -0000 1.1.1.1 +++ dct64_3dnow.c 23 Nov 2002 19:09:56 -0000 1.2 @@ -1,914 +1,692 @@ /* -* This code was taken from http://www.mpg123.org -* See ChangeLog of mpg123-0.59s-pre.1 for detail -* Applied to mplayer by Nick Kurshev <nic...@ma...> -* Partial 3dnow! optimization by Nick Kurshev -* -* TODO: optimize scalar 3dnow! code -* Warning: Phases 7 & 8 are not tested -*/ -#define real float /* ugly - but only way */ - [...1554 lines suppressed...] -" femms\n\t" - : - :"m"(a),"m"(b),"m"(c),"m"(tmp[0]) - :"memory","%ebx","%esi","%edi"); +" movd 100(%0),%%mm5\n\t" +" pfadd %%mm5,%%mm4\n\t" +" movq %%mm4,%%mm6\n\t" +" pfadd %%mm1,%%mm6\n\t" +" movd %%mm6,192(%1)\n\t" +" psrlq $32,%%mm6\n\t" +" movd %%mm6,832(%2)\n\t" +" movd 68(%0),%%mm1\n\t" +" pfadd %%mm1,%%mm4\n\t" +" movd %%mm4,64(%1)\n\t" +" psrlq $32,%%mm4\n\t" +" movd %%mm4,960(%2)" + ::"r"(&tmp[0]),"r"(a), "r"(b), "r"(c), "r"(&tmp[128]) + :"memory","%eax"); + return; } Index: dct64_k7.c =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/dct64_k7.c,v retrieving revision 1.3 retrieving revision 1.4 diff -u -d -r1.3 -r1.4 --- dct64_k7.c 14 Nov 2002 18:49:28 -0000 1.3 +++ dct64_k7.c 23 Nov 2002 19:09:56 -0000 1.4 @@ -1,760 +1,655 @@ /* -* This code was taken from http://www.mpg123.org -* See ChangeLog of mpg123-0.59s-pre.1 for detail -* Applied to mplayer by Nick Kurshev <nic...@ma...> -* Partial 3dnowex-DSP! optimization by Nick Kurshev -* -* TODO: optimize scalar 3dnow! code -* Warning: Phases 7 & 8 are not tested -*/ -#define real float /* ugly - but only way */ - [...1353 lines suppressed...] 
- :"m"(a),"m"(b),"m"(c),"m"(tmp[0]) - :"memory","%ebx","%esi","%edi"); -} \ No newline at end of file +" movd 100(%0),%%mm5\n\t" +" pfadd %%mm5,%%mm4\n\t" +" movq %%mm4,%%mm6\n\t" +" pfadd %%mm1,%%mm6\n\t" +" movd %%mm6,192(%1)\n\t" +" psrlq $32,%%mm6\n\t" +" movd %%mm6,832(%2)\n\t" +" movd 68(%0),%%mm1\n\t" +" pfadd %%mm1,%%mm4\n\t" +" movd %%mm4,64(%1)\n\t" +" psrlq $32,%%mm4\n\t" +" movd %%mm4,960(%2)" + ::"r"(&tmp[0]),"r"(a), "r"(b), "r"(c), "r"(&tmp[128]) + :"memory","%eax"); + return; +} Index: Makefile =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/Makefile,v retrieving revision 1.6 retrieving revision 1.7 diff -u -d -r1.6 -r1.7 --- Makefile 1 Sep 2002 12:41:40 -0000 1.6 +++ Makefile 23 Nov 2002 19:09:56 -0000 1.7 @@ -15,18 +15,12 @@ CFLAGS += -fPIC endif ifeq ($(TARGET_ARCH_X86),yes) -SRCS += decode_i586.c -OBJS += decode_i586.o SRCS += decode_MMX.c dct64_MMX.c tabinit_MMX.c OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o -#ifeq ($(TARGET_SSE),yes) -#SRCS += dct64_sse.s -#OBJS += dct64_sse.o -#endif -SRCS += dct36_3dnow.s dct64_3dnow.c -OBJS += dct36_3dnow.o dct64_3dnow.o -SRCS += dct36_k7.c dct64_k7.c -OBJS += dct36_k7.o dct64_k7.o +SRCS += dct36_3dnow.c dct64_3dnow.c decode_3dnow.c +OBJS += dct36_3dnow.o dct64_3dnow.o decode_3dnow.o +SRCS += dct64_k7.c +OBJS += dct64_k7.o endif .SUFFIXES: .c .o Index: mpg123.h =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/mpg123.h,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -d -r1.1.1.1 -r1.2 --- mpg123.h 23 Mar 2002 11:09:19 -0000 1.1.1.1 +++ mpg123.h 23 Nov 2002 19:09:56 -0000 1.2 @@ -16,8 +16,8 @@ #undef MPG123_REMOTE /* Get rid of this stuff for Win32 */ +typedef float real; /* -# define real float # define real long double # define real double #include "audio.h" @@ -127,3 +127,5 @@ typedef int (*synth_func_t)( real *,int,short * ); typedef void 
(*dct36_func_t)(real *,real *,real *,real *,real *); + +extern real mp3_scaler; Index: sr1.c =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/sr1.c,v retrieving revision 1.4 retrieving revision 1.5 diff -u -d -r1.4 -r1.5 --- sr1.c 20 Nov 2002 09:23:54 -0000 1.4 +++ sr1.c 23 Nov 2002 19:09:56 -0000 1.5 @@ -30,7 +30,7 @@ #ifdef ARCH_X86 #define CAN_COMPILE_X86_ASM #endif -#undef CAN_COMPILE_X86_ASM + //static FILE* mp3_file=NULL; int MP3_frames=0; @@ -46,7 +46,7 @@ int MP3_bps=2; static long outscale = 32768; -static float mp3_scaler=1.; +float mp3_scaler=1.; #include "tabinit.c" #if 1 @@ -370,6 +370,8 @@ static int tables_done_flag=0; /* It's hidden from gcc in assembler */ +extern int synth_1to1_3dnow(real *bandPtr,int channel,short *samples); +extern int synth_1to1_3dnowex(real *bandPtr,int channel,short *samples); extern void __attribute__((__stdcall__)) dct64_MMX(real *, real *, real *); extern void __attribute__((__stdcall__)) dct64_MMX_3dnow(real *, real *, real *); extern void __attribute__((__stdcall__)) dct64_MMX_3dnowex(real *, real *, real *); @@ -378,6 +380,9 @@ // Init decoder tables. Call first, once! 
void MP3_Init(int fakemono,unsigned accel,int (*audio_read)(char *buf,int size),const char *param){ +#ifdef CAN_COMPILE_X86_ASM + int use_mmx; +#endif mplayer_audio_read = audio_read; if(param) { @@ -385,17 +390,22 @@ } printf("mp3lib: using scaler=%f\n",mp3_scaler); #ifdef CAN_COMPILE_X86_ASM - if (accel & MM_ACCEL_X86_MMX) + use_mmx = accel & MM_ACCEL_X86_MMX && + /*!(accel & MM_ACCEL_X86_SSE) &&*/ + !(accel & MM_ACCEL_X86_3DNOW) && + mp3_scaler == 1.; + if (use_mmx) { _has_mmx = 1; make_decode_tables_MMX(outscale); printf("mp3lib: made decode tables with MMX optimization\n"); } else - make_decode_tables(outscale); -#else - make_decode_tables(outscale); #endif + { + printf("mp3lib: made common decode tables\n"); + make_decode_tables(outscale); + } if (fakemono == 1) fr.synth = synth_1to1_l; @@ -426,21 +436,19 @@ #endif if (accel & MM_ACCEL_X86_3DNOWEXT) { - synth_func=synth_1to1_MMX; - dct36_func=dct36_3dnowex; - dct64_MMX_func=dct64_MMX_3dnowex; + synth_func=synth_1to1_3dnowex; + dct36_func=dct36_3dnow; printf("mp3lib: using 3DNow!Ex optimized decore!\n"); } else if (accel & MM_ACCEL_X86_3DNOW) { - synth_func = synth_1to1_MMX; + synth_func = synth_1to1_3dnow; dct36_func = dct36_3dnow; - dct64_MMX_func = dct64_MMX_3dnow; printf("mp3lib: using 3DNow! 
optimized decore!\n"); } else - if (accel & MM_ACCEL_X86_MMX) + if (use_mmx) { synth_func = synth_1to1_MMX; dct64_MMX_func = dct64_MMX; Index: test2.c =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/test2.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- test2.c 1 Sep 2002 12:41:40 -0000 1.2 +++ test2.c 23 Nov 2002 19:09:56 -0000 1.3 @@ -32,7 +32,7 @@ if(!mp3file){ printf("file not found\n"); exit(1); } // MPEG Audio: - MP3_Init(0,0,mplayer_audio_read); + MP3_Init(0,0,mplayer_audio_read,"scaler=0.2"); MP3_samplerate=MP3_channels=0; len=MP3_DecodeFrame(buffer,-1); Index: test.c =================================================================== RCS file: /cvsroot/mplayerxp/mplayerxp/codecs/mp3lib/test.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- test.c 1 Sep 2002 12:41:40 -0000 1.2 +++ test.c 23 Nov 2002 19:09:56 -0000 1.3 @@ -46,7 +46,7 @@ if(!mp3file){ printf("file not found\n"); exit(1); } // MPEG Audio: - MP3_Init(0,MP3_ACCEL,mplayer_audio_read); + MP3_Init(0,MP3_ACCEL,mplayer_audio_read,"scaler=0.1"); MP3_samplerate=MP3_channels=0; time1=GetTimer(); --- dct36_3dnow.s DELETED --- --- dct36_k7.s DELETED --- --- dct64_sse.s DELETED --- --- decode_i586.c DELETED --- |