You can subscribe to this list here.
| 2008 | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov (13) | Dec (10) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2009 | Jan (5) | Feb (14) | Mar (9) | Apr (9) | May (18) | Jun (13) | Jul (5) | Aug (5) | Sep (17) | Oct (8) | Nov (12) | Dec (24) |
| 2010 | Jan (5) | Feb (2) | Mar (3) | Apr (6) | May (6) | Jun (5) | Jul (3) | Aug (16) | Sep (7) | Oct | Nov | Dec (6) |
| 2011 | Jan | Feb | Mar | Apr (12) | May (8) | Jun (26) | Jul (32) | Aug | Sep (2) | Oct | Nov | Dec |
From: <de...@us...> - 2011-09-07 15:38:19
|
Revision: 358 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=358&view=rev Author: dececco Date: 2011-09-07 15:38:13 +0000 (Wed, 07 Sep 2011) Log Message: ----------- bugfixing Modified Paths: -------------- trunk/core/fts/lang/dsp/dsp.h trunk/core/fts/lang/dsp/dspgraph.c trunk/core/fts/lang/mess/classes.c trunk/packages/control/fts/bus.c Modified: trunk/core/fts/lang/dsp/dsp.h =================================================================== --- trunk/core/fts/lang/dsp/dsp.h 2011-09-06 09:45:35 UTC (rev 357) +++ trunk/core/fts/lang/dsp/dsp.h 2011-09-07 15:38:13 UTC (rev 358) @@ -43,7 +43,7 @@ typedef struct { int ninputs, noutputs; - dsp_signal **in, **out; + dsp_signal **ins, **outs; } fts_dsp_descr_t; /* Macro to access the input and output characteristics @@ -55,20 +55,20 @@ /* get input properties */ #define fts_dsp_get_ninputs(DESC) ((DESC)->ninputs) -#define fts_dsp_get_input_name(DESC, IN) ((DESC)->in[(IN)]->name) -#define fts_dsp_get_input_size(DESC, IN) ((DESC)->in[(IN)]->length) -#define fts_dsp_get_input_srate(DESC, IN) ((DESC)->in[(IN)]->srate) +#define fts_dsp_get_input_name(DESC, IN) ((DESC)->ins[(IN)]->name) +#define fts_dsp_get_input_size(DESC, IN) ((DESC)->ins[(IN)]->length) +#define fts_dsp_get_input_srate(DESC, IN) ((DESC)->ins[(IN)]->srate) /* test for the null input special case */ -#define fts_dsp_is_input_null(DESC, IN) ((DESC)->in[(IN)]->id == 0) +#define fts_dsp_is_input_null(DESC, IN) ((DESC)->ins[(IN)]->id == 0) /* get output properties */ #define fts_dsp_get_noutputs(DESC) ((DESC)->noutputs) -#define fts_dsp_get_output_name(DESC, OUT) ((DESC)->out[(OUT)]->name) -#define fts_dsp_get_output_size(DESC, OUT) ((DESC)->out[(OUT)]->length) -#define fts_dsp_get_output_srate(DESC, OUT) ((DESC)->out[(OUT)]->srate) +#define fts_dsp_get_output_name(DESC, OUT) ((DESC)->outs[(OUT)]->name) +#define fts_dsp_get_output_size(DESC, OUT) ((DESC)->outs[(OUT)]->length) +#define fts_dsp_get_output_srate(DESC, OUT) ((DESC)->outs[(OUT)]->srate) /* 
End of macros */ Modified: trunk/core/fts/lang/dsp/dspgraph.c =================================================================== --- trunk/core/fts/lang/dsp/dspgraph.c 2011-09-06 09:45:35 UTC (rev 357) +++ trunk/core/fts/lang/dsp/dspgraph.c 2011-09-07 15:38:13 UTC (rev 358) @@ -147,7 +147,7 @@ { static char buffer[256]; - append_sigs( buffer, descr->in, descr->ninputs); + append_sigs( buffer, descr->ins, descr->ninputs); return buffer; } @@ -156,7 +156,7 @@ { static char buffer[256]; - append_sigs( buffer, descr->out, descr->noutputs); + append_sigs( buffer, descr->outs, descr->noutputs); return buffer; } @@ -225,7 +225,7 @@ If both are present, we take down. */ - for (i = 0, iop = descr->in; i < descr->ninputs; i++, iop++) + for (i = 0, iop = descr->ins; i < descr->ninputs; i++, iop++) if (*iop != sig_zero) { if (invs < 0) @@ -266,7 +266,7 @@ /* Output signals are assigned only when the output have at least one connection to a dsp object. */ - for (i = 0, iop = descr->out; i< descr->noutputs; i++, iop++) + for (i = 0, iop = descr->outs; i< descr->noutputs; i++, iop++) { /* TO BE CHANGED USING Inlet and outlet properties !!! */ *iop = Sig_new(size, globalvs); @@ -296,10 +296,10 @@ if ( node->descr->ninputs) { - node->descr->in = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * node->descr->ninputs); + node->descr->ins = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * node->descr->ninputs); } node->descr->noutputs = dsp_output_get(node->o->cl, fts_object_get_outlets_number(node->o)); - node->descr->out = 0; /* safe initialization */ + node->descr->outs = 0; /* safe initialization */ } /* For IRIX 6.2 MipsPro 7.x, we don't reuse signals between in and @@ -308,7 +308,7 @@ pointer aliasing. 
*/ - for (i = 0, sig = node->descr->in; i < node->descr->ninputs; i++, sig++) + for (i = 0, sig = node->descr->ins; i < node->descr->ninputs; i++, sig++) if (*sig) { if (*sig != sig_zero) @@ -319,7 +319,7 @@ if (node->descr->noutputs) { - node->descr->out = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * node->descr->noutputs); + node->descr->outs = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * node->descr->noutputs); } /* Now that dsp_gen_outputs compute the downsampling using object @@ -351,16 +351,16 @@ the reference count of all the outputs signals. This happens in case of dsp outlets which are not connected */ - for (i = 0, sig = node->descr->out; i < node->descr->noutputs; i++, sig++) + for (i = 0, sig = node->descr->outs; i < node->descr->noutputs; i++, sig++) if ( *sig) if ( (*sig)->refcnt == 0) Sig_free( *sig); if (node->descr->ninputs) - fts_block_free((char *) node->descr->in, sizeof(dsp_signal *) * node->descr->ninputs); + fts_block_free((char *) node->descr->ins, sizeof(dsp_signal *) * node->descr->ninputs); if (node->descr->noutputs) - fts_block_free((char *) node->descr->out, sizeof(dsp_signal *) * node->descr->noutputs); + fts_block_free((char *) node->descr->outs, sizeof(dsp_signal *) * node->descr->noutputs); fts_heap_free((char *)node->descr, dsp_descr_heap); node->descr = 0; @@ -381,7 +381,7 @@ static void *zero = 0; if ( node->descr) - sig = node->descr->out; + sig = node->descr->outs; else sig = (dsp_signal **)(&zero); @@ -438,8 +438,8 @@ dest->descr = (fts_dsp_descr_t *)fts_heap_zalloc(dsp_descr_heap); dest->descr->ninputs = ninputs; dest->descr->noutputs = dsp_output_get(dest->o->cl, fts_object_get_outlets_number(dest->o)); - dest->descr->in = 0; - dest->descr->out = 0; + dest->descr->ins = 0; + dest->descr->outs = 0; } if (ninputs) @@ -448,13 +448,13 @@ dsp_signal *previous_sig; #if 0 - if (! dest->descr->in) - dest->descr->in = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * ninputs); + if (! 
dest->descr->ins) + dest->descr->ins = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * ninputs); #else - if (! dest->descr->in) + if (! dest->descr->ins) { /* (fd) to avoid writing past the end of the dsp_descr... */ - dest->descr->in = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * fts_object_get_inlets_number(dest->o)); + dest->descr->ins = (dsp_signal **)fts_block_zalloc(sizeof(dsp_signal *) * fts_object_get_inlets_number(dest->o)); } #endif @@ -467,12 +467,12 @@ } #endif - previous_sig = dest->descr->in[nin]; + previous_sig = dest->descr->ins[nin]; if ((! previous_sig) || (previous_sig == sig_zero)) { if (sig) Sig_reference(sig); - dest->descr->in[nin] = sig; + dest->descr->ins[nin] = sig; } else { @@ -749,8 +749,8 @@ if (node->descr) { - fts_block_free((char *) node->descr->in, sizeof(dsp_signal *) * node->descr->ninputs); - fts_block_free((char *) node->descr->out, sizeof(dsp_signal *) * node->descr->noutputs); + fts_block_free((char *) node->descr->ins, sizeof(dsp_signal *) * node->descr->ninputs); + fts_block_free((char *) node->descr->outs, sizeof(dsp_signal *) * node->descr->noutputs); fts_heap_free((char *)node->descr, dsp_descr_heap); } Modified: trunk/core/fts/lang/mess/classes.c =================================================================== --- trunk/core/fts/lang/mess/classes.c 2011-09-06 09:45:35 UTC (rev 357) +++ trunk/core/fts/lang/mess/classes.c 2011-09-07 15:38:13 UTC (rev 358) @@ -1076,13 +1076,13 @@ static fts_class_mess_t * -fts_class_mess_exists(fts_inlet_decl_t *in, fts_class_mess_t *msg) +fts_class_mess_exists(fts_inlet_decl_t *inlet, fts_class_mess_t *msg) { fts_class_mess_t **mess; fts_symbol_t s = msg->tmess.symb; - int n, nmess = in->nmess; + int n, nmess = inlet->nmess; - for (mess = in->messlist, n = 0; n < nmess; mess++, n++) + for (mess = inlet->messlist, n = 0; n < nmess; mess++, n++) if ((*mess)->tmess.symb == s) return *mess; Modified: trunk/packages/control/fts/bus.c 
=================================================================== --- trunk/packages/control/fts/bus.c 2011-09-06 09:45:35 UTC (rev 357) +++ trunk/packages/control/fts/bus.c 2011-09-07 15:38:13 UTC (rev 358) @@ -65,17 +65,17 @@ static void fts_bus_free(fts_bus_t *bus) { int i; - fts_bus_input_t *in; + fts_bus_input_t *input; fts_bus_output_t *out; for (i = 0; i < bus->nch; i++) { - for (in = bus->inputs[i]; in;) + for (input = bus->inputs[i]; input;) { fts_bus_input_t *p; - p = in; - in = in->next; + p = input; + input = input->next; fts_heap_free(p, bus_input_heap); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-09-06 09:45:41
|
Revision: 357 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=357&view=rev Author: dececco Date: 2011-09-06 09:45:35 +0000 (Tue, 06 Sep 2011) Log Message: ----------- fixed linux 64 makefile to not use libporttime anymore Modified Paths: -------------- trunk/core/Makefiles/Makefile.Linux.amd64 Modified: trunk/core/Makefiles/Makefile.Linux.amd64 =================================================================== --- trunk/core/Makefiles/Makefile.Linux.amd64 2011-07-26 10:05:58 UTC (rev 356) +++ trunk/core/Makefiles/Makefile.Linux.amd64 2011-09-06 09:45:35 UTC (rev 357) @@ -57,7 +57,8 @@ CC_NONE_FLAG := --Wl,--no-whole-archive POST_CC := echo Compiled -FTS_SYS_LIBS = -lobjc -lm -ldl -laudiofile -lpthread -lportaudio -lportmidi -lporttime +FTS_SYS_LIBS = -lobjc -lm -ldl -laudiofile -lpthread -lportaudio -lportmidi +#FTS_SYS_LIBS = -lobjc -lm -ldl -laudiofile -lpthread -lportaudio -lportmidi -lporttime FTS_ARCH_LIBS = #Plaform specific linker rules This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 10:06:04
|
Revision: 356 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=356&view=rev Author: dececco Date: 2011-07-26 10:05:58 +0000 (Tue, 26 Jul 2011) Log Message: ----------- added mac loop unroll results Modified Paths: -------------- trunk/extra/tests/doTestMac.sh Added Paths: ----------- trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.unroll-loops.txt Modified: trunk/extra/tests/doTestMac.sh =================================================================== --- trunk/extra/tests/doTestMac.sh 2011-07-26 09:56:42 UTC (rev 355) +++ trunk/extra/tests/doTestMac.sh 2011-07-26 10:05:58 UTC (rev 356) @@ -2,7 +2,7 @@ # Without vector extension, gcc, gcc-llvm and clang -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc -DGCC" CCDESCR="gcc llvm 4.2" @@ -13,7 +13,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc-4.2 -DGCC" CCDESCR="gcc 4.2" @@ -23,7 +23,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="clang -DGCC" CCDESCR="clang" @@ -35,7 +35,7 @@ # With Vector Extensions, size 8 -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc -DGCC -DHAS_VECT8_EXT" CCDESCR="gcc llvm 4.2 vector extensions, size 8" @@ -45,7 +45,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc-4.2 -DGCC -DHAS_VECT8_EXT" CCDESCR="gcc 4.2 vector extensions, size 8" @@ -55,7 +55,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="clang -DGCC -DHAS_VECT8_EXT" CCDESCR="clang vector extensions, size 8" @@ -68,7 +68,7 @@ # With Vector Extensions, size 4 -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc llvm 4.2 vector extensions, size 4" @@ -78,7 +78,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc 4.2 vector 
extensions, size 4" @@ -88,7 +88,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="clang -DGCC -DHAS_VECT4_EXT" CCDESCR="clang vector extensions, size 4" @@ -100,7 +100,7 @@ # gcc only, ss2 and ss3 -FLAGS="-O3 -march=core2 -msse2 -ffast-math" +FLAGS="-O3 -funroll-loops -march=core2 -msse2 -ffast-math" CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc 4.2 vector extensions, size 4" @@ -110,7 +110,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2 -msse3 -ffast-math" +FLAGS="-O3 -funroll-loops -march=core2 -msse3 -ffast-math" CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc 4.2 vector extensions, size 4" Added: trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.unroll-loops.txt =================================================================== --- trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.unroll-loops.txt (rev 0) +++ trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.unroll-loops.txt 2011-07-26 10:05:58 UTC (rev 356) @@ -0,0 +1,89 @@ +gcc -DGCC -O3 -funroll-loops -march=core2 vectors.c -o vectors +Compiler gcc llvm 4.2 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m35.256s +user 0m35.252s +sys 0m0.004s + +Compiler gcc 4.2 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m24.978s +user 0m24.972s +sys 0m0.004s + +Compiler clang +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m35.311s +user 0m35.306s +sys 0m0.006s + +Compiler gcc llvm 4.2 vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m8.470s +user 0m8.466s +sys 0m0.002s + +Compiler gcc 4.2 vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 1m12.208s +user 1m12.201s +sys 0m0.009s + +Compiler clang vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m8.472s +user 0m8.466s +sys 0m0.002s + +Compiler gcc llvm 4.2 vector extensions, size 4 +Flags -O3 
-funroll-loops -march=core2 +Result 2097152.000000 + +real 0m11.183s +user 0m11.181s +sys 0m0.002s + +Compiler gcc 4.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m6.263s +user 0m6.262s +sys 0m0.001s + +Compiler clang vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m11.175s +user 0m11.172s +sys 0m0.002s + +Compiler gcc 4.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 -msse2 -ffast-math +Result 2097152.000000 + +real 0m6.290s +user 0m6.289s +sys 0m0.002s + +Compiler gcc 4.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 -msse3 -ffast-math +Result 2097152.000000 + +real 0m6.288s +user 0m6.286s +sys 0m0.002s + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 09:56:49
|
Revision: 355 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=355&view=rev Author: dececco Date: 2011-07-26 09:56:42 +0000 (Tue, 26 Jul 2011) Log Message: ----------- Added Mac assembler Added Paths: ----------- trunk/extra/tests/doAsmMac.sh trunk/extra/tests/mac-os-x/ trunk/extra/tests/mac-os-x/vectors.clang.s trunk/extra/tests/mac-os-x/vectors.clang.vect4.s trunk/extra/tests/mac-os-x/vectors.clang.vect8.s trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.s trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect4.s trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect8.s trunk/extra/tests/mac-os-x/vectors.gcc.4.2.s trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect4.s trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect8.s Added: trunk/extra/tests/doAsmMac.sh =================================================================== --- trunk/extra/tests/doAsmMac.sh (rev 0) +++ trunk/extra/tests/doAsmMac.sh 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,67 @@ +#!/bin/bash + +# create directory for assembler + +/bin/rm -rf $1 +mkdir -p $1 + +# Without vector extension, gcc, gcc-llvm and clang + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC" +CCDESCR="gcc-llvm.4.2" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -march=core2" +CC="gcc-4.2 -DGCC" +CCDESCR="gcc.4.2" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -march=core2" +CC="clang -DGCC" +CCDESCR="clang" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +# With Vector Extensions, size 8 + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC -DHAS_VECT8_EXT" +CCDESCR="gcc-llvm.4.2.vect8" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -march=core2" +CC="gcc-4.2 -DGCC -DHAS_VECT8_EXT" +CCDESCR="gcc.4.2.vect8" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT8_EXT" +CCDESCR="clang.vect8" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +# With Vector Extensions, size 4 + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC 
-DHAS_VECT4_EXT" +CCDESCR="gcc-llvm.4.2.vect4" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -march=core2" +CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc.4.2.vect4" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT4_EXT" +CCDESCR="clang.vect4" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + Property changes on: trunk/extra/tests/doAsmMac.sh ___________________________________________________________________ Added: svn:executable + * Added: trunk/extra/tests/mac-os-x/vectors.clang.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.clang.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.clang.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,161 @@ + .section __TEXT,__text,regular,pure_instructions + .section __TEXT,__literal8,8byte_literals + .align 3 +LCPI0_0: + .quad 4613937818241073152 ## double 3.000000e+00 +LCPI0_1: + .quad 4727228949921267712 ## double 1.111111e+08 +LCPI0_2: + .quad 4501392635919089375 ## double 9.000000e-08 +LCPI0_3: + .quad 4666723172467343360 ## double 1.000000e+04 +LCPI0_4: + .quad 4516603487517057909 ## double 9.000001e-07 +LCPI0_5: + .quad 4607182418800017408 ## double 1.000000e+00 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4, 0x90 +_main: ## @main +Ltmp2: + .cfi_startproc +## BB#0: + pushq %rbp +Ltmp0: +Ltmp3: + .cfi_def_cfa_offset 16 +Ltmp4: + .cfi_offset 6, -16 + movq %rsp, %rbp +Ltmp1: +Ltmp5: + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4128, %rsp ## imm = 0x1020 + cmpl $2, %edi + je LBB0_2 +## BB#1: + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + leaq L_.str(%rip), %rdi + movl $75, %esi + movl $1, %edx + callq _fwrite + movl $-1, %eax + jmp LBB0_14 +LBB0_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd LCPI0_0(%rip), %xmm0 + divsd LCPI0_1(%rip), %xmm0 + xorl %eax, %eax + cvtsd2ss %xmm0, 
%xmm0 + movsd LCPI0_2(%rip), %xmm1 + movq %rax, %rcx + .align 4, 0x90 +LBB0_3: ## =>This Inner Loop Header: Depth=1 + movss %xmm0, 3072(%rsp,%rcx,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_3 +## BB#4: + movsd LCPI0_3(%rip), %xmm1 + movsd LCPI0_4(%rip), %xmm2 + movsd LCPI0_5(%rip), %xmm3 + xorl %ecx, %ecx + .align 4, 0x90 +LBB0_5: ## %.preheader15 + ## =>This Inner Loop Header: Depth=1 + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + addsd %xmm3, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax ## imm = 0x100 + jne LBB0_5 +## BB#6: + movsd LCPI0_2(%rip), %xmm1 + movl $100000000, %eax ## imm = 0x5F5E100 + .align 4, 0x90 +LBB0_7: ## %.preheader12 + ## =>This Inner Loop Header: Depth=1 + movss %xmm0, 1024(%rsp,%rcx,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_7 + .align 4, 0x90 +LBB0_8: ## %.preheader9 + ## =>This Loop Header: Depth=1 + ## Child Loop BB0_10 Depth 2 + ## Child Loop BB0_9 Depth 2 + xorl %ecx, %ecx + movq %rcx, %rdx + .align 4, 0x90 +LBB0_9: ## Parent Loop BB0_8 Depth=1 + ## => This Inner Loop Header: Depth=2 + movss 3072(%rsp,%rdx,4), %xmm0 + mulss 2048(%rsp,%rdx,4), %xmm0 + movss %xmm0, (%rsp,%rdx,4) + incq %rdx + cmpq $256, %rdx ## imm = 0x100 + jne LBB0_9 + .align 4, 0x90 +LBB0_10: ## %mul3_vec.exit + ## Parent Loop BB0_8 Depth=1 + ## => This Inner Loop Header: Depth=2 + movss (%rsp,%rcx,4), %xmm0 + addss 1024(%rsp,%rcx,4), %xmm0 + movss %xmm0, 3072(%rsp,%rcx,4) + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_10 +## BB#11: ## %add3_vec.exit + ## in Loop: Header=BB0_8 Depth=1 + pxor %xmm0, %xmm0 + xorl %ecx, %ecx + decl %eax + jne LBB0_8 + .align 4, 0x90 +LBB0_12: ## %.preheader + ## =>This Inner Loop Header: Depth=1 + movss 3072(%rsp,%rcx,4), %xmm1 + cvtss2sd %xmm1, %xmm1 
+ addsd %xmm1, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_12 +## BB#13: + leaq L_.str1(%rip), %rdi + movb $1, %al + callq _printf + xorl %eax, %eax +LBB0_14: + movq %rbp, %rsp + popq %rbp + ret +Ltmp6: + .cfi_endproc +Leh_func_end0: + + .section __TEXT,__cstring,cstring_literals + .align 4 ## @.str +L_.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + +L_.str1: ## @.str1 + .asciz "Result %f\n" + + +.subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.clang.vect4.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.clang.vect4.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.clang.vect4.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,161 @@ + .section __TEXT,__text,regular,pure_instructions + .section __TEXT,__literal8,8byte_literals + .align 3 +LCPI0_0: + .quad 4613937818241073152 ## double 3.000000e+00 +LCPI0_1: + .quad 4727228949921267712 ## double 1.111111e+08 +LCPI0_2: + .quad 4501392635919089375 ## double 9.000000e-08 +LCPI0_3: + .quad 4666723172467343360 ## double 1.000000e+04 +LCPI0_4: + .quad 4516603487517057909 ## double 9.000001e-07 +LCPI0_5: + .quad 4607182418800017408 ## double 1.000000e+00 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4, 0x90 +_main: ## @main +Ltmp2: + .cfi_startproc +## BB#0: + pushq %rbp +Ltmp0: +Ltmp3: + .cfi_def_cfa_offset 16 +Ltmp4: + .cfi_offset 6, -16 + movq %rsp, %rbp +Ltmp1: +Ltmp5: + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4128, %rsp ## imm = 0x1020 + cmpl $2, %edi + je LBB0_2 +## BB#1: + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + leaq L_.str(%rip), %rdi + movl $75, %esi + movl $1, %edx + callq _fwrite + movl $-1, %eax + jmp LBB0_14 +LBB0_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd LCPI0_0(%rip), %xmm0 + divsd LCPI0_1(%rip), %xmm0 + xorl %eax, %eax + cvtsd2ss %xmm0, %xmm0 + movsd 
LCPI0_2(%rip), %xmm1 + movq %rax, %rcx + .align 4, 0x90 +LBB0_3: ## =>This Inner Loop Header: Depth=1 + movss %xmm0, 3072(%rsp,%rcx,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_3 +## BB#4: + movsd LCPI0_3(%rip), %xmm1 + movsd LCPI0_4(%rip), %xmm2 + movsd LCPI0_5(%rip), %xmm3 + xorl %ecx, %ecx + .align 4, 0x90 +LBB0_5: ## %.preheader19 + ## =>This Inner Loop Header: Depth=1 + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + addsd %xmm3, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax ## imm = 0x100 + jne LBB0_5 +## BB#6: + movsd LCPI0_2(%rip), %xmm1 + movl $100000000, %eax ## imm = 0x5F5E100 + .align 4, 0x90 +LBB0_7: ## %.preheader16 + ## =>This Inner Loop Header: Depth=1 + movss %xmm0, 1024(%rsp,%rcx,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_7 + .align 4, 0x90 +LBB0_8: ## %.preheader13 + ## =>This Loop Header: Depth=1 + ## Child Loop BB0_10 Depth 2 + ## Child Loop BB0_9 Depth 2 + xorl %ecx, %ecx + movq %rcx, %rdx + .align 4, 0x90 +LBB0_9: ## Parent Loop BB0_8 Depth=1 + ## => This Inner Loop Header: Depth=2 + movaps 3072(%rsp,%rdx), %xmm0 + mulps 2048(%rsp,%rdx), %xmm0 + movaps %xmm0, (%rsp,%rdx) + addq $16, %rdx + cmpq $1024, %rdx ## imm = 0x400 + jne LBB0_9 + .align 4, 0x90 +LBB0_10: ## %mul3_vec.exit + ## Parent Loop BB0_8 Depth=1 + ## => This Inner Loop Header: Depth=2 + movaps (%rsp,%rcx), %xmm0 + addps 1024(%rsp,%rcx), %xmm0 + movaps %xmm0, 3072(%rsp,%rcx) + addq $16, %rcx + cmpq $1024, %rcx ## imm = 0x400 + jne LBB0_10 +## BB#11: ## %add3_vec.exit + ## in Loop: Header=BB0_8 Depth=1 + pxor %xmm0, %xmm0 + xorl %ecx, %ecx + decl %eax + jne LBB0_8 + .align 4, 0x90 +LBB0_12: ## %.preheader + ## =>This Inner Loop Header: Depth=1 + movss 3072(%rsp,%rcx,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd 
%xmm1, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_12 +## BB#13: + leaq L_.str1(%rip), %rdi + movb $1, %al + callq _printf + xorl %eax, %eax +LBB0_14: + movq %rbp, %rsp + popq %rbp + ret +Ltmp6: + .cfi_endproc +Leh_func_end0: + + .section __TEXT,__cstring,cstring_literals + .align 4 ## @.str +L_.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + +L_.str1: ## @.str1 + .asciz "Result %f\n" + + +.subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.clang.vect8.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.clang.vect8.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.clang.vect8.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,167 @@ + .section __TEXT,__text,regular,pure_instructions + .section __TEXT,__literal8,8byte_literals + .align 3 +LCPI0_0: + .quad 4613937818241073152 ## double 3.000000e+00 +LCPI0_1: + .quad 4727228949921267712 ## double 1.111111e+08 +LCPI0_2: + .quad 4501392635919089375 ## double 9.000000e-08 +LCPI0_3: + .quad 4666723172467343360 ## double 1.000000e+04 +LCPI0_4: + .quad 4516603487517057909 ## double 9.000001e-07 +LCPI0_5: + .quad 4607182418800017408 ## double 1.000000e+00 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4, 0x90 +_main: ## @main +Ltmp2: + .cfi_startproc +## BB#0: + pushq %rbp +Ltmp0: +Ltmp3: + .cfi_def_cfa_offset 16 +Ltmp4: + .cfi_offset 6, -16 + movq %rsp, %rbp +Ltmp1: +Ltmp5: + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4128, %rsp ## imm = 0x1020 + cmpl $2, %edi + je LBB0_2 +## BB#1: + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + leaq L_.str(%rip), %rdi + movl $75, %esi + movl $1, %edx + callq _fwrite + movl $-1, %eax + jmp LBB0_14 +LBB0_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd LCPI0_0(%rip), %xmm0 + divsd LCPI0_1(%rip), %xmm0 + xorl %eax, %eax + cvtsd2ss %xmm0, %xmm0 + movsd 
LCPI0_2(%rip), %xmm1 + movq %rax, %rcx + .align 4, 0x90 +LBB0_3: ## =>This Inner Loop Header: Depth=1 + movss %xmm0, 3072(%rsp,%rcx,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_3 +## BB#4: + movsd LCPI0_3(%rip), %xmm1 + movsd LCPI0_4(%rip), %xmm2 + movsd LCPI0_5(%rip), %xmm3 + xorl %ecx, %ecx + .align 4, 0x90 +LBB0_5: ## %.preheader19 + ## =>This Inner Loop Header: Depth=1 + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + addsd %xmm3, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax ## imm = 0x100 + jne LBB0_5 +## BB#6: + movsd LCPI0_2(%rip), %xmm1 + movl $100000000, %eax ## imm = 0x5F5E100 + .align 4, 0x90 +LBB0_7: ## %.preheader16 + ## =>This Inner Loop Header: Depth=1 + movss %xmm0, 1024(%rsp,%rcx,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_7 + .align 4, 0x90 +LBB0_8: ## %.preheader13 + ## =>This Loop Header: Depth=1 + ## Child Loop BB0_10 Depth 2 + ## Child Loop BB0_9 Depth 2 + xorl %ecx, %ecx + movq %rcx, %rdx + .align 4, 0x90 +LBB0_9: ## Parent Loop BB0_8 Depth=1 + ## => This Inner Loop Header: Depth=2 + movaps 3072(%rsp,%rdx), %xmm0 + movaps 3088(%rsp,%rdx), %xmm1 + mulps 2064(%rsp,%rdx), %xmm1 + mulps 2048(%rsp,%rdx), %xmm0 + movaps %xmm0, (%rsp,%rdx) + movaps %xmm1, 16(%rsp,%rdx) + addq $32, %rdx + cmpq $1024, %rdx ## imm = 0x400 + jne LBB0_9 + .align 4, 0x90 +LBB0_10: ## %mul3_vec.exit + ## Parent Loop BB0_8 Depth=1 + ## => This Inner Loop Header: Depth=2 + movaps (%rsp,%rcx), %xmm0 + movaps 16(%rsp,%rcx), %xmm1 + addps 1040(%rsp,%rcx), %xmm1 + addps 1024(%rsp,%rcx), %xmm0 + movaps %xmm0, 3072(%rsp,%rcx) + movaps %xmm1, 3088(%rsp,%rcx) + addq $32, %rcx + cmpq $1024, %rcx ## imm = 0x400 + jne LBB0_10 +## BB#11: ## %add3_vec.exit + ## in Loop: Header=BB0_8 Depth=1 + pxor %xmm0, %xmm0 + 
xorl %ecx, %ecx + decl %eax + jne LBB0_8 + .align 4, 0x90 +LBB0_12: ## %.preheader + ## =>This Inner Loop Header: Depth=1 + movss 3072(%rsp,%rcx,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rcx + cmpq $256, %rcx ## imm = 0x100 + jne LBB0_12 +## BB#13: + leaq L_.str1(%rip), %rdi + movb $1, %al + callq _printf + xorl %eax, %eax +LBB0_14: + movq %rbp, %rsp + popq %rbp + ret +Ltmp6: + .cfi_endproc +Leh_func_end0: + + .section __TEXT,__cstring,cstring_literals + .align 4 ## @.str +L_.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + +L_.str1: ## @.str1 + .asciz "Result %f\n" + + +.subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,186 @@ + .section __TEXT,__text,regular,pure_instructions + .section __TEXT,__literal8,8byte_literals + .align 3 +LCPI1_0: + .quad 4613937818241073152 +LCPI1_1: + .quad 4727228949921267712 +LCPI1_2: + .quad 4501392635919089375 +LCPI1_3: + .quad 4666723172467343360 +LCPI1_4: + .quad 4607182418800017408 +LCPI1_5: + .quad 4516603487517057909 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4, 0x90 +_main: +Leh_func_begin1: + pushq %rbp +Ltmp0: + movq %rsp, %rbp +Ltmp1: + andq $-32, %rsp + subq $4128, %rsp +Ltmp2: + cmpl $2, %edi + je LBB1_2 + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + leaq L_.str(%rip), %rdi + movl $1, %esi + movl $75, %edx + callq _fwrite + movl $-1, %eax + jmp LBB1_17 +LBB1_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd LCPI1_0(%rip), %xmm0 + divsd LCPI1_1(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd LCPI1_2(%rip), %xmm1 + .align 4, 0x90 +LBB1_3: + movss %xmm0, 3072(%rsp,%rax,4) + incq %rax + cmpq 
$256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_3 + xorl %eax, %eax + movsd LCPI1_3(%rip), %xmm1 + movsd LCPI1_4(%rip), %xmm2 + movsd LCPI1_5(%rip), %xmm3 + .align 4, 0x90 +LBB1_5: + cvtss2sd %xmm0, %xmm0 + movapd %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_5 + xorl %eax, %eax + movsd LCPI1_2(%rip), %xmm1 + .align 4, 0x90 +LBB1_7: + movss %xmm0, 1024(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_7 + movl $100000000, %eax + .align 4, 0x90 +LBB1_9: + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_10: + movss 3072(%rsp,%rcx,4), %xmm0 + mulss 2048(%rsp,%rcx,4), %xmm0 + movss %xmm0, (%rsp,%rcx,4) + incq %rcx + cmpq $256, %rcx + jne LBB1_10 + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_12: + movss (%rsp,%rcx,4), %xmm0 + addss 1024(%rsp,%rcx,4), %xmm0 + movss %xmm0, 3072(%rsp,%rcx,4) + incq %rcx + cmpq $256, %rcx + jne LBB1_12 + decl %eax + jne LBB1_9 + pxor %xmm0, %xmm0 + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_15: + movss 3072(%rsp,%rcx,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rcx + cmpq $256, %rcx + jne LBB1_15 + leaq L_.str1(%rip), %rdi + movb $1, %al + callq _printf + xorl %eax, %eax +LBB1_17: + movq %rbp, %rsp + popq %rbp + ret +Leh_func_end1: + + .section __TEXT,__cstring,cstring_literals + .align 3 +L_.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + +L_.str1: + .asciz "Result %f\n" + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame0: +Lsection_eh_frame: +Leh_frame_common: +Lset0 = Leh_frame_common_end-Leh_frame_common_begin + .long Lset0 +Leh_frame_common_begin: + .long 0 + .byte 1 + .asciz "zR" + .byte 1 + .byte 120 + .byte 16 + .byte 1 + .byte 16 + .byte 12 + .byte 7 + .byte 8 + .byte 144 + .byte 1 + 
.align 3 +Leh_frame_common_end: + .globl _main.eh +_main.eh: +Lset1 = Leh_frame_end1-Leh_frame_begin1 + .long Lset1 +Leh_frame_begin1: +Lset2 = Leh_frame_begin1-Leh_frame_common + .long Lset2 +Ltmp3: + .quad Leh_func_begin1-Ltmp3 +Lset3 = Leh_func_end1-Leh_func_begin1 + .quad Lset3 + .byte 0 + .byte 4 +Lset4 = Ltmp0-Leh_func_begin1 + .long Lset4 + .byte 14 + .byte 16 + .byte 134 + .byte 2 + .byte 4 +Lset5 = Ltmp1-Ltmp0 + .long Lset5 + .byte 13 + .byte 6 + .align 3 +Leh_frame_end1: + + +.subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect4.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect4.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect4.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,186 @@ + .section __TEXT,__text,regular,pure_instructions + .section __TEXT,__literal8,8byte_literals + .align 3 +LCPI1_0: + .quad 4613937818241073152 +LCPI1_1: + .quad 4727228949921267712 +LCPI1_2: + .quad 4501392635919089375 +LCPI1_3: + .quad 4666723172467343360 +LCPI1_4: + .quad 4607182418800017408 +LCPI1_5: + .quad 4516603487517057909 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4, 0x90 +_main: +Leh_func_begin1: + pushq %rbp +Ltmp0: + movq %rsp, %rbp +Ltmp1: + andq $-32, %rsp + subq $4128, %rsp +Ltmp2: + cmpl $2, %edi + je LBB1_2 + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + leaq L_.str(%rip), %rdi + movl $1, %esi + movl $75, %edx + callq _fwrite + movl $-1, %eax + jmp LBB1_17 +LBB1_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd LCPI1_0(%rip), %xmm0 + divsd LCPI1_1(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd LCPI1_2(%rip), %xmm1 + .align 4, 0x90 +LBB1_3: + movss %xmm0, 3072(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_3 + xorl %eax, %eax + movsd LCPI1_3(%rip), 
%xmm1 + movsd LCPI1_4(%rip), %xmm2 + movsd LCPI1_5(%rip), %xmm3 + .align 4, 0x90 +LBB1_5: + cvtss2sd %xmm0, %xmm0 + movapd %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_5 + xorl %eax, %eax + movsd LCPI1_2(%rip), %xmm1 + .align 4, 0x90 +LBB1_7: + movss %xmm0, 1024(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_7 + movl $100000000, %eax + .align 4, 0x90 +LBB1_9: + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_10: + movaps 3072(%rsp,%rcx), %xmm0 + mulps 2048(%rsp,%rcx), %xmm0 + movaps %xmm0, (%rsp,%rcx) + addq $16, %rcx + cmpq $1024, %rcx + jne LBB1_10 + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_12: + movaps (%rsp,%rcx), %xmm0 + addps 1024(%rsp,%rcx), %xmm0 + movaps %xmm0, 3072(%rsp,%rcx) + addq $16, %rcx + cmpq $1024, %rcx + jne LBB1_12 + decl %eax + jne LBB1_9 + pxor %xmm0, %xmm0 + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_15: + movss 3072(%rsp,%rcx,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rcx + cmpq $256, %rcx + jne LBB1_15 + leaq L_.str1(%rip), %rdi + movb $1, %al + callq _printf + xorl %eax, %eax +LBB1_17: + movq %rbp, %rsp + popq %rbp + ret +Leh_func_end1: + + .section __TEXT,__cstring,cstring_literals + .align 3 +L_.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + +L_.str1: + .asciz "Result %f\n" + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame0: +Lsection_eh_frame: +Leh_frame_common: +Lset0 = Leh_frame_common_end-Leh_frame_common_begin + .long Lset0 +Leh_frame_common_begin: + .long 0 + .byte 1 + .asciz "zR" + .byte 1 + .byte 120 + .byte 16 + .byte 1 + .byte 16 + .byte 12 + .byte 7 + .byte 8 + .byte 144 + .byte 1 + .align 3 +Leh_frame_common_end: + .globl _main.eh +_main.eh: +Lset1 = Leh_frame_end1-Leh_frame_begin1 + .long Lset1 
+Leh_frame_begin1: +Lset2 = Leh_frame_begin1-Leh_frame_common + .long Lset2 +Ltmp3: + .quad Leh_func_begin1-Ltmp3 +Lset3 = Leh_func_end1-Leh_func_begin1 + .quad Lset3 + .byte 0 + .byte 4 +Lset4 = Ltmp0-Leh_func_begin1 + .long Lset4 + .byte 14 + .byte 16 + .byte 134 + .byte 2 + .byte 4 +Lset5 = Ltmp1-Ltmp0 + .long Lset5 + .byte 13 + .byte 6 + .align 3 +Leh_frame_end1: + + +.subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect8.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect8.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.gcc-llvm.4.2.vect8.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,192 @@ + .section __TEXT,__text,regular,pure_instructions + .section __TEXT,__literal8,8byte_literals + .align 3 +LCPI1_0: + .quad 4613937818241073152 +LCPI1_1: + .quad 4727228949921267712 +LCPI1_2: + .quad 4501392635919089375 +LCPI1_3: + .quad 4666723172467343360 +LCPI1_4: + .quad 4607182418800017408 +LCPI1_5: + .quad 4516603487517057909 + .section __TEXT,__text,regular,pure_instructions + .globl _main + .align 4, 0x90 +_main: +Leh_func_begin1: + pushq %rbp +Ltmp0: + movq %rsp, %rbp +Ltmp1: + andq $-32, %rsp + subq $4128, %rsp +Ltmp2: + cmpl $2, %edi + je LBB1_2 + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + leaq L_.str(%rip), %rdi + movl $1, %esi + movl $75, %edx + callq _fwrite + movl $-1, %eax + jmp LBB1_17 +LBB1_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd LCPI1_0(%rip), %xmm0 + divsd LCPI1_1(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd LCPI1_2(%rip), %xmm1 + .align 4, 0x90 +LBB1_3: + movss %xmm0, 3072(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_3 + xorl %eax, %eax + movsd LCPI1_3(%rip), %xmm1 + movsd LCPI1_4(%rip), %xmm2 + movsd LCPI1_5(%rip), %xmm3 + .align 4, 0x90 +LBB1_5: + cvtss2sd %xmm0, %xmm0 + 
movapd %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_5 + xorl %eax, %eax + movsd LCPI1_2(%rip), %xmm1 + .align 4, 0x90 +LBB1_7: + movss %xmm0, 1024(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne LBB1_7 + movl $100000000, %eax + .align 4, 0x90 +LBB1_9: + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_10: + movaps 3072(%rsp,%rcx), %xmm0 + movaps 3088(%rsp,%rcx), %xmm1 + mulps 2064(%rsp,%rcx), %xmm1 + mulps 2048(%rsp,%rcx), %xmm0 + movaps %xmm0, (%rsp,%rcx) + movaps %xmm1, 16(%rsp,%rcx) + addq $32, %rcx + cmpq $1024, %rcx + jne LBB1_10 + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_12: + movaps (%rsp,%rcx), %xmm0 + movaps 16(%rsp,%rcx), %xmm1 + addps 1040(%rsp,%rcx), %xmm1 + addps 1024(%rsp,%rcx), %xmm0 + movaps %xmm0, 3072(%rsp,%rcx) + movaps %xmm1, 3088(%rsp,%rcx) + addq $32, %rcx + cmpq $1024, %rcx + jne LBB1_12 + decl %eax + jne LBB1_9 + pxor %xmm0, %xmm0 + xorl %ecx, %ecx + .align 4, 0x90 +LBB1_15: + movss 3072(%rsp,%rcx,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rcx + cmpq $256, %rcx + jne LBB1_15 + leaq L_.str1(%rip), %rdi + movb $1, %al + callq _printf + xorl %eax, %eax +LBB1_17: + movq %rbp, %rsp + popq %rbp + ret +Leh_func_end1: + + .section __TEXT,__cstring,cstring_literals + .align 3 +L_.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + +L_.str1: + .asciz "Result %f\n" + + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame0: +Lsection_eh_frame: +Leh_frame_common: +Lset0 = Leh_frame_common_end-Leh_frame_common_begin + .long Lset0 +Leh_frame_common_begin: + .long 0 + .byte 1 + .asciz "zR" + .byte 1 + .byte 120 + .byte 16 + .byte 1 + .byte 16 + .byte 12 + .byte 7 + .byte 8 + .byte 144 + .byte 1 + .align 3 +Leh_frame_common_end: + .globl _main.eh +_main.eh: 
+Lset1 = Leh_frame_end1-Leh_frame_begin1 + .long Lset1 +Leh_frame_begin1: +Lset2 = Leh_frame_begin1-Leh_frame_common + .long Lset2 +Ltmp3: + .quad Leh_func_begin1-Ltmp3 +Lset3 = Leh_func_end1-Leh_func_begin1 + .quad Lset3 + .byte 0 + .byte 4 +Lset4 = Ltmp0-Leh_func_begin1 + .long Lset4 + .byte 14 + .byte 16 + .byte 134 + .byte 2 + .byte 4 +Lset5 = Ltmp1-Ltmp0 + .long Lset5 + .byte 13 + .byte 6 + .align 3 +Leh_frame_end1: + + +.subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.gcc.4.2.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.gcc.4.2.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.gcc.4.2.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,212 @@ + .cstring + .align 3 +LC0: + .ascii "Usage:vectors <string>, where string is used as a seed for the computation\12\0" +LC8: + .ascii "Result %f\12\0" + .text + .align 4,0x90 +.globl _main +_main: +LFB5: + pushq %rbp +LCFI0: + movq %rsp, %rbp +LCFI1: + subq $4096, %rsp +LCFI2: + cmpl $2, %edi + jne L35 + movq 8(%rsi), %rax + movsbw (%rax),%ax + cwtl + cvtsi2sd %eax, %xmm0 + mulsd LC1(%rip), %xmm0 + divsd LC2(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, -1024(%rbp) + cvtss2sd %xmm0, %xmm0 + movsd LC3(%rip), %xmm2 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movl $1, %eax + leaq -1024(%rbp), %rcx +L5: + movss %xmm0, (%rcx,%rax,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rax + cmpq $256, %rax + jne L5 + cvtss2sd %xmm0, %xmm1 + movsd LC4(%rip), %xmm5 + movapd %xmm1, %xmm0 + divsd %xmm5, %xmm0 + movsd LC5(%rip), %xmm3 + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, -2048(%rbp) + movsd LC6(%rip), %xmm4 + addsd %xmm4, %xmm1 + cvtsd2ss %xmm1, %xmm0 + movw $1, %ax + leaq -2048(%rbp), %rdi +L7: + cvtss2sd %xmm0, %xmm1 + movapd %xmm1, %xmm0 + divsd %xmm5, %xmm0 + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, (%rdi,%rax,4) + addsd %xmm4, %xmm1 + cvtsd2ss %xmm1, %xmm0 + incq %rax + cmpq 
$256, %rax + jne L7 + movss %xmm0, -3072(%rbp) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movw $1, %ax + leaq -3072(%rbp), %rsi +L9: + movss %xmm0, (%rsi,%rax,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rax + cmpq $256, %rax + jne L9 + xorl %r8d, %r8d + leaq -4096(%rbp), %rdx +L15: + movss -1024(%rbp), %xmm0 + mulss -2048(%rbp), %xmm0 + movss %xmm0, -4096(%rbp) + movl $1, %eax + .align 4,0x90 +L11: + movss (%rcx,%rax,4), %xmm0 + mulss (%rdi,%rax,4), %xmm0 + movss %xmm0, (%rdx,%rax,4) + incq %rax + cmpq $256, %rax + jne L11 + movss -4096(%rbp), %xmm0 + addss -3072(%rbp), %xmm0 + movss %xmm0, -1024(%rbp) + movw $1, %ax + .align 4,0x90 +L13: + movss (%rdx,%rax,4), %xmm0 + addss (%rsi,%rax,4), %xmm0 + movss %xmm0, (%rcx,%rax,4) + incq %rax + cmpq $256, %rax + jne L13 + incl %r8d + cmpl $100000000, %r8d + jne L15 + cvtss2sd -1024(%rbp), %xmm1 + addsd LC7(%rip), %xmm1 + movw $1, %ax +L17: + cvtss2sd (%rcx,%rax,4), %xmm0 + addsd %xmm0, %xmm1 + incq %rax + cmpq $256, %rax + jne L17 + movapd %xmm1, %xmm0 + leaq LC8(%rip), %rdi + movl $1, %eax + call _printf + xorl %eax, %eax + leave + ret +L35: + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + movl $75, %edx + movl $1, %esi + leaq LC0(%rip), %rdi + call _fwrite + movl $-1, %eax + leave + ret +LFE5: + .literal8 + .align 3 +LC1: + .long 0 + .long 1074266112 + .align 3 +LC2: + .long 469762048 + .long 1100643759 + .align 3 +LC3: + .long 3263446751 + .long 1048062144 + .align 3 +LC4: + .long 0 + .long 1086556160 + .align 3 +LC5: + .long 0 + .long 1072693248 + .align 3 +LC6: + .long 549364597 + .long 1051603697 + .align 3 +LC7: + .long 0 + .long 0 + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zR\0" + .byte 0x1 + .byte 0x78 + .byte 0x10 + .byte 0x1 + .byte 0x10 + .byte 0xc + .byte 0x7 + .byte 0x8 + .byte 0x90 + .byte 0x1 + 
.align 3 +LECIE1: +.globl _main.eh +_main.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 + .quad LFB5-. + .set L$set$2,LFE5-LFB5 + .quad L$set$2 + .byte 0x0 + .byte 0x4 + .set L$set$3,LCFI0-LFB5 + .long L$set$3 + .byte 0xe + .byte 0x10 + .byte 0x86 + .byte 0x2 + .byte 0x4 + .set L$set$4,LCFI1-LCFI0 + .long L$set$4 + .byte 0xd + .byte 0x6 + .align 3 +LEFDE1: + .subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect4.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect4.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect4.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,212 @@ + .cstring + .align 3 +LC0: + .ascii "Usage:vectors <string>, where string is used as a seed for the computation\12\0" +LC8: + .ascii "Result %f\12\0" + .text + .align 4,0x90 +.globl _main +_main: +LFB5: + pushq %rbp +LCFI0: + movq %rsp, %rbp +LCFI1: + subq $4096, %rsp +LCFI2: + cmpl $2, %edi + jne L35 + movq 8(%rsi), %rax + movsbw (%rax),%ax + cwtl + cvtsi2sd %eax, %xmm0 + mulsd LC1(%rip), %xmm0 + divsd LC2(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, -1024(%rbp) + cvtss2sd %xmm0, %xmm0 + movsd LC3(%rip), %xmm2 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movl $1, %eax + leaq -1024(%rbp), %rcx +L5: + movss %xmm0, (%rcx,%rax,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rax + cmpq $256, %rax + jne L5 + cvtss2sd %xmm0, %xmm1 + movsd LC4(%rip), %xmm5 + movapd %xmm1, %xmm0 + divsd %xmm5, %xmm0 + movsd LC5(%rip), %xmm4 + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, -2048(%rbp) + movsd LC6(%rip), %xmm3 + addsd %xmm3, %xmm1 + cvtsd2ss %xmm1, %xmm0 + movw $1, %ax + leaq -2048(%rbp), %rdi +L7: + cvtss2sd %xmm0, %xmm1 + movapd %xmm1, %xmm0 + divsd %xmm5, %xmm0 + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, (%rdi,%rax,4) + addsd %xmm3, %xmm1 + cvtsd2ss %xmm1, %xmm0 + incq %rax + cmpq $256, 
%rax + jne L7 + movss %xmm0, -3072(%rbp) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movw $1, %ax + leaq -3072(%rbp), %rsi +L9: + movss %xmm0, (%rsi,%rax,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rax + cmpq $256, %rax + jne L9 + xorl %r8d, %r8d + leaq -4096(%rbp), %rdx +L15: + movaps -2048(%rbp), %xmm0 + mulps -1024(%rbp), %xmm0 + movaps %xmm0, -4096(%rbp) + movl $16, %eax + .align 4,0x90 +L11: + movaps (%rax,%rdi), %xmm0 + mulps (%rax,%rcx), %xmm0 + movaps %xmm0, (%rax,%rdx) + addq $16, %rax + cmpq $1024, %rax + jne L11 + movaps -3072(%rbp), %xmm0 + addps -4096(%rbp), %xmm0 + movaps %xmm0, -1024(%rbp) + movw $16, %ax + .align 4,0x90 +L13: + movaps (%rax,%rsi), %xmm0 + addps (%rax,%rdx), %xmm0 + movaps %xmm0, (%rax,%rcx) + addq $16, %rax + cmpq $1024, %rax + jne L13 + incl %r8d + cmpl $100000000, %r8d + jne L15 + cvtss2sd -1024(%rbp), %xmm1 + addsd LC7(%rip), %xmm1 + movw $1, %ax +L17: + cvtss2sd (%rcx,%rax,4), %xmm0 + addsd %xmm0, %xmm1 + incq %rax + cmpq $256, %rax + jne L17 + movapd %xmm1, %xmm0 + leaq LC8(%rip), %rdi + movl $1, %eax + call _printf + xorl %eax, %eax + leave + ret +L35: + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + movl $75, %edx + movl $1, %esi + leaq LC0(%rip), %rdi + call _fwrite + movl $-1, %eax + leave + ret +LFE5: + .literal8 + .align 3 +LC1: + .long 0 + .long 1074266112 + .align 3 +LC2: + .long 469762048 + .long 1100643759 + .align 3 +LC3: + .long 3263446751 + .long 1048062144 + .align 3 +LC4: + .long 0 + .long 1086556160 + .align 3 +LC5: + .long 0 + .long 1072693248 + .align 3 +LC6: + .long 549364597 + .long 1051603697 + .align 3 +LC7: + .long 0 + .long 0 + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zR\0" + .byte 0x1 + .byte 0x78 + .byte 0x10 + .byte 0x1 + .byte 0x10 + .byte 0xc + .byte 0x7 + .byte 0x8 + .byte 0x90 + .byte 0x1 + 
.align 3 +LECIE1: +.globl _main.eh +_main.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 + .quad LFB5-. + .set L$set$2,LFE5-LFB5 + .quad L$set$2 + .byte 0x0 + .byte 0x4 + .set L$set$3,LCFI0-LFB5 + .long L$set$3 + .byte 0xe + .byte 0x10 + .byte 0x86 + .byte 0x2 + .byte 0x4 + .set L$set$4,LCFI1-LCFI0 + .long L$set$4 + .byte 0xd + .byte 0x6 + .align 3 +LEFDE1: + .subsections_via_symbols Added: trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect8.s =================================================================== --- trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect8.s (rev 0) +++ trunk/extra/tests/mac-os-x/vectors.gcc.4.2.vect8.s 2011-07-26 09:56:42 UTC (rev 355) @@ -0,0 +1,320 @@ + .cstring + .align 3 +LC0: + .ascii "Usage:vectors <string>, where string is used as a seed for the computation\12\0" +LC8: + .ascii "Result %f\12\0" + .text + .align 4,0x90 +.globl _main +_main: +LFB5: + pushq %rbp +LCFI0: + movq %rsp, %rbp +LCFI1: + subq $4384, %rsp +LCFI2: + cmpl $2, %edi + jne L35 + movq 8(%rsi), %rax + movsbw (%rax),%ax + cwtl + cvtsi2sd %eax, %xmm0 + mulsd LC1(%rip), %xmm0 + divsd LC2(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, -1280(%rbp) + cvtss2sd %xmm0, %xmm0 + movsd LC3(%rip), %xmm2 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movl $1, %eax + leaq -1280(%rbp), %rsi +L5: + movss %xmm0, (%rsi,%rax,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rax + cmpq $256, %rax + jne L5 + cvtss2sd %xmm0, %xmm1 + movsd LC4(%rip), %xmm5 + movapd %xmm1, %xmm0 + divsd %xmm5, %xmm0 + movsd LC5(%rip), %xmm4 + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, -2304(%rbp) + movsd LC6(%rip), %xmm3 + addsd %xmm3, %xmm1 + cvtsd2ss %xmm1, %xmm0 + movw $1, %ax + leaq -2304(%rbp), %r8 +L7: + cvtss2sd %xmm0, %xmm1 + movapd %xmm1, %xmm0 + divsd %xmm5, %xmm0 + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movss %xmm0, (%r8,%rax,4) + addsd %xmm3, %xmm1 + cvtsd2ss %xmm1, %xmm0 + incq %rax + cmpq $256, 
%rax + jne L7 + movss %xmm0, -3328(%rbp) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + movw $1, %ax + leaq -3328(%rbp), %rdi +L9: + movss %xmm0, (%rdi,%rax,4) + cvtss2sd %xmm0, %xmm0 + addsd %xmm2, %xmm0 + cvtsd2ss %xmm0, %xmm0 + incq %rax + cmpq $256, %rax + jne L9 + xorl %r9d, %r9d + leaq -4352(%rbp), %rcx +L15: + movq -1280(%rbp), %rax + movq %rax, -256(%rbp) + movq -1272(%rbp), %rax + movq %rax, -248(%rbp) + movq -1264(%rbp), %rax + movq %rax, -240(%rbp) + movq -1256(%rbp), %rax + movq %rax, -232(%rbp) + movq -2304(%rbp), %rax + movq %rax, -224(%rbp) + movq -2296(%rbp), %rax + movq %rax, -216(%rbp) + movq -2288(%rbp), %rax + movq %rax, -208(%rbp) + movq -2280(%rbp), %rax + movq %rax, -200(%rbp) + movaps -224(%rbp), %xmm0 + mulps -256(%rbp), %xmm0 + movaps %xmm0, -4384(%rbp) + movaps -208(%rbp), %xmm0 + mulps -240(%rbp), %xmm0 + movaps %xmm0, -4368(%rbp) + movq -4384(%rbp), %rax + movq %rax, -4352(%rbp) + movq -4376(%rbp), %rax + movq %rax, -4344(%rbp) + movq -4368(%rbp), %rax + movq %rax, -4336(%rbp) + movq -4360(%rbp), %rax + movq %rax, -4328(%rbp) + movl $32, %edx + .align 4,0x90 +L11: + movq (%rdx,%rsi), %rax + movq %rax, -32(%rbp) + movq 8(%rdx,%rsi), %rax + movq %rax, -24(%rbp) + movq 16(%rdx,%rsi), %rax + movq %rax, -16(%rbp) + movq 24(%rdx,%rsi), %rax + movq %rax, -8(%rbp) + movq (%rdx,%r8), %rax + movq %rax, -64(%rbp) + movq 8(%rdx,%r8), %rax + movq %rax, -56(%rbp) + movq 16(%rdx,%r8), %rax + movq %rax, -48(%rbp) + movq 24(%rdx,%r8), %rax + movq %rax, -40(%rbp) + movaps -64(%rbp), %xmm0 + mulps -32(%rbp), %xmm0 + movaps %xmm0, -4384(%rbp) + movaps -48(%rbp), %xmm0 + mulps -16(%rbp), %xmm0 + movaps %xmm0, -4368(%rbp) + movq -4384(%rbp), %rax + movq %rax, (%rdx,%rcx) + movq -4376(%rbp), %rax + movq %rax, 8(%rdx,%rcx) + movq -4368(%rbp), %rax + movq %rax, 16(%rdx,%rcx) + movq -4360(%rbp), %rax + movq %rax, 24(%rdx,%rcx) + addq $32, %rdx + cmpq $1024, %rdx + jne L11 + movq -4352(%rbp), %rax + movq %rax, -192(%rbp) + movq 
-4344(%rbp), %rax + movq %rax, -184(%rbp) + movq -4336(%rbp), %rax + movq %rax, -176(%rbp) + movq -4328(%rbp), %rax + movq %rax, -168(%rbp) + movq -3328(%rbp), %rax + movq %rax, -160(%rbp) + movq -3320(%rbp), %rax + movq %rax, -152(%rbp) + movq -3312(%rbp), %rax + movq %rax, -144(%rbp) + movq -3304(%rbp), %rax + movq %rax, -136(%rbp) + movaps -160(%rbp), %xmm0 + addps -192(%rbp), %xmm0 + movaps %xmm0, -4384(%rbp) + movaps -144(%rbp), %xmm0 + addps -176(%rbp), %xmm0 + movaps %xmm0, -4368(%rbp) + movq -4384(%rbp), %rax + movq %rax, -1280(%rbp) + movq -4376(%rbp), %rax + movq %rax, -1272(%rbp) + movq -4368(%rbp), %rax + movq %rax, -1264(%rbp) + movq -4360(%rbp), %rax + movq %rax, -1256(%rbp) + movw $32, %dx + .align 4,0x90 +L13: + movq (%rdx,%rcx), %rax + movq %rax, -96(%rbp) + movq 8(%rdx,%rcx), %rax + movq %rax, -88(%rbp) + movq 16(%rdx,%rcx), %rax + movq %rax, -80(%rbp) + movq 24(%rdx,%rcx), %rax + movq %rax, -72(%rbp) + movq (%rdx,%rdi), %rax + movq %rax, -128(%rbp) + movq 8(%rdx,%rdi), %rax + movq %rax, -120(%rbp) + movq 16(%rdx,%rdi), %rax + movq %rax, -112(%rbp) + movq 24(%rdx,%rdi), %rax + movq %rax, -104(%rbp) + movaps -128(%rbp), %xmm0 + addps -96(%rbp), %xmm0 + movaps %xmm0, -4384(%rbp) + movaps -112(%rbp), %xmm0 + addps -80(%rbp), %xmm0 + movaps %xmm0, -4368(%rbp) + movq -4384(%rbp), %rax + movq %rax, (%rdx,%rsi) + movq -4376(%rbp), %rax + movq %rax, 8(%rdx,%rsi) + movq -4368(%rbp), %rax + movq %rax, 16(%rdx,%rsi) + movq -4360(%rbp), %rax + movq %rax, 24(%rdx,%rsi) + addq $32, %rdx + cmpq $1024, %rdx + jne L13 + incl %r9d + cmpl $100000000, %r9d + jne L15 + cvtss2sd -1280(%rbp), %xmm1 + addsd LC7(%rip), %xmm1 + movl $1, %eax +L17: + cvtss2sd (%rsi,%rax,4), %xmm0 + addsd %xmm0, %xmm1 + incq %rax + cmpq $256, %rax + jne L17 + movapd %xmm1, %xmm0 + leaq LC8(%rip), %rdi + movl $1, %eax + call _printf + xorl %eax, %eax + leave + ret +L35: + movq ___stderrp@GOTPCREL(%rip), %rax + movq (%rax), %rcx + movl $75, %edx + movl $1, %esi + leaq LC0(%rip), %rdi + call 
_fwrite + movl $-1, %eax + leave + ret +LFE5: + .literal8 + .align 3 +LC1: + .long 0 + .long 1074266112 + .align 3 +LC2: + .long 469762048 + .long 1100643759 + .align 3 +LC3: + .long 3263446751 + .long 1048062144 + .align 3 +LC4: + .long 0 + .long 1086556160 + .align 3 +LC5: + .long 0 + .long 1072693248 + .align 3 +LC6: + .long 549364597 + .long 1051603697 + .align 3 +LC7: + .long 0 + .long 0 + .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EH_frame1: + .set L$set$0,LECIE1-LSCIE1 + .long L$set$0 +LSCIE1: + .long 0x0 + .byte 0x1 + .ascii "zR\0" + .byte 0x1 + .byte 0x78 + .byte 0x10 + .byte 0x1 + .byte 0x10 + .byte 0xc + .byte 0x7 + .byte 0x8 + .byte 0x90 + .byte 0x1 + .align 3 +LECIE1: +.globl _main.eh +_main.eh: +LSFDE1: + .set L$set$1,LEFDE1-LASFDE1 + .long L$set$1 +LASFDE1: + .long LASFDE1-EH_frame1 + .quad LFB5-. + .set L$set$2,LFE5-LFB5 + .quad L$set$2 + .byte 0x0 + .byte 0x4 + .set L$set$3,LCFI0-LFB5 + .long L$set$3 + .byte 0xe + .byte 0x10 + .byte 0x86 + .byte 0x2 + .byte 0x4 + .set L$set$4,LCFI1-LCFI0 + .long L$set$4 + .byte 0xd + .byte 0x6 + .align 3 +LEFDE1: + .subsections_via_symbols This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 09:49:50
|
Revision: 354 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=354&view=rev Author: dececco Date: 2011-07-26 09:49:43 +0000 (Tue, 26 Jul 2011) Log Message: ----------- Added 64 assemblers Added Paths: ----------- trunk/extra/tests/ubuntu-64bit.asm/ trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.s trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect4.s trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect8.s trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.s trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.s trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss2.s trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss3.s trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect8.s Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.s =================================================================== --- trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,191 @@ + .file "vectors.c" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4613937818241073152 +.LCPI0_1: + .quad 4727228949921267712 +.LCPI0_2: + .quad 4501392635919089375 +.LCPI0_3: + .quad 4666723172467343360 +.LCPI0_4: + .quad 4607182418800017408 +.LCPI0_5: + .quad 4516603487517057909 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: +.Leh_func_begin0: + pushq %rbp +.Ltmp0: + movq %rsp, %rbp +.Ltmp1: + andq $-32, %rsp + subq $4128, %rsp +.Ltmp2: + cmpl $2, %edi + je .LBB0_2 + movq stderr(%rip), %rcx + movl $.L.str, %edi + movl $75, %esi + movl $1, %edx + callq fwrite + movl $-1, %eax + jmp .LBB0_17 +.LBB0_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd .LCPI0_0(%rip), %xmm0 + divsd .LCPI0_1(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd .LCPI0_2(%rip), %xmm1 + .align 16, 0x90 +.LBB0_3: + movss %xmm0, 3072(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + 
cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_3 + xorl %eax, %eax + movsd .LCPI0_3(%rip), %xmm1 + movsd .LCPI0_4(%rip), %xmm2 + movsd .LCPI0_5(%rip), %xmm3 + .align 16, 0x90 +.LBB0_5: + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_5 + xorl %eax, %eax + movsd .LCPI0_2(%rip), %xmm1 + .align 16, 0x90 +.LBB0_7: + movss %xmm0, 1024(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_7 + movl $100000000, %eax + .align 16, 0x90 +.LBB0_9: + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_10: + movss 3072(%rsp,%rcx,4), %xmm0 + mulss 2048(%rsp,%rcx,4), %xmm0 + movss %xmm0, (%rsp,%rcx,4) + incq %rcx + cmpq $256, %rcx + jne .LBB0_10 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_12: + movss (%rsp,%rcx,4), %xmm0 + addss 1024(%rsp,%rcx,4), %xmm0 + movss %xmm0, 3072(%rsp,%rcx,4) + incq %rcx + cmpq $256, %rcx + jne .LBB0_12 + decl %eax + jne .LBB0_9 + pxor %xmm0, %xmm0 + xorl %eax, %eax + .align 16, 0x90 +.LBB0_15: + movss 3072(%rsp,%rax,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rax + cmpq $256, %rax + jne .LBB0_15 + movl $.L.str1, %edi + movb $1, %al + callq printf + xorl %eax, %eax +.LBB0_17: + movq %rbp, %rsp + popq %rbp + ret +.Ltmp3: + .size main, .Ltmp3-main +.Leh_func_end0: + + .type .L.str,@object + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 +.L.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + .size .L.str, 76 + + .type .L.str1,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str1: + .asciz "Result %f\n" + .size .L.str1, 11 + + .section .eh_frame,"aw",@progbits +.LEH_frame0: +.Lsection_eh_frame0: +.Leh_frame_common0: +.Lset0 = .Leh_frame_common_end0-.Leh_frame_common_begin0 + .long .Lset0 +.Leh_frame_common_begin0: 
+ .long 0 + .byte 1 + .asciz "zR" + .uleb128 1 + .sleb128 -8 + .byte 16 + .uleb128 1 + .byte 3 + .byte 12 + .uleb128 7 + .uleb128 8 + .byte 144 + .uleb128 1 + .align 8 +.Leh_frame_common_end0: +.Lmain.eh: +.Lset1 = .Leh_frame_end0-.Leh_frame_begin0 + .long .Lset1 +.Leh_frame_begin0: +.Lset2 = .Leh_frame_begin0-.Leh_frame_common0 + .long .Lset2 + .long .Leh_func_begin0 +.Lset3 = .Leh_func_end0-.Leh_func_begin0 + .long .Lset3 + .uleb128 0 + .byte 4 +.Lset4 = .Ltmp0-.Leh_func_begin0 + .long .Lset4 + .byte 14 + .uleb128 16 + .byte 134 + .uleb128 2 + .byte 4 +.Lset5 = .Ltmp1-.Ltmp0 + .long .Lset5 + .byte 13 + .uleb128 6 + .align 8 +.Leh_frame_end0: + + + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect4.s =================================================================== --- trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect4.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect4.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,191 @@ + .file "vectors.c" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4613937818241073152 +.LCPI0_1: + .quad 4727228949921267712 +.LCPI0_2: + .quad 4501392635919089375 +.LCPI0_3: + .quad 4666723172467343360 +.LCPI0_4: + .quad 4607182418800017408 +.LCPI0_5: + .quad 4516603487517057909 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: +.Leh_func_begin0: + pushq %rbp +.Ltmp0: + movq %rsp, %rbp +.Ltmp1: + andq $-32, %rsp + subq $4128, %rsp +.Ltmp2: + cmpl $2, %edi + je .LBB0_2 + movq stderr(%rip), %rcx + movl $.L.str, %edi + movl $75, %esi + movl $1, %edx + callq fwrite + movl $-1, %eax + jmp .LBB0_17 +.LBB0_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd .LCPI0_0(%rip), %xmm0 + divsd .LCPI0_1(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd .LCPI0_2(%rip), %xmm1 + .align 16, 0x90 +.LBB0_3: + movss %xmm0, 3072(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, 
%xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_3 + xorl %eax, %eax + movsd .LCPI0_3(%rip), %xmm1 + movsd .LCPI0_4(%rip), %xmm2 + movsd .LCPI0_5(%rip), %xmm3 + .align 16, 0x90 +.LBB0_5: + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_5 + xorl %eax, %eax + movsd .LCPI0_2(%rip), %xmm1 + .align 16, 0x90 +.LBB0_7: + movss %xmm0, 1024(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_7 + movl $100000000, %eax + .align 16, 0x90 +.LBB0_9: + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_10: + movaps 3072(%rsp,%rcx), %xmm0 + mulps 2048(%rsp,%rcx), %xmm0 + movaps %xmm0, (%rsp,%rcx) + addq $16, %rcx + cmpq $1024, %rcx + jne .LBB0_10 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_12: + movaps (%rsp,%rcx), %xmm0 + addps 1024(%rsp,%rcx), %xmm0 + movaps %xmm0, 3072(%rsp,%rcx) + addq $16, %rcx + cmpq $1024, %rcx + jne .LBB0_12 + decl %eax + jne .LBB0_9 + pxor %xmm0, %xmm0 + xorl %eax, %eax + .align 16, 0x90 +.LBB0_15: + movss 3072(%rsp,%rax,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rax + cmpq $256, %rax + jne .LBB0_15 + movl $.L.str1, %edi + movb $1, %al + callq printf + xorl %eax, %eax +.LBB0_17: + movq %rbp, %rsp + popq %rbp + ret +.Ltmp3: + .size main, .Ltmp3-main +.Leh_func_end0: + + .type .L.str,@object + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 +.L.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + .size .L.str, 76 + + .type .L.str1,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str1: + .asciz "Result %f\n" + .size .L.str1, 11 + + .section .eh_frame,"aw",@progbits +.LEH_frame0: +.Lsection_eh_frame0: +.Leh_frame_common0: +.Lset0 = .Leh_frame_common_end0-.Leh_frame_common_begin0 + .long .Lset0 +.Leh_frame_common_begin0: + .long 0 + 
.byte 1 + .asciz "zR" + .uleb128 1 + .sleb128 -8 + .byte 16 + .uleb128 1 + .byte 3 + .byte 12 + .uleb128 7 + .uleb128 8 + .byte 144 + .uleb128 1 + .align 8 +.Leh_frame_common_end0: +.Lmain.eh: +.Lset1 = .Leh_frame_end0-.Leh_frame_begin0 + .long .Lset1 +.Leh_frame_begin0: +.Lset2 = .Leh_frame_begin0-.Leh_frame_common0 + .long .Lset2 + .long .Leh_func_begin0 +.Lset3 = .Leh_func_end0-.Leh_func_begin0 + .long .Lset3 + .uleb128 0 + .byte 4 +.Lset4 = .Ltmp0-.Leh_func_begin0 + .long .Lset4 + .byte 14 + .uleb128 16 + .byte 134 + .uleb128 2 + .byte 4 +.Lset5 = .Ltmp1-.Ltmp0 + .long .Lset5 + .byte 13 + .uleb128 6 + .align 8 +.Leh_frame_end0: + + + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect8.s =================================================================== --- trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect8.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.clang.vect8.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,197 @@ + .file "vectors.c" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4613937818241073152 +.LCPI0_1: + .quad 4727228949921267712 +.LCPI0_2: + .quad 4501392635919089375 +.LCPI0_3: + .quad 4666723172467343360 +.LCPI0_4: + .quad 4607182418800017408 +.LCPI0_5: + .quad 4516603487517057909 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: +.Leh_func_begin0: + pushq %rbp +.Ltmp0: + movq %rsp, %rbp +.Ltmp1: + andq $-32, %rsp + subq $4128, %rsp +.Ltmp2: + cmpl $2, %edi + je .LBB0_2 + movq stderr(%rip), %rcx + movl $.L.str, %edi + movl $75, %esi + movl $1, %edx + callq fwrite + movl $-1, %eax + jmp .LBB0_17 +.LBB0_2: + movq 8(%rsi), %rax + movsbl (%rax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd .LCPI0_0(%rip), %xmm0 + divsd .LCPI0_1(%rip), %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd .LCPI0_2(%rip), %xmm1 + .align 16, 0x90 +.LBB0_3: + movss %xmm0, 3072(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd 
%xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_3 + xorl %eax, %eax + movsd .LCPI0_3(%rip), %xmm1 + movsd .LCPI0_4(%rip), %xmm2 + movsd .LCPI0_5(%rip), %xmm3 + .align 16, 0x90 +.LBB0_5: + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm4 + divsd %xmm1, %xmm4 + addsd %xmm2, %xmm4 + cvtsd2ss %xmm4, %xmm4 + movss %xmm4, 2048(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + addsd %xmm3, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_5 + xorl %eax, %eax + movsd .LCPI0_2(%rip), %xmm1 + .align 16, 0x90 +.LBB0_7: + movss %xmm0, 1024(%rsp,%rax,4) + incq %rax + cmpq $256, %rax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_7 + movl $100000000, %eax + .align 16, 0x90 +.LBB0_9: + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_10: + movaps 3072(%rsp,%rcx), %xmm0 + movaps 3088(%rsp,%rcx), %xmm1 + mulps 2064(%rsp,%rcx), %xmm1 + mulps 2048(%rsp,%rcx), %xmm0 + movaps %xmm0, (%rsp,%rcx) + movaps %xmm1, 16(%rsp,%rcx) + addq $32, %rcx + cmpq $1024, %rcx + jne .LBB0_10 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_12: + movaps (%rsp,%rcx), %xmm0 + movaps 16(%rsp,%rcx), %xmm1 + addps 1040(%rsp,%rcx), %xmm1 + addps 1024(%rsp,%rcx), %xmm0 + movaps %xmm0, 3072(%rsp,%rcx) + movaps %xmm1, 3088(%rsp,%rcx) + addq $32, %rcx + cmpq $1024, %rcx + jne .LBB0_12 + decl %eax + jne .LBB0_9 + pxor %xmm0, %xmm0 + xorl %eax, %eax + .align 16, 0x90 +.LBB0_15: + movss 3072(%rsp,%rax,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incq %rax + cmpq $256, %rax + jne .LBB0_15 + movl $.L.str1, %edi + movb $1, %al + callq printf + xorl %eax, %eax +.LBB0_17: + movq %rbp, %rsp + popq %rbp + ret +.Ltmp3: + .size main, .Ltmp3-main +.Leh_func_end0: + + .type .L.str,@object + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 +.L.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + .size .L.str, 76 + + .type .L.str1,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str1: + .asciz "Result %f\n" + .size .L.str1, 11 + + .section 
.eh_frame,"aw",@progbits +.LEH_frame0: +.Lsection_eh_frame0: +.Leh_frame_common0: +.Lset0 = .Leh_frame_common_end0-.Leh_frame_common_begin0 + .long .Lset0 +.Leh_frame_common_begin0: + .long 0 + .byte 1 + .asciz "zR" + .uleb128 1 + .sleb128 -8 + .byte 16 + .uleb128 1 + .byte 3 + .byte 12 + .uleb128 7 + .uleb128 8 + .byte 144 + .uleb128 1 + .align 8 +.Leh_frame_common_end0: +.Lmain.eh: +.Lset1 = .Leh_frame_end0-.Leh_frame_begin0 + .long .Lset1 +.Leh_frame_begin0: +.Lset2 = .Leh_frame_begin0-.Leh_frame_common0 + .long .Lset2 + .long .Leh_func_begin0 +.Lset3 = .Leh_func_end0-.Leh_func_begin0 + .long .Lset3 + .uleb128 0 + .byte 4 +.Lset4 = .Ltmp0-.Leh_func_begin0 + .long .Lset4 + .byte 14 + .uleb128 16 + .byte 134 + .uleb128 2 + .byte 4 +.Lset5 = .Ltmp1-.Ltmp0 + .long .Lset5 + .byte 13 + .uleb128 6 + .align 8 +.Leh_frame_end0: + + + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.s =================================================================== --- trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,388 @@ + .file "vectors.c" + .section .rodata.str1.8,"aMS",@progbits,1 + .align 8 +.LC1: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC8: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: +.LFB24: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + movq %rsp, %rbp + .cfi_offset 6, -16 + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4096, %rsp + cmpl $2, %edi + jne .L84 + movq 8(%rsi), %rdx + leaq 3072(%rsp), %rcx + movsbl (%rdx), %eax + movsd .LC4(%rip), %xmm0 + cvtsi2sd %eax, %xmm1 + movq %rcx, %rdx + mulsd .LC2(%rip), %xmm1 + leaq 4096(%rsp), %rax + divsd .LC3(%rip), %xmm1 + cvtsd2ss %xmm1, %xmm1 + .p2align 4,,10 + .p2align 3 +.L4: + movss %xmm1, (%rdx) + cvtss2sd 
%xmm1, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 4(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 8(%rdx) + cvtss2sd %xmm13, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 12(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 16(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 20(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 24(%rdx) + cvtss2sd %xmm5, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rax, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L4 + leaq 2048(%rsp), %rdx + movsd .LC5(%rip), %xmm4 + movq %rdx, %rsi + movsd .LC6(%rip), %xmm3 + addq $1024, %rsi + movsd .LC7(%rip), %xmm2 + .p2align 4,,10 + .p2align 3 +.L5: + cvtss2sd %xmm1, %xmm10 + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + divsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, (%rdx) + addsd %xmm2, %xmm6 + divsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 4(%rdx) + addsd %xmm2, %xmm14 + divsd %xmm4, %xmm1 + cvtsd2ss %xmm14, %xmm13 + addsd %xmm3, %xmm1 + cvtss2sd %xmm13, %xmm10 + cvtsd2ss %xmm1, %xmm15 + movapd %xmm10, %xmm12 + movss %xmm15, 8(%rdx) + addsd %xmm2, %xmm10 + divsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 12(%rdx) + addsd %xmm2, %xmm6 + divsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 16(%rdx) + divsd %xmm4, %xmm1 + addsd %xmm2, %xmm14 + addsd %xmm3, %xmm1 + cvtsd2ss 
%xmm14, %xmm13 + cvtsd2ss %xmm1, %xmm15 + cvtss2sd %xmm13, %xmm10 + movss %xmm15, 20(%rdx) + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + divsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 24(%rdx) + divsd %xmm4, %xmm8 + movapd %xmm6, %xmm5 + addsd %xmm3, %xmm8 + addsd %xmm2, %xmm5 + cvtsd2ss %xmm8, %xmm7 + cvtsd2ss %xmm5, %xmm1 + movss %xmm7, 28(%rdx) + addq $32, %rdx + cmpq %rsi, %rdx + jne .L5 + leaq 1024(%rsp), %rdx + movq %rdx, %rsi + addq $1024, %rsi + .p2align 4,,10 + .p2align 3 +.L6: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 4(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 8(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 12(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 16(%rdx) + cvtss2sd %xmm5, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 20(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 24(%rdx) + cvtss2sd %xmm13, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rsi, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L6 + movl $100000000, %esi + .p2align 4,,10 + .p2align 3 +.L9: + xorl %edx, %edx + .p2align 4,,10 + .p2align 3 +.L7: + movaps (%rcx,%rdx), %xmm1 + mulps 2048(%rsp,%rdx), %xmm1 + movaps %xmm1, (%rsp,%rdx) + movaps 16(%rcx,%rdx), %xmm15 + mulps 2064(%rsp,%rdx), %xmm15 + movaps %xmm15, 16(%rsp,%rdx) + movaps 32(%rcx,%rdx), %xmm14 + mulps 2080(%rsp,%rdx), %xmm14 + movaps %xmm14, 32(%rsp,%rdx) + movaps 48(%rcx,%rdx), %xmm13 + mulps 2096(%rsp,%rdx), %xmm13 + movaps %xmm13, 48(%rsp,%rdx) + movaps 64(%rcx,%rdx), %xmm4 + mulps 2112(%rsp,%rdx), %xmm4 + movaps %xmm4, 64(%rsp,%rdx) + movaps 
80(%rcx,%rdx), %xmm3 + mulps 2128(%rsp,%rdx), %xmm3 + movaps %xmm3, 80(%rsp,%rdx) + movaps 96(%rcx,%rdx), %xmm2 + mulps 2144(%rsp,%rdx), %xmm2 + movaps %xmm2, 96(%rsp,%rdx) + movaps 112(%rcx,%rdx), %xmm0 + mulps 2160(%rsp,%rdx), %xmm0 + movaps %xmm0, 112(%rsp,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L7 + xorw %dx, %dx + .p2align 4,,10 + .p2align 3 +.L8: + movaps (%rsp,%rdx), %xmm12 + addps 1024(%rsp,%rdx), %xmm12 + movaps %xmm12, (%rcx,%rdx) + movaps 16(%rsp,%rdx), %xmm11 + addps 1040(%rsp,%rdx), %xmm11 + movaps %xmm11, 16(%rcx,%rdx) + movaps 32(%rsp,%rdx), %xmm10 + addps 1056(%rsp,%rdx), %xmm10 + movaps %xmm10, 32(%rcx,%rdx) + movaps 48(%rsp,%rdx), %xmm9 + addps 1072(%rsp,%rdx), %xmm9 + movaps %xmm9, 48(%rcx,%rdx) + movaps 64(%rsp,%rdx), %xmm8 + addps 1088(%rsp,%rdx), %xmm8 + movaps %xmm8, 64(%rcx,%rdx) + movaps 80(%rsp,%rdx), %xmm7 + addps 1104(%rsp,%rdx), %xmm7 + movaps %xmm7, 80(%rcx,%rdx) + movaps 96(%rsp,%rdx), %xmm6 + addps 1120(%rsp,%rdx), %xmm6 + movaps %xmm6, 96(%rcx,%rdx) + movaps 112(%rsp,%rdx), %xmm5 + addps 1136(%rsp,%rdx), %xmm5 + movaps %xmm5, 112(%rcx,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L8 + decl %esi + jne .L9 + cvtss2sd (%rcx), %xmm0 + movq %rax, %rdx + addsd .LC0(%rip), %xmm0 + subq %rcx, %rdx + addq $4, %rcx + subq $4, %rdx + shrq $2, %rdx + andl $7, %edx + cmpq %rax, %rcx + je .L82 + testq %rdx, %rdx + je .L10 + cmpq $1, %rdx + je .L76 + cmpq $2, %rdx + je .L77 + cmpq $3, %rdx + je .L78 + cmpq $4, %rdx + je .L79 + cmpq $5, %rdx + je .L80 + cmpq $6, %rdx + je .L81 + cvtss2sd (%rcx), %xmm6 + addq $4, %rcx + addsd %xmm6, %xmm0 +.L81: + cvtss2sd (%rcx), %xmm7 + addq $4, %rcx + addsd %xmm7, %xmm0 +.L80: + cvtss2sd (%rcx), %xmm8 + addq $4, %rcx + addsd %xmm8, %xmm0 +.L79: + cvtss2sd (%rcx), %xmm9 + addq $4, %rcx + addsd %xmm9, %xmm0 +.L78: + cvtss2sd (%rcx), %xmm10 + addq $4, %rcx + addsd %xmm10, %xmm0 +.L77: + cvtss2sd (%rcx), %xmm11 + addq $4, %rcx + addsd %xmm11, %xmm0 +.L76: + cvtss2sd (%rcx), %xmm12 + addq $4, %rcx + 
addsd %xmm12, %xmm0 + cmpq %rax, %rcx + je .L82 + .p2align 4,,10 + .p2align 3 +.L10: + cvtss2sd (%rcx), %xmm5 + cvtss2sd 4(%rcx), %xmm1 + cvtss2sd 8(%rcx), %xmm15 + cvtss2sd 12(%rcx), %xmm14 + cvtss2sd 16(%rcx), %xmm13 + cvtss2sd 20(%rcx), %xmm4 + cvtss2sd 24(%rcx), %xmm3 + cvtss2sd 28(%rcx), %xmm2 + addsd %xmm5, %xmm0 + addq $32, %rcx + addsd %xmm1, %xmm0 + cmpq %rax, %rcx + addsd %xmm15, %xmm0 + addsd %xmm14, %xmm0 + addsd %xmm13, %xmm0 + addsd %xmm4, %xmm0 + addsd %xmm3, %xmm0 + addsd %xmm2, %xmm0 + jne .L10 +.L82: + movl $.LC8, %esi + movl $1, %edi + movl $1, %eax + call __printf_chk + xorl %eax, %eax + leave + .cfi_remember_state + .cfi_def_cfa 7, 8 + ret +.L84: + .cfi_restore_state + movq stderr(%rip), %rcx + movl $75, %edx + movl $1, %esi + movl $.LC1, %edi + call fwrite + orl $-1, %eax + leave + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE24: + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC0: + .long 0 + .long 0 + .align 8 +.LC2: + .long 0 + .long 1074266112 + .align 8 +.LC3: + .long 469762048 + .long 1100643759 + .align 8 +.LC4: + .long 3263446751 + .long 1048062144 + .align 8 +.LC5: + .long 0 + .long 1086556160 + .align 8 +.LC6: + .long 0 + .long 1072693248 + .align 8 +.LC7: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.s =================================================================== --- trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,388 @@ + .file "vectors.c" + .section .rodata.str1.8,"aMS",@progbits,1 + .align 8 +.LC1: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC8: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function 
+main: +.LFB24: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + movq %rsp, %rbp + .cfi_offset 6, -16 + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4096, %rsp + cmpl $2, %edi + jne .L84 + movq 8(%rsi), %rdx + leaq 3072(%rsp), %rcx + movsbl (%rdx), %eax + movsd .LC4(%rip), %xmm0 + cvtsi2sd %eax, %xmm1 + movq %rcx, %rdx + mulsd .LC2(%rip), %xmm1 + leaq 4096(%rsp), %rax + divsd .LC3(%rip), %xmm1 + cvtsd2ss %xmm1, %xmm1 + .p2align 4,,10 + .p2align 3 +.L4: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 4(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 8(%rdx) + cvtss2sd %xmm13, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 12(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 16(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 20(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 24(%rdx) + cvtss2sd %xmm5, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rax, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L4 + leaq 2048(%rsp), %rdx + movsd .LC5(%rip), %xmm4 + movq %rdx, %rsi + movsd .LC6(%rip), %xmm3 + addq $1024, %rsi + movsd .LC7(%rip), %xmm2 + .p2align 4,,10 + .p2align 3 +.L5: + cvtss2sd %xmm1, %xmm10 + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + divsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, (%rdx) + addsd %xmm2, %xmm6 + divsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 4(%rdx) + addsd %xmm2, %xmm14 + divsd %xmm4, %xmm1 + cvtsd2ss %xmm14, %xmm13 + addsd %xmm3, %xmm1 + cvtss2sd %xmm13, %xmm10 + cvtsd2ss %xmm1, 
%xmm15 + movapd %xmm10, %xmm12 + movss %xmm15, 8(%rdx) + addsd %xmm2, %xmm10 + divsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 12(%rdx) + addsd %xmm2, %xmm6 + divsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 16(%rdx) + divsd %xmm4, %xmm1 + addsd %xmm2, %xmm14 + addsd %xmm3, %xmm1 + cvtsd2ss %xmm14, %xmm13 + cvtsd2ss %xmm1, %xmm15 + cvtss2sd %xmm13, %xmm10 + movss %xmm15, 20(%rdx) + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + divsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 24(%rdx) + divsd %xmm4, %xmm8 + movapd %xmm6, %xmm5 + addsd %xmm3, %xmm8 + addsd %xmm2, %xmm5 + cvtsd2ss %xmm8, %xmm7 + cvtsd2ss %xmm5, %xmm1 + movss %xmm7, 28(%rdx) + addq $32, %rdx + cmpq %rsi, %rdx + jne .L5 + leaq 1024(%rsp), %rdx + movq %rdx, %rsi + addq $1024, %rsi + .p2align 4,,10 + .p2align 3 +.L6: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 4(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 8(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 12(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 16(%rdx) + cvtss2sd %xmm5, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 20(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 24(%rdx) + cvtss2sd %xmm13, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rsi, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L6 + movl $100000000, %esi + .p2align 4,,10 + .p2align 3 +.L7: + xorl %edx, %edx + .p2align 
4,,10 + .p2align 3 +.L8: + movaps (%rcx,%rdx), %xmm1 + mulps 2048(%rsp,%rdx), %xmm1 + movaps %xmm1, (%rsp,%rdx) + movaps 16(%rcx,%rdx), %xmm15 + mulps 2064(%rsp,%rdx), %xmm15 + movaps %xmm15, 16(%rsp,%rdx) + movaps 32(%rcx,%rdx), %xmm14 + mulps 2080(%rsp,%rdx), %xmm14 + movaps %xmm14, 32(%rsp,%rdx) + movaps 48(%rcx,%rdx), %xmm13 + mulps 2096(%rsp,%rdx), %xmm13 + movaps %xmm13, 48(%rsp,%rdx) + movaps 64(%rcx,%rdx), %xmm4 + mulps 2112(%rsp,%rdx), %xmm4 + movaps %xmm4, 64(%rsp,%rdx) + movaps 80(%rcx,%rdx), %xmm3 + mulps 2128(%rsp,%rdx), %xmm3 + movaps %xmm3, 80(%rsp,%rdx) + movaps 96(%rcx,%rdx), %xmm2 + mulps 2144(%rsp,%rdx), %xmm2 + movaps %xmm2, 96(%rsp,%rdx) + movaps 112(%rcx,%rdx), %xmm0 + mulps 2160(%rsp,%rdx), %xmm0 + movaps %xmm0, 112(%rsp,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L8 + xorw %dx, %dx + .p2align 4,,10 + .p2align 3 +.L9: + movaps (%rsp,%rdx), %xmm12 + addps 1024(%rsp,%rdx), %xmm12 + movaps %xmm12, (%rcx,%rdx) + movaps 16(%rsp,%rdx), %xmm11 + addps 1040(%rsp,%rdx), %xmm11 + movaps %xmm11, 16(%rcx,%rdx) + movaps 32(%rsp,%rdx), %xmm10 + addps 1056(%rsp,%rdx), %xmm10 + movaps %xmm10, 32(%rcx,%rdx) + movaps 48(%rsp,%rdx), %xmm9 + addps 1072(%rsp,%rdx), %xmm9 + movaps %xmm9, 48(%rcx,%rdx) + movaps 64(%rsp,%rdx), %xmm8 + addps 1088(%rsp,%rdx), %xmm8 + movaps %xmm8, 64(%rcx,%rdx) + movaps 80(%rsp,%rdx), %xmm7 + addps 1104(%rsp,%rdx), %xmm7 + movaps %xmm7, 80(%rcx,%rdx) + movaps 96(%rsp,%rdx), %xmm6 + addps 1120(%rsp,%rdx), %xmm6 + movaps %xmm6, 96(%rcx,%rdx) + movaps 112(%rsp,%rdx), %xmm5 + addps 1136(%rsp,%rdx), %xmm5 + movaps %xmm5, 112(%rcx,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L9 + decl %esi + jne .L7 + cvtss2sd (%rcx), %xmm0 + movq %rax, %rdx + addsd .LC0(%rip), %xmm0 + subq %rcx, %rdx + addq $4, %rcx + subq $4, %rdx + shrq $2, %rdx + andl $7, %edx + cmpq %rax, %rcx + je .L82 + testq %rdx, %rdx + je .L10 + cmpq $1, %rdx + je .L76 + cmpq $2, %rdx + je .L77 + cmpq $3, %rdx + je .L78 + cmpq $4, %rdx + je .L79 + cmpq $5, %rdx + je 
.L80 + cmpq $6, %rdx + je .L81 + cvtss2sd (%rcx), %xmm6 + addq $4, %rcx + addsd %xmm6, %xmm0 +.L81: + cvtss2sd (%rcx), %xmm7 + addq $4, %rcx + addsd %xmm7, %xmm0 +.L80: + cvtss2sd (%rcx), %xmm8 + addq $4, %rcx + addsd %xmm8, %xmm0 +.L79: + cvtss2sd (%rcx), %xmm9 + addq $4, %rcx + addsd %xmm9, %xmm0 +.L78: + cvtss2sd (%rcx), %xmm10 + addq $4, %rcx + addsd %xmm10, %xmm0 +.L77: + cvtss2sd (%rcx), %xmm11 + addq $4, %rcx + addsd %xmm11, %xmm0 +.L76: + cvtss2sd (%rcx), %xmm12 + addq $4, %rcx + addsd %xmm12, %xmm0 + cmpq %rax, %rcx + je .L82 + .p2align 4,,10 + .p2align 3 +.L10: + cvtss2sd (%rcx), %xmm5 + cvtss2sd 4(%rcx), %xmm1 + cvtss2sd 8(%rcx), %xmm15 + cvtss2sd 12(%rcx), %xmm14 + cvtss2sd 16(%rcx), %xmm13 + cvtss2sd 20(%rcx), %xmm4 + cvtss2sd 24(%rcx), %xmm3 + cvtss2sd 28(%rcx), %xmm2 + addsd %xmm5, %xmm0 + addq $32, %rcx + addsd %xmm1, %xmm0 + cmpq %rax, %rcx + addsd %xmm15, %xmm0 + addsd %xmm14, %xmm0 + addsd %xmm13, %xmm0 + addsd %xmm4, %xmm0 + addsd %xmm3, %xmm0 + addsd %xmm2, %xmm0 + jne .L10 +.L82: + movl $.LC8, %esi + movl $1, %edi + movl $1, %eax + call __printf_chk + xorl %eax, %eax + leave + .cfi_remember_state + .cfi_def_cfa 7, 8 + ret +.L84: + .cfi_restore_state + movq stderr(%rip), %rcx + movl $75, %edx + movl $1, %esi + movl $.LC1, %edi + call fwrite + orl $-1, %eax + leave + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE24: + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC0: + .long 0 + .long 0 + .align 8 +.LC2: + .long 0 + .long 1074266112 + .align 8 +.LC3: + .long 469762048 + .long 1100643759 + .align 8 +.LC4: + .long 3263446751 + .long 1048062144 + .align 8 +.LC5: + .long 0 + .long 1086556160 + .align 8 +.LC6: + .long 0 + .long 1072693248 + .align 8 +.LC7: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss2.s =================================================================== --- 
trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss2.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss2.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,446 @@ + .file "vectors.c" + .section .rodata.str1.8,"aMS",@progbits,1 + .align 8 +.LC0: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC6: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: +.LFB24: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + movq %rsp, %rbp + .cfi_offset 6, -16 + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4096, %rsp + cmpl $2, %edi + jne .L84 + movq 8(%rsi), %rdx + leaq 3072(%rsp), %rcx + movsbl (%rdx), %eax + movsd .LC2(%rip), %xmm0 + cvtsi2sd %eax, %xmm1 + movq %rcx, %rdx + mulsd .LC1(%rip), %xmm1 + leaq 4096(%rsp), %rax + cvtsd2ss %xmm1, %xmm1 + .p2align 4,,10 + .p2align 3 +.L4: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 4(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 8(%rdx) + cvtss2sd %xmm13, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 12(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 16(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 20(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 24(%rdx) + cvtss2sd %xmm5, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rax, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L4 + leaq 2048(%rsp), %rdx + movsd .LC3(%rip), %xmm4 + movq %rdx, %rsi + movsd .LC4(%rip), %xmm3 + addq $1024, %rsi + movsd .LC5(%rip), %xmm2 + .p2align 4,,10 + .p2align 3 +.L5: + cvtss2sd %xmm1, %xmm10 + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + 
mulsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, (%rdx) + addsd %xmm2, %xmm6 + mulsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 4(%rdx) + addsd %xmm2, %xmm14 + mulsd %xmm4, %xmm1 + cvtsd2ss %xmm14, %xmm13 + addsd %xmm3, %xmm1 + cvtss2sd %xmm13, %xmm10 + cvtsd2ss %xmm1, %xmm15 + movapd %xmm10, %xmm12 + movss %xmm15, 8(%rdx) + addsd %xmm2, %xmm10 + mulsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 12(%rdx) + addsd %xmm2, %xmm6 + mulsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 16(%rdx) + mulsd %xmm4, %xmm1 + addsd %xmm2, %xmm14 + addsd %xmm3, %xmm1 + cvtsd2ss %xmm14, %xmm13 + cvtsd2ss %xmm1, %xmm15 + cvtss2sd %xmm13, %xmm10 + movss %xmm15, 20(%rdx) + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + mulsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 24(%rdx) + mulsd %xmm4, %xmm8 + movapd %xmm6, %xmm5 + addsd %xmm3, %xmm8 + addsd %xmm2, %xmm5 + cvtsd2ss %xmm8, %xmm7 + cvtsd2ss %xmm5, %xmm1 + movss %xmm7, 28(%rdx) + addq $32, %rdx + cmpq %rsi, %rdx + jne .L5 + leaq 1024(%rsp), %rdx + movq %rdx, %rsi + addq $1024, %rsi + .p2align 4,,10 + .p2align 3 +.L6: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 4(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 8(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 12(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 16(%rdx) + cvtss2sd %xmm5, 
%xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 20(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 24(%rdx) + cvtss2sd %xmm13, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rsi, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L6 + movl $100000000, %esi + .p2align 4,,10 + .p2align 3 +.L7: + xorl %edx, %edx + .p2align 4,,10 + .p2align 3 +.L8: + movaps (%rcx,%rdx), %xmm1 + mulps 2048(%rsp,%rdx), %xmm1 + movaps %xmm1, (%rsp,%rdx) + movaps 16(%rcx,%rdx), %xmm15 + mulps 2064(%rsp,%rdx), %xmm15 + movaps %xmm15, 16(%rsp,%rdx) + movaps 32(%rcx,%rdx), %xmm14 + mulps 2080(%rsp,%rdx), %xmm14 + movaps %xmm14, 32(%rsp,%rdx) + movaps 48(%rcx,%rdx), %xmm13 + mulps 2096(%rsp,%rdx), %xmm13 + movaps %xmm13, 48(%rsp,%rdx) + movaps 64(%rcx,%rdx), %xmm4 + mulps 2112(%rsp,%rdx), %xmm4 + movaps %xmm4, 64(%rsp,%rdx) + movaps 80(%rcx,%rdx), %xmm3 + mulps 2128(%rsp,%rdx), %xmm3 + movaps %xmm3, 80(%rsp,%rdx) + movaps 96(%rcx,%rdx), %xmm2 + mulps 2144(%rsp,%rdx), %xmm2 + movaps %xmm2, 96(%rsp,%rdx) + movaps 112(%rcx,%rdx), %xmm0 + mulps 2160(%rsp,%rdx), %xmm0 + movaps %xmm0, 112(%rsp,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L8 + xorw %dx, %dx + .p2align 4,,10 + .p2align 3 +.L9: + movaps (%rsp,%rdx), %xmm12 + addps 1024(%rsp,%rdx), %xmm12 + movaps %xmm12, (%rcx,%rdx) + movaps 16(%rsp,%rdx), %xmm11 + addps 1040(%rsp,%rdx), %xmm11 + movaps %xmm11, 16(%rcx,%rdx) + movaps 32(%rsp,%rdx), %xmm10 + addps 1056(%rsp,%rdx), %xmm10 + movaps %xmm10, 32(%rcx,%rdx) + movaps 48(%rsp,%rdx), %xmm9 + addps 1072(%rsp,%rdx), %xmm9 + movaps %xmm9, 48(%rcx,%rdx) + movaps 64(%rsp,%rdx), %xmm8 + addps 1088(%rsp,%rdx), %xmm8 + movaps %xmm8, 64(%rcx,%rdx) + movaps 80(%rsp,%rdx), %xmm7 + addps 1104(%rsp,%rdx), %xmm7 + movaps %xmm7, 80(%rcx,%rdx) + movaps 96(%rsp,%rdx), %xmm6 + addps 1120(%rsp,%rdx), %xmm6 + movaps %xmm6, 96(%rcx,%rdx) + movaps 112(%rsp,%rdx), 
%xmm5 + addps 1136(%rsp,%rdx), %xmm5 + movaps %xmm5, 112(%rcx,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L9 + decl %esi + jne .L7 + movaps (%rcx), %xmm0 + movq %rax, %rdx + cvtps2pd %xmm0, %xmm2 + subq %rcx, %rdx + xorps %xmm1, %xmm1 + subq $16, %rdx + movhlps %xmm0, %xmm1 + shrq $4, %rdx + cvtps2pd %xmm1, %xmm0 + andl $7, %edx + addq $16, %rcx + addpd %xmm2, %xmm0 + cmpq %rax, %rcx + je .L82 + testq %rdx, %rdx + je .L11 + cmpq $1, %rdx + je .L76 + cmpq $2, %rdx + je .L77 + cmpq $3, %rdx + je .L78 + cmpq $4, %rdx + je .L79 + cmpq $5, %rdx + je .L80 + cmpq $6, %rdx + je .L81 + movaps (%rcx), %xmm9 + addq $16, %rcx + cvtps2pd %xmm9, %xmm8 + movhlps %xmm9, %xmm1 + addpd %xmm0, %xmm8 + cvtps2pd %xmm1, %xmm0 + addpd %xmm8, %xmm0 +.L81: + movaps (%rcx), %xmm11 + addq $16, %rcx + cvtps2pd %xmm11, %xmm10 + movhlps %xmm11, %xmm1 + addpd %xmm0, %xmm10 + cvtps2pd %xmm1, %xmm0 + addpd %xmm10, %xmm0 +.L80: + movaps (%rcx), %xmm2 + addq $16, %rcx + cvtps2pd %xmm2, %xmm12 + movhlps %xmm2, %xmm1 + addpd %xmm0, %xmm12 + cvtps2pd %xmm1, %xmm0 + addpd %xmm12, %xmm0 +.L79: + movaps (%rcx), %xmm3 + addq $16, %rcx + cvtps2pd %xmm3, %xmm4 + movhlps %xmm3, %xmm1 + addpd %xmm0, %xmm4 + cvtps2pd %xmm1, %xmm0 + addpd %xmm4, %xmm0 +.L78: + movaps (%rcx), %xmm14 + addq $16, %rcx + cvtps2pd %xmm14, %xmm13 + movhlps %xmm14, %xmm1 + addpd %xmm0, %xmm13 + cvtps2pd %xmm1, %xmm0 + addpd %xmm13, %xmm0 +.L77: + movaps (%rcx), %xmm5 + addq $16, %rcx + cvtps2pd %xmm5, %xmm15 + movhlps %xmm5, %xmm1 + addpd %xmm0, %xmm15 + cvtps2pd %xmm1, %xmm0 + addpd %xmm15, %xmm0 +.L76: + movaps (%rcx), %xmm7 + addq $16, %rcx + cvtps2pd %xmm7, %xmm6 + movhlps %xmm7, %xmm1 + addpd %xmm0, %xmm6 + cmpq %rax, %rcx + cvtps2pd %xmm1, %xmm0 + addpd %xmm6, %xmm0 + je .L82 + .p2align 4,,10 + .p2align 3 +.L11: + movaps (%rcx), %xmm7 + movaps 16(%rcx), %xmm15 + movhlps %xmm7, %xmm1 + movaps 32(%rcx), %xmm3 + movaps 48(%rcx), %xmm2 + movaps 64(%rcx), %xmm10 + cvtps2pd %xmm7, %xmm6 + cvtps2pd %xmm1, %xmm5 + addpd %xmm0, 
%xmm6 + movaps 80(%rcx), %xmm7 + addpd %xmm6, %xmm5 + cvtps2pd %xmm15, %xmm14 + cvtps2pd %xmm3, %xmm4 + addpd %xmm5, %xmm14 + cvtps2pd %xmm2, %xmm12 + movaps %xmm1, %xmm5 + cvtps2pd %xmm10, %xmm9 + movhlps %xmm15, %xmm5 + cvtps2pd %xmm7, %xmm1 + movaps 96(%rcx), %xmm15 + cvtps2pd %xmm5, %xmm13 + movhlps %xmm3, %xmm5 + addpd %xmm14, %xmm13 + movaps 112(%rcx), %xmm3 + addpd %xmm13, %xmm4 + cvtps2pd %xmm5, %xmm0 + cvtps2pd %xmm15, %xmm14 + addpd %xmm4, %xmm0 + movhlps %xmm2, %xmm5 + addpd %xmm0, %xmm12 + cvtps2pd %xmm5, %xmm11 + cvtps2pd %xmm3, %xmm4 + movhlps %xmm10, %xmm5 + addpd %xmm12, %xmm11 + cvtps2pd %xmm5, %xmm8 + addpd %xmm11, %xmm9 + movhlps %xmm7, %xmm5 + addpd %xmm9, %xmm8 + cvtps2pd %xmm5, %xmm6 + addpd %xmm8, %xmm1 + subq $-128, %rcx + addpd %xmm1, %xmm6 + cmpq %rax, %rcx + addpd %xmm6, %xmm14 + movaps %xmm5, %xmm1 + movhlps %xmm15, %xmm1 + cvtps2pd %xmm1, %xmm13 + movhlps %xmm3, %xmm1 + addpd %xmm14, %xmm13 + cvtps2pd %xmm1, %xmm0 + addpd %xmm13, %xmm4 + addpd %xmm4, %xmm0 + jne .L11 +.L82: + haddpd %xmm0, %xmm0 + movl $.LC6, %esi + movl $1, %edi + movl $1, %eax + call __printf_chk + xorl %eax, %eax + leave + .cfi_remember_state + .cfi_def_cfa 7, 8 + ret +.L84: + .cfi_restore_state + movq stderr(%rip), %rcx + movl $75, %edx + movl $1, %esi + movl $.LC0, %edi + call fwrite + orl $-1, %eax + leave + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE24: + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC1: + .long 406720020 + .long 1046281652 + .align 8 +.LC2: + .long 3263446751 + .long 1048062144 + .align 8 +.LC3: + .long 3944497965 + .long 1058682594 + .align 8 +.LC4: + .long 0 + .long 1072693248 + .align 8 +.LC5: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss3.s =================================================================== --- 
trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss3.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect4.ss3.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,446 @@ + .file "vectors.c" + .section .rodata.str1.8,"aMS",@progbits,1 + .align 8 +.LC0: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC6: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: +.LFB24: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + movq %rsp, %rbp + .cfi_offset 6, -16 + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4096, %rsp + cmpl $2, %edi + jne .L84 + movq 8(%rsi), %rdx + leaq 3072(%rsp), %rcx + movsbl (%rdx), %eax + movsd .LC2(%rip), %xmm0 + cvtsi2sd %eax, %xmm1 + movq %rcx, %rdx + mulsd .LC1(%rip), %xmm1 + leaq 4096(%rsp), %rax + cvtsd2ss %xmm1, %xmm1 + .p2align 4,,10 + .p2align 3 +.L4: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 4(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 8(%rdx) + cvtss2sd %xmm13, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 12(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 16(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 20(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 24(%rdx) + cvtss2sd %xmm5, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rax, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L4 + leaq 2048(%rsp), %rdx + movsd .LC3(%rip), %xmm4 + movq %rdx, %rsi + movsd .LC4(%rip), %xmm3 + addq $1024, %rsi + movsd .LC5(%rip), %xmm2 + .p2align 4,,10 + .p2align 3 +.L5: + cvtss2sd %xmm1, %xmm10 + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + 
mulsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, (%rdx) + addsd %xmm2, %xmm6 + mulsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 4(%rdx) + addsd %xmm2, %xmm14 + mulsd %xmm4, %xmm1 + cvtsd2ss %xmm14, %xmm13 + addsd %xmm3, %xmm1 + cvtss2sd %xmm13, %xmm10 + cvtsd2ss %xmm1, %xmm15 + movapd %xmm10, %xmm12 + movss %xmm15, 8(%rdx) + addsd %xmm2, %xmm10 + mulsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 12(%rdx) + addsd %xmm2, %xmm6 + mulsd %xmm4, %xmm8 + cvtsd2ss %xmm6, %xmm5 + addsd %xmm3, %xmm8 + cvtss2sd %xmm5, %xmm14 + cvtsd2ss %xmm8, %xmm7 + movapd %xmm14, %xmm1 + movss %xmm7, 16(%rdx) + mulsd %xmm4, %xmm1 + addsd %xmm2, %xmm14 + addsd %xmm3, %xmm1 + cvtsd2ss %xmm14, %xmm13 + cvtsd2ss %xmm1, %xmm15 + cvtss2sd %xmm13, %xmm10 + movss %xmm15, 20(%rdx) + movapd %xmm10, %xmm12 + addsd %xmm2, %xmm10 + mulsd %xmm4, %xmm12 + cvtsd2ss %xmm10, %xmm9 + addsd %xmm3, %xmm12 + cvtss2sd %xmm9, %xmm6 + cvtsd2ss %xmm12, %xmm11 + movapd %xmm6, %xmm8 + movss %xmm11, 24(%rdx) + mulsd %xmm4, %xmm8 + movapd %xmm6, %xmm5 + addsd %xmm3, %xmm8 + addsd %xmm2, %xmm5 + cvtsd2ss %xmm8, %xmm7 + cvtsd2ss %xmm5, %xmm1 + movss %xmm7, 28(%rdx) + addq $32, %rdx + cmpq %rsi, %rdx + jne .L5 + leaq 1024(%rsp), %rdx + movq %rdx, %rsi + addq $1024, %rsi + .p2align 4,,10 + .p2align 3 +.L6: + movss %xmm1, (%rdx) + cvtss2sd %xmm1, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 4(%rdx) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 8(%rdx) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 12(%rdx) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 16(%rdx) + cvtss2sd %xmm5, 
%xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 20(%rdx) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 24(%rdx) + cvtss2sd %xmm13, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rdx) + cvtss2sd %xmm3, %xmm2 + addq $32, %rdx + addsd %xmm0, %xmm2 + cmpq %rsi, %rdx + cvtsd2ss %xmm2, %xmm1 + jne .L6 + movl $100000000, %esi + .p2align 4,,10 + .p2align 3 +.L7: + xorl %edx, %edx + .p2align 4,,10 + .p2align 3 +.L8: + movaps (%rcx,%rdx), %xmm1 + mulps 2048(%rsp,%rdx), %xmm1 + movaps %xmm1, (%rsp,%rdx) + movaps 16(%rcx,%rdx), %xmm15 + mulps 2064(%rsp,%rdx), %xmm15 + movaps %xmm15, 16(%rsp,%rdx) + movaps 32(%rcx,%rdx), %xmm14 + mulps 2080(%rsp,%rdx), %xmm14 + movaps %xmm14, 32(%rsp,%rdx) + movaps 48(%rcx,%rdx), %xmm13 + mulps 2096(%rsp,%rdx), %xmm13 + movaps %xmm13, 48(%rsp,%rdx) + movaps 64(%rcx,%rdx), %xmm4 + mulps 2112(%rsp,%rdx), %xmm4 + movaps %xmm4, 64(%rsp,%rdx) + movaps 80(%rcx,%rdx), %xmm3 + mulps 2128(%rsp,%rdx), %xmm3 + movaps %xmm3, 80(%rsp,%rdx) + movaps 96(%rcx,%rdx), %xmm2 + mulps 2144(%rsp,%rdx), %xmm2 + movaps %xmm2, 96(%rsp,%rdx) + movaps 112(%rcx,%rdx), %xmm0 + mulps 2160(%rsp,%rdx), %xmm0 + movaps %xmm0, 112(%rsp,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L8 + xorw %dx, %dx + .p2align 4,,10 + .p2align 3 +.L9: + movaps (%rsp,%rdx), %xmm12 + addps 1024(%rsp,%rdx), %xmm12 + movaps %xmm12, (%rcx,%rdx) + movaps 16(%rsp,%rdx), %xmm11 + addps 1040(%rsp,%rdx), %xmm11 + movaps %xmm11, 16(%rcx,%rdx) + movaps 32(%rsp,%rdx), %xmm10 + addps 1056(%rsp,%rdx), %xmm10 + movaps %xmm10, 32(%rcx,%rdx) + movaps 48(%rsp,%rdx), %xmm9 + addps 1072(%rsp,%rdx), %xmm9 + movaps %xmm9, 48(%rcx,%rdx) + movaps 64(%rsp,%rdx), %xmm8 + addps 1088(%rsp,%rdx), %xmm8 + movaps %xmm8, 64(%rcx,%rdx) + movaps 80(%rsp,%rdx), %xmm7 + addps 1104(%rsp,%rdx), %xmm7 + movaps %xmm7, 80(%rcx,%rdx) + movaps 96(%rsp,%rdx), %xmm6 + addps 1120(%rsp,%rdx), %xmm6 + movaps %xmm6, 96(%rcx,%rdx) + movaps 112(%rsp,%rdx), 
%xmm5 + addps 1136(%rsp,%rdx), %xmm5 + movaps %xmm5, 112(%rcx,%rdx) + subq $-128, %rdx + cmpq $1024, %rdx + jne .L9 + decl %esi + jne .L7 + movaps (%rcx), %xmm0 + movq %rax, %rdx + cvtps2pd %xmm0, %xmm2 + subq %rcx, %rdx + xorps %xmm1, %xmm1 + subq $16, %rdx + movhlps %xmm0, %xmm1 + shrq $4, %rdx + cvtps2pd %xmm1, %xmm0 + andl $7, %edx + addq $16, %rcx + addpd %xmm2, %xmm0 + cmpq %rax, %rcx + je .L82 + testq %rdx, %rdx + je .L11 + cmpq $1, %rdx + je .L76 + cmpq $2, %rdx + je .L77 + cmpq $3, %rdx + je .L78 + cmpq $4, %rdx + je .L79 + cmpq $5, %rdx + je .L80 + cmpq $6, %rdx + je .L81 + movaps (%rcx), %xmm9 + addq $16, %rcx + cvtps2pd %xmm9, %xmm8 + movhlps %xmm9, %xmm1 + addpd %xmm0, %xmm8 + cvtps2pd %xmm1, %xmm0 + addpd %xmm8, %xmm0 +.L81: + movaps (%rcx), %xmm11 + addq $16, %rcx + cvtps2pd %xmm11, %xmm10 + movhlps %xmm11, %xmm1 + addpd %xmm0, %xmm10 + cvtps2pd %xmm1, %xmm0 + addpd %xmm10, %xmm0 +.L80: + movaps (%rcx), %xmm2 + addq $16, %rcx + cvtps2pd %xmm2, %xmm12 + movhlps %xmm2, %xmm1 + addpd %xmm0, %xmm12 + cvtps2pd %xmm1, %xmm0 + addpd %xmm12, %xmm0 +.L79: + movaps (%rcx), %xmm3 + addq $16, %rcx + cvtps2pd %xmm3, %xmm4 + movhlps %xmm3, %xmm1 + addpd %xmm0, %xmm4 + cvtps2pd %xmm1, %xmm0 + addpd %xmm4, %xmm0 +.L78: + movaps (%rcx), %xmm14 + addq $16, %rcx + cvtps2pd %xmm14, %xmm13 + movhlps %xmm14, %xmm1 + addpd %xmm0, %xmm13 + cvtps2pd %xmm1, %xmm0 + addpd %xmm13, %xmm0 +.L77: + movaps (%rcx), %xmm5 + addq $16, %rcx + cvtps2pd %xmm5, %xmm15 + movhlps %xmm5, %xmm1 + addpd %xmm0, %xmm15 + cvtps2pd %xmm1, %xmm0 + addpd %xmm15, %xmm0 +.L76: + movaps (%rcx), %xmm7 + addq $16, %rcx + cvtps2pd %xmm7, %xmm6 + movhlps %xmm7, %xmm1 + addpd %xmm0, %xmm6 + cmpq %rax, %rcx + cvtps2pd %xmm1, %xmm0 + addpd %xmm6, %xmm0 + je .L82 + .p2align 4,,10 + .p2align 3 +.L11: + movaps (%rcx), %xmm7 + movaps 16(%rcx), %xmm15 + movhlps %xmm7, %xmm1 + movaps 32(%rcx), %xmm3 + movaps 48(%rcx), %xmm2 + movaps 64(%rcx), %xmm10 + cvtps2pd %xmm7, %xmm6 + cvtps2pd %xmm1, %xmm5 + addpd %xmm0, 
%xmm6 + movaps 80(%rcx), %xmm7 + addpd %xmm6, %xmm5 + cvtps2pd %xmm15, %xmm14 + cvtps2pd %xmm3, %xmm4 + addpd %xmm5, %xmm14 + cvtps2pd %xmm2, %xmm12 + movaps %xmm1, %xmm5 + cvtps2pd %xmm10, %xmm9 + movhlps %xmm15, %xmm5 + cvtps2pd %xmm7, %xmm1 + movaps 96(%rcx), %xmm15 + cvtps2pd %xmm5, %xmm13 + movhlps %xmm3, %xmm5 + addpd %xmm14, %xmm13 + movaps 112(%rcx), %xmm3 + addpd %xmm13, %xmm4 + cvtps2pd %xmm5, %xmm0 + cvtps2pd %xmm15, %xmm14 + addpd %xmm4, %xmm0 + movhlps %xmm2, %xmm5 + addpd %xmm0, %xmm12 + cvtps2pd %xmm5, %xmm11 + cvtps2pd %xmm3, %xmm4 + movhlps %xmm10, %xmm5 + addpd %xmm12, %xmm11 + cvtps2pd %xmm5, %xmm8 + addpd %xmm11, %xmm9 + movhlps %xmm7, %xmm5 + addpd %xmm9, %xmm8 + cvtps2pd %xmm5, %xmm6 + addpd %xmm8, %xmm1 + subq $-128, %rcx + addpd %xmm1, %xmm6 + cmpq %rax, %rcx + addpd %xmm6, %xmm14 + movaps %xmm5, %xmm1 + movhlps %xmm15, %xmm1 + cvtps2pd %xmm1, %xmm13 + movhlps %xmm3, %xmm1 + addpd %xmm14, %xmm13 + cvtps2pd %xmm1, %xmm0 + addpd %xmm13, %xmm4 + addpd %xmm4, %xmm0 + jne .L11 +.L82: + haddpd %xmm0, %xmm0 + movl $.LC6, %esi + movl $1, %edi + movl $1, %eax + call __printf_chk + xorl %eax, %eax + leave + .cfi_remember_state + .cfi_def_cfa 7, 8 + ret +.L84: + .cfi_restore_state + movq stderr(%rip), %rcx + movl $75, %edx + movl $1, %esi + movl $.LC0, %edi + call fwrite + orl $-1, %eax + leave + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE24: + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC1: + .long 406720020 + .long 1046281652 + .align 8 +.LC2: + .long 3263446751 + .long 1048062144 + .align 8 +.LC3: + .long 3944497965 + .long 1058682594 + .align 8 +.LC4: + .long 0 + .long 1072693248 + .align 8 +.LC5: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect8.s =================================================================== --- 
trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect8.s (rev 0) +++ trunk/extra/tests/ubuntu-64bit.asm/vectors.gcc.4.5.2.vect8.s 2011-07-26 09:49:43 UTC (rev 354) @@ -0,0 +1,455 @@ + .file "vectors.c" + .section .rodata.str1.8,"aMS",@progbits,1 + .align 8 +.LC1: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC8: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: +.LFB24: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + movq %rsp, %rbp + .cfi_offset 6, -16 + .cfi_def_cfa_register 6 + andq $-32, %rsp + subq $4352, %rsp + cmpl $2, %edi + jne .L72 + movq 8(%rsi), %rdx + movsd .LC4(%rip), %xmm0 + movsbl (%rdx), %eax + leaq 3136(%rsp), %rdx + cvtsi2sd %eax, %xmm1 + leaq 1024(%rdx), %r8 + mulsd .LC2(%rip), %xmm1 + movq %rdx, %rax + divsd .LC3(%rip), %xmm1 + cvtsd2ss %xmm1, %xmm1 +.L4: + movss %xmm1, (%rax) + cvtss2sd %xmm1, %xmm1 + addsd %xmm0, %xmm1 + cvtsd2ss %xmm1, %xmm15 + movss %xmm15, 4(%rax) + cvtss2sd %xmm15, %xmm14 + addsd %xmm0, %xmm14 + cvtsd2ss %xmm14, %xmm13 + movss %xmm13, 8(%rax) + cvtss2sd %xmm13, %xmm12 + addsd %xmm0, %xmm12 + cvtsd2ss %xmm12, %xmm11 + movss %xmm11, 12(%rax) + cvtss2sd %xmm11, %xmm10 + addsd %xmm0, %xmm10 + cvtsd2ss %xmm10, %xmm9 + movss %xmm9, 16(%rax) + cvtss2sd %xmm9, %xmm8 + addsd %xmm0, %xmm8 + cvtsd2ss %xmm8, %xmm7 + movss %xmm7, 20(%rax) + cvtss2sd %xmm7, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm5 + movss %xmm5, 24(%rax) + cvtss2sd %xmm5, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rax) + cvtss2sd %xmm3, %xmm2 + addq $32, %rax + addsd %xmm0, %xmm2 + cmpq %r8, %rax + cvtsd2ss %xmm2, %xmm1 + jne .L4 + leaq 2112(%rsp), %rax + movsd .LC5(%rip), %xmm4 + movq %rax, %rcx + movsd .LC6(%rip), %xmm3 + addq $1024, %rcx + movsd .LC7(%rip), %xmm2 +.L5: + cvtss2sd %xmm1, %xmm9 + movapd %xmm9, %xmm11 + addsd %xmm2, %xmm9 + divsd %xmm4, %xmm11 + cvtsd2ss %xmm9, %xmm8 + 
addsd %xmm3, %xmm11 + cvtss2sd %xmm8, %xmm6 + cvtsd2ss %xmm11, %xmm10 + movapd %xmm6, %xmm7 + movss %xmm10, (%rax) + addsd %xmm2, %xmm6 + divsd %xmm4, %xmm7 + cvtsd2ss %xmm6, %xmm1 + addsd %xmm3, %xmm7 + cvtss2sd %xmm1, %xmm13 + cvtsd2ss %xmm7, %xmm5 + movapd %xmm13, %xmm15 + movss %xmm5, 4(%rax) + addsd %xmm2, %xmm13 + divsd %xmm4, %xmm15 + cvtsd2ss %xmm13, %xmm12 + addsd %xmm3, %xmm15 + cvtss2sd %xmm12, %xmm9 + cvtsd2ss %xmm15, %xmm14 + movapd %xmm9, %xmm11 + movss %xmm14, 8(%rax) + addsd %xmm2, %xmm9 + divsd %xmm4, %xmm11 + cvtsd2ss %xmm9, %xmm8 + addsd %xmm3, %xmm11 + cvtss2sd %xmm8, %xmm6 + cvtsd2ss %xmm11, %xmm10 + movapd %xmm6, %xmm7 + movss %xmm10, 12(%rax) + addsd %xmm2, %xmm6 + divsd %xmm4, %xmm7 + cvtsd2ss %xmm6, %xmm1 + addsd %xmm3, %xmm7 + cvtss2sd %xmm1, %xmm13 + cvtsd2ss %xmm7, %xmm5 + movapd %xmm13, %xmm15 + movss %xmm5, 16(%rax) + divsd %xmm4, %xmm15 + addsd %xmm2, %xmm13 + addsd %xmm3, %xmm15 + cvtsd2ss %xmm13, %xmm12 + cvtsd2ss %xmm15, %xmm14 + cvtss2sd %xmm12, %xmm9 + movss %xmm14, 20(%rax) + movapd %xmm9, %xmm11 + addsd %xmm2, %xmm9 + divsd %xmm4, %xmm11 + cvtsd2ss %xmm9, %xmm8 + addsd %xmm3, %xmm11 + cvtss2sd %xmm8, %xmm6 + cvtsd2ss %xmm11, %xmm10 + movapd %xmm6, %xmm7 + movss %xmm10, 24(%rax) + divsd %xmm4, %xmm7 + addsd %xmm2, %xmm6 + addsd %xmm3, %xmm7 + cvtsd2ss %xmm6, %xmm1 + cvtsd2ss %xmm7, %xmm5 + movss %xmm5, 28(%rax) + addq $32, %rax + cmpq %rcx, %rax + jne .L5 + leaq 1088(%rsp), %rax + movq %rax, %rcx + addq $1024, %rcx +.L6: + movss %xmm1, (%rax) + cvtss2sd %xmm1, %xmm11 + addsd %xmm0, %xmm11 + cvtsd2ss %xmm11, %xmm10 + movss %xmm10, 4(%rax) + cvtss2sd %xmm10, %xmm9 + addsd %xmm0, %xmm9 + cvtsd2ss %xmm9, %xmm8 + movss %xmm8, 8(%rax) + cvtss2sd %xmm8, %xmm7 + addsd %xmm0, %xmm7 + cvtsd2ss %xmm7, %xmm5 + movss %xmm5, 12(%rax) + cvtss2sd %xmm5, %xmm6 + addsd %xmm0, %xmm6 + cvtsd2ss %xmm6, %xmm1 + movss %xmm1, 16(%rax) + cvtss2sd %xmm1, %xmm15 + addsd %xmm0, %xmm15 + cvtsd2ss %xmm15, %xmm14 + movss %xmm14, 20(%rax) + cvtss2sd %xmm14, 
%xmm13 + addsd %xmm0, %xmm13 + cvtsd2ss %xmm13, %xmm12 + movss %xmm12, 24(%rax) + cvtss2sd %xmm12, %xmm4 + addsd %xmm0, %xmm4 + cvtsd2ss %xmm4, %xmm3 + movss %xmm3, 28(%rax) + cvtss2sd %xmm3, %xmm2 + addq $32, %rax + addsd %xmm0, %xmm2 + cmpq %rcx, %rax + cvtsd2ss %xmm2, %xmm1 + jne .L6 + movl $100000000, %edi + .p2align 4,,10 + .p2align 3 +.L7: + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L8: + movq (%rdx,%rax), %r10 + movq %r10, 4320(%rsp) + movq 8(%rdx,%rax), %r9 + movq %r9, 4328(%rsp) + movq 16(%rdx,%rax), %rcx + movaps 4320(%rsp), %xmm4 + movq %rcx, 4336(%rsp) + movq 24(%rdx,%rax), %rsi + movq %rsi, 4344(%rsp) + movq 2112(%rsp,%rax), %r11 + movaps 4336(%rsp), %xmm3 + movq %r11, 4288(%rsp) + movq 2120(%rsp,%rax), %r10 + movq %r10, 4296(%rsp) + movq 2128(%rsp,%rax), %r9 + mulps 4288(%rsp), %xmm4 + movq %r9, 4304(%rsp) + movq 2136(%rsp,%rax), %rcx + movaps %xmm4, 32(%rsp) + movq %rcx, 4312(%rsp) + movaps %xmm4, 16(%rsp) + mulps 4304(%rsp), %xmm3 + movq 16(%rsp), %rsi + movaps %xmm3, 48(%rsp) + movq %rsi, 64(%rsp,%rax) + movq 40(%rsp), %r11 + movq %r11, 72(%rsp,%rax) + movq 48(%rsp), %r10 + movq %r10, 80(%rsp,%rax) + movq 56(%rsp), %r9 + movq %r9, 88(%rsp,%rax) + movq 32(%rdx,%rax), %rcx + movq %rcx, 4320(%rsp) + movq 40(%rdx,%rax), %rsi + movq %rsi, 4328(%rsp) + movq 48(%rdx,%rax), %r11 + movaps 4320(%rsp), %xmm2 + movq %r11, 4336(%rsp) + movq 56(%rdx,%rax), %r10 + movq %r10, 4344(%rsp) + movq 2144(%rsp,%rax), %r9 + movaps 4336(%rsp), %xmm0 + movq %r9, 4288(%rsp) + movq 2152(%rsp,%rax), %rcx + movq %rcx, 4296(%rsp) + movq 2160(%rsp,%rax), %rsi + mulps 4288(%rsp), %xmm2 + movq %rsi, 4304(%rsp) + movq 2168(%rsp,%rax), %r11 + movaps %xmm2, 16(%rsp) + movq %r11, 4312(%rsp) + movaps %xmm2, 32(%rsp) + mulps 4304(%rsp), %xmm0 + movq 16(%rsp), %r10 + movaps %xmm0, 48(%rsp) + movq %r10, 96(%rsp,%rax) + movq 40(%rsp), %r9 + movq %r9, 104(%rsp,%rax) + movq 48(%rsp), %rcx + movq %rcx, 112(%rsp,%rax) + movq 56(%rsp), %rsi + movq %rsi, 120(%rsp,%rax) + addq $64, %rax + 
cmpq $1024, %rax + jne .L8 + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L9: + movq 64(%rsp,%rax), %r9 + movq %r9, 4224(%rsp) + movq 72(%rsp,%rax), %rcx + movq %rcx, 4232(%rsp) + movq 80(%rsp,%rax), %rsi + movaps 4224(%rsp), %xmm15 + movq %rsi, 4240(%rsp) + movq 88(%rsp,%rax), %r11 + movq %r11, 4248(%rsp) + movq 1088(%rsp,%rax), %r10 + movaps 4240(%rsp), %xmm14 + movq %r10, 4192(%rsp) + movq 1096(%rsp,%rax), %r9 + movq %r9, 4200(%rsp) + movq 1104(%rsp,%rax), %rcx + addps 4192(%rsp), %xmm15 + movq %rcx, 4208(%rsp) + movq 1112(%rsp,%rax), %rsi + movaps %xmm15, 32(%rsp) + movq %rsi, 4216(%rsp) + movaps %xmm15, 16(%rsp) + addps 4208(%rsp), %xmm14 + movq 16(%rsp), %r11 + movaps %xmm14, 48(%rsp) + movq %r11, (%rdx,%rax) + movq 40(%rsp), %r10 + movq %r10, 8(%rdx,%rax) + movq 48(%rsp), %r9 + movq %r9, 16(%rdx,%rax) + movq 56(%rsp), %rcx + movq %rcx, 24(%rdx,%rax) + movq 96(%rsp,%rax), %rsi + movq %rsi, 4224(%rsp) + movq 104(%rsp,%rax), %r11 + movq %r11, 4232(%rsp) + movq 112(%rsp,%rax), %r10 + movaps 4224(%rsp), %xmm13 + movq %r10, 4240(%rsp) + movq 120(%rsp,%rax), %r9 + movq %r9, 4248(%rsp) + movq 1120(%rsp,%rax), %rcx + movaps 4240(%rsp), %xmm12 + movq %rcx, 4192(%rsp) + movq 1128(%rsp,%rax), %rsi + movq %rsi, 4200(%rsp) + movq 1136(%rsp,%rax), %r11 + addps 4192(%rsp), %xmm13 + movq %r11, 4208(%rsp) + movq 1144(%rsp,%rax), %r10 + movaps %xmm13, 16(%rsp) + movq %r10, 4216(%rsp) + movaps %xmm13, 32(%rsp) + addps 4208(%rsp), %xmm12 + movq 16(%rsp), %r9 + movaps %xmm12, 48(%rsp) + movq %r9, 32(%rdx,%rax) + movq 40(%rsp), %rcx + movq %rcx, 40(%rdx,%rax) + movq 48(%rsp), %rsi + movq %rsi, 48(%rdx,%rax) + movq 56(%rsp), %r11 + movq %r11, 56(%rdx,%rax) + addq $64, %rax + cmpq $1024, %rax + jne .L9 + decl %edi + jne .L7 + cvtss2sd (%rdx), %xmm0 + movq %r8, %rax + addsd .LC0(%rip), %xmm0 + subq %rdx, %rax + addq $4, %rdx + subq $4, %rax + shrq $2, %rax + andl $7, %eax + cmpq %r8, %rdx + je .L70 + testq %rax, %rax + je .L10 + cmpq $1, %rax + je .L64 + cmpq $2, %rax + je .L65 + 
cmpq $3, %rax + je .L66 + cmpq $4, %rax + je .L67 + cmpq $5, %rax + je .L68 + cmpq $6, %rax + je .L69 + cvtss2sd (%rdx), %xmm2 + addq $4, %rdx + addsd %xmm2, %xmm0 +.L69: + cvtss2sd (%rdx), %xmm3 + addq $4, %rdx + addsd %xmm3, %xmm0 +.L68: + cvtss2sd (%rdx), %xmm4 + addq $4, %rdx + addsd %xmm4, %xmm0 +.L67: + cvtss2sd (%rdx), %xmm12 + addq $4, %rdx + addsd %xmm12, %xmm0 +.L66: + cvtss2sd (%rdx), %xmm13 + addq $4, %rdx + addsd %xmm13, %xmm0 +.L65: + cvtss2sd (%rdx), %xmm14 + addq $4, %rdx + addsd %xmm14, %xmm0 +.L64: + cvtss2sd (%rdx), %xmm15 + addq $4, %rdx + addsd %xmm15, %xmm0 + cmpq %r8, %rdx + je .L70 +.L10: + cvtss2sd (%rdx), %xmm11 + cvtss2sd 4(%rdx), %xmm10 + cvtss2sd 8(%rdx), %xmm9 + cvtss2sd 12(%rdx), %xmm8 + cvtss2sd 16(%rdx), %xmm7 + cvtss2sd 20(%rdx), %xmm5 + cvtss2sd 24(%rdx), %xmm6 + cvtss2sd 28(%rdx), %xmm1 + addsd %xmm11, %xmm0 + addq $32, %rdx + addsd %xmm10, %xmm0 + cmpq %r8, %rdx + addsd %xmm9, %xmm0 + addsd %xmm8, %xmm0 + addsd %xmm7, %xmm0 + addsd %xmm5, %xmm0 + addsd %xmm6, %xmm0 + addsd %xmm1, %xmm0 + jne .L10 +.L70: + movl $.LC8, %esi + movl $1, %edi + movl $1, %eax + call __printf_chk + xorl %eax, %eax + leave + .cfi_remember_state + .cfi_def_cfa 7, 8 + ret +.L72: + .cfi_restore_state + movq stderr(%rip), %rcx + movl $75, %edx + movl $1, %esi + movl $.LC1, %edi + call fwrite + orl $-1, %eax + leave + .cfi_def_cfa 7, 8 + ret + .cfi_endproc +.LFE24: + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC0: + .long 0 + .long 0 + .align 8 +.LC2: + .long 0 + .long 1074266112 + .align 8 +.LC3: + .long 469762048 + .long 1100643759 + .align 8 +.LC4: + .long 3263446751 + .long 1048062144 + .align 8 +.LC5: + .long 0 + .long 1086556160 + .align 8 +.LC6: + .long 0 + .long 1072693248 + .align 8 +.LC7: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source 
development site. |
From: <de...@us...> - 2011-07-26 09:39:54
|
Revision: 353 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=353&view=rev Author: dececco Date: 2011-07-26 09:39:47 +0000 (Tue, 26 Jul 2011) Log Message: ----------- added assembler generation script Added Paths: ----------- trunk/extra/tests/doAsmLinux.sh trunk/extra/tests/ubuntu-32bit.asm/ trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.s trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect4.s trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect8.s trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.s trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.s trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss2.s trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss3.s trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect8.s Added: trunk/extra/tests/doAsmLinux.sh =================================================================== --- trunk/extra/tests/doAsmLinux.sh (rev 0) +++ trunk/extra/tests/doAsmLinux.sh 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,67 @@ +#!/bin/bash + +# create directory for assembler + +/bin/rm -rf $1 +mkdir -p $1 + +# Without vector extension, gcc, gcc-llvm and clang + +FLAGS="-O3 -funroll-loops -march=core2" +CC="gcc -DGCC" +CCDESCR="gcc.4.5.2" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -funroll-loops -march=core2" +CC="clang -DGCC" +CCDESCR="clang" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +# With Vector Extensions, size 8 + +FLAGS="-O3 -funroll-loops -march=core2" +CC="gcc -DGCC -DHAS_VECT8_EXT" +CCDESCR="gcc.4.5.2.vect8" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + + +FLAGS="-O3 -funroll-loops -march=core2" +CC="clang -DGCC -DHAS_VECT8_EXT" +CCDESCR="clang.vect8" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + + +# With Vector Extensions, size 4 + +FLAGS="-O3 -funroll-loops -march=core2" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc.4.5.2.vect4" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + + +FLAGS="-O3 -funroll-loops -march=core2" 
+CC="clang -DGCC -DHAS_VECT4_EXT" +CCDESCR="clang.vect4" + + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +# gcc only, ss2 and ss3 + +FLAGS="-O3 -funroll-loops -march=core2 -msse2 -ffast-math" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc.4.5.2.vect4.ss2" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + +FLAGS="-O3 -funroll-loops -march=core2 -msse3 -ffast-math" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc.4.5.2.vect4.ss3" + +$CC $FLAGS vectors.c -S -o $1/vectors.$CCDESCR.s + Property changes on: trunk/extra/tests/doAsmLinux.sh ___________________________________________________________________ Added: svn:executable + * Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,140 @@ + .file "vectors.c" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4613937818241073152 +.LCPI0_1: + .quad 4727228949921267712 +.LCPI0_2: + .quad 4501392635919089375 +.LCPI0_3: + .quad 4666723172467343360 +.LCPI0_4: + .quad 4607182418800017408 +.LCPI0_5: + .quad 4516603487517057909 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4160, %esp + cmpl $2, 8(%ebp) + je .LBB0_2 + movl stderr, %eax + movl %eax, 12(%esp) + movl $1, 8(%esp) + movl $75, 4(%esp) + movl $.L.str, (%esp) + call fwrite + movl $-1, %eax + jmp .LBB0_17 +.LBB0_2: + movl 12(%ebp), %eax + movl 4(%eax), %eax + movsbl (%eax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd .LCPI0_0, %xmm0 + divsd .LCPI0_1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd .LCPI0_2, %xmm1 + .align 16, 0x90 +.LBB0_3: + movss %xmm0, 3104(%esp,%eax,4) + incl %eax + cmpl $256, %eax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_3 + xorl %eax, %eax + movsd 
.LCPI0_3, %xmm2 + movsd .LCPI0_4, %xmm3 + movsd .LCPI0_5, %xmm4 + .align 16, 0x90 +.LBB0_5: + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm5 + divsd %xmm2, %xmm5 + addsd %xmm3, %xmm5 + cvtsd2ss %xmm5, %xmm5 + movss %xmm5, 2080(%esp,%eax,4) + incl %eax + cmpl $256, %eax + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_5 + xorl %eax, %eax + .align 16, 0x90 +.LBB0_7: + movss %xmm0, 1056(%esp,%eax,4) + incl %eax + cmpl $256, %eax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_7 + movl $100000000, %eax + .align 16, 0x90 +.LBB0_9: + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_10: + movss 3104(%esp,%ecx,4), %xmm0 + mulss 2080(%esp,%ecx,4), %xmm0 + movss %xmm0, 32(%esp,%ecx,4) + incl %ecx + cmpl $256, %ecx + jne .LBB0_10 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_12: + movss 32(%esp,%ecx,4), %xmm0 + addss 1056(%esp,%ecx,4), %xmm0 + movss %xmm0, 3104(%esp,%ecx,4) + incl %ecx + cmpl $256, %ecx + jne .LBB0_12 + decl %eax + jne .LBB0_9 + xorl %eax, %eax + pxor %xmm0, %xmm0 + .align 16, 0x90 +.LBB0_15: + movss 3104(%esp,%eax,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incl %eax + cmpl $256, %eax + jne .LBB0_15 + movsd %xmm0, 4(%esp) + movl $.L.str1, (%esp) + call printf + xorl %eax, %eax +.LBB0_17: + movl %ebp, %esp + popl %ebp + ret +.Ltmp0: + .size main, .Ltmp0-main + + .type .L.str,@object + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 +.L.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + .size .L.str, 76 + + .type .L.str1,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str1: + .asciz "Result %f\n" + .size .L.str1, 11 + + + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect4.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect4.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect4.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 
+1,140 @@ + .file "vectors.c" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4613937818241073152 +.LCPI0_1: + .quad 4727228949921267712 +.LCPI0_2: + .quad 4501392635919089375 +.LCPI0_3: + .quad 4666723172467343360 +.LCPI0_4: + .quad 4607182418800017408 +.LCPI0_5: + .quad 4516603487517057909 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4160, %esp + cmpl $2, 8(%ebp) + je .LBB0_2 + movl stderr, %eax + movl %eax, 12(%esp) + movl $1, 8(%esp) + movl $75, 4(%esp) + movl $.L.str, (%esp) + call fwrite + movl $-1, %eax + jmp .LBB0_17 +.LBB0_2: + movl 12(%ebp), %eax + movl 4(%eax), %eax + movsbl (%eax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd .LCPI0_0, %xmm0 + divsd .LCPI0_1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd .LCPI0_2, %xmm1 + .align 16, 0x90 +.LBB0_3: + movss %xmm0, 3104(%esp,%eax,4) + incl %eax + cmpl $256, %eax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_3 + xorl %eax, %eax + movsd .LCPI0_3, %xmm2 + movsd .LCPI0_4, %xmm3 + movsd .LCPI0_5, %xmm4 + .align 16, 0x90 +.LBB0_5: + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm5 + divsd %xmm2, %xmm5 + addsd %xmm3, %xmm5 + cvtsd2ss %xmm5, %xmm5 + movss %xmm5, 2080(%esp,%eax,4) + incl %eax + cmpl $256, %eax + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_5 + xorl %eax, %eax + .align 16, 0x90 +.LBB0_7: + movss %xmm0, 1056(%esp,%eax,4) + incl %eax + cmpl $256, %eax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_7 + movl $100000000, %eax + .align 16, 0x90 +.LBB0_9: + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_10: + movaps 3104(%esp,%ecx), %xmm0 + mulps 2080(%esp,%ecx), %xmm0 + movaps %xmm0, 32(%esp,%ecx) + addl $16, %ecx + cmpl $1024, %ecx + jne .LBB0_10 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_12: + movaps 32(%esp,%ecx), %xmm0 + addps 1056(%esp,%ecx), %xmm0 + movaps %xmm0, 3104(%esp,%ecx) + addl $16, %ecx + cmpl 
$1024, %ecx + jne .LBB0_12 + decl %eax + jne .LBB0_9 + xorl %eax, %eax + pxor %xmm0, %xmm0 + .align 16, 0x90 +.LBB0_15: + movss 3104(%esp,%eax,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incl %eax + cmpl $256, %eax + jne .LBB0_15 + movsd %xmm0, 4(%esp) + movl $.L.str1, (%esp) + call printf + xorl %eax, %eax +.LBB0_17: + movl %ebp, %esp + popl %ebp + ret +.Ltmp0: + .size main, .Ltmp0-main + + .type .L.str,@object + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 +.L.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + .size .L.str, 76 + + .type .L.str1,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str1: + .asciz "Result %f\n" + .size .L.str1, 11 + + + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect8.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect8.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.clang.vect8.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,146 @@ + .file "vectors.c" + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LCPI0_0: + .quad 4613937818241073152 +.LCPI0_1: + .quad 4727228949921267712 +.LCPI0_2: + .quad 4501392635919089375 +.LCPI0_3: + .quad 4666723172467343360 +.LCPI0_4: + .quad 4607182418800017408 +.LCPI0_5: + .quad 4516603487517057909 + .text + .globl main + .align 16, 0x90 + .type main,@function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4160, %esp + cmpl $2, 8(%ebp) + je .LBB0_2 + movl stderr, %eax + movl %eax, 12(%esp) + movl $1, 8(%esp) + movl $75, 4(%esp) + movl $.L.str, (%esp) + call fwrite + movl $-1, %eax + jmp .LBB0_17 +.LBB0_2: + movl 12(%ebp), %eax + movl 4(%eax), %eax + movsbl (%eax), %eax + cvtsi2ss %eax, %xmm0 + cvtss2sd %xmm0, %xmm0 + mulsd .LCPI0_0, %xmm0 + divsd .LCPI0_1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + xorl %eax, %eax + movsd .LCPI0_2, %xmm1 + .align 16, 0x90 +.LBB0_3: + movss %xmm0, 
3104(%esp,%eax,4) + incl %eax + cmpl $256, %eax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_3 + xorl %eax, %eax + movsd .LCPI0_3, %xmm2 + movsd .LCPI0_4, %xmm3 + movsd .LCPI0_5, %xmm4 + .align 16, 0x90 +.LBB0_5: + cvtss2sd %xmm0, %xmm0 + movaps %xmm0, %xmm5 + divsd %xmm2, %xmm5 + addsd %xmm3, %xmm5 + cvtsd2ss %xmm5, %xmm5 + movss %xmm5, 2080(%esp,%eax,4) + incl %eax + cmpl $256, %eax + addsd %xmm4, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_5 + xorl %eax, %eax + .align 16, 0x90 +.LBB0_7: + movss %xmm0, 1056(%esp,%eax,4) + incl %eax + cmpl $256, %eax + cvtss2sd %xmm0, %xmm0 + addsd %xmm1, %xmm0 + cvtsd2ss %xmm0, %xmm0 + jne .LBB0_7 + movl $100000000, %eax + .align 16, 0x90 +.LBB0_9: + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_10: + movaps 3104(%esp,%ecx), %xmm0 + movaps 3120(%esp,%ecx), %xmm1 + mulps 2096(%esp,%ecx), %xmm1 + mulps 2080(%esp,%ecx), %xmm0 + movaps %xmm0, 32(%esp,%ecx) + movaps %xmm1, 48(%esp,%ecx) + addl $32, %ecx + cmpl $1024, %ecx + jne .LBB0_10 + xorl %ecx, %ecx + .align 16, 0x90 +.LBB0_12: + movaps 32(%esp,%ecx), %xmm0 + movaps 48(%esp,%ecx), %xmm1 + addps 1072(%esp,%ecx), %xmm1 + addps 1056(%esp,%ecx), %xmm0 + movaps %xmm0, 3104(%esp,%ecx) + movaps %xmm1, 3120(%esp,%ecx) + addl $32, %ecx + cmpl $1024, %ecx + jne .LBB0_12 + decl %eax + jne .LBB0_9 + xorl %eax, %eax + pxor %xmm0, %xmm0 + .align 16, 0x90 +.LBB0_15: + movss 3104(%esp,%eax,4), %xmm1 + cvtss2sd %xmm1, %xmm1 + addsd %xmm1, %xmm0 + incl %eax + cmpl $256, %eax + jne .LBB0_15 + movsd %xmm0, 4(%esp) + movl $.L.str1, (%esp) + call printf + xorl %eax, %eax +.LBB0_17: + movl %ebp, %esp + popl %ebp + ret +.Ltmp0: + .size main, .Ltmp0-main + + .type .L.str,@object + .section .rodata.str1.16,"aMS",@progbits,1 + .align 16 +.L.str: + .asciz "Usage:vectors <string>, where string is used as a seed for the computation\n" + .size .L.str, 76 + + .type .L.str1,@object + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str1: + .asciz "Result %f\n" + .size .L.str1, 11 + + + 
.section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,299 @@ + .file "vectors.c" + .section .rodata.str1.4,"aMS",@progbits,1 + .align 4 +.LC1: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC8: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4144, %esp + cmpl $2, 8(%ebp) + jne .L59 + movl 12(%ebp), %eax + movl 4(%eax), %ecx + xorl %eax, %eax + movsbw (%ecx), %dx + movw %dx, 42(%esp) + filds 42(%esp) + fmuls .LC2 + fdivl .LC3 + fstps 44(%esp) + flds 44(%esp) + fldl .LC4 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L4: + fsts 3120(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3124(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3128(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3132(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3136(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3140(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3144(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3148(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + fstps 44(%esp) + flds 44(%esp) + jne .L4 + fstp %st(1) + flds .LC5 + movw $1, %ax + fld %st(1) + fdiv %st(1), %st + fadds .LC6 + fstps 2096(%esp) + fxch %st(1) + faddl .LC7 + fstps 44(%esp) + flds 44(%esp) + .p2align 4,,10 + .p2align 3 +.L5: + fld %st(0) + fdiv %st(2), %st + fld1 + fadd %st, %st(1) + fxch %st(1) + fstps 2096(%esp,%eax,4) + fldl .LC7 + fadd %st, %st(2) + fxch 
%st(2) + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2100(%esp,%eax,4) + fadd %st(2), %st + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2104(%esp,%eax,4) + fadd %st(2), %st + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2108(%esp,%eax,4) + fadd %st(2), %st + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + faddp %st, %st(2) + fxch %st(1) + fstps 2112(%esp,%eax,4) + addl $5, %eax + faddp %st, %st(1) + cmpl $256, %eax + fstps 44(%esp) + flds 44(%esp) + jne .L5 + fstp %st(1) + xorw %ax, %ax + fldl .LC4 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L6: + fsts 1072(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1076(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1080(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1084(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1088(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1092(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1096(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1100(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + fstps 44(%esp) + flds 44(%esp) + jne .L6 + fstp %st(0) + fstp %st(0) + movl $100000000, %edx + .p2align 4,,10 + .p2align 3 +.L9: + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L7: + movaps 3120(%esp,%eax), %xmm7 + movaps 3136(%esp,%eax), %xmm6 + mulps 2096(%esp,%eax), %xmm7 + mulps 2112(%esp,%eax), %xmm6 + movaps 3152(%esp,%eax), %xmm5 + movaps 3168(%esp,%eax), %xmm4 + mulps 2128(%esp,%eax), %xmm5 + mulps 2144(%esp,%eax), %xmm4 + movaps 3184(%esp,%eax), %xmm3 + movaps 3200(%esp,%eax), %xmm2 + mulps 2160(%esp,%eax), %xmm3 + mulps 2176(%esp,%eax), %xmm2 + movaps 3216(%esp,%eax), %xmm1 + movaps 3232(%esp,%eax), %xmm0 + mulps 2192(%esp,%eax), %xmm1 + mulps 
2208(%esp,%eax), %xmm0 + movaps %xmm7, 48(%esp,%eax) + movaps %xmm6, 64(%esp,%eax) + movaps %xmm5, 80(%esp,%eax) + movaps %xmm4, 96(%esp,%eax) + movaps %xmm3, 112(%esp,%eax) + movaps %xmm2, 128(%esp,%eax) + movaps %xmm1, 144(%esp,%eax) + movaps %xmm0, 160(%esp,%eax) + subl $-128, %eax + cmpl $1024, %eax + jne .L7 + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L8: + movaps 48(%esp,%eax), %xmm7 + movaps 64(%esp,%eax), %xmm6 + addps 1072(%esp,%eax), %xmm7 + addps 1088(%esp,%eax), %xmm6 + movaps 80(%esp,%eax), %xmm5 + movaps 96(%esp,%eax), %xmm4 + addps 1104(%esp,%eax), %xmm5 + addps 1120(%esp,%eax), %xmm4 + movaps 112(%esp,%eax), %xmm3 + movaps 128(%esp,%eax), %xmm2 + addps 1136(%esp,%eax), %xmm3 + addps 1152(%esp,%eax), %xmm2 + movaps 144(%esp,%eax), %xmm1 + movaps 160(%esp,%eax), %xmm0 + addps 1168(%esp,%eax), %xmm1 + addps 1184(%esp,%eax), %xmm0 + movaps %xmm7, 3120(%esp,%eax) + movaps %xmm6, 3136(%esp,%eax) + movaps %xmm5, 3152(%esp,%eax) + movaps %xmm4, 3168(%esp,%eax) + movaps %xmm3, 3184(%esp,%eax) + movaps %xmm2, 3200(%esp,%eax) + movaps %xmm1, 3216(%esp,%eax) + movaps %xmm0, 3232(%esp,%eax) + subl $-128, %eax + cmpl $1024, %eax + jne .L8 + decl %edx + jne .L9 + fldz + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L10: + fadds 3120(%esp,%eax,4) + fadds 3124(%esp,%eax,4) + fadds 3128(%esp,%eax,4) + fadds 3132(%esp,%eax,4) + fadds 3136(%esp,%eax,4) + fadds 3140(%esp,%eax,4) + fadds 3144(%esp,%eax,4) + fadds 3148(%esp,%eax,4) + addl $8, %eax + cmpl $256, %eax + jne .L10 + fstpl 8(%esp) + movl $.LC8, 4(%esp) + movl $1, (%esp) + call __printf_chk + xorl %eax, %eax + leave + ret +.L59: + movl stderr, %eax + movl $75, 8(%esp) + movl %eax, 12(%esp) + movl $1, 4(%esp) + movl $.LC1, (%esp) + call fwrite + orl $-1, %eax + leave + ret + .size main, .-main + .section .rodata.cst4,"aM",@progbits,4 + .align 4 +.LC2: + .long 1077936128 + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC3: + .long 469762048 + .long 1100643759 + .align 8 +.LC4: + .long -1031520545 + .long 
1048062144 + .section .rodata.cst4 + .align 4 +.LC5: + .long 1176256512 + .align 4 +.LC6: + .long 1065353216 + .section .rodata.cst8 + .align 8 +.LC7: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,314 @@ + .file "vectors.c" + .section .rodata.str1.4,"aMS",@progbits,1 + .align 4 +.LC1: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC8: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4144, %esp + cmpl $2, 8(%ebp) + jne .L59 + movl 12(%ebp), %eax + movl 4(%eax), %ecx + xorl %eax, %eax + movsbw (%ecx), %dx + movw %dx, 42(%esp) + filds 42(%esp) + fmuls .LC2 + fdivl .LC3 + fstps 44(%esp) + flds 44(%esp) + fldl .LC4 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L4: + fsts 3120(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3124(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3128(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3132(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3136(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3140(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3144(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 3148(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + fstps 44(%esp) + flds 44(%esp) + jne .L4 + fstp %st(1) + flds .LC5 + movw $1, %ax + fld %st(1) + fdiv %st(1), %st + 
fadds .LC6 + fstps 2096(%esp) + fldl .LC7 + faddp %st, %st(2) + fxch %st(1) + fstps 44(%esp) + flds 44(%esp) + .p2align 4,,10 + .p2align 3 +.L5: + fld %st(0) + fdiv %st(2), %st + fld1 + fadd %st, %st(1) + fxch %st(1) + fstps 2096(%esp,%eax,4) + fldl .LC7 + fadd %st, %st(2) + fxch %st(2) + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2100(%esp,%eax,4) + fadd %st(2), %st + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2104(%esp,%eax,4) + fadd %st(2), %st + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2108(%esp,%eax,4) + fadd %st(2), %st + fstps 44(%esp) + flds 44(%esp) + fld %st(0) + fdiv %st(4), %st + faddp %st, %st(2) + fxch %st(1) + fstps 2112(%esp,%eax,4) + addl $5, %eax + faddp %st, %st(1) + cmpl $256, %eax + fstps 44(%esp) + flds 44(%esp) + jne .L5 + fstp %st(1) + xorw %ax, %ax + fldl .LC4 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L6: + fsts 1072(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1076(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1080(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1084(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1088(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1092(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1096(%esp,%eax,4) + fadd %st(1), %st + fstps 44(%esp) + flds 44(%esp) + fsts 1100(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + fstps 44(%esp) + flds 44(%esp) + jne .L6 + fstp %st(0) + fstp %st(0) + movl $100000000, %ecx + .p2align 4,,10 + .p2align 3 +.L7: + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L8: + movaps 3120(%esp,%eax,4), %xmm7 + leal 4(%eax), %edx + mulps 2096(%esp,%eax,4), %xmm7 + movaps %xmm7, 48(%esp,%eax,4) + movaps 3120(%esp,%edx,4), %xmm6 + mulps 2096(%esp,%edx,4), %xmm6 + movaps %xmm6, 
48(%esp,%edx,4) + leal 8(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm5 + mulps 2096(%esp,%edx,4), %xmm5 + movaps %xmm5, 48(%esp,%edx,4) + leal 12(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm4 + mulps 2096(%esp,%edx,4), %xmm4 + movaps %xmm4, 48(%esp,%edx,4) + leal 16(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm3 + mulps 2096(%esp,%edx,4), %xmm3 + movaps %xmm3, 48(%esp,%edx,4) + leal 20(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm2 + mulps 2096(%esp,%edx,4), %xmm2 + movaps %xmm2, 48(%esp,%edx,4) + leal 24(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm1 + mulps 2096(%esp,%edx,4), %xmm1 + movaps %xmm1, 48(%esp,%edx,4) + leal 28(%eax), %edx + addl $32, %eax + movaps 3120(%esp,%edx,4), %xmm0 + cmpl $256, %eax + mulps 2096(%esp,%edx,4), %xmm0 + movaps %xmm0, 48(%esp,%edx,4) + jne .L8 + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L9: + movaps 48(%esp,%eax,4), %xmm7 + leal 4(%eax), %edx + addps 1072(%esp,%eax,4), %xmm7 + movaps %xmm7, 3120(%esp,%eax,4) + movaps 48(%esp,%edx,4), %xmm6 + addps 1072(%esp,%edx,4), %xmm6 + movaps %xmm6, 3120(%esp,%edx,4) + leal 8(%eax), %edx + movaps 48(%esp,%edx,4), %xmm5 + addps 1072(%esp,%edx,4), %xmm5 + movaps %xmm5, 3120(%esp,%edx,4) + leal 12(%eax), %edx + movaps 48(%esp,%edx,4), %xmm4 + addps 1072(%esp,%edx,4), %xmm4 + movaps %xmm4, 3120(%esp,%edx,4) + leal 16(%eax), %edx + movaps 48(%esp,%edx,4), %xmm3 + addps 1072(%esp,%edx,4), %xmm3 + movaps %xmm3, 3120(%esp,%edx,4) + leal 20(%eax), %edx + movaps 48(%esp,%edx,4), %xmm2 + addps 1072(%esp,%edx,4), %xmm2 + movaps %xmm2, 3120(%esp,%edx,4) + leal 24(%eax), %edx + movaps 48(%esp,%edx,4), %xmm1 + addps 1072(%esp,%edx,4), %xmm1 + movaps %xmm1, 3120(%esp,%edx,4) + leal 28(%eax), %edx + addl $32, %eax + movaps 48(%esp,%edx,4), %xmm0 + cmpl $256, %eax + addps 1072(%esp,%edx,4), %xmm0 + movaps %xmm0, 3120(%esp,%edx,4) + jne .L9 + decl %ecx + jne .L7 + fldz + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L11: + fadds 3120(%esp,%eax,4) + fadds 3124(%esp,%eax,4) + fadds 3128(%esp,%eax,4) + fadds 
3132(%esp,%eax,4) + fadds 3136(%esp,%eax,4) + fadds 3140(%esp,%eax,4) + fadds 3144(%esp,%eax,4) + fadds 3148(%esp,%eax,4) + addl $8, %eax + cmpl $256, %eax + jne .L11 + fstpl 8(%esp) + movl $.LC8, 4(%esp) + movl $1, (%esp) + call __printf_chk + xorl %eax, %eax + leave + ret +.L59: + movl stderr, %eax + movl $75, 8(%esp) + movl %eax, 12(%esp) + movl $1, 4(%esp) + movl $.LC1, (%esp) + call fwrite + orl $-1, %eax + leave + ret + .size main, .-main + .section .rodata.cst4,"aM",@progbits,4 + .align 4 +.LC2: + .long 1077936128 + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC3: + .long 469762048 + .long 1100643759 + .align 8 +.LC4: + .long -1031520545 + .long 1048062144 + .section .rodata.cst4 + .align 4 +.LC5: + .long 1176256512 + .align 4 +.LC6: + .long 1065353216 + .section .rodata.cst8 + .align 8 +.LC7: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss2.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss2.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss2.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,308 @@ + .file "vectors.c" + .section .rodata.str1.4,"aMS",@progbits,1 + .align 4 +.LC0: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC6: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4144, %esp + cmpl $2, 8(%ebp) + jne .L61 + movl 12(%ebp), %eax + movl 4(%eax), %ecx + xorl %eax, %eax + movsbw (%ecx), %dx + movw %dx, 46(%esp) + filds 46(%esp) + fmull .LC1 + fldl .LC2 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L4: + fsts 3120(%esp,%eax,4) + fadd %st(1), %st + fsts 3124(%esp,%eax,4) + fadd %st(1), %st + fsts 
3128(%esp,%eax,4) + fadd %st(1), %st + fsts 3132(%esp,%eax,4) + fadd %st(1), %st + fsts 3136(%esp,%eax,4) + fadd %st(1), %st + fsts 3140(%esp,%eax,4) + fadd %st(1), %st + fsts 3144(%esp,%eax,4) + fadd %st(1), %st + fsts 3148(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + jne .L4 + fstp %st(1) + xorw %ax, %ax + fld1 + .p2align 4,,10 + .p2align 3 +.L5: + fldl .LC3 + fld %st(2) + fmul %st(1), %st + fadd %st(2), %st + fstps 2096(%esp,%eax,4) + fldl .LC5 + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2100(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2104(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2108(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2112(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2116(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2120(%esp,%eax,4) + fadd %st, %st(3) + fxch %st(1) + fmul %st(3), %st + fadd %st(2), %st + fstps 2124(%esp,%eax,4) + addl $8, %eax + faddp %st, %st(2) + cmpl $256, %eax + jne .L5 + fstp %st(0) + xorw %ax, %ax + fldl .LC2 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L6: + fsts 1072(%esp,%eax,4) + fadd %st(1), %st + fsts 1076(%esp,%eax,4) + fadd %st(1), %st + fsts 1080(%esp,%eax,4) + fadd %st(1), %st + fsts 1084(%esp,%eax,4) + fadd %st(1), %st + fsts 1088(%esp,%eax,4) + fadd %st(1), %st + fsts 1092(%esp,%eax,4) + fadd %st(1), %st + fsts 1096(%esp,%eax,4) + fadd %st(1), %st + fsts 1100(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + jne .L6 + fstp %st(0) + fstp %st(0) + movl $100000000, %ecx + .p2align 4,,10 + .p2align 3 +.L7: + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L8: + movaps 3120(%esp,%eax,4), %xmm0 + leal 4(%eax), %edx + mulps 2096(%esp,%eax,4), %xmm0 + movaps %xmm0, 48(%esp,%eax,4) + movaps 3120(%esp,%edx,4), %xmm7 + 
mulps 2096(%esp,%edx,4), %xmm7 + movaps %xmm7, 48(%esp,%edx,4) + leal 8(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm6 + mulps 2096(%esp,%edx,4), %xmm6 + movaps %xmm6, 48(%esp,%edx,4) + leal 12(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm5 + mulps 2096(%esp,%edx,4), %xmm5 + movaps %xmm5, 48(%esp,%edx,4) + leal 16(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm4 + mulps 2096(%esp,%edx,4), %xmm4 + movaps %xmm4, 48(%esp,%edx,4) + leal 20(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm3 + mulps 2096(%esp,%edx,4), %xmm3 + movaps %xmm3, 48(%esp,%edx,4) + leal 24(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm2 + mulps 2096(%esp,%edx,4), %xmm2 + movaps %xmm2, 48(%esp,%edx,4) + leal 28(%eax), %edx + addl $32, %eax + movaps 3120(%esp,%edx,4), %xmm0 + cmpl $256, %eax + mulps 2096(%esp,%edx,4), %xmm0 + movaps %xmm0, 48(%esp,%edx,4) + jne .L8 + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L9: + movaps 48(%esp,%eax,4), %xmm2 + leal 4(%eax), %edx + addps 1072(%esp,%eax,4), %xmm2 + movaps %xmm2, 3120(%esp,%eax,4) + movaps 48(%esp,%edx,4), %xmm0 + addps 1072(%esp,%edx,4), %xmm0 + movaps %xmm0, 3120(%esp,%edx,4) + leal 8(%eax), %edx + movaps 48(%esp,%edx,4), %xmm7 + addps 1072(%esp,%edx,4), %xmm7 + movaps %xmm7, 3120(%esp,%edx,4) + leal 12(%eax), %edx + movaps 48(%esp,%edx,4), %xmm6 + addps 1072(%esp,%edx,4), %xmm6 + movaps %xmm6, 3120(%esp,%edx,4) + leal 16(%eax), %edx + movaps 48(%esp,%edx,4), %xmm5 + addps 1072(%esp,%edx,4), %xmm5 + movaps %xmm5, 3120(%esp,%edx,4) + leal 20(%eax), %edx + movaps 48(%esp,%edx,4), %xmm4 + addps 1072(%esp,%edx,4), %xmm4 + movaps %xmm4, 3120(%esp,%edx,4) + leal 24(%eax), %edx + movaps 48(%esp,%edx,4), %xmm3 + addps 1072(%esp,%edx,4), %xmm3 + movaps %xmm3, 3120(%esp,%edx,4) + leal 28(%eax), %edx + addl $32, %eax + movaps 48(%esp,%edx,4), %xmm2 + cmpl $256, %eax + addps 1072(%esp,%edx,4), %xmm2 + movaps %xmm2, 3120(%esp,%edx,4) + jne .L9 + decl %ecx + jne .L7 + leal 3120(%esp), %edx + leal 4144(%esp), %eax + xorpd %xmm0, %xmm0 + .p2align 4,,10 + .p2align 3 
+.L11: + movaps (%edx), %xmm3 + movaps 16(%edx), %xmm2 + movhlps %xmm3, %xmm1 + cvtps2pd %xmm3, %xmm5 + cvtps2pd %xmm1, %xmm4 + addpd %xmm0, %xmm5 + cvtps2pd %xmm2, %xmm7 + addpd %xmm5, %xmm4 + movaps 32(%edx), %xmm5 + addpd %xmm4, %xmm7 + cvtps2pd %xmm5, %xmm3 + movaps %xmm1, %xmm4 + movhlps %xmm2, %xmm4 + movaps 48(%edx), %xmm2 + cvtps2pd %xmm4, %xmm6 + cvtps2pd %xmm2, %xmm1 + addpd %xmm7, %xmm6 + movhlps %xmm5, %xmm4 + addpd %xmm6, %xmm3 + cvtps2pd %xmm4, %xmm0 + movaps 64(%edx), %xmm6 + addpd %xmm3, %xmm0 + movhlps %xmm2, %xmm4 + addpd %xmm0, %xmm1 + movaps 80(%edx), %xmm2 + cvtps2pd %xmm4, %xmm7 + cvtps2pd %xmm6, %xmm5 + addpd %xmm1, %xmm7 + movhlps %xmm6, %xmm4 + addpd %xmm7, %xmm5 + cvtps2pd %xmm4, %xmm3 + movaps 96(%edx), %xmm7 + addpd %xmm5, %xmm3 + movhlps %xmm2, %xmm4 + cvtps2pd %xmm2, %xmm1 + cvtps2pd %xmm4, %xmm0 + addpd %xmm3, %xmm1 + cvtps2pd %xmm7, %xmm6 + addpd %xmm1, %xmm0 + movaps 112(%edx), %xmm3 + addpd %xmm0, %xmm6 + movaps %xmm4, %xmm1 + subl $-128, %edx + movhlps %xmm7, %xmm1 + cvtps2pd %xmm3, %xmm4 + cvtps2pd %xmm1, %xmm5 + cmpl %eax, %edx + addpd %xmm6, %xmm5 + movhlps %xmm3, %xmm1 + addpd %xmm5, %xmm4 + cvtps2pd %xmm1, %xmm0 + addpd %xmm4, %xmm0 + jne .L11 + haddpd %xmm0, %xmm0 + movl $.LC6, 4(%esp) + movlpd %xmm0, 8(%esp) + movl $1, (%esp) + call __printf_chk + xorl %eax, %eax + leave + ret +.L61: + movl stderr, %eax + movl $75, 8(%esp) + movl %eax, 12(%esp) + movl $1, 4(%esp) + movl $.LC0, (%esp) + call fwrite + orl $-1, %eax + leave + ret + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC1: + .long 406720020 + .long 1046281652 + .align 8 +.LC2: + .long -1031520545 + .long 1048062144 + .align 8 +.LC3: + .long -350469331 + .long 1058682594 + .align 8 +.LC5: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss3.s 
=================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss3.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect4.ss3.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,308 @@ + .file "vectors.c" + .section .rodata.str1.4,"aMS",@progbits,1 + .align 4 +.LC0: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC6: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4144, %esp + cmpl $2, 8(%ebp) + jne .L61 + movl 12(%ebp), %eax + movl 4(%eax), %ecx + xorl %eax, %eax + movsbw (%ecx), %dx + movw %dx, 46(%esp) + filds 46(%esp) + fmull .LC1 + fldl .LC2 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L4: + fsts 3120(%esp,%eax,4) + fadd %st(1), %st + fsts 3124(%esp,%eax,4) + fadd %st(1), %st + fsts 3128(%esp,%eax,4) + fadd %st(1), %st + fsts 3132(%esp,%eax,4) + fadd %st(1), %st + fsts 3136(%esp,%eax,4) + fadd %st(1), %st + fsts 3140(%esp,%eax,4) + fadd %st(1), %st + fsts 3144(%esp,%eax,4) + fadd %st(1), %st + fsts 3148(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + jne .L4 + fstp %st(1) + xorw %ax, %ax + fld1 + .p2align 4,,10 + .p2align 3 +.L5: + fldl .LC3 + fld %st(2) + fmul %st(1), %st + fadd %st(2), %st + fstps 2096(%esp,%eax,4) + fldl .LC5 + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2100(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2104(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2108(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2112(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + fstps 2116(%esp,%eax,4) + fadd %st, %st(3) + fld %st(3) + fmul %st(2), %st + fadd %st(3), %st + 
fstps 2120(%esp,%eax,4) + fadd %st, %st(3) + fxch %st(1) + fmul %st(3), %st + fadd %st(2), %st + fstps 2124(%esp,%eax,4) + addl $8, %eax + faddp %st, %st(2) + cmpl $256, %eax + jne .L5 + fstp %st(0) + xorw %ax, %ax + fldl .LC2 + fxch %st(1) + .p2align 4,,10 + .p2align 3 +.L6: + fsts 1072(%esp,%eax,4) + fadd %st(1), %st + fsts 1076(%esp,%eax,4) + fadd %st(1), %st + fsts 1080(%esp,%eax,4) + fadd %st(1), %st + fsts 1084(%esp,%eax,4) + fadd %st(1), %st + fsts 1088(%esp,%eax,4) + fadd %st(1), %st + fsts 1092(%esp,%eax,4) + fadd %st(1), %st + fsts 1096(%esp,%eax,4) + fadd %st(1), %st + fsts 1100(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + jne .L6 + fstp %st(0) + fstp %st(0) + movl $100000000, %ecx + .p2align 4,,10 + .p2align 3 +.L7: + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L8: + movaps 3120(%esp,%eax,4), %xmm0 + leal 4(%eax), %edx + mulps 2096(%esp,%eax,4), %xmm0 + movaps %xmm0, 48(%esp,%eax,4) + movaps 3120(%esp,%edx,4), %xmm7 + mulps 2096(%esp,%edx,4), %xmm7 + movaps %xmm7, 48(%esp,%edx,4) + leal 8(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm6 + mulps 2096(%esp,%edx,4), %xmm6 + movaps %xmm6, 48(%esp,%edx,4) + leal 12(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm5 + mulps 2096(%esp,%edx,4), %xmm5 + movaps %xmm5, 48(%esp,%edx,4) + leal 16(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm4 + mulps 2096(%esp,%edx,4), %xmm4 + movaps %xmm4, 48(%esp,%edx,4) + leal 20(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm3 + mulps 2096(%esp,%edx,4), %xmm3 + movaps %xmm3, 48(%esp,%edx,4) + leal 24(%eax), %edx + movaps 3120(%esp,%edx,4), %xmm2 + mulps 2096(%esp,%edx,4), %xmm2 + movaps %xmm2, 48(%esp,%edx,4) + leal 28(%eax), %edx + addl $32, %eax + movaps 3120(%esp,%edx,4), %xmm0 + cmpl $256, %eax + mulps 2096(%esp,%edx,4), %xmm0 + movaps %xmm0, 48(%esp,%edx,4) + jne .L8 + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L9: + movaps 48(%esp,%eax,4), %xmm2 + leal 4(%eax), %edx + addps 1072(%esp,%eax,4), %xmm2 + movaps %xmm2, 3120(%esp,%eax,4) + movaps 
48(%esp,%edx,4), %xmm0 + addps 1072(%esp,%edx,4), %xmm0 + movaps %xmm0, 3120(%esp,%edx,4) + leal 8(%eax), %edx + movaps 48(%esp,%edx,4), %xmm7 + addps 1072(%esp,%edx,4), %xmm7 + movaps %xmm7, 3120(%esp,%edx,4) + leal 12(%eax), %edx + movaps 48(%esp,%edx,4), %xmm6 + addps 1072(%esp,%edx,4), %xmm6 + movaps %xmm6, 3120(%esp,%edx,4) + leal 16(%eax), %edx + movaps 48(%esp,%edx,4), %xmm5 + addps 1072(%esp,%edx,4), %xmm5 + movaps %xmm5, 3120(%esp,%edx,4) + leal 20(%eax), %edx + movaps 48(%esp,%edx,4), %xmm4 + addps 1072(%esp,%edx,4), %xmm4 + movaps %xmm4, 3120(%esp,%edx,4) + leal 24(%eax), %edx + movaps 48(%esp,%edx,4), %xmm3 + addps 1072(%esp,%edx,4), %xmm3 + movaps %xmm3, 3120(%esp,%edx,4) + leal 28(%eax), %edx + addl $32, %eax + movaps 48(%esp,%edx,4), %xmm2 + cmpl $256, %eax + addps 1072(%esp,%edx,4), %xmm2 + movaps %xmm2, 3120(%esp,%edx,4) + jne .L9 + decl %ecx + jne .L7 + leal 3120(%esp), %edx + leal 4144(%esp), %eax + xorpd %xmm0, %xmm0 + .p2align 4,,10 + .p2align 3 +.L11: + movaps (%edx), %xmm3 + movaps 16(%edx), %xmm2 + movhlps %xmm3, %xmm1 + cvtps2pd %xmm3, %xmm5 + cvtps2pd %xmm1, %xmm4 + addpd %xmm0, %xmm5 + cvtps2pd %xmm2, %xmm7 + addpd %xmm5, %xmm4 + movaps 32(%edx), %xmm5 + addpd %xmm4, %xmm7 + cvtps2pd %xmm5, %xmm3 + movaps %xmm1, %xmm4 + movhlps %xmm2, %xmm4 + movaps 48(%edx), %xmm2 + cvtps2pd %xmm4, %xmm6 + cvtps2pd %xmm2, %xmm1 + addpd %xmm7, %xmm6 + movhlps %xmm5, %xmm4 + addpd %xmm6, %xmm3 + cvtps2pd %xmm4, %xmm0 + movaps 64(%edx), %xmm6 + addpd %xmm3, %xmm0 + movhlps %xmm2, %xmm4 + addpd %xmm0, %xmm1 + movaps 80(%edx), %xmm2 + cvtps2pd %xmm4, %xmm7 + cvtps2pd %xmm6, %xmm5 + addpd %xmm1, %xmm7 + movhlps %xmm6, %xmm4 + addpd %xmm7, %xmm5 + cvtps2pd %xmm4, %xmm3 + movaps 96(%edx), %xmm7 + addpd %xmm5, %xmm3 + movhlps %xmm2, %xmm4 + cvtps2pd %xmm2, %xmm1 + cvtps2pd %xmm4, %xmm0 + addpd %xmm3, %xmm1 + cvtps2pd %xmm7, %xmm6 + addpd %xmm1, %xmm0 + movaps 112(%edx), %xmm3 + addpd %xmm0, %xmm6 + movaps %xmm4, %xmm1 + subl $-128, %edx + movhlps %xmm7, %xmm1 + 
cvtps2pd %xmm3, %xmm4 + cvtps2pd %xmm1, %xmm5 + cmpl %eax, %edx + addpd %xmm6, %xmm5 + movhlps %xmm3, %xmm1 + addpd %xmm5, %xmm4 + cvtps2pd %xmm1, %xmm0 + addpd %xmm4, %xmm0 + jne .L11 + haddpd %xmm0, %xmm0 + movl $.LC6, 4(%esp) + movlpd %xmm0, 8(%esp) + movl $1, (%esp) + call __printf_chk + xorl %eax, %eax + leave + ret +.L61: + movl stderr, %eax + movl $75, 8(%esp) + movl %eax, 12(%esp) + movl $1, 4(%esp) + movl $.LC0, (%esp) + call fwrite + orl $-1, %eax + leave + ret + .size main, .-main + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC1: + .long 406720020 + .long 1046281652 + .align 8 +.LC2: + .long -1031520545 + .long 1048062144 + .align 8 +.LC3: + .long -350469331 + .long 1058682594 + .align 8 +.LC5: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits Added: trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect8.s =================================================================== --- trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect8.s (rev 0) +++ trunk/extra/tests/ubuntu-32bit.asm/vectors.gcc.4.5.2.vect8.s 2011-07-26 09:39:47 UTC (rev 353) @@ -0,0 +1,353 @@ + .file "vectors.c" + .section .rodata.str1.4,"aMS",@progbits,1 + .align 4 +.LC1: + .string "Usage:vectors <string>, where string is used as a seed for the computation\n" + .section .rodata.str1.1,"aMS",@progbits,1 +.LC8: + .string "Result %f\n" + .text + .p2align 4,,15 +.globl main + .type main, @function +main: + pushl %ebp + movl %esp, %ebp + andl $-32, %esp + subl $4400, %esp + cmpl $2, 8(%ebp) + jne .L45 + movl 12(%ebp), %eax + movl 4(%eax), %ecx + xorl %eax, %eax + movsbw (%ecx), %dx + movw %dx, 62(%esp) + filds 62(%esp) + fmuls .LC2 + fdivl .LC3 + fstps 108(%esp) + flds 108(%esp) + fldl .LC4 + fxch %st(1) +.L4: + fsts 3184(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3188(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3192(%esp,%eax,4) + fadd 
%st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3196(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3200(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3204(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3208(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 3212(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + fstps 108(%esp) + flds 108(%esp) + jne .L4 + fstp %st(1) + flds .LC5 + movw $1, %ax + fld %st(1) + fdiv %st(1), %st + fadds .LC6 + fstps 2160(%esp) + fxch %st(1) + faddl .LC7 + fstps 108(%esp) + flds 108(%esp) +.L5: + fld %st(0) + fdiv %st(2), %st + fld1 + fadd %st, %st(1) + fxch %st(1) + fstps 2160(%esp,%eax,4) + fldl .LC7 + fadd %st, %st(2) + fxch %st(2) + fstps 108(%esp) + flds 108(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2164(%esp,%eax,4) + fadd %st(2), %st + fstps 108(%esp) + flds 108(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2168(%esp,%eax,4) + fadd %st(2), %st + fstps 108(%esp) + flds 108(%esp) + fld %st(0) + fdiv %st(4), %st + fadd %st(2), %st + fstps 2172(%esp,%eax,4) + fadd %st(2), %st + fstps 108(%esp) + flds 108(%esp) + fld %st(0) + fdiv %st(4), %st + faddp %st, %st(2) + fxch %st(1) + fstps 2176(%esp,%eax,4) + addl $5, %eax + faddp %st, %st(1) + cmpl $256, %eax + fstps 108(%esp) + flds 108(%esp) + jne .L5 + fstp %st(1) + xorw %ax, %ax + fldl .LC4 + fxch %st(1) +.L6: + fsts 1136(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 1140(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 1144(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 1148(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 1152(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 1156(%esp,%eax,4) + fadd %st(1), %st + fstps 108(%esp) + flds 108(%esp) + fsts 1160(%esp,%eax,4) + fadd %st(1), 
%st + fstps 108(%esp) + flds 108(%esp) + fsts 1164(%esp,%eax,4) + addl $8, %eax + fadd %st(1), %st + cmpl $256, %eax + fstps 108(%esp) + flds 108(%esp) + jne .L6 + fstp %st(0) + fstp %st(0) + movl $100000000, %ecx + .p2align 4,,10 + .p2align 3 +.L7: + xorl %eax, %eax + .p2align 4,,10 + .p2align 3 +.L8: + movl 3184(%esp,%eax,4), %edx + movl %edx, 4368(%esp) + movl 3188(%esp,%eax,4), %edx + movl %edx, 4372(%esp) + movl 3192(%esp,%eax,4), %edx + movl %edx, 4376(%esp) + movl 3196(%esp,%eax,4), %edx + movl %edx, 4380(%esp) + movl 3200(%esp,%eax,4), %edx + movaps 4368(%esp), %xmm1 + movl %edx, 4384(%esp) + movl 3204(%esp,%eax,4), %edx + movl %edx, 4388(%esp) + movl 3208(%esp,%eax,4), %edx + movl %edx, 4392(%esp) + movl 3212(%esp,%eax,4), %edx + movl %edx, 4396(%esp) + movl 2160(%esp,%eax,4), %edx + movl %edx, 4336(%esp) + movl 2164(%esp,%eax,4), %edx + movl %edx, 4340(%esp) + movl 2168(%esp,%eax,4), %edx + movl %edx, 4344(%esp) + movl 2172(%esp,%eax,4), %edx + movl %edx, 4348(%esp) + movl 2176(%esp,%eax,4), %edx + mulps 4336(%esp), %xmm1 + movl %edx, 4352(%esp) + movl 2180(%esp,%eax,4), %edx + movl %edx, 4356(%esp) + movl 2184(%esp,%eax,4), %edx + movl %edx, 4360(%esp) + movl 2188(%esp,%eax,4), %edx + movl %edx, 4364(%esp) + movaps %xmm1, 32(%esp) + movaps %xmm1, 64(%esp) + movl 32(%esp), %edx + movaps 4384(%esp), %xmm0 + mulps 4352(%esp), %xmm0 + movaps %xmm0, 80(%esp) + movl %edx, 112(%esp,%eax,4) + movl 68(%esp), %edx + movl %edx, 116(%esp,%eax,4) + movl 72(%esp), %edx + movl %edx, 120(%esp,%eax,4) + movl 76(%esp), %edx + movl %edx, 124(%esp,%eax,4) + movl 80(%esp), %edx + movl %edx, 128(%esp,%eax,4) + movl 84(%esp), %edx + movl %edx, 132(%esp,%eax,4) + movl 88(%esp), %edx + movl %edx, 136(%esp,%eax,4) + movl 92(%esp), %edx + movl %edx, 140(%esp,%eax,4) + addl $8, %eax + cmpl $256, %eax + jne .L8 + xorw %ax, %ax + .p2align 4,,10 + .p2align 3 +.L9: + movl 112(%esp,%eax,4), %edx + movl %edx, 4272(%esp) + movl 116(%esp,%eax,4), %edx + movl %edx, 4276(%esp) + movl 
120(%esp,%eax,4), %edx + movl %edx, 4280(%esp) + movl 124(%esp,%eax,4), %edx + movl %edx, 4284(%esp) + movl 128(%esp,%eax,4), %edx + movaps 4272(%esp), %xmm3 + movl %edx, 4288(%esp) + movl 132(%esp,%eax,4), %edx + movl %edx, 4292(%esp) + movl 136(%esp,%eax,4), %edx + movl %edx, 4296(%esp) + movl 140(%esp,%eax,4), %edx + movl %edx, 4300(%esp) + movl 1136(%esp,%eax,4), %edx + movl %edx, 4240(%esp) + movl 1140(%esp,%eax,4), %edx + movl %edx, 4244(%esp) + movl 1144(%esp,%eax,4), %edx + movl %edx, 4248(%esp) + movl 1148(%esp,%eax,4), %edx + movl %edx, 4252(%esp) + movl 1152(%esp,%eax,4), %edx + addps 4240(%esp), %xmm3 + movl %edx, 4256(%esp) + movl 1156(%esp,%eax,4), %edx + movl %edx, 4260(%esp) + movl 1160(%esp,%eax,4), %edx + movl %edx, 4264(%esp) + movl 1164(%esp,%eax,4), %edx + movl %edx, 4268(%esp) + movaps %xmm3, 32(%esp) + movaps %xmm3, 64(%esp) + movl 32(%esp), %edx + movaps 4288(%esp), %xmm2 + addps 4256(%esp), %xmm2 + movaps %xmm2, 80(%esp) + movl %edx, 3184(%esp,%eax,4) + movl 68(%esp), %edx + movl %edx, 3188(%esp,%eax,4) + movl 72(%esp), %edx + movl %edx, 3192(%esp,%eax,4) + movl 76(%esp), %edx + movl %edx, 3196(%esp,%eax,4) + movl 80(%esp), %edx + movl %edx, 3200(%esp,%eax,4) + movl 84(%esp), %edx + movl %edx, 3204(%esp,%eax,4) + movl 88(%esp), %edx + movl %edx, 3208(%esp,%eax,4) + movl 92(%esp), %edx + movl %edx, 3212(%esp,%eax,4) + addl $8, %eax + cmpl $256, %eax + jne .L9 + decl %ecx + jne .L7 + fldz + xorl %eax, %eax +.L11: + fadds 3184(%esp,%eax,4) + fadds 3188(%esp,%eax,4) + fadds 3192(%esp,%eax,4) + fadds 3196(%esp,%eax,4) + fadds 3200(%esp,%eax,4) + fadds 3204(%esp,%eax,4) + fadds 3208(%esp,%eax,4) + fadds 3212(%esp,%eax,4) + addl $8, %eax + cmpl $256, %eax + jne .L11 + fstpl 8(%esp) + movl $.LC8, 4(%esp) + movl $1, (%esp) + call __printf_chk + xorl %eax, %eax + leave + ret +.L45: + movl stderr, %eax + movl $75, 8(%esp) + movl %eax, 12(%esp) + movl $1, 4(%esp) + movl $.LC1, (%esp) + call fwrite + orl $-1, %eax + leave + ret + .size main, .-main + 
.section .rodata.cst4,"aM",@progbits,4 + .align 4 +.LC2: + .long 1077936128 + .section .rodata.cst8,"aM",@progbits,8 + .align 8 +.LC3: + .long 469762048 + .long 1100643759 + .align 8 +.LC4: + .long -1031520545 + .long 1048062144 + .section .rodata.cst4 + .align 4 +.LC5: + .long 1176256512 + .align 4 +.LC6: + .long 1065353216 + .section .rodata.cst8 + .align 8 +.LC7: + .long 549364597 + .long 1051603697 + .ident "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2" + .section .note.GNU-stack,"",@progbits This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 09:03:25
|
Revision: 352 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=352&view=rev Author: dececco Date: 2011-07-26 09:03:19 +0000 (Tue, 26 Jul 2011) Log Message: ----------- Added loop unroll to Linux 32bit makefile Modified Paths: -------------- trunk/core/Makefiles/Makefile.Linux.i386 trunk/core/Makefiles/Makefile.Linux.x86 Modified: trunk/core/Makefiles/Makefile.Linux.i386 =================================================================== --- trunk/core/Makefiles/Makefile.Linux.i386 2011-07-26 09:01:19 UTC (rev 351) +++ trunk/core/Makefiles/Makefile.Linux.i386 2011-07-26 09:03:19 UTC (rev 352) @@ -45,7 +45,7 @@ # These flags are quite strict and give a lot of warning. But they are safe ARCH_CFLAGS := -pipe -x objective-c -Wall -W -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wno-unused -DLINUX -DLINUXPC ARCH_LIB_CFLAGS = -ARCH_OPT_CFLAGS := -O3 -mtune=core2 -march=core2 +ARCH_OPT_CFLAGS := -O3 -mtune=core2 -march=core2 -funroll-loops ARCH_LDFLAGS := ARCH_SOFLAGS := -rdynamic -shared LD_ALL_FLAG := -Wl,--whole-archive Modified: trunk/core/Makefiles/Makefile.Linux.x86 =================================================================== --- trunk/core/Makefiles/Makefile.Linux.x86 2011-07-26 09:01:19 UTC (rev 351) +++ trunk/core/Makefiles/Makefile.Linux.x86 2011-07-26 09:03:19 UTC (rev 352) @@ -45,7 +45,7 @@ # For now, we use ARCH_CFLAGS := -pipe -x objective-c -Wall -W -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wstrict-prototypes -Wno-unused -DLINUX -DLINUXPC -fPIC ARCH_LIB_CFLAGS = -ARCH_OPT_CFLAGS := -O3 -mtune=core2 -march=core2 +ARCH_OPT_CFLAGS := -O3 -mtune=core2 -march=core2 -funroll-loops ARCH_LDFLAGS := -rdynamic LD_ALL_FLAG := --whole-archive LD_NONE_FLAG := --no-whole-archive This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 09:01:26
|
Revision: 351 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=351&view=rev Author: dececco Date: 2011-07-26 09:01:19 +0000 (Tue, 26 Jul 2011) Log Message: ----------- added ubuntu 32 bit test results Added Paths: ----------- trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.32.txt Added: trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.32.txt =================================================================== --- trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.32.txt (rev 0) +++ trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.32.txt 2011-07-26 09:01:19 UTC (rev 351) @@ -0,0 +1,64 @@ +Compiler gcc 4.5.2 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m6.467s +user 0m6.456s +sys 0m0.000s + +Compiler clang +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m49.903s +user 0m49.835s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 2m44.861s +user 2m44.214s +sys 0m0.072s + +Compiler clang vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m8.396s +user 0m8.361s +sys 0m0.004s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m6.980s +user 0m6.968s +sys 0m0.000s + +Compiler clang vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m10.122s +user 0m10.097s +sys 0m0.004s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 -msse2 -ffast-math +Result 2097152.000000 + +real 0m6.846s +user 0m6.824s +sys 0m0.008s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 -msse3 -ffast-math +Result 2097152.000000 + +real 0m6.851s +user 
0m6.840s +sys 0m0.000s + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 08:35:00
|
Revision: 350 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=350&view=rev Author: dececco Date: 2011-07-26 08:34:54 +0000 (Tue, 26 Jul 2011) Log Message: ----------- Fix to release script Modified Paths: -------------- trunk/core/scripts/make-release.sh Modified: trunk/core/scripts/make-release.sh =================================================================== --- trunk/core/scripts/make-release.sh 2011-07-26 08:29:57 UTC (rev 349) +++ trunk/core/scripts/make-release.sh 2011-07-26 08:34:54 UTC (rev 350) @@ -62,7 +62,7 @@ # Build cd trunk -make BUILDDIR="$RELEASEDIR/build" DISTDIR="$RELEASEDIR/distrib/jmax/" clean clobber tar +make MODE=opt BUILDDIR="$RELEASEDIR/build" DISTDIR="$RELEASEDIR/distrib/jmax/" clean clobber tar # Run the tests This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 08:30:03
|
Revision: 349 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=349&view=rev Author: dececco Date: 2011-07-26 08:29:57 +0000 (Tue, 26 Jul 2011) Log Message: ----------- Fix to release script Modified Paths: -------------- trunk/core/scripts/make-release.sh Modified: trunk/core/scripts/make-release.sh =================================================================== --- trunk/core/scripts/make-release.sh 2011-07-26 08:28:39 UTC (rev 348) +++ trunk/core/scripts/make-release.sh 2011-07-26 08:29:57 UTC (rev 349) @@ -62,11 +62,11 @@ # Build cd trunk -make MODE=debug BUILDDIR="$RELEASEDIR/build" DISTDIR="$RELEASEDIR/distrib/jmax/" clean clobber tar +make BUILDDIR="$RELEASEDIR/build" DISTDIR="$RELEASEDIR/distrib/jmax/" clean clobber tar # Run the tests -make MODE=debug BUILDDIR="$RELEASEDIR/build" DISTDIR="$RELEASEDIR/distrib/jmax/" test +make BUILDDIR="$RELEASEDIR/build" DISTDIR="$RELEASEDIR/distrib/jmax/" test # Report results This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-26 08:28:45
|
Revision: 348 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=348&view=rev Author: dececco Date: 2011-07-26 08:28:39 +0000 (Tue, 26 Jul 2011) Log Message: ----------- tweaks on optimiser flags Modified Paths: -------------- trunk/Makefile trunk/core/Makefiles/Makefile.Linux.amd64 trunk/extra/tests/doTestLinux.sh Added Paths: ----------- trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.txt Modified: trunk/Makefile =================================================================== --- trunk/Makefile 2011-07-25 09:51:14 UTC (rev 347) +++ trunk/Makefile 2011-07-26 08:28:39 UTC (rev 348) @@ -10,7 +10,7 @@ # of the License, or (at your option) any later version. # # See file LICENSE for further informations on licensing terms. -# +# b # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Modified: trunk/core/Makefiles/Makefile.Linux.amd64 =================================================================== --- trunk/core/Makefiles/Makefile.Linux.amd64 2011-07-25 09:51:14 UTC (rev 347) +++ trunk/core/Makefiles/Makefile.Linux.amd64 2011-07-26 08:28:39 UTC (rev 348) @@ -48,7 +48,7 @@ # For now, we use ARCH_CFLAGS := -pipe -x objective-c -Wall -Wextra -W -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wstrict-prototypes -Wno-unused -DLINUX -DLINUXPC -fPIC ARCH_LIB_CFLAGS = -fPIC -ARCH_OPT_CFLAGS := -O3 -mtune=core2 -march=core2 +ARCH_OPT_CFLAGS := -O3 -mtune=core2 -march=core2 -funroll-loops ARCH_LDFLAGS := ARCH_SOFLAGS := -rdynamic -shared LD_ALL_FLAG := -Wl,--whole-archive Modified: trunk/extra/tests/doTestLinux.sh =================================================================== --- trunk/extra/tests/doTestLinux.sh 2011-07-25 09:51:14 UTC (rev 347) +++ trunk/extra/tests/doTestLinux.sh 2011-07-26 08:28:39 UTC (rev 348) @@ -2,7 +2,7 @@ # Without vector extension, 
gcc, gcc-llvm and clang -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc -DGCC" CCDESCR="gcc 4.5.2" @@ -12,7 +12,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="clang -DGCC" CCDESCR="clang" @@ -24,7 +24,7 @@ # With Vector Extensions, size 8 -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc -DGCC -DHAS_VECT8_EXT" CCDESCR="gcc 4.5.2 vector extensions, size 8" @@ -34,7 +34,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="clang -DGCC -DHAS_VECT8_EXT" CCDESCR="clang vector extensions, size 8" @@ -47,7 +47,7 @@ # With Vector Extensions, size 4 -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="gcc -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc 4.5.2 vector extensions, size 4" @@ -57,7 +57,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2" +FLAGS="-O3 -funroll-loops -march=core2" CC="clang -DGCC -DHAS_VECT4_EXT" CCDESCR="clang vector extensions, size 4" @@ -69,7 +69,7 @@ # gcc only, ss2 and ss3 -FLAGS="-O3 -march=core2 -msse2 -ffast-math" +FLAGS="-O3 -funroll-loops -march=core2 -msse2 -ffast-math" CC="gcc -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc 4.5.2 vector extensions, size 4" @@ -79,7 +79,7 @@ time ./vectors x echo -FLAGS="-O3 -march=core2 -msse3 -ffast-math" +FLAGS="-O3 -funroll-loops -march=core2 -msse3 -ffast-math" CC="gcc -DGCC -DHAS_VECT4_EXT" CCDESCR="gcc 4.5.2 vector extensions, size 4" Added: trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.txt =================================================================== --- trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.txt (rev 0) +++ trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.loopunroll.txt 2011-07-26 08:28:39 UTC (rev 348) @@ -0,0 +1,64 @@ +Compiler gcc 4.5.2 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 
0m6.483s +user 0m6.470s +sys 0m0.000s + +Compiler clang +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m37.076s +user 0m37.040s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 2m57.375s +user 2m57.230s +sys 0m0.020s + +Compiler clang vector extensions, size 8 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m6.228s +user 0m6.220s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m6.378s +user 0m6.370s +sys 0m0.000s + +Compiler clang vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 +Result 2097152.000000 + +real 0m11.206s +user 0m11.190s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 -msse2 -ffast-math +Result 2097152.000000 + +real 0m6.363s +user 0m6.350s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -funroll-loops -march=core2 -msse3 -ffast-math +Result 2097152.000000 + +real 0m6.353s +user 0m6.340s +sys 0m0.000s + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-25 09:51:20
|
Revision: 347 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=347&view=rev Author: dececco Date: 2011-07-25 09:51:14 +0000 (Mon, 25 Jul 2011) Log Message: ----------- added ubuntu results Modified Paths: -------------- trunk/extra/tests/doTestLinux.sh Added Paths: ----------- trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.txt Modified: trunk/extra/tests/doTestLinux.sh =================================================================== --- trunk/extra/tests/doTestLinux.sh 2011-07-25 09:35:51 UTC (rev 346) +++ trunk/extra/tests/doTestLinux.sh 2011-07-25 09:51:14 UTC (rev 347) @@ -1,10 +1,11 @@ #!/bin/bash +# Without vector extension, gcc, gcc-llvm and clang + FLAGS="-O3 -march=core2" CC="gcc -DGCC" CCDESCR="gcc 4.5.2" -echo $CC $FLAGS vectors.c -o vectors $CC $FLAGS vectors.c -o vectors echo Compiler $CCDESCR echo Flags $FLAGS @@ -15,29 +16,75 @@ CC="clang -DGCC" CCDESCR="clang" -$(CC) $(FLAGS) vectors.c -o vectors -echo Compiler $(CCDESCR) -echo Flags $(FLAGS) +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS time ./vectors x echo +# With Vector Extensions, size 8 + FLAGS="-O3 -march=core2" -CC="gcc -DGCC -DHAS_VECT_EXT" -CCDESCR= "gcc 4.5.2 vector extensions" +CC="gcc -DGCC -DHAS_VECT8_EXT" +CCDESCR="gcc 4.5.2 vector extensions, size 8" -$(CC) $(FLAGS) vectors.c -o vectors -echo Compiler $(CCDESCR) -echo Flags $(FLAGS) +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS time ./vectors x echo +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT8_EXT" +CCDESCR="clang vector extensions, size 8" +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + + +# With Vector Extensions, size 4 + FLAGS="-O3 -march=core2" -CC="clang -DGCC -DHAS_VECT_EXT" -CCDESCR= "clang vector extensions" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc 4.5.2 vector extensions, size 4" -$(CC) $(FLAGS) vectors.c -o vectors -echo Compiler $(CCDESCR) -echo 
Flags $(FLAGS) +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS time ./vectors x echo + +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT4_EXT" +CCDESCR="clang vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +# gcc only, ss2 and ss3 + +FLAGS="-O3 -march=core2 -msse2 -ffast-math" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc 4.5.2 vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2 -msse3 -ffast-math" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc 4.5.2 vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo Added: trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.txt =================================================================== --- trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.txt (rev 0) +++ trunk/extra/tests/results-ubuntu11.04-on-vmwarefusion-on-macosx10.6.8-corei7sb2.7Ghz.txt 2011-07-25 09:51:14 UTC (rev 347) @@ -0,0 +1,64 @@ +Compiler gcc 4.5.2 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m11.278s +user 0m11.250s +sys 0m0.000s + +Compiler clang +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m35.372s +user 0m35.350s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 8 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 2m26.902s +user 2m26.800s +sys 0m0.000s + +Compiler clang vector extensions, size 8 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m6.243s +user 0m6.230s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m11.264s +user 0m11.240s +sys 0m0.000s + +Compiler clang vector extensions, size 4 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m11.218s +user 0m11.200s +sys 
0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -march=core2 -msse2 -ffast-math +Result 2097152.000000 + +real 0m11.252s +user 0m11.240s +sys 0m0.000s + +Compiler gcc 4.5.2 vector extensions, size 4 +Flags -O3 -march=core2 -msse3 -ffast-math +Result 2097152.000000 + +real 0m11.254s +user 0m11.240s +sys 0m0.000s + This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-25 09:35:57
|
Revision: 346 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=346&view=rev Author: dececco Date: 2011-07-25 09:35:51 +0000 (Mon, 25 Jul 2011) Log Message: ----------- Changes in vector test scripts Removed Paths: ------------- trunk/extra/tests/vectors Deleted: trunk/extra/tests/vectors =================================================================== (Binary files differ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-25 09:35:39
|
Revision: 345 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=345&view=rev Author: dececco Date: 2011-07-25 09:35:33 +0000 (Mon, 25 Jul 2011) Log Message: ----------- Changes in vector test scripts Modified Paths: -------------- trunk/extra/tests/vectors trunk/extra/tests/vectors.c Added Paths: ----------- trunk/extra/tests/doTestLinux.sh trunk/extra/tests/doTestMac.sh trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.txt Removed Paths: ------------- trunk/extra/tests/Makefile trunk/extra/tests/times.ods Deleted: trunk/extra/tests/Makefile =================================================================== --- trunk/extra/tests/Makefile 2011-07-23 09:47:47 UTC (rev 344) +++ trunk/extra/tests/Makefile 2011-07-25 09:35:33 UTC (rev 345) @@ -1,32 +0,0 @@ - -gcc-llvm: - gcc -O3 -march=core2 -DGCC vectors.c -o vectors - chmod +x vectors - time ./vectors x - -gcc-mac: - gcc-4.2 -O3 -march=core2 -DGCC vectors.c -o vectors - chmod +x vectors - time ./vectors x - - -clang: - clang -O3 -march=core2 -DCLANG vectors.c -o vectors - chmod +x vectors - time ./vectors x - -gcc-mac-vect: - gcc-4.2 -O3 -march=core2 -DGCC -DHAS_VECT_EXT vectors.c -o vectors - chmod +x vectors - time ./vectors x - -gcc-llvm-vect: - gcc -O3 -march=core2 -DGCC -DHAS_VECT_EXT vectors.c -o vectors - chmod +x vectors - time ./vectors x - - -clang-vect: - clang -O3 -march=core2 -DCLANG -DHAS_VECT_EXT vectors.c -o vectors - chmod +x vectors - time ./vectors x Added: trunk/extra/tests/doTestLinux.sh =================================================================== --- trunk/extra/tests/doTestLinux.sh (rev 0) +++ trunk/extra/tests/doTestLinux.sh 2011-07-25 09:35:33 UTC (rev 345) @@ -0,0 +1,43 @@ +#!/bin/bash + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC" +CCDESCR="gcc 4.5.2" + +echo $CC $FLAGS vectors.c -o vectors +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="clang -DGCC" +CCDESCR="clang" + +$(CC) $(FLAGS) 
vectors.c -o vectors +echo Compiler $(CCDESCR) +echo Flags $(FLAGS) +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC -DHAS_VECT_EXT" +CCDESCR= "gcc 4.5.2 vector extensions" + +$(CC) $(FLAGS) vectors.c -o vectors +echo Compiler $(CCDESCR) +echo Flags $(FLAGS) +time ./vectors x +echo + + +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT_EXT" +CCDESCR= "clang vector extensions" + +$(CC) $(FLAGS) vectors.c -o vectors +echo Compiler $(CCDESCR) +echo Flags $(FLAGS) +time ./vectors x +echo Added: trunk/extra/tests/doTestMac.sh =================================================================== --- trunk/extra/tests/doTestMac.sh (rev 0) +++ trunk/extra/tests/doTestMac.sh 2011-07-25 09:35:33 UTC (rev 345) @@ -0,0 +1,121 @@ +#!/bin/bash + +# Without vector extension, gcc, gcc-llvm and clang + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC" +CCDESCR="gcc llvm 4.2" + +echo $CC $FLAGS vectors.c -o vectors +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="gcc-4.2 -DGCC" +CCDESCR="gcc 4.2" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="clang -DGCC" +CCDESCR="clang" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +# With Vector Extensions, size 8 + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC -DHAS_VECT8_EXT" +CCDESCR="gcc llvm 4.2 vector extensions, size 8" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="gcc-4.2 -DGCC -DHAS_VECT8_EXT" +CCDESCR="gcc 4.2 vector extensions, size 8" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT8_EXT" +CCDESCR="clang vector extensions, size 8" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS 
+time ./vectors x +echo + + +# With Vector Extensions, size 4 + +FLAGS="-O3 -march=core2" +CC="gcc -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc llvm 4.2 vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc 4.2 vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2" +CC="clang -DGCC -DHAS_VECT4_EXT" +CCDESCR="clang vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +# gcc only, ss2 and ss3 + +FLAGS="-O3 -march=core2 -msse2 -ffast-math" +CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc 4.2 vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo + +FLAGS="-O3 -march=core2 -msse3 -ffast-math" +CC="gcc-4.2 -DGCC -DHAS_VECT4_EXT" +CCDESCR="gcc 4.2 vector extensions, size 4" + +$CC $FLAGS vectors.c -o vectors +echo Compiler $CCDESCR +echo Flags $FLAGS +time ./vectors x +echo Added: trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.txt =================================================================== --- trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.txt (rev 0) +++ trunk/extra/tests/results-macosx10.6.8-corei7sb2.7Ghz.txt 2011-07-25 09:35:33 UTC (rev 345) @@ -0,0 +1,89 @@ +gcc -DGCC -O3 -march=core2 vectors.c -o vectors +Compiler gcc llvm 4.2 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m35.241s +user 0m35.234s +sys 0m0.005s + +Compiler gcc 4.2 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m33.256s +user 0m33.249s +sys 0m0.004s + +Compiler clang +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m35.244s +user 0m35.237s +sys 0m0.004s + +Compiler gcc llvm 4.2 vector extensions, size 8 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 
0m8.474s +user 0m8.469s +sys 0m0.002s + +Compiler gcc 4.2 vector extensions, size 8 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 1m12.190s +user 1m12.187s +sys 0m0.004s + +Compiler clang vector extensions, size 8 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m8.460s +user 0m8.456s +sys 0m0.001s + +Compiler gcc llvm 4.2 vector extensions, size 4 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m11.175s +user 0m11.173s +sys 0m0.002s + +Compiler gcc 4.2 vector extensions, size 4 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m8.857s +user 0m8.855s +sys 0m0.002s + +Compiler clang vector extensions, size 4 +Flags -O3 -march=core2 +Result 2097152.000000 + +real 0m11.174s +user 0m11.173s +sys 0m0.001s + +Compiler gcc 4.2 vector extensions, size 4 +Flags -O3 -march=core2 -msse2 -ffast-math +Result 2097152.000000 + +real 0m11.059s +user 0m11.057s +sys 0m0.001s + +Compiler gcc 4.2 vector extensions, size 4 +Flags -O3 -march=core2 -msse3 -ffast-math +Result 2097152.000000 + +real 0m11.063s +user 0m11.061s +sys 0m0.002s + Deleted: trunk/extra/tests/times.ods =================================================================== (Binary files differ) Modified: trunk/extra/tests/vectors =================================================================== (Binary files differ) Modified: trunk/extra/tests/vectors.c =================================================================== --- trunk/extra/tests/vectors.c 2011-07-23 09:47:47 UTC (rev 344) +++ trunk/extra/tests/vectors.c 2011-07-25 09:35:33 UTC (rev 345) @@ -8,7 +8,7 @@ #define restrict #endif -#if defined(HAS_VECT_EXT) +#if defined(HAS_VECT8_EXT) typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); @@ -44,6 +44,42 @@ } } +#elif defined(HAS_VECT4_EXT) + +typedef float v4sf __attribute__ ((vector_size (4 * sizeof(float)))); + +static inline void add3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) +{ + unsigned int i; + v4sf v0, 
v1, v2; + + for (i=0; i < vecsize; i+=4) + { + v0 = *((v4sf * restrict) (arg0 + i)); + v1 = *((v4sf * restrict) (arg1 + i)); + + v2 = v0 + v1; + + *((v4sf * restrict) (arg2 + i)) = v2; + } +} + +static inline void mul3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) +{ + unsigned int i; + v4sf v0, v1, v2; + + for (i=0; i < vecsize; i+=4) + { + v0 = *((v4sf * restrict) (arg0 + i)); + v1 = *((v4sf * restrict) (arg1 + i)); + + v2 = v0 * v1; + + *((v4sf * restrict) (arg2 + i)) = v2; + } +} + #else static inline void add3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) @@ -69,10 +105,10 @@ int main(int argc, const char *argv[]) { - float va[VECT_SIZE]; - float vb[VECT_SIZE]; - float vc[VECT_SIZE]; - float temp[VECT_SIZE]; + float va[VECT_SIZE] __attribute__ ((__aligned__(8 * sizeof(float)))) ; + float vb[VECT_SIZE] __attribute__ ((__aligned__(8 * sizeof(float)))) ; + float vc[VECT_SIZE] __attribute__ ((__aligned__(8 * sizeof(float)))) ; + float temp[VECT_SIZE] __attribute__ ((__aligned__(8 * sizeof(float)))) ; int i; float f; double acc; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-23 09:47:53
|
Revision: 344 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=344&view=rev Author: dececco Date: 2011-07-23 09:47:47 +0000 (Sat, 23 Jul 2011) Log Message: ----------- Added tests benchmark table Modified Paths: -------------- trunk/extra/tests/Makefile trunk/extra/tests/vectors Added Paths: ----------- trunk/extra/tests/times.ods Modified: trunk/extra/tests/Makefile =================================================================== --- trunk/extra/tests/Makefile 2011-07-23 09:02:58 UTC (rev 343) +++ trunk/extra/tests/Makefile 2011-07-23 09:47:47 UTC (rev 344) @@ -1,21 +1,31 @@ -gcc: +gcc-llvm: gcc -O3 -march=core2 -DGCC vectors.c -o vectors chmod +x vectors time ./vectors x +gcc-mac: + gcc-4.2 -O3 -march=core2 -DGCC vectors.c -o vectors + chmod +x vectors + time ./vectors x + clang: clang -O3 -march=core2 -DCLANG vectors.c -o vectors chmod +x vectors time ./vectors x -gcc-vect: +gcc-mac-vect: gcc-4.2 -O3 -march=core2 -DGCC -DHAS_VECT_EXT vectors.c -o vectors chmod +x vectors time ./vectors x +gcc-llvm-vect: + gcc -O3 -march=core2 -DGCC -DHAS_VECT_EXT vectors.c -o vectors + chmod +x vectors + time ./vectors x + clang-vect: clang -O3 -march=core2 -DCLANG -DHAS_VECT_EXT vectors.c -o vectors chmod +x vectors Added: trunk/extra/tests/times.ods =================================================================== (Binary files differ) Property changes on: trunk/extra/tests/times.ods ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: trunk/extra/tests/vectors =================================================================== (Binary files differ) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-23 09:03:04
|
Revision: 343 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=343&view=rev Author: dececco Date: 2011-07-23 09:02:58 +0000 (Sat, 23 Jul 2011) Log Message: ----------- Added test for vector performance, in directory extra Added Paths: ----------- trunk/extra/tests/ trunk/extra/tests/Makefile trunk/extra/tests/vectors trunk/extra/tests/vectors.c Added: trunk/extra/tests/Makefile =================================================================== --- trunk/extra/tests/Makefile (rev 0) +++ trunk/extra/tests/Makefile 2011-07-23 09:02:58 UTC (rev 343) @@ -0,0 +1,22 @@ + +gcc: + gcc -O3 -march=core2 -DGCC vectors.c -o vectors + chmod +x vectors + time ./vectors x + + +clang: + clang -O3 -march=core2 -DCLANG vectors.c -o vectors + chmod +x vectors + time ./vectors x + +gcc-vect: + gcc-4.2 -O3 -march=core2 -DGCC -DHAS_VECT_EXT vectors.c -o vectors + chmod +x vectors + time ./vectors x + + +clang-vect: + clang -O3 -march=core2 -DCLANG -DHAS_VECT_EXT vectors.c -o vectors + chmod +x vectors + time ./vectors x Added: trunk/extra/tests/vectors =================================================================== (Binary files differ) Property changes on: trunk/extra/tests/vectors ___________________________________________________________________ Added: svn:executable + * Added: svn:mime-type + application/octet-stream Added: trunk/extra/tests/vectors.c =================================================================== --- trunk/extra/tests/vectors.c (rev 0) +++ trunk/extra/tests/vectors.c 2011-07-23 09:02:58 UTC (rev 343) @@ -0,0 +1,132 @@ +#include <stdio.h> + +#ifdef GCC +#define restrict __restrict__ +#endif + +#ifdef CLANG +#define restrict +#endif + +#if defined(HAS_VECT_EXT) + +typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); + +static inline void add3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) +{ + unsigned int i; + v8sf v0, v1, v2; + + for (i=0; i < vecsize; i+=8) + { + v0 = *((v8sf * 
restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); + + v2 = v0 + v1; + + *((v8sf * restrict) (arg2 + i)) = v2; + } +} + +static inline void mul3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) +{ + unsigned int i; + v8sf v0, v1, v2; + + for (i=0; i < vecsize; i+=8) + { + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); + + v2 = v0 * v1; + + *((v8sf * restrict) (arg2 + i)) = v2; + } +} + +#else + +static inline void add3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg2[i] = arg0[i] + arg1[i]; +} + +static inline void mul3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg2[i] = arg0[i] * arg1[i]; +} + +#endif + +#define VECT_SIZE 256 +#define ITERATIONS 100000000 + +int main(int argc, const char *argv[]) +{ + float va[VECT_SIZE]; + float vb[VECT_SIZE]; + float vc[VECT_SIZE]; + float temp[VECT_SIZE]; + int i; + float f; + double acc; + + /* + Compute va = va * vb + vc N times. + + va, vb and vc are initialized with values based on the first argument. + Finally, print the sum of the elements of va, just to avoid that the optimiser optimise away the code. + */ + + if (argc != 2) + { + fprintf(stderr, "Usage:vectors <string>, where string is used as a seed for the computation\n"); + return -1; + } + + f = (((float)*argv[1]) * 3.)/111111111.; + + for (i = 0; i < VECT_SIZE; i++) + { + va[i] = f; + f += (1. / 11111111.); + } + + + for (i = 0; i < VECT_SIZE; i++) + { + vb[i] = 1. + f / 10000.; + f += (1. / 1111111.); + } + + + for (i = 0; i < VECT_SIZE; i++) + { + vc[i] = f; + f += (1. 
/ 11111111.); + } + + + for (i = 0; i < ITERATIONS; i++) + { + mul3_vec(va, vb, temp, VECT_SIZE); + add3_vec(temp, vc, va, VECT_SIZE); + } + + + acc = 0.; + + for (i = 0; i < VECT_SIZE; i++) + acc += va[i]; + + printf("Result %f\n", acc); + + + return 0; +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-23 09:02:19
|
Revision: 342 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=342&view=rev Author: dececco Date: 2011-07-23 09:02:13 +0000 (Sat, 23 Jul 2011) Log Message: ----------- Added test for vector performance, in directory extra Added Paths: ----------- trunk/extra/ This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-22 08:41:43
|
Revision: 341 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=341&view=rev Author: dececco Date: 2011-07-22 08:41:37 +0000 (Fri, 22 Jul 2011) Log Message: ----------- Suppressed the restrict keyword for clang compilation, no effect or maybe a small slowdown Modified Paths: -------------- trunk/benchmarks.ods trunk/core/fts/sys/platform.h trunk/packages/ispw/fts/sampling/ftl_delay.c Modified: trunk/benchmarks.ods =================================================================== (Binary files differ) Modified: trunk/core/fts/sys/platform.h =================================================================== --- trunk/core/fts/sys/platform.h 2011-07-20 20:10:01 UTC (rev 340) +++ trunk/core/fts/sys/platform.h 2011-07-22 08:41:37 UTC (rev 341) @@ -42,7 +42,7 @@ //#define HAS_GCC #define HAS_CLANG #define FTS_ARCH_NAME "MacOsX" -// #define restrict +#define restrict #define HAS_UNIX #define HAS_DTD /* direct to disk support */ #define HAS_PTHREADS @@ -162,7 +162,7 @@ #define restrict __restrict__ -#define HAS_VECT_EXT +//#define HAS_VECT_EXT typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); typedef union {v8sf v; float f[8];} pvector8; Modified: trunk/packages/ispw/fts/sampling/ftl_delay.c =================================================================== --- trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-20 20:10:01 UTC (rev 340) +++ trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-22 08:41:37 UTC (rev 341) @@ -84,7 +84,7 @@ del_buf_t * restrict buf = (del_buf_t *) fts_word_get_ptr(argv + 1); float * restrict delay_line = buf->delay_line; int n_tick = fts_word_get_int(argv + 2); - int *del_time = (int *)fts_word_get_ptr(argv + 3); + int * restrict del_time = (int *)fts_word_get_ptr(argv + 3); int del = *del_time; int i; int phase; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-20 20:10:07
|
Revision: 340 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=340&view=rev Author: dececco Date: 2011-07-20 20:10:01 +0000 (Wed, 20 Jul 2011) Log Message: ----------- Discovered that old include files defined the restrict keyword as empty. Fixed and fixed a number of compilation errors. Marginal speedup on clang, do not compile yet on gcc Modified Paths: -------------- trunk/benchmarks.ods trunk/core/fts/lang/ftl/ftl.c trunk/core/fts/lang/veclib/portable/vec_cpy.c trunk/core/fts/lang/veclib/portable/vecx_cpy.c trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c trunk/core/fts/sys/platform.h trunk/packages/ispw/fts/sampling/ftl_delay.c trunk/packages/ispw/fts/signal/sig1.c trunk/packages/ispw/fts/signal/sigline.c trunk/packages/ispw/fts/signal/sigthrow.c Modified: trunk/benchmarks.ods =================================================================== (Binary files differ) Modified: trunk/core/fts/lang/ftl/ftl.c =================================================================== --- trunk/core/fts/lang/ftl/ftl.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/core/fts/lang/ftl/ftl.c 2011-07-20 20:10:01 UTC (rev 340) @@ -289,12 +289,12 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); - v1 = *((v8sf restrict *) (arg1 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); v2 = v0 + v1; - *((v8sf restrict *) (arg2 + i)) = v2; + *((v8sf * restrict) (arg2 + i)) = v2; } } @@ -305,12 +305,12 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); - v1 = *((v8sf restrict *) (arg1 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); v2 = v0 + v1; - *((v8sf restrict *) (arg1 + i)) = v2; + *((v8sf * restrict) (arg1 + i)) = v2; } } @@ -321,9 +321,9 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); r = v0 + v0; - *((v8sf restrict *) (arg1 + i)) = r; + *((v8sf * restrict) (arg1 + i)) = r; } } @@ -334,9 
+334,9 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); r = v0 + v0; - *((v8sf restrict *) (arg0 + i)) = r; + *((v8sf * restrict) (arg0 + i)) = r; } } @@ -349,12 +349,12 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); - v1 = *((v8sf restrict *) (arg1 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); v2 = v0 - v1; - *((v8sf restrict *) (arg2 + i)) = v2; + *((v8sf * restrict) (arg2 + i)) = v2; } } @@ -365,12 +365,12 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); - v1 = *((v8sf restrict *) (arg1 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); v2 = v0 - v1; - *((v8sf restrict *) (arg1 + i)) = v2; + *((v8sf * restrict) (arg1 + i)) = v2; } } @@ -381,12 +381,12 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); - v1 = *((v8sf restrict *) (arg1 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); v2 = v0 * v1; - *((v8sf restrict *) (arg2 + i)) = v2; + *((v8sf * restrict) (arg2 + i)) = v2; } } @@ -397,12 +397,12 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); - v1 = *((v8sf restrict *) (arg1 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); + v1 = *((v8sf * restrict) (arg1 + i)); v2 = v0 * v1; - *((v8sf restrict *) (arg1 + i)) = v2; + *((v8sf * restrict) (arg1 + i)) = v2; } } @@ -413,11 +413,11 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); r = v0 * v0; - *((v8sf restrict *) (arg1 + i)) = r; + *((v8sf * restrict) (arg1 + i)) = r; } } @@ -428,11 +428,11 @@ for (i=0; i < vecsize; i+=8) { - v0 = *((v8sf restrict *) (arg0 + i)); + v0 = *((v8sf * restrict) (arg0 + i)); r = v0 * v0; - *((v8sf restrict *) (arg0 + i)) = r; + *((v8sf * restrict) (arg0 + i)) = r; } } @@ -443,8 +443,8 @@ for (i=0; i < vecsize; i+=8) { - v = *((v8sf restrict *) (arg0 
+ i)); - *((v8sf restrict *) (arg1 + i)) = v; + v = *((v8sf * restrict) (arg0 + i)); + *((v8sf * restrict) (arg1 + i)) = v; } } @@ -557,7 +557,7 @@ for (i = 0; i < vecsize; i+=8) { - *((v8sf restrict *) (arg0 + i)) = PVEC_VECT(z); + *((v8sf * restrict) (arg0 + i)) = PVEC_VECT(z); } } Modified: trunk/core/fts/lang/veclib/portable/vec_cpy.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vec_cpy.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/core/fts/lang/veclib/portable/vec_cpy.c 2011-07-20 20:10:01 UTC (rev 340) @@ -52,8 +52,8 @@ for (i = 0; i < size; i+=8) { - v = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v; + v = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = v; } #else for (i = 0; i < size; i++) @@ -217,7 +217,7 @@ for (i = 0; i < size; i+=8) { - *((v8sf restrict *) (out + i)) = PVEC_VECT(z); + *((v8sf *) (out + i)) = PVEC_VECT(z); } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/lang/veclib/portable/vecx_cpy.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_cpy.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/core/fts/lang/veclib/portable/vecx_cpy.c 2011-07-20 20:10:01 UTC (rev 340) @@ -51,8 +51,8 @@ for (i = 0; i < size; i+=8) { - v = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v; + v = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = v; } #else for (i = 0; i < size; i++) @@ -216,7 +216,7 @@ for (i = 0; i < size; i+=8) { - *((v8sf restrict *) (out + i)) = PVEC_VECT(z); + *((v8sf *) (out + i)) = PVEC_VECT(z); } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-20 20:10:01 UTC (rev 340) @@ -61,8 +61,8 @@ for (i = 0; i < 
size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin + PVEC_VECT(v); + vin = *((v8sf * ) (in + i)); + *((v8sf * ) (out + i)) = vin + PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -90,8 +90,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin - PVEC_VECT(v); + vin = *((v8sf * ) (in + i)); + *((v8sf * ) (out + i)) = vin - PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -119,8 +119,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin * PVEC_VECT(v); + vin = *((v8sf * ) (in + i)); + *((v8sf * ) (out + i)) = vin * PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -149,8 +149,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin / PVEC_VECT(v); + vin = *((v8sf * ) (in + i)); + *((v8sf * ) (out + i)) = vin / PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -178,8 +178,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = PVEC_VECT(v) - vin; + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = PVEC_VECT(v) - vin; } #else for (i = 0; i < size; i++) @@ -207,8 +207,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = PVEC_VECT(v) / vin; + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = PVEC_VECT(v) / vin; } #else for (i = 0; i < size; i++) @@ -242,8 +242,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin + PVEC_VECT(v); + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = vin + PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -275,8 +275,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin - PVEC_VECT(v); + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = vin - PVEC_VECT(v); } #else for (i = 0; i < 
size; i++) @@ -308,8 +308,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin * PVEC_VECT(v); + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = vin * PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -341,8 +341,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin /PVEC_VECT(v); + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = vin /PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -374,8 +374,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = PVEC_VECT(v) - vin; + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = PVEC_VECT(v) - vin; } #else for (i = 0; i < size; i++) @@ -407,8 +407,8 @@ for (i = 0; i < size; i+=8) { - vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = PVEC_VECT(v) / vin; + vin = *((v8sf *) (in + i)); + *((v8sf *) (out + i)) = PVEC_VECT(v) / vin; } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/sys/platform.h =================================================================== --- trunk/core/fts/sys/platform.h 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/core/fts/sys/platform.h 2011-07-20 20:10:01 UTC (rev 340) @@ -39,9 +39,10 @@ */ #if defined(MACOSX) - +//#define HAS_GCC +#define HAS_CLANG #define FTS_ARCH_NAME "MacOsX" -#define restrict +// #define restrict #define HAS_UNIX #define HAS_DTD /* direct to disk support */ #define HAS_PTHREADS @@ -50,18 +51,7 @@ #define HAS_PORTAUDIO #define PORTAUDIO_INTERLEAVE #define HAS_PORTMIDI -#define HAS_VECT_EXT -typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); - -// The following is to simplify declarations for code that access directly to -// vector content and must run on both gcc and llvm - -typedef v8sf pvector8; - -#define PVEC_ELEM(t, i) t[i] -#define PVEC_VECT(t) t - /* This macro is defined here because not all platforms have a isnanf macro (or function 
*/ #define fts_isnanf(x) (((*(unsigned int *)&(x) & 0x7f800000L)==0x7f800000L)&& \ @@ -76,7 +66,7 @@ #elif defined(LINUX) #define FTS_ARCH_NAME "linux" -#define restrict +#define HAS_GCC #define HAS_UNIX #define HAS_DTD /* direct to disk support */ #define HAS_PTHREADS @@ -85,14 +75,6 @@ #define FTS_HAS_LITTLE_ENDIAN #define HAS_PORTAUDIO #define HAS_PORTMIDI -// #define HAS_VECT_EXT - -typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); -typedef union {v8sf v; float f[8];} pvector8; - -#define PVEC_ELEM(t, i) t.f[i] -#define PVEC_VECT(t) t.v - /* This macro is defined here because not all platforms have a isnanf macro (or function */ #define fts_isnanf(x) (((*(unsigned int *)&(x) & 0x7f800000L)==0x7f800000L)&& \ @@ -107,7 +89,7 @@ #elif defined(CYGWIN) #define FTS_ARCH_NAME "cygwin" -#define restrict +#define HAS_GCC #define HAS_UNIX #define HAS_DTD /* direct to disk support */ #define HAS_PTHREADS @@ -173,10 +155,42 @@ #endif +// Vector extensions definitions + +#ifdef HAS_GCC + +#define restrict __restrict__ + + +#define HAS_VECT_EXT + +typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); +typedef union {v8sf v; float f[8];} pvector8; + +#define PVEC_ELEM(t, i) t.f[i] +#define PVEC_VECT(t) t.v + #endif +#ifdef HAS_CLANG +#define HAS_VECT_EXT +typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); +// The following is to simplify declarations for code that access directly to +// vector content and must run on both gcc and llvm +typedef v8sf pvector8; +#define PVEC_ELEM(t, i) t[i] +#define PVEC_VECT(t) t +#endif + +#endif + + + + + + Modified: trunk/packages/ispw/fts/sampling/ftl_delay.c =================================================================== --- trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-20 20:10:01 UTC (rev 340) @@ -44,8 +44,8 @@ for (i=0; i<n_tick; i+=8) { - v = *((v8sf restrict *) (in + i)); - *((v8sf 
restrict *) (delay_line + phase + i)) = v; + v = *((v8sf * restrict) (in + i)); + *((v8sf * restrict) (delay_line + phase + i)) = v; } if (phase >= ring_size) /* ring buffer wrap around */ @@ -54,8 +54,8 @@ for (i=0; i<n_tick; i+=8) { - v = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (delay_line + i)) = v; + v = *((v8sf * restrict) (in + i)); + *((v8sf * restrict) (delay_line + i)) = v; } } else @@ -112,7 +112,7 @@ PVEC_ELEM(v, 6) = delay_line[phase + i + 6]; PVEC_ELEM(v, 7) = delay_line[phase + i + 7]; - *((v8sf restrict *) (out + i)) = PVEC_VECT(v); + *((v8sf * restrict) (out + i)) = PVEC_VECT(v); } #else for (i=0; i<n_tick; i++) Modified: trunk/packages/ispw/fts/signal/sig1.c =================================================================== --- trunk/packages/ispw/fts/signal/sig1.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/packages/ispw/fts/signal/sig1.c 2011-07-20 20:10:01 UTC (rev 340) @@ -116,7 +116,7 @@ PVEC_ELEM(v, 7) = f; for (i = 0; i < n; i+=8) - *((v8sf restrict *) (out + i)) = PVEC_VECT(v); + *((v8sf * restrict) (out + i)) = PVEC_VECT(v); #else for (i = 0; i < n; i++) out[i] = f; Modified: trunk/packages/ispw/fts/signal/sigline.c =================================================================== --- trunk/packages/ispw/fts/signal/sigline.c 2011-07-19 17:24:19 UTC (rev 339) +++ trunk/packages/ispw/fts/signal/sigline.c 2011-07-20 20:10:01 UTC (rev 340) @@ -93,7 +93,7 @@ PVEC_ELEM(v, 6) = f; PVEC_ELEM(v, 7) = f; - *((v8sf restrict *) (fp + i)) = PVEC_VECT(v) + PVEC_VECT(vinc); + *((v8sf * restrict) (fp + i)) = PVEC_VECT(v) + PVEC_VECT(vinc); f = f + incr8; } @@ -116,7 +116,7 @@ PVEC_ELEM(v, 7) = f; for (i = 0; i < n; i+=8) - *((v8sf restrict *) (fp + i)) = PVEC_VECT(v); + *((v8sf * restrict) (fp + i)) = PVEC_VECT(v); lctl->value = f; } Modified: trunk/packages/ispw/fts/signal/sigthrow.c =================================================================== --- trunk/packages/ispw/fts/signal/sigthrow.c 2011-07-19 17:24:19 UTC (rev 339) +++ 
trunk/packages/ispw/fts/signal/sigthrow.c 2011-07-20 20:10:01 UTC (rev 340) @@ -108,9 +108,9 @@ for (i = 0; i < n; i+=8) { - v = *((v8sf restrict *) (buf + i)); - *((v8sf restrict *) (out + i)) = v; - *((v8sf restrict *) (buf + i)) = PVEC_VECT(z); + v = *((v8sf * restrict) (buf + i)); + *((v8sf * restrict) (out + i)) = v; + *((v8sf * restrict) (buf + i)) = PVEC_VECT(z); } #else for (i = 0; i < n; i++) @@ -241,10 +241,10 @@ #ifdef HAS_VECT_EXT for (i = 0; i < n; i+=8) { - v1 = *((v8sf restrict *) (p + i)); - v2 = *((v8sf restrict *) (in + i)); + v1 = *((v8sf * restrict) (p + i)); + v2 = *((v8sf * restrict) (in + i)); r = v1 + v2; - *((v8sf restrict *) (p + i)) = r; + *((v8sf * restrict) (p + i)) = r; } #else for (i = 0; i < n; i++) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-19 17:24:25
|
Revision: 339 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=339&view=rev Author: dececco Date: 2011-07-19 17:24:19 +0000 (Tue, 19 Jul 2011) Log Message: ----------- bug fix on the non vect version Modified Paths: -------------- trunk/core/Makefiles/Makefile.Linux.amd64 trunk/core/fts/lang/ftl/ftl.c trunk/core/fts/sys/platform.h Modified: trunk/core/Makefiles/Makefile.Linux.amd64 =================================================================== --- trunk/core/Makefiles/Makefile.Linux.amd64 2011-07-19 07:50:30 UTC (rev 338) +++ trunk/core/Makefiles/Makefile.Linux.amd64 2011-07-19 17:24:19 UTC (rev 339) @@ -42,6 +42,7 @@ CC := gcc LD := gcc + # These flags are quite strict and give a lot of warning. But they are safe #ARCH_CFLAGS := -pipe -Wall -W -Wpointer-arith -Wbad-function-cast -Wcast-qual -Wcast-align -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wno-unused -DLINUX -DLINUXPC # For now, we use Modified: trunk/core/fts/lang/ftl/ftl.c =================================================================== --- trunk/core/fts/lang/ftl/ftl.c 2011-07-19 07:50:30 UTC (rev 338) +++ trunk/core/fts/lang/ftl/ftl.c 2011-07-19 17:24:19 UTC (rev 339) @@ -527,7 +527,7 @@ unsigned int i; for (i = 0; i < vecsize; i++) - arg1[i] = arg0[i] * arg0[i]; + arg0[i] = arg0[i] * arg0[i]; } static inline void copy_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) Modified: trunk/core/fts/sys/platform.h =================================================================== --- trunk/core/fts/sys/platform.h 2011-07-19 07:50:30 UTC (rev 338) +++ trunk/core/fts/sys/platform.h 2011-07-19 17:24:19 UTC (rev 339) @@ -85,7 +85,7 @@ #define FTS_HAS_LITTLE_ENDIAN #define HAS_PORTAUDIO #define HAS_PORTMIDI -#define HAS_VECT_EXT +// #define HAS_VECT_EXT typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); typedef union {v8sf v; float f[8];} pvector8; This was sent by the SourceForge.net collaborative development platform, the world's 
largest Open Source development site. |
From: <de...@us...> - 2011-07-19 07:50:36
|
Revision: 338 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=338&view=rev Author: dececco Date: 2011-07-19 07:50:30 +0000 (Tue, 19 Jul 2011) Log Message: ----------- Bug fixes in Linux vector code Modified Paths: -------------- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c trunk/core/fts/sys/platform.h Modified: trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-15 10:16:27 UTC (rev 337) +++ trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-19 07:50:30 UTC (rev 338) @@ -179,7 +179,7 @@ for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v - PVEC_VECT(vin); + *((v8sf restrict *) (out + i)) = PVEC_VECT(v) - vin; } #else for (i = 0; i < size; i++) @@ -208,7 +208,7 @@ for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v / PVEC_VECT(vin); + *((v8sf restrict *) (out + i)) = PVEC_VECT(v) / vin; } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/sys/platform.h =================================================================== --- trunk/core/fts/sys/platform.h 2011-07-15 10:16:27 UTC (rev 337) +++ trunk/core/fts/sys/platform.h 2011-07-19 07:50:30 UTC (rev 338) @@ -88,7 +88,7 @@ #define HAS_VECT_EXT typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); -typedef union {v8sf v; float f[4];} pvector8; +typedef union {v8sf v; float f[8];} pvector8; #define PVEC_ELEM(t, i) t.f[i] #define PVEC_VECT(t) t.v This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-15 10:16:34
|
Revision: 337 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=337&view=rev Author: dececco Date: 2011-07-15 10:16:27 +0000 (Fri, 15 Jul 2011) Log Message: ----------- Implemented vector code that is portable between gcc and llvm; all code now runs on both gcc and llvm. Implementation considered complete. Modified Paths: -------------- trunk/core/fts/lang/ftl/TODO-FTL trunk/core/fts/lang/ftl/ftl.c trunk/core/fts/lang/veclib/portable/vec_cpy.c trunk/core/fts/lang/veclib/portable/vecx_cpy.c trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c trunk/core/fts/sys/platform.h trunk/packages/ispw/fts/sampling/ftl_delay.c trunk/packages/ispw/fts/signal/sig1.c trunk/packages/ispw/fts/signal/sigline.c trunk/packages/ispw/fts/signal/sigthrow.c Modified: trunk/core/fts/lang/ftl/TODO-FTL =================================================================== --- trunk/core/fts/lang/ftl/TODO-FTL 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/core/fts/lang/ftl/TODO-FTL 2011-07-15 10:16:27 UTC (rev 337) @@ -1,10 +1,13 @@ -Complete the gcc version of the llcm specific code, move the vector definition to the platform.h file +Commit, Benchmark and store in the bench file. +Releases for Mac OSX, Linux 32 64 +For future versions: + Implements the zero based optimisations: MUL by zero produce fill of zero, ADD of zero a copy of the original value. Check if it is feasible: yes, in the ftl3arg operator, by adding two methods zeroArgZeroResult zeroArgOtherResult, yes but not very useful for multiply> Postponed at the graph level optimisations. ftlmem: really useful ? Bench ? -Benchmark and store in the bench file. 
+ Modified: trunk/core/fts/lang/ftl/ftl.c =================================================================== --- trunk/core/fts/lang/ftl/ftl.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/core/fts/lang/ftl/ftl.c 2011-07-15 10:16:27 UTC (rev 337) @@ -539,24 +539,25 @@ } #endif -#ifdef HAS_LLVM_VECT +#ifdef HAS_VECT_EXT + static inline void zero_vec(float * restrict arg0, unsigned int vecsize) { unsigned int i; - v8sf z; + pvector8 z; - z[0] = 0.0; - z[1] = 0.0; - z[2] = 0.0; - z[3] = 0.0; - z[4] = 0.0; - z[5] = 0.0; - z[6] = 0.0; - z[7] = 0.0; - + PVEC_ELEM(z, 0) = 0.0; + PVEC_ELEM(z, 1) = 0.0; + PVEC_ELEM(z, 2) = 0.0; + PVEC_ELEM(z, 3) = 0.0; + PVEC_ELEM(z, 4) = 0.0; + PVEC_ELEM(z, 5) = 0.0; + PVEC_ELEM(z, 6) = 0.0; + PVEC_ELEM(z, 7) = 0.0; + for (i = 0; i < vecsize; i+=8) { - *((v8sf restrict *) (arg0 + i)) = z; + *((v8sf restrict *) (arg0 + i)) = PVEC_VECT(z); } } Modified: trunk/core/fts/lang/veclib/portable/vec_cpy.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vec_cpy.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/core/fts/lang/veclib/portable/vec_cpy.c 2011-07-15 10:16:27 UTC (rev 337) @@ -203,21 +203,21 @@ fts_vec_fzero (float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf z; +#ifdef HAS_VECT_EXT + pvector8 z; - z[0] = 0.0; - z[1] = 0.0; - z[2] = 0.0; - z[3] = 0.0; - z[4] = 0.0; - z[5] = 0.0; - z[6] = 0.0; - z[7] = 0.0; - + PVEC_ELEM(z, 0) = 0.0; + PVEC_ELEM(z, 1) = 0.0; + PVEC_ELEM(z, 2) = 0.0; + PVEC_ELEM(z, 3) = 0.0; + PVEC_ELEM(z, 4) = 0.0; + PVEC_ELEM(z, 5) = 0.0; + PVEC_ELEM(z, 6) = 0.0; + PVEC_ELEM(z, 7) = 0.0; + for (i = 0; i < size; i+=8) { - *((v8sf restrict *) (out + i)) = z; + *((v8sf restrict *) (out + i)) = PVEC_VECT(z); } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/lang/veclib/portable/vecx_cpy.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_cpy.c 2011-07-12 16:41:09 UTC (rev 336) +++ 
trunk/core/fts/lang/veclib/portable/vecx_cpy.c 2011-07-15 10:16:27 UTC (rev 337) @@ -202,21 +202,21 @@ fts_vecx_fzero (float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf z; +#ifdef HAS_VECT_EXT + pvector8 z; - z[0] = 0.0; - z[1] = 0.0; - z[2] = 0.0; - z[3] = 0.0; - z[4] = 0.0; - z[5] = 0.0; - z[6] = 0.0; - z[7] = 0.0; - + PVEC_ELEM(z, 0) = 0.0; + PVEC_ELEM(z, 1) = 0.0; + PVEC_ELEM(z, 2) = 0.0; + PVEC_ELEM(z, 3) = 0.0; + PVEC_ELEM(z, 4) = 0.0; + PVEC_ELEM(z, 5) = 0.0; + PVEC_ELEM(z, 6) = 0.0; + PVEC_ELEM(z, 7) = 0.0; + for (i = 0; i < size; i+=8) { - *((v8sf restrict *) (out + i)) = z; + *((v8sf restrict *) (out + i)) = PVEC_VECT(z); } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-15 10:16:27 UTC (rev 337) @@ -46,22 +46,23 @@ fts_vecx_scl_fadd (float *in, float f, float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; - + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin + v; + *((v8sf restrict *) (out + i)) = vin + PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -73,22 +74,24 @@ fts_vecx_scl_fsub (float *in, float f, float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; 
+ PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin - v; + *((v8sf restrict *) (out + i)) = vin - PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -100,22 +103,24 @@ fts_vecx_scl_fmul (float *in, float f, float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin * v; + *((v8sf restrict *) (out + i)) = vin * PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -128,22 +133,24 @@ fts_vecx_scl_fdiv (float *in, float f, float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin / v; + *((v8sf restrict *) (out + i)) = vin / PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -155,22 +162,24 @@ fts_vecx_scl_fbus (float *in, float f, float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; 
+ PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v - vin; + *((v8sf restrict *) (out + i)) = v - PVEC_VECT(vin); } #else for (i = 0; i < size; i++) @@ -182,22 +191,24 @@ fts_vecx_scl_fvid (float *in, float f, float *out, int size) { int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v / vin; + *((v8sf restrict *) (out + i)) = v / PVEC_VECT(vin); } #else for (i = 0; i < size; i++) @@ -215,22 +226,24 @@ float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin + v; + *((v8sf restrict *) (out + i)) = vin + PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -246,22 +259,24 @@ float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + 
PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin - v; + *((v8sf restrict *) (out + i)) = vin - PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -277,22 +292,24 @@ float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin * v; + *((v8sf restrict *) (out + i)) = vin * PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -308,22 +325,24 @@ float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = vin / v; + *((v8sf restrict *) (out + i)) = vin /PVEC_VECT(v); } #else for (i = 0; i < size; i++) @@ -339,22 +358,24 @@ float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = 
f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v - vin; + *((v8sf restrict *) (out + i)) = PVEC_VECT(v) - vin; } #else for (i = 0; i < size; i++) @@ -370,22 +391,24 @@ float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); int i; -#ifdef HAS_LLVM_VECT - v8sf v, vin; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; +#ifdef HAS_VECT_EXT + v8sf vin; + pvector8 v; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < size; i+=8) { vin = *((v8sf restrict *) (in + i)); - *((v8sf restrict *) (out + i)) = v / vin; + *((v8sf restrict *) (out + i)) = PVEC_VECT(v) / vin; } #else for (i = 0; i < size; i++) Modified: trunk/core/fts/sys/platform.h =================================================================== --- trunk/core/fts/sys/platform.h 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/core/fts/sys/platform.h 2011-07-15 10:16:27 UTC (rev 337) @@ -50,10 +50,18 @@ #define HAS_PORTAUDIO #define PORTAUDIO_INTERLEAVE #define HAS_PORTMIDI -#define HAS_LLVM_VECT #define HAS_VECT_EXT + typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); +// The following is to simplify declarations for code that access directly to +// vector content and must run on both gcc and llvm + +typedef v8sf pvector8; + +#define PVEC_ELEM(t, i) t[i] +#define PVEC_VECT(t) t + /* This macro is defined here because not all platforms have a isnanf macro (or function */ #define fts_isnanf(x) (((*(unsigned int *)&(x) & 0x7f800000L)==0x7f800000L)&& \ @@ -78,10 +86,13 @@ #define HAS_PORTAUDIO #define HAS_PORTMIDI #define HAS_VECT_EXT -#define HAS_GCC_VECT typedef float v8sf __attribute__ 
((vector_size (8 * sizeof(float)))); +typedef union {v8sf v; float f[4];} pvector8; +#define PVEC_ELEM(t, i) t.f[i] +#define PVEC_VECT(t) t.v + /* This macro is defined here because not all platforms have a isnanf macro (or function */ #define fts_isnanf(x) (((*(unsigned int *)&(x) & 0x7f800000L)==0x7f800000L)&& \ @@ -108,7 +119,6 @@ #define HAS_PORTAUDIO /* #define HAS_PORTMIDI */ #define HAS_VECT_EXT -#define HAS_GCC_VECT /* This macro is defined here because not all platforms have a isnanf macro (or function */ Modified: trunk/packages/ispw/fts/sampling/ftl_delay.c =================================================================== --- trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-15 10:16:27 UTC (rev 337) @@ -89,8 +89,8 @@ int i; int phase; -#ifdef HAS_LLVM_VECT - v8sf v; +#ifdef HAS_VECT_EXT + pvector8 v; #endif phase = buf->phase - del; @@ -98,22 +98,21 @@ if (phase < 0) phase += buf->ring_size; /* ring buffer wrap around */ -#ifdef HAS_LLVM_VECT - +#ifdef HAS_VECT_EXT for (i=0; i<n_tick; i+=8) { // Cannot load in one shot because of alignement - v[0] = delay_line[phase + i + 0]; - v[1] = delay_line[phase + i + 1]; - v[2] = delay_line[phase + i + 2]; - v[3] = delay_line[phase + i + 3]; - v[4] = delay_line[phase + i + 4]; - v[5] = delay_line[phase + i + 5]; - v[6] = delay_line[phase + i + 6]; - v[7] = delay_line[phase + i + 7]; - - *((v8sf restrict *) (out + i)) = v; + PVEC_ELEM(v, 0) = delay_line[phase + i + 0]; + PVEC_ELEM(v, 1) = delay_line[phase + i + 1]; + PVEC_ELEM(v, 2) = delay_line[phase + i + 2]; + PVEC_ELEM(v, 3) = delay_line[phase + i + 3]; + PVEC_ELEM(v, 4) = delay_line[phase + i + 4]; + PVEC_ELEM(v, 5) = delay_line[phase + i + 5]; + PVEC_ELEM(v, 6) = delay_line[phase + i + 6]; + PVEC_ELEM(v, 7) = delay_line[phase + i + 7]; + + *((v8sf restrict *) (out + i)) = PVEC_VECT(v); } #else for (i=0; i<n_tick; i++) Modified: trunk/packages/ispw/fts/signal/sig1.c 
=================================================================== --- trunk/packages/ispw/fts/signal/sig1.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/packages/ispw/fts/signal/sig1.c 2011-07-15 10:16:27 UTC (rev 337) @@ -103,20 +103,20 @@ int n = fts_word_get_int(argv + 2); int i; -#ifdef HAS_LLVM_VECT - v8sf v; +#ifdef HAS_VECT_EXT + pvector8 v; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; - + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < n; i+=8) - *((v8sf restrict *) (out + i)) = v; + *((v8sf restrict *) (out + i)) = PVEC_VECT(v); #else for (i = 0; i < n; i++) out[i] = f; Modified: trunk/packages/ispw/fts/signal/sigline.c =================================================================== --- trunk/packages/ispw/fts/signal/sigline.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/packages/ispw/fts/signal/sigline.c 2011-07-15 10:16:27 UTC (rev 337) @@ -55,8 +55,10 @@ ftl_data_t ftl_data; } sigline_t; -#ifdef HAS_LLVM_VECT + +#ifdef HAS_VECT_EXT + static void ftl_line(fts_word_t *argv) { float * restrict fp = (float *)fts_word_get_ptr(argv); @@ -69,29 +71,29 @@ double f = lctl->value; double incr = lctl->incr; double incr8 = incr * 8; - v8sf v, vinc; + pvector8 v, vinc; - vinc[0] = incr * 1; - vinc[1] = incr * 2; - vinc[2] = incr * 3; - vinc[3] = incr * 4; - vinc[4] = incr * 5; - vinc[5] = incr * 6; - vinc[6] = incr * 7; - vinc[7] = incr * 8; - + PVEC_ELEM(vinc, 0) = incr * 1; + PVEC_ELEM(vinc, 1) = incr * 2; + PVEC_ELEM(vinc, 2) = incr * 3; + PVEC_ELEM(vinc, 3) = incr * 4; + PVEC_ELEM(vinc, 4) = incr * 5; + PVEC_ELEM(vinc, 5) = incr * 6; + PVEC_ELEM(vinc, 6) = incr * 7; + PVEC_ELEM(vinc, 7) = incr * 8; + for (i=0; i<n; i+=8) { - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; - - *((v8sf restrict *) (fp + i)) = v + 
vinc; + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + + *((v8sf restrict *) (fp + i)) = PVEC_VECT(v) + PVEC_VECT(vinc); f = f + incr8; } @@ -102,19 +104,19 @@ { int i; float f = lctl->target; - v8sf v; + pvector8 v; - v[0] = f; - v[1] = f; - v[2] = f; - v[3] = f; - v[4] = f; - v[5] = f; - v[6] = f; - v[7] = f; - + PVEC_ELEM(v, 0) = f; + PVEC_ELEM(v, 1) = f; + PVEC_ELEM(v, 2) = f; + PVEC_ELEM(v, 3) = f; + PVEC_ELEM(v, 4) = f; + PVEC_ELEM(v, 5) = f; + PVEC_ELEM(v, 6) = f; + PVEC_ELEM(v, 7) = f; + for (i = 0; i < n; i+=8) - *((v8sf restrict *) (fp + i)) = v; + *((v8sf restrict *) (fp + i)) = PVEC_VECT(v); lctl->value = f; } Modified: trunk/packages/ispw/fts/signal/sigthrow.c =================================================================== --- trunk/packages/ispw/fts/signal/sigthrow.c 2011-07-12 16:41:09 UTC (rev 336) +++ trunk/packages/ispw/fts/signal/sigthrow.c 2011-07-15 10:16:27 UTC (rev 337) @@ -93,23 +93,24 @@ int n = fts_word_get_int(argv+2); int i; -#ifdef HAS_LLVM_VECT - v8sf z, v; +#ifdef HAS_VECT_EXT + v8sf v; + pvector8 z; - z[0] = 0.0; - z[1] = 0.0; - z[2] = 0.0; - z[3] = 0.0; - z[4] = 0.0; - z[5] = 0.0; - z[6] = 0.0; - z[7] = 0.0; - + PVEC_ELEM(z, 0) = 0.0; + PVEC_ELEM(z, 1) = 0.0; + PVEC_ELEM(z, 2) = 0.0; + PVEC_ELEM(z, 3) = 0.0; + PVEC_ELEM(z, 4) = 0.0; + PVEC_ELEM(z, 5) = 0.0; + PVEC_ELEM(z, 6) = 0.0; + PVEC_ELEM(z, 7) = 0.0; + for (i = 0; i < n; i+=8) { v = *((v8sf restrict *) (buf + i)); *((v8sf restrict *) (out + i)) = v; - *((v8sf restrict *) (buf + i)) = z; + *((v8sf restrict *) (buf + i)) = PVEC_VECT(z); } #else for (i = 0; i < n; i++) This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-12 16:41:15
|
Revision: 336 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=336&view=rev Author: dececco Date: 2011-07-12 16:41:09 +0000 (Tue, 12 Jul 2011) Log Message: ----------- restored the print dsp chain command, with a different syntax, some cleanup Modified Paths: -------------- trunk/core/fts/lang/dsp/dsp.h trunk/core/fts/lang/dsp/dspgraph.c trunk/core/fts/lang/ftl/FtlProgram.h trunk/core/fts/lang/ftl/FtlProgram.m trunk/core/fts/lang/ftl/FtlSignal.h trunk/core/fts/lang/ftl/FtlSignal.m trunk/core/fts/lang/ftl/Sources trunk/core/fts/lang/ftl/TODO-FTL trunk/core/fts/lang/ftl/ftl.c trunk/core/fts/lang/ftl.h trunk/packages/system/fts/dsp1.c Removed Paths: ------------- trunk/core/fts/lang/ftl/ftlutils.c trunk/core/fts/lang/ftl/ftlutils.h Modified: trunk/core/fts/lang/dsp/dsp.h =================================================================== --- trunk/core/fts/lang/dsp/dsp.h 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/dsp/dsp.h 2011-07-12 16:41:09 UTC (rev 336) @@ -116,12 +116,7 @@ extern void dsp_chain_delete(void); extern void dsp_chain_post(void); -extern void dsp_chain_post_signals(void); -extern void dsp_chain_fprint(FILE *f); -extern void dsp_chain_fprint_signals(FILE *f); - - extern fts_object_t *dsp_get_current_object(void); extern int dsp_is_running( void); Modified: trunk/core/fts/lang/dsp/dspgraph.c =================================================================== --- trunk/core/fts/lang/dsp/dspgraph.c 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/dsp/dspgraph.c 2011-07-12 16:41:09 UTC (rev 336) @@ -828,8 +828,7 @@ } /* - * Post and print temporarly not availables, the ftl layer - * is being reimplemented. + * Post restored, print deleted. 
*/ void dsp_chain_post(void) @@ -838,25 +837,6 @@ ftl_program_post(dsp_chain); } -void dsp_chain_post_signals(void) -{ - fts_post("printing signals:\n"); - ftl_program_post_signals_count(dsp_chain); -} - -void dsp_chain_fprint(FILE *f) -{ - fprintf(f, "printing dsp chain:\n"); - ftl_program_fprint(f, dsp_chain); -} - -void dsp_chain_fprint_signals(FILE *f) -{ - fprintf(f, "printing signals:\n"); - ftl_program_fprint_signals_count(f, dsp_chain); -} - - int dsp_is_running( void) { return dsp_chain == dsp_chain_on; Modified: trunk/core/fts/lang/ftl/FtlProgram.h =================================================================== --- trunk/core/fts/lang/ftl/FtlProgram.h 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/FtlProgram.h 2011-07-12 16:41:09 UTC (rev 336) @@ -89,8 +89,6 @@ // Pretty printing - (void) postProgram; -- (void) postSignalCount; -- (void) printProgramTo:(FILE *)f; -- (void) printSignalCountTo:(FILE *)f; +- (void) postSignals; @end Modified: trunk/core/fts/lang/ftl/FtlProgram.m =================================================================== --- trunk/core/fts/lang/ftl/FtlProgram.m 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/FtlProgram.m 2011-07-12 16:41:09 UTC (rev 336) @@ -240,8 +240,9 @@ fts_post("FTL Program\n"); fts_post("Data Section\n"); - fts_post("\t To be done ....\n"); + [self postSignals]; + fts_post("Code Section \n"); l = [operators length]; @@ -254,16 +255,22 @@ } } -- (void) postSignalCount -{ -} -- (void) printProgramTo:(FILE *)f +- (void) postSignals { -} + SymbolHashTableIterator *it; -- (void) printSignalCountTo:(FILE *)f -{ + it = [SymbolHashTableIterator newWithTable:signals]; + + while (! 
[it atEnd]) + { + FtlSignal *signal = (FtlSignal *) [it currentValue]; + + [signal post]; + [it next]; + } + + [it dealloc]; } @end Modified: trunk/core/fts/lang/ftl/FtlSignal.h =================================================================== --- trunk/core/fts/lang/ftl/FtlSignal.h 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/FtlSignal.h 2011-07-12 16:41:09 UTC (rev 336) @@ -53,5 +53,7 @@ - (fts_symbol_t) type; - (fts_symbol_t) name; +- (void) post; + @end Modified: trunk/core/fts/lang/ftl/FtlSignal.m =================================================================== --- trunk/core/fts/lang/ftl/FtlSignal.m 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/FtlSignal.m 2011-07-12 16:41:09 UTC (rev 336) @@ -76,5 +76,9 @@ return name; } +- (void) post +{ + fts_post("\t%s : %s %d\n", fts_symbol_name(name), fts_symbol_name(type), size); +} @end Modified: trunk/core/fts/lang/ftl/Sources =================================================================== --- trunk/core/fts/lang/ftl/Sources 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/Sources 2011-07-12 16:41:09 UTC (rev 336) @@ -26,7 +26,6 @@ SOURCES = \ ftl.c \ ftlmem.c \ - ftlutils.c \ FtlEnvironment.m \ FtlSignal.m \ FtlSignalType.m \ Modified: trunk/core/fts/lang/ftl/TODO-FTL =================================================================== --- trunk/core/fts/lang/ftl/TODO-FTL 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/TODO-FTL 2011-07-12 16:41:09 UTC (rev 336) @@ -1,12 +1,10 @@ -Complete the post Program code. -Pretty printing and dsp chain print restore. Complete the gcc version of the llcm specific code, move the vector definition to the platform.h file -4 Implements the zero based optimisations: MUL by zero produce fill of zero, ADD of zero a copy of the original value. - Check if it is feasible: yes, in the ftl3arg operator, by adding two methods zeroArgZeroResult zeroArgOtherResult. 
+Implements the zero based optimisations: MUL by zero produce fill of zero, ADD of zero a copy of the original value. + Check if it is feasible: yes, in the ftl3arg operator, by adding two methods zeroArgZeroResult zeroArgOtherResult, yes but not very useful for multiply> Postponed at the graph level optimisations. -CHECK ftlutil, still used ? +ftlmem: really useful ? Bench ? Benchmark and store in the bench file. Modified: trunk/core/fts/lang/ftl/ftl.c =================================================================== --- trunk/core/fts/lang/ftl/ftl.c 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/ftl.c 2011-07-12 16:41:09 UTC (rev 336) @@ -244,10 +244,6 @@ { [prog->program compile]; prog->bytecode = [prog->program bytecode]; - - // DEBUG !!! - - [prog->program postProgram]; } Deleted: trunk/core/fts/lang/ftl/ftlutils.c =================================================================== --- trunk/core/fts/lang/ftl/ftlutils.c 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/ftlutils.c 2011-07-12 16:41:09 UTC (rev 336) @@ -1,49 +0,0 @@ -/* - * jMax - * Copyright (C) 1994, 1995, 1998, 1999 by IRCAM-Centre Georges Pompidou, Paris, France. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * See file LICENSE for further informations on licensing terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Based on Max/ISPW by Miller Puckette. - * - * Authors: Maurizio De Cecco, Francois Dechelle, Enzo Maggi, Norbert Schnell. - * - */ - -#include "sys.h" -#include "lang/mess.h" -#include "lang/ftl.h" - -void -ftl_memory_declaration_init( ftl_memory_declaration *decl, int size) -{ - decl->size = size; - decl->address = 0; -} - -ftl_memory_declaration * -ftl_memory_declaration_new( int size ) -{ - ftl_memory_declaration *decl; - - decl = (ftl_memory_declaration *) fts_malloc(sizeof( ftl_memory_declaration)); - if (decl) - ftl_memory_declaration_init( decl, size); - return decl; -} - - Deleted: trunk/core/fts/lang/ftl/ftlutils.h =================================================================== --- trunk/core/fts/lang/ftl/ftlutils.h 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl/ftlutils.h 2011-07-12 16:41:09 UTC (rev 336) @@ -1,38 +0,0 @@ -/* - * jMax - * Copyright (C) 1994, 1995, 1998, 1999 by IRCAM-Centre Georges Pompidou, Paris, France. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * See file LICENSE for further informations on licensing terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Based on Max/ISPW by Miller Puckette. - * - * Authors: Maurizio De Cecco, Francois Dechelle, Enzo Maggi, Norbert Schnell. - * - */ - -#ifndef _FTLUTILS_H_ -#define _FTLUTILS_H_ - -typedef struct ftl_memory_declaration { - int size; - void *address; -} ftl_memory_declaration; - -extern void ftl_memory_declaration_init( ftl_memory_declaration *decl, int size); -extern ftl_memory_declaration *ftl_memory_declaration_new( int size ); - -#endif Modified: trunk/core/fts/lang/ftl.h =================================================================== --- trunk/core/fts/lang/ftl.h 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/core/fts/lang/ftl.h 2011-07-12 16:41:09 UTC (rev 336) @@ -29,7 +29,6 @@ #include "ftl/ftl.h" #include "ftl/ftlmem.h" -#include "ftl/ftlutils.h" #include "ftl/FtlSignalType.h" #include "ftl/FtlSignal.h" #include "ftl/FtlEnvironment.h" Modified: trunk/packages/system/fts/dsp1.c =================================================================== --- trunk/packages/system/fts/dsp1.c 2011-07-11 20:51:56 UTC (rev 335) +++ trunk/packages/system/fts/dsp1.c 2011-07-12 16:41:09 UTC (rev 336) @@ -88,33 +88,6 @@ } static void -dsp_save(fts_object_t *o, int winlet, fts_symbol_t s, int ac, const fts_atom_t *at) -{ - FILE *f; - - if (fts_is_symbol(at)) - { - const char *filename; - - filename = fts_symbol_name(fts_get_symbol(at)); - - f = fopen(filename, "w"); - - if (f) - dsp_chain_fprint(f); - - fclose(f); - } -} - -static void -dsp_print_signals(fts_object_t *o, int winlet, fts_symbol_t s, int ac, const fts_atom_t *at) -{ - dsp_chain_post_signals(); -} - - -static void dsp_init(fts_object_t *o, int winlet, fts_symbol_t s, int ac, const fts_atom_t *at) { fts_param_add_listener(fts_s_dsp_on, o, dsp_on_listener); @@ -150,11 +123,6 @@ 
fts_method_define(cl, 0, fts_s_bang, dsp_print, 0, 0); fts_method_define(cl, 0, fts_new_symbol("print"), dsp_print, 0, 0); - a[0] = fts_s_symbol; - fts_method_define(cl, 0, fts_new_symbol("save"), dsp_save, 1, a); - - fts_method_define(cl, 0, fts_new_symbol("print-signals"), dsp_print_signals, 0, 0); - return fts_Success; } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-11 20:52:03
|
Revision: 335 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=335&view=rev Author: dececco Date: 2011-07-11 20:51:56 +0000 (Mon, 11 Jul 2011) Log Message: ----------- Completed migration to vectors for the simplest objects and the veclib functions that are actually used; today done for clang vec dialect, to be completed for gcc Modified Paths: -------------- trunk/core/fts/lang/ftl/FtlOperator.m trunk/core/fts/lang/ftl/FtlProgram.m trunk/core/fts/lang/ftl/TODO-FTL trunk/core/fts/lang/ftl/ftl.c trunk/core/fts/lang/ftl/ftlex.h trunk/core/fts/lang/mess/atoms.c trunk/core/fts/lang/veclib/portable/vec_cpy.c trunk/core/fts/lang/veclib/portable/vecx_cpy.c trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c trunk/core/fts/sys/platform.h trunk/packages/ispw/fts/sampling/ftl_delay.c trunk/packages/ispw/fts/signal/sig1.c trunk/packages/ispw/fts/signal/sigline.c trunk/packages/ispw/fts/signal/sigthrow.c Modified: trunk/core/fts/lang/ftl/FtlOperator.m =================================================================== --- trunk/core/fts/lang/ftl/FtlOperator.m 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/ftl/FtlOperator.m 2011-07-11 20:51:56 UTC (rev 335) @@ -60,7 +60,7 @@ [self compileOpcode:[self opcode] to:bytecode length:[self bytecodeSize]]; } -- (void) compileOpcode:(unsigned int)code to:(fts_word_t *)bytecode length:(unsigned int)length; +- (void) compileOpcode:(unsigned int)code to:(fts_word_t *)bytecode length:(unsigned int)length { fts_word_set_int(bytecode, (code << 16) | length); } Modified: trunk/core/fts/lang/ftl/FtlProgram.m =================================================================== --- trunk/core/fts/lang/ftl/FtlProgram.m 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/ftl/FtlProgram.m 2011-07-11 20:51:56 UTC (rev 335) @@ -238,10 +238,11 @@ { unsigned int i, l; - fts_post("FTL Program"); - fts_post("Data Section"); + fts_post("FTL Program\n"); + fts_post("Data Section\n"); + fts_post("\t To be done ....\n"); - 
fts_post("Code Section"); + fts_post("Code Section \n"); l = [operators length]; Modified: trunk/core/fts/lang/ftl/TODO-FTL =================================================================== --- trunk/core/fts/lang/ftl/TODO-FTL 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/ftl/TODO-FTL 2011-07-11 20:51:56 UTC (rev 335) @@ -1,24 +1,12 @@ +Complete the post Program code. +Pretty printing and dsp chain print restore. -FtlProgram: write the dealloc method, right now it is just leaking. -2 add the sig operator, scalar/vector. +Complete the gcc version of the llcm specific code, move the vector definition to the platform.h file -3 implements the automatic optimisation 1/2/3 arguments for operators and simplify the calling code. DONE 4 Implements the zero based optimisations: MUL by zero produce fill of zero, ADD of zero a copy of the original value. + Check if it is feasible: yes, in the ftl3arg operator, by adding two methods zeroArgZeroResult zeroArgOtherResult. -Now, move the add funcall of sub and sig to the new operators. +CHECK ftlutil, still used ? -Bench. +Benchmark and store in the bench file. -Vector operators ? Loop unrolling ? - -Connection to the ftl.c code. -Pretty printing and dsp chain print restore. - -Simplification: operator are can select two arg version if the in and out overlap and the @ arg version is there. -Simplify the vector operation code> At this point, integrate the code for sub operator in the arith objects. - -Introduce vector primitives in ftlex. - -CHECK ftltil, still used ? - -Benchmark. 
Modified: trunk/core/fts/lang/ftl/ftl.c =================================================================== --- trunk/core/fts/lang/ftl/ftl.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/ftl/ftl.c 2011-07-11 20:51:56 UTC (rev 335) @@ -281,9 +281,6 @@ /* Vector operations function */ /* ********************************************************************** */ -#ifdef HAS_VECT_EXT -typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); -#endif #if defined(HAS_VECT_EXT) @@ -317,10 +314,38 @@ v2 = v0 + v1; - *((v8sf restrict *) (arg0 + i)) = v2; + *((v8sf restrict *) (arg1 + i)) = v2; } } +static inline void add2sa_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) +{ + unsigned int i; + v8sf v0, r; + + for (i=0; i < vecsize; i+=8) + { + v0 = *((v8sf restrict *) (arg0 + i)); + r = v0 + v0; + *((v8sf restrict *) (arg1 + i)) = r; + } +} + +static inline void add1_vec(float * restrict arg0, unsigned int vecsize) +{ + unsigned int i; + v8sf v0, r; + + for (i=0; i < vecsize; i+=8) + { + v0 = *((v8sf restrict *) (arg0 + i)); + r = v0 + v0; + *((v8sf restrict *) (arg0 + i)) = r; + } +} + + + static inline void sub3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) { unsigned int i; @@ -349,7 +374,7 @@ v2 = v0 - v1; - *((v8sf restrict *) (arg0 + i)) = v2; + *((v8sf restrict *) (arg1 + i)) = v2; } } @@ -381,10 +406,52 @@ v2 = v0 * v1; - *((v8sf restrict *) (arg0 + i)) = v2; + *((v8sf restrict *) (arg1 + i)) = v2; } } +static inline void mul2sa_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) +{ + unsigned int i; + v8sf v0, r; + + for (i=0; i < vecsize; i+=8) + { + v0 = *((v8sf restrict *) (arg0 + i)); + + r = v0 * v0; + + *((v8sf restrict *) (arg1 + i)) = r; + } +} + +static inline void mul1_vec(float * restrict arg0, unsigned int vecsize) +{ + unsigned int i; + v8sf v0, r; + + for (i=0; i < vecsize; i+=8) + { + v0 = *((v8sf restrict *) (arg0 + i)); + + r = v0 * 
v0; + + *((v8sf restrict *) (arg0 + i)) = r; + } +} + +static inline void copy_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) +{ + unsigned int i; + v8sf v; + + for (i=0; i < vecsize; i+=8) + { + v = *((v8sf restrict *) (arg0 + i)); + *((v8sf restrict *) (arg1 + i)) = v; + } +} + #else static inline void add3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) @@ -400,9 +467,25 @@ unsigned int i; for (i = 0; i < vecsize; i++) - arg0[i] = arg0[i] + arg1[i]; + arg1[i] = arg0[i] + arg1[i]; } +static inline void add2sa_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg1[i] = arg0[i] + arg0[i]; +} + +static inline void add1_vec(float * restrict arg0, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg0[i] = arg0[i] + arg0[i]; +} + static inline void sub3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) { unsigned int i; @@ -416,7 +499,7 @@ unsigned int i; for (i = 0; i < vecsize; i++) - arg0[i] = arg0[i] - arg1[i]; + arg1[i] = arg0[i] - arg1[i]; } static inline void mul3_vec(float * restrict arg0, float * restrict arg1, float * restrict arg2, unsigned int vecsize) @@ -432,17 +515,71 @@ unsigned int i; for (i = 0; i < vecsize; i++) - arg0[i] = arg0[i] * arg1[i]; + arg1[i] = arg0[i] * arg1[i]; } +static inline void mul2sa_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg1[i] = arg0[i] * arg0[i]; +} + +static inline void mul1_vec(float * restrict arg0, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg1[i] = arg0[i] * arg0[i]; +} + +static inline void copy_vec(float * restrict arg0, float * restrict arg1, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg1[i] = arg0[i]; +} #endif +#ifdef HAS_LLVM_VECT 
+static inline void zero_vec(float * restrict arg0, unsigned int vecsize) +{ + unsigned int i; + v8sf z; + + z[0] = 0.0; + z[1] = 0.0; + z[2] = 0.0; + z[3] = 0.0; + z[4] = 0.0; + z[5] = 0.0; + z[6] = 0.0; + z[7] = 0.0; + + for (i = 0; i < vecsize; i+=8) + { + *((v8sf restrict *) (arg0 + i)) = z; + } +} + + +#else +static inline void zero_vec(float * restrict arg0, unsigned int vecsize) +{ + unsigned int i; + + for (i = 0; i < vecsize; i++) + arg0[i] = 0.0; +} +#endif + /* ********************************************************************** */ /* Run function */ /* ********************************************************************** */ -static int debug = 1; // Debug, put at 1 static ftl_program_t *last_prog = 0; @@ -455,12 +592,6 @@ unsigned int opcode; int running = 1; - if (prog != last_prog) - { - debug = 1; // debug - last_prog = prog; - } - bytecode = prog->bytecode; code = (unsigned int) fts_word_get_int(bytecode); @@ -472,12 +603,9 @@ switch (code >> 16) { case FTLEX_RETURN_OPCODE: - running = 0; - if (debug) fprintf(stderr, "executing : FTLEX_RETURN_OPCODE\n"); break; case FTLEX_FUNCALL_OPCODE: - if (debug) fprintf(stderr, "executing : FTLEX_FUNCALL_OPCODE %x %lx\n", code, next.fts_fun); (* ((ftl_wrapper_t) next.fts_fun))(bytecode); bytecode += (code & 0xffff) - 2; // take away the opcode and the function pointer that has been read already break; @@ -493,8 +621,6 @@ arg2 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_ADD3_OPCODE %lx %lx %lx size %d\n", arg0, arg1, arg2, vecsize); - add3_vec(arg0, arg1, arg2, vecsize); } break; @@ -508,8 +634,6 @@ arg1 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_ADD2_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - add2_vec(arg0, arg1, vecsize); } break; @@ -523,13 +647,7 @@ arg1 = (float *) 
fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_ADD2SA_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - - for (i = 0; i < vecsize; i++) - { - float v = arg0[i]; - arg1[i] = v + v; - } + add2sa_vec(arg0, arg1, vecsize); } break; @@ -541,13 +659,7 @@ arg0 = (float *) next.fts_ptr; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_ADD1_OPCODE %lx size %d\n", arg0, vecsize); - - for (i = 0; i < vecsize; i++) - { - float v = arg0[i]; - arg0[i] = v + v; - } + add1_vec(arg0, vecsize); } break; @@ -563,8 +675,6 @@ arg2 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_SUB3_OPCODE %lx %lx %lx size %d\n", arg0, arg1, arg2, vecsize); - sub3_vec(arg0, arg1, arg2, vecsize); } break; @@ -578,8 +688,6 @@ arg1 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_SUB2_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - sub2_vec(arg0, arg1, vecsize); } break; @@ -593,10 +701,7 @@ arg1 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_SUB2SA_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - - for (i = 0; i < vecsize; i++) - arg1[i] = 0.; + zero_vec(arg1, vecsize); } break; @@ -608,10 +713,7 @@ arg0 = (float *) next.fts_ptr; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_SUB1_OPCODE %lx size %d\n", arg0, vecsize); - - for (i = 0; i < vecsize; i++) - arg0[i] = 0.; + zero_vec(arg0, vecsize); } break; @@ -627,8 +729,6 @@ arg2 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_MUL3_OPCODE %lx %lx %lx size %d\n", 
arg0, arg1, arg2, vecsize); - mul3_vec(arg0, arg1, arg2, vecsize); } break; @@ -643,8 +743,6 @@ arg1 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_MUL2_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - mul2_vec(arg0, arg1, vecsize); } break; @@ -659,13 +757,7 @@ arg1 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_MUL2SA_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - - for (i = 0; i < vecsize; i++) - { - float v = arg1[i]; - arg0[i] = v * v; - } + mul2sa_vec(arg0, arg1, vecsize); } break; case FTLEX_MUL1_OPCODE: @@ -676,13 +768,7 @@ arg0 = (float *) next.fts_ptr; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_ADD1_OPCODE %lx size %d\n", arg0, vecsize); - - for (i = 0; i < vecsize; i++) - { - float v = arg0[i]; - arg0[i] = v * v; - } + mul1_vec(arg0, vecsize); } break; @@ -696,10 +782,7 @@ arg1 = (float *) fts_word_get_ptr(bytecode); bytecode++; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_COPY_OPCODE %lx %lx size %d\n", arg0, arg1, vecsize); - - for (i = 0; i < vecsize; i++) - arg1[i] = arg0[i]; + copy_vec(arg0, arg1, vecsize); } break; case FTLEX_ZERO_OPCODE: @@ -710,10 +793,7 @@ arg0 = (float *) next.fts_ptr; vecsize = fts_word_get_int(bytecode); bytecode++; - if (debug) fprintf(stderr, "executing : FTLEX_ZERO_OPCODE %lx size %d\n", arg0, vecsize); - - for (i = 0; i < vecsize; i++) - arg0[i] = 0.; + zero_vec(arg0, vecsize); } break; default: @@ -726,8 +806,6 @@ bytecode++; next = *(bytecode)++; // Assume there is an extra word in the bytecode !! 
} - - debug = 0; // debug } Modified: trunk/core/fts/lang/ftl/ftlex.h =================================================================== --- trunk/core/fts/lang/ftl/ftlex.h 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/ftl/ftlex.h 2011-07-11 20:51:56 UTC (rev 335) @@ -39,17 +39,17 @@ #define FTLEX_RETURN_OPCODE 0 #define FTLEX_FUNCALL_OPCODE 1 #define FTLEX_ADD3_OPCODE 2 // arg0 + arg1 -> arg2, arg3 vector size -#define FTLEX_ADD2_OPCODE 3 // arg0 + arg1 -> arg0, arg2 vector size +#define FTLEX_ADD2_OPCODE 3 // arg0 + arg1 -> arg1, arg2 vector size #define FTLEX_ADD2SA_OPCODE 4 // arg0 + arg0 -> arg1, arg2 vector size #define FTLEX_ADD1_OPCODE 5 // arg0 + arg0 -> arg0, arg2 vector size #define FTLEX_SUB3_OPCODE 6 // arg0 - arg1 -> arg2, arg3 vector size -#define FTLEX_SUB2_OPCODE 7 // arg0 - arg1 -> arg0, arg2 vector size +#define FTLEX_SUB2_OPCODE 7 // arg0 - arg1 -> arg1, arg2 vector size #define FTLEX_SUB2SA_OPCODE 8 // arg0 - arg0 -> arg1, [0 -> arg1] arg2 vector size #define FTLEX_SUB1_OPCODE 9 // arg0 - arg0 -> arg0 [0. 
-> arg0], arg1 vector size #define FTLEX_MUL3_OPCODE 10 // arg0 * arg1 -> arg2, arg3 vector size -#define FTLEX_MUL2_OPCODE 11 // arg0 * arg1 -> arg0, arg2 vector size -#define FTLEX_MUL2SA_OPCODE 12 // arg0 * arg1 -> arg0, arg2 vector size -#define FTLEX_MUL1_OPCODE 13 // arg0 * arg1 -> arg0, arg2 vector size +#define FTLEX_MUL2_OPCODE 11 // arg0 * arg1 -> arg1, arg2 vector size +#define FTLEX_MUL2SA_OPCODE 12 // arg0 * arg0 -> arg1, arg2 vector size +#define FTLEX_MUL1_OPCODE 13 // arg0 * arg0 -> arg0, arg2 vector size #define FTLEX_COPY_OPCODE 14 // arg0 -> arg1, arg3 vector size #define FTLEX_ZERO_OPCODE 16 // zero -> arg0, arg2 vector size Modified: trunk/core/fts/lang/mess/atoms.c =================================================================== --- trunk/core/fts/lang/mess/atoms.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/mess/atoms.c 2011-07-11 20:51:56 UTC (rev 335) @@ -38,7 +38,7 @@ const fts_atom_t fts_null = FTS_NULL; -/* Currently, there is no way to extend this function when adding +/* Currently, there is no way (other coding here) to extend this function when adding new atom types */ void fprintf_atoms(FILE *f, int ac, const fts_atom_t *at) Modified: trunk/core/fts/lang/veclib/portable/vec_cpy.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vec_cpy.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/veclib/portable/vec_cpy.c 2011-07-11 20:51:56 UTC (rev 335) @@ -45,15 +45,22 @@ void fts_vec_fcpy (float *in, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float in_0; - in_0 = in[i]; - (out[i] = (in_0)); -}}} + int i; +#ifdef HAS_VECT_EXT + v8sf v; + + for (i = 0; i < size; i+=8) + { + v = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i]; +#endif +} + void fts_vec_icpy (int *in, int *out, int size) { @@ -195,13 +202,27 @@ void fts_vec_fzero (float *out, int size) { - 
{ - int i; - for (i = 0; i < size; i++) - { - (out[i] = 0); - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf z; + + z[0] = 0.0; + z[1] = 0.0; + z[2] = 0.0; + z[3] = 0.0; + z[4] = 0.0; + z[5] = 0.0; + z[6] = 0.0; + z[7] = 0.0; + + for (i = 0; i < size; i+=8) + { + *((v8sf restrict *) (out + i)) = z; + } +#else + for (i = 0; i < size; i++) + out[i] = 0; +#endif } void Modified: trunk/core/fts/lang/veclib/portable/vecx_cpy.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_cpy.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/veclib/portable/vecx_cpy.c 2011-07-11 20:51:56 UTC (rev 335) @@ -45,15 +45,21 @@ void fts_vecx_fcpy (float *in, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float in_0; - in_0 = in[i]; - (out[i] = (in_0)); -}}} + int i; +#ifdef HAS_VECT_EXT + v8sf v; + for (i = 0; i < size; i+=8) + { + v = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i]; +#endif +} + void fts_vecx_icpy (int *in, int *out, int size) { @@ -195,13 +201,27 @@ void fts_vecx_fzero (float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - (out[i] = 0); - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf z; + + z[0] = 0.0; + z[1] = 0.0; + z[2] = 0.0; + z[3] = 0.0; + z[4] = 0.0; + z[5] = 0.0; + z[6] = 0.0; + z[7] = 0.0; + + for (i = 0; i < size; i+=8) + { + *((v8sf restrict *) (out + i)) = z; + } +#else + for (i = 0; i < size; i++) + out[i] = 0; +#endif } void Modified: trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-11 20:51:56 UTC (rev 335) @@ -42,10 +42,6 @@ #include "sys.h" #include "lang/veclib/include/vecdef.h" -#ifdef HAS_LLVM_VECT -typedef float v8sf __attribute__ 
((vector_size (8 * sizeof(float)))); -#endif - void fts_vecx_scl_fadd (float *in, float f, float *out, int size) { Modified: trunk/core/fts/sys/platform.h =================================================================== --- trunk/core/fts/sys/platform.h 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/core/fts/sys/platform.h 2011-07-11 20:51:56 UTC (rev 335) @@ -52,6 +52,7 @@ #define HAS_PORTMIDI #define HAS_LLVM_VECT #define HAS_VECT_EXT +typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); /* This macro is defined here because not all platforms have a isnanf macro (or function */ @@ -79,6 +80,8 @@ #define HAS_VECT_EXT #define HAS_GCC_VECT +typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); + /* This macro is defined here because not all platforms have a isnanf macro (or function */ #define fts_isnanf(x) (((*(unsigned int *)&(x) & 0x7f800000L)==0x7f800000L)&& \ Modified: trunk/packages/ispw/fts/sampling/ftl_delay.c =================================================================== --- trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/packages/ispw/fts/sampling/ftl_delay.c 2011-07-11 20:51:56 UTC (rev 335) @@ -28,10 +28,6 @@ #include "delbuf.h" #include "vd.h" -#ifdef HAS_LLVM_VECT -typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); -#endif - void ftl_delwrite(fts_word_t *argv) { Modified: trunk/packages/ispw/fts/signal/sig1.c =================================================================== --- trunk/packages/ispw/fts/signal/sig1.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/packages/ispw/fts/signal/sig1.c 2011-07-11 20:51:56 UTC (rev 335) @@ -93,10 +93,9 @@ * dsp * */ -#ifdef HAS_LLVM_VECT -typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); -#endif + + static void ftl_sig(fts_word_t *argv) { float f = *((float *)fts_word_get_ptr(argv + 0)); Modified: trunk/packages/ispw/fts/signal/sigline.c 
=================================================================== --- trunk/packages/ispw/fts/signal/sigline.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/packages/ispw/fts/signal/sigline.c 2011-07-11 20:51:56 UTC (rev 335) @@ -57,8 +57,6 @@ #ifdef HAS_LLVM_VECT -typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); - static void ftl_line(fts_word_t *argv) { float * restrict fp = (float *)fts_word_get_ptr(argv); Modified: trunk/packages/ispw/fts/signal/sigthrow.c =================================================================== --- trunk/packages/ispw/fts/signal/sigthrow.c 2011-07-09 07:33:02 UTC (rev 334) +++ trunk/packages/ispw/fts/signal/sigthrow.c 2011-07-11 20:51:56 UTC (rev 335) @@ -85,7 +85,6 @@ float samps[MAXVS]; }; - static void sigcatch_dsp_fun(fts_word_t *argv) { @@ -94,11 +93,31 @@ int n = fts_word_get_int(argv+2); int i; +#ifdef HAS_LLVM_VECT + v8sf z, v; + + z[0] = 0.0; + z[1] = 0.0; + z[2] = 0.0; + z[3] = 0.0; + z[4] = 0.0; + z[5] = 0.0; + z[6] = 0.0; + z[7] = 0.0; + + for (i = 0; i < n; i+=8) + { + v = *((v8sf restrict *) (buf + i)); + *((v8sf restrict *) (out + i)) = v; + *((v8sf restrict *) (buf + i)) = z; + } +#else for (i = 0; i < n; i++) { out[i] = buf[i]; buf[i] = 0; } +#endif } static void @@ -216,9 +235,20 @@ if (p) { int i; + v8sf v1, v2, r; +#ifdef HAS_VECT_EXT + for (i = 0; i < n; i+=8) + { + v1 = *((v8sf restrict *) (p + i)); + v2 = *((v8sf restrict *) (in + i)); + r = v1 + v2; + *((v8sf restrict *) (p + i)) = r; + } +#else for (i = 0; i < n; i++) p[i] = p[i] + in[i]; +#endif } } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |
From: <de...@us...> - 2011-07-09 07:33:08
|
Revision: 334 http://jmax-phoenix.svn.sourceforge.net/jmax-phoenix/?rev=334&view=rev Author: dececco Date: 2011-07-09 07:33:02 +0000 (Sat, 09 Jul 2011) Log Message: ----------- More object optimisations: cdg 30Ms/sec, shepard 15Ms/against 11Ms/s yesterday Modified Paths: -------------- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c trunk/packages/ispw/fts/signal/sigline.c Modified: trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c =================================================================== --- trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-08 09:55:28 UTC (rev 333) +++ trunk/core/fts/lang/veclib/portable/vecx_scl_farith.c 2011-07-09 07:33:02 UTC (rev 334) @@ -42,90 +42,171 @@ #include "sys.h" #include "lang/veclib/include/vecdef.h" +#ifdef HAS_LLVM_VECT +typedef float v8sf __attribute__ ((vector_size (8 * sizeof(float)))); +#endif - void -fts_vecx_scl_fadd (float *in, float scl, float *out, int size) +fts_vecx_scl_fadd (float *in, float f, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) + (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin + v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] + f; +#endif } void -fts_vecx_scl_fsub (float *in, float scl, float *out, int size) +fts_vecx_scl_fsub (float *in, float f, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) - (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin - v; + } +#else + for (i = 0; 
i < size; i++) + out[i] = in[i] - f; +#endif } void -fts_vecx_scl_fmul (float *in, float scl, float *out, int size) +fts_vecx_scl_fmul (float *in, float f, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) * (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin * v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] * f; +#endif } + void -fts_vecx_scl_fdiv (float *in, float scl, float *out, int size) +fts_vecx_scl_fdiv (float *in, float f, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) / (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin / v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] / f; +#endif } void -fts_vecx_scl_fbus (float *in, float scl, float *out, int size) +fts_vecx_scl_fbus (float *in, float f, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (scl) - (in[i]); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = v - vin; + } +#else + for (i = 0; i < size; i++) + out[i] = f - in[i]; +#endif } void -fts_vecx_scl_fvid (float *in, float scl, float *out, int size) +fts_vecx_scl_fvid (float *in, float f, float *out, int size) { - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - 
(out_0) = (scl) / (in[i]); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = v / vin; + } +#else + for (i = 0; i < size; i++) + out[i] = f / in[i]; +#endif } @@ -134,106 +215,184 @@ ftl_vecx_scl_fadd (fts_word_t * argv) { float *in = (float *) ((argv + 0)->fts_ptr); - float scl = *((float *) ((argv + 1)->fts_ptr)); + float f = *((float *) ((argv + 1)->fts_ptr)); float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) + (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin + v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] + f; +#endif } void ftl_vecx_scl_fsub (fts_word_t * argv) { float *in = (float *) ((argv + 0)->fts_ptr); - float scl = *((float *) ((argv + 1)->fts_ptr)); + float f = *((float *) ((argv + 1)->fts_ptr)); float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) - (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin - v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] - f; +#endif } void ftl_vecx_scl_fmul (fts_word_t * argv) { float *in = (float *) ((argv + 0)->fts_ptr); - float scl = *((float *) ((argv + 1)->fts_ptr)); + float f = 
*((float *) ((argv + 1)->fts_ptr)); float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) * (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin * v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] * f; +#endif } void ftl_vecx_scl_fdiv (fts_word_t * argv) { float *in = (float *) ((argv + 0)->fts_ptr); - float scl = *((float *) ((argv + 1)->fts_ptr)); + float f = *((float *) ((argv + 1)->fts_ptr)); float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (in[i]) / (scl); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = vin / v; + } +#else + for (i = 0; i < size; i++) + out[i] = in[i] / f; +#endif } void ftl_vecx_scl_fbus (fts_word_t * argv) { float *in = (float *) ((argv + 0)->fts_ptr); - float scl = *((float *) ((argv + 1)->fts_ptr)); + float f = *((float *) ((argv + 1)->fts_ptr)); float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (scl) - (in[i]); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = v - vin; + } +#else + for (i = 0; i < size; i++) + out[i] = f - in[i]; 
+#endif } void ftl_vecx_scl_fvid (fts_word_t * argv) { float *in = (float *) ((argv + 0)->fts_ptr); - float scl = *((float *) ((argv + 1)->fts_ptr)); + float f = *((float *) ((argv + 1)->fts_ptr)); float *out = (float *) ((argv + 2)->fts_ptr); int size = ((argv + 3)->fts_int); - { - int i; - for (i = 0; i < size; i++) - { - float out_0; - (out_0) = (scl) / (in[i]); - out[i] = out_0; - } - } + int i; +#ifdef HAS_LLVM_VECT + v8sf v, vin; + + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; + + for (i = 0; i < size; i+=8) + { + vin = *((v8sf restrict *) (in + i)); + *((v8sf restrict *) (out + i)) = v / vin; + } +#else + for (i = 0; i < size; i++) + out[i] = f / in[i]; +#endif } Modified: trunk/packages/ispw/fts/signal/sigline.c =================================================================== --- trunk/packages/ispw/fts/signal/sigline.c 2011-07-08 09:55:28 UTC (rev 333) +++ trunk/packages/ispw/fts/signal/sigline.c 2011-07-09 07:33:02 UTC (rev 334) @@ -70,23 +70,31 @@ int i; double f = lctl->value; double incr = lctl->incr; - v8sf v; + double incr8 = incr * 8; + v8sf v, vinc; - for (i = 0; i < n; i+=8) - *((v8sf restrict *) (fp + i)) = v; + vinc[0] = incr * 1; + vinc[1] = incr * 2; + vinc[2] = incr * 3; + vinc[3] = incr * 4; + vinc[4] = incr * 5; + vinc[5] = incr * 6; + vinc[6] = incr * 7; + vinc[7] = incr * 8; for (i=0; i<n; i+=8) { - f += incr; v[0] = f; - f += incr; v[1] = f; - f += incr; v[2] = f; - f += incr; v[3] = f; - f += incr; v[4] = f; - f += incr; v[5] = f; - f += incr; v[6] = f; - f += incr; v[7] = f; + v[0] = f; + v[1] = f; + v[2] = f; + v[3] = f; + v[4] = f; + v[5] = f; + v[6] = f; + v[7] = f; - *((v8sf restrict *) (fp + i)) = v; + *((v8sf restrict *) (fp + i)) = v + vinc; + f = f + incr8; } lctl->value = f; This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |