|
From: Julian S. <js...@ac...> - 2006-08-17 00:44:52
|
I'm trying to implement amd64 instructions cmpxchg8b and cmpxchg16b so as to close bug #127521 (see https://bugs.kde.org/show_bug.cgi?id=127521). I think I have a correct implementation; however it appears my Athlon64 does not support cmpxchg16b and so I cannot obtain reference results for cmpxchg16b. Is it possible someone with a machine supporting cmpxchg16b could run the test program below and send the results? Thanks, J

/* Test program: executes "lock cmpxchg8b" and "lock cmpxchg16b" on a set of
   fixed operand combinations and prints, for each one, the inputs followed
   by the resulting Z flag, rdx:rax and memory contents.  amd64 + GCC-style
   inline assembly only. */

#include <stdlib.h>
#include <stdio.h>

typedef unsigned int UInt;
typedef unsigned long long int ULong;

/* Run one "lock cmpxchg8b" and report the machine state afterwards.

   Inputs:  rdxIn/raxIn are loaded into rdx/rax, rcxIn/rbxIn into rcx/rbx;
            memHiIn/memLoIn are truncated to UInt and form the 8-byte
            memory operand (mem[1] = high word, mem[0] = low word).
   Outputs: *rdxOut/*raxOut receive rdx/rax as left by the instruction,
            *memHiOut/*memLoOut the memory operand (widened back to ULong),
            *zOut the value produced by setz (1 if ZF was set, else 0).

   Architecturally, cmpxchg8b compares edx:eax with the 64-bit memory
   operand; on a match it sets ZF and stores ecx:ebx to memory, otherwise
   it clears ZF and loads the memory value into edx:eax. */
void do_cmpxchg8b ( /*OUT*/
                    ULong* rdxOut, ULong* raxOut,
                    ULong* memHiOut, ULong* memLoOut,
                    ULong* zOut,
                    /*IN*/
                    ULong rdxIn, ULong raxIn,
                    ULong memHiIn, ULong memLoIn,
                    ULong rcxIn, ULong rbxIn )
{
   UInt mem[2];
   ULong block[6];
   /* Only the low halves of the memory inputs are kept: the operand of
      cmpxchg8b is 8 bytes in total. */
   mem[0] = (UInt)memLoIn;
   mem[1] = (UInt)memHiIn;
   /* block[] is the parameter/result area the asm addresses via r11:
        [0] rdx (in/out)   [1] rax (in/out)   [2] rcx (in)   [3] rbx (in)
        [4] address of mem [5] Z result (out) */
   block[0] = rdxIn;
   block[1] = raxIn;
   block[2] = rcxIn;
   block[3] = rbxIn;
   block[4] = (ULong)&mem[0];
   /* All-ones placeholder; the asm overwrites it (offset 40) with 0 or 1. */
   block[5] = ~(0ULL);
   __asm__ __volatile__(
      /* r11 = &block[0]; then load the four register operands and the
         memory operand's address (into r10) from the block. */
      "movq %0,%%r11\n"
      "\tmovq 0(%%r11),%%rdx\n"
      "\tmovq 8(%%r11),%%rax\n"
      "\tmovq 16(%%r11),%%rcx\n"
      "\tmovq 24(%%r11),%%rbx\n"
      "\tmovq 32(%%r11),%%r10\n"
      /* The instruction under test. */
      "\tlock cmpxchg8b (%%r10)\n"
      /* Zero r10 with a mov (which leaves the flags untouched) so that
         setz still sees the ZF produced by cmpxchg8b. */
      "\tmovabsq $0,%%r10\n"
      "\tsetz %%r10b\n"
      /* Write back: Z result to block[5], rdx to block[0], rax to block[1]. */
      "\tmovq %%r10,40(%%r11)\n"
      "\tmovq %%rdx,0(%%r11)\n"
      "\tmovq %%rax,8(%%r11)\n"
      : /*out*/
      : /*in*/ "r"(&block[0])
      : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
                  "cc", "memory"
   );
   *rdxOut = block[0];
   *raxOut = block[1];
   *memLoOut = (ULong)mem[0];
   *memHiOut = (ULong)mem[1];
   *zOut = block[5];
}

/* Run do_cmpxchg8b on one operand set and print a line tagged " Q":
   the inputs (d:a, mem, c:b), then "->", then the resulting z, d:a and
   mem.  Argument order is d, a, mHi, mLo, c, b. */
void try8b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
{
   ULong dd, aa, mmHi, mmLo, zz;
   do_cmpxchg8b( &dd, &aa, &mmHi, &mmLo, &zz,
                 d,a,mHi,mLo,c,b);
   printf(" Q d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
          "-> z=%lld d:a=%llx:%llx mem=%llx:%llx\n",
          d,a, mHi,mLo, c,b, zz, dd,aa, mmHi,mmLo );
}

/* Run one "lock cmpxchg16b" and report the machine state afterwards.

   Same interface and block[] layout as do_cmpxchg8b, except that the
   memory operand is 16 bytes: memHiIn/memLoIn are used at full width
   (mem[1] = high quadword, mem[0] = low quadword).

   Architecturally, cmpxchg16b compares rdx:rax with the 128-bit memory
   operand; on a match it sets ZF and stores rcx:rbx to memory, otherwise
   it clears ZF and loads the memory value into rdx:rax. */
void do_cmpxchg16b ( /*OUT*/
                     ULong* rdxOut, ULong* raxOut,
                     ULong* memHiOut, ULong* memLoOut,
                     ULong* zOut,
                     /*IN*/
                     ULong rdxIn, ULong raxIn,
                     ULong memHiIn, ULong memLoIn,
                     ULong rcxIn, ULong rbxIn )
{
   /* NOTE(review): cmpxchg16b requires a 16-byte aligned memory operand.
      mem carries no explicit alignment attribute, so this presumably
      relies on the compiler/ABI placing a 16-byte local array on a
      16-byte boundary -- confirm for the toolchain in use. */
   ULong mem[2];
   ULong block[6];
   mem[0] = memLoIn;
   mem[1] = memHiIn;
   /* block[] layout:
        [0] rdx (in/out)   [1] rax (in/out)   [2] rcx (in)   [3] rbx (in)
        [4] address of mem [5] Z result (out) */
   block[0] = rdxIn;
   block[1] = raxIn;
   block[2] = rcxIn;
   block[3] = rbxIn;
   block[4] = (ULong)&mem[0];
   /* All-ones placeholder; the asm overwrites it (offset 40) with 0 or 1. */
   block[5] = ~(0ULL);
   __asm__ __volatile__(
      /* r11 = &block[0]; then load the four register operands and the
         memory operand's address (into r10) from the block. */
      "movq %0,%%r11\n"
      "\tmovq 0(%%r11),%%rdx\n"
      "\tmovq 8(%%r11),%%rax\n"
      "\tmovq 16(%%r11),%%rcx\n"
      "\tmovq 24(%%r11),%%rbx\n"
      "\tmovq 32(%%r11),%%r10\n"
      /* The instruction under test. */
      "\tlock cmpxchg16b (%%r10)\n"
      /* Zero r10 with a mov (which leaves the flags untouched) so that
         setz still sees the ZF produced by cmpxchg16b. */
      "\tmovabsq $0,%%r10\n"
      "\tsetz %%r10b\n"
      /* Write back: Z result to block[5], rdx to block[0], rax to block[1]. */
      "\tmovq %%r10,40(%%r11)\n"
      "\tmovq %%rdx,0(%%r11)\n"
      "\tmovq %%rax,8(%%r11)\n"
      : /*out*/
      : /*in*/ "r"(&block[0])
      : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
                  "cc", "memory"
   );
   *rdxOut = block[0];
   *raxOut = block[1];
   *memLoOut = mem[0];
   *memHiOut = mem[1];
   *zOut = block[5];
}

/* Run do_cmpxchg16b on one operand set and print a line tagged "QQ" in
   the same format as try8b.  Argument order is d, a, mHi, mLo, c, b. */
void try16b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
{
   ULong dd, aa, mmHi, mmLo, zz;
   do_cmpxchg16b( &dd, &aa, &mmHi, &mmLo, &zz,
                  d,a,mHi,mLo,c,b);
   printf("QQ d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
          "-> z=%lld d:a=%llx:%llx mem=%llx:%llx\n",
          d,a, mHi,mLo, c,b, zz, dd,aa, mmHi,mmLo );
}

/* Drive both instructions over the same six operand patterns, first with
   small values and then with z added to every operand. */
int main(void)
{
   /* z sets the upper 32 bits of an operand to 0xDEADBEEF while leaving
      its lower 32 bits unchanged when added.  Presumably this is meant to
      show how each instruction treats the upper halves -- confirm against
      the reference output. */
   ULong z = 0xDEADBEEF00000000ULL;

   /* cmpxchg8b: d:a is always 0:1 and c:b always 3:2; only the memory
      value varies (0:1 is the case where memory equals d:a). */
   try8b( 0,1, 5,4, 3,2 );
   try8b( 0,1, 0,1, 3,2 );
   try8b( 0,1, 0,4, 3,2 );
   try8b( 0,1, 0,0, 3,2 );
   try8b( 0,1, 5,0, 3,2 );
   try8b( 0,1, 1,1, 3,2 );

   /* Same six patterns with z added to every operand. */
   try8b( 0+z,1+z, 5+z,4+z, 3+z,2+z );
   try8b( 0+z,1+z, 0+z,1+z, 3+z,2+z );
   try8b( 0+z,1+z, 0+z,4+z, 3+z,2+z );
   try8b( 0+z,1+z, 0+z,0+z, 3+z,2+z );
   try8b( 0+z,1+z, 5+z,0+z, 3+z,2+z );
   try8b( 0+z,1+z, 1+z,1+z, 3+z,2+z );

   /* cmpxchg16b: the same twelve operand sets. */
   try16b( 0,1, 5,4, 3,2 );
   try16b( 0,1, 0,1, 3,2 );
   try16b( 0,1, 0,4, 3,2 );
   try16b( 0,1, 0,0, 3,2 );
   try16b( 0,1, 5,0, 3,2 );
   try16b( 0,1, 1,1, 3,2 );

   try16b( 0+z,1+z, 5+z,4+z, 3+z,2+z );
   try16b( 0+z,1+z, 0+z,1+z, 3+z,2+z );
   try16b( 0+z,1+z, 0+z,4+z, 3+z,2+z );
   try16b( 0+z,1+z, 0+z,0+z, 3+z,2+z );
   try16b( 0+z,1+z, 5+z,0+z, 3+z,2+z );
   try16b( 0+z,1+z, 1+z,1+z, 3+z,2+z );

   return 0;
}
|
|
From: Robert W. <rj...@du...> - 2006-08-17 01:07:53
Attachments:
out.txt
|
On Thu, 2006-08-17 at 01:44 +0100, Julian Seward wrote: > I'm trying to implement amd64 instructions cmpxchg8b and cmpxchg16b > so as to close bug #127521 > (see https://bugs.kde.org/show_bug.cgi?id=127521). I think I have > a correct implementation; however it appears my Athlon64 does not > support cmpxchg16b and so I cannot obtain reference results for > cmpxchg16b. > > Is it possible someone with a machine supporting cmpxchg16b > could run the test program below and send the results? Attached is the output. This was run on: processor : 0 vendor_id : AuthenticAMD cpu family : 15 model : 65 model name : Dual-Core AMD Opteron(tm) Processor 8218 stepping : 2 cpu MHz : 2588.806 cache size : 1024 KB physical id : 0 siblings : 2 core id : 0 cpu cores : 2 fpu : yes fpu_exception : yes cpuid level : 1 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext lm 3dnowext 3dnow pni cmpxchg16b lahf_lm cmp_legacy bogomips : 5111.80 TLB size : 1088 4K pages clflush size : 64 cache_alignment : 64 address sizes : 40 bits physical, 48 bits virtual power management: ts fid vid ttp [4] [5] shared cores : 0 1 The executable was compiled on FC5, but run on SLES9, as the SLES9 assembler didn't understand the instruction, but the FC5 assembler did. However, the FC5 CPU didn't understand the instruction... :-) Regards, Robert. -- Robert Walsh Amalgamated Durables, Inc. - "We don't make the things you buy." Email: rj...@du... |
|
From: Julian S. <js...@ac...> - 2006-08-17 01:18:31
|
On Thursday 17 August 2006 02:07, Robert Walsh wrote: > Attached is the output. This was run on: Thanks. My implementation seems correct therefore; will commit. J |