> /* Optimization barrier */
> /* The "volatile" is due to gcc bugs */
> #define barrier() __asm__ __volatile__("": : :"memory")
Here's a description of the meaning/usage of barrier:
http://www.tux.org/hypermail/linux-gcc/1998-Jun/0068.html
I compiled etherboot's tulip.c with and without your changes and
looked at the assembly output of the relevant part of mdio_read:
They're both basically the same -- here's the changed C source:
if (tp->chip_id == LC82C168) {
int i = 1000;
outl(0x60020000 + (phy_id<<23) + (location<<18), ioaddr + 0xA0);
inl(ioaddr + 0xA0);
inl(ioaddr + 0xA0);
while (--i > 0) {
barrier();
if ( ! ((retval = inl(ioaddr + 0xA0)) & 0x80000000))
return retval & 0xffff;
}
return 0xffff;
}
Here's the asm code of the while loop:
.L111:
.stabn 68,0,588,.LM24-mdio_read
.LM24:
#APP
.stabn 68,0,589,.LM25-mdio_read
.LM25:
#NO_APP
movw ioaddr, %dx
addl $160, %edx
.stabs "../etherboot-5.0.1/src/linux-asm-io.h",132,0,0,.Ltext7
.Ltext7:
.stabn 68,0,108,.LM26-mdio_read
.LM26:
#APP
inl %dx,%eax
#NO_APP
movl %eax, %edi
.stabs "tulip.c",132,0,0,.Ltext8
.Ltext8:
.stabn 68,0,589,.LM27-mdio_read
.LM27:
testl %edi, %edi
jns .L198
.stabn 68,0,591,.LM28-mdio_read
.LM28:
decl %ecx
testl %ecx, %ecx
jg .L111
Here's the asm for the while loop for the original code:
.L111:
.stabs "../etherboot-5.0.1/src/linux-asm-io.h",132,0,0,.Ltext7
.Ltext7:
.stabn 68,0,108,.LM24-mdio_read
.LM24:
#APP
inl %dx,%eax
#NO_APP
movl %eax, %edi
.stabs "tulip-orig.c",132,0,0,.Ltext8
.Ltext8:
.stabn 68,0,584,.LM25-mdio_read
.LM25:
testl %edi, %edi
jns .L198
decl %ecx
testl %ecx, %ecx
jg .L111
So the barrier caused ioaddr to be refetched each time around the
loop. This could affect the timing, but I doubt that ioaddr is
actually changing.
So I doubt that this will fix the problem. I'll try it anyway when
I get a chance (Tuesday at best).
This was with gcc:
[mas@s02 test]$ gcc --version
2.96
Then I wondered what kgcc would do:
[mas@s02 test]$ kgcc --version
egcs-2.91.66
-- with kgcc & barrier:
.L113:
.stabn 68,0,588,.LM16-mdio_read
.LM16:
.LBB15:
.LBB16:
.LBE16:
.LBE15:
.LBB17:
.LBB18:
.LBE18:
.LBE17:
#APP
#NO_APP
.stabn 68,0,589,.LM17-mdio_read
.LM17:
movw ioaddr,%dx
addl $160,%edx
.stabs "../etherboot-5.0.1/src/linux-asm-io.h",132,0,0,.Ltext5
.Ltext5:
.stabn 68,0,108,.LM18-mdio_read
.LM18:
#APP
inl %dx,%eax
#NO_APP
movl %eax,%ebx
.stabs "tulip.c",132,0,0,.Ltext6
.Ltext6:
.stabn 68,0,589,.LM19-mdio_read
.LM19:
testl %ebx,%ebx
jge .L213
.stabn 68,0,591,.LM20-mdio_read
.LM20:
decl %ecx
testl %ecx,%ecx
jg .L113
-- with kgcc and original code:
.L113:
.stabs "../etherboot-5.0.1/src/linux-asm-io.h",132,0,0,.Ltext5
.Ltext5:
.stabn 68,0,108,.LM16-mdio_read
.LM16:
.LBB15:
.LBB16:
.LBE16:
.LBE15:
.LBB17:
.LBB18:
.LBE18:
.LBE17:
movl %esi,%edx
#APP
inl %dx,%eax
#NO_APP
movl %eax,%ebx
.stabs "tulip-orig.c",132,0,0,.Ltext6
.Ltext6:
.stabn 68,0,584,.LM17-mdio_read
.LM17:
testl %ebx,%ebx
jge .L213
decl %ecx
testl %ecx,%ecx
jg .L113
So it doesn't look like kgcc is different either.
PS: Why is the number of loops 1000? The loop might only
be a few clocks long, and at 2 ns/clock (466 MHz, almost 500 MHz)
that's only a few microseconds in total. Perhaps the operation just
takes longer than that. Or the problem is something else and is
completely unrelated to this.
|