|
From: James B. <ja...@ex...> - 2000-04-26 06:59:50
|
Hi all, I just joined this project. I'd like to take a crack at the asm versions of __dv_decode_vlc and dv_decode_vlc, so I'll be making a few changes in vlc.h and vlc.c, as well as adding a new file for the asm implementation itself. -- James Bowman ja...@ex... |
|
From: Erik W. <om...@cs...> - 2000-04-26 07:51:58
|
On Tue, 25 Apr 2000, James Bowman wrote:
> I'd like to take a crack at the asm versions of __dv_decode_vlc and
> dv_decode_vlc, so I'll be making a few changes in vlc.h and vlc.c, as
> well as adding a new file for the asm implementation itself.
I'd suggest working on it in your working copy, and not checking into CVS
for a day or so. I have some ideas as to how a project like this can deal
with multiple optimizations (for different processors, etc.), but I need
to think about them a bit and go over the code as it's structured now.
That said, I think creating vlc_x86.[ch] with identical contents to the
standard vlc.[ch] except for the code itself is a good place to start.
We'll want to come up with some combination of stuff that allows a
Makefile to select one or the other without having to duplicate tables and
such.
I'll post tomorrow sometime on my ideas for this.
I'd be very interested in taking a peek at what you get as soon as you get
something working, though, regardless of CVS.
TTYL,
Omega
Erik Walthinsen <om...@cs...> - Staff Programmer @ OGI
Quasar project - http://www.cse.ogi.edu/DISC/projects/quasar/
Video4Linux Two drivers and stuff - http://www.cse.ogi.edu/~omega/v4l2/
__
/ \ SEUL: Simple End-User Linux - http://www.seul.org/
| | M E G A Helping Linux become THE choice
_\ /_ for the home or office user
|
|
From: James B. <ja...@ex...> - 2000-04-26 21:43:34
|
Erik Walthinsen wrote:
>
> On Tue, 25 Apr 2000, James Bowman wrote:
>
> > I'd like to take a crack at the asm versions of __dv_decode_vlc and
> > dv_decode_vlc, so I'll be making a few changes in vlc.h and vlc.c, as
> > well as adding a new file for the asm implementation itself.
>
> I'd suggest working on it in your working copy, and not checking into CVS
> for a day or so. I have some ideas as to how a project like this can deal
> with multiple optimizations (for different processors, etc.), but I need
> to think about them a bit and go over the code as it's structured now.
>
> That said, I think creating vlc_x86.[ch] with identical contents to the
> standard vlc.[ch] except for the code itself is a good place to start.
> We'll want to come up with some combination of stuff that allows a
> Makefile to select one or the other without having to duplicate tables and
> such.
>
> I'll post tomorrow sometime on my ideas for this.
>
> I'd be very interested in taking a peek at what you get as soon as you get
> something working, though, regardless of CVS.
Here it is, nice and branchless, just like the C. Uses the tables
defined in vlc.c.
I actually tweaked the C a little before I started to make the job a
little simpler. The biggest change was to make dv_vlc_t small enough to
fit in a single 32-bit word. The main function now looks like this:
/*
 * Look up the variable-length code held in `bits` and store the matching
 * table entry in *result, with the sign already applied to its amplitude.
 *
 * The routine is deliberately branchless: the class lookup, the entry
 * lookup and the sign selection are all done with table indexing.
 */
void __dv_decode_vlc(gint bits, dv_vlc_t *result) {
  gint vlc_class, class_slot, entry_slot, negate;
  gint amp_choice[2];

  /* Stage 1: pick the lookup class, using the [16] row of the class tables. */
  class_slot = (bits & (dv_vlc_class_index_mask[16])) >>
               (dv_vlc_class_index_rshift[16]);
  vlc_class = dv_vlc_classes[16][class_slot];

  /* Stage 2: fetch the whole entry from that class's table. */
  entry_slot = (bits & (dv_vlc_index_mask[vlc_class])) >>
               (dv_vlc_index_rshift[vlc_class]);
  *result = dv_vlc_lookups[vlc_class][entry_slot];

  /* amp_choice[0] keeps the amplitude as stored, amp_choice[1] is its negation. */
  amp_choice[0] = result->amp;
  amp_choice[1] = -amp_choice[0];

  /*
   * Only a positive amplitude may be negated.  The (amp > 0) term is 0 or 1,
   * so AND-ing it with the shifted bits leaves just the sign bit (bit 0 of
   * the shifted value) or 0.  The original note here read "or vlc not
   * valid" -- presumably invalid entries carry a non-positive amp and so
   * never take the negated slot; confirm against the tables in vlc.c.
   */
  negate = (amp_choice[0] > 0) & (bits >> sign_rshift[result->len]);
  result->amp = amp_choice[negate];
} // __dv_decode_vlc
/*
 * __dv_decode_vlc(bits, result) -- 32-bit x86 version (GNU as, AT&T syntax)
 * of the C routine of the same name.
 *
 * In:    both arguments are read from the stack.  After the single pushl
 *        below, bits is at 8(%esp) and the result pointer is at 12(%esp).
 * Out:   one 32-bit word is stored through the result pointer.  No return
 *        value is set up (%eax holds the result pointer on exit).
 * Regs:  %eax, %ecx, %edx are used as scratch; %ebx is saved and restored.
 * Data:  reads the external tables dv_vlc_class_lookup5, dv_vlc_index_mask,
 *        dv_vlc_index_rshift, dv_vlc_lookups and sign_mask.
 * The code is straight-line: it contains no jumps.
 */
.text
.align 4
.globl __dv_decode_vlc
.type __dv_decode_vlc,@function
__dv_decode_vlc:
pushl %ebx
/* With %ebx pushed, the args are at 8(%esp) (bits) and 12(%esp) (result). */
movl 8(%esp),%eax /* %eax is bits */

/* class = (signed byte) dv_vlc_class_lookup5[(bits & 0xfe00) >> 9].
   NOTE(review): the table, mask and shift are hard-coded here; presumably
   they match the [16] entries the C version indexes -- confirm in vlc.c. */
movl %eax,%edx /* %edx is class */
andl $0xfe00,%edx
sarl $9,%edx
movsbl dv_vlc_class_lookup5(%edx),%edx

/* %ebx = (bits & dv_vlc_index_mask[class]) >> dv_vlc_index_rshift[class] */
movl dv_vlc_index_mask(,%edx,4),%ebx
movl dv_vlc_index_rshift(,%edx,4),%ecx
andl %eax,%ebx
sarl %cl,%ebx

/* %edx = dv_vlc_lookups[class], used as the base address of a table of
   32-bit entries; then %edx = the entry at index %ebx. */
movl dv_vlc_lookups(,%edx,4),%edx
movl (%edx,%ebx,4),%edx
/* Now %edx holds result, like this:
bits 0-7 run
bits 8-15 len
bits 16-31 amp
*/
/* The code needs to do this with result:
if ((amp > 0) && (bits & sign_mask[len]))
amp = -amp;
(The code below tests amp >= 0 rather than amp > 0; negating an amp of 0
leaves it 0, so the stored result is the same.)
*/

/* %ecx = len (bits 8-15 of the result), then %ecx = sign_mask[len]. */
movl %edx,%ecx
sarl $8,%ecx
andl $0xff,%ecx
movl sign_mask(,%ecx,4),%ecx
/* %eax = 0xffffffff if (bits & sign_mask[len]) is non-zero, else 0.
   This relies on the negated value having bit 31 set, which holds when the
   masked value is in 1..0x80000000 -- presumably sign_mask entries are
   single bits; confirm. */
andl %ecx,%eax
negl %eax
sarl $31,%eax

/* %ebx = 0 if amp < 0 (bit 31 of %edx set), else 0xffff0000. */
movl %edx,%ebx
sarl $31,%ebx
xorl $0xffffffff,%ebx
andl $0xffff0000,%ebx

/* %eax = 0xffff0000 when amp must be negated, else 0. */
andl %ebx,%eax

/* Conditional two's-complement negate of the amp field only: the xor flips
   bits 16-31 and subtracting 0xffff0000 adds 0x10000 (mod 2^32), i.e. +1 in
   the amp field.  With %eax == 0 both instructions leave %edx unchanged.
   run and len (bits 0-15) are untouched either way. */
xorl %eax,%edx
subl %eax,%edx

/* *result = %edx */
movl 12(%esp),%eax
movl %edx,(%eax)
popl %ebx
ret
--
James Bowman
ja...@ex...
|
|
From: Erik W. <om...@cs...> - 2000-04-26 23:16:34
|
On Wed, 26 Apr 2000, James Bowman wrote:
Some issues:
> .text
> .align 4
> .globl __dv_decode_vlc
> .type __dv_decode_vlc,@function
> __dv_decode_vlc:
> pushl %ebx
>
> /* Args are at 20(%esp). */
> movl 8(%esp),%eax /* %eax is bits */
>
> movl %eax,%edx /* %edx is class */
> andl $0xfe00,%edx
> sarl $9,%edx
> movsbl dv_vlc_class_lookup5(%edx),%edx
I'm confused. What does this do?
>
> movl dv_vlc_index_mask(,%edx,4),%ebx
> movl dv_vlc_index_rshift(,%edx,4),%ecx
> andl %eax,%ebx
> sarl %cl,%ebx
>
> movl dv_vlc_lookups(,%edx,4),%edx
> movl (%edx,%ebx,4),%edx
This is where it segfaults right now. I can't seem to get gdb to tell me
what's in the register, though.
>
> /* Now %edx holds result, like this:
> bits 0-7 run
> bits 8-15 len
> bits 16-31 amp
> */
> /* code needs to do this with result:
> if ((amp > 0) &&
> if ((bits >> sign_rshift[result->len]) & 1)
> amp = -amp;
> }
> */
> /* if (amp < 0) %edx is 0, else 0xffff0000. */
> movl %edx,%ecx
> sarl $8,%ecx
> andl $0xff,%ecx
> movl sign_mask(,%ecx,4),%ecx
> andl %ecx,%eax
> negl %eax
> sarl $31,%eax
>
> movl %edx,%ebx
> sarl $31,%ebx
> xorl $0xffffffff,%ebx
> andl $0xffff0000,%ebx
>
> andl %ebx,%eax
>
> xorl %eax,%edx
> subl %eax,%edx
>
> movl 12(%esp),%eax
> movl %edx,(%eax)
>
> popl %ebx
> ret
Erik Walthinsen <om...@cs...> - Staff Programmer @ OGI
Quasar project - http://www.cse.ogi.edu/DISC/projects/quasar/
Video4Linux Two drivers and stuff - http://www.cse.ogi.edu/~omega/v4l2/
__
/ \ SEUL: Simple End-User Linux - http://www.seul.org/
| | M E G A Helping Linux become THE choice
_\ /_ for the home or office user
|