|
From: James B. <ja...@ex...> - 2000-04-26 21:43:34
|
Erik Walthinsen wrote:
>
> On Tue, 25 Apr 2000, James Bowman wrote:
>
> > I'd like to take a crack at the asm versions of __dv_decode_vlc and
> > dv_decode_vlc, so I'll be making a few changes in vlc.h and vlc.c, as
> > well as adding a new file for the asm implementation itself.
>
> I'd suggest working on it in your working copy, and not checking into CVS
> for a day or so. I have some ideas as to how a project like this can deal
> with multiple optimizations (for different processors, etc.), but I need
> to think about them a bit and go over the code as it's structured now.
>
> That said, I think creating vlc_x86.[ch] with identical contents to the
> standard vlc.[ch] except for the code itself is a good place to start.
> We'll want to come up with some combination of stuff that allows a
> Makefile to select one or the other without having to duplicate tables and
> such.
>
> I'll post tomorrow sometime on my ideas for this.
>
> I'd be very interested in taking a peek at what you get as soon as you get
> something working, though, regardless of CVS.
Here it is, nice and branchless, just like the C. Uses the tables
defined in vlc.c.
I actually tweaked the C a little before I started to make the job a
little simpler. The biggest change was to make dv_vlc_t small enough to
fit in a single 32-bit word. The main function now looks like this:
/*
 * Decode the variable-length code held in `bits` and store the decoded
 * table entry in *result.
 *
 *   bits    bit window containing the code to decode
 *   result  receives the looked-up entry; its amp field is then
 *           replaced by the sign-corrected amplitude
 *
 * Everything is table driven: no conditional branch is written in the
 * source.
 */
void __dv_decode_vlc(gint bits, dv_vlc_t *result) {
  gint klass, slot, flip, amp_choice[2];

  /* Stage 1: the class-index bits of the window select a lookup class. */
  slot = (bits & (dv_vlc_class_index_mask[16])) >> (dv_vlc_class_index_rshift[16]);
  klass = dv_vlc_classes[16][slot];

  /* Stage 2: that class supplies its own mask/shift pair, which picks
     the entry for this code out of the class's lookup table. */
  slot = (bits & (dv_vlc_index_mask[klass])) >> (dv_vlc_index_rshift[klass]);
  *result = dv_vlc_lookups[klass][slot];

  /* Stage 3: amp_choice[0] is the amplitude as stored in the table,
     amp_choice[1] is its negation. */
  amp_choice[0] = result->amp;
  amp_choice[1] = -amp_choice[0];

  /* (amp > 0) is 0 or 1, so the & keeps only bit 0 of the shifted
     window.  flip is therefore 1 exactly when the stored amplitude is
     positive and that bit is set; a non-positive amplitude is never
     negated (the original author's note here read "or vlc not valid"). */
  flip = (amp_choice[0] > 0) & (bits >> sign_rshift[result->len]);
  result->amp = amp_choice[flip];
} // __dv_decode_vlc
/*
 * void __dv_decode_vlc(gint bits, dv_vlc_t *result)
 *
 * Branchless 32-bit x86 version (GNU as, AT&T syntax) of the C routine
 * of the same name.
 *
 * In:     both arguments are read from the stack (offsets given below).
 * Out:    the decoded entry is stored through result as one 32-bit word:
 *             bits  0-7   run
 *             bits  8-15  len
 *             bits 16-31  amp
 * Clobb:  %eax, %ecx, %edx, flags.  %ebx is saved and restored.
 * Tables: dv_vlc_class_lookup5, dv_vlc_index_mask, dv_vlc_index_rshift,
 *         dv_vlc_lookups and sign_mask, all defined outside this file.
 */
.text
.align 4
.globl __dv_decode_vlc
.type __dv_decode_vlc,@function
__dv_decode_vlc:
        pushl   %ebx
        /* Stack after the push:
              0(%esp)  saved %ebx
              4(%esp)  return address
              8(%esp)  bits
             12(%esp)  result
        */
        movl    8(%esp),%eax            /* %eax = bits */

        /* %edx = class, a signed byte fetched from dv_vlc_class_lookup5
           at index (bits & 0xfe00) >> 9.
           NOTE(review): 0xfe00 / 9 are presumably the hard-coded values of
           dv_vlc_class_index_mask[16] / dv_vlc_class_index_rshift[16], and
           dv_vlc_class_lookup5 presumably is dv_vlc_classes[16] - confirm
           against vlc.c. */
        movl    %eax,%edx
        andl    $0xfe00,%edx
        sarl    $9,%edx
        movsbl  dv_vlc_class_lookup5(%edx),%edx

        /* %ebx = (bits & dv_vlc_index_mask[class])
                      >> dv_vlc_index_rshift[class]
           The shift count has to be in %cl. */
        movl    dv_vlc_index_mask(,%edx,4),%ebx
        movl    dv_vlc_index_rshift(,%edx,4),%ecx
        andl    %eax,%ebx
        sarl    %cl,%ebx

        /* %edx = dv_vlc_lookups[class][index]: first the per-class table
           pointer, then the 32-bit entry itself (run | len | amp, laid out
           as described in the header). */
        movl    dv_vlc_lookups(,%edx,4),%edx
        movl    (%edx,%ebx,4),%edx

        /* What remains is the branch-free form of:
               if (amp > 0 && ((bits >> sign_rshift[result->len]) & 1))
                   amp = -amp;
           The test below is amp >= 0 rather than amp > 0; that is
           equivalent because negating zero leaves zero. */

        /* %ecx = sign_mask[len], with len taken from bits 8-15. */
        movl    %edx,%ecx
        sarl    $8,%ecx
        andl    $0xff,%ecx
        movl    sign_mask(,%ecx,4),%ecx

        /* %eax = 0xffffffff if (bits & sign_mask[len]) != 0, else 0.
           negl turns a non-zero masked value negative and sarl $31 then
           copies the sign bit into every bit position.
           NOTE(review): relies on each sign_mask entry selecting a single
           bit - confirm against the table definition. */
        andl    %ecx,%eax
        negl    %eax
        sarl    $31,%eax

        /* %ebx = 0xffff0000 if amp >= 0 (bit 31 of the entry clear),
           else 0. */
        movl    %edx,%ebx
        sarl    $31,%ebx
        notl    %ebx
        andl    $0xffff0000,%ebx

        /* %eax = 0xffff0000 when amp has to be negated, else 0. */
        andl    %ebx,%eax

        /* Conditional two's-complement negate of the amp field only.
           The xor inverts bits 16-31; subtracting 0xffff0000 is the same
           as adding 0x00010000 modulo 2^32, i.e. +1 at bit 16.  run and
           len are left alone, and with %eax == 0 both steps do nothing. */
        xorl    %eax,%edx
        subl    %eax,%edx

        movl    12(%esp),%eax           /* %eax = result */
        movl    %edx,(%eax)             /* *result = decoded entry */
        popl    %ebx
        ret
        .size   __dv_decode_vlc,.-__dv_decode_vlc
--
James Bowman
ja...@ex...
|