From: Mike M. <tm...@us...> - 2004-02-01 05:33:06
|
Update of /cvsroot/xine/xine-lib/src/libffmpeg/libavcodec/alpha In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv14058/alpha Modified Files: dsputil_alpha.c motion_est_alpha.c mpegvideo_alpha.c Log Message: sync to ffmpeg build 4699 Index: dsputil_alpha.c =================================================================== RCS file: /cvsroot/xine/xine-lib/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- dsputil_alpha.c 27 Oct 2003 15:24:39 -0000 1.11 +++ dsputil_alpha.c 1 Feb 2004 05:31:16 -0000 1.12 @@ -39,11 +39,11 @@ const uint8_t *restrict pixels, int line_size); void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); #if 0 /* These functions were the base for the optimized assembler routines, @@ -290,11 +290,6 @@ return pix_abs16x16_mvi_asm(a, b, stride); } -static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride) -{ - return pix_abs8x8_mvi(a, b, stride); -} - void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; @@ -347,12 +342,13 @@ c->get_pixels = get_pixels_mvi; c->diff_pixels = diff_pixels_mvi; c->sad[0] = sad16x16_mvi; - c->sad[1] = sad8x8_mvi; - c->pix_abs8x8 = pix_abs8x8_mvi; - c->pix_abs16x16 = pix_abs16x16_mvi_asm; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; + c->sad[1] = pix_abs8x8_mvi; +// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed + c->pix_abs[0][0] = sad16x16_mvi; + c->pix_abs[1][0] = pix_abs8x8_mvi; + c->pix_abs[0][1] = pix_abs16x16_x2_mvi; + c->pix_abs[0][2] = pix_abs16x16_y2_mvi; + c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; } put_pixels_clamped_axp_p = c->put_pixels_clamped; Index: motion_est_alpha.c =================================================================== RCS file: /cvsroot/xine/xine-lib/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- motion_est_alpha.c 29 Oct 2002 16:29:18 -0000 1.2 +++ motion_est_alpha.c 1 Feb 2004 05:31:16 -0000 1.3 @@ -84,10 +84,9 @@ return r1 + r2; } -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 8; if ((size_t) pix2 & 0x7) { /* works only when pix2 is actually unaligned */ @@ -160,10 +159,9 @@ } #endif -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t disalign = (size_t) pix2 & 0x7; switch (disalign) { @@ -234,10 +232,9 @@ return result; } -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; if ((size_t) pix2 & 0x7) { uint64_t t, p2_l, p2_r; @@ -288,10 +285,9 @@ return result; } -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t p1_l, p1_r; uint64_t p2_l, p2_r, p2_x; Index: mpegvideo_alpha.c =================================================================== RCS file: /cvsroot/xine/xine-lib/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- mpegvideo_alpha.c 27 Oct 2003 15:24:39 -0000 1.6 +++ mpegvideo_alpha.c 1 Feb 2004 05:31:16 -0000 1.7 @@ -21,7 +21,7 @@ #include "../dsputil.h" #include "../mpegvideo.h" -static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, +static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, n_coeffs; @@ -35,19 +35,15 @@ /* This mask kills spill from negative subwords to the next subword. */ correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ - if (s->mb_intra) { - if (!s->h263_aic) { - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - } else { - qadd = 0; - } - n_coeffs = 63; // does not always use zigzag table + if (!s->h263_aic) { + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; } else { - n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + qadd = 0; } + n_coeffs = 63; // does not always use zigzag table for(i = 0; i <= n_coeffs; block += 4, i += 4) { uint64_t levels, negmask, zeros, add; @@ -90,7 +86,62 @@ orig_block[0] = block0; } +static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block, + int n, int qscale) +{ + int i, n_coeffs; + uint64_t qmul, qadd; + uint64_t correction; + DCTELEM *orig_block = block; + DCTELEM block0; + + qadd = WORD_VEC((qscale - 1) | 1); + qmul = qscale << 1; + /* This mask kills spill from negative subwords to the next subword. */ + correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ + + n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + + for(i = 0; i <= n_coeffs; block += 4, i += 4) { + uint64_t levels, negmask, zeros, add; + + levels = ldq(block); + if (levels == 0) + continue; + +#ifdef __alpha_max__ + /* I don't think the speed difference justifies runtime + detection. */ + negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ + negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ +#else + negmask = cmpbge(WORD_VEC(0x7fff), levels); + negmask &= (negmask >> 1) | (1 << 7); + negmask = zap(-1, negmask); +#endif + + zeros = cmpbge(0, levels); + zeros &= zeros >> 1; + /* zeros |= zeros << 1 is not needed since qadd <= 255, so + zapping the lower byte suffices. */ + + levels *= qmul; + levels -= correction & (negmask << 16); + + /* Negate qadd for negative levels. */ + add = qadd ^ negmask; + add += WORD_VEC(0x0001) & negmask; + /* Set qadd to 0 for levels == 0. */ + add = zap(add, zeros); + + levels += add; + + stq(levels, block); + } +} + void MPV_common_init_axp(MpegEncContext *s) { - s->dct_unquantize_h263 = dct_unquantize_h263_axp; + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; } |