[xine-cvs] CVS: xine-lib/src/libffmpeg/libavcodec/alpha asm.h,NONE,1.1 dsputil_alpha.c,NONE,1.1 mpeg

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/xine/xine-lib/src/libffmpeg/libavcodec/alpha
In directory usw-pr-cvs1:/tmp/cvs-serv13417/libavcodec/alpha

Added Files:
	asm.h dsputil_alpha.c mpegvideo_alpha.c pixops.h 
Log Message:
sync to ffmpeg cvs, trying to keep differences to a minimum.
diff_to_ffmpeg_cvs.txt updated.
tested only on x86, please report any problems, compilation errors, etc.
alpha architecture added but makefiles were not updated.

--- NEW FILE: asm.h ---
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <fa...@de...>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef LIBAVCODEC_ALPHA_ASM_H
#define LIBAVCODEC_ALPHA_ASM_H

#include <stdint.h>

#define AMASK_BWX (1 << 0)
#define AMASK_FIX (1 << 1)
#define AMASK_MVI (1 << 8)

/* Replicate x into every byte lane of a 64-bit word by OR-ing it with
   copies of itself shifted by 8, 16 and 32 bits, e.g.
   0x01 -> 0x0101010101010101.  Intended for values that fit in 8 bits.  */
static inline uint64_t BYTE_VEC(uint64_t x)
{
    const uint64_t pair = x    | (x    <<  8);
    const uint64_t quad = pair | (pair << 16);

    return quad | (quad << 32);
}
/* Replicate x into all four 16-bit lanes of a 64-bit word by OR-ing it
   with copies of itself shifted by 16 and 32 bits, e.g.
   0x00ff -> 0x00ff00ff00ff00ff.  Intended for values that fit in 16 bits.  */
static inline uint64_t WORD_VEC(uint64_t x)
{
    const uint64_t pair = x | (x << 16);

    return pair | (pair << 32);
}

/* Load a signed 32-bit value from p.  The pointer is dereferenced as an
   int32_t directly, so it must be suitably aligned for that type.  */
static inline int32_t ldl(const void* p)
{
    const int32_t *src = p;

    return src[0];
}
/* Load an unsigned 64-bit value from p.  The pointer is dereferenced as a
   uint64_t directly, so it must be suitably aligned for that type.  */
static inline uint64_t ldq(const void* p)
{
    const uint64_t *src = p;

    return src[0];
}
/* Load the aligned 64-bit word that contains the byte at p: the low three
   address bits are cleared before the load (presumably mirroring the Alpha
   ldq_u instruction the function is named after).
   FIXME ccc doesn't seem to get it? Use inline asm?  */
static inline uint64_t ldq_u(const void* p)
{
    /* Build the mask in uintptr_t so that it is always as wide as a
       pointer.  "~7ul" is only 32 bits wide on ABIs where unsigned long
       is 32 bits and would then also clear the upper half of a 64-bit
       address.  */
    const uintptr_t aligned = (uintptr_t) p & ~(uintptr_t) 7;

    return *(const uint64_t*) aligned;
}
/* Store the 32-bit value l at p.  The pointer is written through as a
   uint32_t directly, so it must be suitably aligned for that type.  */
static inline void stl(uint32_t l, void* p)
{
    uint32_t *dst = p;

    dst[0] = l;
}
/* Store the 64-bit value l at p.  The pointer is written through as a
   uint64_t directly, so it must be suitably aligned for that type.  */
static inline void stq(uint64_t l, void* p)
{
    uint64_t *dst = p;

    dst[0] = l;
}

#ifdef __GNUC__
/* GNU C flavour: define a static inline wrapper called `name' around the
   machine instruction of the same mnemonic (the mnemonic is produced by
   stringizing the macro argument).  The wrapper takes one 64-bit operand
   in a register and returns the 64-bit result register.  The asm is not
   volatile, so the compiler is free to CSE or drop it.  */
#define OPCODE1(name)						\
static inline uint64_t name(uint64_t l)				\
{								\
    uint64_t r;							\
    asm (#name " %1, %0" : "=r" (r) : "r" (l));			\
    return r;							\
}

/* GNU C flavour: define a static inline wrapper called `name' around the
   two-operand machine instruction of the same mnemonic.  The first operand
   is always passed in a register; the "rI" constraint lets the second one
   be either a register or a small constant encoded as an immediate (see
   the GCC machine-constraint documentation for the exact range of "I").  */
#define OPCODE2(name)						\
static inline uint64_t name(uint64_t l1, uint64_t l2)		\
{								\
    uint64_t r;							\
    asm (#name " %1, %2, %0" : "=r" (r) : "r" (l1), "rI" (l2));	\
    return r;							\
}

/* Execute the "rpcc" instruction and return its 64-bit result (presumably
   the processor cycle counter -- confirm against the architecture manual).
   We don't want gcc to move this around or combine it with another
   rpcc, so mark it volatile.  */
static inline uint64_t rpcc(void)
{
    uint64_t r;
    asm volatile ("rpcc %0" : "=r" (r));
    return r;
}

/* Load a 64-bit value from a possibly unaligned address.  Reading through
   a packed struct tells the compiler that the member has no alignment
   guarantee, so it emits an access sequence that is safe for any address.  */
static inline uint64_t uldq(const void* v)
{
    /* The member is uint64_t rather than unsigned long so that a full
       quadword is loaded even where long is only 32 bits wide.  */
    struct unaligned_u64 {
	uint64_t value;
    } __attribute__((packed));

    return ((const struct unaligned_u64*) v)->value;
}

#elif defined(__DECC)		/* Compaq "ccc" compiler */

#include <c_asm.h>
/* Compaq C flavour of OPCODE1: define a static inline wrapper called
   `name' around the one-operand instruction of the same mnemonic, using
   the asm() intrinsic from <c_asm.h>.  %a0 and %v0 presumably name the
   first argument register and the return-value register -- confirm
   against the c_asm.h documentation.  */
#define OPCODE1(name)							\
static inline uint64_t name(uint64_t l)					\
{									\
    return asm (#name " %a0, %v0", l);					\
}

/* Compaq C flavour of OPCODE2: define a static inline wrapper called
   `name' around the two-operand instruction of the same mnemonic.  Both
   operands are handed to the asm() intrinsic as arguments (%a0, %a1) and
   the result is taken from %v0; unlike the GNU C version there is no
   immediate-operand form here.  */
#define OPCODE2(name)							\
static inline uint64_t name(uint64_t l1, uint64_t l2)			\
{									\
    return asm (#name " %a0, %a1, %v0", l1, l2);			\
}

/* Compaq C flavour of rpcc(): execute the "rpcc" instruction through the
   asm() intrinsic and return its 64-bit result (presumably the processor
   cycle counter -- confirm against the architecture manual).  */
static inline uint64_t rpcc(void)
{
    return asm  ("rpcc %v0");
}

/* Compaq C flavour of uldq(): load a 64-bit value from a possibly
   unaligned address.  The compiler-specific __unaligned qualifier on the
   pointee marks the access as not necessarily aligned.  */
static inline uint64_t uldq(const void* v)
{
    return *(const __unaligned uint64_t *) v;
}

#endif

/* Instantiate one inline wrapper function per machine instruction, using
   whichever OPCODE1/OPCODE2 definition was selected above for the
   compiler in use.  The short descriptions follow the instruction
   mnemonics; confirm the details against the Alpha architecture manual.  */
OPCODE1(amask);		/* feature query: clears the bits of present features */
OPCODE1(unpkbw);	/* unpack bytes to 16-bit words */
OPCODE1(pkwb);		/* pack 16-bit words to bytes */
OPCODE2(extql);		/* presumably: extract quadword low */
OPCODE2(extqh);		/* presumably: extract quadword high */
OPCODE2(zap);		/* zero the bytes selected by a bit mask */
OPCODE2(cmpbge);	/* bytewise unsigned >= compare, yields a bit mask */
OPCODE2(minsw4);	/* per-word signed minimum (4 x 16 bit) */
OPCODE2(minuw4);	/* per-word unsigned minimum (4 x 16 bit) */
OPCODE2(minub8);	/* per-byte unsigned minimum (8 x 8 bit) */
OPCODE2(maxsw4);	/* per-word signed maximum (4 x 16 bit) */
OPCODE2(maxuw4);	/* per-word unsigned maximum (4 x 16 bit) */
OPCODE2(perr);		/* presumably: pixel error (sum of absolute byte differences) */

#endif /* LIBAVCODEC_ALPHA_ASM_H */

--- NEW FILE: dsputil_alpha.c ---
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <fa...@de...>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "asm.h"
#include "../dsputil.h"

void simple_idct_axp(DCTELEM *block);

/* Write an 8x8 block of 16-bit coefficients to an 8-bit pixel plane.
   Each row is handled as two groups of four coefficients: the group is
   loaded as one 64-bit word, every 16-bit lane is clamped to 0..255 with
   maxsw4/minsw4, the four words are packed to four bytes with pkwb and
   stored with one 32-bit store.

   block     - 64 coefficients, 8 per row; read with ldq.
   pixels    - destination of the first row; written with stl.
   line_size - distance in bytes between two destination rows.  */
static void put_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
                                   int line_size)
{
    const UINT64 byte_max = WORD_VEC(0x00ff);
    int row;

    for (row = 0; row < 8; ++row) {
        int half;

        /* Left four pixels first, then the right four.  */
        for (half = 0; half < 8; half += 4) {
            UINT64 words = ldq(block + half);

            words = maxsw4(words, 0);         /* negative -> 0 */
            words = minsw4(words, byte_max);  /* > 255 -> 255 */
            stl(pkwb(words), pixels + half);
        }

        pixels += line_size;
        block  += 8;
    }
}

/* Add the four coefficients at coeffs to the four pixels at dest and store
   the sums, clamped to 0..255, back to dest.  All four lanes are handled
   in one 64-bit register, so bit 15 of every lane is kept clear around the
   addition to stop a carry from spilling into the neighbouring lane.  */
static inline void add_clamped_4px_axp(const DCTELEM *coeffs, UINT8 *dest)
{
    const UINT64 top_bit    = WORD_VEC(0x8000);
    const UINT64 neg_marker = WORD_VEC(0x4000);
    UINT64 sums = ldq(coeffs);

    sums &= ~top_bit;                   /* clear highest bit to avoid overflow */
    sums += unpkbw(ldl(dest));
    sums &= ~top_bit;                   /* hibit would be set for e. g. -2 + 3 */
    sums  = minuw4(sums, neg_marker);   /* set neg. to 0x4000 */
    sums &= ~neg_marker;                /* ...and zap them */
    sums  = minsw4(sums, WORD_VEC(0x00ff)); /* clamp to 255 */
    stl(pkwb(sums), dest);
}

/* Add an 8x8 block of 16-bit coefficients to an 8-bit pixel plane with
   clamping to 0..255.

   block     - 64 coefficients, 8 per row; read with ldq.
   pixels    - first destination row; read with ldl and written with stl.
   line_size - distance in bytes between two destination rows.  */
static void add_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
                                   int line_size)
{
    int rows_left;

    for (rows_left = 8; rows_left != 0; --rows_left) {
        add_clamped_4px_axp(block, pixels);          /* left four */
        add_clamped_4px_axp(block + 4, pixels + 4);  /* next 4 */

        pixels += line_size;
        block  += 8;
    }
}

/* Truncating average of 8 unsigned bytes in parallel: (b1 + b2) >> 1.
   A byte sum may need nine bits, so each operand is halved before the
   addition (its low bit is masked off first so nothing leaks into the
   byte below).  Halving first loses one exactly when both bytes are odd,
   which is put back by adding (l1 & l2) & BYTE_VEC(0x01).  */
static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
{
    const UINT64 lsb   = BYTE_VEC(0x01);
    const UINT64 half1 = (l1 & ~lsb) >> 1;
    const UINT64 half2 = (l2 & ~lsb) >> 1;

    return half1 + half2 + (l1 & l2 & lsb);
}

/* Rounding average of 8 unsigned bytes in parallel: (b1 + b2 + 1) >> 1.
   Each operand is halved separately (with its low bit masked off so
   nothing leaks into the byte below).  The halves fall short of the
   rounded result by one whenever at least one of the two bytes is odd,
   hence the correction term (l1 | l2) & BYTE_VEC(0x01).  */
static inline UINT64 avg2(UINT64 l1, UINT64 l2)
{
    const UINT64 lsb   = BYTE_VEC(0x01);
    const UINT64 half1 = (l1 & ~lsb) >> 1;
    const UINT64 half2 = (l2 & ~lsb) >> 1;

    return half1 + half2 + ((l1 | l2) & lsb);
}

/* Rounding average of four sets of 8 unsigned bytes in parallel:
   (b1 + b2 + b3 + b4 + 2) >> 2.
   The upper six bits of every byte are divided by four and summed on
   their own; the low two bits are summed separately together with the
   rounding constant 2 (at most 3 * 4 + 2 = 14, so the sum stays inside
   its byte), divided by four and masked back to two bits.  */
static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
{
    const UINT64 low2 = BYTE_VEC(0x03);
    UINT64 quarters, remainders;

    quarters   = ((l1 & ~low2) >> 2)
               + ((l2 & ~low2) >> 2)
               + ((l3 & ~low2) >> 2)
               + ((l4 & ~low2) >> 2);
    remainders = (l1 & low2)
               + (l2 & low2)
               + (l3 & low2)
               + (l4 & low2)
               + BYTE_VEC(0x02);

    return quarters + ((remainders >> 2) & low2);
}

/* Average of four sets of 8 unsigned bytes in parallel with the smaller
   rounding constant: (b1 + b2 + b3 + b4 + 1) >> 2.
   Same scheme as avg4(): the upper six bits of every byte are divided by
   four and summed, the low two bits are summed separately with the
   constant 1, divided by four and masked back to two bits.  */
static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
{
    const UINT64 low2 = BYTE_VEC(0x03);
    UINT64 quarters, remainders;

    quarters   = ((l1 & ~low2) >> 2)
               + ((l2 & ~low2) >> 2)
               + ((l3 & ~low2) >> 2)
               + ((l4 & ~low2) >> 2);
    remainders = (l1 & low2)
               + (l2 & low2)
               + (l3 & low2)
               + (l4 & low2)
               + BYTE_VEC(0x01);

    return quarters + ((remainders >> 2) & low2);
}

/* Instantiate the pixops.h template for plain "put" with rounding.
   This generates put_pixels_axp, put_pixels_x2_axp, put_pixels_y2_axp and
   put_pixels_xy2_axp: the destination is a byte plane, interpolation uses
   the rounding averages, and each result is written with a 64-bit store.
   The parameter macros are undefined again afterwards so that the
   template can be included once more with different settings.  */
#define PIXOPNAME(suffix) put ## suffix
#define BTYPE UINT8
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, b) stq(l, b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE

/* Instantiate the pixops.h template for "put" without rounding.
   This generates put_no_rnd_pixels_axp and its _x2/_y2/_xy2 variants; the
   only difference from the "put" instantiation is that interpolation uses
   avg2_no_rnd and avg4_no_rnd.  The parameter macros are undefined again
   afterwards.  */
#define PIXOPNAME(suffix) put_no_rnd ## suffix
#define BTYPE UINT8
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define STORE(l, b) stq(l, b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE

/* The following functions are untested.  */
#if 0

/* Instantiate the pixops.h template for "avg" with rounding: generates
   avg_pixels_axp and its _x2/_y2/_xy2 variants.  Instead of overwriting
   the destination, STORE averages the new value with the 8 bytes already
   at the destination and writes the result back.
   STORE deliberately has no trailing semicolon: pixops.h uses it as an
   ordinary statement and supplies the ';' itself, so a semicolon inside
   the macro would expand to an extra empty statement (and would break an
   unbraced if/else around it).  */
#define PIXOPNAME(suffix) avg ## suffix
#define BTYPE UINT8
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE

/* Instantiate the pixops.h template for "avg" without rounding: generates
   avg_no_rnd_pixels_axp and its _x2/_y2/_xy2 variants.  Interpolation and
   the final merge with the bytes already at the destination both use the
   non-rounding averages.
   STORE deliberately has no trailing semicolon: pixops.h uses it as an
   ordinary statement and supplies the ';' itself, so a semicolon inside
   the macro would expand to an extra empty statement (and would break an
   unbraced if/else around it).  */
#define PIXOPNAME(suffix) avg_no_rnd ## suffix
#define BTYPE UINT8
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE

/* Instantiate the pixops.h template for "sub": generates sub_pixels_axp
   and its _x2/_y2/_xy2 variants.  The destination is an array of DCTELEM
   rather than bytes; STORE splits the 64-bit value into its eight bytes
   (lowest byte first) and subtracts each one from the corresponding
   destination element.  The value is copied into a local first so the
   macro argument is evaluated only once.
   NOTE(review): pixops.h advances the destination pointer by line_size
   elements per row; whether that is the intended stride for a DCTELEM
   destination cannot be told from here -- verify before enabling.  */
#define PIXOPNAME(suffix) sub ## suffix
#define BTYPE DCTELEM
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, block) do {		\
    UINT64 xxx = l;			\
    (block)[0] -= (xxx >>  0) & 0xff;	\
    (block)[1] -= (xxx >>  8) & 0xff;	\
    (block)[2] -= (xxx >> 16) & 0xff;	\
    (block)[3] -= (xxx >> 24) & 0xff;	\
    (block)[4] -= (xxx >> 32) & 0xff;	\
    (block)[5] -= (xxx >> 40) & 0xff;	\
    (block)[6] -= (xxx >> 48) & 0xff;	\
    (block)[7] -= (xxx >> 56) & 0xff;	\
} while (0)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE

#endif

/* Install the Alpha implementations in the global dsputil function
   pointers.  The pixel-copy/interpolation tables are always replaced; the
   clamped put/add routines rely on the MVI instructions and are only
   installed when amask() reports that extension as present.  */
void dsputil_init_alpha(void)
{
    /* Index 0: straight copy.  No averaging is involved, so the rounding
       and non-rounding tables share one routine.  */
    put_pixels_tab[0]        = put_pixels_axp;
    put_no_rnd_pixels_tab[0] = put_pixels_axp;

    /* Index 1: horizontal interpolation.  */
    put_pixels_tab[1]        = put_pixels_x2_axp;
    put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp;

    /* Index 2: vertical interpolation.  */
    put_pixels_tab[2]        = put_pixels_y2_axp;
    put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;

    /* Index 3: interpolation in both directions.  */
    put_pixels_tab[3]        = put_pixels_xy2_axp;
    put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;

    /* amask clears all bits that correspond to present features, so a
       non-zero result means MVI is missing.  */
    if (amask(AMASK_MVI) != 0)
        return;

    fprintf(stderr, "MVI extension detected\n");
    put_pixels_clamped = put_pixels_clamped_axp;
    add_pixels_clamped = add_pixels_clamped_axp;
}

--- NEW FILE: mpegvideo_alpha.c ---
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <fa...@de...>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "asm.h"
#include "../dsputil.h"
#include "../mpegvideo.h"

extern UINT8 zigzag_end[64];

/*
 * H.263-style inverse quantisation of one block, four coefficients per
 * 64-bit operation.
 *
 * Every non-zero coefficient becomes level * qmul + qadd for positive
 * levels and level * qmul - qadd for negative ones; zero coefficients
 * stay zero.
 *
 * s      - codec context; mb_intra, y_dc_scale, c_dc_scale, qscale and
 *          block_last_index[] are read.
 * block  - coefficients, rewritten in place.  Accessed with ldq/stq, so it
 *          presumably has to be 8-byte aligned -- confirm with callers.
 * n      - block number; for intra blocks n < 4 selects y_dc_scale for the
 *          DC term, otherwise c_dc_scale is used.
 * qscale - quantiser used for the rounding term of the vectorised loop.
 *
 * NOTE(review): qmul and the scalar intra rounding term are derived from
 * s->qscale while the vectorised rounding term uses the qscale parameter.
 * Presumably both are equal at every call site -- confirm.
 */
static void dct_unquantize_h263_axp(MpegEncContext *s, 
				    DCTELEM *block, int n, int qscale)
{
    int i, level;
    UINT64 qmul, qadd;
    if (s->mb_intra) {
        /* The DC coefficient only gets scaled.  */
        if (n < 4) 
            block[0] = block[0] * s->y_dc_scale;
        else
            block[0] = block[0] * s->c_dc_scale;
	/* Catch up to aligned point: coefficients 1..3 share a quadword
	   with the DC term and are therefore done one at a time.  */
	qmul = s->qscale << 1;
	qadd = (s->qscale - 1) | 1;
	for (i = 1; i < 4; ++i) {
	    level = block[i];
	    if (level) {
		if (level < 0) {
		    level = level * qmul - qadd;
		} else {
		    level = level * qmul + qadd;
		}
		block[i] = level;
	    }
	}
	block += 4;
	/* The remaining 60 coefficients, in groups of four.  */
	i = 60 / 4;
    } else {
	/* Number of four-coefficient groups to process.  The count is
	   rounded up: a truncating division would leave the tail of the
	   block untouched whenever the count is not a multiple of four,
	   and would produce 0 for counts below four.  Groups that are
	   entirely zero are skipped in the loop, and zero coefficients
	   inside a group stay zero, so touching up to three extra
	   coefficients is harmless as long as they are zero (assumes
	   zigzag_end[] holds a coefficient count -- confirm).  */
        i = (zigzag_end[s->block_last_index[n]] + 3) / 4;
	/* The do/while below always runs at least once and stops when --i
	   reaches zero; entering it with i == 0 would run far past the end
	   of the block.  */
	if (i == 0)
	    return;
    }
    qmul = s->qscale << 1;
    qadd = WORD_VEC((qscale - 1) | 1);
    do {
	UINT64 levels, negmask, zeromask, corr;
	levels = ldq(block);
	/* Nothing to do for four zero coefficients; `continue' still
	   evaluates the loop condition, which advances block and i.  */
	if (levels == 0)
	    continue;
	/* One bit per byte that is zero; after the AND with the shifted
	   mask, the bit of a word's low byte is set only if both of its
	   bytes are zero.  Stray bits at high-byte positions only select
	   high bytes of qadd, which are zero anyway for the small rounding
	   values used here.  */
	zeromask = cmpbge(0, levels);
	zeromask &= zeromask >> 1;
	/* Negate all negative words.  */
	negmask = maxsw4(levels, WORD_VEC(0xffff)); /* negative -> ffff (-1) */
	negmask = minsw4(negmask, 0);		    /* positive -> 0000 (0) */
	corr    = negmask & WORD_VEC(0x0001); /* twos-complement correction */
	levels ^= negmask;
	levels += corr;

	/* Plain 64-bit multiply and add on the four magnitudes; this is
	   only lane-correct while every product fits in 16 bits.  The
	   rounding term is zapped for words that are zero.  */
	levels = levels * qmul;
	levels += zap(qadd, zeromask);

	/* Re-negate negative words.  */
	levels -= corr;
	levels ^= negmask;

	stq(levels, block);
    } while (block += 4, --i);
}

/* Install the Alpha-optimised dequantiser in the codec context.  It is
   only selected when the CPU reports the MVI extension (amask clears the
   bits of present features) and the stream format is H.263; otherwise the
   context is left untouched.  */
void MPV_common_init_axp(MpegEncContext *s)
{
    if (amask(AMASK_MVI) == 0 && s->out_format == FMT_H263)
        s->dct_unquantize = dct_unquantize_h263_axp;
}

--- NEW FILE: pixops.h ---
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <fa...@de...>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/* This file is intended to be #included with proper definitions of
 * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE.  */

/* Full-pel case: hand 8 source bytes per row to STORE, for h rows.
   The source is read with a plain 64-bit load when it is 8-byte aligned
   and with the unaligned load otherwise.  Source and destination pointers
   both advance by line_size per row.  h must be at least 1, since the
   loops test the counter only after the first row.  */
static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
                                   int line_size, int h)
{
    const int src_aligned = ((size_t) pixels & 0x7) == 0;

    if (src_aligned) {
        do {
            STORE(ldq(pixels), block);
            pixels += line_size;
            block  += line_size;
        } while (--h);
        return;
    }

    do {
        STORE(uldq(pixels), block);
        pixels += line_size;
        block  += line_size;
    } while (--h);
}

/* Horizontal half-pel case: for each of h rows, average the 8 source
   bytes with the 8 bytes starting one position further along the row and
   hand the result to STORE.  The shifted operand is built from the first
   load by shifting it down one byte and inserting pixels[8] as the top
   byte, so nine source bytes are read per row.  h must be at least 1.  */
static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
                                      int line_size, int h)
{
    UINT64 here, shifted;

    if (((size_t) pixels & 0x7) == 0) {
        /* Aligned source: plain 64-bit loads.  */
        do {
            here    = ldq(pixels);
            shifted = (here >> 8) | ((UINT64) pixels[8] << 56);
            STORE(AVG2(here, shifted), block);
            pixels += line_size;
            block  += line_size;
        } while (--h);
        return;
    }

    /* Unaligned source.  */
    do {
        here    = uldq(pixels);
        shifted = (here >> 8) | ((UINT64) pixels[8] << 56);
        STORE(AVG2(here, shifted), block);
        pixels += line_size;
        block  += line_size;
    } while (--h);
}

/* Vertical half-pel case: for each of h rows, average the 8 source bytes
   of the current row with the 8 bytes of the row below and hand the
   result to STORE.  The lower row of one pass is reused as the upper row
   of the next, so h + 1 source rows are read in total.  h must be at
   least 1.  */
static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
                                      int line_size, int h)
{
    UINT64 upper, lower;

    if (((size_t) pixels & 0x7) == 0) {
        /* Aligned source: plain 64-bit loads.  */
        upper = ldq(pixels);
        do {
            pixels += line_size;
            lower = ldq(pixels);
            STORE(AVG2(upper, lower), block);
            block += line_size;
            upper = lower;
        } while (--h);
        return;
    }

    /* Unaligned source.  */
    upper = uldq(pixels);
    do {
        pixels += line_size;
        lower = uldq(pixels);
        STORE(AVG2(upper, lower), block);
        block += line_size;
        upper = lower;
    } while (--h);
}

/* Diagonal half-pel case: for each of h rows, average four 8-byte
   operands -- the current row, the current row shifted by one byte, and
   the same two for the row below -- and hand the result to STORE.  The
   lower pair of one pass is reused as the upper pair of the next, so
   h + 1 source rows of nine bytes each are read.  h must be at least 1.
   This could be further sped up by recycling AVG4 intermediate
   results from the previous loop pass.  */
static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
                                       int line_size, int h)
{
    UINT64 top, top_shifted, bottom, bottom_shifted;

    if (((size_t) pixels & 0x7) == 0) {
        /* Aligned source: plain 64-bit loads.  */
        top         = ldq(pixels);
        top_shifted = (top >> 8) | ((UINT64) pixels[8] << 56);

        do {
            pixels += line_size;
            bottom         = ldq(pixels);
            bottom_shifted = (bottom >> 8) | ((UINT64) pixels[8] << 56);

            STORE(AVG4(top, top_shifted, bottom, bottom_shifted), block);

            block += line_size;
            top         = bottom;
            top_shifted = bottom_shifted;
        } while (--h);
        return;
    }

    /* Unaligned source.  */
    top         = uldq(pixels);
    top_shifted = (top >> 8) | ((UINT64) pixels[8] << 56);

    do {
        pixels += line_size;
        bottom         = uldq(pixels);
        bottom_shifted = (bottom >> 8) | ((UINT64) pixels[8] << 56);

        STORE(AVG4(top, top_shifted, bottom, bottom_shifted), block);

        block += line_size;
        top         = bottom;
        top_shifted = bottom_shifted;
    } while (--h);
}

[xine-cvs] CVS: xine-lib/src/libffmpeg/libavcodec/alpha asm.h,NONE,1.1 dsputil_alpha.c,NONE,1.1 mpeg

[xine-cvs] CVS: xine-lib/src/libffmpeg/libavcodec/alpha asm.h,NONE,1.1 dsputil_alpha.c,NONE,1.1 mpegvideo_alpha.c,NONE,1.1 pixops.h,NONE,1.1