[Mplayerxp-cvslog] SF.net SVN: mplayerxp:[19] mplayerxp
Brought to you by:
olov
From: <nic...@us...> - 2009-12-06 12:02:00
|
Revision: 19 http://mplayerxp.svn.sourceforge.net/mplayerxp/?rev=19&view=rev Author: nickols_k Date: 2009-12-06 12:01:50 +0000 (Sun, 06 Dec 2009) Log Message: ----------- remove duplicated stuff from MPlayerXP Modified Paths: -------------- mplayerxp/libmpcodecs/ad_dshow.c mplayerxp/libmpcodecs/codecs_ld.c mplayerxp/libvo/vo_x11.c mplayerxp/mplayer.c mplayerxp/postproc/Makefile mplayerxp/postproc/postprocess.c mplayerxp/postproc/swscale.c mplayerxp/postproc/swscale.h mplayerxp/postproc/vf_1bpp.c mplayerxp/postproc/vf_down3dright.c mplayerxp/postproc/vf_mirror.c mplayerxp/postproc/vf_palette.c mplayerxp/postproc/vf_raw.c mplayerxp/postproc/vf_rgb2bgr.c mplayerxp/postproc/vf_rotate.c mplayerxp/postproc/vf_test.c mplayerxp/postproc/vf_yuy2.c mplayerxp/postproc/vf_yvu9.c Removed Paths: ------------- mplayerxp/postproc/rgb2rgb.c mplayerxp/postproc/rgb2rgb.h mplayerxp/postproc/rgb2rgb_template.c mplayerxp/postproc/swscale_altivec_template.c mplayerxp/postproc/swscale_internal.h mplayerxp/postproc/swscale_template.c mplayerxp/postproc/yuv2rgb.c mplayerxp/postproc/yuv2rgb_altivec.c mplayerxp/postproc/yuv2rgb_mlib.c mplayerxp/postproc/yuv2rgb_template.c Modified: mplayerxp/libmpcodecs/ad_dshow.c =================================================================== --- mplayerxp/libmpcodecs/ad_dshow.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/libmpcodecs/ad_dshow.c 2009-12-06 12:01:50 UTC (rev 19) @@ -40,10 +40,10 @@ static int load_lib( const char *libname ) { if(!(dll_handle=ld_codec(libname,NULL))) return 0; - DS_AudioDecoder_Open_ptr = dlsym(dll_handle,"DS_AudioDecoder_Open"); - DS_AudioDecoder_Destroy_ptr = dlsym(dll_handle,"DS_AudioDecoder_Destroy"); - DS_AudioDecoder_Convert_ptr = dlsym(dll_handle,"DS_AudioDecoder_Convert"); - DS_AudioDecoder_GetSrcSize_ptr = dlsym(dll_handle,"DS_AudioDecoder_GetSrcSize"); + DS_AudioDecoder_Open_ptr = ld_sym(dll_handle,"DS_AudioDecoder_Open"); + DS_AudioDecoder_Destroy_ptr = ld_sym(dll_handle,"DS_AudioDecoder_Destroy"); + DS_AudioDecoder_Convert_ptr = ld_sym(dll_handle,"DS_AudioDecoder_Convert"); + DS_AudioDecoder_GetSrcSize_ptr = ld_sym(dll_handle,"DS_AudioDecoder_GetSrcSize"); return DS_AudioDecoder_Open_ptr && DS_AudioDecoder_Convert_ptr && DS_AudioDecoder_GetSrcSize_ptr && DS_AudioDecoder_Destroy_ptr; } Modified: mplayerxp/libmpcodecs/codecs_ld.c =================================================================== --- mplayerxp/libmpcodecs/codecs_ld.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/libmpcodecs/codecs_ld.c 2009-12-06 12:01:50 UTC (rev 19) @@ -43,7 +43,8 @@ void * ld_sym(void *handle,const char *sym_name) { void *rval; - if(!(rval=dlsym(handle,sym_name))) + if(!(rval=dlsym(handle,sym_name))) { MSG_ERR(MSGTR_CODEC_DLL_SYM_ERR,sym_name); + } return rval; } Modified: mplayerxp/libvo/vo_x11.c =================================================================== --- mplayerxp/libvo/vo_x11.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/libvo/vo_x11.c 2009-12-06 12:01:50 UTC (rev 19) @@ -48,7 +48,7 @@ #define MSG_D(args...) #define LOCK_VDECODING() { MSG_D(DA_PREFIX"LOCK_VDECODING\n"); pthread_mutex_lock(&vdecoding_mutex); } -#include "../postproc/rgb2rgb.h" /* for MODE_RGB(BGR) definitions */ +#include "../postproc/swscale.h" /* for MODE_RGB(BGR) definitions */ #include "video_out_internal.h" #include "dri_vo.h" #include "../mp_image.h" Modified: mplayerxp/mplayer.c =================================================================== --- mplayerxp/mplayer.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/mplayer.c 2009-12-06 12:01:50 UTC (rev 19) @@ -655,7 +655,7 @@ pinfo[xp_id].current_module="uninit_xp"; uninit_dec_ahead(0); } - + if (mask&INITED_SPUDEC){ inited_flags&=~INITED_SPUDEC; pinfo[xp_id].current_module="uninit_spudec"; @@ -746,6 +746,7 @@ pinfo[xp_id].current_module="exit_player"; + sws_uninit(); if(how) MSG_HINT(MSGTR_Exiting,how); MSG_DBG2("max framesize was %d bytes\n",max_framesize); mp_msg_uninit(); @@ -2048,7 +2049,6 @@ memset(&vstat,0,sizeof(video_stat_t)); mp_msg_init(MSGL_STATUS); - MSG_INFO("%s",banner_text); // memset(&vtune,0,sizeof(vo_tune_info_t)); /* Test for cpu capabilities (and corresponding OS support) for optimizing */ @@ -2065,8 +2065,12 @@ xp_num_cpu=get_number_cpu(); #if defined( ARCH_X86 ) || defined(ARCH_X86_64) - get_mmx_optimizations(); + get_mmx_optimizations(); #endif + if(!sws_init()) { + MSG_ERR("MPlayerXP requires working copy of libswscaler\n"); + return 0; + } if(shuffle_playback) playtree->flags|=PLAY_TREE_RND; else playtree->flags&=~PLAY_TREE_RND; playtree = play_tree_cleanup(playtree); @@ -2090,7 +2094,7 @@ if(!filename){ // no file/vcd/dvd -> show HELP: MSG_INFO("%s",help_text); - exit(0); + return 0; } // Many users forget to include command line in bugreports... Modified: mplayerxp/postproc/Makefile =================================================================== --- mplayerxp/postproc/Makefile 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/postproc/Makefile 2009-12-06 12:01:50 UTC (rev 19) @@ -6,7 +6,7 @@ SUBDIRS=libmenu DO_MAKE = @ for i in $(SUBDIRS); do $(MAKE) -C $$i $@ || exit; done -SRCS=postprocess.c swscale.c rgb2rgb.c yuv2rgb.c +SRCS=postprocess.c swscale.c SRCS+=af.c af_ao2.c af_crystality.c af_mp.c af_dummy.c af_delay.c af_channels.c af_format.c af_resample.c af_volume.c af_equalizer.c af_tools.c af_comp.c af_gate.c af_pan.c af_surround.c af_sub.c af_export.c af_volnorm.c af_extrastereo.c af_lp.c af_dyn.c af_echo3d.c af_hrtf.c af_ffenc.c af_raw.c af_karaoke.c af_center.c af_sinesuppress.c af_scaletempo.c SRCS+=dsp.c SRCS+=vf.c vf_vo.c vf_expand.c vf_format.c vf_yuy2.c vf_rgb2bgr.c vf_rotate.c vf_mirror.c vf_palette.c vf_test.c vf_noise.c vf_yvu9.c vf_rectangle.c vf_eq.c vf_dint.c vf_1bpp.c vf_unsharp.c vf_swapuv.c vf_il.c vf_smartblur.c vf_perspective.c vf_down3dright.c vf_denoise3d.c vf_aspect.c vf_softpulldown.c vf_delogo.c vf_yuvcsp.c vf_pp.c vf_scale.c vf_panscan.c vf_raw.c vf_ow.c vf_2xsai.c vf_framestep.c vf_menu.c Modified: mplayerxp/postproc/postprocess.c =================================================================== --- mplayerxp/postproc/postprocess.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/postproc/postprocess.c 2009-12-06 12:01:50 UTC (rev 19) @@ -30,12 +30,12 @@ static int load_dll(const char *libname) { if(!(dll_handle=ld_codec(libname,"http://ffmpeg.sf.net"))) return 0; - pp_postprocess_ptr=dlsym(dll_handle,"pp_postprocess"); - pp_get_context_ptr=dlsym(dll_handle,"pp_get_context"); - pp_free_context_ptr=dlsym(dll_handle,"pp_free_context"); - pp_get_mode_by_name_and_quality_ptr=dlsym(dll_handle,"pp_get_mode_by_name_and_quality"); - pp_free_mode_ptr=dlsym(dll_handle,"pp_free_mode"); - pp_help_ptr=dlsym(dll_handle,"pp_help"); + pp_postprocess_ptr=ld_sym(dll_handle,"pp_postprocess"); + pp_get_context_ptr=ld_sym(dll_handle,"pp_get_context"); + pp_free_context_ptr=ld_sym(dll_handle,"pp_free_context"); + pp_get_mode_by_name_and_quality_ptr=ld_sym(dll_handle,"pp_get_mode_by_name_and_quality"); + pp_free_mode_ptr=ld_sym(dll_handle,"pp_free_mode"); + pp_help_ptr=ld_sym(dll_handle,"pp_help"); return pp_postprocess_ptr && pp_get_context_ptr && pp_free_context_ptr && pp_get_mode_by_name_and_quality_ptr && pp_free_mode_ptr; } @@ -48,8 +48,8 @@ { MSG_ERR("Detected error during loading libpostproc"SLIBSUFFIX"! Try to upgrade this codec\n"); return 0; - } - return 1; + } + return 1; } extern void exit_player(char *); Deleted: mplayerxp/postproc/rgb2rgb.c =================================================================== --- mplayerxp/postproc/rgb2rgb.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/postproc/rgb2rgb.c 2009-12-06 12:01:50 UTC (rev 19) @@ -1,642 +0,0 @@ -/* - * - * rgb2rgb.c, Software RGB to RGB convertor - * pluralize by Software PAL8 to RGB convertor - * Software YUV to YUV convertor - * Software YUV to RGB convertor - * Written by Nickols_K. - * palette & yuv & runtime cpu stuff by Michael (mic...@gm...) (under GPL) - */ -#include <inttypes.h> -#include "../config.h" -#include "rgb2rgb.h" -#include "swscale.h" -#include "../mangle.h" -#include "../bswap.h" -#include "../libvo/fastmemcpy.h" -#include "../cpudetect.h" - -#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit - -rgb2rgb_f rgb24to32; -rgb2rgb_f rgb24to16; -rgb2rgb_f rgb24to15; -rgb2rgb_f rgb32to24; -rgb2rgb_f rgb32to16; -rgb2rgb_f rgb32to15; -rgb2rgb_f rgb15to16; -rgb2rgb_f rgb15to24; -rgb2rgb_f rgb15to32; -rgb2rgb_f rgb16to15; -rgb2rgb_f rgb16to24; -rgb2rgb_f rgb16to32; -rgb2rgb_f rgb24tobgr24; -rgb2rgb_f rgb24tobgr16; -rgb2rgb_f rgb24tobgr15; -rgb2rgb_f rgb32tobgr32; -rgb2rgb_f rgb32tobgr16; -rgb2rgb_f rgb32tobgr15; - -planar2packet_f yv12toyuy2; -planar2packet_f yv12touyvy; -planar2packet_f yuv422ptoyuy2; -packet2planar_f yuy2toyv12; -packet2planar_f rgb24toyv12; - -void (* __FASTCALL__ planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); -void (* __FASTCALL__ interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst, - unsigned width, unsigned height, int src1Stride, - int src2Stride, int dstStride); -void (* __FASTCALL__ vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - unsigned width, unsigned height, - int srcStride1, int srcStride2, - int dstStride1, int dstStride2); -void (* __FASTCALL__ yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - uint8_t *dst, - unsigned width, unsigned height, - int srcStride1, int srcStride2, - int srcStride3, int dstStride); -void (* __FASTCALL__ yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, - uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - unsigned int width, unsigned int height, int lumStride, int chromStride); -#if defined( ARCH_X86 ) || defined(ARCH_X86_64) -static const uint64_t mmx_null __attribute__((used)) __attribute__((aligned(8))) = 0x0000000000000000ULL; -static const uint64_t mmx_one __attribute__((used)) __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL; -static const uint64_t mask32b __attribute__((used)) __attribute__((aligned(8))) = 0x000000FF000000FFULL; -static const uint64_t mask32g __attribute__((used)) __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; -static const uint64_t mask32r __attribute__((used)) __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; -static const uint64_t mask32 __attribute__((used)) __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL; -static const uint64_t mask3216br __attribute__((used)) __attribute__((aligned(8)))=0x00F800F800F800F8ULL; -static const uint64_t mask3216g __attribute__((used)) __attribute__((aligned(8)))=0x0000FC000000FC00ULL; -static const uint64_t mask3215g __attribute__((used)) __attribute__((aligned(8)))=0x0000F8000000F800ULL; -static const uint64_t mul3216 __attribute__((used)) __attribute__((aligned(8))) = 0x2000000420000004ULL; -static const uint64_t mul3215 __attribute__((used)) __attribute__((aligned(8))) = 0x2000000820000008ULL; -static const uint64_t mask24b __attribute__((used)) __attribute__((aligned(8))) = 0x00FF0000FF0000FFULL; -static const uint64_t mask24g __attribute__((used)) __attribute__((aligned(8))) = 0xFF0000FF0000FF00ULL; -static const uint64_t mask24r __attribute__((used)) __attribute__((aligned(8))) = 0x0000FF0000FF0000ULL; -static const uint64_t mask24l __attribute__((used)) __attribute__((aligned(8))) = 0x0000000000FFFFFFULL; -static const uint64_t mask24h __attribute__((used)) __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL; -static const uint64_t mask24hh __attribute__((used)) __attribute__((aligned(8))) = 0xffff000000000000ULL; -static const uint64_t mask24hhh __attribute__((used)) __attribute__((aligned(8))) = 0xffffffff00000000ULL; -static const uint64_t mask24hhhh __attribute__((used)) __attribute__((aligned(8))) = 0xffffffffffff0000ULL; -static const uint64_t mask15b __attribute__((used)) __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ -static const uint64_t mask15rg __attribute__((used)) __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ -static const uint64_t mask15s __attribute__((used)) __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL; -static const uint64_t mask15g __attribute__((used)) __attribute__((aligned(8))) = 0x03E003E003E003E0ULL; -static const uint64_t mask15r __attribute__((used)) __attribute__((aligned(8))) = 0x7C007C007C007C00ULL; -#define mask16b mask15b -static const uint64_t mask16g __attribute__((used)) __attribute__((aligned(8))) = 0x07E007E007E007E0ULL; -static const uint64_t mask16r __attribute__((used)) __attribute__((aligned(8))) = 0xF800F800F800F800ULL; -static const uint64_t red_16mask __attribute__((used)) __attribute__((aligned(8))) = 0x0000f8000000f800ULL; -static const uint64_t green_16mask __attribute__((used)) __attribute__((aligned(8)))= 0x000007e0000007e0ULL; -static const uint64_t blue_16mask __attribute__((used)) __attribute__((aligned(8))) = 0x0000001f0000001fULL; -static const uint64_t red_15mask __attribute__((used)) __attribute__((aligned(8))) = 0x00007c000000f800ULL; -static const uint64_t green_15mask __attribute__((used)) __attribute__((aligned(8)))= 0x000003e0000007e0ULL; -static const uint64_t blue_15mask __attribute__((used)) __attribute__((aligned(8))) = 0x0000001f0000001fULL; - -#ifdef FAST_BGR2YV12 -static const uint64_t bgr2YCoeff __attribute__((used)) __attribute__((aligned(8))) = 0x000000210041000DULL; -static const uint64_t bgr2UCoeff __attribute__((used)) __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL; -static const uint64_t bgr2VCoeff __attribute__((used)) __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL; -#else -static const uint64_t bgr2YCoeff __attribute__((used)) __attribute__((aligned(8))) = 0x000020E540830C8BULL; -static const uint64_t bgr2UCoeff __attribute__((used)) __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL; -static const uint64_t bgr2VCoeff __attribute__((used)) __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL; -#endif -static const uint64_t bgr2YOffset __attribute__((used)) __attribute__((aligned(8))) = 0x1010101010101010ULL; -static const uint64_t bgr2UVOffset __attribute__((used)) __attribute__((aligned(8)))= 0x8080808080808080ULL; -static const uint64_t w1111 __attribute__((used)) __attribute__((aligned(8))) = 0x0001000100010001ULL; - -#if 0 -static volatile uint64_t __attribute__((aligned(8))) b5Dither; -static volatile uint64_t __attribute__((aligned(8))) g5Dither; -static volatile uint64_t __attribute__((aligned(8))) g6Dither; -static volatile uint64_t __attribute__((aligned(8))) r5Dither; - -static uint64_t __attribute__((aligned(8))) dither4[2]={ - 0x0103010301030103LL, - 0x0200020002000200LL,}; - -static uint64_t __attribute__((aligned(8))) dither8[2]={ - 0x0602060206020602LL, - 0x0004000400040004LL,}; -#endif -#endif - -#define RGB2YUV_SHIFT 8 -#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) -#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) -#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) -#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5)) -#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5)) -#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5)) -#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5)) -#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5)) -#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5)) - -//Note: we have C, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one -//Plain C versions -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#undef HAVE_SSE2 -#define RENAME(a) a ## _C -#include "rgb2rgb_template.c" - -#if defined( ARCH_X86 ) || defined(ARCH_X86_64) - -//MMX versions -#ifdef CAN_COMPILE_MMX -#undef RENAME -#define HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#undef HAVE_SSE2 -#define RENAME(a) a ## _MMX -#include "rgb2rgb_template.c" -#endif - -//MMX2 versions -#ifdef CAN_COMPILE_MMX2 -#undef RENAME -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW -#undef HAVE_SSE2 -#define RENAME(a) a ## _MMX2 -#include "rgb2rgb_template.c" -#endif - -//3DNOW versions -#ifdef CAN_COMPILE_3DNOW -#undef RENAME -#define HAVE_MMX -#undef HAVE_MMX2 -#define HAVE_3DNOW -#undef HAVE_SSE2 -#define RENAME(a) a ## _3DNOW -#include "rgb2rgb_template.c" -#endif - -#endif //ARCH_X86 - -/* - rgb15->rgb16 Original by Strepto/Astral - ported to gcc & bugfixed : A'rpi - MMX2, 3DNOW optimization by Nickols_K - 32bit c version, and and&add trick by Michael Niedermayer -*/ - -void sws_rgb2rgb_init(int flags){ -#if defined( ARCH_X86 ) || defined(ARCH_X86_64) -#ifdef CAN_COMPILE_MMX2 - if(flags & SWS_CPU_CAPS_MMX2){ - rgb15to16= rgb15to16_MMX2; - rgb15to24= rgb15to24_MMX2; - rgb15to32= rgb15to32_MMX2; - rgb16to24= rgb16to24_MMX2; - rgb16to32= rgb16to32_MMX2; - rgb16to15= rgb16to15_MMX2; - rgb24to16= rgb24to16_MMX2; - rgb24to15= rgb24to15_MMX2; - rgb24to32= rgb24to32_MMX2; - rgb32to16= rgb32to16_MMX2; - rgb32to15= rgb32to15_MMX2; - rgb32to24= rgb32to24_MMX2; - rgb24tobgr15= rgb24tobgr15_MMX2; - rgb24tobgr16= rgb24tobgr16_MMX2; - rgb24tobgr24= rgb24tobgr24_MMX2; - rgb32tobgr32= rgb32tobgr32_MMX2; - rgb32tobgr16= rgb32tobgr16_MMX2; - rgb32tobgr15= rgb32tobgr15_MMX2; - yv12toyuy2= yv12toyuy2_MMX2; - yv12touyvy= yv12touyvy_MMX2; - yuv422ptoyuy2= yuv422ptoyuy2_MMX2; - yuy2toyv12= yuy2toyv12_MMX2; -// uyvytoyv12= uyvytoyv12_MMX2; - yvu9toyv12= yvu9toyv12_MMX2; - planar2x= planar2x_MMX2; - rgb24toyv12= rgb24toyv12_MMX2; - interleaveBytes= interleaveBytes_MMX2; - vu9_to_vu12= vu9_to_vu12_MMX2; - yvu9_to_yuy2= yvu9_to_yuy2_MMX2; - }else -#endif -#ifdef CAN_COMPILE_3DNOW - if(flags & SWS_CPU_CAPS_3DNOW){ - rgb15to16= rgb15to16_3DNOW; - rgb15to24= rgb15to24_3DNOW; - rgb15to32= rgb15to32_3DNOW; - rgb16to24= rgb16to24_3DNOW; - rgb16to32= rgb16to32_3DNOW; - rgb16to15= rgb16to15_3DNOW; - rgb24to16= rgb24to16_3DNOW; - rgb24to15= rgb24to15_3DNOW; - rgb24to32= rgb24to32_3DNOW; - rgb32to16= rgb32to16_3DNOW; - rgb32to15= rgb32to15_3DNOW; - rgb32to24= rgb32to24_3DNOW; - rgb24tobgr15= rgb24tobgr15_3DNOW; - rgb24tobgr16= rgb24tobgr16_3DNOW; - rgb24tobgr24= rgb24tobgr24_3DNOW; - rgb32tobgr32= rgb32tobgr32_3DNOW; - rgb32tobgr16= rgb32tobgr16_3DNOW; - rgb32tobgr15= rgb32tobgr15_3DNOW; - yv12toyuy2= yv12toyuy2_3DNOW; - yv12touyvy= yv12touyvy_3DNOW; - yuv422ptoyuy2= yuv422ptoyuy2_3DNOW; - yuy2toyv12= yuy2toyv12_3DNOW; -// uyvytoyv12= uyvytoyv12_3DNOW; - yvu9toyv12= yvu9toyv12_3DNOW; - planar2x= planar2x_3DNOW; - rgb24toyv12= rgb24toyv12_3DNOW; - interleaveBytes= interleaveBytes_3DNOW; - vu9_to_vu12= vu9_to_vu12_3DNOW; - yvu9_to_yuy2= yvu9_to_yuy2_3DNOW; - }else -#endif -#ifdef CAN_COMPILE_MMX - if(flags & SWS_CPU_CAPS_MMX){ - rgb15to16= rgb15to16_MMX; - rgb15to24= rgb15to24_MMX; - rgb15to32= rgb15to32_MMX; - rgb16to24= rgb16to24_MMX; - rgb16to32= rgb16to32_MMX; - rgb16to15= rgb16to15_MMX; - rgb24to16= rgb24to16_MMX; - rgb24to15= rgb24to15_MMX; - rgb24to32= rgb24to32_MMX; - rgb32to16= rgb32to16_MMX; - rgb32to15= rgb32to15_MMX; - rgb32to24= rgb32to24_MMX; - rgb24tobgr15= rgb24tobgr15_MMX; - rgb24tobgr16= rgb24tobgr16_MMX; - rgb24tobgr24= rgb24tobgr24_MMX; - rgb32tobgr32= rgb32tobgr32_MMX; - rgb32tobgr16= rgb32tobgr16_MMX; - rgb32tobgr15= rgb32tobgr15_MMX; - yv12toyuy2= yv12toyuy2_MMX; - yv12touyvy= yv12touyvy_MMX; - yuv422ptoyuy2= yuv422ptoyuy2_MMX; - yuy2toyv12= yuy2toyv12_MMX; -// uyvytoyv12= uyvytoyv12_MMX; - yvu9toyv12= yvu9toyv12_MMX; - planar2x= planar2x_MMX; - rgb24toyv12= rgb24toyv12_MMX; - interleaveBytes= interleaveBytes_MMX; - vu9_to_vu12= vu9_to_vu12_MMX; - yvu9_to_yuy2= yvu9_to_yuy2_MMX; - }else -#endif -#endif - { - rgb15to16= rgb15to16_C; - rgb15to24= rgb15to24_C; - rgb15to32= rgb15to32_C; - rgb16to24= rgb16to24_C; - rgb16to32= rgb16to32_C; - rgb16to15= rgb16to15_C; - rgb24to16= rgb24to16_C; - rgb24to15= rgb24to15_C; - rgb24to32= rgb24to32_C; - rgb32to16= rgb32to16_C; - rgb32to15= rgb32to15_C; - rgb32to24= rgb32to24_C; - rgb24tobgr15= rgb24tobgr15_C; - rgb24tobgr16= rgb24tobgr16_C; - rgb24tobgr24= rgb24tobgr24_C; - rgb32tobgr32= rgb32tobgr32_C; - rgb32tobgr16= rgb32tobgr16_C; - rgb32tobgr15= rgb32tobgr15_C; - yv12toyuy2= yv12toyuy2_C; - yv12touyvy= yv12touyvy_C; - yuv422ptoyuy2= yuv422ptoyuy2_C; - yuy2toyv12= yuy2toyv12_C; -// uyvytoyv12= uyvytoyv12_C; - yvu9toyv12= yvu9toyv12_C; - planar2x= planar2x_C; - rgb24toyv12= rgb24toyv12_C; - interleaveBytes= interleaveBytes_C; - vu9_to_vu12= vu9_to_vu12_C; - yvu9_to_yuy2= yvu9_to_yuy2_C; - } -} - -/** - * Pallete is assumed to contain bgr32 - */ -void __FASTCALL__ palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; - -/* - for(i=0; i<num_pixels; i++) - ((unsigned *)dst)[i] = ((unsigned *)palette)[ src[i] ]; -*/ - - for(i=0; i<num_pixels; i++) - { -#ifdef WORDS_BIGENDIAN - dst[3]= palette[ src[i]*4+2 ]; - dst[2]= palette[ src[i]*4+1 ]; - dst[1]= palette[ src[i]*4+0 ]; -#else - //FIXME slow? - dst[0]= palette[ src[i]*4+2 ]; - dst[1]= palette[ src[i]*4+1 ]; - dst[2]= palette[ src[i]*4+0 ]; -// dst[3]= 0; /* do we need this cleansing? */ -#endif - dst+= 4; - } -} - -void __FASTCALL__ palette8tobgr32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; - for(i=0; i<num_pixels; i++) - { -#ifdef WORDS_BIGENDIAN - dst[3]= palette[ src[i]*4+0 ]; - dst[2]= palette[ src[i]*4+1 ]; - dst[1]= palette[ src[i]*4+2 ]; -#else - //FIXME slow? - dst[0]= palette[ src[i]*4+0 ]; - dst[1]= palette[ src[i]*4+1 ]; - dst[2]= palette[ src[i]*4+2 ]; -// dst[3]= 0; /* do we need this cleansing? */ -#endif - dst+= 4; - } -} - -/** - * Pallete is assumed to contain bgr32 - */ -void __FASTCALL__ palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; -/* - writes 1 byte o much and might cause alignment issues on some architectures? - for(i=0; i<num_pixels; i++) - ((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[ src[i] ]; -*/ - for(i=0; i<num_pixels; i++) - { - //FIXME slow? - dst[0]= palette[ src[i]*4+2 ]; - dst[1]= palette[ src[i]*4+1 ]; - dst[2]= palette[ src[i]*4+0 ]; - dst+= 3; - } -} - -void __FASTCALL__ palette8tobgr24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; -/* - writes 1 byte o much and might cause alignment issues on some architectures? - for(i=0; i<num_pixels; i++) - ((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[ src[i] ]; -*/ - for(i=0; i<num_pixels; i++) - { - //FIXME slow? - dst[0]= palette[ src[i]*4+0 ]; - dst[1]= palette[ src[i]*4+1 ]; - dst[2]= palette[ src[i]*4+2 ]; - dst+= 3; - } -} - -/** - * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette - */ -void __FASTCALL__ palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; - for(i=0; i<num_pixels; i++) - { - unsigned char r,g,b; - b=palette[src[i]*4+0]; - g=palette[src[i]*4+1]; - r=palette[src[i]*4+2]; - ((uint16_t *)dst)[i]=((b&0xF8)<<8)+((g&0xFC)<<3)+(r>>3); - } -} -void __FASTCALL__ palette8tobgr16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; - for(i=0; i<num_pixels; i++) - { - unsigned char r,g,b; - r=palette[src[i]*4+0]; - g=palette[src[i]*4+1]; - b=palette[src[i]*4+2]; - ((uint16_t *)dst)[i]=((b&0xF8)<<8)+((g&0xFC)<<3)+(r>>3); - } -} - -/** - * Pallete is assumed to contain bgr15, see rgb32to15 to convert the palette - */ -void __FASTCALL__ palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; - for(i=0; i<num_pixels; i++) - { - unsigned char r,g,b; - b=palette[src[i]*4+0]; - g=palette[src[i]*4+1]; - r=palette[src[i]*4+2]; - ((uint16_t *)dst)[i]=((b&0xF8)<<7)+((g&0xF8)<<2)+(r>>3); - } -} -void __FASTCALL__ palette8tobgr15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette) -{ - unsigned i; - for(i=0; i<num_pixels; i++) - { - unsigned char r,g,b; - r=palette[src[i]*4+0]; - g=palette[src[i]*4+1]; - b=palette[src[i]*4+2]; - ((uint16_t *)dst)[i]=((b&0xF8)<<7)+((g&0xF8)<<2)+(r>>3); - } -} - -void __FASTCALL__ rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - unsigned num_pixels = src_size >> 2; - for(i=0; i<num_pixels; i++) - { - dst[3*i + 0] = src[4*i + 2]; - dst[3*i + 1] = src[4*i + 1]; - dst[3*i + 2] = src[4*i + 0]; - } -} - -void __FASTCALL__ rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - for(i=0; 3*i<src_size; i++) - { - dst[4*i + 0] = src[3*i + 2]; - dst[4*i + 1] = src[3*i + 1]; - dst[4*i + 2] = src[3*i + 0]; - dst[4*i + 3] = 0; - } -} - -void __FASTCALL__ rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - const uint16_t *end; - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (uint16_t *)src; - end = s + src_size/2; - while(s < end) - { - register uint16_t bgr; - bgr = *s++; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; - *d++ = 0; - } -} - -void __FASTCALL__ rgb16tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - const uint16_t *end; - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (const uint16_t *)src; - end = s + src_size/2; - while(s < end) - { - register uint16_t bgr; - bgr = *s++; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; - } -} - -void __FASTCALL__ rgb16tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - unsigned num_pixels = src_size >> 1; - - for(i=0; i<num_pixels; i++) - { - unsigned b,g,r; - register uint16_t rgb; - rgb = src[2*i]; - r = rgb&0x1F; - g = (rgb&0x7E0)>>5; - b = (rgb&0xF800)>>11; - dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11); - } -} - -void __FASTCALL__ rgb16tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - unsigned num_pixels = src_size >> 1; - - for(i=0; i<num_pixels; i++) - { - unsigned b,g,r; - register uint16_t rgb; - rgb = src[2*i]; - r = rgb&0x1F; - g = (rgb&0x7E0)>>5; - b = (rgb&0xF800)>>11; - dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10); - } -} - -void __FASTCALL__ rgb15tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - const uint16_t *end; - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (const uint16_t *)src; - end = s + src_size/2; - while(s < end) - { - register uint16_t bgr; - bgr = *s++; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; - *d++ = 0; - } -} - -void __FASTCALL__ rgb15tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - const uint16_t *end; - uint8_t *d = (uint8_t *)dst; - const uint16_t *s = (uint16_t *)src; - end = s + src_size/2; - while(s < end) - { - register uint16_t bgr; - bgr = *s++; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; - } -} - -void __FASTCALL__ rgb15tobgr16(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - unsigned num_pixels = src_size >> 1; - - for(i=0; i<num_pixels; i++) - { - unsigned b,g,r; - register uint16_t rgb; - rgb = src[2*i]; - r = rgb&0x1F; - g = (rgb&0x3E0)>>5; - b = (rgb&0x7C00)>>10; - dst[2*i] = (b&0x1F) | ((g&0x3F)<<5) | ((r&0x1F)<<11); - } -} - -void __FASTCALL__ rgb15tobgr15(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - unsigned num_pixels = src_size >> 1; - - for(i=0; i<num_pixels; i++) - { - unsigned b,g,r; - register uint16_t rgb; - rgb = src[2*i]; - r = rgb&0x1F; - g = (rgb&0x3E0)>>5; - b = (rgb&0x7C00)>>10; - dst[2*i] = (b&0x1F) | ((g&0x1F)<<5) | ((r&0x1F)<<10); - } -} - -void __FASTCALL__ rgb8tobgr8(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - unsigned i; - unsigned num_pixels = src_size; - for(i=0; i<num_pixels; i++) - { - unsigned b,g,r; - register uint8_t rgb; - rgb = src[i]; - r = (rgb&0x07); - g = (rgb&0x38)>>3; - b = (rgb&0xC0)>>6; - dst[i] = ((b<<1)&0x07) | ((g&0x07)<<3) | ((r&0x03)<<6); - } -} Deleted: mplayerxp/postproc/rgb2rgb.h =================================================================== --- mplayerxp/postproc/rgb2rgb.h 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/postproc/rgb2rgb.h 2009-12-06 12:01:50 UTC (rev 19) @@ -1,134 +0,0 @@ -/* - * - * rgb2rgb.h, Software RGB to RGB convertor - * pluralize by Software PAL8 to RGB convertor - * Software YUV to YUV convertor - * Software YUV to RGB convertor - */ - -#ifndef RGB2RGB_INCLUDED -#define RGB2RGB_INCLUDED - -// Note: do not fix the dependence on stdio.h - -/* A full collection of rgb to rgb(bgr) convertors */ -typedef void (* __FASTCALL__ rgb2rgb_f)(const uint8_t *src,uint8_t *dst,unsigned src_size); - -extern rgb2rgb_f rgb24to32; -extern rgb2rgb_f rgb24to16; -extern rgb2rgb_f rgb24to15; -extern rgb2rgb_f rgb32to24; -extern rgb2rgb_f rgb32to16; -extern rgb2rgb_f rgb32to15; -extern rgb2rgb_f rgb15to16; -extern rgb2rgb_f rgb15to24; -extern rgb2rgb_f rgb15to32; -extern rgb2rgb_f rgb16to15; -extern rgb2rgb_f rgb16to24; -extern rgb2rgb_f rgb16to32; -extern rgb2rgb_f rgb24tobgr24; -extern rgb2rgb_f rgb24tobgr16; -extern rgb2rgb_f rgb24tobgr15; -extern rgb2rgb_f rgb32tobgr32; -extern rgb2rgb_f rgb32tobgr16; -extern rgb2rgb_f rgb32tobgr15; - -extern void __FASTCALL__ rgb24tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb32tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb16tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb16tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb16tobgr16(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb16tobgr15(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb15tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb15tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb15tobgr16(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb15tobgr15(const uint8_t *src, uint8_t *dst, unsigned src_size); -extern void __FASTCALL__ rgb8tobgr8(const uint8_t *src, uint8_t *dst, unsigned src_size); - - -extern void __FASTCALL__ palette8torgb32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8tobgr32(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8torgb24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8tobgr24(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8torgb16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8tobgr16(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8torgb15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); -extern void __FASTCALL__ palette8tobgr15(const uint8_t *src, uint8_t *dst, unsigned num_pixels, const uint8_t *palette); - -/** - * - * height should be a multiple of 2 and width should be a multiple of 16 (if this is a - * problem for anyone then tell me, and ill fix it) - * chrominance data is only taken from every secound line others are ignored FIXME write HQ version - */ -//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - -/** - * - * height should be a multiple of 2 and width should be a multiple of 16 (if this is a - * problem for anyone then tell me, and ill fix it) - */ - -typedef void (* __FASTCALL__ planar2packet_f)(const uint8_t *ysrc, const uint8_t *usrc, - const uint8_t *vsrc, uint8_t *dst, - unsigned int width, unsigned int height, - int lumStride, int chromStride, int dstStride); -typedef void (* __FASTCALL__ packet2planar_f)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - unsigned int width, unsigned int height, - int lumStride, int chromStride, int srcStride); -/** - * - * width should be a multiple of 16 - */ -extern planar2packet_f yv12toyuy2; -extern planar2packet_f yuv422ptoyuy2; - -/** - * - * height should be a multiple of 2 and width should be a multiple of 16 (if this is a - * problem for anyone then tell me, and ill fix it) - */ -extern packet2planar_f yuy2toyv12; - -/** - * - * height should be a multiple of 2 and width should be a multiple of 16 (if this is a - * problem for anyone then tell me, and ill fix it) - */ -extern planar2packet_f yv12touyvy; - -/** - * - * height should be a multiple of 2 and width should be a multiple of 2 (if this is a - * problem for anyone then tell me, and ill fix it) - * chrominance data is only taken from every secound line others are ignored FIXME write HQ version - */ -extern packet2planar_f rgb24toyv12; -extern void (* __FASTCALL__ planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, int srcStride, int dstStride); - -extern void (* __FASTCALL__ interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dst, - unsigned width, unsigned height, int src1Stride, - int src2Stride, int dstStride); - -extern void (* __FASTCALL__ yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, - uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - unsigned int width, unsigned int height, int lumStride, int chromStride); - -extern void (* __FASTCALL__ vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, - uint8_t *dst1, uint8_t *dst2, - unsigned width, unsigned height, - int srcStride1, int srcStride2, - int dstStride1, int dstStride2); - -extern void (* __FASTCALL__ yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, - uint8_t *dst, - unsigned width, unsigned height, - int srcStride1, int srcStride2, - int srcStride3, int dstStride); - -void sws_rgb2rgb_init(int flags); - -#define MODE_RGB 0x1 -#define MODE_BGR 0x2 - -#endif Deleted: mplayerxp/postproc/rgb2rgb_template.c =================================================================== --- mplayerxp/postproc/rgb2rgb_template.c 2009-12-06 09:53:07 UTC (rev 18) +++ mplayerxp/postproc/rgb2rgb_template.c 2009-12-06 12:01:50 UTC (rev 19) @@ -1,3202 +0,0 @@ -/* - * - * rgb2rgb.c, Software RGB to RGB convertor - * pluralize by Software PAL8 to RGB convertor - * Software YUV to YUV convertor - * Software YUV to RGB convertor - * Written by Nickols_K. - * palette & yuv & runtime cpu stuff by Michael (mic...@gm...) (under GPL) - * lot of big-endian byteorder fixes by Alex Beregszaszi - */ - -#include <stddef.h> -#include <inttypes.h> /* for __WORDSIZE */ - -#ifndef __WORDSIZE -// #warning You have misconfigured system and probably will lose performance! -#define __WORDSIZE MP_WORDSIZE -#endif - -#undef PREFETCH -#undef MOVNTQ -#undef EMMS -#undef SFENCE -#undef MMREG_SIZE -#undef PREFETCHW -#undef PAVGB - -#ifdef HAVE_SSE2 -#define MMREG_SIZE 16 -#else -#define MMREG_SIZE 8 -#endif - -#ifdef HAVE_3DNOW -#define PREFETCH "prefetch" -#define PREFETCHW "prefetchw" -#define PAVGB "pavgusb" -#elif defined ( HAVE_MMX2 ) -#define PREFETCH "prefetchnta" -#define PREFETCHW "prefetcht0" -#define PAVGB "pavgb" -#else -#define PREFETCH "/nop" -#define PREFETCHW "/nop" -#endif - -#ifdef HAVE_3DNOW -/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#ifdef HAVE_MMX2 -#define MOVNTQ "movntq" -#define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE "/nop" -#endif - -static void __FASTCALL__ RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 23; - __asm __volatile("movq %0, %%mm7"::"m"(mask32) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "punpckldq 3%1, %%mm0\n\t" - "movd 6%1, %%mm1\n\t" - "punpckldq 9%1, %%mm1\n\t" - "movd 12%1, %%mm2\n\t" - "punpckldq 15%1, %%mm2\n\t" - "movd 18%1, %%mm3\n\t" - "punpckldq 21%1, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm1\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm3\n\t" - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm1, 8%0\n\t" - MOVNTQ" %%mm2, 16%0\n\t" - MOVNTQ" %%mm3, 24%0" - :"=m"(*dest) - :"m"(*s) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - dest += 32; - s += 24; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { -#ifdef WORDS_BIGENDIAN - *dest++ = 0; - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; -#else - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - *dest++ = 0; -#endif - } -} - -static void __FASTCALL__ RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned src_size) -{ - uint8_t *dest = dst; - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); - mm_end = end - 31; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq 8%1, %%mm1\n\t" - "movq 16%1, %%mm4\n\t" - "movq 24%1, %%mm5\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm1, %%mm3\n\t" - "movq %%mm4, %%mm6\n\t" - "movq %%mm5, %%mm7\n\t" - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm3\n\t" - "psrlq $8, %%mm6\n\t" - "psrlq $8, %%mm7\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm1\n\t" - "pand %2, %%mm4\n\t" - "pand %2, %%mm5\n\t" - "pand %3, %%mm2\n\t" - "pand %3, %%mm3\n\t" - "pand %3, %%mm6\n\t" - "pand %3, %%mm7\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm3, %%mm1\n\t" - "por %%mm6, %%mm4\n\t" - "por %%mm7, %%mm5\n\t" - - "movq %%mm1, %%mm2\n\t" - "movq %%mm4, %%mm3\n\t" - "psllq $48, %%mm2\n\t" - "psllq $32, %%mm3\n\t" - "pand %4, %%mm2\n\t" - "pand %5, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "psrlq $16, %%mm1\n\t" - "psrlq $32, %%mm4\n\t" - "psllq $16, %%mm5\n\t" - "por %%mm3, %%mm1\n\t" - "pand %6, %%mm5\n\t" - "por %%mm5, %%mm4\n\t" - - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm1, 8%0\n\t" - MOVNTQ" %%mm4, 16%0" - :"=m"(*dest) - :"m"(*s),"m"(mask24l), - "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - dest += 24; - s += 32; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { -#ifdef WORDS_BIGENDIAN - s++; - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; -#else - *dest++ = *s++; - *dest++ = *s++; - *dest++ = *s++; - s++; -#endif - } -} - -/* - Original by Strepto/Astral - ported to gcc & bugfixed : A'rpi - MMX2, 3DNOW optimization by Nickols_K - 32bit c version, and and&add trick by Michael Niedermayer -*/ -static void __FASTCALL__ RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) -{ - register const uint8_t* s=src; - register uint8_t* d=dst; - register const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s)); - __asm __volatile("movq %0, %%mm4"::"m"(mask15s) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 15; - while(s<mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq 8%1, %%mm2\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm2, %%mm3\n\t" - "pand %%mm4, %%mm0\n\t" - "pand %%mm4, %%mm2\n\t" - "paddw %%mm1, %%mm0\n\t" - "paddw %%mm3, %%mm2\n\t" - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d+=16; - s+=16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - mm_end = end - 3; - while(s < mm_end) - { - register unsigned x= *((uint32_t *)s); - *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); - d+=4; - s+=4; - } - if(s < end) - { - register unsigned short x= *((uint16_t *)s); - *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); - } -} - -static void __FASTCALL__ RENAME(rgb16to15)(const uint8_t *src,uint8_t *dst,unsigned src_size) -{ - register const uint8_t* s=src; - register uint8_t* d=dst; - register const uint8_t *end; - const uint8_t *mm_end; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*s)); - __asm __volatile( "movq %0, %%mm7\n\t" - "movq %1, %%mm6"::"m"(mask15rg),"m"(mask15b) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 15; - while(s<mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movq %1, %%mm0\n\t" - "movq 8%1, %%mm2\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm2, %%mm3\n\t" - "psrlq $1, %%mm0\n\t" - "psrlq $1, %%mm2\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm3\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm3, %%mm2\n\t" - MOVNTQ" %%mm0, %0\n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d+=16; - s+=16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - mm_end = end - 3; - while(s < mm_end) - { - register uint32_t x= *((uint32_t *)s); - *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); - s+=4; - d+=4; - } - if(s < end) - { - register uint16_t x= *((uint16_t *)s); - *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); - s+=2; - d+=2; - } -} - -static void __FASTCALL__ RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) - asm volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - ".balign 16 \n\t" - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $5, %%mm0 \n\t" - "pslld $11, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) - : "memory", "cc" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#else - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 16; - } -#endif - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - // FIXME on bigendian - const int src= *s; s += 4; - *d++ = ((src&0xFF)>>3) + ((src&0xFC00)>>5) + ((src&0xF80000)>>8); -// *d++ = ((src>>3)&0x1F) + ((src>>5)&0x7E0) + ((src>>8)&0xF800); - } -} - -static void __FASTCALL__ RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $8, %%mm0\n\t" - "psllq $8, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - // FIXME on bigendian - const int src= *s; s += 4; - *d++ = ((src&0xF8)<<8) + ((src&0xFC00)>>5) + ((src&0xF80000)>>19); - } -} - -static void __FASTCALL__ RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which cpus this is faster, on Athlon its slightly faster) - asm volatile( - "movq %3, %%mm5 \n\t" - "movq %4, %%mm6 \n\t" - "movq %5, %%mm7 \n\t" - ".balign 16 \n\t" - "1: \n\t" - PREFETCH" 32(%1) \n\t" - "movd (%1), %%mm0 \n\t" - "movd 4(%1), %%mm3 \n\t" - "punpckldq 8(%1), %%mm0 \n\t" - "punpckldq 12(%1), %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm3, %%mm4 \n\t" - "pand %%mm6, %%mm0 \n\t" - "pand %%mm6, %%mm3 \n\t" - "pmaddwd %%mm7, %%mm0 \n\t" - "pmaddwd %%mm7, %%mm3 \n\t" - "pand %%mm5, %%mm1 \n\t" - "pand %%mm5, %%mm4 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "psrld $6, %%mm0 \n\t" - "pslld $10, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, (%0) \n\t" - "add $16, %1 \n\t" - "add $8, %0 \n\t" - "cmp %2, %1 \n\t" - " jb 1b \n\t" - : "+r" (d), "+r"(s) - : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) - : "memory", "cc" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#else - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $9, %%mm2\n\t" - "psrlq $9, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 16; - } -#endif - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - // FIXME on bigendian - const int src= *s; s += 4; - *d++ = ((src&0xFF)>>3) + ((src&0xF800)>>6) + ((src&0xF80000)>>9); - } -} - -static void __FASTCALL__ RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 4%1, %%mm3\n\t" - "punpckldq 8%1, %%mm0\n\t" - "punpckldq 12%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $7, %%mm0\n\t" - "psllq $7, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 16; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - // FIXME on bigendian - const int src= *s; s += 4; - *d++ = ((src&0xF8)<<7) + ((src&0xF800)>>6) + ((src&0xF80000)>>19); - } -} - -static void __FASTCALL__ RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 11; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $8, %%mm2\n\t" - "psrlq $8, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - const int b= *s++; - const int g= *s++; - const int r= *s++; - *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -} - -static void __FASTCALL__ RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, unsigned int src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_16mask),"m"(green_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $8, %%mm0\n\t" - "psllq $8, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $5, %%mm1\n\t" - "psrlq $5, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - const int r= *s++; - const int g= *s++; - const int b= *s++; - *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); - } -} - -static void __FASTCALL__ RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 11; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psrlq $3, %%mm0\n\t" - "psrlq $3, %%mm3\n\t" - "pand %2, %%mm0\n\t" - "pand %2, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $9, %%mm2\n\t" - "psrlq $9, %%mm5\n\t" - "pand %%mm7, %%mm2\n\t" - "pand %%mm7, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - const int b= *s++; - const int g= *s++; - const int r= *s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -} - -static void __FASTCALL__ RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, unsigned src_size) -{ - const uint8_t *s = src; - const uint8_t *end; -#ifdef HAVE_MMX - const uint8_t *mm_end; -#endif - uint16_t *d = (uint16_t *)dst; - end = s + src_size; -#ifdef HAVE_MMX - __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm __volatile( - "movq %0, %%mm7\n\t" - "movq %1, %%mm6\n\t" - ::"m"(red_15mask),"m"(green_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - mm_end = end - 15; - while(s < mm_end) - { - __asm __volatile( - PREFETCH" 32%1\n\t" - "movd %1, %%mm0\n\t" - "movd 3%1, %%mm3\n\t" - "punpckldq 6%1, %%mm0\n\t" - "punpckldq 9%1, %%mm3\n\t" - "movq %%mm0, %%mm1\n\t" - "movq %%mm0, %%mm2\n\t" - "movq %%mm3, %%mm4\n\t" - "movq %%mm3, %%mm5\n\t" - "psllq $7, %%mm0\n\t" - "psllq $7, %%mm3\n\t" - "pand %%mm7, %%mm0\n\t" - "pand %%mm7, %%mm3\n\t" - "psrlq $6, %%mm1\n\t" - "psrlq $6, %%mm4\n\t" - "pand %%mm6, %%mm1\n\t" - "pand %%mm6, %%mm4\n\t" - "psrlq $19, %%mm2\n\t" - "psrlq $19, %%mm5\n\t" - "pand %2, %%mm2\n\t" - "pand %2, %%mm5\n\t" - "por %%mm1, %%mm0\n\t" - "por %%mm4, %%mm3\n\t" - "por %%mm2, %%mm0\n\t" - "por %%mm5, %%mm3\n\t" - "psllq $16, %%mm3\n\t" - "por %%mm3, %%mm0\n\t" - MOVNTQ" %%mm0, %0\n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask) - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - d += 4; - s += 12; - } - __asm __volatile(SFENCE:::"memory"); - __asm __volatile(EMMS:::"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - while(s < end) - { - const int r= *s++; - const int g= *s++; - const int b= *s++; - *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); - } -} - -/* - I use here less accurate approximation by simply - left-shifti... [truncated message content] |