[Mplayerxp-cvslog] SF.net SVN: mplayerxp:[107] mplayerxp
Brought to you by:
olov
From: <nic...@us...> - 2010-01-18 15:55:32
|
Revision: 107 http://mplayerxp.svn.sourceforge.net/mplayerxp/?rev=107&view=rev Author: nickols_k Date: 2010-01-18 15:55:23 +0000 (Mon, 18 Jan 2010) Log Message: ----------- fastmemcpy-related improvements Modified Paths: -------------- DOCS/mplayerxp.1 mplayerxp/cfg-mplayer.h mplayerxp/dec_ahead.h mplayerxp/libmpcodecs/ad_mp3.c mplayerxp/libmpcodecs/dec_video.c mplayerxp/libmpcodecs/vd_ffmpeg.c mplayerxp/libvo/aclib.c mplayerxp/libvo/aclib_template.c mplayerxp/libvo/dri_vo.h mplayerxp/libvo/fastmemcpy.h mplayerxp/libvo/osd.c mplayerxp/libvo/osd.h mplayerxp/libvo/osd_template.c mplayerxp/libvo/video_out.c mplayerxp/libvo/video_out.h mplayerxp/libvo/vo_x11.c mplayerxp/libvo/vosub_vidix.c mplayerxp/mp_image.h mplayerxp/postproc/af_export.c mplayerxp/postproc/af_scaletempo.c mplayerxp/postproc/libmenu/menu.c mplayerxp/postproc/vf_delogo.c mplayerxp/postproc/vf_dint.c mplayerxp/postproc/vf_down3dright.c mplayerxp/postproc/vf_expand.c mplayerxp/postproc/vf_il.c mplayerxp/postproc/vf_mirror.c mplayerxp/postproc/vf_noise.c mplayerxp/postproc/vf_panscan.c mplayerxp/postproc/vf_rectangle.c mplayerxp/postproc/vf_scale.c mplayerxp/postproc/vf_softpulldown.c mplayerxp/postproc/vf_unsharp.c mplayerxp/postproc/vf_vo.c mplayerxp/postproc/vf_yvu9.c Removed Paths: ------------- mplayerxp/libvo/aclib_x86_64.h Modified: DOCS/mplayerxp.1 =================================================================== --- DOCS/mplayerxp.1 2010-01-17 18:46:44 UTC (rev 106) +++ DOCS/mplayerxp.1 2010-01-18 15:55:23 UTC (rev 107) @@ -210,7 +210,7 @@ .TP modprobe dhahelper .TP -mplayerxp -vo xvidix -core.xp -video.bm -video.fs -video.zoom videoout.avi +mplayerxp -vo xvidix -core.xp -video.bm -video.fs -video.aspect-ratio videoout.avi .SS .I Another ways to speedup playback: In general, there are two ways to increase performance of playback - @@ -666,13 +666,13 @@ -video.monitorpixelaspect=4:3 or 1.3333 .TP -.B \-video.x\ <x> -scale image to x width (if driver supports) +.B \-video.width\ <x> +scale image to width (if driver supports) .TP -.B \-video.y\ <y> -scale image to y height (if driver supports) +.B \-video.height\ <y> +scale image to height (if driver supports) .TP -.B \-video.xy\ <factor> +.B \-video.zoom\ <factor> scale image by <factor> .TP @@ -717,7 +717,7 @@ try to change to a different video mode. dga2, x11 (XF86VidMode) and sdl output drivers support it. .TP -.B \-video.zoom +.B \-video.aspect-ratio Keeps aspect ratio on the screen .I [default] .TP @@ -809,7 +809,7 @@ .TP .B \-video.sws\ <software\ scaler\ type> this option sets the quality (and speed, respectively) of the software scaler, -with the -zoom option. For example with x11 or other outputs which lack +with the -video.aspect-ratio option. For example with x11 or other outputs which lack hardware scaler. Possible settings are: 0 - fast bilinear (default) Modified: mplayerxp/cfg-mplayer.h =================================================================== --- mplayerxp/cfg-mplayer.h 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/cfg-mplayer.h 2010-01-18 15:55:23 UTC (rev 107) @@ -132,12 +132,12 @@ */ static const config_t xpcore_config[]={ - {"xp", &enable_xp, CONF_TYPE_INT, CONF_RANGE, 0, 4, NULL, "starts MPlayerXP in multi-thread and multi-buffer XP mode"}, - {"noxp", &enable_xp, CONF_TYPE_FLAG, 0, 1, 0, NULL, "starts MPlayerXP in single-thread mode"}, + {"xp", &enable_xp, CONF_TYPE_INT, CONF_RANGE, 0, 4, NULL, "starts MPlayerXP in multi-thread and multi-buffer XP mode"}, + {"noxp", &enable_xp, CONF_TYPE_FLAG, 0, 1, 0, NULL, "starts MPlayerXP in single-thread mode"}, {"dump", &stream_dump, CONF_TYPE_STRING, 0, 0, 0, NULL, "specifies dump type and name for the dump of stream"}, - {"gomp", &enable_gomp, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables usage of OpenMP extensions"}, - {"nogomp", &enable_gomp, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables usage of OpenMP extensions"}, - {"da_buffs", &vo_da_buffs, CONF_TYPE_INT, CONF_RANGE, 4, 1024, NULL, "specifies number of buffers for decoding-ahead in XP mode"}, + {"gomp", &enable_gomp, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables usage of OpenMP extensions"}, + {"nogomp", &enable_gomp, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables usage of OpenMP extensions"}, + {"da_buffs", &vo_da_buffs, CONF_TYPE_INT, CONF_RANGE, 4, 1024, NULL, "specifies number of buffers for decoding-ahead in XP mode"}, {"double", &vo_doublebuffering, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables double-buffering for single-thread decoding"}, {"nodouble", &vo_doublebuffering, CONF_TYPE_FLAG, 0, 1, 0, NULL, "enables single-buffer for single-thread decoding"}, {"cache", &stream_cache_size, CONF_TYPE_INT, CONF_RANGE, 4, 65536, NULL,"specifies amount of memory for precaching a file/URL"}, @@ -168,32 +168,32 @@ #if defined( ARCH_X86 ) || defined(ARCH_X86_64) static const config_t cpu_config[]={ - {"mmx", &x86_mmx, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of MMX extensions of CPU"}, - {"nommx", &x86_mmx, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of MMX extensions of CPU"}, - {"mmx2", &x86_mmx2, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of MMX2 extensions of CPU"}, - {"nommx2", &x86_mmx2, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of MMX2 extensions of CPU"}, - {"3dnow", &x86_3dnow, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of 3DNow! extensions of CPU"}, - {"no3dnow", &x86_3dnow, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of 3DNow! extensions of CPU"}, - {"3dnow2", &x86_3dnow2, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of 3DNow-2! extensions of CPU"}, - {"no3dnow2", &x86_3dnow2, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of 3DNow-2! extensions of CPU"}, - {"sse", &x86_sse, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE extensions of CPU"}, - {"nosse", &x86_sse, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE extensions of CPU"}, - {"sse2", &x86_sse2, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE2 extensions of CPU"}, - {"nosse2", &x86_sse2, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE2 extensions of CPU"}, - {"sse3", &x86_sse3, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE3 extensions of CPU"}, - {"nosse3", &x86_sse3, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE3 extensions of CPU"}, - {"ssse3", &x86_ssse3, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSSE3 extensions of CPU"}, - {"nossse3", &x86_ssse3, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSSE3 extensions of CPU"}, - {"sse41", &x86_sse41, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE41 extensions of CPU"}, - {"nosse41", &x86_sse41, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE41 extensions of CPU"}, - {"sse42", &x86_sse42, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE42 extensions of CPU"}, - {"nosse42", &x86_sse42, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE42 extensions of CPU"}, - {"aes", &x86_aes, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of AES extensions of CPU"}, - {"noaes", &x86_aes, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of AES extensions of CPU"}, - {"avx", &x86_avx, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of AVX extensions of CPU"}, - {"noavx", &x86_avx, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of AVX extensions of CPU"}, - {"fma", &x86_fma, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of FMA extensions of CPU"}, - {"nofma", &x86_fma, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of FMA extensions of CPU"}, + {"mmx", &x86_mmx, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of MMX extensions of CPU"}, + {"nommx", &x86_mmx, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of MMX extensions of CPU"}, + {"mmx2", &x86_mmx2, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of MMX2 extensions of CPU"}, + {"nommx2", &x86_mmx2, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of MMX2 extensions of CPU"}, + {"3dnow", &x86_3dnow, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of 3DNow! extensions of CPU"}, + {"no3dnow", &x86_3dnow, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of 3DNow! extensions of CPU"}, + {"3dnow2", &x86_3dnow2, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of 3DNow-2! extensions of CPU"}, + {"no3dnow2", &x86_3dnow2, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of 3DNow-2! extensions of CPU"}, + {"sse", &x86_sse, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE extensions of CPU"}, + {"nosse", &x86_sse, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE extensions of CPU"}, + {"sse2", &x86_sse2, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE2 extensions of CPU"}, + {"nosse2", &x86_sse2, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE2 extensions of CPU"}, + {"sse3", &x86_sse3, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE3 extensions of CPU"}, + {"nosse3", &x86_sse3, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE3 extensions of CPU"}, + {"ssse3", &x86_ssse3, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSSE3 extensions of CPU"}, + {"nossse3", &x86_ssse3, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSSE3 extensions of CPU"}, + {"sse41", &x86_sse41, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE41 extensions of CPU"}, + {"nosse41", &x86_sse41, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE41 extensions of CPU"}, + {"sse42", &x86_sse42, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of SSE42 extensions of CPU"}, + {"nosse42", &x86_sse42, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of SSE42 extensions of CPU"}, + {"aes", &x86_aes, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of AES extensions of CPU"}, + {"noaes", &x86_aes, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of AES extensions of CPU"}, + {"avx", &x86_avx, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of AVX extensions of CPU"}, + {"noavx", &x86_avx, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of AVX extensions of CPU"}, + {"fma", &x86_fma, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of FMA extensions of CPU"}, + {"nofma", &x86_fma, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of FMA extensions of CPU"}, {NULL, NULL, 0, 0, 0, 0, NULL,NULL}, }; #endif @@ -227,7 +227,6 @@ {"framedrop", &frame_dropping, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables frame-dropping on slow systems: decodes all video frames, but skips displaying some ones"}, /*UD*/ {"hardframedrop", &frame_dropping, CONF_TYPE_FLAG, 0, 0, 2, NULL, "enables hard frame-dropping on slow systems: skips displaying and decoding of some frames"}, {"noframedrop", &frame_dropping, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables frame dropping"}, - {"pts", &av_sync_pts, CONF_TYPE_FLAG, 0, 0, 1, NULL, "use PTS-based method of A/V synchronization"}, {"nopts", &av_sync_pts, CONF_TYPE_FLAG, 0, 1, 0, NULL, "use BPS-based method of A/V synchronization"}, {"dap", &dapsync, CONF_TYPE_FLAG, 0, 0, 1, NULL, "use alternative method of A/V synchronization"}, @@ -266,7 +265,7 @@ {"nounicode", &sub_unicode, CONF_TYPE_FLAG, 0, 1, 0, NULL, "tells MPlayerXP to handle the subtitle file as non-UNICODE"}, {"utf8", &sub_utf8, CONF_TYPE_FLAG, 0, 0, 1, NULL, "tells MPlayerXP to handle the subtitle file as UTF8"}, {"noutf8", &sub_utf8, CONF_TYPE_FLAG, 0, 1, 0, NULL, "tells MPlayerXP to handle the subtitle file as non-UTF8"}, - {"pos",&sub_pos, CONF_TYPE_INT, CONF_RANGE, 0, 100, NULL, "specifies vertical shift of subtitles"}, + {"pos",&sub_pos, CONF_TYPE_INT, CONF_RANGE, 0, 100, NULL, "specifies vertical shift of subtitles"}, #endif {"cc", &subcc_enabled, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enable DVD Closed Caption (CC) subtitles"}, {"nocc", &subcc_enabled, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disable DVD Closed Caption (CC) subtitles"}, @@ -291,7 +290,7 @@ static const config_t audio_config[]={ {"on", &has_audio, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables audio-steam playback"}, {"off", &has_audio, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables audio-stream playback"}, - {"mixer", &oss_mixer_device, CONF_TYPE_STRING, 0, 0, 0, NULL, "select audio-mixer device"}, + {"mixer", &oss_mixer_device, CONF_TYPE_STRING, 0, 0, 0, NULL, "select audio-mixer device"}, {"channels", &audio_output_channels, CONF_TYPE_INT, CONF_RANGE, 2, 8, NULL, "select number of audio output channels to be used"}, {"rate", &force_srate, CONF_TYPE_INT, CONF_RANGE, 1000, 8*48000, NULL, "specifies Hz for audio playback"}, {"lang", &audio_lang, CONF_TYPE_STRING, 0, 0, 0, NULL, "specifies language of DVD-audio stream as two-letter country code(s)"}, @@ -304,28 +303,28 @@ }; static const config_t video_config[]={ - {"x", &opt_screen_size_x, CONF_TYPE_INT, CONF_RANGE, 0, 4096, NULL, "scale output image to x width (if driver supports)"}, - {"y", &opt_screen_size_y, CONF_TYPE_INT, CONF_RANGE, 0, 4096, NULL, "scale output image to y height (if driver supports)"}, - {"xy", &screen_size_xy, CONF_TYPE_FLOAT, CONF_RANGE, 0, 4096, NULL, "scale output image by given factor"}, + {"width", &opt_screen_size_x, CONF_TYPE_INT, CONF_RANGE, 0, 4096, NULL, "scale output image to width (if driver supports)"}, + {"height", &opt_screen_size_y, CONF_TYPE_INT, CONF_RANGE, 0, 4096, NULL, "scale output image to height (if driver supports)"}, + {"zoom", &screen_size_xy, CONF_TYPE_FLOAT, CONF_RANGE, 0, 4096, NULL, "scale output image by given factor"}, {"screenw", &vo_screenwidth, CONF_TYPE_INT, CONF_RANGE, 0, 4096, NULL, "specifies the horizontal resolution of the screen (if supported)"}, {"screenh", &vo_screenheight, CONF_TYPE_INT, CONF_RANGE, 0, 4096, NULL, "specifies the vertical resolution of the screen (if supported)"}, {"speed", &playbackspeed_factor, CONF_TYPE_FLOAT, CONF_RANGE, 0.01, 100.0, NULL, "sets playback speed factor"}, {"aspect", &movie_aspect, CONF_TYPE_FLOAT, CONF_RANGE, 0.2, 3.0, NULL, "sets aspect-ratio of movies (autodetect)"}, {"noaspect", &movie_aspect, CONF_TYPE_FLAG, 0, 0, 0, NULL, "unsets aspect-ratio of movies"}, + {"aspect-ratio", &softzoom, CONF_TYPE_FLAG, 0, 0, 1, NULL, "keeps aspect-ratio of the movie during window resize"}, + {"noaspect-ratio", &softzoom, CONF_TYPE_FLAG, 0, 1, 0, NULL, "render movie to the user-defined window's geometry"}, {"monitorpixelaspect", &monitor_pixel_aspect, CONF_TYPE_FLOAT, CONF_RANGE, 0.2, 9.0, NULL, "sets the aspect-ratio of a single pixel of TV screen"}, - {"vm", &vidmode, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables video-mode changing during playback"}, + {"vm", &vidmode, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables video-mode changing during playback"}, {"novm", &vidmode, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables video-mode changing during playback"}, {"fs", &fullscreen, CONF_TYPE_FLAG, 0, 0, 1, NULL, "fullscreen playback"}, {"nofs", &fullscreen, CONF_TYPE_FLAG, 0, 1, 0, NULL, "windowed playback"}, {"fsmode", &vo_fsmode, CONF_TYPE_INT, CONF_RANGE, 0, 15, NULL, "enables workaround for some fullscreen related problems"}, - {"zoom", &softzoom, CONF_TYPE_FLAG, 0, 0, 1, NULL, "keeps aspect-ratio of the movie during window resize"}, - {"nozoom", &softzoom, CONF_TYPE_FLAG, 0, 1, 0, NULL, "render movie to the user-defined window's geometry"}, - {"flip", &flip, CONF_TYPE_FLAG, 0, -1, 1, NULL, "flip output image upside-down"}, - {"noflip", &flip, CONF_TYPE_FLAG, 0, -1, 0, NULL, "render output image as is"}, - {"bpp", &vo_dbpp, CONF_TYPE_INT, CONF_RANGE, 0, 32, NULL, "use different color depth than autodetect"}, - {"bm", &vo_use_bm, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of bus-mastering (if it available for given OS/videocard)"}, - {"bm2", &vo_use_bm, CONF_TYPE_FLAG, 0, 0, 2, NULL, "enables using of bus-mastering to store all decoded-ahead frames in video-memory"}, - {"nobm", &vo_use_bm, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of bus-mastering"}, + {"flip", &flip, CONF_TYPE_FLAG, 0, -1, 1, NULL, "flip output image upside-down"}, + {"noflip", &flip, CONF_TYPE_FLAG, 0, -1, 0, NULL, "render output image as is"}, + {"bpp", &vo_dbpp, CONF_TYPE_INT, CONF_RANGE, 0, 32, NULL, "use different color depth than autodetect"}, + {"bm", &vo_use_bm, CONF_TYPE_FLAG, 0, 0, 1, NULL, "enables using of bus-mastering (if it available for given OS/videocard)"}, + {"bm2", &vo_use_bm, CONF_TYPE_FLAG, 0, 0, 2, NULL, "enables using of bus-mastering to store all decoded-ahead frames in video-memory"}, + {"nobm", &vo_use_bm, CONF_TYPE_FLAG, 0, 1, 0, NULL, "disables using of bus-mastering"}, {"id", &video_id, CONF_TYPE_INT, CONF_RANGE, 0, 255, NULL, "selects video channel"}, {"pp", &npp_options, CONF_TYPE_STRING, 0, 0, 0, NULL, "specifies options of post-processing"}, {"sws", &sws_flags, CONF_TYPE_INT, 0, 0, 2, NULL, "specifies the quality of the software scaler"}, @@ -336,7 +335,7 @@ {"noxv", &sdl_noxv, CONF_TYPE_FLAG, 0, 0, 1, NULL, "disable XVideo hardware acceleration for SDL"}, {"forcexv", &sdl_forcexv, CONF_TYPE_FLAG, 0, 0, 1, NULL, "force XVideo hardware acceleration for SDL"}, {"forcegl", &sdl_forcegl, CONF_TYPE_FLAG, 0, 0, 1, NULL, "force OpenGL hardware acceleration for SDL"}, -#endif +#endif {"eq",&veq_config, CONF_TYPE_SUBCONFIG, 0, 0, 0, NULL, "Video-equalizer specific options"}, {NULL, NULL, 0, 0, 0, 0, NULL,NULL}, }; @@ -346,8 +345,8 @@ {"ss", &seek_to_sec, CONF_TYPE_STRING, CONF_MIN, 0, 0, NULL, "seek to given time position before playback"}, {"loop", &loop_times, CONF_TYPE_INT, CONF_RANGE, -1, 10000, NULL, "loops movie playback given number of times. 0 means forever"}, {"noloop", &loop_times, CONF_TYPE_FLAG, 0, 0, -1, NULL, "disable loop of playback"}, - {"shuffle",&shuffle_playback, CONF_TYPE_FLAG, 0, 0, 1, NULL, "play files in random order"}, - {"noshuffle",&shuffle_playback, CONF_TYPE_FLAG, 0, 1, 0, NULL, "play files in regular order"}, + {"shuffle",&shuffle_playback, CONF_TYPE_FLAG, 0, 0, 1, NULL, "play files in random order"}, + {"noshuffle",&shuffle_playback, CONF_TYPE_FLAG, 0, 1, 0, NULL, "play files in regular order"}, {"list", NULL, CONF_TYPE_STRING, 0, 0, 0, NULL, "specifies playlist (1 file/row or Winamp or ASX format)"}, {"frames", &play_n_frames, CONF_TYPE_INT, CONF_MIN, 0, 0, NULL, "play given number of frames and exit"}, {NULL, NULL, 0, 0, 0, 0, NULL,NULL}, Modified: mplayerxp/dec_ahead.h =================================================================== --- mplayerxp/dec_ahead.h 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/dec_ahead.h 2010-01-18 15:55:23 UTC (rev 107) @@ -60,6 +60,8 @@ #define LOCK_VIDEO_DECODE() { MSG_D(DA_PREFIX"LOCK_VIDEO_DECODE\n"); pthread_mutex_lock(&video_decode_mutex); } #define UNLOCK_VIDEO_DECODE() { MSG_D(DA_PREFIX"UNLOCK_VIDEO_DECODE\n"); pthread_mutex_unlock(&video_decode_mutex); } +#define __MP_ATOMIC(OP) { pthread_mutex_t loc_mutex; pthread_mutex_lock(&loc_mutex); OP; pthread_mutex_unlock(&loc_mutex); } + typedef struct sh_video_attr { int eof; /* indicates last frame in stream */ Modified: mplayerxp/libmpcodecs/ad_mp3.c =================================================================== --- mplayerxp/libmpcodecs/ad_mp3.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libmpcodecs/ad_mp3.c 2010-01-18 15:55:23 UTC (rev 107) @@ -361,8 +361,10 @@ if(!((err==MPG123_OK)||(err==MPG123_NEED_MORE))) { MSG_ERR("mpg123_read = %s done = %u minlen = %u\n",mpg123_plain_strerror(err),done,minlen); } - else + else { + MSG_DBG2("ad_mp3.decode: copy %u bytes from %p\n",done,outdata); memcpy(buf,outdata,done); + } if(err==MPG123_NEED_MORE) { indata_size=ds_get_packet_r(sh->ds,&indata,pts); if(indata_size<0) return 0; Modified: mplayerxp/libmpcodecs/dec_video.c =================================================================== --- mplayerxp/libmpcodecs/dec_video.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libmpcodecs/dec_video.c 2010-01-18 15:55:23 UTC (rev 107) @@ -80,12 +80,10 @@ sh_video->inited=0; } -#ifdef _OPENMP #define MPDEC_THREAD_COND (VF_FLAGS_THREADS|VF_FLAGS_SLICES) static unsigned smp_num_cpus=1; static unsigned use_vf_threads=0; extern int enable_gomp; -#endif extern char *video_codec; int init_video(sh_video_t *sh_video,const char* codecname,const char * vfm,int status){ @@ -207,12 +205,12 @@ for(j=0;j<num_slices;j+=smp_num_cpus) { #pragma omp parallel for shared(vf) private(i) for(i=j;i<smp_num_cpus;i++) { - MSG_DBG2("Put slice[%u %u] in threads\n",ampi[i].y,ampi[i].h); + MSG_DBG2("parallel: dec_video.put_slice[%ux%u] %i %i %i %i\n",ampi[i].width,ampi[i].height,ampi[i].x,ampi[i].y,ampi[i].w,ampi[i].h); vf->put_slice(vf,&i[i]); } } for(;j<num_slices;j++) { - MSG_DBG2("Put slice[%u %u] in threads\n",ampi[j].y,h_step); + MSG_DBG2("par_tail: dec_video.put_slice[%ux%u] %i %i %i %i\n",ampi[i].width,ampi[i].height,ampi[i].x,ampi[i].y,ampi[i].w,ampi[i].h); vf->put_slice(vf,&i[j]); } } @@ -221,12 +219,12 @@ { /* execute slices instead of whole frame make faster multiple filters */ for(i=0;i<num_slices;i++) { - MSG_DBG2("vf(%s) Put slice[%u %u] in threads\n",vf->info->name,ampi[i].y,ampi[i].h); + MSG_DBG2("dec_video.put_slice[%ux%u] %i %i %i %i\n",ampi[i].width,ampi[i].height,ampi[i].x,ampi[i].y,ampi[i].w,ampi[i].h); vf->put_slice(vf,&i[i]); } } } else { - MSG_DBG2("Put whole frame\n"); + MSG_DBG2("Put whole frame[%ux%u]\n",mpi->width,mpi->height); vf->put_slice(vf,mpi); } } Modified: mplayerxp/libmpcodecs/vd_ffmpeg.c =================================================================== --- mplayerxp/libmpcodecs/vd_ffmpeg.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libmpcodecs/vd_ffmpeg.c 2010-01-18 15:55:23 UTC (rev 107) @@ -5,6 +5,7 @@ #include <dlfcn.h> /* GLIBC specific. Exists under cygwin too! */ #include "mp_config.h" +#include "../dec_ahead.h" #ifdef HAVE_GOMP #include <omp.h> #endif @@ -578,17 +579,18 @@ priv_t *vdff_ctx=sh->context; mp_image_t *mpi; if(vdff_ctx->use_dr1) { MSG_DBG2("Ignoring draw_slice due dr1\n"); return; } /* we may call vo_start_slice() here */ - mpi=mpcodecs_get_image(sh,MP_IMGTYPE_EXPORT, MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_DRAW_CALLBACK|MP_IMGFLAG_DIRECT,s->width,height); + mpi=mpcodecs_get_image(sh,MP_IMGTYPE_EXPORT, MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_DRAW_CALLBACK|MP_IMGFLAG_DIRECT,s->width,s->height); mpi->stride[0]=src->linesize[0]; mpi->stride[1]=src->linesize[1]; mpi->stride[2]=src->linesize[2]; - mpi->planes[0] = src->base[0]+offset[0]; - mpi->planes[1] = src->base[1]+offset[1]; - mpi->planes[2] = src->base[2]+offset[2]; + mpi->planes[0] = src->data[0]; + mpi->planes[1] = src->data[1]; + mpi->planes[2] = src->data[2]; mpi->w=s->width; mpi->y=y; mpi->h=height; + mpi->chroma_height = height >> mpi->chroma_y_shift; /* provide info for pp */ mpi->qscale=(QP_STORE_T *)vdff_ctx->lavc_picture->qscale_table; mpi->qstride=vdff_ctx->lavc_picture->qstride; @@ -607,14 +609,10 @@ mpi->stride[2]=mpi->stride[1]; mpi->stride[1]=ls; } - MSG_DBG2("ff_draw_callback %i %i %i %i\n",mpi->x,mpi->y,mpi->w,mpi->h); - pthread_mutex_lock(&sh->mutex); - sh->active_slices++; - pthread_mutex_unlock(&sh->mutex); + MSG_DBG2("ff_draw_callback[%ux%u] %i %i %i %i\n",mpi->width,mpi->height,mpi->x,mpi->y,mpi->w,mpi->h); + __MP_ATOMIC(sh->active_slices++); mpcodecs_draw_slice (sh, mpi); - pthread_mutex_lock(&sh->mutex); - sh->active_slices--; - pthread_mutex_unlock(&sh->mutex); + __MP_ATOMIC(sh->active_slices--); } /* copypaste from demux_real.c - it should match to get it working!*/ Modified: mplayerxp/libvo/aclib.c =================================================================== --- mplayerxp/libvo/aclib.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/aclib.c 2010-01-18 15:55:23 UTC (rev 107) @@ -8,110 +8,81 @@ #if defined(USE_FASTMEMCPY) #include "fastmemcpy.h" #include "../cpudetect.h" -/* - aclib - advanced C library ;) - This file contains functions which improve and expand standard C-library - see aclib_template.c ... this file only contains runtime cpu detection and config options stuff - runtime cpu detection by michael niedermayer (mic...@gm...) is under GPL -*/ -#if defined( CAN_COMPILE_MMX ) && defined (ARCH_X86) #define BLOCK_SIZE 4096 #define CONFUSION_FACTOR 0 -//Feel free to fine-tune the above 2, it might be possible to get some speedup with them :) -//#define STATISTICS +/* generic version */ +#undef OPTIMIZE_AVX +#undef OPTIMIZE_SSE4 +#undef OPTIMIZE_SSSE3 +#undef OPTIMIZE_SSE3 +#undef OPTIMIZE_SSE2 +#undef OPTIMIZE_SSE +#undef OPTIMIZE_MMX2 +#undef OPTIMIZE_MMX -#if defined( ARCH_X86 ) -#define CAN_COMPILE_X86_ASM -#endif - -//Note: we have MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one -//Plain C versions -//#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) -//#define COMPILE_C -//#endif - -#ifdef CAN_COMPILE_X86_ASM - -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW -#undef HAVE_SSE - -//MMX versions -#ifdef CAN_COMPILE_MMX -#undef RENAME +#ifndef __x86_64__ +#ifdef __MMX__ +#define OPTIMIZE_MMX #undef CL_SIZE #define CL_SIZE 32 -#define HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_3DNOW +#undef RENAME #define RENAME(a) a ## _MMX #include "aclib_template.c" #endif - -//MMX2 versions 32-byte cache-line size -#ifdef CAN_COMPILE_MMX2 -#undef RENAME +#ifdef __MMX2__ +#define OPTIMIZE_MMX2 #undef CL_SIZE #define CL_SIZE 32 -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW +#undef RENAME #define RENAME(a) a ## _MMX2_CL32 #include "aclib_template.c" #endif - -//MMX2 versions 64-byte cache-line size -#ifdef CAN_COMPILE_MMX2 -#undef RENAME +#ifdef __MMX2__ +#define OPTIMIZE_MMX2 #undef CL_SIZE #define CL_SIZE 64 -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW +#undef RENAME #define RENAME(a) a ## _MMX2_CL64 #include "aclib_template.c" #endif - -//MMX2 versions 128-byte cache-line size -#ifdef CAN_COMPILE_MMX2 -#undef RENAME +#ifdef __MMX2__ +#define OPTIMIZE_MMX2 #undef CL_SIZE #define CL_SIZE 128 -#define HAVE_MMX -#define HAVE_MMX2 -#undef HAVE_3DNOW +#undef RENAME #define RENAME(a) a ## _MMX2_CL128 #include "aclib_template.c" #endif - -//3DNOW versions all K6 have 32-bit cache-line size -#ifdef CAN_COMPILE_3DNOW +#endif // __x86_64__ +#ifdef __SSE2__ +#define OPTIMIZE_SSE2 #undef RENAME #undef CL_SIZE -#define CL_SIZE 32 -#define HAVE_MMX -#undef HAVE_MMX2 -#define HAVE_3DNOW -#define RENAME(a) a ## _3DNow +#define CL_SIZE 128 +#define RENAME(a) a ## _SSE2 #include "aclib_template.c" #endif -#endif // CAN_COMPILE_X86_ASM +/* + aclib - advanced C library ;) + This file contains functions which improve and expand standard C-library + see aclib_template.c ... this file only contains runtime cpu detection and config options stuff + runtime cpu detection by michael niedermayer (mic...@gm...) is under GPL +*/ -#elif defined( ARCH_X86_64 ) -#define RENAME(a) a ## _x86_64 -#include "aclib_x86_64.h" -#endif - static void * init_fast_memcpy(void * to, const void * from, size_t len) { -#if defined( ARCH_X86_64 ) && defined( USE_FASTMEMCPY ) - fast_memcpy_ptr = fast_memcpy_x86_64; -#elif defined( CAN_COMPILE_X86_ASM ) - // ordered per speed fasterst first -#ifdef CAN_COMPILE_MMX2 +#ifdef __SSE2__ + if(gCpuCaps.hasSSE2) + { + MSG_V("Using SSE2 optimized memcpy\n"); + fast_memcpy_ptr = fast_memcpy_SSE2; + } + else +#endif +#ifndef __x86_64__ +#ifdef __MMX2__ if(gCpuCaps.hasMMX2) { MSG_V("Using MMX2 optimized memcpy\n"); @@ -123,14 +94,6 @@ } else #endif -#ifdef CAN_COMPILE_3DNOW - if(gCpuCaps.has3DNow) - { - MSG_V("Using 3DNow optimized memcpy\n"); - fast_memcpy_ptr = fast_memcpy_3DNow; - } - else -#endif #ifdef CAN_COMPILE_MMX if(gCpuCaps.hasMMX) { @@ -139,60 +102,54 @@ } else #endif -#else +#endif { MSG_V("Using generic memcpy\n"); fast_memcpy_ptr = memcpy; /* prior to mmx we use the standart memcpy */ } -#endif return (*fast_memcpy_ptr)(to,from,len); } -static void * init_mem2agpcpy(void * to, const void * from, size_t len) +static void * init_stream_copy(void * to, const void * from, size_t len) { -#if defined( ARCH_X86_64 ) && defined( USE_FASTMEMCPY ) - mem2agpcpy_ptr = mem2agpcpy_x86_64; -#elif defined ( CAN_COMPILE_X86_ASM ) - // ordered per speed fasterst first -#ifdef CAN_COMPILE_MMX2 +#ifdef __SSE2__ + if(gCpuCaps.hasSSE2) + { + MSG_V("Using SSE2 optimized agpcpy\n"); + fast_stream_copy_ptr = fast_stream_copy_SSE2; + } +#endif +#ifndef __x86_64__ +#ifdef __MMX2__ if(gCpuCaps.hasMMX2) { MSG_V("Using MMX2 optimized agpcpy\n"); - if(gCpuCaps.cl_size >= 128) mem2agpcpy_ptr = mem2agpcpy_MMX2_CL128; + if(gCpuCaps.cl_size >= 128) fast_stream_copy_ptr = fast_stream_copy_MMX2_CL128; else - if(gCpuCaps.cl_size == 64) mem2agpcpy_ptr = mem2agpcpy_MMX2_CL64; + if(gCpuCaps.cl_size == 64) fast_stream_copy_ptr = fast_stream_copy_MMX2_CL64; else - mem2agpcpy_ptr = mem2agpcpy_MMX2_CL32; + fast_stream_copy_ptr = fast_stream_copy_MMX2_CL32; } else #endif -#ifdef CAN_COMPILE_3DNOW - if(gCpuCaps.has3DNow) - { - MSG_V("Using 3DNow optimized agpcpy\n"); - mem2agpcpy_ptr = mem2agpcpy_3DNow; - } - else -#endif -#ifdef CAN_COMPILE_MMX +#ifdef __MMX__ if(gCpuCaps.hasMMX) { MSG_V("Using MMX optimized agpcpy\n"); - mem2agpcpy_ptr = mem2agpcpy_MMX; + fast_stream_copy_ptr = fast_stream_copy_MMX; } else #endif -#else +#endif { MSG_V("Using generic optimized agpcpy\n"); - mem2agpcpy_ptr = memcpy; /* prior to mmx we use the standart memcpy */ + fast_stream_copy_ptr = memcpy; /* prior to mmx we use the standart memcpy */ } -#endif - return (*mem2agpcpy_ptr)(to,from,len); + return (*fast_stream_copy_ptr)(to,from,len); } void *(*fast_memcpy_ptr)(void * to, const void * from, size_t len) = init_fast_memcpy; -void *(*mem2agpcpy_ptr)(void * to, const void * from, size_t len) = init_mem2agpcpy; +void *(*fast_stream_copy_ptr)(void * to, const void * from, size_t len) = init_stream_copy; #endif /* use fastmemcpy */ Modified: mplayerxp/libvo/aclib_template.c =================================================================== --- mplayerxp/libvo/aclib_template.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/aclib_template.c 2010-01-18 15:55:23 UTC (rev 107) @@ -2,102 +2,70 @@ aclib - advanced C library ;) This file contains functions which improve and expand standard C-library */ +#include "pvector/pvector.h" -#ifndef HAVE_SSE2 -/* - P3 processor has only one SSE decoder so can execute only 1 sse insn per - cpu clock, but it has 3 mmx decoders (include load/store unit) - and executes 3 mmx insns per cpu clock. - P4 processor has some chances, but after reading: - http://www.emulators.com/pentium4.htm - I have doubts. Anyway SSE2 version of this code can be written better. -*/ -#undef HAVE_SSE -#endif - - -/* - This part of code was taken by me from Linux-2.4.3 and slightly modified -for MMX, MMX2, SSE instruction set. I have done it since linux uses page aligned -blocks but mplayer uses weakly ordered data and original sources can not -speedup them. Only using PREFETCHNTA and MOVNTQ together have effect! - ->From IA-32 Intel Architecture Software Developer's Manual Volume 1, - -Order Number 245470: -"10.4.6. Cacheability Control, Prefetch, and Memory Ordering Instructions" - -Data referenced by a program can be temporal (data will be used again) or -non-temporal (data will be referenced once and not reused in the immediate -future). To make efficient use of the processor's caches, it is generally -desirable to cache temporal data and not cache non-temporal data. Overloading -the processor's caches with non-temporal data is sometimes referred to as -"polluting the caches". -The non-temporal data is written to memory with Write-Combining semantics. - -The PREFETCHh instructions permits a program to load data into the processor -at a suggested cache level, so that it is closer to the processors load and -store unit when it is needed. If the data is already present in a level of -the cache hierarchy that is closer to the processor, the PREFETCHh instruction -will not result in any data movement. -But we should you PREFETCHNTA: Non-temporal data fetch data into location -close to the processor, minimizing cache pollution. - -The MOVNTQ (store quadword using non-temporal hint) instruction stores -packed integer data from an MMX register to memory, using a non-temporal hint. -The MOVNTPS (store packed single-precision floating-point values using -non-temporal hint) instruction stores packed floating-point data from an -XMM register to memory, using a non-temporal hint. - -The SFENCE (Store Fence) instruction controls write ordering by creating a -fence for memory store operations. This instruction guarantees that the results -of every store instruction that precedes the store fence in program order is -globally visible before any store instruction that follows the fence. The -SFENCE instruction provides an efficient way of ensuring ordering between -procedures that produce weakly-ordered data and procedures that consume that -data. - -If you have questions please contact with me: Nickols_K <nic...@ma...>. -*/ - -/* 3dnow memcpy support from kernel 2.4.2 - by Pontscho/fresh!mindworkz */ - - /* for small memory blocks (<256 bytes) this version is faster */ +#ifdef __x86_64__ #define small_memcpy(to,from,n)\ {\ +register unsigned long int siz;\ register unsigned long int dummy;\ + siz=n&0x7; n>>=3;\ + if(siz)\ __asm__ __volatile__(\ "rep; movsb"\ :"=&D"(to), "=&S"(from), "=&c"(dummy)\ /* It's most portable way to notify compiler */\ /* that edi, esi and ecx are clobbered in asm block. */\ /* Thanks to A'rpi for hint!!! */\ + :"0" (to), "1" (from),"2" (siz)\ + : "memory","cc");\ + if(n)\ +__asm__ __volatile__(\ + "rep; movsq"\ + :"=&D"(to), "=&S"(from), "=&c"(dummy)\ +/* It's most portable way to notify compiler */\ +/* that edi, esi and ecx are clobbered in asm block. */\ +/* Thanks to A'rpi for hint!!! */\ :"0" (to), "1" (from),"2" (n)\ : "memory","cc");\ } +#else +#define small_memcpy(to,from,n)\ +{\ +register unsigned long int dummy;\ +__asm__ __volatile__(\ + "rep; movsb"\ + :"=&D"(to), "=&S"(from), "=&c"(dummy)\ +/* It's most portable way to notify compiler */\ +/* that edi, esi and ecx are clobbered in asm block. */\ +/* Thanks to A'rpi for hint!!! */\ + :"0" (to), "1" (from),"2" (n)\ + : "memory","cc");\ +} +#endif -#include "../mmx_defs.h" #undef MMREG_SIZE -#ifdef HAVE_SSE +#ifdef OPTIMIZE_SSE2 #define MMREG_SIZE 16 #else #define MMREG_SIZE 64 //8 #endif #undef MIN_LEN -#ifdef HAVE_MMX1 +#ifndef OPTIMIZE_MMX2 #define MIN_LEN 0x800 /* 2K blocks */ #else #define MIN_LEN 0x40 /* 64-byte blocks */ #endif -static inline void * RENAME(fast_memcpy)(void * to, const void * from, size_t len) +static inline void * RENAME(fast_memory_copy)(void * to, const void * from, size_t len,int final) { void *retval; const unsigned char *cfrom=from; unsigned char *tto=to; + const unsigned ivec_block_size = 8*__IVEC_SIZE; + __ivec iarr[8]; size_t i; retval = to; if(!len) return retval; @@ -114,45 +82,41 @@ MSG_V("freq < %8d %4d\n", 1<<i, freq[i]); } #endif -#ifndef HAVE_MMX1 - /* PREFETCH has effect even for MOVSB instruction ;) */ - __asm__ __volatile__ ( - PREFETCH" (%0)\n" + + _ivec_prefetch(cfrom); #if CL_SIZE == 32 - PREFETCH" 32(%0)\n" + _ivec_prefetch(&cfrom[32]); #endif #if CL_SIZE < 128 - PREFETCH" 64(%0)\n" + _ivec_prefetch(&cfrom[64]); #endif #if CL_SIZE == 32 - PREFETCH" 96(%0)\n" + _ivec_prefetch(&cfrom[96]); #endif - PREFETCH" 128(%0)\n" + _ivec_prefetch(&cfrom[128]); #if CL_SIZE == 32 - PREFETCH" 160(%0)\n" + _ivec_prefetch(&cfrom[160]); #endif #if CL_SIZE < 128 - PREFETCH" 192(%0)\n" + _ivec_prefetch(&cfrom[192]); #endif #if CL_SIZE == 32 - PREFETCH" 224(%0)\n" + _ivec_prefetch(&cfrom[224]); #endif - PREFETCH" 256(%0)\n" - : : "r" (cfrom) ); -#endif - if(len >= MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to cache-line size -boundary */ - delta = ((unsigned long int)tto)&(CL_SIZE-1); - if(delta) - { + _ivec_prefetch(&cfrom[256]); + + if(len >= MIN_LEN) + { + register unsigned long int delta; + /* Align destinition to cache-line size -boundary */ + delta = ((unsigned long int)tto)&(CL_SIZE-1); + if(delta) { delta=MMREG_SIZE-delta; len -= delta; small_memcpy(tto, cfrom, delta); - } - i = len >> 6; /* len/64 */ - len&=63; + } + i = len/ivec_block_size; + len&=(ivec_block_size-1); /* This algorithm is top effective when the code consequently reads and writes blocks which have size of cache line. @@ -162,340 +126,73 @@ perform reading and writing to be multiple to a number of processor's decoders, but it's not always possible. */ -#ifdef HAVE_SSE /* Only P3 (may be Cyrix3) */ - if(((unsigned long)cfrom) & 15) - /* if SRC is misaligned */ for(; i>0; i--) { - __asm__ __volatile__ ( - PREFETCH" 320(%0)\n" + _ivec_prefetch(&cfrom[320]); #if CL_SIZE == 32 - PREFETCH" 352(%0)\n" + _ivec_prefetch(&cfrom[352]); #endif - "movups (%0), %%xmm0\n" - "movups 16(%0), %%xmm1\n" - "movups 32(%0), %%xmm2\n" - "movups 48(%0), %%xmm3\n" - "movntps %%xmm0, (%1)\n" - "movntps %%xmm1, 16(%1)\n" - "movntps %%xmm2, 32(%1)\n" - "movntps %%xmm3, 48(%1)\n" - :: "r" (cfrom), "r" (tto): - "memory" -#ifdef SSE_CLOBBERED - ,SSE_CLOBBERED -#endif - ); - cfrom+=64; - tto+=64; + if(((unsigned long)cfrom) & 15) { + /* if SRC is misaligned */ + iarr[0] = _ivec_loadu(&cfrom[__IVEC_SIZE*0]); + iarr[1] = _ivec_loadu(&cfrom[__IVEC_SIZE*1]); + iarr[2] = _ivec_loadu(&cfrom[__IVEC_SIZE*2]); + iarr[3] = _ivec_loadu(&cfrom[__IVEC_SIZE*3]); + iarr[4] = _ivec_loadu(&cfrom[__IVEC_SIZE*4]); + iarr[5] = _ivec_loadu(&cfrom[__IVEC_SIZE*5]); + iarr[6] = _ivec_loadu(&cfrom[__IVEC_SIZE*6]); + iarr[7] = _ivec_loadu(&cfrom[__IVEC_SIZE*7]); + } else { + iarr[0] = _ivec_loada(&cfrom[__IVEC_SIZE*0]); + iarr[1] = _ivec_loada(&cfrom[__IVEC_SIZE*1]); + iarr[2] = _ivec_loada(&cfrom[__IVEC_SIZE*2]); + iarr[3] = _ivec_loada(&cfrom[__IVEC_SIZE*3]); + iarr[4] = _ivec_loada(&cfrom[__IVEC_SIZE*4]); + iarr[5] = _ivec_loada(&cfrom[__IVEC_SIZE*5]); + iarr[6] = _ivec_loada(&cfrom[__IVEC_SIZE*6]); + iarr[7] = _ivec_loada(&cfrom[__IVEC_SIZE*7]); + } + if(final) { + _ivec_stream(&tto[__IVEC_SIZE*0],iarr[0]); + _ivec_stream(&tto[__IVEC_SIZE*1],iarr[1]); + _ivec_stream(&tto[__IVEC_SIZE*2],iarr[2]); + _ivec_stream(&tto[__IVEC_SIZE*3],iarr[3]); + _ivec_stream(&tto[__IVEC_SIZE*4],iarr[4]); + _ivec_stream(&tto[__IVEC_SIZE*5],iarr[5]); + _ivec_stream(&tto[__IVEC_SIZE*6],iarr[6]); + _ivec_stream(&tto[__IVEC_SIZE*7],iarr[7]); + } else { + _ivec_storea(&tto[__IVEC_SIZE*0],iarr[0]); + _ivec_storea(&tto[__IVEC_SIZE*1],iarr[1]); + _ivec_storea(&tto[__IVEC_SIZE*2],iarr[2]); + _ivec_storea(&tto[__IVEC_SIZE*3],iarr[3]); + _ivec_storea(&tto[__IVEC_SIZE*4],iarr[4]); + _ivec_storea(&tto[__IVEC_SIZE*5],iarr[5]); + _ivec_storea(&tto[__IVEC_SIZE*6],iarr[6]); + _ivec_storea(&tto[__IVEC_SIZE*7],iarr[7]); + } + cfrom+=ivec_block_size; + tto+=ivec_block_size; } - else - /* - Only if SRC is aligned on 16-byte boundary. - It allows to use movaps instead of movups, which required data - to be aligned or a general-protection exception (#GP) is generated. - */ - for(; i>0; i--) - { - __asm__ __volatile__ ( - PREFETCH" 320(%0)\n" -#if CL_SIZE == 32 - PREFETCH" 352(%0)\n" -#endif - "movaps (%0), %%xmm0\n" - "movaps 16(%0), %%xmm1\n" - "movaps 32(%0), %%xmm2\n" - "movaps 48(%0), %%xmm3\n" - "movntps %%xmm0, (%1)\n" - "movntps %%xmm1, 16(%1)\n" - "movntps %%xmm2, 32(%1)\n" - "movntps %%xmm3, 48(%1)\n" - :: "r" (cfrom), "r" (tto) - :"memory" -#ifdef SSE_CLOBBERED - ,SSE_CLOBBERED -#endif - ); - cfrom+=64; - tto+=64; - } -#else - // Align destination at BLOCK_SIZE boundary - for(; ((int)tto & (BLOCK_SIZE-1)) && i>0; i--) - { - __asm__ __volatile__ ( -#ifndef HAVE_MMX1 - PREFETCH" 320(%0)\n" -#if CL_SIZE == 32 - PREFETCH" 352(%0)\n" -#endif -#endif - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "movq 32(%0), %%mm4\n" - "movq 40(%0), %%mm5\n" - "movq 48(%0), %%mm6\n" - "movq 56(%0), %%mm7\n" - MOVNTQ" %%mm0, (%1)\n" - MOVNTQ" %%mm1, 8(%1)\n" - MOVNTQ" %%mm2, 16(%1)\n" - MOVNTQ" %%mm3, 24(%1)\n" - MOVNTQ" %%mm4, 32(%1)\n" - MOVNTQ" %%mm5, 40(%1)\n" - MOVNTQ" %%mm6, 48(%1)\n" - MOVNTQ" %%mm7, 56(%1)\n" - :: "r" (cfrom), "r" (tto) - : "memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - cfrom+=64; - tto+=64; - } - - // Pure Assembly cuz gcc is a bit unpredictable ;) - if(i>=BLOCK_SIZE/64) - asm volatile( - "xorl %%"REG_a", %%"REG_a" \n\t" - ".balign 16 \n\t" - "1: \n\t" - "movl (%0, %%"REG_a"), %%"REG_b" \n\t" -#if CL_SIZE == 32 - "movl 32(%0, %%"REG_a"), %%"REG_b" \n\t" -#endif -#if CL_SIZE < 128 - "movl 64(%0, %%"REG_a"), %%"REG_b" \n\t" -#endif -#if CL_SIZE == 32 - "movl 96(%0, %%"REG_a"), %%"REG_b" \n\t" -#endif - "addl $128, %%"REG_a" \n\t" - "cmpl %3, %%"REG_a" \n\t" - " jb 1b \n\t" - - "xorl %%"REG_a", %%"REG_a" \n\t" - - ".balign 16 \n\t" - "2: \n\t" - "movq (%0, %%"REG_a"), %%mm0\n" - "movq 8(%0, %%"REG_a"), %%mm1\n" - "movq 16(%0, %%"REG_a"), %%mm2\n" - "movq 24(%0, %%"REG_a"), %%mm3\n" - "movq 32(%0, %%"REG_a"), %%mm4\n" - "movq 40(%0, %%"REG_a"), %%mm5\n" - "movq 48(%0, %%"REG_a"), %%mm6\n" - "movq 56(%0, %%"REG_a"), %%mm7\n" - MOVNTQ" %%mm0, (%1, %%"REG_a")\n" - MOVNTQ" %%mm1, 8(%1, %%"REG_a")\n" - MOVNTQ" %%mm2, 16(%1, %%"REG_a")\n" - MOVNTQ" %%mm3, 24(%1, %%"REG_a")\n" - MOVNTQ" %%mm4, 32(%1, %%"REG_a")\n" - MOVNTQ" %%mm5, 40(%1, %%"REG_a")\n" - MOVNTQ" %%mm6, 48(%1, %%"REG_a")\n" - MOVNTQ" %%mm7, 56(%1, %%"REG_a")\n" - "addl $64, %%"REG_a" \n\t" - "cmpl %3, %%"REG_a" \n\t" - "jb 2b \n\t" - -#if CONFUSION_FACTOR > 0 - // a few percent speedup on out of order executing CPUs - "movl %5, %%"REG_a" \n\t" - "2: \n\t" - "movl (%0), %%"REG_b" \n\t" - "movl (%0), %%"REG_b" \n\t" - "movl (%0), %%"REG_b" \n\t" - "movl (%0), %%"REG_b" \n\t" - "decl %%"REG_a" \n\t" - " jnz 2b \n\t" -#endif - - "xorl %%"REG_a", %%"REG_a" \n\t" - "addl %3, %0 \n\t" - "addl %3, %1 \n\t" - "subl %4, %2 \n\t" - "cmpl %4, %2 \n\t" - " jae 1b \n\t" - : "+r" (cfrom), "+r" (tto), "+r" (i) - : "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR) - : "%"REG_a, "%"REG_b, "memory", "cc" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - - for(; i>0; i--) - { - __asm__ __volatile__ ( -#ifndef HAVE_MMX1 - PREFETCH" 320(%0)\n" -#if CL_SIZE == 32 - PREFETCH" 352(%0)\n" -#endif -#endif - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "movq 32(%0), %%mm4\n" - "movq 40(%0), %%mm5\n" - "movq 48(%0), %%mm6\n" - "movq 56(%0), %%mm7\n" - MOVNTQ" %%mm0, (%1)\n" - MOVNTQ" %%mm1, 8(%1)\n" - MOVNTQ" %%mm2, 16(%1)\n" - MOVNTQ" %%mm3, 24(%1)\n" - MOVNTQ" %%mm4, 32(%1)\n" - MOVNTQ" %%mm5, 40(%1)\n" - MOVNTQ" %%mm6, 48(%1)\n" - MOVNTQ" %%mm7, 56(%1)\n" - :: "r" (cfrom), "r" (tto) - : "memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - cfrom+=64; - tto+=64; - } - -#endif /* Have SSE */ -#ifdef HAVE_MMX2 - /* since movntq is weakly-ordered, a "sfence" - * is needed to become ordered again. */ - __asm__ __volatile__ ("sfence":::"memory"); -#endif -#ifndef HAVE_SSE - /* enables to use FPU */ - __asm__ __volatile__ (EMMS:: - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); -#endif - } - /* - * Now do the tail of the block - */ - if(len) small_memcpy(tto, cfrom, len); - return retval; + _ivec_sfence(); + _ivec_empty(); + } + /* + * Now do the tail of the block + */ + if(len) small_memcpy(tto, cfrom, len); + return retval; } /** * special copy routine for mem -> agp/pci copy (based upon fast_memcpy) */ -static inline void * RENAME(mem2agpcpy)(void * to, const void * from, size_t len) +static inline void * RENAME(fast_memcpy)(void * to, const void * from, size_t len) { - void *retval; - const unsigned char *cfrom=from; - unsigned char *tto=to; - size_t i; - retval = to; - if(!len) return retval; -#ifdef STATISTICS - { - static int freq[33]; - static int t=0; - int i; - for(i=0; len>(1<<i); i++); - freq[i]++; - t++; - if(1024*1024*1024 % t == 0) - for(i=0; i<32; i++) - MSG_V("mem2agp freq < %8d %4d\n", 1<<i, freq[i]); - } -#endif - if(len >= MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to cache-line size -boundary */ - delta = ((unsigned long int)tto)&(CL_SIZE-1); - if(delta) - { - delta=8-delta; - len -= delta; - small_memcpy(tto, cfrom, delta); - } - i = len >> 6; /* len/64 */ - len &= 63; - /* - This algorithm is top effective when the code consequently - reads and writes blocks which have size of cache line. - Size of cache line is processor-dependent. - It will, however, be a minimum of 32 bytes on any processors. - It would be better to have a number of instructions which - perform reading and writing to be multiple to a number of - processor's decoders, but it's not always possible. - */ - for(; i>0; i--) - { - __asm__ __volatile__ ( - PREFETCH" 320(%0)\n" -#if CL_SIZE == 32 - PREFETCH" 352(%0)\n" -#endif - "movq (%0), %%mm0\n" - "movq 8(%0), %%mm1\n" - "movq 16(%0), %%mm2\n" - "movq 24(%0), %%mm3\n" - "movq 32(%0), %%mm4\n" - "movq 40(%0), %%mm5\n" - "movq 48(%0), %%mm6\n" - "movq 56(%0), %%mm7\n" - MOVNTQ" %%mm0, (%1)\n" - MOVNTQ" %%mm1, 8(%1)\n" - MOVNTQ" %%mm2, 16(%1)\n" - MOVNTQ" %%mm3, 24(%1)\n" - MOVNTQ" %%mm4, 32(%1)\n" - MOVNTQ" %%mm5, 40(%1)\n" - MOVNTQ" %%mm6, 48(%1)\n" - MOVNTQ" %%mm7, 56(%1)\n" - :: "r" (cfrom), "r" (tto) - : "memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - cfrom+=64; - tto+=64; - } -#ifdef HAVE_MMX2 - /* since movntq is weakly-ordered, a "sfence" - * is needed to become ordered again. */ - __asm__ __volatile__ ("sfence":::"memory"); -#endif - /* enables to use FPU */ - __asm__ __volatile__ (EMMS:: - :"memory" -#ifdef FPU_CLOBBERED - ,FPU_CLOBBERED -#endif -#ifdef MMX_CLOBBERED - ,MMX_CLOBBERED -#endif - ); - } - /* - * Now do the tail of the block - */ - if(len) small_memcpy(tto, cfrom, len); - return retval; + return RENAME(fast_memory_copy)(to,from,len,0); } + +static inline void * RENAME(fast_stream_copy)(void * to, const void * from, size_t len) +{ + return RENAME(fast_memory_copy)(to,from,len,1); +} Deleted: mplayerxp/libvo/aclib_x86_64.h =================================================================== --- mplayerxp/libvo/aclib_x86_64.h 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/aclib_x86_64.h 2010-01-18 15:55:23 UTC (rev 107) @@ -1,191 +0,0 @@ -/* - aclib - advanced C library ;) - This file contains functions which improve and expand standard C-library -*/ - -/* for small memory blocks (<256 bytes) this version is faster */ -#define small_memcpy(to,from,n)\ -{\ -register unsigned long int siz;\ -register unsigned long int dummy;\ - siz=n&0x7; n>>=3;\ - if(siz)\ -__asm__ __volatile__(\ - "rep; movsb"\ - :"=&D"(to), "=&S"(from), "=&c"(dummy)\ -/* It's most portable way to notify compiler */\ -/* that edi, esi and ecx are clobbered in asm block. */\ -/* Thanks to A'rpi for hint!!! */\ - :"0" (to), "1" (from),"2" (siz)\ - : "memory","cc");\ - if(n)\ -__asm__ __volatile__(\ - "rep; movsq"\ - :"=&D"(to), "=&S"(from), "=&c"(dummy)\ -/* It's most portable way to notify compiler */\ -/* that edi, esi and ecx are clobbered in asm block. */\ -/* Thanks to A'rpi for hint!!! */\ - :"0" (to), "1" (from),"2" (n)\ - : "memory","cc");\ -} - - -#define MMREG_SIZE 16ULL -#define MIN_LEN 257ULL -#define CL_SIZE 256ULL /*always align on 256 byte boundary */ - -static inline void * RENAME(fast_memcpy)(void * to, const void * from, size_t len) -{ - void *retval; - const unsigned char *cfrom=from; - unsigned char *tto=to; - size_t i=0; - retval = to; - if(!len) return retval; - /* PREFETCH has effect even for MOVSB instruction ;) */ - __asm__ __volatile__ ( - "prefetcht0 (%0)\n" - "prefetcht0 64(%0)\n" - "prefetcht0 128(%0)\n" - "prefetcht0 192(%0)\n" - :: "r" (cfrom)); - if(len >= MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to cache-line size -boundary */ - delta = ((unsigned long int)tto)&(CL_SIZE-1ULL); - if(delta) - { - delta=CL_SIZE-delta; - len -=delta; - small_memcpy(tto, cfrom, delta); - } - i = len>>8; /* len/256 */ - len=len-(i<<8); - } - if(i) { - /* - This algorithm is top effective when the code consequently - reads and writes blocks which have size of cache line. - Size of cache line is processor-dependent. - It will, however, be a minimum of 32 bytes on any processors. - It would be better to have a number of instructions which - perform reading and writing to be multiple to a number of - processor's decoders, but it's not always possible. - */ - if(((unsigned long)cfrom) & 15) - /* if SRC is misaligned */ - for(; i>0; i--) - { - __asm__ __volatile__ ( - "prefetcht0 256(%0)\n" - "prefetcht0 320(%0)\n" - "movdqu (%0), %%xmm0\n" - "movdqu 16(%0), %%xmm1\n" - "movdqu 32(%0), %%xmm2\n" - "movdqu 48(%0), %%xmm3\n" - "movdqu 64(%0), %%xmm4\n" - "movdqu 80(%0), %%xmm5\n" - "movdqu 96(%0), %%xmm6\n" - "movdqu 112(%0), %%xmm7\n" - "prefetcht0 384(%0)\n" - "prefetcht0 448(%0)\n" - "movdqu 128(%0), %%xmm8\n" - "movdqu 144(%0), %%xmm9\n" - "movdqu 160(%0), %%xmm10\n" - "movdqu 176(%0), %%xmm11\n" - "movdqu 192(%0), %%xmm12\n" - "movdqu 208(%0), %%xmm13\n" - "movdqu 224(%0), %%xmm14\n" - "movdqu 240(%0), %%xmm15\n" - "movntdq %%xmm0, (%1)\n" - "movntdq %%xmm1, 16(%1)\n" - "movntdq %%xmm2, 32(%1)\n" - "movntdq %%xmm3, 48(%1)\n" - "movntdq %%xmm4, 64(%1)\n" - "movntdq %%xmm5, 80(%1)\n" - "movntdq %%xmm6, 96(%1)\n" - "movntdq %%xmm7, 112(%1)\n" - "movntdq %%xmm8, 128(%1)\n" - "movntdq %%xmm9, 144(%1)\n" - "movntdq %%xmm10, 160(%1)\n" - "movntdq %%xmm11, 176(%1)\n" - "movntdq %%xmm12, 192(%1)\n" - "movntdq %%xmm13, 208(%1)\n" - "movntdq %%xmm14, 224(%1)\n" - "movntdq %%xmm15, 240(%1)\n" - :: "r" (cfrom), "r" (tto): - "memory" - ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" - ); - cfrom+=256ULL; - tto+=256ULL; - } - else - /* - Only if SRC is aligned on 16-byte boundary. - It allows to use movdqa instead of movdqu, which required data - to be aligned or a general-protection exception (#GP) is generated. - */ - for(; i>0; i--) - { - __asm__ __volatile__ ( - "prefetcht0 256(%0)\n" - "prefetcht0 320(%0)\n" - "movdqa (%0), %%xmm0\n" - "movdqa 16(%0), %%xmm1\n" - "movdqa 32(%0), %%xmm2\n" - "movdqa 48(%0), %%xmm3\n" - "movdqa 64(%0), %%xmm4\n" - "movdqa 80(%0), %%xmm5\n" - "movdqa 96(%0), %%xmm6\n" - "movdqa 112(%0), %%xmm7\n" - "prefetcht0 384(%0)\n" - "prefetcht0 448(%0)\n" - "movdqa 128(%0), %%xmm8\n" - "movdqa 144(%0), %%xmm9\n" - "movdqa 160(%0), %%xmm10\n" - "movdqa 176(%0), %%xmm11\n" - "movdqa 192(%0), %%xmm12\n" - "movdqa 208(%0), %%xmm13\n" - "movdqa 224(%0), %%xmm14\n" - "movdqa 240(%0), %%xmm15\n" - "movntdq %%xmm0, (%1)\n" - "movntdq %%xmm1, 16(%1)\n" - "movntdq %%xmm2, 32(%1)\n" - "movntdq %%xmm3, 48(%1)\n" - "movntdq %%xmm4, 64(%1)\n" - "movntdq %%xmm5, 80(%1)\n" - "movntdq %%xmm6, 96(%1)\n" - "movntdq %%xmm7, 112(%1)\n" - "movntdq %%xmm8, 128(%1)\n" - "movntdq %%xmm9, 144(%1)\n" - "movntdq %%xmm10, 160(%1)\n" - "movntdq %%xmm11, 176(%1)\n" - "movntdq %%xmm12, 192(%1)\n" - "movntdq %%xmm13, 208(%1)\n" - "movntdq %%xmm14, 224(%1)\n" - "movntdq %%xmm15, 240(%1)\n" - :: "r" (cfrom), "r" (tto): - "memory" - ,"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" - ); - cfrom+=256ULL; - tto+=256ULL; - } - __asm__ __volatile__ ("sfence":::"memory"); - } - /* - * Now do the tail of the block - */ - if(len) small_memcpy(tto, cfrom, len); - return retval; -} - -/** - * special copy routine for mem -> agp/pci copy (based upon fast_memcpy) - */ -static inline void * RENAME(mem2agpcpy)(void * to, const void * from, size_t len) -{ - return memcpy(to,from,len); -} Modified: mplayerxp/libvo/dri_vo.h =================================================================== --- mplayerxp/libvo/dri_vo.h 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/dri_vo.h 2010-01-18 15:55:23 UTC (rev 107) @@ -20,6 +20,7 @@ #define DRI_CAP_HORZSCALER 0x00000040UL /**< Driver supports horizontal scaling */ #define DRI_CAP_VERTSCALER 0x00000080UL /**< Driver supports vertical scaling */ #define DRI_CAP_HWOSD 0x00000100UL /**< Driver supports OSD painting */ +#define DRI_CAP_BUSMASTERING 0x80000000UL /**< Means: final video buffer but allocated in RAM */ typedef struct dri_surface_cap_s { Modified: mplayerxp/libvo/fastmemcpy.h =================================================================== --- mplayerxp/libvo/fastmemcpy.h 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/fastmemcpy.h 2010-01-18 15:55:23 UTC (rev 107) @@ -7,24 +7,24 @@ #include <stddef.h> #include <string.h> /* memcpy prototypes */ extern void * (*fast_memcpy_ptr)(void * to, const void * from, size_t len); -extern void * (*mem2agpcpy_ptr)(void * to, const void * from, size_t len); +extern void * (*fast_stream_copy_ptr)(void * to, const void * from, size_t len); #define memcpy(a,b,c) (*fast_memcpy_ptr)(a,b,c) -#define mem2agpcpy(a,b,c) (*mem2agpcpy_ptr)(a,b,c) +#define stream_copy(a,b,c) (*fast_stream_copy_ptr)(a,b,c) #else -#define mem2agpcpy(a,b,c) memcpy(a,b,c) +#define stream_copy(a,b,c) memcpy(a,b,c) #endif -static inline void * mem2agpcpy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride) +static inline void * stream_copy_pic(void * dst, const void * src, int bytesPerLine, int height, int dstStride, int srcStride) { int i; void *retval=dst; - if(dstStride == srcStride) mem2agpcpy(dst, src, srcStride*height); + if(dstStride == srcStride) stream_copy(dst, src, srcStride*height); else { for(i=0; i<height; i++) { - mem2agpcpy(dst, src, bytesPerLine); + stream_copy(dst, src, bytesPerLine); src+= srcStride; dst+= dstStride; } Modified: mplayerxp/libvo/osd.c =================================================================== --- mplayerxp/libvo/osd.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/osd.c 2010-01-18 15:55:23 UTC (rev 107) @@ -73,7 +73,7 @@ static unsigned short fast_osd_16bpp_table[256]; #endif -static void __FASTCALL__ vo_draw_alpha_rgb15_C(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +static void __FASTCALL__ vo_draw_alpha_rgb15_C(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride,int finalize){ int y; for(y=0;y<h;y++){ register unsigned short *dst = (unsigned short*) dstbase; @@ -105,7 +105,7 @@ return; } -static void __FASTCALL__ vo_draw_alpha_rgb16_C(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ +static void __FASTCALL__ vo_draw_alpha_rgb16_C(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride,int finalize){ int y; for(y=0;y<h;y++){ register unsigned short *dst = (unsigned short*) dstbase; @@ -136,8 +136,8 @@ return; } -static void __FASTCALL__ vo_draw_alpha_uyvy_C(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){ - (*vo_draw_alpha_yuy2_ptr)(w,h,src,srca,srcstride,dstbase+1,dststride); +static void __FASTCALL__ vo_draw_alpha_uyvy_C(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride,int finalize){ + (*vo_draw_alpha_yuy2_ptr)(w,h,src,srca,srcstride,dstbase+1,dststride,finalize); } draw_alpha_f vo_draw_alpha_yv12_ptr=NULL; Modified: mplayerxp/libvo/osd.h =================================================================== --- mplayerxp/libvo/osd.h 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/osd.h 2010-01-18 15:55:23 UTC (rev 107) @@ -6,7 +6,7 @@ extern void vo_draw_alpha_init( void ); /* build tables */ -typedef void (* __FASTCALL__ draw_alpha_f)(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride); +typedef void (* __FASTCALL__ draw_alpha_f)(int w,int h, const unsigned char* src, const unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride,int finalize); extern draw_alpha_f vo_draw_alpha_yv12_ptr; extern draw_alpha_f vo_draw_alpha_yuy2_ptr; @@ -15,11 +15,11 @@ extern draw_alpha_f vo_draw_alpha_rgb32_ptr; extern draw_alpha_f vo_draw_alpha_rgb15_ptr; extern draw_alpha_f vo_draw_alpha_rgb16_ptr; -#define vo_draw_alpha_yv12(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_yv12_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) -#define vo_draw_alpha_yuy2(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_yuy2_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) -#define vo_draw_alpha_uyvy(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_uyvy_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) -#define vo_draw_alpha_rgb24(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_rgb24_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) -#define vo_draw_alpha_rgb32(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_rgb32_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) -#define vo_draw_alpha_rgb15(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_rgb15_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) -#define vo_draw_alpha_rgb16(w,h,src,srca,srcstride,dstbase,dstrstride) (*vo_draw_alpha_rgb16_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride) +#define vo_draw_alpha_yv12(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_yv12_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) +#define vo_draw_alpha_yuy2(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_yuy2_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) +#define vo_draw_alpha_uyvy(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_uyvy_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) +#define vo_draw_alpha_rgb24(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_rgb24_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) +#define vo_draw_alpha_rgb32(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_rgb32_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) +#define vo_draw_alpha_rgb15(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_rgb15_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) +#define vo_draw_alpha_rgb16(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) (*vo_draw_alpha_rgb16_ptr)(w,h,src,srca,srcstride,dstbase,dstrstride,finalize) #endif Modified: mplayerxp/libvo/osd_template.c =================================================================== --- mplayerxp/libvo/osd_template.c 2010-01-17 18:46:44 UTC (rev 106) +++ mplayerxp/libvo/osd_template.c 2010-01-18 15:55:23 UTC (rev 107) @@ -50,7 +50,7 @@ #endif -static inline void RENAME(vo_draw_alpha_yv12)(int w,int h,const unsigned char* src,const unsig... [truncated message content] |