[Mplayerxp-cvslog] SF.net SVN: mplayerxp:[390] mplayerxp/osdep

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 390
          http://mplayerxp.svn.sourceforge.net/mplayerxp/?rev=390&view=rev
Author:   nickols_k
Date:     2012-11-17 13:56:54 +0000 (Sat, 17 Nov 2012)
Log Message:
-----------
more c++ sources

Modified Paths:
--------------
    mplayerxp/osdep/Makefile
    mplayerxp/osdep/fastmemcpy.h

Added Paths:
-----------
    mplayerxp/osdep/aclib.cpp
    mplayerxp/osdep/aclib_template.h

Removed Paths:
-------------
    mplayerxp/osdep/aclib.c
    mplayerxp/osdep/aclib_template.c

Modified: mplayerxp/osdep/Makefile
===================================================================

--- mplayerxp/osdep/Makefile	2012-11-17 10:42:51 UTC (rev 389)
+++ mplayerxp/osdep/Makefile	2012-11-17 13:56:54 UTC (rev 390)
@@ -3,8 +3,8 @@
 
 LIBNAME = libosdep.a
 
-SRCS=aclib.c 
-CXXSRCS=mplib.cpp mp_malloc.cpp getch2.cpp timer-lx.cpp timer.cpp cpudetect.cpp shmem.cpp get_path.cpp
+SRCS=
+CXXSRCS=mplib.cpp mp_malloc.cpp getch2.cpp timer-lx.cpp timer.cpp cpudetect.cpp shmem.cpp get_path.cpp aclib.cpp
 OBJS=$(SRCS:.c=.o)
 CXXOBJS=$(CXXSRCS:.cpp=.o)
 

Deleted: mplayerxp/osdep/aclib.c
===================================================================
--- mplayerxp/osdep/aclib.c	2012-11-17 10:42:51 UTC (rev 389)
+++ mplayerxp/osdep/aclib.c	2012-11-17 13:56:54 UTC (rev 390)
@@ -1,98 +0,0 @@
-#include <stdio.h>
-#include <pthread.h>
-#include "mp_config.h"
-#include "mplayerxp.h"
-#define MSGT_CLASS MSGT_GLOBAL
-#include "mp_msg.h"
-
-#if defined(USE_FASTMEMCPY)
-#include "fastmemcpy.h"
-#include "osdep/cpudetect.h"
-
-#define BLOCK_SIZE 4096
-#define CONFUSION_FACTOR 0
-
-#define PVECTOR_ACCEL_H "aclib_template.c"
-#include "pvector/pvector_inc.h"
-
-/*
-  aclib - advanced C library ;)
-  This file contains functions which improve and expand standard C-library
-  see aclib_template.c ... this file only contains runtime cpu detection and config options stuff
-  runtime cpu detection by michael niedermayer (mic...@gm...) is under GPL
-*/
-
-static any_t* init_fast_memcpy(any_t* to, const any_t* from, size_t len)
-{
-#ifdef __SSE2__
-	if(gCpuCaps.hasSSE2)
-	{
-		MSG_V("Using SSE2 optimized memcpy\n");
-		fast_memcpy_ptr = fast_memcpy_SSE2;
-	}
-	else
-#endif
-#ifndef __x86_64__
-#ifdef __SSE__
-	if(gCpuCaps.hasMMX2)
-	{
-		MSG_V("Using MMX2 optimized memcpy\n");
-		fast_memcpy_ptr = fast_memcpy_SSE;
-	}
-	else
-#endif
-//#ifdef __MMX__
-//	if(gCpuCaps.hasMMX)
-//	{
-//		MSG_V("Using MMX optimized memcpy\n");
-//		fast_memcpy_ptr = fast_memcpy_MMX;
-//	}
-//	else
-//#endif
-#endif
-	{
-		MSG_V("Using generic memcpy\n");
-		fast_memcpy_ptr = memcpy; /* prior to mmx we use the standart memcpy */
-	}
-	return (*fast_memcpy_ptr)(to,from,len);
-}
-
-static any_t* init_stream_copy(any_t* to, const any_t* from, size_t len)
-{
-#ifdef __SSE2__
-	if(gCpuCaps.hasSSE2)
-	{
-		MSG_V("Using SSE2 optimized agpcpy\n");
-		fast_stream_copy_ptr = fast_stream_copy_SSE2;
-	}
-#endif
-#ifndef __x86_64__
-#ifdef __SSE__
-	if(gCpuCaps.hasMMX2)
-	{
-		MSG_V("Using MMX2 optimized agpcpy\n");
-		fast_stream_copy_ptr = fast_stream_copy_SSE;
-	}
-	else
-#endif
-//#ifdef __MMX__
-//	if(gCpuCaps.hasMMX)
-//	{
-//		MSG_V("Using MMX optimized agpcpy\n");
-//		fast_stream_copy_ptr = fast_stream_copy_MMX;
-//	}
-//	else
-//#endif
-#endif
-	{
-		MSG_V("Using generic optimized agpcpy\n");
-		fast_stream_copy_ptr = memcpy; /* prior to mmx we use the standart memcpy */
-	}
-	return (*fast_stream_copy_ptr)(to,from,len);
-}
-
-any_t*(*fast_memcpy_ptr)(any_t* to, const any_t* from, size_t len) = init_fast_memcpy;
-any_t*(*fast_stream_copy_ptr)(any_t* to, const any_t* from, size_t len) = init_stream_copy;
-
-#endif /* use fastmemcpy */
-

Copied: mplayerxp/osdep/aclib.cpp (from rev 369, mplayerxp/osdep/aclib.c)
===================================================================
--- mplayerxp/osdep/aclib.cpp	                        (rev 0)
+++ mplayerxp/osdep/aclib.cpp	2012-11-17 13:56:54 UTC (rev 390)
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <pthread.h>
+#include "mp_config.h"
+#include "mplayerxp.h"
+#define MSGT_CLASS MSGT_GLOBAL
+#include "mp_msg.h"
+
+#if defined(USE_FASTMEMCPY)
+#include "fastmemcpy.h"
+#include "osdep/cpudetect.h"
+
+#define BLOCK_SIZE 4096
+#define CONFUSION_FACTOR 0
+
+#define PVECTOR_ACCEL_H "aclib_template.h"
+#include "pvector/pvector_inc.h"
+
+/*
+  aclib - advanced C library ;)
+  This file contains functions which improve and expand standard C-library
+  see aclib_template.h ... this file only contains runtime cpu detection and config options stuff
+  runtime cpu detection by michael niedermayer (mic...@gm...) is under GPL
+*/
+
+/*
+ * One-shot selector for fast_memcpy_ptr.
+ * Picks an implementation from the gCpuCaps flags (SSE2 first; on builds
+ * without __x86_64__ also the SSE variant when hasMMX2 is set), falls back to
+ * memcpy, stores the choice in fast_memcpy_ptr and then performs the
+ * requested copy through it.
+ * to/from/len are passed unchanged to the selected routine, whose return
+ * value is returned.
+ */
+static any_t* init_fast_memcpy(any_t* to, const any_t* from, size_t len)
+{
+#ifdef __SSE2__
+	if(gCpuCaps.hasSSE2)
+	{
+		MSG_V("Using SSE2 optimized memcpy\n");
+		fast_memcpy_ptr = fast_memcpy_SSE2;
+	}
+	else /* binds to whichever branch below is compiled in */
+#endif
+#ifndef __x86_64__
+#ifdef __SSE__
+	if(gCpuCaps.hasMMX2)
+	{
+		MSG_V("Using MMX2 optimized memcpy\n");
+		fast_memcpy_ptr = fast_memcpy_SSE;
+	}
+	else
+#endif
+/* disabled: plain-MMX variant */
+//#ifdef __MMX__
+//	if(gCpuCaps.hasMMX)
+//	{
+//		MSG_V("Using MMX optimized memcpy\n");
+//		fast_memcpy_ptr = fast_memcpy_MMX;
+//	}
+//	else
+//#endif
+#endif
+	{
+		MSG_V("Using generic memcpy\n");
+		fast_memcpy_ptr = memcpy; /* prior to mmx we use the standard memcpy */
+	}
+	return (*fast_memcpy_ptr)(to,from,len);
+}
+
+/*
+ * One-shot selector for fast_stream_copy_ptr.
+ * Picks an implementation from the gCpuCaps flags (SSE2 first; on builds
+ * without __x86_64__ also the SSE variant when hasMMX2 is set), falls back to
+ * memcpy, stores the choice in fast_stream_copy_ptr and then performs the
+ * requested copy through it.
+ * to/from/len are passed unchanged to the selected routine, whose return
+ * value is returned.
+ */
+static any_t* init_stream_copy(any_t* to, const any_t* from, size_t len)
+{
+#ifdef __SSE2__
+	if(gCpuCaps.hasSSE2)
+	{
+		MSG_V("Using SSE2 optimized agpcpy\n");
+		fast_stream_copy_ptr = fast_stream_copy_SSE2;
+	}
+	else /* required: without it the branches below overwrite the SSE2 choice */
+#endif
+#ifndef __x86_64__
+#ifdef __SSE__
+	if(gCpuCaps.hasMMX2)
+	{
+		MSG_V("Using MMX2 optimized agpcpy\n");
+		fast_stream_copy_ptr = fast_stream_copy_SSE;
+	}
+	else
+#endif
+/* disabled: plain-MMX variant */
+//#ifdef __MMX__
+//	if(gCpuCaps.hasMMX)
+//	{
+//		MSG_V("Using MMX optimized agpcpy\n");
+//		fast_stream_copy_ptr = fast_stream_copy_MMX;
+//	}
+//	else
+//#endif
+#endif
+	{
+		MSG_V("Using generic optimized agpcpy\n");
+		fast_stream_copy_ptr = memcpy; /* prior to mmx we use the standard memcpy */
+	}
+	return (*fast_stream_copy_ptr)(to,from,len);
+}
+
+/*
+ * Copy entry points. Each pointer starts out aimed at its init_* selector,
+ * which replaces the pointer with the chosen implementation on the first call.
+ */
+any_t*(*fast_memcpy_ptr)(any_t* to, const any_t* from, size_t len) = init_fast_memcpy;
+any_t*(*fast_stream_copy_ptr)(any_t* to, const any_t* from, size_t len) = init_stream_copy;
+
+#endif /* use fastmemcpy */
+

Deleted: mplayerxp/osdep/aclib_template.c
===================================================================
--- mplayerxp/osdep/aclib_template.c	2012-11-17 10:42:51 UTC (rev 389)
+++ mplayerxp/osdep/aclib_template.c	2012-11-17 13:56:54 UTC (rev 390)
@@ -1,179 +0,0 @@
-/*
-  aclib - advanced C library ;)
-  This file contains functions which improve and expand standard C-library
-*/
-#include "pvector/pvector.h"
-
-#ifdef HAVE_INT_PVECTOR
-/* for small memory blocks (<256 bytes) this version is faster */
-#undef small_memcpy
-#ifdef __x86_64__
-#define small_memcpy(to,from,n)\
-{\
-register unsigned long int siz;\
-register unsigned long int dummy;\
-    siz=n&0x7;  n>>=3;\
-    if(siz)\
-__asm__ __volatile__(\
-	"rep; movsb"\
-	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
-/* It's most portable way to notify compiler */\
-/* that edi, esi and ecx are clobbered in asm block. */\
-/* Thanks to A'rpi for hint!!! */\
-	:"0" (to), "1" (from),"2" (siz)\
-	: "memory","cc");\
-    if(n)\
-__asm__ __volatile__(\
-	"rep; movsq"\
-	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
-/* It's most portable way to notify compiler */\
-/* that edi, esi and ecx are clobbered in asm block. */\
-/* Thanks to A'rpi for hint!!! */\
-	:"0" (to), "1" (from),"2" (n)\
-	: "memory","cc");\
-}
-#else
-#define small_memcpy(to,from,n)\
-{\
-register unsigned long int dummy;\
-__asm__ __volatile__(\
-	"rep; movsb"\
-	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
-/* It's most portable way to notify compiler */\
-/* that edi, esi and ecx are clobbered in asm block. */\
-/* Thanks to A'rpi for hint!!! */\
-	:"0" (to), "1" (from),"2" (n)\
-	: "memory","cc");\
-}
-#endif
-
-#undef MIN_LEN
-#if defined( OPTIMIZE_MMX ) && !defined( OPTIMIZE_MMX2 )
-#define MIN_LEN 0x800  /* 2K blocks. Was found experimentally */
-#else
-#define MIN_LEN _ivec_size()*8
-#endif
-
-#undef FAST_MEMORY_COPY
-#define FAST_MEMORY_COPY(to,from, len)\
-{\
-    any_t*retval;\
-    const unsigned char *cfrom=from;\
-    unsigned char *tto=to;\
-    const unsigned block_size = _ivec_size()*8;\
-    __ivec iarr[8];\
-    size_t i;\
-    retval = to;\
-    if(!len) return retval;\
-\
-    _ivec_prefetch(&cfrom[0]);\
-    _ivec_prefetch(&cfrom[32]);\
-    _ivec_prefetch(&cfrom[64]);\
-    _ivec_prefetch(&cfrom[96]);\
-    _ivec_prefetch(&cfrom[128]);\
-    _ivec_prefetch(&cfrom[160]);\
-    _ivec_prefetch(&cfrom[192]);\
-    _ivec_prefetch(&cfrom[224]);\
-    _ivec_prefetch(&cfrom[256]);\
-\
-    if(len >= MIN_LEN)\
-    {\
-	register unsigned long int delta;\
-	/* Align destinition to cache-line size -boundary */\
-	delta = ((unsigned long int)tto)&(gCpuCaps.cl_size-1);\
-	if(delta) {\
-	    delta=gCpuCaps.cl_size-delta;\
-	    len -= delta;\
-	    small_memcpy(tto, cfrom, delta);\
-	}\
-	i = len/block_size;\
-	len&=(block_size-1);\
-	/*\
-	   This algorithm is top effective when the code consequently\
-	   reads and writes blocks which have size of cache line.\
-	   Size of cache line is processor-dependent.\
-	   It will, however, be a minimum of 32 bytes on any processors.\
-	   It would be better to have a number of instructions which\
-	   perform reading and writing to be multiple to a number of\
-	   processor's decoders, but it's not always possible.\
-	*/\
-	if(((unsigned long)cfrom) & 15)\
-	/* if SRC is misaligned */\
-	for(; i>0; i--)\
-	{\
-	    _ivec_prefetch(&cfrom[_ivec_size()*8]);\
-	    _ivec_prefetch(&cfrom[_ivec_size()*8+32]);\
-	    iarr[0] = _ivec_loadu(&cfrom[_ivec_size()*0]);\
-	    iarr[1] = _ivec_loadu(&cfrom[_ivec_size()*1]);\
-	    iarr[2] = _ivec_loadu(&cfrom[_ivec_size()*2]);\
-	    iarr[3] = _ivec_loadu(&cfrom[_ivec_size()*3]);\
-	    iarr[4] = _ivec_loadu(&cfrom[_ivec_size()*4]);\
-	    iarr[5] = _ivec_loadu(&cfrom[_ivec_size()*5]);\
-	    iarr[6] = _ivec_loadu(&cfrom[_ivec_size()*6]);\
-	    iarr[7] = _ivec_loadu(&cfrom[_ivec_size()*7]);\
-	    MEM_STORE(&tto[_ivec_size()*0],iarr[0]);\
-	    MEM_STORE(&tto[_ivec_size()*1],iarr[1]);\
-	    MEM_STORE(&tto[_ivec_size()*2],iarr[2]);\
-	    MEM_STORE(&tto[_ivec_size()*3],iarr[3]);\
-	    MEM_STORE(&tto[_ivec_size()*4],iarr[4]);\
-	    MEM_STORE(&tto[_ivec_size()*5],iarr[5]);\
-	    MEM_STORE(&tto[_ivec_size()*6],iarr[6]);\
-	    MEM_STORE(&tto[_ivec_size()*7],iarr[7]);\
-	    cfrom+=block_size;\
-	    tto+=block_size;\
-	}\
-	else\
-	/* if SRC is aligned */\
-	for(; i>0; i--)\
-	{\
-	    _ivec_prefetch(&cfrom[_ivec_size()*8]);\
-	    _ivec_prefetch(&cfrom[_ivec_size()*8+32]);\
-	    iarr[0] = _ivec_loada(&cfrom[_ivec_size()*0]);\
-	    iarr[1] = _ivec_loada(&cfrom[_ivec_size()*1]);\
-	    iarr[2] = _ivec_loada(&cfrom[_ivec_size()*2]);\
-	    iarr[3] = _ivec_loada(&cfrom[_ivec_size()*3]);\
-	    iarr[4] = _ivec_loada(&cfrom[_ivec_size()*4]);\
-	    iarr[5] = _ivec_loada(&cfrom[_ivec_size()*5]);\
-	    iarr[6] = _ivec_loada(&cfrom[_ivec_size()*6]);\
-	    iarr[7] = _ivec_loada(&cfrom[_ivec_size()*7]);\
-	    MEM_STORE(&tto[_ivec_size()*0],iarr[0]);\
-	    MEM_STORE(&tto[_ivec_size()*1],iarr[1]);\
-	    MEM_STORE(&tto[_ivec_size()*2],iarr[2]);\
-	    MEM_STORE(&tto[_ivec_size()*3],iarr[3]);\
-	    MEM_STORE(&tto[_ivec_size()*4],iarr[4]);\
-	    MEM_STORE(&tto[_ivec_size()*5],iarr[5]);\
-	    MEM_STORE(&tto[_ivec_size()*6],iarr[6]);\
-	    MEM_STORE(&tto[_ivec_size()*7],iarr[7]);\
-	    cfrom+=block_size;\
-	    tto+=block_size;\
-	}\
-	MEM_SFENCE\
-	_ivec_empty();\
-    }\
-    /*\
-     *	Now do the tail of the block\
-     */\
-    if(len) small_memcpy(tto, cfrom, len);\
-    return retval;\
-}
-
-#undef MEM_STORE
-#undef MEM_SFENCE
-#define MEM_STORE  _ivec_stream
-#define MEM_SFENCE _ivec_sfence();
-static inline any_t* PVECTOR_RENAME(fast_stream_copy)(any_t* to, const any_t* from, size_t len)
-{
-    MSG_DBG3("fast_stream_copy(%p, %p, %u) [cl_size=%u]\n",to,from,len,gCpuCaps.cl_size);
-    FAST_MEMORY_COPY(to,from,len);
-}
-
-#undef MEM_STORE
-#undef MEM_SFENCE
-#define MEM_STORE _ivec_storea
-#define MEM_SFENCE
-static inline any_t* PVECTOR_RENAME(fast_memcpy)(any_t* to, const any_t* from, size_t len)
-{
-    MSG_DBG3("fast_memcpy(%p, %p, %u) [cl_size=%u]\n",to,from,len,gCpuCaps.cl_size);
-    FAST_MEMORY_COPY(to,from,len);
-}
-#endif

Copied: mplayerxp/osdep/aclib_template.h (from rev 369, mplayerxp/osdep/aclib_template.c)
===================================================================
--- mplayerxp/osdep/aclib_template.h	                        (rev 0)
+++ mplayerxp/osdep/aclib_template.h	2012-11-17 13:56:54 UTC (rev 390)
@@ -0,0 +1,179 @@
+/*
+  aclib - advanced C library ;)
+  This file contains functions which improve and expand standard C-library
+*/
+#include "pvector/pvector.h"
+
+#ifdef HAVE_INT_PVECTOR
+/*
+ * small_memcpy(to, from, n): inline `rep movs` copy.
+ * for small memory blocks (<256 bytes) this version is faster
+ *
+ * All arguments must be modifiable lvalues: `to` and `from` are asm output
+ * operands, so they come back advanced past the copied bytes (assuming the
+ * direction flag is clear), and the x86_64 variant also overwrites `n`
+ * (n>>=3).
+ *   x86_64:    copies n&7 bytes with movsb, then n>>3 quadwords with movsq.
+ *   otherwise: copies n bytes with movsb.
+ */
+#undef small_memcpy
+#ifdef __x86_64__
+#define small_memcpy(to,from,n)\
+{\
+register unsigned long int siz;\
+register unsigned long int dummy;\
+    siz=n&0x7;  n>>=3;\
+    if(siz)\
+__asm__ __volatile__(\
+	"rep; movsb"\
+	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
+/* It's most portable way to notify compiler */\
+/* that edi, esi and ecx are clobbered in asm block. */\
+/* Thanks to A'rpi for hint!!! */\
+	:"0" (to), "1" (from),"2" (siz)\
+	: "memory","cc");\
+    if(n)\
+__asm__ __volatile__(\
+	"rep; movsq"\
+	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
+/* It's most portable way to notify compiler */\
+/* that edi, esi and ecx are clobbered in asm block. */\
+/* Thanks to A'rpi for hint!!! */\
+	:"0" (to), "1" (from),"2" (n)\
+	: "memory","cc");\
+}
+#else
+#define small_memcpy(to,from,n)\
+{\
+register unsigned long int dummy;\
+__asm__ __volatile__(\
+	"rep; movsb"\
+	:"=&D"(to), "=&S"(from), "=&c"(dummy)\
+/* It's most portable way to notify compiler */\
+/* that edi, esi and ecx are clobbered in asm block. */\
+/* Thanks to A'rpi for hint!!! */\
+	:"0" (to), "1" (from),"2" (n)\
+	: "memory","cc");\
+}
+#endif
+
+/*
+ * MIN_LEN: smallest length for which FAST_MEMORY_COPY takes its vector path;
+ * shorter copies are handled by small_memcpy alone.
+ * NOTE(review): the second definition is an unparenthesized expression; it is
+ * only safe in contexts like `len >= MIN_LEN`.
+ */
+#undef MIN_LEN
+#if defined( OPTIMIZE_MMX ) && !defined( OPTIMIZE_MMX2 )
+#define MIN_LEN 0x800  /* 2K blocks. Was found experimentally */
+#else
+#define MIN_LEN _ivec_size()*8
+#endif
+
+/*
+ * FAST_MEMORY_COPY(to, from, len): complete function body for a copy routine.
+ * It ends in `return retval;` (retval == to), so it has to be the last
+ * statement of a function returning any_t*. MEM_STORE and MEM_SFENCE must be
+ * defined at the point of expansion.
+ *
+ * len == 0 returns immediately. For len >= MIN_LEN the destination is first
+ * advanced to a gCpuCaps.cl_size boundary with small_memcpy, then the data is
+ * moved in blocks of eight vectors (block_size = _ivec_size()*8), loading with
+ * _ivec_loadu or _ivec_loada depending on the low four bits of the source
+ * address and storing with MEM_STORE. Whatever remains, and every copy
+ * shorter than MIN_LEN, goes through small_memcpy.
+ *
+ * NOTE(review): the masks (cl_size-1) and (block_size-1) are only correct for
+ * power-of-two values, and the source-alignment test is hard-coded to 16
+ * bytes rather than _ivec_size() -- confirm both against pvector/cpudetect.
+ */
+#undef FAST_MEMORY_COPY
+#define FAST_MEMORY_COPY(to,from, len)\
+{\
+    any_t*retval;\
+    const unsigned char *cfrom=(const unsigned char *)from;\
+    unsigned char *tto=(unsigned char *)to;\
+    const unsigned block_size = _ivec_size()*8;\
+    __ivec iarr[8];\
+    size_t i;\
+    retval = to;\
+    if(!len) return retval;\
+\
+    _ivec_prefetch(&cfrom[0]);\
+    _ivec_prefetch(&cfrom[32]);\
+    _ivec_prefetch(&cfrom[64]);\
+    _ivec_prefetch(&cfrom[96]);\
+    _ivec_prefetch(&cfrom[128]);\
+    _ivec_prefetch(&cfrom[160]);\
+    _ivec_prefetch(&cfrom[192]);\
+    _ivec_prefetch(&cfrom[224]);\
+    _ivec_prefetch(&cfrom[256]);\
+\
+    if(len >= MIN_LEN)\
+    {\
+	register unsigned long int delta;\
+	/* Align destination to cache-line size -boundary */\
+	delta = ((unsigned long int)tto)&(gCpuCaps.cl_size-1);\
+	if(delta) {\
+	    delta=gCpuCaps.cl_size-delta;\
+	    len -= delta;\
+	    small_memcpy(tto, cfrom, delta); /* also advances tto and cfrom */\
+	}\
+	i = len/block_size;\
+	len&=(block_size-1);\
+	/*\
+	   This algorithm is top effective when the code consequently\
+	   reads and writes blocks which have size of cache line.\
+	   Size of cache line is processor-dependent.\
+	   It will, however, be a minimum of 32 bytes on any processors.\
+	   It would be better to have a number of instructions which\
+	   perform reading and writing to be multiple to a number of\
+	   processor's decoders, but it's not always possible.\
+	*/\
+	if(((unsigned long)cfrom) & 15)\
+	/* if SRC is misaligned */\
+	for(; i>0; i--)\
+	{\
+	    _ivec_prefetch(&cfrom[_ivec_size()*8]);\
+	    _ivec_prefetch(&cfrom[_ivec_size()*8+32]);\
+	    iarr[0] = _ivec_loadu(&cfrom[_ivec_size()*0]);\
+	    iarr[1] = _ivec_loadu(&cfrom[_ivec_size()*1]);\
+	    iarr[2] = _ivec_loadu(&cfrom[_ivec_size()*2]);\
+	    iarr[3] = _ivec_loadu(&cfrom[_ivec_size()*3]);\
+	    iarr[4] = _ivec_loadu(&cfrom[_ivec_size()*4]);\
+	    iarr[5] = _ivec_loadu(&cfrom[_ivec_size()*5]);\
+	    iarr[6] = _ivec_loadu(&cfrom[_ivec_size()*6]);\
+	    iarr[7] = _ivec_loadu(&cfrom[_ivec_size()*7]);\
+	    MEM_STORE(&tto[_ivec_size()*0],iarr[0]);\
+	    MEM_STORE(&tto[_ivec_size()*1],iarr[1]);\
+	    MEM_STORE(&tto[_ivec_size()*2],iarr[2]);\
+	    MEM_STORE(&tto[_ivec_size()*3],iarr[3]);\
+	    MEM_STORE(&tto[_ivec_size()*4],iarr[4]);\
+	    MEM_STORE(&tto[_ivec_size()*5],iarr[5]);\
+	    MEM_STORE(&tto[_ivec_size()*6],iarr[6]);\
+	    MEM_STORE(&tto[_ivec_size()*7],iarr[7]);\
+	    cfrom+=block_size;\
+	    tto+=block_size;\
+	}\
+	else\
+	/* if SRC is aligned */\
+	for(; i>0; i--)\
+	{\
+	    _ivec_prefetch(&cfrom[_ivec_size()*8]);\
+	    _ivec_prefetch(&cfrom[_ivec_size()*8+32]);\
+	    iarr[0] = _ivec_loada(&cfrom[_ivec_size()*0]);\
+	    iarr[1] = _ivec_loada(&cfrom[_ivec_size()*1]);\
+	    iarr[2] = _ivec_loada(&cfrom[_ivec_size()*2]);\
+	    iarr[3] = _ivec_loada(&cfrom[_ivec_size()*3]);\
+	    iarr[4] = _ivec_loada(&cfrom[_ivec_size()*4]);\
+	    iarr[5] = _ivec_loada(&cfrom[_ivec_size()*5]);\
+	    iarr[6] = _ivec_loada(&cfrom[_ivec_size()*6]);\
+	    iarr[7] = _ivec_loada(&cfrom[_ivec_size()*7]);\
+	    MEM_STORE(&tto[_ivec_size()*0],iarr[0]);\
+	    MEM_STORE(&tto[_ivec_size()*1],iarr[1]);\
+	    MEM_STORE(&tto[_ivec_size()*2],iarr[2]);\
+	    MEM_STORE(&tto[_ivec_size()*3],iarr[3]);\
+	    MEM_STORE(&tto[_ivec_size()*4],iarr[4]);\
+	    MEM_STORE(&tto[_ivec_size()*5],iarr[5]);\
+	    MEM_STORE(&tto[_ivec_size()*6],iarr[6]);\
+	    MEM_STORE(&tto[_ivec_size()*7],iarr[7]);\
+	    cfrom+=block_size;\
+	    tto+=block_size;\
+	}\
+	/* MEM_SFENCE carries its own `;` when it is non-empty */\
+	MEM_SFENCE\
+	_ivec_empty();\
+    }\
+    /*\
+     *	Now do the tail of the block\
+     */\
+    if(len) small_memcpy(tto, cfrom, len);\
+    return retval;\
+}
+
+#undef MEM_STORE
+#undef MEM_SFENCE
+#define MEM_STORE  _ivec_stream
+#define MEM_SFENCE _ivec_sfence();
+/*
+ * fast_stream_copy flavour of FAST_MEMORY_COPY: vector stores go through
+ * _ivec_stream and the vector path ends with _ivec_sfence().
+ * Copies len bytes from `from` to `to`; the macro body returns `to`.
+ */
+static inline any_t* PVECTOR_RENAME(fast_stream_copy)(any_t* to, const any_t* from, size_t len)
+{
+    /* len is a size_t: print it as unsigned long, because %u does not match
+       size_t where it is wider than unsigned (e.g. LP64).
+       Assumes MSG_DBG3 is printf-style -- TODO confirm. */
+    MSG_DBG3("fast_stream_copy(%p, %p, %lu) [cl_size=%u]\n",to,from,(unsigned long)len,gCpuCaps.cl_size);
+    FAST_MEMORY_COPY(to,from,len);
+}
+
+#undef MEM_STORE
+#undef MEM_SFENCE
+#define MEM_STORE _ivec_storea
+#define MEM_SFENCE
+/*
+ * fast_memcpy flavour of FAST_MEMORY_COPY: vector stores go through
+ * _ivec_storea and no fence is emitted (MEM_SFENCE is empty).
+ * Copies len bytes from `from` to `to`; the macro body returns `to`.
+ */
+static inline any_t* PVECTOR_RENAME(fast_memcpy)(any_t* to, const any_t* from, size_t len)
+{
+    /* len is a size_t: print it as unsigned long, because %u does not match
+       size_t where it is wider than unsigned (e.g. LP64).
+       Assumes MSG_DBG3 is printf-style -- TODO confirm. */
+    MSG_DBG3("fast_memcpy(%p, %p, %lu) [cl_size=%u]\n",to,from,(unsigned long)len,gCpuCaps.cl_size);
+    FAST_MEMORY_COPY(to,from,len);
+}
+#endif

Modified: mplayerxp/osdep/fastmemcpy.h
===================================================================
--- mplayerxp/osdep/fastmemcpy.h	2012-11-17 10:42:51 UTC (rev 389)
+++ mplayerxp/osdep/fastmemcpy.h	2012-11-17 13:56:54 UTC (rev 390)
@@ -25,8 +25,8 @@
 		for(i=0; i<height; i++)
 		{
 			stream_copy(dst, src, bytesPerLine);
-			src+= srcStride;
-			dst+= dstStride;
+			src=(char *)src+ srcStride;
+			dst=(char *)dst+ dstStride;
 		}
 	}
 
@@ -44,8 +44,8 @@
 		for(i=0; i<height; i++)
 		{
 			memcpy(dst, src, bytesPerLine);
-			src+= srcStride;
-			dst+= dstStride;
+			src=(char *)src+ srcStride;
+			dst=(char *)dst+ dstStride;
 		}
 	}
 

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.