Re: [Linux-fbdev-devel] Re: [PATCH]: cfb_imageblit() fix: handle widths not divisible by 8

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

On Wed, 2003-01-15 at 17:28, Geert Uytterhoeven wrote:
> On 15 Jan 2003, Antonino Daplas wrote:
> > On Wed, 2003-01-15 at 08:26, James Simmons wrote:
> > > Applied.
> > > 
> > > > c. Fix for fast_imageblit() so it always refers to the mask tables as
> > > > 32-bit values, which should make it work on 64-bit machines.
> > > 
> > > Ug. I'd rather try to take advantage of using the full 64 bits of data to
> > > pass across the bus. What I was thinking is to treat the 64-bit case as two
> > > 32-bit cases. The 64-bit data comes in and we run the data twice at tabs[].
> > > 
> > Hi James,
> > 
> > Yes, I was trying to find a way to make fast_imageblit() be fast for all
> > machine architectures.  With the patch attached, there's
> > fast_imageblit32() and fast_imageblit64().  fast_imageblit32() is
> > probably slower than fast_imageblit64 on 64-bit machines and, on the
> > other hand, fast_imageblit64() is 20% slower on 32-bit machines, but is
> > probably faster on 64-bit and higher machines.  So, the only way I can
> > think of doing this on all machine architectures is to have them go
> > separate paths.
> 
> Can't you merge fast_imageblit32() and fast_imageblit64() a bit more (with some
> #ifdef's), and just call the result fast_imageblit()? Then the definition of
> FAST_IMAGEBLIT can go away.
> 
> u32 is the same as unsigned long if BITS_PER_LONG == 32.
> 
That's true.  I don't want to do the merge before you people have seen
it.  Anyway, here's an updated one.

Tony

diff -Naur linux-2.5.56-fbdev/drivers/video/cfbimgblt.c linux/drivers/video/cfbimgblt.c

--- linux-2.5.56-fbdev/drivers/video/cfbimgblt.c	2003-01-15 01:56:47.000000000 +0000
+++ linux/drivers/video/cfbimgblt.c	2003-01-15 11:43:53.000000000 +0000
@@ -73,14 +73,6 @@
 	0x00000000, 0xffffffff
 };
 
-#if BITS_PER_LONG == 32
-#define FB_WRITEL fb_writel
-#define FB_READL  fb_readl
-#else
-#define FB_WRITEL fb_writeq
-#define FB_READL  fb_readq
-#endif 
-
 #if defined (__BIG_ENDIAN)
 #define LEFT_POS(bpp)          (BITS_PER_LONG - bpp)
 #define LEFT_POS32(bpp)        (32 - bpp)
@@ -95,6 +87,28 @@
 #define SHIFT_LOW(val, bits)   ((val) >> (bits))
 #endif
 
+#if BITS_PER_LONG == 32
+#define FB_WRITEL        fb_writel
+#define FB_READL         fb_readl
+#define DECLARE_FASTPATH {}
+#define INIT_FASTPATH    {}
+#define FASTPATH         fb_writel((end_mask & eorx)^bgx, dst++)
+#else
+#define FB_WRITEL        fb_writeq
+#define FB_READL         fb_readq
+#define DECLARE_FASTPATH unsigned long val, bpl
+#define INIT_FASTPATH    { val = 0; bpl = 0; }
+#define FASTPATH {                                     \
+	val |= SHIFT_HIGH((end_mask & eorx)^bgx, bpl); \
+	bpl += 32;                                     \
+	bpl &= BITS_PER_LONG - 1;                      \
+	if (!bpl) {                                    \
+		FB_WRITEL(val, dst++);                 \
+		val = 0;                               \
+	}                                              \
+}                                                      
+#endif 
+
 static inline void color_imageblit(struct fb_image *image, struct fb_info *p, 
 				   u8 *dst1, unsigned long start_index, 
 				   unsigned long pitch_index)
@@ -242,10 +256,11 @@
 	u32 bit_mask, end_mask, eorx, shift; 
 	u32 fgx = fgcolor, bgx = bgcolor, bpp = p->var.bits_per_pixel;
 	u32 ppw = 32/bpp, spitch = (image->width + 7)/8;
-	u32 *dst;
 	u32 *tab = NULL;
+	unsigned long *dst;
 	char *s = image->data, *src;
-		
+	DECLARE_FASTPATH;
+
 	switch (bpp) {
 	case 8:
 		tab = cfb_tab8;
@@ -270,18 +285,19 @@
 	k = image->width/ppw;
 
 	for (i = image->height; i--; ) {
-		dst = (u32 *) dst1; shift = 8; src = s;
+		dst = (unsigned long *) dst1; shift = 8; src = s;
+		INIT_FASTPATH;
 		for (j = k; j--; ) {
 			shift -= ppw;
 			end_mask = tab[(*src >> shift) & bit_mask]; 
-			fb_writel((end_mask & eorx)^bgx, dst++);
+			FASTPATH;
 			if (!shift) { shift = 8; src++; }
 		}
 		dst1 += p->fix.line_length;
 		s += spitch;
 	}
 }	
-	
+
 void cfb_imageblit(struct fb_info *p, struct fb_image *image)
 {
 	int x2, y2, vxres, vyres;
@@ -331,7 +347,7 @@
 		
 		if (BITS_PER_LONG % bpp == 0 && !start_index && 
 		    !pitch_index && bpp >= 8 && bpp <= 32 && 
-		    (image->width & (32/bpp-1)) == 0) 
+		    (image->width & (BITS_PER_LONG/bpp-1)) == 0) 
 			fast_imageblit(image, p, dst1, fgcolor, bgcolor);
 		else 
 			slow_imageblit(image, p, dst1, fgcolor, bgcolor,