From: Adrian M. <ad...@ne...> - 2007-09-22 13:17:19
|
The code for PVR2 DMA in the kernel is currently completely broken at both hardware handling and algorithmic levels and more than likely to cash a fatal crash if used. This patch fixes the code and it seems to work well. I know it is not up to the standards required to go to mainline yet, but any commentary on it would be helpful. Signed-off by Adrian McMenamin <ad...@mc...> --- a/arch/sh/drivers/dma/dma-pvr2.c +++ b/arch/sh/drivers/dma/dma-pvr2.c @@ -18,23 +18,14 @@ #include <asm/dma.h> #include <asm/io.h> -static unsigned int xfer_complete; -static int count; - static irqreturn_t pvr2_dma_interrupt(int irq, void *dev_id) { - if (get_dma_residue(PVR2_CASCADE_CHAN)) { - printk(KERN_WARNING "DMA: SH DMAC did not complete transfer " - "on channel %d, waiting..\n", PVR2_CASCADE_CHAN); - dma_wait_for_completion(PVR2_CASCADE_CHAN); + struct dma_channel *channel; + if (likely(!get_dma_residue(PVR2_CASCADE_CHAN))) { + channel = get_dma_channel(PVR2_CASCADE_CHAN); + wake_up(&channel->wait_queue); } - if (count++ < 10) - pr_debug("Got a pvr2 dma interrupt for channel %d\n", - irq - HW_EVENT_PVR2_DMA); - - xfer_complete = 1; - return IRQ_HANDLED; } @@ -42,15 +33,12 @@ static int pvr2_request_dma(struct dma_channel *chan) { if (ctrl_inl(PVR2_DMA_MODE) != 0) return -EBUSY; - - ctrl_outl(0, PVR2_DMA_LMMODE0); - return 0; } static int pvr2_get_dma_residue(struct dma_channel *chan) { - return xfer_complete == 0; + return ctrl_inl(PVR2_DMA_MODE); } static int pvr2_xfer_dma(struct dma_channel *chan) @@ -58,11 +46,9 @@ static int pvr2_xfer_dma(struct dma_channel *chan) if (chan->sar || !chan->dar) return -EINVAL; - xfer_complete = 0; - ctrl_outl(chan->dar, PVR2_DMA_ADDR); ctrl_outl(chan->count, PVR2_DMA_COUNT); - ctrl_outl(chan->mode & DMA_MODE_MASK, PVR2_DMA_MODE); + ctrl_outl(1, PVR2_DMA_MODE); return 0; } --- a/drivers/video/pvr2fb.c +++ b/drivers/video/pvr2fb.c @@ -677,11 +677,14 @@ static int pvr2_init_cable(void) static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, size_t count, loff_t *ppos) { - unsigned long dst, start, end, len; + unsigned long dst, start, end, len, phys_dst; unsigned int nr_pages; struct page **pages; int ret, i; + /* Bounds check */ + if (*ppos > 0x2FFFFF) return -ENOSPC; + nr_pages = (count + PAGE_SIZE - 1) >> PAGE_SHIFT; pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); @@ -698,39 +701,57 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, ret = -EINVAL; goto out_unmap; } - - dma_configure_channel(shdma, 0x12c1); + dst = (unsigned long)fb_info->screen_base + *ppos; start = (unsigned long)page_address(pages[0]); - end = (unsigned long)page_address(pages[nr_pages]); + end = (unsigned long)page_address(pages[nr_pages - 1]); len = nr_pages << PAGE_SHIFT; + if ((*ppos + len) > 0x2FFFFF) + len = 0x2FFFFF - *ppos; + + /* transfer is via Tile Accelerator */ + phys_dst = PHYSADDR(*ppos + 0x11000000); + if (phys_dst >= 0x11000000 && phys_dst <= 0x11FFFFE0){ + ctrl_outl(0, PVR2_DMA_LMMODE0); + ctrl_outl(1, PVR2_DMA_LMMODE1); + } + else if (phys_dst >= 0x13000000 && phys_dst <= 0x13FFFFE0) { + ctrl_outl(1, PVR2_DMA_LMMODE0); + ctrl_outl(0, PVR2_DMA_LMMODE1); + } + else { + ctrl_outl(1, PVR2_DMA_LMMODE0); + ctrl_outl(1, PVR2_DMA_LMMODE1); + } - /* Half-assed contig check */ - if (start + len == end) { - /* As we do this in one shot, it's either all or nothing.. */ - if ((*ppos + len) > fb_info->fix.smem_len) { - ret = -ENOSPC; - goto out_unmap; - } - - dma_write(shdma, start, 0, len); - dma_write(pvr2dma, 0, dst, len); - dma_wait_for_completion(pvr2dma); + if (start + len == end + (1 << PAGE_SHIFT)) { + ctrl_outl(0, PVR2_DMA_MODE); /* DMA off for now */ + ctrl_outl(0x12c0, 0xFFA0002C); /* set transfer flags to off */ + ctrl_outl(start, 0xFFA00020); /* set channel 2 start address */ + ctrl_outl(len/32, 0xFFA00028); /* chan2 dma is in 32 byte lumps */ + ctrl_outl(0x12c1, 0xFFA0002C); /* set flags to be ready to go */ + ctrl_outl(phys_dst, PVR2_DMA_ADDR); /* transfer is via the TA */ + ctrl_outl(len, PVR2_DMA_COUNT); + ctrl_outl(1, PVR2_DMA_MODE); /* and go... */ + dma_wait_for_completion(shdma); /* sleep till we are done */ goto out; } /* Not contiguous, writeout per-page instead.. */ for (i = 0; i < nr_pages; i++, dst += PAGE_SIZE) { - if ((*ppos + (i << PAGE_SHIFT)) > fb_info->fix.smem_len) { - ret = -ENOSPC; - goto out_unmap; - } - - dma_write_page(shdma, (unsigned long)page_address(pages[i]), 0); - dma_write_page(pvr2dma, 0, dst); - dma_wait_for_completion(pvr2dma); + if ((*ppos + (1 << PAGE_SHIFT)) > 0x2FFFFF) + break; + ctrl_outl(0, PVR2_DMA_MODE); + ctrl_outl(0x12c0, 0xFFA0002C); + ctrl_outl((unsigned long)page_address(pages[i]), 0xFFA00020); + ctrl_outl(len/32, 0xFFA00028); + ctrl_outl(0x12c1, 0xFFA0002C); + ctrl_outl(phys_dst, PVR2_DMA_ADDR); + ctrl_outl(1 << PAGE_SHIFT, PVR2_DMA_COUNT); + ctrl_outl(1, PVR2_DMA_MODE); + dma_wait_for_completion(shdma); } out: @@ -890,7 +911,7 @@ static int __init pvr2fb_dc_init(void) pvr2_fix.mmio_start = 0xa05f8000; /* registers start here */ pvr2_fix.mmio_len = 0x2000; - if (request_irq(HW_EVENT_VSYNC, pvr2fb_interrupt, 0, + if (request_irq(HW_EVENT_VSYNC, pvr2fb_interrupt, IRQF_SHARED, "pvr2 VBL handler", fb_info)) { return -EBUSY; } |
From: Adrian M. <ad...@ne...> - 2007-09-22 21:32:31
|
On Sat, 2007-09-22 at 14:16 +0100, Adrian McMenamin wrote: > The code for PVR2 DMA in the kernel is currently completely broken at > both hardware handling and algorithmic levels and more than likely to > cash a fatal crash if used. > > This patch fixes the code and it seems to work well. I know it is not up > to the standards required to go to mainline yet, but any commentary on > it would be helpful. What I now realise is that this is not transferring a bit map, but a series of instructions to the tile accelerator. You can make it work like a bitmap, but that will also require more work... |
From: Paul M. <le...@li...> - 2007-09-23 07:21:44
|
On Sat, Sep 22, 2007 at 02:16:30PM +0100, Adrian McMenamin wrote: > The code for PVR2 DMA in the kernel is currently completely broken at > both hardware handling and algorithmic levels and more than likely to > cash a fatal crash if used. > "Completely" is a bit of an overstatement. I had writes working with it without using the TA and had the PVR2 DMAC completion IRQ firing in the ->write() path without any difficulty. The main issue is that it was very sensitive, and would immediately stop working if something on the SH DMAC cascade side was misconfigured. > This patch fixes the code and it seems to work well. I know it is not up > to the standards required to go to mainline yet, but any commentary on > it would be helpful. > It's certainly a good start, there's obviously more information to go on these days then there was back when this was initially written, and it's good to see that someone is actually carrying it forward :-) I suppose the main thing to note is that the ->write() path is really a useless place to do DMA. Most applications mmap() the framebuffer and dirty pages all over the place, which this simply won't catch. The idea behind the ->write() path was just to have something we could trivially test via things like cat /dev/urandom > /dev/fb0 and so on. Ideally this should be plugged in via the fb deferred I/O, now that we can build scatterlists of dirtied pages in the mmap() path and submit those back in one shot. hecubafb currently uses this, but not for DMA. The DMA work should be submitted by the ->deferred_io() callback, which you can probably just rip out of the write path for better utilization. This is really the way we want to go for fb DMA in general. |
From: Adrian M. <ad...@ne...> - 2007-09-23 10:21:39
|
On Sun, 2007-09-23 at 16:21 +0900, Paul Mundt wrote: > On Sat, Sep 22, 2007 at 02:16:30PM +0100, Adrian McMenamin wrote: > > The code for PVR2 DMA in the kernel is currently completely broken at > > both hardware handling and algorithmic levels and more than likely to > > cash a fatal crash if used. > > > "Completely" is a bit of an overstatement. I had writes working with it > without using the TA and had the PVR2 DMAC completion IRQ firing in the > ->write() path without any difficulty. The main issue is that it was very > sensitive, and would immediately stop working if something on the SH DMAC > cascade side was misconfigured. I'm at a loss to see how it worked at all! It was true that the first transfer would work if it was less than a page long, but no susbsequent transfer would and the contiguity check ought to have blown up every time because of an out-by-one fault in checking the number of pages. Also the code slept in the interrupt context but I know you knew that because it was a debugging message :) > > > This patch fixes the code and it seems to work well. I know it is not up > > to the standards required to go to mainline yet, but any commentary on > > it would be helpful. > > > It's certainly a good start, there's obviously more information to go on > these days then there was back when this was initially written, and it's > good to see that someone is actually carrying it forward :-) > > I suppose the main thing to note is that the ->write() path is really a > useless place to do DMA. Most applications mmap() the framebuffer and > dirty pages all over the place, which this simply won't catch. The idea > behind the ->write() path was just to have something we could trivially > test via things like cat /dev/urandom > /dev/fb0 and so on. > > Ideally this should be plugged in via the fb deferred I/O, now that we > can build scatterlists of dirtied pages in the mmap() path and submit > those back in one shot. hecubafb currently uses this, but not for DMA. > The DMA work should be submitted by the ->deferred_io() callback, which > you can probably just rip out of the write path for better utilization. > This is really the way we want to go for fb DMA in general. I have got a patch now that handles all this using the SH DMA API but... What I now understand (or think I do) is that "texture" (which is what you are really transferring) isn't the same as a bitmap and you have to give the TA commands to handle how it is rendered. Not really versed in the ways of 3D graphics so this is slow going. I am away for a few days now but if anybody has a burning desire to mess with the code they can look here: http://newgolddream.dyndns.info/cgi-bin/cvsweb/aica/pvr2/ |