From: David W. <dw...@in...> - 2001-08-14 14:26:45
|
If we really care about it being faster to flush the whole cache instead of looping over a large virtual address range, then the arch-specific cache code should make that call, depending on the size of the cache actually present on the current CPU, etc. It doesn't belong in the generic pci_alloc_consistent(), imo. OK? Index: ChangeLog =================================================================== RCS file: /cvsroot/linuxsh/kernel/ChangeLog,v retrieving revision 1.327 diff -u -r1.327 ChangeLog --- ChangeLog 2001/08/11 10:18:29 1.327 +++ ChangeLog 2001/08/14 14:23:25 @@ -1,3 +1,11 @@ +2001-08-14 David Woodhouse <dw...@in...> + + * arch/sh/kernel/pci-dma.c: Use dma_cache_wback_inv() in + pci_alloc_consistent instead of flush_cache_all(), which is + a nop on SH3. If the size is so large that it's quicker just + to flush the whole cache, that's a decision for the + arch-specific cache management code to make. + 2001-08-11 David Woodhouse <dw...@in...> * arch/sh/config.in: New CONFIG_SH_PCIDMA_NONCOHERENT option Index: arch/sh/kernel/pci-dma.c =================================================================== RCS file: /cvsroot/linuxsh/kernel/arch/sh/kernel/pci-dma.c,v retrieving revision 1.1 diff -u -r1.1 pci-dma.c --- arch/sh/kernel/pci-dma.c 2001/05/09 07:40:23 1.1 +++ arch/sh/kernel/pci-dma.c 2001/08/14 14:23:25 @@ -29,7 +29,7 @@ *dma_handle = virt_to_bus(ret); } /* We must flush the cache before we pass it on to the device */ - flush_cache_all(); + dma_cache_wback_inv(ret, size); return P2SEGADDR(ret); } -- dwmw2 |
From: David M. <Dav...@st...> - 2001-08-14 14:39:27
|
On Aug 14, 3:26pm, dw...@in... wrote: > Subject: [linuxsh-dev] Small fix for PCI DMA code. > > If we really care about it being faster to flush the whole cache instead of > looping over a large virtual address range, then the arch-specific cache > code should make that call, depending on the size of the cache actually > present on the current CPU, etc. It doesn't belong in the generic > pci_alloc_consistent(), imo. > > OK? I agree. Calling flush_cache_all() was simply done for expediency, I just never had the time to do it properly. I think it should be faster doing it this way, as you don't need to flush the icache. Yet another thing gone from my TODO list! Cheers! -- Dave McKay Software Engineer STMicroelectronics Email: dav...@st... |
From: David W. <dw...@in...> - 2001-08-14 14:49:19
|
Dav...@st... said: > I agree. Calling flush_cache_all() was simply done for expediency, I > just never had the time to do it properly. I think it should be faster > doing it this way, as you don't need to flush the icache. OK, committed. Next I need to clean up some broken drivers... Index: ChangeLog =================================================================== RCS file: /cvsroot/linuxsh/kernel/ChangeLog,v retrieving revision 1.328 diff -u -r1.328 ChangeLog --- ChangeLog 2001/08/14 14:44:58 1.328 +++ ChangeLog 2001/08/14 14:48:09 @@ -1,5 +1,8 @@ 2001-08-14 David Woodhouse <dw...@in...> + * drivers/net/via-rhine.c: Make some attempt to actually + deal with cache coherency issues rather than just inserting + a couple of wmb() calls. * arch/sh/kernel/pci-dma.c: Use dma_cache_wback_inv() in pci_alloc_consistent instead of flush_cache_all(), which is a nop on SH3. If the size is so large that it's quicker just Index: drivers/net/via-rhine.c =================================================================== RCS file: /cvsroot/linuxsh/kernel/drivers/net/via-rhine.c,v retrieving revision 1.9 diff -u -r1.9 via-rhine.c --- drivers/net/via-rhine.c 2001/07/18 02:54:27 1.9 +++ drivers/net/via-rhine.c 2001/08/14 14:48:09 @@ -69,6 +69,9 @@ - Manfred Spraul: use "singlecopy" for unaligned buffers don't allocate bounce buffers for !ReqTxAlign cards + LK1.1.11: + - David Woodhouse: PCI DMA cache coherency fixes. + */ @@ -151,7 +154,7 @@ /* These identify the driver base version and may not be removed. 
*/ static char version[] __devinitdata = -KERN_INFO "via-rhine.c:v1.10-LK1.1.10 07/12/2001 Written by Donald Becker\n" +KERN_INFO "via-rhine.c:v1.10-LK1.1.11 14/08/2001 Written by Donald Becker\n" KERN_INFO " http://www.scyld.com/network/via-rhine.html\n"; static char shortname[] __devinitdata = "via-rhine"; @@ -758,8 +761,9 @@ TX_RING_SIZE * sizeof(struct tx_desc), np->rx_ring, np->rx_ring_dma); - pci_free_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, - np->tx_bufs, np->tx_bufs_dma); + if(np->tx_bufs) + pci_free_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, + np->tx_bufs, np->tx_bufs_dma); } @@ -801,6 +805,8 @@ np->rx_ring[i].addr = cpu_to_le32(np->rx_skbuff_dma[i]); np->rx_ring[i].rx_status = cpu_to_le32(DescOwn); } + pci_dma_sync_single(np->pdev, np->rx_ring_dma, + RX_RING_SIZE * sizeof(struct rx_desc), PCI_DMA_TODEVICE); np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); } @@ -821,6 +827,8 @@ } np->rx_skbuff[i] = 0; } + pci_dma_sync_single(np->pdev, np->rx_ring_dma, + RX_RING_SIZE * sizeof(struct rx_desc), PCI_DMA_TODEVICE); } static void alloc_tbufs(struct net_device* dev) @@ -841,6 +849,9 @@ } np->tx_ring[i-1].next_desc = cpu_to_le32(np->tx_ring_dma); + pci_dma_sync_single(np->pdev, np->tx_ring_dma, + TX_RING_SIZE * sizeof(struct tx_desc), PCI_DMA_TODEVICE); + } static void free_tbufs(struct net_device* dev) @@ -863,6 +874,8 @@ np->tx_skbuff[i] = 0; np->tx_buf[i] = 0; } + pci_dma_sync_single(np->pdev, np->tx_ring_dma, + TX_RING_SIZE * sizeof(struct tx_desc), PCI_DMA_TODEVICE); } static void init_registers(struct net_device *dev) @@ -1124,6 +1137,8 @@ return 0; } skb_copy_and_csum_dev(skb, np->tx_buf[entry]); + pci_dma_sync_single(np->pdev, np->tx_bufs_dma + entry*PKT_BUF_SZ, + PKT_BUF_SZ, PCI_DMA_TODEVICE); np->tx_skbuff_dma[entry] = 0; np->tx_ring[entry].addr = cpu_to_le32(np->tx_bufs_dma + (np->tx_buf[entry] - np->tx_bufs)); @@ -1136,16 +1151,17 @@ np->tx_ring[entry].desc_length = cpu_to_le32(0x00E08000 | (skb->len >= ETH_ZLEN ? 
skb->len : ETH_ZLEN)); + pci_dma_sync_single(np->pdev, np->tx_ring_dma * entry * sizeof(struct tx_desc), sizeof(struct tx_desc), PCI_DMA_TODEVICE); + /* lock eth irq */ spin_lock_irq (&np->lock); - wmb(); + np->tx_ring[entry].tx_status = cpu_to_le32(DescOwn); - wmb(); + pci_dma_sync_single(np->pdev, np->tx_ring_dma * entry * sizeof(struct tx_desc), sizeof(struct tx_desc), PCI_DMA_TODEVICE); + np->cur_tx++; - /* Non-x86 Todo: explicitly flush cache lines here. */ - /* Wake the potentially-idle transmit channel. */ writew(CmdTxDemand | np->chip_cmd, dev->base_addr + ChipCmd); @@ -1215,6 +1231,9 @@ struct netdev_private *np = dev->priv; int txstatus = 0, entry = np->dirty_tx % TX_RING_SIZE; + pci_dma_sync_single(np->pdev, np->tx_ring_dma, + TX_RING_SIZE * sizeof(struct tx_desc), PCI_DMA_FROMDEVICE); + spin_lock (&np->lock); /* find and cleanup dirty tx descriptors */ @@ -1264,6 +1283,8 @@ struct netdev_private *np = dev->priv; int entry = np->cur_rx % RX_RING_SIZE; int boguscnt = np->dirty_rx + RX_RING_SIZE - np->cur_rx; + + pci_dma_sync_single(np->pdev, np->rx_ring_dma, RX_RING_SIZE * sizeof(struct rx_desc), PCI_DMA_FROMDEVICE); if (debug > 4) { printk(KERN_DEBUG " In via_rhine_rx(), entry %d status %8.8x.\n", @@ -1368,6 +1389,8 @@ } np->rx_ring[entry].rx_status = cpu_to_le32(DescOwn); } + + pci_dma_sync_single(np->pdev, np->rx_ring_dma, RX_RING_SIZE * sizeof(struct rx_desc), PCI_DMA_TODEVICE); /* Pre-emptively restart Rx engine. */ writew(CmdRxDemand | np->chip_cmd, dev->base_addr + ChipCmd); -- dwmw2 |
From: David W. <dw...@in...> - 2001-08-15 10:49:34
|
dw...@in... said: > + * drivers/net/via-rhine.c: Make some attempt to actually > + deal with cache coherency issues rather than just inserting > + a couple of wmb() calls. I'm not committing this - it's bogus. The ranges it was trying to flush were uncached anyway, and for some reason the delays introduced by walking the cache entries looking for nonexistent matches were what was making it work. -- dwmw2 |