From: Stephen H. <she...@os...> - 2004-02-20 01:04:32
|
Please give this driver some more work before I push the update to Dave. Changes: * viro's disconnect feedback * async unlink of receive requests to avoid waiting for each one individually. * in fir, copy small receive packets * in fir, use get_unaligned(le32_to_cpu(..)) rather than explicit shifts * in fir, don't turnaround prematurely just because of a error (like crc) * separate sir and fir polling intervals * slightly better reset, change speed logic * transmit path formats wrapped data into a skbuff which allows wrapping in parallel with usb transfer, and avoids allocating a fixed size buffer. * some code cleanup of the txwait path * use do_gettimeofday() rather than CURRENT_TIME for better resolution Tested with two different vendor dongles and against nsc-ircc. diff -Nru a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c --- a/drivers/net/irda/stir4200.c Thu Feb 19 16:29:02 2004 +++ b/drivers/net/irda/stir4200.c Thu Feb 19 16:29:02 2004 @@ -49,12 +49,15 @@ #include <linux/suspend.h> #include <linux/slab.h> #include <linux/usb.h> +#include <linux/crc32.h> #include <net/irda/irda.h> #include <net/irda/irlap.h> #include <net/irda/irda_device.h> #include <net/irda/wrapper.h> #include <net/irda/crc.h> -#include <linux/crc32.h> +#include <asm/byteorder.h> +#include <asm/unaligned.h> + MODULE_AUTHOR("Stephen Hemminger <she...@os...>"); MODULE_DESCRIPTION("IrDA-USB Dongle Driver for SigmaTel STIr4200"); @@ -72,14 +75,19 @@ module_param(tx_power, int, 0); MODULE_PARM_DESC(tx_power, "Set Transmitter power (0-3, 0 is highest power)"); -static int rx_interval = 5; /* milliseconds */ -module_param(rx_interval, int, 0); -MODULE_PARM_DESC(rx_interval, "Receive polling interval (ms)"); +static int fir_interval = 1; /* milliseconds */ +module_param(fir_interval, int, 0); +MODULE_PARM_DESC(fir_interval, "Fast IR polling interval (ms)"); + +static int sir_interval = 5; /* milliseconds */ +module_param(sir_interval, int, 0); +MODULE_PARM_DESC(sir_interval, "Slow IR polling interval (ms)"); #define STIR_IRDA_HEADER 4 #define CTRL_TIMEOUT 100 /* milliseconds */ #define TRANSMIT_TIMEOUT 200 /* milliseconds */ #define STIR_FIFO_SIZE 4096 +#define STIR_RX_SIZE STIR_FIFO_SIZE #define NUM_RX_URBS 2 enum FirChars { @@ -167,18 +175,12 @@ TEST_TSTOSC = 0x0F, }; -enum StirState { - STIR_STATE_RECEIVING=0, - STIR_STATE_TXREADY, -}; - struct stir_cb { struct usb_device *usbdev; /* init: probe_irda */ struct net_device *netdev; /* network layer */ struct irlap_cb *irlap; /* The link layer we are binded to */ struct net_device_stats stats; /* network statistics */ struct qos_info qos; - unsigned long state; unsigned speed; /* Current speed */ wait_queue_head_t thr_wait; /* transmit thread wakeup */ @@ -186,14 +188,14 @@ pid_t thr_pid; unsigned int tx_bulkpipe; - void *tx_data; /* wrapped data out */ - unsigned tx_len; - unsigned tx_newspeed; - unsigned tx_mtt; + struct sk_buff *tx_pending; /* wrapped data out */ unsigned int rx_intpipe; iobuff_t rx_buff; /* receive unwrap state machine */ - struct timespec rx_time; + struct timeval rx_time; + struct completion rx_stop_complete; + int rx_receiving; + atomic_t rx_pending; struct urb *rx_urbs[NUM_RX_URBS]; void *rx_data[NUM_RX_URBS]; @@ -209,7 +211,6 @@ MODULE_DEVICE_TABLE(usb, dongles); -static int fifo_txwait(struct stir_cb *stir, unsigned space); static void stir_usb_receive(struct urb *urb, struct pt_regs *regs); /* Send control message to set dongle register */ @@ -310,12 +311,13 @@ return wraplen + STIR_IRDA_HEADER; } -static unsigned wrap_sir_skb(struct sk_buff *skb, __u8 *buf) +static unsigned wrap_sir_skb(struct sk_buff *skb, __u8 *buf, + unsigned space) { __u16 wraplen; - wraplen = async_wrap_skb(skb, buf + STIR_IRDA_HEADER, - STIR_FIFO_SIZE - STIR_IRDA_HEADER); + wraplen = async_wrap_skb(skb, buf + STIR_IRDA_HEADER, + space - STIR_IRDA_HEADER); buf[0] = 0x55; buf[1] = 0xAA; buf[2] = wraplen & 0xff; @@ -329,60 +331,63 @@ * and pass up to irlap * setup for next receive */ -static void fir_eof(struct stir_cb *stir) +static int fir_eof(struct stir_cb *stir) { iobuff_t *rx_buff = &stir->rx_buff; int len = rx_buff->len - 4; + struct sk_buff *skb; __u32 fcs; - struct sk_buff *nskb; if (unlikely(len <= 0)) { pr_debug("%s: short frame len %d\n", stir->netdev->name, len); - - ++stir->stats.rx_errors; ++stir->stats.rx_length_errors; - return; + goto err_out; } - fcs = rx_buff->data[len] | - rx_buff->data[len+1] << 8 | - rx_buff->data[len+2] << 16 | - rx_buff->data[len+3] << 24; - - if (unlikely(fcs != ~(crc32_le(~0, rx_buff->data, len)))) { + fcs = le32_to_cpu(get_unaligned((u32 *)(rx_buff->data+len))); + if (fcs != ~(crc32_le(~0, rx_buff->data, len))) { pr_debug("%s: crc error\n", stir->netdev->name); - irda_device_set_media_busy(stir->netdev, TRUE); - stir->stats.rx_errors++; stir->stats.rx_crc_errors++; - return; + goto err_out; } - /* If can't get new buffer, just drop and reuse */ - nskb = dev_alloc_skb(IRDA_SKB_MAX_MTU); - if (unlikely(!nskb)) - ++stir->stats.rx_dropped; - else { - struct sk_buff *oskb = rx_buff->skb; + /* if frame is short then just copy it */ + if (len < IRDA_RX_COPY_THRESHOLD) { + skb = dev_alloc_skb(len + 1); + if (unlikely(!skb)) + goto nomem; + skb_reserve(skb, 1); + memcpy(skb->data, rx_buff->data, len); + } else { + struct sk_buff *nskb = dev_alloc_skb(IRDA_SKB_MAX_MTU); + if (unlikely(!nskb)) + goto nomem; + + skb = rx_buff->skb; skb_reserve(nskb, 1); + rx_buff->skb = nskb; + rx_buff->head = nskb->data; + } - /* Set correct length in socket buffer */ - skb_put(oskb, len); + skb_put(skb, len); - oskb->mac.raw = oskb->data; - oskb->protocol = htons(ETH_P_IRDA); - oskb->dev = stir->netdev; + skb->mac.raw = skb->data; + skb->protocol = htons(ETH_P_IRDA); + skb->dev = stir->netdev; - netif_rx(oskb); + netif_rx(skb); - stir->stats.rx_packets++; - stir->stats.rx_bytes += len; - rx_buff->skb = nskb; - rx_buff->head = nskb->data; - } + stir->stats.rx_packets++; + stir->stats.rx_bytes += len; rx_buff->data = rx_buff->head; rx_buff->len = 0; + return 0; + nomem: + ++stir->stats.rx_dropped; + err_out: + return 1; } /* Unwrap FIR stuffed data and bump it to IrLAP */ @@ -439,7 +444,8 @@ case FIR_EOF: rx_buff->state = OUTSIDE_FRAME; rx_buff->in_frame = FALSE; - fir_eof(stir); + if (fir_eof(stir)) + goto error_recovery; continue; } break; @@ -461,7 +467,6 @@ error_recovery: ++stir->stats.rx_errors; - irda_device_set_media_busy(stir->netdev, TRUE); rx_buff->state = OUTSIDE_FRAME; rx_buff->in_frame = FALSE; } @@ -519,7 +524,6 @@ int i, err; __u8 mode; - pr_debug("%s: change speed %d\n", stir->netdev->name, speed); for (i = 0; i < ARRAY_SIZE(stir_modes); ++i) { if (speed == stir_modes[i].speed) goto found; @@ -532,12 +536,15 @@ pr_debug("%s: speed change from %d to %d\n", stir->netdev->name, stir->speed, speed); - /* Make sure any previous Tx is really finished. This happens - * when we answer an incomming request ; the ua:rsp and the - * speed change are bundled together, so we need to wait until - * the packet we just submitted has been sent. Jean II */ - if (fifo_txwait(stir, 0)) - return -EIO; + /* Reset modulator */ + err = write_reg(stir, REG_CTRL1, CTRL1_SRESET); + if (err) + goto out; + + /* Undocumented magic to tweak the DPLL */ + err = write_reg(stir, REG_DPLL, 0x15); + if (err) + goto out; /* Set clock */ err = write_reg(stir, REG_PDCLK, stir_modes[i].pdclk); @@ -564,33 +571,13 @@ goto out; err = write_reg(stir, REG_CTRL1, (tx_power & 3) << 1); - - out: - stir->speed = speed; - return err; -} - -static int stir_reset(struct stir_cb *stir) -{ - int err; - - /* reset state */ - stir->rx_buff.in_frame = FALSE; - stir->rx_buff.state = OUTSIDE_FRAME; - stir->speed = -1; - - /* Undocumented magic to tweak the DPLL */ - err = write_reg(stir, REG_DPLL, 0x15); if (err) goto out; /* Reset sensitivity */ err = write_reg(stir, REG_CTRL2, (rx_sensitivity & 7) << 5); - if (err) - goto out; - - err = change_speed(stir, 9600); out: + stir->speed = speed; return err; } @@ -600,50 +587,69 @@ static int stir_hard_xmit(struct sk_buff *skb, struct net_device *netdev) { struct stir_cb *stir = netdev->priv; + struct sk_buff *nskb; + unsigned maxlen; netif_stop_queue(netdev); /* the IRDA wrapping routines don't deal with non linear skb */ SKB_LINEAR_ASSERT(skb); - if (unlikely(skb->len) == 0) /* speed change only */ - stir->tx_len = 0; - else if (isfir(stir->speed)) - stir->tx_len = wrap_fir_skb(skb, stir->tx_data); - else - stir->tx_len = wrap_sir_skb(skb, stir->tx_data); - - stir->stats.tx_packets++; - stir->stats.tx_bytes += skb->len; + /* need enough space for worst case */ + maxlen = STIR_IRDA_HEADER + irda_get_xbofs(skb) + + 2 * skb->len + 10; - stir->tx_mtt = irda_get_mtt(skb); - stir->tx_newspeed = irda_get_next_speed(skb); + nskb = dev_alloc_skb(maxlen); + if (!nskb) { + stir->stats.tx_errors++; + return 0; + } + + /* copy cb containing speed and mtt info */ + memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); + + /* speed change only */ + if (skb->len > 0) { + unsigned wraplen; - if (!test_and_set_bit(STIR_STATE_TXREADY, &stir->state)) - wake_up(&stir->thr_wait); + if (isfir(stir->speed)) + wraplen = wrap_fir_skb(skb, nskb->data); + else + wraplen = wrap_sir_skb(skb, nskb->data, maxlen); + + skb_put(nskb, wraplen); + stir->stats.tx_packets++; + stir->stats.tx_bytes += skb->len; + } dev_kfree_skb(skb); + + skb = xchg(&stir->tx_pending, nskb); + + BUG_ON(skb != NULL); /* stop/wakeup logic error */ + wake_up(&stir->thr_wait); + return 0; } /* * Wait for the transmit FIFO to have space for next data + * + * If space < 0 then wait till FIFO completely drains. + * FYI: can take up to 13 seconds at 2400baud. */ -static int fifo_txwait(struct stir_cb *stir, unsigned space) +static int fifo_txwait(struct stir_cb *stir, int space) { int err; - unsigned count; __u8 regs[3]; - unsigned long timeout = jiffies + HZ/10; - for(;;) { - /* Read FIFO status and count */ - err = read_reg(stir, REG_FIFOCTL, regs, 3); - if (unlikely(err != 3)) { - WARNING("%s: FIFO register read error: %d\n", - stir->netdev->name, err); - return err; - } + /* Read FIFO status and count */ + while ((err = read_reg(stir, REG_FIFOCTL, regs, 3)) == 3) { + unsigned count; + + count = (unsigned)(regs[2] & 0x1f) << 8 | regs[1]; + pr_debug("%s: fifo status 0x%x count %u\n", + stir->netdev->name, regs[0], count); /* is fifo receiving already, or empty */ if (!(regs[0] & FIFOCTL_DIR) @@ -658,40 +664,32 @@ || !netif_device_present(stir->netdev)) return -ESHUTDOWN; - count = (unsigned)(regs[2] & 0x1f) << 8 | regs[1]; - - pr_debug("%s: fifo status 0x%x count %u\n", - stir->netdev->name, regs[0], count); - /* only waiting for some space */ - if (space && STIR_FIFO_SIZE - 4 > space + count) + if (space >= 0 && STIR_FIFO_SIZE - 4 > space + count) return 0; - if (time_after(jiffies, timeout)) { - WARNING("%s: transmit fifo timeout status=0x%x count=%d\n", - stir->netdev->name, regs[0], count); - ++stir->stats.tx_errors; - irda_device_set_media_busy(stir->netdev, TRUE); - return -ETIMEDOUT; - } - /* estimate transfer time for remaining chars */ - wait_ms((count * 8000) / stir->speed); + wait_ms( (count * 8000) / stir->speed); } + + WARNING("%s: FIFO register read error: %d\n", + stir->netdev->name, err); + return err; } /* Wait for turnaround delay before starting transmit. */ -static void turnaround_delay(long us, const struct timespec *last) +static void turnaround_delay(const struct stir_cb *stir, long us) { long ticks; - struct timespec now = CURRENT_TIME; + struct timeval now; if (us <= 0) return; - us -= (now.tv_sec - last->tv_sec) * USEC_PER_SEC; - us -= (now.tv_nsec - last->tv_nsec) / NSEC_PER_USEC; + do_gettimeofday(&now); + us -= (now.tv_sec - stir->rx_time.tv_sec) * USEC_PER_SEC; + us -= now.tv_usec - stir->rx_time.tv_usec; if (us < 10) return; @@ -711,31 +709,28 @@ { int i; - if (test_and_set_bit(STIR_STATE_RECEIVING, &stir->state)) - return; - - if (fifo_txwait(stir, 0)) - return; - + /* reset state */ + stir->rx_buff.in_frame = FALSE; + stir->rx_buff.state = OUTSIDE_FRAME; + stir->rx_receiving = 1; + wmb(); for (i = 0; i < NUM_RX_URBS; i++) { struct urb *urb = stir->rx_urbs[i]; + urb->status = 0; usb_fill_int_urb(urb, stir->usbdev, stir->rx_intpipe, - stir->rx_data[i], STIR_FIFO_SIZE, - stir_usb_receive, stir, rx_interval); + stir->rx_data[i], STIR_RX_SIZE, + stir_usb_receive, stir, + isfir(stir->speed) ? fir_interval + : sir_interval); + urb->transfer_flags = URB_ASYNC_UNLINK; - if (usb_submit_urb(urb, GFP_KERNEL)) - urb->status = -EINVAL; + if (0 == usb_submit_urb(urb, GFP_KERNEL)) + atomic_inc(&stir->rx_pending); } - if (i == 0) { - /* if nothing got queued, then just retry next time */ - if (net_ratelimit()) - WARNING("%s: no receive buffers avaiable\n", - stir->netdev->name); - - clear_bit(STIR_STATE_RECEIVING, &stir->state); - } + if (atomic_read(&stir->rx_pending) == 0) + stir->rx_receiving = 0; } /* Stop all pending receive Urb's */ @@ -743,34 +738,34 @@ { int i; - for (i = 0; i < NUM_RX_URBS; i++) { - struct urb *urb = stir->rx_urbs[i]; - usb_unlink_urb(urb); - } + stir->rx_receiving = 0; + wmb(); + for (i = 0; i < NUM_RX_URBS; i++) + usb_unlink_urb(stir->rx_urbs[i]); + + wait_for_completion(&stir->rx_stop_complete); } -/* Send wrapped data (in tx_data) to device */ -static void stir_send(struct stir_cb *stir) +/* Send wrapped data */ +static void stir_send(struct stir_cb *stir, struct sk_buff *skb) { int rc; - if (test_and_clear_bit(STIR_STATE_RECEIVING, &stir->state)) { + if (stir->rx_receiving) { receive_stop(stir); - - turnaround_delay(stir->tx_mtt, &stir->rx_time); + turnaround_delay(stir, irda_get_mtt(skb)); if (stir->rx_buff.in_frame) ++stir->stats.collisions; } - else if (fifo_txwait(stir, stir->tx_len)) + else if (fifo_txwait(stir, skb->len)) return; /* shutdown or major errors */ stir->netdev->trans_start = jiffies; - pr_debug("%s: send %d\n", stir->netdev->name, stir->tx_len); - rc = usb_bulk_msg(stir->usbdev, - stir->tx_bulkpipe, - stir->tx_data, stir->tx_len, + pr_debug("%s: send len=%d\n", stir->netdev->name, skb->len); + rc = usb_bulk_msg(stir->usbdev, stir->tx_bulkpipe, + skb->data, skb->len, NULL, MSECS_TO_JIFFIES(TRANSMIT_TIMEOUT)); if (unlikely(rc)) { @@ -796,44 +791,59 @@ && netif_device_present(dev) && !signal_pending(current)) { - /* make swsusp happy with our thread */ + struct sk_buff *skb; + + /* if suspending, then power off and wait */ if (current->flags & PF_FREEZE) { - receive_stop(stir); + if (stir->rx_receiving) + receive_stop(stir); + else + fifo_txwait(stir, -1); write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD); refrigerator(PF_IOTHREAD); - stir_reset(stir); + if (change_speed(stir, stir->speed)) + break; } /* if something to send? */ - if (test_and_clear_bit(STIR_STATE_TXREADY, &stir->state)) { - unsigned new_speed = stir->tx_newspeed; - - /* Note that we may both send a packet and - * change speed in some cases. Jean II */ - - if (stir->tx_len != 0) - stir_send(stir); + skb = xchg(&stir->tx_pending, NULL); + if (skb) { + unsigned new_speed = irda_get_next_speed(skb); + + netif_wake_queue(dev); + if (skb->len != 0) + stir_send(stir, skb); + + dev_kfree_skb(skb); + if (stir->speed != new_speed) { + if (fifo_txwait(stir, -1)) + break; + if (change_speed(stir, new_speed)) { + WARNING("%s: speed change hardware error\n", + dev->name); + } + } + } else { + if (!stir->rx_receiving + && irda_device_txqueue_empty(dev)) { + /* Wait otherwise chip gets confused. */ + if (fifo_txwait(stir, -1)) + break; - if (stir->speed != new_speed) - change_speed(stir, new_speed); + receive_start(stir); + } - netif_wake_queue(stir->netdev); - continue; + set_task_state(current, TASK_INTERRUPTIBLE); + add_wait_queue(&stir->thr_wait, &wait); + if (stir->tx_pending) + __set_task_state(current, TASK_RUNNING); + else + schedule_timeout(HZ/10); + remove_wait_queue(&stir->thr_wait, &wait); } - - if (irda_device_txqueue_empty(dev)) - receive_start(stir); - - set_task_state(current, TASK_INTERRUPTIBLE); - add_wait_queue(&stir->thr_wait, &wait); - if (test_bit(STIR_STATE_TXREADY, &stir->state)) - __set_task_state(current, TASK_RUNNING); - else - schedule_timeout(HZ/10); - remove_wait_queue(&stir->thr_wait, &wait); } complete_and_exit (&stir->thr_exited, 0); @@ -848,53 +858,27 @@ static void stir_usb_receive(struct urb *urb, struct pt_regs *regs) { struct stir_cb *stir = urb->context; - int err; - - if (!netif_running(stir->netdev)) - return; - switch (urb->status) { - case 0: + if (netif_running(stir->netdev) && urb->status == 0) { if(urb->actual_length > 0) { pr_debug("%s: receive %d\n", stir->netdev->name, urb->actual_length); unwrap_chars(stir, urb->transfer_buffer, urb->actual_length); - + stir->netdev->last_rx = jiffies; - stir->rx_time = CURRENT_TIME; + do_gettimeofday(&stir->rx_time); } - break; - case -ECONNRESET: /* killed but pending */ - case -ENOENT: /* killed but not in use */ - case -ESHUTDOWN: - /* These are normal errors when URB is cancelled */ - stir->rx_buff.in_frame = FALSE; - stir->rx_buff.state = OUTSIDE_FRAME; - return; + /* Normal case, resubmit same urb to pick up more data */ + if (stir->rx_receiving && 0 == usb_submit_urb(urb, GFP_ATOMIC)) + return; - default: - WARNING("%s: received status %d\n", stir->netdev->name, - urb->status); - stir->stats.rx_errors++; - urb->status = 0; } - /* kernel thread is stopping receiver don't resubmit */ - if (!test_bit(STIR_STATE_RECEIVING, &stir->state)) - return; - - /* resubmit existing urb */ - err = usb_submit_urb(urb, GFP_ATOMIC); - - /* in case of error, the kernel thread will restart us */ - if (err) { - WARNING("%s: usb receive submit error: %d\n", - stir->netdev->name, err); - urb->status = -ENOENT; - wake_up(&stir->thr_wait); - } + /* don't resubmit, and wakeup receive_stop */ + if (atomic_dec_and_test(&stir->rx_pending)) + complete(&stir->rx_stop_complete); } @@ -909,45 +893,42 @@ int i, err; char hwname[16]; - err = stir_reset(stir); + err = change_speed(stir, 9600); if (err) goto err_out1; err = -ENOMEM; - - /* Note: Max SIR frame possible is 4273 */ - stir->tx_data = kmalloc(STIR_FIFO_SIZE, GFP_KERNEL); - if (!stir->tx_data) { - ERROR("%s(), alloc failed for rxbuf!\n", __FUNCTION__); - goto err_out1; - } - /* Initialize for SIR/FIR to copy data directly into skb. */ + stir->rx_receiving = 0; + atomic_set(&stir->rx_pending, 0); + init_completion(&stir->rx_stop_complete); stir->rx_buff.truesize = IRDA_SKB_MAX_MTU; stir->rx_buff.skb = dev_alloc_skb(IRDA_SKB_MAX_MTU); if (!stir->rx_buff.skb) { ERROR("%s(), dev_alloc_skb() failed for rxbuf!\n", __FUNCTION__); - goto err_out2; + goto err_out1; } skb_reserve(stir->rx_buff.skb, 1); stir->rx_buff.head = stir->rx_buff.skb->data; - stir->rx_time = CURRENT_TIME; + do_gettimeofday(&stir->rx_time); /* Allocate N receive buffer's and urbs */ for (i = 0; i < NUM_RX_URBS; i++) { - stir->rx_urbs[i] = usb_alloc_urb(0, GFP_KERNEL); - if (!stir->rx_urbs[i]){ + struct urb *urb = usb_alloc_urb(0, GFP_KERNEL); + + if (!urb) { ERROR("%s(), usb_alloc_urb failed\n", __FUNCTION__); goto err_out3; } - stir->rx_data[i] = kmalloc(STIR_FIFO_SIZE, GFP_KERNEL); + stir->rx_data[i] = kmalloc(STIR_RX_SIZE, GFP_KERNEL); if (!stir->rx_data) { - usb_free_urb(stir->rx_urbs[i]); + usb_free_urb(urb); ERROR("%s(), alloc failed for rxbuf!\n", __FUNCTION__); goto err_out3; } + stir->rx_urbs[i] = urb; } /* @@ -984,8 +965,6 @@ kfree(stir->rx_data[i]); } kfree_skb(stir->rx_buff.skb); - err_out2: - kfree(stir->tx_data); err_out1: return err; } @@ -1001,19 +980,22 @@ struct stir_cb *stir = netdev->priv; int i; + pr_debug("%s: net close\n", stir->netdev->name); + /* Stop transmit processing */ netif_stop_queue(netdev); /* Kill transmit thread */ kill_proc(stir->thr_pid, SIGTERM, 1); wait_for_completion(&stir->thr_exited); - kfree(stir->tx_data); - - clear_bit(STIR_STATE_RECEIVING, &stir->state); - receive_stop(stir); + /* Mop up receive urb's */ for (i = 0; i < NUM_RX_URBS; i++) { - usb_free_urb(stir->rx_urbs[i]); + struct urb *urb = stir->rx_urbs[i]; + urb->transfer_flags &= ~URB_ASYNC_UNLINK; + usb_unlink_urb(urb); + usb_free_urb(urb); + stir->rx_urbs[i] = NULL; kfree(stir->rx_data[i]); } kfree_skb(stir->rx_buff.skb); @@ -1057,7 +1039,7 @@ case SIOCGRECEIVING: /* Only approximately true */ - irq->ifr_receiving = test_bit(STIR_STATE_RECEIVING, &stir->state); + irq->ifr_receiving = stir->rx_receiving; break; default: @@ -1170,8 +1152,8 @@ stir->qos.min_turn_time.bits &= qos_mtt_bits; irda_qos_bits_to_value(&stir->qos); - init_completion (&stir->thr_exited); - init_waitqueue_head (&stir->thr_wait); + init_completion(&stir->thr_exited); + init_waitqueue_head(&stir->thr_wait); /* Override the network functions we need to use */ net->hard_start_xmit = stir_hard_xmit; @@ -1180,10 +1162,6 @@ net->get_stats = stir_net_get_stats; net->do_ioctl = stir_net_ioctl; - ret = stir_reset(stir); - if (ret) - goto err_out2; - ret = register_netdev(net); if (ret != 0) goto err_out2; @@ -1206,23 +1184,14 @@ static void stir_disconnect(struct usb_interface *intf) { struct stir_cb *stir = usb_get_intfdata(intf); - struct net_device *net; - usb_set_intfdata(intf, NULL); if (!stir) return; - /* Stop transmitter */ - net = stir->netdev; - netif_device_detach(net); + unregister_netdev(stir->netdev); + free_netdev(stir->netdev); - /* Remove netdevice */ - unregister_netdev(net); - - /* No longer attached to USB bus */ - stir->usbdev = NULL; - - free_netdev(net); + usb_set_intfdata(intf, NULL); } |
From: Jean T. <jt...@bo...> - 2004-02-20 01:28:15
|
On Thu, Feb 19, 2004 at 04:57:51PM -0800, Stephen Hemminger wrote: > Please give this driver some more work before I push the update to Dave. > Changes: > * viro's disconnect feedback > * async unlink of receive requests to avoid waiting for each one > individually. Be careful that it won't catch you when you disconnect. Yep, it seems that you have been careful. > * in fir, copy small receive packets I didn't realised that you remove the RX_COPYBREAK that I added to the previous version of the driver. The old code was IMHO much nicer ;-) > * transmit path formats wrapped data into a skbuff which allows > wrapping in parallel with usb transfer, and avoids allocating a fixed > size buffer. Wrapping is not that bad, it takes ~170us to wrap 2000 bytes on a P6 200MHz with a 16 bits CRC. Thanks... Jean |
From: Jean T. <jt...@bo...> - 2004-02-20 02:41:28
|
On Thu, Feb 19, 2004 at 04:57:51PM -0800, Stephen Hemminger wrote: > Please give this driver some more work before I push the update to Dave. Ok, tested it. First comment : the patch did not apply cleanly here. It's fairly possible that I didn't had the right stir version to start with, or that Dave's version is already patched, but you may want to look at it. On hunk fail : @@ -909,45 +893,42 @@ Second : Performance and stability is much better, but it's still easy to make the driver fail at FIR. A summary of my perf testing is below (you can compare to the numbers on my web page with dyn-perf.txt). Don't get me wrong, I'm not criticising your work, and I consider that if it works at all it's almost a miracle, but you may want to know how it goes ;-) Otherwise, yes, just push that to Dave ;-) Jean ------------------------------------------------------- Tx -> SIR [Unidirectional big packets] ./irdaspray -d -n 500 -b 2000 Transmitted 1000000 bytes in 88.212331 seconds (11.071 kbytes/s) [Unidirectional small packets] ./irdaspray -d -n 5000 -b 10 Transmitted 50000 bytes in 28.520014 seconds (1.712 kbytes/s) [Bidirectional big packets] ./irdaspray -n 250 -b 2000 Transmitted 500000 bytes in 84.328434 seconds (5.790 kbytes/s) Received 500000 bytes in 88.776200 seconds (5.500 kbytes/s) [Bidirectional small packets] ./irdaspray -n 5000 -b 10 Transmitted 50000 bytes in 35.600106 seconds (1.372 kbytes/s) Received 50000 bytes in 35.739996 seconds (1.366 kbytes/s) ------------------------------------------------------- Rx <- SIR [Unidirectional big packets] ./irdaspray -d -n 500 -b 2000 Transmitted 1000000 bytes in 88.211238 seconds (11.071 kbytes/s) [Unidirectional small packets] ./irdaspray -d -n 5000 -b 10 Transmitted 50000 bytes in 14.250093 seconds (3.427 kbytes/s) [Bidirectional big packets] ./irdaspray -n 250 -b 2000 Transmitted 500000 bytes in 84.331258 seconds (5.790 kbytes/s) Received 500000 bytes in 88.780991 seconds (5.500 kbytes/s) [Bidirectional small packets] ./irdaspray -n 5000 -b 10 Transmitted 50000 bytes in 35.600320 seconds (1.372 kbytes/s) Received 50000 bytes in 35.740193 seconds (1.366 kbytes/s) ------------------------------------------------------- Tx -> FIR [Unidirectional big packets] ./irdaspray -d -n 500 -b 2000 Transmitted 1000000 bytes in 2.925108 seconds (333.855 kbytes/s) + loose sync at disconnect [Unidirectional small packets] ./irdaspray -d -n 5000 -b 10 => loose sync during first try Transmitted 50000 bytes in 22.075038 seconds (2.212 kbytes/s) [Bidirectional big packets] ./irdaspray -n 250 -b 2000 Transmitted 500000 bytes in 84.140410 seconds (5.803 kbytes/s) Received 500000 bytes in 88.317188 seconds (5.529 kbytes/s) [Bidirectional small packets] ./irdaspray -n 5000 -b 10 Transmitted 50000 bytes in 23.528428 seconds (2.075 kbytes/s) Received 50000 bytes in 23.587906 seconds (2.070 kbytes/s) ------------------------------------------------------- Rx <- FIR [Unidirectional big packets] ./irdaspray -d -n 500 -b 2000 Transmitted 1000000 bytes in 107.801718 seconds (9.059 kbytes/s) [Unidirectional small packets] ./irdaspray -d -n 5000 -b 10 => loose sync during first try Transmitted 50000 bytes in 6.415484 seconds (7.611 kbytes/s) [Bidirectional big packets] ./irdaspray -n 250 -b 2000 Transmitted 500000 bytes in 53.719613 seconds (9.089 kbytes/s) Received 500000 bytes in 56.380229 seconds (8.661 kbytes/s) [Bidirectional small packets] ./irdaspray -n 5000 -b 10 Transmitted 50000 bytes in 23.460882 seconds (2.081 kbytes/s) Received 50000 bytes in 23.588824 seconds (2.070 kbytes/s) |
From: Martin D. <li...@md...> - 2004-02-25 17:57:06
|
On Thu, 19 Feb 2004, Jean Tourrilhes wrote: > On Thu, Feb 19, 2004 at 04:57:51PM -0800, Stephen Hemminger wrote: > > Please give this driver some more work before I push the update to Dave. > > Ok, tested it. > First comment : the patch did not apply cleanly here. It's > fairly possible that I didn't had the right stir version to start > with, or that Dave's version is already patched, but you may want to > look at it. > On hunk fail : @@ -909,45 +893,42 @@ Tried 2.6.3-bk6 + this patch, applied cleanly. Then added my patch to have sufficient tx-buffer, see the other mail. > Second : Performance and stability is much better, but it's > still easy to make the driver fail at FIR. A summary of my perf > testing is below (you can compare to the numbers on my web page with > dyn-perf.txt). Don't get me wrong, I'm not criticising your work, and > I consider that if it works at all it's almost a miracle, but you may > want to know how it goes ;-) > > Otherwise, yes, just push that to Dave ;-) Second this - Thanks Stephen! > > Jean > Tested on ohci_hcd, against vlsi_ir (2.6.3). Basically same results here, details below. I think we are basically limited by the turnaround handling: Both polling the FIFOCTL and unlinking the rx-urbs take up to 2-3 msec to complete, depending on the usb-hcd. Maybe that's why I'm slightly better with ohci and short sir-frames - I'm assuming you are using uhci_hcd. If I switch to ehci_hcd the usb latencies get reduced to some 0.4 msec (due to interrupt-completion on 125 usec microframe boundary) and I'm getting up to 50% better performance with small frames (3.2 kb/s Tx with 10 byte unidirectional packets). I think this demonstrates we are currently limited by the usb latencies. For now, I haven't checked why my FIR results are less than yours when using default mtt=1 msec and tx/rx-window=7. Maybe my testbox is slower and takes more time to wakeup the stir-thread. But this can't be the whole story since I'm getting up to 170 kb/s Tx when setting tx_window=1 on the stir-side! It seems the more frequent turnaround avoids loosing some pf=1 frames every now and then. Again, there appears to be some difference between ohci and ehci - also for the lockup at lap-disconnect - but I haven't yet spotted some pattern there. See below for FIR results with different configurations. Martin > ------------------------------------------------------- > Tx -> SIR > > [Unidirectional big packets] > ./irdaspray -d -n 500 -b 2000 > Transmitted 1000000 bytes in 88.212331 seconds (11.071 kbytes/s) 11.068 > [Unidirectional small packets] > ./irdaspray -d -n 5000 -b 10 > Transmitted 50000 bytes in 28.520014 seconds (1.712 kbytes/s) 2.140 > [Bidirectional big packets] > ./irdaspray -n 250 -b 2000 > Transmitted 500000 bytes in 84.328434 seconds (5.790 kbytes/s) > Received 500000 bytes in 88.776200 seconds (5.500 kbytes/s) 5.807 (tx) / 5.516 (rx) > [Bidirectional small packets] > ./irdaspray -n 5000 -b 10 > Transmitted 50000 bytes in 35.600106 seconds (1.372 kbytes/s) > Received 50000 bytes in 35.739996 seconds (1.366 kbytes/s) 1.558 (tx) / 1.552 (rx) > ------------------------------------------------------- > Rx <- SIR > > [Unidirectional big packets] > ./irdaspray -d -n 500 -b 2000 > Transmitted 1000000 bytes in 88.211238 seconds (11.071 kbytes/s) 11.035 > [Unidirectional small packets] > ./irdaspray -d -n 5000 -b 10 > Transmitted 50000 bytes in 14.250093 seconds (3.427 kbytes/s) 3.412 > [Bidirectional big packets] > ./irdaspray -n 250 -b 2000 > Transmitted 500000 bytes in 84.331258 seconds (5.790 kbytes/s) > Received 500000 bytes in 88.780991 seconds (5.500 kbytes/s) 5.470 (rx) / 5.733 (tx) > [Bidirectional small packets] > ./irdaspray -n 5000 -b 10 > Transmitted 50000 bytes in 35.600320 seconds (1.372 kbytes/s) > Received 50000 bytes in 35.740193 seconds (1.366 kbytes/s) 1.553 (rx) / 1.558 (tx) > ------------------------------------------------------- Many (in fact most) of the FIR tests ended with the dongle being unable to receive any more data. Dongle power cycle was required to recover. This seems to coincide with the vlsi reporting rx-framing errors. Apparently triggered by the LAP-disconnect at FIR speeds. But sometimes it was also locking up when switching to FIR or even in the middle of the connection. > Tx -> FIR > > [Unidirectional big packets] > ./irdaspray -d -n 500 -b 2000 > Transmitted 1000000 bytes in 2.925108 seconds (333.855 kbytes/s) > + loose sync at disconnect 86.071 (ohci, mtt=5, wnd=7/7) 140.073 (ohci, mtt=1, wnd=1/1) 110.262 (ohci, mtt=1, wnd=7/7) 108.218 (ohci, mtt=1, wnd=2/2) 13.507 (ehci, mtt=1, wnd=2/2) ca. 160 (ehci, mtt=1, wnd=1/1) (varying) ca. 140 (ehci, mtt=1, wnd=7/7) (varying) > [Unidirectional small packets] > ./irdaspray -d -n 5000 -b 10 > => loose sync during first try > Transmitted 50000 bytes in 22.075038 seconds (2.212 kbytes/s) 1.802 (ohci, mtt=5, wnd=7/7) 1.316 (ohci, mtt=1, wnd=1/1) 2.085 (ohci, mtt=1, wnd=7/7) 1.468 (ohci, mtt=1, wnd=2/2) 2.278 (ehci, mtt=1, wnd=1/1) 6.163 (ehci, mtt=1, wnd=7/7) > [Bidirectional big packets] > ./irdaspray -n 250 -b 2000 > Transmitted 500000 bytes in 84.140410 seconds (5.803 kbytes/s) > Received 500000 bytes in 88.317188 seconds (5.529 kbytes/s) 5.894 (tx) / 5.312 (rx) (ohci, mtt=5, wnd=7/7) 44.813 (tx) / 39.644 (rx) (ohci, mtt=1, wnd=1/1) 5.509 (tx) / 5.261 (rx) (ohci, mtt=1, wnd=7/7) 39.417 (tx) / 33.618 (rx) (ohci, mtt=1, wnd=2/2) 14.910 (tx) / 14.259 (rx) (ehci, mtt=1, wnd=1/1) 3.913 (tx) / 3.778 (rx) (ehci, mtt=1, wnd=7/7) > [Bidirectional small packets] > ./irdaspray -n 5000 -b 10 > Transmitted 50000 bytes in 23.528428 seconds (2.075 kbytes/s) > Received 50000 bytes in 23.587906 seconds (2.070 kbytes/s) 1.598 (tx) / 1.592 (rx) (ohci, mtt=5, wnd=7/7) 1.225 (tx) / 1.196 (rx) (ohci, mtt=1, wnd=1/1) 1.883 (tx) / 1.875 (rx) (ohci, mtt=1, wnd=7/7) 1.321 (tx) / 1.314 (rx) (ohci, mtt=1, wnd=2/2) 2.040 (tx) / 2.025 (rx) (ehci, mtt=1, wnd=1/1) 2.813 (tx) / 2.800 (rx) (ehci, mtt=1, wnd=7/7) > ------------------------------------------------------- > Rx <- FIR > > [Unidirectional big packets] > ./irdaspray -d -n 500 -b 2000 > Transmitted 1000000 bytes in 107.801718 seconds (9.059 kbytes/s) 47.806 (ohci, mtt=5, wnd=1/1) 50.875 (ohci, mtt=1, wnd=1/1) 7.656 (ohci, mtt=1, wnd=7/7) 7.506 (ohci, mtt=5, wnd=7/7) 54.598 (ohci, mtt=5, wnd=2/2) 54.938 (ohci, mtt=1, wnd=2/2) 15.018 (ehci, mtt=1, wnd=1/1) > [Unidirectional small packets] > ./irdaspray -d -n 5000 -b 10 > => loose sync during first try > Transmitted 50000 bytes in 6.415484 seconds (7.611 kbytes/s) 0.999 (ohci, mtt=5, wnd=1/1) 1.286 (ohci, mtt=1, wnd=1/1) 4.484 (ohci, mtt=1, wnd=7/7) 2.196 (ohci, mtt=1, wnd=2/2) 3.703 (ehci, mtt=1, wnd=7/7) > [Bidirectional big packets] > ./irdaspray -n 250 -b 2000 > Transmitted 500000 bytes in 53.719613 seconds (9.089 kbytes/s) > Received 500000 bytes in 56.380229 seconds (8.661 kbytes/s) 40.272 (rx) / 42.064 (tx) (ohci, mtt=5, wnd=1/1) 42.918 (rx) / 44.761 (tx) (ohci, mtt=1, wnd=1/1) 4.014 (rx) / 4.142 (tx) (ohci, mtt=1, wnd=7/7) 3.753 (rx) / 3.926 (tx) (ohci, mtt=1, wnd=7/7) 12.601 (rx) / 15.393 (tx) (ohci, mtt=5, wnd=2/2) 33.771 (rx) / 35.537 (tx) (ohci, mtt=1, wnd=2/2) > [Bidirectional small packets] > ./irdaspray -n 5000 -b 10 > Transmitted 50000 bytes in 23.460882 seconds (2.081 kbytes/s) > Received 50000 bytes in 23.588824 seconds (2.070 kbytes/s) 0.920 (rx) / 0.926 (tx) (ohci, mtt=5, wnd=1/1) 1.110 (rx) / 1.248 (tx) (ohci, mtt=1, wnd=1/1) 1.729 (rx) / 1.771 (tx) (ohci, mtt=1, wnd=7/7) 1.351 (rx) / 1.477 (tx) (ohci, mtt=1, wnd=2/2) |
From: Jean T. <jt...@bo...> - 2004-02-25 22:33:07
|
On Wed, Feb 25, 2004 at 06:33:27PM +0100, Martin Diehl wrote: > > Tested on ohci_hcd, against vlsi_ir (2.6.3). Basically same results here, > details below. I think we are basically limited by the turnaround > handling: Both polling the FIFOCTL and unlinking the rx-urbs take up to > 2-3 msec to complete, depending on the usb-hcd. Maybe that's why I'm > slightly better with ohci and short sir-frames - I'm assuming you are > using uhci_hcd. I'm using ohci_hcd. I had all king of issues with BlueTooth and uhci in 2.4.X, so I don't trust uhci_hcd. > If I switch to ehci_hcd the usb latencies get reduced to some 0.4 msec > (due to interrupt-completion on 125 usec microframe boundary) and I'm > getting up to 50% better performance with small frames (3.2 kb/s Tx with > 10 byte unidirectional packets). I think this demonstrates we are > currently limited by the usb latencies. Interesting. Maybe I should get a EHCI card ;-) > For now, I haven't checked why my FIR results are less than yours when > using default mtt=1 msec and tx/rx-window=7. Maybe my testbox is slower > and takes more time to wakeup the stir-thread. Remember that my boxes are also SMP. I get better result with my older Dual PPro200 than my laptop Celeron 550. > But this can't be the whole story since I'm getting up to 170 kb/s Tx when > setting tx_window=1 on the stir-side! It seems the more frequent > turnaround avoids loosing some pf=1 frames every now and then. Again, > there appears to be some difference between ohci and ehci - also for the > lockup at lap-disconnect - but I haven't yet spotted some pattern there. > See below for FIR results with different configurations. > > Martin Personally, I believe that performance is only a 'nice to have' feature, my main goal is robustness and compatibility. By the way, yell at me when I'm supposed to push something to David. Thanks... Jean |
From: Martin D. <li...@md...> - 2004-02-26 00:12:26
|
On Wed, 25 Feb 2004, Jean Tourrilhes wrote: > > Tested on ohci_hcd, against vlsi_ir (2.6.3). Basically same results here, > > details below. I think we are basically limited by the turnaround > > handling: Both polling the FIFOCTL and unlinking the rx-urbs take up to > > 2-3 msec to complete, depending on the usb-hcd. Maybe that's why I'm > > slightly better with ohci and short sir-frames - I'm assuming you are > > using uhci_hcd. > > I'm using ohci_hcd. I had all king of issues with BlueTooth > and uhci in 2.4.X, so I don't trust uhci_hcd. ;-) > > If I switch to ehci_hcd the usb latencies get reduced to some 0.4 msec > > (due to interrupt-completion on 125 usec microframe boundary) and I'm > > getting up to 50% better performance with small frames (3.2 kb/s Tx with > > 10 byte unidirectional packets). I think this demonstrates we are > > currently limited by the usb latencies. > > Interesting. Maybe I should get a EHCI card ;-) It's a good idea for any usb-related driver work in general because there are some subtle differences when the device is sitting behind a HS/TT hub, mainly timing and error path... > > For now, I haven't checked why my FIR results are less than yours when > > using default mtt=1 msec and tx/rx-window=7. Maybe my testbox is slower > > and takes more time to wakeup the stir-thread. > > Remember that my boxes are also SMP. I get better result with > my older Dual PPro200 than my laptop Celeron 550. Ok. I was just wondering why I was beating you with short SIR frames while you are faster at FIR. But SMP favouring the thread wakeup might explain it as well - never mind... > > But this can't be the whole story since I'm getting up to 170 kb/s Tx when > > setting tx_window=1 on the stir-side! It seems the more frequent > > turnaround avoids loosing some pf=1 frames every now and then. Again, > > there appears to be some difference between ohci and ehci - also for the > > lockup at lap-disconnect - but I haven't yet spotted some pattern there. > > See below for FIR results with different configurations. > > Personally, I believe that performance is only a 'nice to > have' feature, my main goal is robustness and compatibility. Ansolutely. However, it seems there is some close relation in this case. Only talking about FIR (SIR appears pretty good for me) it appears much of the performance limitation comes from lost frames. That's why I gain more than a factor of 10 in some configuration when limiting the window size (both) to 1. It's not entirely clear why the frames get lost otherwise, but it seems the stir lockups happen when receiving data at the wrong moment. I guess this is why Stephen was asking for the collision handling a few days ago. Since there is nothing we can do in order to 100% rule out any rx traffic at the wrong moment, the best we can do for stability is reducing the probability. So performance is just a result of trying to reduce the lockups. Yes, configurations with 150 kb/s due to window=1 appear to be somewhat less likely to lockup compared to window=7 giving much worse performance in many cases. Btw, I have to stop now, but my last observation was the the lockups on lap disconnect seem to be related with a rd:rsp from the other side never seen on the stir. At least I have seen several such situations where it locked up while it didn't if either the rd:rsp was actually received or the primary side (stir) was actively driving the disconnect. But I'm completely lost wrt. what might be special there if we lose the rd_rsp. Note, it doesn't look like it would be the len=0 speed change later because the rd:rsp is not seen and the primary continues retrying rr:cmd until N2 * F timed out. But only the first of these rr:cmd is seen by the secondary. Could it be the speed change back to 9600 on the secondary gets triggered by te rd:rsp plus some delay? My understanding of the NRM(S) state machine is it would have to repeat the rd:rsp until either the disc:cmd arrives or the WD-timer expires... > By the way, yell at me when I'm supposed to push something to > David. Ok, the tx-assertion revealed some more issues with the existing sir-wrapper. I need to check this too. Do you know if any existing driver cares for tx buffer overrun in async_wrap_skb? If not, I'd suggest to change it to return 0 in that case so drivers at least have the opportunity to check and drop the frame instead of sending partial frames. Martin |
From: Stephen H. <she...@os...> - 2004-02-26 00:40:43
|
> Ansolutely. However, it seems there is some close relation in this case. > Only talking about FIR (SIR appears pretty good for me) it appears much of > the performance limitation comes from lost frames. That's why I gain more > than a factor of 10 in some configuration when limiting the window size > (both) to 1. It's not entirely clear why the frames get lost otherwise, > but it seems the stir lockups happen when receiving data at the wrong > moment. I guess this is why Stephen was asking for the collision handling > a few days ago. > > Since there is nothing we can do in order to 100% rule out any rx traffic > at the wrong moment, the best we can do for stability is reducing the > probability. So performance is just a result of trying to reduce the > lockups. Yes, configurations with 150 kb/s due to window=1 appear to be > somewhat less likely to lockup compared to window=7 giving much worse > performance in many cases. > > Btw, I have to stop now, but my last observation was the the lockups on > lap disconnect seem to be related with a rd:rsp from the other side never > seen on the stir. At least I have seen several such situations where it > locked up while it didn't if either the rd:rsp was actually received or > the primary side (stir) was actively driving the disconnect. > > But I'm completely lost wrt. what might be special there if we lose the > rd_rsp. Note, it doesn't look like it would be the len=0 speed change > later because the rd:rsp is not seen and the primary continues retrying > rr:cmd until N2 * F timed out. But only the first of these rr:cmd is seen > by the secondary. Could it be the speed change back to 9600 on the > secondary gets triggered by te rd:rsp plus some delay? My understanding of > the NRM(S) state machine is it would have to repeat the rd:rsp until > either the disc:cmd arrives or the WD-timer expires... > > > > By the way, yell at me when I'm supposed to push something to > > David. > > Ok, the tx-assertion revealed some more issues with the existing > sir-wrapper. I need to check this too. Do you know if any existing driver > cares for tx buffer overrun in async_wrap_skb? If not, I'd suggest to > change it to return 0 in that case so drivers at least have the > opportunity to check and drop the frame instead of sending partial frames. I have a new version of the driver split into pieces, but want to get a few more things stable. Probably tommorow. The main thing left is that is stalls that occur during FIR receive. What happens is that it gets a full receive buffer/urb then a partial one. This is with a window size of 7. The last buffer contains a two partial frames, the first is the fine and gets sent up. The second partial frame has a start, but no EOF or CRC... The partial is always a multiple of USB xfer (64) and eventually (500ms) the timer goes off and we try to send. When the send collides everything returns to normal. We aren't overruning the FIFO (been there, done that, new version has code to handle it), it seems like something related to USB and using the receive pipe as interrupt. Or another UHCI bug? |
From: Jean T. <jt...@bo...> - 2004-02-26 01:31:04
|
On Wed, Feb 25, 2004 at 04:37:54PM -0800, Stephen Hemminger wrote: > > I have a new version of the driver split into pieces, but want to get > a few more things stable. Probably tommorow. The main thing left is > that is stalls that occur during FIR receive. What happens is that > it gets a full receive buffer/urb then a partial one. This is with > a window size of 7. The last buffer contains a two partial frames, > the first is the fine and gets sent up. The second partial frame > has a start, but no EOF or CRC... The partial is always a multiple > of USB xfer (64) and eventually (500ms) the timer goes off and > we try to send. When the send collides everything returns to > normal. > > We aren't overruning the FIFO (been there, done that, new version > has code to handle it), it seems like something related to USB > and using the receive pipe as interrupt. Or another UHCI bug? Sounds like what I saw initially when writting the FIR code. You have two IrDA frames bundled in a single URB. And of course no framing/CRC between them. Because of the lack of framing between the frames, I don't think it's a USB problem. It's more like the Rx FIFO that doesn't manage properly the conversion from HDLC framing to pseudo-SIR framing. Jean |
From: Martin D. <li...@md...> - 2004-02-27 00:51:11
|
On Wed, 25 Feb 2004, Stephen Hemminger wrote: > I have a new version of the driver split into pieces, but want to get > a few more things stable. Probably tommorow. The main thing left is > that is stalls that occur during FIR receive. What happens is that > it gets a full receive buffer/urb then a partial one. This is with > a window size of 7. The last buffer contains a two partial frames, > the first is the fine and gets sent up. The second partial frame > has a start, but no EOF or CRC... The partial is always a multiple > of USB xfer (64) and eventually (500ms) the timer goes off and > we try to send. When the send collides everything returns to > normal. Ok, let's see. > We aren't overruning the FIFO (been there, done that, new version > has code to handle it), it seems like something related to USB > and using the receive pipe as interrupt. Or another UHCI bug? Well, further UHCI are possible - but I'm getting the fifo lockups with both OHCI and EHCI - I assume this makes it pretty clear the real trigger is somewhere else. Maybe different HCI-flavours make it appear differently. Martin |
From: Jean T. <jt...@bo...> - 2004-02-26 01:26:32
|
On Thu, Feb 26, 2004 at 12:33:38AM +0100, Martin Diehl wrote: > > Ansolutely. However, it seems there is some close relation in this case. > Only talking about FIR (SIR appears pretty good for me) it appears much of > the performance limitation comes from lost frames. That's why I gain more > than a factor of 10 in some configuration when limiting the window size > (both) to 1. It's not entirely clear why the frames get lost otherwise, > but it seems the stir lockups happen when receiving data at the wrong > moment. I guess this is why Stephen was asking for the collision handling > a few days ago. Unfortunately, collisions will happen in the real world, as well as interference. That's the rule for wireless. If the stir lockup every time it finds something it doesn't like on the channel, that's just a bad design. > Btw, I have to stop now, but my last observation was the the lockups on > lap disconnect seem to be related with a rd:rsp from the other side never > seen on the stir. At least I have seen several such situations where it > locked up while it didn't if either the rd:rsp was actually received or > the primary side (stir) was actively driving the disconnect. > > But I'm completely lost wrt. what might be special there if we lose the > rd_rsp. Note, it doesn't look like it would be the len=0 speed change > later because the rd:rsp is not seen and the primary continues retrying > rr:cmd until N2 * F timed out. But only the first of these rr:cmd is seen > by the secondary. Could it be the speed change back to 9600 on the > secondary gets triggered by te rd:rsp plus some delay? My understanding of > the NRM(S) state machine is it would have to repeat the rd:rsp until > either the disc:cmd arrives or the WD-timer expires... The first difference is that rd:rsp is a path that is very rarely used. Before I starting introducing the incomming socket watchdog (sometime in 2.4.X), it was never used and was actually buggy. So, there might still be bugs lurking in there. I think you are mostly right. I believe RECV_DM_RSP is never generated. In theory, the rd:rsp would be repeated by the LAP layer, but I don't think that would happen because in 'sclose' we don't process retry and we wait only one WD_TIMER_EXPIRED (~2s) before getting out. This seems consistent with you seeing only one rr:cmd after the rd:rsp, because when the link is idle they are sent every FINAL_TIMER_EXPIRED which is half WD_TIMER_EXPIRED. > Ok, the tx-assertion revealed some more issues with the existing > sir-wrapper. I need to check this too. Do you know if any existing driver > cares for tx buffer overrun in async_wrap_skb? If not, I'd suggest to > change it to return 0 in that case so drivers at least have the > opportunity to check and drop the frame instead of sending partial frames. Sending partial frames is not an issue for me, especially that it's statistically rare and they will fail CRC check. It's as if the frame was corrupted in flight. Note that in those case, the whole IrDA protocol will deadlock, because IrLAP will retry this frame forever. I did not check which driver would complain about that, I would assume that driver set buffsize to something they can handle. > Martin Jean |
From: Martin D. <li...@md...> - 2004-02-27 01:08:26
|
On Wed, 25 Feb 2004, Jean Tourrilhes wrote: > On Thu, Feb 26, 2004 at 12:33:38AM +0100, Martin Diehl wrote: > > > > Ansolutely. However, it seems there is some close relation in this case. > > Only talking about FIR (SIR appears pretty good for me) it appears much of > > the performance limitation comes from lost frames. That's why I gain more > > than a factor of 10 in some configuration when limiting the window size > > (both) to 1. It's not entirely clear why the frames get lost otherwise, > > but it seems the stir lockups happen when receiving data at the wrong > > moment. I guess this is why Stephen was asking for the collision handling > > a few days ago. > > Unfortunately, collisions will happen in the real world, as > well as interference. That's the rule for wireless. If the stir lockup > every time it finds something it doesn't like on the channel, that's > just a bad design. Sure. Nevertheless it would be nice if we could avoid triggering them too often. However, it might be, it's not worth the effort because 90+% of people will only need SIR which is working now pretty well here with Stephens latest patch. > The first difference is that rd:rsp is a path that is very > rarely used. Before I starting introducing the incomming socket > watchdog (sometime in 2.4.X), it was never used and was actually > buggy. So, there might still be bugs lurking in there. > I think you are mostly right. I believe RECV_DM_RSP is never > generated. In theory, the rd:rsp would be repeated by the LAP layer, > but I don't think that would happen because in 'sclose' we don't > process retry and we wait only one WD_TIMER_EXPIRED (~2s) before getting out. > This seems consistent with you seeing only one rr:cmd after > the rd:rsp, because when the link is idle they are sent every > FINAL_TIMER_EXPIRED which is half WD_TIMER_EXPIRED. Ok, thanks for explaining. From page 82 of the IrLAP-1.1 spec I'm pretty sure we shall resend the rd:rsp and restart the WD-timer in those situations. What about the patch below? As you can see from the attached irdadump output before and after the patch, from both ends (primary is stir4200, secondary vlsi) it's working as expected and the disconnect is much smoother now if the rd:rsp gets lost. Unfortunately it appeared things weren't changed wrt. to the stir fifo lockup. Anyway, I'd suggest applying it. > > Ok, the tx-assertion revealed some more issues with the existing > > sir-wrapper. I need to check this too. Do you know if any existing driver > > cares for tx buffer overrun in async_wrap_skb? If not, I'd suggest to > > change it to return 0 in that case so drivers at least have the > > opportunity to check and drop the frame instead of sending partial frames. > > Sending partial frames is not an issue for me, especially that > it's statistically rare and they will fail CRC check. It's as if the > frame was corrupted in flight. Note that in those case, the whole IrDA > protocol will deadlock, because IrLAP will retry this frame forever. > I did not check which driver would complain about that, I > would assume that driver set buffsize to something they can handle. Ok, after some first scan over the drivers I was under the impression it wouldn't hurt, because virtually none of them is checking for buffer overrun in async_wrap_skb - probably because this function doesn't return a clear error value. However, using retval=0 as error indications doesn't seem to be a good idea at the moment, because drivers don't check for wrappedlen==0 either - so they would pass a len=0 buffer to the underlaying hardware and probably wait for a tx-complete interrupt which would never occur. So I think I'd better stay with the current approach to return incomplete frames in the overrun case. Martin ---------------------------- --- v2.6.3-bk6-md/net/irda/irlap_event.c.orig Thu Feb 26 23:08:09 2004 +++ v2.6.3-bk6-md/net/irda/irlap_event.c Thu Feb 26 23:11:49 2004 @@ -2236,6 +2236,14 @@ static int irlap_state_sclose(struct irl irlap_disconnect_indication(self, LAP_DISC_INDICATION); break; case RECV_DM_RSP: + /* IrLAP-1.1 p.82: in SCLOSE, S and I type RSP frames + * shall take us down into default NDM state, like DM_RSP + */ + case RECV_RR_RSP: + case RECV_RNR_RSP: + case RECV_REJ_RSP: + case RECV_SREJ_RSP: + case RECV_I_RSP: /* Always switch state before calling upper layers */ irlap_next_state(self, LAP_NDM); @@ -2253,6 +2261,17 @@ static int irlap_state_sclose(struct irl irlap_disconnect_indication(self, LAP_DISC_INDICATION); break; default: + /* IrLAP-1.1 p.82: in SCLOSE, basically any received frame + * with pf=1 shall restart the wd-timer and resend the rd:rsp + */ + if (info != NULL && info->pf) { + del_timer(&self->wd_timer); + irlap_wait_min_turn_around(self, &self->qos_tx); + irlap_send_rd_frame(self); + irlap_start_wd_timer(self, self->wd_timeout); + break; /* stay in SCLOSE */ + } + IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __FUNCTION__, event, irlap_event[event]); |
From: Jean T. <jt...@bo...> - 2004-02-27 01:31:11
|
On Fri, Feb 27, 2004 at 01:30:49AM +0100, Martin Diehl wrote: > > Ok, thanks for explaining. From page 82 of the IrLAP-1.1 spec I'm pretty > sure we shall resend the rd:rsp and restart the WD-timer in those > situations. I did not check the spec, but that sound logical. > What about the patch below? As you can see from the attached irdadump > output before and after the patch, from both ends (primary is stir4200, > secondary vlsi) it's working as expected and the disconnect is much > smoother now if the rd:rsp gets lost. Yep, looks good. Thanks ! > Unfortunately it appeared things weren't changed wrt. to the stir fifo > lockup. Anyway, I'd suggest applying it. I did not expect miracles from my own analysis of what mess the Rx fifo was doing. > Ok, after some first scan over the drivers I was under the impression it > wouldn't hurt, because virtually none of them is checking for buffer > overrun in async_wrap_skb - probably because this function doesn't return > a clear error value. However, using retval=0 as error indications doesn't > seem to be a good idea at the moment, because drivers don't check for > wrappedlen==0 either - so they would pass a len=0 buffer to the > underlaying hardware and probably wait for a tx-complete interrupt which > would never occur. So I think I'd better stay with the current approach to > return incomplete frames in the overrun case. Because of the IrLAP deadlock on retry, if ever overrun occur, you will know about it. I think that because the various headers have predictable values, it's more difficult to hit an overrun. I would need to check the maths (or maybe experiment with irdaspray). > Martin Have fun... Jean |
From: Martin D. <li...@md...> - 2004-02-25 18:16:34
|
On Thu, 19 Feb 2004, Stephen Hemminger wrote: > Please give this driver some more work before I push the update to Dave. > * async unlink of receive requests to avoid waiting for each one > individually. Good, this should improve the performance, particularly for FIR. > * transmit path formats wrapped data into a skbuff which allows > wrapping in parallel with usb transfer, and avoids allocating a fixed > size buffer. > > Tested with two different vendor dongles and against nsc-ircc. Well, it seems you were both using some pretty fast (wrt. mtt) peer for testing. With peer-mtt = 1 msec (vlsi_ir) I'm getting tons of buffersize asserts from the sir-wrapper once the link switches to 115200 and starts sending rr-frames. The point is the wrapper adds more xbofs (in addition to what was negotiated) to implement the turnaround delay - i.e. irda_get_mtt(skb) is always zero at sir-speed. The allocated buffer wasn't sufficient to hold the wrapped frame in this case. See below for a patch to fix this. Furthermore, in receive_start: > + if (0 == usb_submit_urb(urb, GFP_KERNEL)) > + atomic_inc(&stir->rx_pending); Isn't there a (theoretical at least) window where the submit succeeds and the urb gets completed very fast, before the atomic_inc? Say with SMP and CONFIG_PREEMPT? Because in the urb-completion below we have: > static void stir_usb_receive(struct urb *urb, struct pt_regs *regs) ... > + /* don't resubmit, and wakeup receive_stop */ > + if (atomic_dec_and_test(&stir->rx_pending)) > + complete(&stir->rx_stop_complete); Depending on who wins the race I think we might either miss the completion and deadlock the thread or see some premature completion there. Below the patch to fix the tx-buffer issues, against 2.6.3-bk6 with your patch. Btw, maybe wrap_fir_skb() might take some additional buffsize argument to check we are not overriding memory? Martin ----------------- --- v2.6.3-bk6-md/drivers/net/irda/stir4200.c.sh-20040219 Wed Feb 25 18:04:58 2004 +++ v2.6.3-bk6-md/drivers/net/irda/stir4200.c Wed Feb 25 18:04:58 2004 @@ -587,6 +587,7 @@ static int change_speed(struct stir_cb * static int stir_hard_xmit(struct sk_buff *skb, struct net_device *netdev) { struct stir_cb *stir = netdev->priv; + struct irda_skb_cb *cb = (struct irda_skb_cb *)skb->cb; struct sk_buff *nskb; unsigned maxlen; @@ -595,9 +596,31 @@ static int stir_hard_xmit(struct sk_buff /* the IRDA wrapping routines don't deal with non linear skb */ SKB_LINEAR_ASSERT(skb); - /* need enough space for worst case */ - maxlen = STIR_IRDA_HEADER + irda_get_xbofs(skb) - + 2 * skb->len + 10; + /* need enough space for worst case + * In addition to the stir-header we need 1 byte for each BOF+EOF + * plus space for the whole data and 16-bit CRC all probably escaped + * and thus doubled in size. Furthermore, depending on negotiation + * and/or turnaround delay there might be up to 163 xbofs. + * And we need to be careful with non-SIR speeds where we have + * 2+2 bytes for BOF/EOF and in the case of FIR even further + * 16 PA-bytes. And FIR-CRC is 32 bit, not 16. + */ + + /* header + 2x2 byte BOF/EOF + wrapped data and crc16 */ + maxlen = STIR_IRDA_HEADER + 2 * (skb->len + 2 + 2); + + if (isfir(stir->speed)) + maxlen += 16 + 2 * 2; /* PA + wrapped crc16->crc32 */ + + /* ### FIXME: this needs to be consistent with the sir-wrapper! + * Chances are this should be transformed to some + * irda_get_total_xbofs(skb) call! + */ + + if (cb->magic != LAP_MAGIC) + maxlen += 10; /* frame from userland */ + else + maxlen += cb->xbofs + cb->xbofs_delay; nskb = dev_alloc_skb(maxlen); if (!nskb) { |