Todd Davis - 2004-02-24

An SMP kernel deadlocks when v30 OpenIPMI modules are loaded, though not right away. Before the system locks up completely and needs to be reset, the clock stops advancing: running `date` several times in a row returns the same time.

ipmi_timeout_handler() calls handle_new_recv_msg() with the waiting_msgs_lock held, whereas ipmi_smi_msg_received() calls it only after releasing waiting_msgs_lock. Could this locking inconsistency be the cause of the deadlock?

ipmi_timeout_handler(long timeout_period)
...
/* See if any waiting messages need to be processed. */
spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
list_for_each_safe(entry, entry2, &(intf->waiting_msgs)) {
    smi_msg = list_entry(entry, struct ipmi_smi_msg, link);
    if (! handle_new_recv_msg(intf, smi_msg)) {
        list_del(entry);
        ipmi_free_smi_msg(smi_msg);
    } else {
    /* To preserve message order, quit if we
       can't handle a message. */
    break;
    }
}
spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);

/* Handle a new message from the lower layer. */
void ipmi_smi_msg_received(ipmi_smi_t          intf,
               struct ipmi_smi_msg *msg)
...
/* To preserve message order, if the list is not empty, we
     tack this message onto the end of the list. */
spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
if (!list_empty(&(intf->waiting_msgs))) {
        list_add_tail(&(msg->link), &(intf->waiting_msgs));
    spin_unlock(&(intf->waiting_msgs_lock));
    goto out_unlock;
}
spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);

rv = handle_new_recv_msg(intf, msg);
if (rv > 0) {
    /* Could not handle the message now, just add it to a
                 list to handle later. */
    spin_lock(&(intf->waiting_msgs_lock));
    list_add_tail(&(msg->link), &(intf->waiting_msgs));
    spin_unlock(&(intf->waiting_msgs_lock));
} else if (rv == 0) {
    ipmi_free_smi_msg(msg);
}