From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:43
|
Hi, I've got some qemu crashes while trying to passthrough an ide device to a kvm guest. After some investigation, it turned out that register_ioport_{read/write} will abort on errors instead of returning a meaningful error. However, even if we do return an error, the asynchronous nature of pci config space mapping updates makes it a little bit hard to treat. This series of patches basically treats errors in the mapping functions in the pci layer. If anything goes wrong, we unregister the pci device, unmapping any mappings that happened to be sucessfull already. After these patches are applied, a lot of warnings appears. And, you know, everytime there is a warning, god kills a kitten. But I'm not planning on touching the other pieces of qemu code for this until we set up (or not) in this solution Comments are very welcome, specially from qemu folks (since it is a bit invasive) |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:43
|
From: Glauber Costa <gc...@re...> map which io registers where already sucessfuly registered Signed-off-by: Glauber Costa <gc...@re...> --- qemu/hw/pci.c | 5 +++-- qemu/hw/pci.h | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 1937408..7e4ce2d 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -199,7 +199,7 @@ static void pci_unregister_io_regions(PCIDevice *pci_dev) for(i = 0; i < PCI_NUM_REGIONS; i++) { r = &pci_dev->io_regions[i]; - if (!r->size) + if ((!r->size) || (r->status != PCI_STATUS_REGISTERED)) continue; if (r->type == PCI_ADDRESS_SPACE_IO) { isa_unassign_ioport(r->addr, r->size); @@ -321,7 +321,7 @@ static void pci_update_mappings(PCIDevice *d) } else { isa_unassign_ioport(r->addr, r->size); } - } else { + } else if (r->status == PCI_STATUS_REGISTERED) { cpu_register_physical_memory(pci_to_cpu_addr(r->addr), r->size, IO_MEM_UNASSIGNED); @@ -330,6 +330,7 @@ static void pci_update_mappings(PCIDevice *d) r->addr = new_addr; if (r->addr != -1) { r->map_func(d, i, r->addr, r->size, r->type); + r->status = PCI_STATUS_REGISTERED; } } } diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h index e11fbbf..6350ad2 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -27,9 +27,12 @@ typedef struct PCIIORegion { uint32_t addr; /* current PCI mapping address. -1 means not mapped */ uint32_t size; uint8_t type; + uint8_t status; PCIMapIORegionFunc *map_func; } PCIIORegion; +#define PCI_STATUS_REGISTERED 1 + #define PCI_ROM_SLOT 6 #define PCI_NUM_REGIONS 7 -- 1.5.5 |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:54
|
From: Glauber Costa <gc...@re...> In situations like pci-passthrough, the ioport registering can fail, because another device is already present and in charge for an io address. The current state would crash qemu, but we can propagate the errors up to the pci layer, avoiding it. Signed-off-by: Glauber Costa <gc...@re...> --- qemu/hw/pci-passthrough.c | 28 ++++++++++++++++++++++++---- qemu/hw/pci.c | 30 ++++++++++++++++++++---------- qemu/hw/pci.h | 2 +- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c index 7ffcc7b..3912447 100644 --- a/qemu/hw/pci-passthrough.c +++ b/qemu/hw/pci-passthrough.c @@ -127,7 +127,7 @@ pt_ioport_read(b) pt_ioport_read(w) pt_ioport_read(l) -static void pt_iomem_map(PCIDevice * d, int region_num, +static int pt_iomem_map(PCIDevice * d, int region_num, uint32_t e_phys, uint32_t e_size, int type) { pt_dev_t *r_dev = (pt_dev_t *) d; @@ -141,6 +141,7 @@ static void pt_iomem_map(PCIDevice * d, int region_num, cpu_register_physical_memory(e_phys, r_dev->dev.io_regions[region_num].size, r_dev->v_addrs[region_num].memory_index); + return 0; } @@ -148,7 +149,8 @@ static void pt_ioport_map(PCIDevice * pci_dev, int region_num, uint32_t addr, uint32_t size, int type) { pt_dev_t *r_dev = (pt_dev_t *) pci_dev; - int i; + int i, err; + uint32_t ((*rf[])(void *, uint32_t)) = { pt_ioport_readb, pt_ioport_readw, pt_ioport_readl @@ -163,10 +165,14 @@ static void pt_ioport_map(PCIDevice * pci_dev, int region_num, "region_num=%d \n", addr, type, size, region_num); for (i = 0; i < 3; i++) { - register_ioport_write(addr, size, 1<<i, wf[i], + err = register_ioport_write(addr, size, 1<<i, wf[i], (void *) (r_dev->v_addrs + region_num)); - register_ioport_read(addr, size, 1<<i, rf[i], + if (err < 0) + return err; + err = register_ioport_read(addr, size, 1<<i, rf[i], (void *) (r_dev->v_addrs + region_num)); + if (err < 0) + return err; } } @@ -455,6 +461,18 @@ struct { int nptdevs; extern int piix_get_irq(int); +static int pt_pci_unregister(PCIDevice *pci_dev) +{ + pt_dev_t *pt = (pt_dev_t *)pci_dev; + int i; + for (i = 0; i < MAX_PTDEVS ; i++) { + if (ptdevs[i].ptdev == pt) + ptdevs[i].ptdev = NULL; + } + return 0; +} + + /* The pci config space got updated. Check if irq numbers have changed * for our devices */ @@ -572,6 +590,8 @@ int pt_init(PCIBus * bus) ret = -1; } ptdevs[i].ptdev = dev; + /* FIXME: Can the unregister callback be ever called before this point? */ + dev->dev.unregister = pt_pci_unregister; } if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 7e4ce2d..5265b81 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -48,7 +48,7 @@ struct PCIBus { int irq_count[]; }; -static void pci_update_mappings(PCIDevice *d); +static int pci_update_mappings(PCIDevice *d); static void pci_set_irq(void *opaque, int irq_num, int level); void pci_pt_update_irq(PCIDevice *d); @@ -133,13 +133,14 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) int pci_device_load(PCIDevice *s, QEMUFile *f) { uint32_t version_id; - int i; + int i, err; version_id = qemu_get_be32(f); if (version_id > 2) return -EINVAL; qemu_get_buffer(f, s->config, 256); - pci_update_mappings(s); + if ((err = pci_update_mappings(s)) < 0) + return err; if (version_id >= 2) for (i = 0; i < 4; i ++) @@ -192,7 +193,7 @@ static target_phys_addr_t pci_to_cpu_addr(target_phys_addr_t addr) return addr + pci_mem_base; } -static void pci_unregister_io_regions(PCIDevice *pci_dev) +void pci_unregister_io_regions(PCIDevice *pci_dev) { PCIIORegion *r; int i; @@ -256,11 +257,22 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num, *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type); } +static int map_pci_region(PCIDevice *d, int i, PCIIORegion *r) +{ + int err = 0; -static void pci_update_mappings(PCIDevice *d) + if ((err = r->map_func(d, i, r->addr, r->size, r->type)) < 0) { + fprintf(stderr, "Could not map pci device %s\n", d->name); + pci_unregister_device(d); + } + r->status = PCI_STATUS_REGISTERED; + return err; +} + +static int pci_update_mappings(PCIDevice *d) { PCIIORegion *r; - int cmd, i; + int cmd, i, err; uint32_t last_addr, new_addr, config_ofs; cmd = le16_to_cpu(*(uint16_t *)(d->config + PCI_COMMAND)); @@ -328,10 +340,8 @@ static void pci_update_mappings(PCIDevice *d) } } r->addr = new_addr; - if (r->addr != -1) { - r->map_func(d, i, r->addr, r->size, r->type); - r->status = PCI_STATUS_REGISTERED; - } + if ((r->addr != -1) && ((err = map_pci_region(d, i, r)) < 0)) + return err; } } } diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h index 6350ad2..7eeff2e 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -15,7 +15,7 @@ typedef void PCIConfigWriteFunc(PCIDevice *pci_dev, uint32_t address, uint32_t data, int len); typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, uint32_t address, int len); -typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, +typedef int PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, uint32_t addr, uint32_t size, int type); typedef int PCIUnregisterFunc(PCIDevice *pci_dev); -- 1.5.5 |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:44
|
From: Glauber Costa <gc...@re...> Currently, any error in register_ioports make qemu abort through hw_error(). But there are situations in which those errors are not fatal. Just return < 0 instead Signed-off-by: Glauber Costa <gc...@re...> --- qemu/vl.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) diff --git a/qemu/vl.c b/qemu/vl.c index 35a0465..d7e07e2 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -351,13 +351,13 @@ int register_ioport_read(int start, int length, int size, } else if (size == 4) { bsize = 2; } else { - hw_error("register_ioport_read: invalid size"); + fprintf(stderr, "register_ioport_read: invalid size\n"); return -1; } for(i = start; i < start + length; i += size) { ioport_read_table[bsize][i] = func; if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque) - hw_error("register_ioport_read: invalid opaque"); + fprintf(stderr, "register_ioport_read: invalid opaque\n"); ioport_opaque[i] = opaque; } return 0; @@ -376,13 +376,15 @@ int register_ioport_write(int start, int length, int size, } else if (size == 4) { bsize = 2; } else { - hw_error("register_ioport_write: invalid size"); + fprintf(stderr, "register_ioport_write: invalid size\n"); return -1; } for(i = start; i < start + length; i += size) { ioport_write_table[bsize][i] = func; - if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque) - hw_error("register_ioport_write: invalid opaque"); + if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque) { + fprintf(stderr, "register_ioport_write: invalid opaque\n"); + return -1; + } ioport_opaque[i] = opaque; } return 0; -- 1.5.5 |
From: Avi K. <av...@qu...> - 2008-04-18 16:28:09
|
Glauber de Oliveira Costa wrote: > Hi, > > I've got some qemu crashes while trying to passthrough an ide device > to a kvm guest. After some investigation, it turned out that > register_ioport_{read/write} will abort on errors instead of returning > a meaningful error. > > However, even if we do return an error, the asynchronous nature of pci > config space mapping updates makes it a little bit hard to treat. > > This series of patches basically treats errors in the mapping functions in > the pci layer. If anything goes wrong, we unregister the pci device, unmapping > any mappings that happened to be sucessfull already. > > After these patches are applied, a lot of warnings appears. And, you know, > everytime there is a warning, god kills a kitten. But I'm not planning on > touching the other pieces of qemu code for this until we set up (or not) in > this solution > > Comments are very welcome, specially from qemu folks (since it is a bit invasive) > > Have you considered, instead of rolling back the changes you already made before the failure, to have a function which checks if an ioport registration will be successful? This may simplify the code. -- Any sufficiently difficult bug is indistinguishable from a feature. |
From: Glauber C. <gl...@gm...> - 2008-04-19 21:11:16
|
On Fri, Apr 18, 2008 at 1:27 PM, Avi Kivity <av...@qu...> wrote: > > Glauber de Oliveira Costa wrote: > > > Hi, > > I've got some qemu crashes while trying to passthrough an ide device > > to a kvm guest. After some investigation, it turned out that > register_ioport_{read/write} will abort on errors instead of returning > > a meaningful error. > > > > However, even if we do return an error, the asynchronous nature of pci > > config space mapping updates makes it a little bit hard to treat. > > > > This series of patches basically treats errors in the mapping functions in > > the pci layer. If anything goes wrong, we unregister the pci device, > unmapping > > any mappings that happened to be sucessfull already. > > > > After these patches are applied, a lot of warnings appears. And, you know, > > everytime there is a warning, god kills a kitten. But I'm not planning on > > touching the other pieces of qemu code for this until we set up (or not) > in > > this solution > > > > Comments are very welcome, specially from qemu folks (since it is a bit > invasive) > > > > > > > > Have you considered, instead of rolling back the changes you already made > before the failure, to have a function which checks if an ioport > registration will be successful? This may simplify the code. > Yes, I did. Basic problem is that I basically could not find this information handy until we were deep in the stack, right before calling the update mapping functions. I turned out preferring this option. I can, however, take a fresh look at that. -- Glauber Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act." |
From: Glauber C. <gl...@gm...> - 2008-04-24 13:25:32
|
On Sat, Apr 19, 2008 at 6:11 PM, Glauber Costa <gl...@gm...> wrote: > > On Fri, Apr 18, 2008 at 1:27 PM, Avi Kivity <av...@qu...> wrote: > > > > Glauber de Oliveira Costa wrote: > > > > > Hi, > > > I've got some qemu crashes while trying to passthrough an ide device > > > to a kvm guest. After some investigation, it turned out that > > register_ioport_{read/write} will abort on errors instead of returning > > > a meaningful error. > > > > > > However, even if we do return an error, the asynchronous nature of pci > > > config space mapping updates makes it a little bit hard to treat. > > > > > > This series of patches basically treats errors in the mapping functions in > > > the pci layer. If anything goes wrong, we unregister the pci device, > > unmapping > > > any mappings that happened to be sucessfull already. > > > > > > After these patches are applied, a lot of warnings appears. And, you know, > > > everytime there is a warning, god kills a kitten. But I'm not planning on > > > touching the other pieces of qemu code for this until we set up (or not) > > in > > > this solution > > > > > > Comments are very welcome, specially from qemu folks (since it is a bit > > invasive) > > > > > > > > > > > > > Have you considered, instead of rolling back the changes you already made > > before the failure, to have a function which checks if an ioport > > registration will be successful? This may simplify the code. > > > Yes, I did. > > Basic problem is that I basically could not find this information > handy until we were deep in the stack, right before calling the update > mapping functions. I turned out preferring this option. I can, > however, take a fresh look at that. > Looked at this again, and it does seem to me that we don't have too much to gain from a "test-before" solution. We definitely can't test it reliably until update_mappings arrive, (since the mapping can change) and by this time, the pci device is already registered, and we would have to de-register it anyway. There is room for "improvement" (with a wide definition of improvement) if we test all the ports of a device in advance (inside update_mappings) instead of a port-by-port basis. We could get rid of the flag, but it would be traded off by another complexities. So unless someone have a very direct alternate solution for this I'm failing to see, I do advocate for those humble patches. -- Glauber Costa. "Free as in Freedom" http://glommer.net "The less confident you are, the more serious you have to act." |