You can subscribe to this list here.
2006 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(33) |
Nov
(325) |
Dec
(320) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2007 |
Jan
(484) |
Feb
(438) |
Mar
(407) |
Apr
(713) |
May
(831) |
Jun
(806) |
Jul
(1023) |
Aug
(1184) |
Sep
(1118) |
Oct
(1461) |
Nov
(1224) |
Dec
(1042) |
2008 |
Jan
(1449) |
Feb
(1110) |
Mar
(1428) |
Apr
(1643) |
May
(682) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: H. P. A. <hp...@zy...> - 2008-04-17 06:03:41
|
> + .globl linux_boot > +linux_boot: > + cli > + cld > + mov $0x9000, %ax > + mov %ax, %ds > + mov %ax, %es > + mov %ax, %fs > + mov %ax, %gs > + mov %ax, %ss > + mov $0x8ffe, %sp > + ljmp $0x9000 + 0x20, $0 The hard use of segment 9000 is really highly unfortunate for bzImage, since it restricts its heap more than necessary. I suggest following the patterns used by the (new) Qemu loader. -hpa |
From: H. P. A. <hp...@zy...> - 2008-04-17 06:02:20
|
Nguyen Anh Quynh wrote: > This patch replaces the current assembly code of Extboot option rom > with new C code. Patch is against kvm-66. > > This version returns an error code in case int 13 handler cannot > handle a requested function. > > Signed-off-by: Nguyen Anh Quynh <aq...@gm...> > + /* -fomit-frame-pointer might clobber %ebp */ + pushl %ebp + call setup + popl %ebp No, it might not. %ebx, %ebp, %esi, and %edi are guaranteed preserved; %eax, %ecx and %edx are clobbered. It's also prudent to call cld before jumping to C code. -hpa |
From: Yunfeng Z. <yun...@in...> - 2008-04-17 05:48:15
|
Hi All, This is today's KVM test result against kvm.git eeff99b6f7e47630356cf2aee2e58d1b3e1af931 and kvm-userspace.git cd6ac0431e01c01c7870aadfafc6ddaf09a8fb95. One New Issue ================================================ 1.Can't boot smp guests on ia32e host https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1944629&group_id=180599 Three Old Issues: ================================================ 2. Booting four guests likely fails https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1919354&group_id=180599 3. booting smp windows guests has 30% chance of hang https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1910923&group_id=180599 4. Cannot boot guests with hugetlbfs https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1941302&group_id=180599 Test environment ================================================ Platform Woodcrest CPU 4 Memory size 8G' Details ================================================ IA32-pae: 1. boot guest with 256M memory PASS 2. boot two windows xp guest PASS 3. boot 4 same guest in parallel PASS 4. boot linux and windows guest in parallel PASS 5. boot guest with 1500M memory PASS 6. boot windows 2003 with ACPI enabled PASS 7. boot Windows xp with ACPI enabled PASS 8. boot Windows 2000 without ACPI PASS 9. kernel build on SMP linux guest PASS 10. LTP on SMP linux guest PASS 11. boot base kernel linux PASS 12. save/restore 32-bit HVM guests PASS 13. live migration 32-bit HVM guests PASS 14. boot SMP Windows xp with ACPI enabled PASS 15. boot SMP Windows 2003 with ACPI enabled PASS 16. boot SMP Windows 2000 with ACPI enabled PASS ================================================ IA32e: 1. boot four 32-bit guest in parallel PASS 2. boot four 64-bit guest in parallel PASS 3. boot 4G 64-bit guest PASS 4. boot 4G pae guest PASS 5. boot 32-bit linux and 32 bit windows guest in parallel PASS 6. boot 32-bit guest with 1500M memory PASS 7. boot 64-bit guest with 1500M memory PASS 8. boot 32-bit guest with 256M memory PASS 9. boot 64-bit guest with 256M memory PASS 10. boot two 32-bit windows xp in parallel PASS 11. boot four 32-bit different guest in para PASS 12. save/restore 64-bit linux guests PASS 13. save/restore 32-bit linux guests PASS 14. boot 32-bit SMP windows 2003 with ACPI enabled FAIL 15. boot 32-bit SMP Windows 2000 with ACPI enabled FAIL 16. boot 32-bit SMP Windows xp with ACPI enabled FAIL 17. boot 32-bit Windows 2000 without ACPI PASS 18. boot 64-bit Windows xp with ACPI enabled PASS 19. boot 32-bit Windows xp without ACPI PASS 20. boot 64-bit UP vista PASS 21. boot 64-bit SMP vista FAIL 22. kernel build in 32-bit linux guest OS PASS 23. kernel build in 64-bit linux guest OS PASS 24. LTP on 32-bit linux guest OS PASS 25. LTP on 64-bit linux guest OS PASS 26. boot 64-bit guests with ACPI enabled PASS 27. boot 32-bit x-server PASS 28. boot 64-bit SMP windows XP with ACPI enabled FAIL 29. boot 64-bit SMP windows 2003 with ACPI enabled FAIL 30. live migration 64bit linux guests PASS 31. live migration 32bit linux guests PASS 32. reboot 32bit windows xp guest PASS 33. reboot 32bit windows xp guest PASS Report Summary on IA32-pae Summary Test Report of Last Session ===================================================================== Total Pass Fail NoResult Crash ===================================================================== control_panel 7 7 0 0 0 Restart 2 2 0 0 0 gtest 15 15 0 0 0 ===================================================================== control_panel 7 7 0 0 0 :KVM_LM_PAE_gPAE 1 1 0 0 0 :KVM_four_sguest_PAE_gPA 1 1 0 0 0 :KVM_256M_guest_PAE_gPAE 1 1 0 0 0 :KVM_linux_win_PAE_gPAE 1 1 0 0 0 :KVM_1500M_guest_PAE_gPA 1 1 0 0 0 :KVM_SR_PAE_gPAE 1 1 0 0 0 :KVM_two_winxp_PAE_gPAE 1 1 0 0 0 Restart 2 2 0 0 0 :GuestPAE_PAE_gPAE 1 1 0 0 0 :BootTo32pae_PAE_gPAE 1 1 0 0 0 gtest 15 15 0 0 0 :ltp_nightly_PAE_gPAE 1 1 0 0 0 :boot_up_acpi_PAE_gPAE 1 1 0 0 0 :reboot_xp_PAE_gPAE 1 1 0 0 0 :boot_up_vista_PAE_gPAE 1 1 0 0 0 :boot_up_acpi_xp_PAE_gPA 1 1 0 0 0 :boot_up_acpi_win2k3_PAE 1 1 0 0 0 :boot_base_kernel_PAE_gP 1 1 0 0 0 :boot_smp_acpi_win2k3_PA 1 1 0 0 0 :boot_smp_acpi_win2k_PAE 1 1 0 0 0 :boot_up_acpi_win2k_PAE_ 1 1 0 0 0 :boot_smp_acpi_xp_PAE_gP 1 1 0 0 0 :boot_up_noacpi_win2k_PA 1 1 0 0 0 :boot_smp_vista_PAE_gPAE 1 1 0 0 0 :bootx_PAE_gPAE 1 1 0 0 0 :kb_nightly_PAE_gPAE 1 1 0 0 0 ===================================================================== Total 24 24 0 0 0 Report Summary on IA32e Summary Test Report of Last Session ===================================================================== Total Pass Fail NoResult Crash ===================================================================== control_panel 15 15 0 0 0 Restart 3 3 0 0 0 gtest 25 18 7 0 0 ===================================================================== control_panel 15 15 0 0 0 :KVM_LM_64_g64 1 1 0 0 0 :KVM_four_sguest_64_gPAE 1 1 0 0 0 :KVM_4G_guest_64_g64 1 1 0 0 0 :KVM_four_sguest_64_g64 1 1 0 0 0 :KVM_linux_win_64_gPAE 1 1 0 0 0 :KVM_1500M_guest_64_gPAE 1 1 0 0 0 :KVM_SR_64_g64 1 1 0 0 0 :KVM_LM_64_gPAE 1 1 0 0 0 :KVM_256M_guest_64_g64 1 1 0 0 0 :KVM_1500M_guest_64_g64 1 1 0 0 0 :KVM_4G_guest_64_gPAE 1 1 0 0 0 :KVM_SR_64_gPAE 1 1 0 0 0 :KVM_256M_guest_64_gPAE 1 1 0 0 0 :KVM_two_winxp_64_gPAE 1 1 0 0 0 :KVM_four_dguest_64_gPAE 1 1 0 0 0 Restart 3 3 0 0 0 :BootTo64_64_g64 1 1 0 0 0 :Guest64_64_gPAE 1 1 0 0 0 :GuestPAE_64_g64 1 1 0 0 0 gtest 25 18 7 0 0 :boot_up_acpi_64_gPAE 1 1 0 0 0 :boot_up_noacpi_xp_64_gP 1 1 0 0 0 :boot_smp_acpi_xp_64_g64 1 0 1 0 0 :boot_base_kernel_64_gPA 1 1 0 0 0 :boot_smp_acpi_win2k3_64 1 0 1 0 0 :boot_smp_acpi_win2k_64_ 1 0 1 0 0 :boot_base_kernel_64_g64 1 1 0 0 0 :bootx_64_gPAE 1 1 0 0 0 :kb_nightly_64_gPAE 1 1 0 0 0 :ltp_nightly_64_g64 1 1 0 0 0 :boot_up_acpi_64_g64 1 1 0 0 0 :boot_up_noacpi_win2k_64 1 1 0 0 0 :boot_smp_acpi_xp_64_gPA 1 0 1 0 0 :boot_smp_vista_64_gPAE 1 0 1 0 0 :boot_up_acpi_win2k3_64_ 1 1 0 0 0 :reboot_xp_64_gPAE 1 1 0 0 0 :bootx_64_g64 1 1 0 0 0 :boot_up_vista_64_g64 1 1 0 0 0 :boot_smp_vista_64_g64 1 0 1 0 0 :boot_up_acpi_xp_64_g64 1 1 0 0 0 :boot_up_vista_64_gPAE 1 1 0 0 0 :ltp_nightly_64_gPAE 1 1 0 0 0 :boot_smp_acpi_win2k3_64 1 0 1 0 0 :boot_up_noacpi_win2k3_6 1 1 0 0 0 :kb_nightly_64_g64 1 1 0 0 0 ===================================================================== Total 43 36 7 0 0 Best Regards, Yunfeng |
From: SourceForge.net <no...@so...> - 2008-04-17 05:44:19
|
Bugs item #1944629, was opened at 2008-04-17 13:44 Message generated for change (Tracker Item Submitted) made by Item Submitter You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1944629&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: yunfeng (yunfeng) Assigned to: Nobody/Anonymous (nobody) Summary: Can't boot smp guests on ia32e host Initial Comment: Environment: ------------ Host:ia32e Guest: ia32pae/ia32e Commits: kernel eeff99b6f7e47630356cf2aee2e58d1b3e1af931 userspace cd6ac0431e01c01c7870aadfafc6ddaf09a8fb95 Hardware:Platform Woodcrest CPU 4 Memory size 8G' Bug detailed description: -------------------------- Can't boot smp guests on ia32e platform, both windows and linux smp guests will hang as the attachment shows. Using raw images or qcow images to boot, guests hang at the same point. With -no-kvm or -no-kvm-irqchip option, smp linux guests can boot; with -no-kvm-irqchip smp windows guests can boot. Reproduce steps: ---------------- 1.prepare a windows or linux image 2.boot guest with the command: qemu-system-x86_64 -m 256 -smp 2 -net nic,macaddr=00:16:3e:66:c3:4a,model=rtl8139 -net tap,script=/etc/kvm/qemu-ifup -hda /share/xvs/var/ia32p_xpsp2_smp_acpi.img Current result: ---------------- Expected result: ---------------- Basic root-causing log: ---------------------- ---------------------------------------------------------------------- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=1944629&group_id=180599 |
From: Nguyen A. Q. <aq...@gm...> - 2008-04-17 01:30:20
|
This patch replaces the current assembly code of Extboot option rom with new C code. Patch is against kvm-66. This version returns an error code in case int 13 handler cannot handle a requested function. Signed-off-by: Nguyen Anh Quynh <aq...@gm...> # diffstat extboot3.diff b/extboot/Makefile | 67 ++-- b/extboot/boot.S | 119 ++++++++ b/extboot/farvar.h | 113 ++++++++ b/extboot/rom.c | 366 ++++++++++++++++++++++++++ b/extboot/signrom.c | 83 ++++-- b/extboot/util.h | 89 ++++++ extboot/extboot.S | 705 ---------------------------------------------------- 7 files changed, 786 insertions(+), 756 deletions(-) |
From: Nguyen A. Q. <aq...@gm...> - 2008-04-17 01:26:06
|
On Thu, Apr 17, 2008 at 12:02 AM, Anthony Liguori <ali...@us...> wrote: > A couple general comments. > > I'd feel a lot more comfortable with the int13 handler returning an int and > the asm stub code uses that result to determine how to set CF. You set CF > deep within the function stack and there's no guarantee that GCC isn't going > to stomp on it. > > I also don't think we want to raise int18 when we get a command we don't > understand. We should just not change any of the register state. There are > a number of extended commands that look for a magic value to determine > whether the command exists or not. Absolutely. We should return error code in that case. Thanks, Quynh |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:54
|
From: Glauber Costa <gc...@re...> In situations like pci-passthrough, the ioport registering can fail, because another device is already present and in charge for an io address. The current state would crash qemu, but we can propagate the errors up to the pci layer, avoiding it. Signed-off-by: Glauber Costa <gc...@re...> --- qemu/hw/pci-passthrough.c | 28 ++++++++++++++++++++++++---- qemu/hw/pci.c | 30 ++++++++++++++++++++---------- qemu/hw/pci.h | 2 +- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c index 7ffcc7b..3912447 100644 --- a/qemu/hw/pci-passthrough.c +++ b/qemu/hw/pci-passthrough.c @@ -127,7 +127,7 @@ pt_ioport_read(b) pt_ioport_read(w) pt_ioport_read(l) -static void pt_iomem_map(PCIDevice * d, int region_num, +static int pt_iomem_map(PCIDevice * d, int region_num, uint32_t e_phys, uint32_t e_size, int type) { pt_dev_t *r_dev = (pt_dev_t *) d; @@ -141,6 +141,7 @@ static void pt_iomem_map(PCIDevice * d, int region_num, cpu_register_physical_memory(e_phys, r_dev->dev.io_regions[region_num].size, r_dev->v_addrs[region_num].memory_index); + return 0; } @@ -148,7 +149,8 @@ static void pt_ioport_map(PCIDevice * pci_dev, int region_num, uint32_t addr, uint32_t size, int type) { pt_dev_t *r_dev = (pt_dev_t *) pci_dev; - int i; + int i, err; + uint32_t ((*rf[])(void *, uint32_t)) = { pt_ioport_readb, pt_ioport_readw, pt_ioport_readl @@ -163,10 +165,14 @@ static void pt_ioport_map(PCIDevice * pci_dev, int region_num, "region_num=%d \n", addr, type, size, region_num); for (i = 0; i < 3; i++) { - register_ioport_write(addr, size, 1<<i, wf[i], + err = register_ioport_write(addr, size, 1<<i, wf[i], (void *) (r_dev->v_addrs + region_num)); - register_ioport_read(addr, size, 1<<i, rf[i], + if (err < 0) + return err; + err = register_ioport_read(addr, size, 1<<i, rf[i], (void *) (r_dev->v_addrs + region_num)); + if (err < 0) + return err; } } @@ -455,6 +461,18 @@ struct { int nptdevs; extern int piix_get_irq(int); +static int pt_pci_unregister(PCIDevice *pci_dev) +{ + pt_dev_t *pt = (pt_dev_t *)pci_dev; + int i; + for (i = 0; i < MAX_PTDEVS ; i++) { + if (ptdevs[i].ptdev == pt) + ptdevs[i].ptdev = NULL; + } + return 0; +} + + /* The pci config space got updated. Check if irq numbers have changed * for our devices */ @@ -572,6 +590,8 @@ int pt_init(PCIBus * bus) ret = -1; } ptdevs[i].ptdev = dev; + /* FIXME: Can the unregister callback be ever called before this point? */ + dev->dev.unregister = pt_pci_unregister; } if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 7e4ce2d..5265b81 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -48,7 +48,7 @@ struct PCIBus { int irq_count[]; }; -static void pci_update_mappings(PCIDevice *d); +static int pci_update_mappings(PCIDevice *d); static void pci_set_irq(void *opaque, int irq_num, int level); void pci_pt_update_irq(PCIDevice *d); @@ -133,13 +133,14 @@ void pci_device_save(PCIDevice *s, QEMUFile *f) int pci_device_load(PCIDevice *s, QEMUFile *f) { uint32_t version_id; - int i; + int i, err; version_id = qemu_get_be32(f); if (version_id > 2) return -EINVAL; qemu_get_buffer(f, s->config, 256); - pci_update_mappings(s); + if ((err = pci_update_mappings(s)) < 0) + return err; if (version_id >= 2) for (i = 0; i < 4; i ++) @@ -192,7 +193,7 @@ static target_phys_addr_t pci_to_cpu_addr(target_phys_addr_t addr) return addr + pci_mem_base; } -static void pci_unregister_io_regions(PCIDevice *pci_dev) +void pci_unregister_io_regions(PCIDevice *pci_dev) { PCIIORegion *r; int i; @@ -256,11 +257,22 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num, *(uint32_t *)(pci_dev->config + addr) = cpu_to_le32(type); } +static int map_pci_region(PCIDevice *d, int i, PCIIORegion *r) +{ + int err = 0; -static void pci_update_mappings(PCIDevice *d) + if ((err = r->map_func(d, i, r->addr, r->size, r->type)) < 0) { + fprintf(stderr, "Could not map pci device %s\n", d->name); + pci_unregister_device(d); + } + r->status = PCI_STATUS_REGISTERED; + return err; +} + +static int pci_update_mappings(PCIDevice *d) { PCIIORegion *r; - int cmd, i; + int cmd, i, err; uint32_t last_addr, new_addr, config_ofs; cmd = le16_to_cpu(*(uint16_t *)(d->config + PCI_COMMAND)); @@ -328,10 +340,8 @@ static void pci_update_mappings(PCIDevice *d) } } r->addr = new_addr; - if (r->addr != -1) { - r->map_func(d, i, r->addr, r->size, r->type); - r->status = PCI_STATUS_REGISTERED; - } + if ((r->addr != -1) && ((err = map_pci_region(d, i, r)) < 0)) + return err; } } } diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h index 6350ad2..7eeff2e 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -15,7 +15,7 @@ typedef void PCIConfigWriteFunc(PCIDevice *pci_dev, uint32_t address, uint32_t data, int len); typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev, uint32_t address, int len); -typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, +typedef int PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, uint32_t addr, uint32_t size, int type); typedef int PCIUnregisterFunc(PCIDevice *pci_dev); -- 1.5.5 |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:44
|
From: Glauber Costa <gc...@re...> Currently, any error in register_ioports make qemu abort through hw_error(). But there are situations in which those errors are not fatal. Just return < 0 instead Signed-off-by: Glauber Costa <gc...@re...> --- qemu/vl.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) diff --git a/qemu/vl.c b/qemu/vl.c index 35a0465..d7e07e2 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -351,13 +351,13 @@ int register_ioport_read(int start, int length, int size, } else if (size == 4) { bsize = 2; } else { - hw_error("register_ioport_read: invalid size"); + fprintf(stderr, "register_ioport_read: invalid size\n"); return -1; } for(i = start; i < start + length; i += size) { ioport_read_table[bsize][i] = func; if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque) - hw_error("register_ioport_read: invalid opaque"); + fprintf(stderr, "register_ioport_read: invalid opaque\n"); ioport_opaque[i] = opaque; } return 0; @@ -376,13 +376,15 @@ int register_ioport_write(int start, int length, int size, } else if (size == 4) { bsize = 2; } else { - hw_error("register_ioport_write: invalid size"); + fprintf(stderr, "register_ioport_write: invalid size\n"); return -1; } for(i = start; i < start + length; i += size) { ioport_write_table[bsize][i] = func; - if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque) - hw_error("register_ioport_write: invalid opaque"); + if (ioport_opaque[i] != NULL && ioport_opaque[i] != opaque) { + fprintf(stderr, "register_ioport_write: invalid opaque\n"); + return -1; + } ioport_opaque[i] = opaque; } return 0; -- 1.5.5 |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:43
|
From: Glauber Costa <gc...@re...> map which io registers where already sucessfuly registered Signed-off-by: Glauber Costa <gc...@re...> --- qemu/hw/pci.c | 5 +++-- qemu/hw/pci.h | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 1937408..7e4ce2d 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -199,7 +199,7 @@ static void pci_unregister_io_regions(PCIDevice *pci_dev) for(i = 0; i < PCI_NUM_REGIONS; i++) { r = &pci_dev->io_regions[i]; - if (!r->size) + if ((!r->size) || (r->status != PCI_STATUS_REGISTERED)) continue; if (r->type == PCI_ADDRESS_SPACE_IO) { isa_unassign_ioport(r->addr, r->size); @@ -321,7 +321,7 @@ static void pci_update_mappings(PCIDevice *d) } else { isa_unassign_ioport(r->addr, r->size); } - } else { + } else if (r->status == PCI_STATUS_REGISTERED) { cpu_register_physical_memory(pci_to_cpu_addr(r->addr), r->size, IO_MEM_UNASSIGNED); @@ -330,6 +330,7 @@ static void pci_update_mappings(PCIDevice *d) r->addr = new_addr; if (r->addr != -1) { r->map_func(d, i, r->addr, r->size, r->type); + r->status = PCI_STATUS_REGISTERED; } } } diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h index e11fbbf..6350ad2 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -27,9 +27,12 @@ typedef struct PCIIORegion { uint32_t addr; /* current PCI mapping address. -1 means not mapped */ uint32_t size; uint8_t type; + uint8_t status; PCIMapIORegionFunc *map_func; } PCIIORegion; +#define PCI_STATUS_REGISTERED 1 + #define PCI_ROM_SLOT 6 #define PCI_NUM_REGIONS 7 -- 1.5.5 |
From: Glauber de O. C. <gc...@re...> - 2008-04-17 00:58:43
|
Hi, I've got some qemu crashes while trying to passthrough an ide device to a kvm guest. After some investigation, it turned out that register_ioport_{read/write} will abort on errors instead of returning a meaningful error. However, even if we do return an error, the asynchronous nature of pci config space mapping updates makes it a little bit hard to treat. This series of patches basically treats errors in the mapping functions in the pci layer. If anything goes wrong, we unregister the pci device, unmapping any mappings that happened to be sucessfull already. After these patches are applied, a lot of warnings appears. And, you know, everytime there is a warning, god kills a kitten. But I'm not planning on touching the other pieces of qemu code for this until we set up (or not) in this solution Comments are very welcome, specially from qemu folks (since it is a bit invasive) |
From: Marcelo T. <mto...@re...> - 2008-04-16 23:49:07
|
On Wed, Apr 16, 2008 at 04:36:42PM -0500, Anthony Liguori wrote: > This isn't fully cooked yet, but pretty close. The basic idea is to > make the aio usage in block-raw go to a set of function pointers and > allow multiple simultaneous AIO implementations. The AIO API looks great. Looking forward to a linux-aio backend. > I converted the posix-aio support to this, and also introduced a "unix" > aio which just uses O_NONBLOCK and select(). The later only supports 1 > simultaneous request per-fd but currently posix-aio is limited to that > too. At least with my QEMU testing, the unix aio implementation > outperforms posix-aio by a factor of 2. Unfortunately O_NONBLOCK is not honoured for block backed storage, so its a no-go. > And it uses no signals... > > I'm inclined to suggest that we use signalfd with posix-aio, and for > older guests, just fall back to unix aio. We can also introduce a > linux-aio and use that when possible. I think Avi's suggestion to emulate signalfd with a separate thread which does sigtimedwait+write-to-a-pipe is sensate. |
From: Marcelo T. <mto...@re...> - 2008-04-16 23:37:42
|
On Thu, Apr 17, 2008 at 01:05:50AM +0200, Gerd von Egidy wrote: > Hi Marcelo, > > > virtio-blk is doing synchronous IO which blocks the guest CPU. > > > > This is especially bad for write intensive loads where the guest > > will hang in the host write throttling logic. > > > > In the meantime please try the following patch: > > > > http://www.mail-archive.com/kvm...@li.../msg14732.html > > Thank you for this hint. > > I tried it this evening with kvm 66 - which should include your patch, right? Hi Gerd, No its not included. The issue is being worked on. |
From: Gerd v. E. <li...@eg...> - 2008-04-16 23:06:13
|
Hi Marcelo, > virtio-blk is doing synchronous IO which blocks the guest CPU. > > This is especially bad for write intensive loads where the guest > will hang in the host write throttling logic. > > In the meantime please try the following patch: > > http://www.mail-archive.com/kvm...@li.../msg14732.html Thank you for this hint. I tried it this evening with kvm 66 - which should include your patch, right? The result, at least with bonnie++, is nearly the same. I looked at the patch for the guest kernel you sent with this patch but looking at the discussion it did not work. Kind regards, Gerd -- Address (better: trap) for people I really don't want to get mail from: ja...@ca... |
From: Hollis B. <ho...@us...> - 2008-04-16 21:57:24
|
On Wednesday 16 April 2008 16:32:30 Anthony Liguori wrote: > > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c > > --- a/drivers/block/virtio_blk.c > > +++ b/drivers/block/virtio_blk.c > > @@ -157,10 +157,25 @@ static int virtblk_ioctl(struct inode *i > > /* We provide getgeo only to please some old bootloader/partitioning tools */ > > static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) > > { > > - /* some standard values, similar to sd */ > > - geo->heads = 1 << 6; > > - geo->sectors = 1 << 5; > > - geo->cylinders = get_capacity(bd->bd_disk) >> 11; > > + struct virtio_blk *vblk = bd->bd_disk->private_data; > > + struct virtio_blk_geometry vgeo; > > + int err; > > + > > + /* see if the host passed in geometry config */ > > + err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, > > + offsetof(struct virtio_blk_config, geometry), > > + &vgeo); > > + > > + if (!err) { > > + geo->heads = vgeo.heads; > > + geo->sectors = vgeo.sectors; > > + geo->cylinders = vgeo.cylinders; > > + } else { > > + /* some standard values, similar to sd */ > > + geo->heads = 1 << 6; > > + geo->sectors = 1 << 5; > > + geo->cylinders = get_capacity(bd->bd_disk) >> 11; > > + } > > return 0; > > } > > > > You're probably breaking PPC since the values in the config space are in > little endian format. virtio_config_val does automagic endianness > conversion if the size is 2, 4, or 8. In this case, the structure size > is 4 so the endianness conversion will do the wrong thing. Good catch; byte-swapping an entire structure is a terrible terrible idea. > Magic endianness conversion based on read size is looking pretty evil to > me... Perhaps we need explicit *_val[8,16,32,64]? Implicit byteswapping based on access size is the standard way of implementing accessors. In this case, reading each structure member individually will do the right implicit swapping, rather than trying to load the whole thing as a single access. -- Hollis Blanchard IBM Linux Technology Center |
From: Anthony L. <ali...@us...> - 2008-04-16 21:36:54
|
This isn't fully cooked yet, but pretty close. The basic idea is to make the aio usage in block-raw go to a set of function pointers and allow multiple simultaneous AIO implementations. I converted the posix-aio support to this, and also introduced a "unix" aio which just uses O_NONBLOCK and select(). The later only supports 1 simultaneous request per-fd but currently posix-aio is limited to that too. At least with my QEMU testing, the unix aio implementation outperforms posix-aio by a factor of 2. And it uses no signals... I'm inclined to suggest that we use signalfd with posix-aio, and for older guests, just fall back to unix aio. We can also introduce a linux-aio and use that when possible. Regards, Anthony Liguori |
From: Anthony L. <ali...@us...> - 2008-04-16 21:32:36
|
Rusty Russell wrote: > On Thursday 17 April 2008 04:56:37 Ryan Harper wrote: > >> From: Ryan Harper <ry...@us...> >> >> Rather than faking up some geometry, allow the backend to push the disk >> geometry via virtio pci config option. Keep the old geo code around for >> compatibility. >> > > Hi Ryan, > > Looks good! Some brief review below. Mainly just "how I would have done > things" stuff. BTW, does this help in real life? I assume something in > userspace wants it? > Boot loaders (like grub) query the geometry from the kernel to figure out how to setup the stage1/stage2. We've seen strange issues with grub thinking it has crazy geometries when installed on a virtio disk (as opposed to booting from virtio with an existing disk). Ryan: have you tested a hardy install with your patches? Does it help when installing to virtio? I could pretty reliably reproduce the strangeness with a 20GB disk image FWIW. > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c > --- a/drivers/block/virtio_blk.c > +++ b/drivers/block/virtio_blk.c > @@ -157,10 +157,25 @@ static int virtblk_ioctl(struct inode *i > /* We provide getgeo only to please some old bootloader/partitioning tools */ > static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) > { > - /* some standard values, similar to sd */ > - geo->heads = 1 << 6; > - geo->sectors = 1 << 5; > - geo->cylinders = get_capacity(bd->bd_disk) >> 11; > + struct virtio_blk *vblk = bd->bd_disk->private_data; > + struct virtio_blk_geometry vgeo; > + int err; > + > + /* see if the host passed in geometry config */ > + err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, > + offsetof(struct virtio_blk_config, geometry), > + &vgeo); > + > + if (!err) { > + geo->heads = vgeo.heads; > + geo->sectors = vgeo.sectors; > + geo->cylinders = vgeo.cylinders; > + } else { > + /* some standard values, similar to sd */ > + geo->heads = 1 << 6; > + geo->sectors = 1 << 5; > + geo->cylinders = get_capacity(bd->bd_disk) >> 11; > + } > return 0; > } > You're probably breaking PPC since the values in the config space are in little endian format. virtio_config_val does automagic endianness conversion if the size is 2, 4, or 8. In this case, the structure size is 4 so the endianness conversion will do the wrong thing. Magic endianness conversion based on read size is looking pretty evil to me... Perhaps we need explicit *_val[8,16,32,64]? Regards, Anthony Liguori |
From: Rusty R. <ru...@ru...> - 2008-04-16 21:16:03
|
On Thursday 17 April 2008 04:56:37 Ryan Harper wrote: > From: Ryan Harper <ry...@us...> > > Rather than faking up some geometry, allow the backend to push the disk > geometry via virtio pci config option. Keep the old geo code around for > compatibility. Hi Ryan, Looks good! Some brief review below. Mainly just "how I would have done things" stuff. BTW, does this help in real life? I assume something in userspace wants it? > + int err = 0; > + > + /* see if the host passed in geometry config */ > + err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, > + offsetof(struct virtio_blk_config, cylinders), > + &geo->cylinders); Unnecessary err initialization. Sometimes gcc catches bugs when you defer initializations of err to as late as possible (ie. paths where err isn't set properly), so I tend to do it. > + /* if host sets geo flag, all 3 values must be present */ > + if (!err) { > + __virtio_config_val(vblk->vdev, > + offsetof(struct virtio_blk_config, heads), > + &geo->heads); > + __virtio_config_val(vblk->vdev, > + offsetof(struct virtio_blk_config, sectors), > + &geo->sectors); Kind of ugly; we can represent this in the data structure explicitly tho... > @@ -18,6 +19,12 @@ struct virtio_blk_config > __le32 size_max; > /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ > __le32 seg_max; > + /* cylinders of the device (if VIRTIO_BLK_F_GEOMETRY) */ > + __le16 cylinders; > + /* heads of the device (if VIRTIO_BLK_F_GEOMETRY) */ > + __u8 heads; > + /* sectors of the device (if VIRTIO_BLK_F_GEOMETRY) */ > + __u8 sectors; > } __attribute__((packed)); ... using a struct-within-a-struct. Here's the result: Subject: add virtio disk geometry feature Date: Wed, 16 Apr 2008 13:56:37 -0500 From: Ryan Harper <ry...@us...> Rather than faking up some geometry, allow the backend to push the disk geometry via virtio pci config option. Keep the old geo code around for compatibility. Signed-off-by: Ryan Harper <ry...@us...> Reviewed-by: Anthony Liguori <ali...@us...> Signed-off-by: Rusty Russell <ru...@ru...> (modified to single struct) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -157,10 +157,25 @@ static int virtblk_ioctl(struct inode *i /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { - /* some standard values, similar to sd */ - geo->heads = 1 << 6; - geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; + struct virtio_blk *vblk = bd->bd_disk->private_data; + struct virtio_blk_geometry vgeo; + int err; + + /* see if the host passed in geometry config */ + err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, + offsetof(struct virtio_blk_config, geometry), + &vgeo); + + if (!err) { + geo->heads = vgeo.heads; + geo->sectors = vgeo.sectors; + geo->cylinders = vgeo.cylinders; + } else { + /* some standard values, similar to sd */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + } return 0; } diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -9,6 +9,7 @@ #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ #define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ struct virtio_blk_config { @@ -18,6 +19,12 @@ struct virtio_blk_config __le32 size_max; /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ __le32 seg_max; + /* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */ + struct virtio_blk_geometry { + __le16 cylinders; + __u8 heads; + __u8 sectors; + } geometry; } __attribute__((packed)); /* These two define direction. */ |
From: Marcelo T. <mto...@re...> - 2008-04-16 20:15:50
|
kvm_pv_mmu_op should not take mmap_sem. All gfn_to_page() callers down in the MMU processing will take it if necessary, so as it is it can deadlock. Apparently a leftover from the days before slots_lock. Signed-off-by: Marcelo Tosatti <mto...@re...> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 078a7f1..2ad6f54 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2173,8 +2173,6 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, int r; struct kvm_pv_mmu_op_buffer buffer; - down_read(¤t->mm->mmap_sem); - buffer.ptr = buffer.buf; buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf); buffer.processed = 0; @@ -2194,7 +2192,6 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, r = 1; out: *ret = buffer.processed; - up_read(¤t->mm->mmap_sem); return r; } |
From: Anthony L. <ali...@us...> - 2008-04-16 19:55:23
|
Blue Swirl wrote: > On 4/16/08, Anthony Liguori <ali...@us...> wrote: > >> This patch introduces a DMA API and plumbs support through the DMA layer. We >> use a mostly opaque structure, IOVector to represent a scatter/gather list of >> physical memory. Associated with each IOVector is a read/write function and >> an opaque pointer. This allows arbitrary transformation/mapping of the >> data while providing an easy mechanism to short-cut the zero-copy case >> in the block/net backends. >> > > This looks much better also for Sparc uses. I converted pcnet to use > the IOVectors (see patch), it does not work yet but looks doable. > Excellent! > IMHO the read/write functions should be a property of the bus so that > they are hidden from the device, for pcnet it does not matter as we > have to do the swapping anyway. > For an IOMMU that has a per-device mapping, the read/write functions have to operate on a per-device basis. Regards, Anthony Liguori |
From: Alex D. <ale...@ya...> - 2008-04-16 19:52:24
|
--- Alex Davis <ale...@ya...> wrote: > Host software: > Linux 2.6.24.4 > KVM 65 (I am using the kernel modules from this release). > X11 7.2 from Xorg > SDL 1.2.13 > GCC 4.1.1 > Glibc 2.4 > > Host hardware: > Asus P5B Deluxe (P965 chipset based) motherboard > 4 GB RAM > Intel E6700 CPU > > Guest software: > Slackware 12.0 installed from CD-ROM. > Additional information: host arch. is x86_64(64-bit); guest arch. is x86(32-bit). I code, therefore I am ____________________________________________________________________________________ Be a better friend, newshound, and know-it-all with Yahoo! Mobile. Try it now. http://mobile.yahoo.com/;_ylt=Ahu06i62sR8HDtDypao8Wcj9tAcJ |
From: Blue S. <bla...@gm...> - 2008-04-16 19:51:04
|
On 4/16/08, Anthony Liguori <ali...@us...> wrote: > This patch introduces a DMA API and plumbs support through the DMA layer. We > use a mostly opaque structure, IOVector to represent a scatter/gather list of > physical memory. Associated with each IOVector is a read/write function and > an opaque pointer. This allows arbitrary transformation/mapping of the > data while providing an easy mechanism to short-cut the zero-copy case > in the block/net backends. This looks much better also for Sparc uses. I converted pcnet to use the IOVectors (see patch), it does not work yet but looks doable. IMHO the read/write functions should be a property of the bus so that they are hidden from the device, for pcnet it does not matter as we have to do the swapping anyway. |
From: Christoph L. <cla...@sg...> - 2008-04-16 19:15:15
|
On Wed, 16 Apr 2008, Robin Holt wrote: > On Wed, Apr 16, 2008 at 11:35:38AM -0700, Christoph Lameter wrote: > > On Wed, 16 Apr 2008, Robin Holt wrote: > > > > > I don't think this lock mechanism is completely working. I have > > > gotten a few failures trying to dereference 0x100100 which appears to > > > be LIST_POISON1. > > > > How does xpmem unregistering of notifiers work? > > For the tests I have been running, we are waiting for the release > callout as part of exit. Some more details on the failure may be useful. AFAICT list_del[_rcu] is the culprit here and that is only used on release or unregister. |
From: Robin H. <ho...@sg...> - 2008-04-16 19:02:17
|
On Wed, Apr 16, 2008 at 11:35:38AM -0700, Christoph Lameter wrote: > On Wed, 16 Apr 2008, Robin Holt wrote: > > > I don't think this lock mechanism is completely working. I have > > gotten a few failures trying to dereference 0x100100 which appears to > > be LIST_POISON1. > > How does xpmem unregistering of notifiers work? For the tests I have been running, we are waiting for the release callout as part of exit. Thanks, Robin |
From: Ryan H. <ry...@us...> - 2008-04-16 18:59:34
|
From: Ryan Harper <ry...@us...> Rather than faking up some geometry, allow the backend to push the disk geometry via virtio pci config option. Keep the old geo code around for compatibility. Signed-off-by: Ryan Harper <ry...@us...> Reviewed-by: Anthony Liguori <ali...@us...> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c index ae87ab9..e78647a 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -1130,7 +1130,7 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, DriveInfo *info = &drives_table[extboot_drive]; int cyls, heads, secs; - if (info->type != IF_IDE) { + if (info->type != IF_IDE && info->type != IF_VIRTIO) { bdrv_guess_geometry(info->bdrv, &cyls, &heads, &secs); bdrv_set_geometry_hint(info->bdrv, cyls, heads, secs); } diff --git a/qemu/hw/virtio-blk.c b/qemu/hw/virtio-blk.c index 492bd7f..d51501e 100644 --- a/qemu/hw/virtio-blk.c +++ b/qemu/hw/virtio-blk.c @@ -25,12 +25,16 @@ #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ #define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Indicates support of legacy geometry */ struct virtio_blk_config { uint64_t capacity; uint32_t size_max; uint32_t seg_max; + uint16_t cylinders; + uint8_t heads; + uint8_t sectors; }; /* These two define direction. */ @@ -132,32 +136,40 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) VirtIOBlock *s = to_virtio_blk(vdev); struct virtio_blk_config blkcfg; int64_t capacity; + int cylinders, heads, secs; bdrv_get_geometry(s->bs, &capacity); + bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs); blkcfg.capacity = cpu_to_le64(capacity); blkcfg.seg_max = cpu_to_le32(128 - 2); + blkcfg.cylinders = cpu_to_le16(cylinders); + blkcfg.heads = heads; + blkcfg.sectors = secs; memcpy(config, &blkcfg, sizeof(blkcfg)); } static uint32_t virtio_blk_get_features(VirtIODevice *vdev) { - return (1 << VIRTIO_BLK_F_SEG_MAX); + return (1 << VIRTIO_BLK_F_SEG_MAX | 1 << VIRTIO_BLK_F_GEOMETRY); } void *virtio_blk_init(PCIBus *bus, uint16_t vendor, uint16_t device, BlockDriverState *bs) { VirtIOBlock *s; + int cylinders, heads, secs; s = (VirtIOBlock *)virtio_init_pci(bus, "virtio-blk", vendor, device, 0, VIRTIO_ID_BLOCK, 0x01, 0x80, 0x00, - 16, sizeof(VirtIOBlock)); + sizeof(struct virtio_blk_config), sizeof(VirtIOBlock)); s->vdev.update_config = virtio_blk_update_config; s->vdev.get_features = virtio_blk_get_features; s->bs = bs; bs->devfn = s->vdev.pci_dev.devfn; + bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs); + bdrv_set_geometry_hint(s->bs, cylinders, heads, secs); virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output); |
From: Ryan H. <ry...@us...> - 2008-04-16 18:57:54
|
From: Ryan Harper <ry...@us...> Rather than faking up some geometry, allow the backend to push the disk geometry via virtio pci config option. Keep the old geo code around for compatibility. Signed-off-by: Ryan Harper <ry...@us...> Reviewed-by: Anthony Liguori <ali...@us...> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0cfbe8c..1d2142a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -157,10 +157,28 @@ static int virtblk_ioctl(struct inode *inode, struct file *filp, /* We provide getgeo only to please some old bootloader/partitioning tools */ static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) { - /* some standard values, similar to sd */ - geo->heads = 1 << 6; - geo->sectors = 1 << 5; - geo->cylinders = get_capacity(bd->bd_disk) >> 11; + struct virtio_blk *vblk = bd->bd_disk->private_data; + int err = 0; + + /* see if the host passed in geometry config */ + err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, + offsetof(struct virtio_blk_config, cylinders), + &geo->cylinders); + + /* if host sets geo flag, all 3 values must be present */ + if (!err) { + __virtio_config_val(vblk->vdev, + offsetof(struct virtio_blk_config, heads), + &geo->heads); + __virtio_config_val(vblk->vdev, + offsetof(struct virtio_blk_config, sectors), + &geo->sectors); + } else { + /* some standard values, similar to sd */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + } return 0; } diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index bca0b10..142c496 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -9,6 +9,7 @@ #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ #define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ struct virtio_blk_config { @@ -18,6 +19,12 @@ struct virtio_blk_config __le32 size_max; /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ __le32 seg_max; + /* cylinders of the device (if VIRTIO_BLK_F_GEOMETRY) */ + __le16 cylinders; + /* heads of the device (if VIRTIO_BLK_F_GEOMETRY) */ + __u8 heads; + /* sectors of the device (if VIRTIO_BLK_F_GEOMETRY) */ + __u8 sectors; } __attribute__((packed)); /* These two define direction. */ |