You can subscribe to this list here.
2006 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(33) |
Nov
(325) |
Dec
(320) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2007 |
Jan
(484) |
Feb
(438) |
Mar
(407) |
Apr
(713) |
May
(831) |
Jun
(806) |
Jul
(1023) |
Aug
(1184) |
Sep
(1118) |
Oct
(1461) |
Nov
(1224) |
Dec
(1042) |
2008 |
Jan
(1449) |
Feb
(1110) |
Mar
(1428) |
Apr
(1643) |
May
(682) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Christoph L. <cla...@sg...> - 2008-04-16 18:35:33
|
On Wed, 16 Apr 2008, Robin Holt wrote: > I don't think this lock mechanism is completely working. I have > gotten a few failures trying to dereference 0x100100 which appears to > be LIST_POISON1. How does xpmem unregistering of notifiers work? |
From: Marcelo T. <mto...@re...> - 2008-04-16 17:40:59
|
On Wed, Apr 16, 2008 at 11:21:05AM -0500, Hollis Blanchard wrote: > By the way Marcelo, it would be polite to provide these stubs yourself to > avoid breaking the build on other architectures. Indeed, should have been more careful. > It looks like IA64 is still broken because of this. Now I'm not sure if IA64 supports migration. Should it return -EINVAL or the ia64 mpstate ? |
From: Glauber de O. C. <gc...@re...> - 2008-04-16 17:35:26
|
Glauber Costa wrote: > It makes no sense for the clock initialization to be > hanging around in setup_32.c when we have a generic kvm guest > initialization function available. So, we move kvmclock_init() > inside such a function, leading to a cleaner code. > > Signed-off-by: Glauber Costa <gc...@re...> > --- > arch/x86/kernel/kvm.c | 2 ++ > arch/x86/kernel/setup_32.c | 4 ---- > include/linux/kvm_para.h | 5 +++++ > 3 files changed, 7 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index d9121f9..5cad368 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -210,6 +210,8 @@ static void paravirt_ops_setup(void) > pv_info.name = "KVM"; > pv_info.paravirt_enabled = 1; > > + kvmclock_init(); > + > if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) > pv_cpu_ops.io_delay = kvm_io_delay; > > diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c > index 65f3a23..029350c 100644 > --- a/arch/x86/kernel/setup_32.c > +++ b/arch/x86/kernel/setup_32.c > @@ -771,10 +771,6 @@ void __init setup_arch(char **cmdline_p) > > max_low_pfn = setup_memory(); > > -#ifdef CONFIG_KVM_CLOCK > - kvmclock_init(); > -#endif > - > #ifdef CONFIG_VMI > /* > * Must be after max_low_pfn is determined, and before kernel > diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h > index 3ddce03..c5e662c 100644 > --- a/include/linux/kvm_para.h > +++ b/include/linux/kvm_para.h > @@ -28,6 +28,11 @@ void __init kvm_guest_init(void); > #else > #define kvm_guest_init() do { } while (0) > #endif > +#ifdef CONFIG_KVM_CLOCK > +void kvmclock_init(void); > +#else > +#define kvmclock_init() do { } while (0) > +#endif > > static inline int kvm_para_has_feature(unsigned int feature) > { > Forget about it. Marelo just screamed to me (and somehow I heard it), that this create a bogus dependency between clock and the mmu functions. Duh. I'll resend a better version |
From: Glauber C. <gc...@re...> - 2008-04-16 17:05:06
|
It makes no sense for the clock initialization to be hanging around in setup_32.c when we have a generic kvm guest initialization function available. So, we move kvmclock_init() inside such a function, leading to a cleaner code. Signed-off-by: Glauber Costa <gc...@re...> --- arch/x86/kernel/kvm.c | 2 ++ arch/x86/kernel/setup_32.c | 4 ---- include/linux/kvm_para.h | 5 +++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d9121f9..5cad368 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -210,6 +210,8 @@ static void paravirt_ops_setup(void) pv_info.name = "KVM"; pv_info.paravirt_enabled = 1; + kvmclock_init(); + if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 65f3a23..029350c 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -771,10 +771,6 @@ void __init setup_arch(char **cmdline_p) max_low_pfn = setup_memory(); -#ifdef CONFIG_KVM_CLOCK - kvmclock_init(); -#endif - #ifdef CONFIG_VMI /* * Must be after max_low_pfn is determined, and before kernel diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 3ddce03..c5e662c 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -28,6 +28,11 @@ void __init kvm_guest_init(void); #else #define kvm_guest_init() do { } while (0) #endif +#ifdef CONFIG_KVM_CLOCK +void kvmclock_init(void); +#else +#define kvmclock_init() do { } while (0) +#endif static inline int kvm_para_has_feature(unsigned int feature) { -- 1.5.0.6 |
From: Anthony L. <ali...@us...> - 2008-04-16 16:42:08
|
A couple general comments. I'd feel a lot more comfortable with the int13 handler returning an int and the asm stub code uses that result to determine how to set CF. You set CF deep within the function stack and there's no guarantee that GCC isn't going to stomp on it. I also don't think we want to raise int18 when we get a command we don't understand. We should just not change any of the register state. There are a number of extended commands that look for a magic value to determine whether the command exists or not. Regards, Anthony Liguori Nguyen Anh Quynh wrote: > Hi Anthony, > > I found a bug in the last code: send_command() failed to copy back the > result into extboot_cmd structure. This patch fixes it. > > I succesfully tested this version with guest Win2K (fully updated, > scsi boot) and Linux 2.6.25-rc8 (virtio). > > Let me know if you can boot Windows with this version. > > Thanks, > Quynh > --- > This code is an attempt to rewrite the current extboot option rom in > C. The new code now minimize the assembly code, so that the assembly > code is very small and simple: boot.S's only job is to interface with > C code, which does all the dirty job. "signrom" is modified to adapt > with the new result binary image. > > The result option rom has the same size as the original one: 1.5KB, > while the actual code size is around the same: 1.2KB (gcc can optimize > really well) > > To install this option rom, do the following steps as root: > > make > make save <--- backup the original option rom to > /usr/share/qemu/extboot.bin.org > make install <--- overwrite the new option rom to > /usr/share/qemu/extboot.bin > |
From: Robin H. <ho...@sg...> - 2008-04-16 16:33:44
|
I don't think this lock mechanism is completely working. I have gotten a few failures trying to dereference 0x100100 which appears to be LIST_POISON1. Thanks, Robin |
From: Alex D. <ale...@ya...> - 2008-04-16 16:30:34
|
Host software: Linux 2.6.24.4 KVM 65 (I am using the kernel modules from this release). X11 7.2 from Xorg SDL 1.2.13 GCC 4.1.1 Glibc 2.4 Host hardware: Asus P5B Deluxe (P965 chipset based) motherboard 4 GB RAM Intel E6700 CPU Guest software: Slackware 12.0 installed from CD-ROM. Command used to first KVM instance: /usr/local/bin/qemu-system-x86_64 -hda /spare/vdisk1.img -cdrom /dev/cdrom -boot c -m 384 -net nic,macaddr=DE:AD:BE:EF:11:29 -net tap,ifname=tap0,script=no & Command used to start second KVM instance: /usr/local/bin/qemu-system-x86_64 -hda /spare/vdisk2.img -cdrom /dev/cdrom -boot c -m 384 -net nic,macaddr=DE:AD:BE:EF:11:30 -net tap,ifname=tap1,script=no & tap0 and tap1 are bridged on the host. The guest OS was installed on /spare/vdisk1.img, which was initially created by /usr/local/bin/qemu-img create -f qcow /spare/vdisk.img 10G After the guest installation completed, vdisk1 was copied to vdisk2. The second instance always stops after printing Checking if the processor honours the WP bit even in supervisor mode... Ok. It stays hung until I press the return key in the first instance; sometimes clicking in another X window will wake it up as well. This is a test machine so I can test patches (almost) at will. I code, therefore I am ____________________________________________________________________________________ Be a better friend, newshound, and know-it-all with Yahoo! Mobile. Try it now. http://mobile.yahoo.com/;_ylt=Ahu06i62sR8HDtDypao8Wcj9tAcJ |
From: Hollis B. <ho...@us...> - 2008-04-16 16:27:32
|
By the way Marcelo, it would be polite to provide these stubs yourself to avoid breaking the build on other architectures. It looks like IA64 is still broken because of this. -- Hollis Blanchard IBM Linux Technology Center On Wednesday 16 April 2008 09:06:34 Carsten Otte wrote: > From: Christian Borntraeger <bor...@de...> > > Since > > commit ded6fb24fb694bcc5f308a02ec504d45fbc8aaa6 > Author: Marcelo Tosatti <mto...@re...> > Date: Fri Apr 11 13:24:45 2008 -0300 > KVM: add ioctls to save/store mpstate > > kvm does not compile on s390. > This patch provides ioctl stubs for s390 to make kvm.git compile again. > As migration is not yet supported, the ioctl definitions are empty. > > Signed-off-by: Christian Borntraeger <bor...@de...> > Signed-off-by: Carsten Otte <co...@de...> > --- > arch/s390/kvm/kvm-s390.c | 12 ++++++++++++ > 1 file changed, 12 insertions(+) > > Index: kvm/arch/s390/kvm/kvm-s390.c > =================================================================== > --- kvm.orig/arch/s390/kvm/kvm-s390.c > +++ kvm/arch/s390/kvm/kvm-s390.c > @@ -414,6 +414,18 @@ int kvm_arch_vcpu_ioctl_debug_guest(stru > return -EINVAL; /* not implemented yet */ > } > > +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, > + struct kvm_mp_state *mp_state) > +{ > + return -EINVAL; /* not implemented yet */ > +} > + > +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, > + struct kvm_mp_state *mp_state) > +{ > + return -EINVAL; /* not implemented yet */ > +} > + > static void __vcpu_run(struct kvm_vcpu *vcpu) > { > memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); > > > > ------------------------------------------------------------------------- > This SF.net email is sponsored by the 2008 JavaOne(SM) Conference > Don't miss this year's exciting event. There's still time to save $100. > Use priority code J8TL2D2. 
> http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone > _______________________________________________ > kvm-devel mailing list > kvm...@li... > https://lists.sourceforge.net/lists/listinfo/kvm-devel > |
From: Joerg R. <joe...@am...> - 2008-04-16 15:16:37
|
This patch adds syncing of the lapic.tpr field to the V_TPR field of the VMCB. With this change we can safely remove the CR8 read intercept. Signed-off-by: Joerg Roedel <joe...@am...> --- arch/x86/kvm/svm.c | 18 ++++++++++++++++-- 1 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3379e13..f8ce36e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -486,8 +486,7 @@ static void init_vmcb(struct vcpu_svm *svm) control->intercept_cr_read = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | - INTERCEPT_CR4_MASK | - INTERCEPT_CR8_MASK; + INTERCEPT_CR4_MASK; control->intercept_cr_write = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | @@ -1621,6 +1620,19 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) { } +static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 cr8; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + cr8 = kvm_get_cr8(vcpu); + svm->vmcb->control.int_ctl &= ~V_TPR_MASK; + svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; +} + static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1630,6 +1642,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) pre_svm_run(svm); + sync_lapic_to_cr8(vcpu); + save_host_msrs(vcpu); fs_selector = read_fs(); gs_selector = read_gs(); -- 1.5.3.7 |
From: Joerg R. <joe...@am...> - 2008-04-16 15:14:55
|
This patch exports the kvm_lapic_set_tpr() function from the lapic code to modules. It is required in the kvm-amd module to optimize CR8 intercepts. Signed-off-by: Joerg Roedel <joe...@am...> --- arch/x86/kvm/lapic.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2ccf994..57ac4e4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) apic_set_tpr(apic, ((cr8 & 0x0f) << 4) | (apic_get_reg(apic, APIC_TASKPRI) & 4)); } +EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { -- 1.5.3.7 |
From: Anthony L. <ali...@us...> - 2008-04-16 15:05:30
|
Anthony Liguori wrote: > A couple general comments. > > I'd feel a lot more comfortable with the int13 handler returning an > int and the asm stub code uses that result to determine how to set > CF. You set CF deep within the function stack and there's no > guarantee that GCC isn't going to stomp on it. Ignore that bit, I missed that you were only setting it within the regs structure. Regards, Anthony Liguori > I also don't think we want to raise int18 when we get a command we > don't understand. We should just not change any of the register > state. There are a number of extended commands that look for a magic > value to determine whether the command exists or not. > > Regards, > > Anthony Liguori > > Nguyen Anh Quynh wrote: >> Hi Anthony, >> >> I found a bug in the last code: send_command() failed to copy back the >> result into extboot_cmd structure. This patch fixes it. >> >> I succesfully tested this version with guest Win2K (fully updated, >> scsi boot) and Linux 2.6.25-rc8 (virtio). >> >> Let me know if you can boot Windows with this version. >> >> Thanks, >> Quynh >> --- >> This code is an attempt to rewrite the current extboot option rom in >> C. The new code now minimize the assembly code, so that the assembly >> code is very small and simple: boot.S's only job is to interface with >> C code, which does all the dirty job. "signrom" is modified to adapt >> with the new result binary image. >> >> The result option rom has the same size as the original one: 1.5KB, >> while the actual code size is around the same: 1.2KB (gcc can optimize >> really well) >> >> To install this option rom, do the following steps as root: >> >> make >> make save <--- backup the original option rom to >> /usr/share/qemu/extboot.bin.org >> make install <--- overwrite the new option rom to >> /usr/share/qemu/extboot.bin >> > > |
From: Joerg R. <joe...@am...> - 2008-04-16 15:04:32
|
There is not selective cr0 intercept bug. The code in the comment sets the CR0.PG bit. But KVM sets the CR4.PG bit for SVM always to implement the paged real mode. So the 'mov %eax,%cr0' instruction does not change the CR0.PG bit. Selective CR0 intercepts only occur when a bit is actually changed. So its the right behavior that there is no intercept on this instruction. Signed-off-by: Joerg Roedel <joe...@am...> --- arch/x86/kvm/svm.c | 11 ----------- 1 files changed, 0 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3379e13..55b5076 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -514,17 +514,6 @@ static void init_vmcb(struct vcpu_svm *svm) control->intercept = (1ULL << INTERCEPT_INTR) | (1ULL << INTERCEPT_NMI) | (1ULL << INTERCEPT_SMI) | - /* - * selective cr0 intercept bug? - * 0: 0f 22 d8 mov %eax,%cr3 - * 3: 0f 20 c0 mov %cr0,%eax - * 6: 0d 00 00 00 80 or $0x80000000,%eax - * b: 0f 22 c0 mov %eax,%cr0 - * set cr3 ->interception - * get cr0 ->interception - * set cr0 -> no interception - */ - /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ (1ULL << INTERCEPT_CPUID) | (1ULL << INTERCEPT_INVD) | (1ULL << INTERCEPT_HLT) | -- 1.5.3.7 |
From: Joerg R. <joe...@am...> - 2008-04-16 14:56:46
|
If the CR8 write intercept is disabled the V_TPR field of the VMCB needs to be synced with the TPR field in the local apic. Signed-off-by: Joerg Roedel <joe...@am...> --- arch/x86/kvm/svm.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8ce36e..ee2ee83 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1620,6 +1620,16 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) { } +static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { + int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; + kvm_lapic_set_tpr(vcpu, cr8); + } +} + static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1791,6 +1801,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) stgi(); + sync_cr8_to_lapic(vcpu); + svm->next_rip = 0; } -- 1.5.3.7 |
From: Joerg R. <joe...@am...> - 2008-04-16 14:56:40
|
With the usage of the V_TPR field this comment is now obsolete. Signed-off-by: Joerg Roedel <joe...@am...> --- arch/x86/kvm/svm.c | 7 ------- 1 files changed, 0 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 61bb2cb..d643605 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -916,13 +916,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, } -/* FIXME: - - svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK; - svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); - -*/ - static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) { return -EOPNOTSUPP; -- 1.5.3.7 |
From: Joerg R. <joe...@am...> - 2008-04-16 14:54:20
|
This patch series implements optimizations to the CR8 intercept handling in SVM. With these patches applied CR8 reads are not intercepted anymore. The writes to CR8 are only intercepted if the TPR masks interrupts. This significantly reduces the number of total CR8 intercepts when running Windows 64 bit versions. Some quick numbers: Boot and shutdown of Vista 64: Without these patches: ~38.000.000 CR8 writes intercepted With these patches: ~38.000 CR8 writes intercepted diffstat: arch/x86/kvm/lapic.c | 1 + arch/x86/kvm/svm.c | 68 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 56 insertions(+), 13 deletions(-) |
From: Joerg R. <joe...@am...> - 2008-04-16 14:54:15
|
This patch disables the intercept of CR8 writes if the TPR is not masking interrupts. This reduces the total number CR8 intercepts to below 1 percent of what we have without this patch using Windows 64 bit guests. Signed-off-by: Joerg Roedel <joe...@am...> --- arch/x86/kvm/svm.c | 31 +++++++++++++++++++++++++++---- 1 files changed, 27 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ee2ee83..61bb2cb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1502,6 +1502,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) svm_inject_irq(svm, irq); } +static void update_cr8_intercept(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb; + int max_irr, tpr; + + if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) + return; + + vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; + + max_irr = kvm_lapic_find_highest_irr(vcpu); + if (max_irr == -1) + return; + + tpr = kvm_lapic_get_cr8(vcpu) << 4; + + if (tpr >= (max_irr & 0xf0)) + vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; +} + static void svm_intr_assist(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1514,14 +1535,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) SVM_EVTINJ_VEC_MASK; vmcb->control.exit_int_info = 0; svm_inject_irq(svm, intr_vector); - return; + goto out; } if (vmcb->control.int_ctl & V_IRQ_MASK) - return; + goto out; if (!kvm_cpu_has_interrupt(vcpu)) - return; + goto out; if (!(vmcb->save.rflags & X86_EFLAGS_IF) || (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || @@ -1529,12 +1550,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu) /* unable to deliver irq, set pending irq */ vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); svm_inject_irq(svm, 0x0); - return; + goto out; } /* Okay, we can deliver the interrupt: grab it and update PIC state. 
*/ intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); kvm_timer_intr_post(vcpu, intr_vector); +out: + update_cr8_intercept(vcpu); } static void kvm_reput_irq(struct vcpu_svm *svm) -- 1.5.3.7 |
From: Avi K. <av...@qu...> - 2008-04-16 14:48:03
|
Today's new kvm architecture is ia64, aka Itanium 2. Like s390, it is only provided in the git tree, not in the tarball. Windows and Linux guests are supported. On good old x86, we have a the new kvmtrace performance monitoring framework together with a sizable number of bug fixes. Changes from kvm-65: - adjust external module for 2.6.25 module locations (Anthony Liguori) - fix userspace compilation failure without kernel pit (Joerg Roedel) - kvmtrace performance monitoring mechanism (Eric Liu) - stop all vcpus before saving their state (Marcelo Tosatti) - fixes smp live migration - save/restore kernel apicbase (Marcelo Tosatti) - block SIG_IPI signals (Marcelo Tosatti) - smsw mem16, lmsw mem16 emulation and unit tests - fix compile warnings (Jerone Young) - fix reset with iothread - ia64 architecture support (Xiantao Zhang, Anthony Xu) - don't assume guest pages are backed by a 'struct page' (Anthony Liguori) - needed for pci device assignment - register kvm's ioctl range - fix hardware task switching buglet (Izik Eidus) - fix mce handling on AMD (Joerg Roedel) - do hardware task switching in hardware when NPT is enabled (Joerg Roedel) - fix timer race waking up a halted vcpu with smp (Marcelo Tosatti) - fix irq race leading to irqs delivery delays (Marcelo Tosatti) - fix triple fault handling on AMD - fix lea instruction emulation Notes: If you use the modules bundled with kvm-66, you can use any version of Linux from 2.6.17 upwards. If you use the modules bundled with Linux 2.6.20, you need to use kvm-12. If you use the modules bundled with Linux 2.6.21, you need to use kvm-17. Modules from Linux 2.6.22 and up will work with any kvm version from kvm-22. Some features may only be available in newer releases. For best performance, use Linux 2.6.23-rc2 or later as the host. http://kvm.qumranet.com |
From: Carsten O. <co...@de...> - 2008-04-16 14:32:40
|
Anthony Liguori wrote: > There is a 5th option. Do away with the use of posix aio. We get > absolutely no benefit from it because it's limited to a single thread. > Fabrice has reverted a patch to change that in the past. How about using linux aio for it? It seems much better, because it doesn't use userspace threads but has a direct in-kernel implementation. I've had good performance on zldisk with that, and it's stable. |
From: Anthony L. <ali...@us...> - 2008-04-16 14:32:35
|
Nguyen Anh Quynh wrote: > Hi Anthony, > > I found a bug in the last code: send_command() failed to copy back the > result into extboot_cmd structure. This patch fixes it. > > I succesfully tested this version with guest Win2K (fully updated, > scsi boot) and Linux 2.6.25-rc8 (virtio). > > Let me know if you can boot Windows with this version. > I'll test it out. Please send it to the list as a patch against kvm-userspace. Regards, Anthony Liguori > Thanks, > Quynh > --- > This code is an attempt to rewrite the current extboot option rom in > C. The new code now minimize the assembly code, so that the assembly > code is very small and simple: boot.S's only job is to interface with > C code, which does all the dirty job. "signrom" is modified to adapt > with the new result binary image. > > The result option rom has the same size as the original one: 1.5KB, > while the actual code size is around the same: 1.2KB (gcc can optimize > really well) > > To install this option rom, do the following steps as root: > > make > make save <--- backup the original option rom to > /usr/share/qemu/extboot.bin.org > make install <--- overwrite the new option rom to > /usr/share/qemu/extboot.bin > |
From: Carsten O. <co...@de...> - 2008-04-16 14:07:13
|
From: Christian Borntraeger <bor...@de...> Since commit ded6fb24fb694bcc5f308a02ec504d45fbc8aaa6 Author: Marcelo Tosatti <mto...@re...> Date: Fri Apr 11 13:24:45 2008 -0300 KVM: add ioctls to save/store mpstate kvm does not compile on s390. This patch provides ioctl stubs for s390 to make kvm.git compile again. As migration is not yet supported, the ioctl definitions are empty. Signed-off-by: Christian Borntraeger <bor...@de...> Signed-off-by: Carsten Otte <co...@de...> --- arch/s390/kvm/kvm-s390.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) Index: kvm/arch/s390/kvm/kvm-s390.c =================================================================== --- kvm.orig/arch/s390/kvm/kvm-s390.c +++ kvm/arch/s390/kvm/kvm-s390.c @@ -414,6 +414,18 @@ int kvm_arch_vcpu_ioctl_debug_guest(stru return -EINVAL; /* not implemented yet */ } +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + static void __vcpu_run(struct kvm_vcpu *vcpu) { memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); |
From: Carsten O. <co...@de...> - 2008-04-16 14:06:55
|
From: Christian Borntraeger <bor...@de...> This patch changes the interrupt defintions for virtio on s390. We now use the extint number 0x2603, which is used as a host interrupt already by z/VM for pfault and dasd_diag. We will use subcode 0x0D to distinguish virtio from dasd and pfault. Signed-off-by: Christian Borntraeger <bor...@de...> Signed-off-by: Carsten Otte <co...@de...> --- arch/s390/kvm/interrupt.c | 6 +++++- drivers/s390/kvm/kvm_virtio.c | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) Index: kvm/arch/s390/kvm/interrupt.c =================================================================== --- kvm.orig/arch/s390/kvm/interrupt.c +++ kvm/arch/s390/kvm/interrupt.c @@ -162,7 +162,11 @@ static void __do_deliver_interrupt(struc VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx", inti->ext.ext_params, inti->ext.ext_params2); vcpu->stat.deliver_virtio_interrupt++; - rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1237); + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); if (rc == -EFAULT) exception = 1; Index: kvm/drivers/s390/kvm/kvm_virtio.c =================================================================== --- kvm.orig/drivers/s390/kvm/kvm_virtio.c +++ kvm/drivers/s390/kvm/kvm_virtio.c @@ -23,6 +23,8 @@ #include <asm/setup.h> #include <asm/s390_ext.h> +#define VIRTIO_SUBCODE_64 0x0D00 + /* * The pointer to our (page) of device descriptions. 
*/ @@ -291,6 +293,10 @@ static void scan_devices(void) static void kvm_extint_handler(u16 code) { void *data = (void *) *(long *) __LC_PFAULT_INTPARM; + u16 subcode = S390_lowcore.cpu_addr; + + if ((subcode & 0xff00) != VIRTIO_SUBCODE_64) + return; vring_interrupt(0, data); } @@ -319,8 +325,8 @@ static int __init kvm_devices_init(void) kvm_devices = (void *) (max_pfn << PAGE_SHIFT); - register_external_interrupt(0x1237, kvm_extint_handler); ctl_set_bit(0, 9); + register_external_interrupt(0x2603, kvm_extint_handler); scan_devices(); return 0; |
From: Carsten O. <co...@de...> - 2008-04-16 14:06:50
|
Hi Avi, these two fixes repair two things in kvm-s390: - #1 makes kvm complile again on s390 after a common code change - #2 changes our virtio interrupt definitions to the values that will be reserved for kvm use in s390 architecture I'd be great if both could make 2.6.26. so long, Carsten |
From: Alexey E. <al...@gm...> - 2008-04-16 14:03:01
|
On Wed, Apr 16, 2008 at 1:14 PM, Dietmar Maurer <di...@pr...> wrote: > Hi all, > > I am glad to announce the first beta release of 'Proxmox Virtual > Environment' - an open source virtualization platform for the > enterprise. > > The main features are: > > - All code is GPL > - OpenVZ and KVM support > - bare metal installer (debian etch 64) > - Backup/restore with vzdump/LVM2 > - web based management > - integrated virtual appliance download (include certified > appliances) > - configuration cluster > > You can find more information at http://pve.proxmox.com > > We encourage anyone interested to download and test. > The CD image is available at: http://pve.proxmox.com/wiki/Downloads > > Let us know what you think! > > Best regards, > > Dietmar This technology looks promising... I will try it as soon as time will permit. -- -Alexey Eromenko "Technologov" |
From: Anthony L. <an...@co...> - 2008-04-16 13:53:43
|
Avi Kivity wrote: > Anthony Liguori wrote: >>> >>> What about aio completions? The only race-free way to handle both >>> posix aio completion and fd readiness is signals AFAIK. >> >> We poll aio completion after the select don't we? Worst case >> scenario we miss a signal and wait to poll after the next select >> event. That's going to occur very often because of the timer. > > if select() doesn't enable signals (like you can do with pselect) you > may sit for a long time in select() until the timer expires. > > Consider a 100Hz Linux guest running 'ls -lR' out of a cold cache: > instead of 1-2 ms disk latencies you'll see 10 ms latencies, killing > performance by a factor of 5. > > I see the following possible solutions: > > 1. Apply Anders' patch and keep I/O completions signal based. > > 2. Use signalfd() to convert aio completions to fd readiness, > emulating signalfd() using a thread which does sigwait()+write() (to a > pipe) on older hosts > > 3. Use a separate thread for aio completions > > 4. Use pselect(), live with the race on older hosts (it was introduced > in 2.6.16, which we barely support anyway), live with the signal > delivery inefficiency. > > When I started writing this email I was in favor of (1), but now with > the new signalfd emulation I'm leaning towards (2). I still think (1) > should be merged, preferably to qemu upstream. There is a 5th option. Do away with the use of posix aio. We get absolutely no benefit from it because it's limited to a single thread. Fabrice has reverted a patch to change that in the past. Regards, Anthony Liguori |
From: <be...@il...> - 2008-04-16 13:44:53
|
From: Ben-Ami Yassour <be...@il...> Signed-off-by: Ben-Ami Yassour <be...@il...> Signed-off-by: Muli Ben-Yehuda <mu...@il...> --- arch/x86/kvm/mmu.c | 59 +++++++++++++++++++++++++++++-------------- arch/x86/kvm/paging_tmpl.h | 19 +++++++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 17 +++++++++++- 4 files changed, 69 insertions(+), 28 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 078a7f1..c89029d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -112,6 +112,8 @@ static int dbg = 1; #define PT_FIRST_AVAIL_BITS_SHIFT 9 #define PT64_SECOND_AVAIL_BITS_SHIFT 52 +#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define PT64_LEVEL_BITS 9 @@ -237,6 +239,9 @@ static int is_dirty_pte(unsigned long pte) static int is_rmap_pte(u64 pte) { + if (pte & PT_SHADOW_IO_MARK) + return false; + return is_shadow_present_pte(pte); } @@ -1034,7 +1039,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, int *ptwrite, int largepage, gfn_t gfn, - pfn_t pfn, bool speculative) + pfn_t pfn, bool speculative, + int direct_mmio) { u64 spte; int was_rmapped = 0; @@ -1114,6 +1120,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } + if (direct_mmio) + spte |= PT_SHADOW_IO_MARK; + unshadowed: if (pte_access & ACC_WRITE_MASK) @@ -1129,16 +1138,19 @@ unshadowed: ++vcpu->kvm->stat.lpages; page_header_update_slot(vcpu->kvm, shadow_pte, gfn); - if (!was_rmapped) { - rmap_add(vcpu, shadow_pte, gfn, largepage); - if (!is_rmap_pte(*shadow_pte)) - kvm_release_pfn_clean(pfn); - } else { - if (was_writeble) - kvm_release_pfn_dirty(pfn); - else - kvm_release_pfn_clean(pfn); + if (!direct_mmio) { + if (!was_rmapped) { + rmap_add(vcpu, shadow_pte, gfn, largepage); + if (!is_rmap_pte(*shadow_pte)) + kvm_release_pfn_clean(pfn); + } else { + if (was_writeble) + 
kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); + } } + if (!ptwrite || !*ptwrite) vcpu->arch.last_pte_updated = shadow_pte; } @@ -1149,7 +1161,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int largepage, gfn_t gfn, pfn_t pfn, - int level) + int level, int direct_mmio) { hpa_t table_addr = vcpu->arch.mmu.root_hpa; int pt_write = 0; @@ -1163,13 +1175,15 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, if (level == 1) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 0, gfn, pfn, false); + 0, write, 1, &pt_write, 0, gfn, pfn, + false, direct_mmio); return pt_write; } if (largepage && level == 2) { mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, - 0, write, 1, &pt_write, 1, gfn, pfn, false); + 0, write, 1, &pt_write, 1, gfn, pfn, + false, direct_mmio); return pt_write; } @@ -1200,6 +1214,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) int r; int largepage = 0; pfn_t pfn; + int direct_mmio = 0; down_read(&current->mm->mmap_sem); if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { @@ -1207,10 +1222,10 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(&current->mm->mmap_sem); - /* mmio */ + /* handle emulated mmio */ if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return 1; } @@ -1219,7 +1234,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, v, write, largepage, gfn, pfn, - PT32E_ROOT_LEVEL); + PT32E_ROOT_LEVEL, direct_mmio); spin_unlock(&vcpu->kvm->mmu_lock); @@ -1355,6 +1370,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, int r; int largepage = 0; gfn_t gfn = gpa >> PAGE_SHIFT; + int direct_mmio = 0; 
ASSERT(vcpu); ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -1368,7 +1384,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(&current->mm->mmap_sem); if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); @@ -1377,7 +1393,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, - largepage, gfn, pfn, TDP_ROOT_LEVEL); + largepage, gfn, pfn, TDP_ROOT_LEVEL, + direct_mmio); spin_unlock(&vcpu->kvm->mmu_lock); return r; @@ -1643,6 +1660,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int r; u64 gpte = 0; pfn_t pfn; + int direct_mmio = 0; vcpu->arch.update_pte.largepage = 0; @@ -1678,9 +1696,12 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn &= ~(KVM_PAGES_PER_HPAGE-1); vcpu->arch.update_pte.largepage = 1; } - pfn = gfn_to_pfn(vcpu->kvm, gfn); + pfn = gfn_to_pfn(vcpu->kvm, gfn, &direct_mmio); up_read(&current->mm->mmap_sem); + if (direct_mmio) + return; + if (is_error_pfn(pfn)) { kvm_release_pfn_clean(pfn); return; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 156fe10..e85d8ae 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -264,9 +264,10 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (is_error_pfn(pfn)) return; kvm_get_pfn(pfn); + mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), - pfn, true); + pfn, true, false); } /* @@ -275,7 +276,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *walker, int user_fault, int write_fault, int largepage, - int *ptwrite, 
pfn_t pfn) + int *ptwrite, pfn_t pfn, int direct_mmio) { hpa_t shadow_addr; int level; @@ -349,11 +350,15 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, user_fault, write_fault, walker->ptes[walker->level-1] & PT_DIRTY_MASK, - ptwrite, largepage, walker->gfn, pfn, false); + ptwrite, largepage, walker->gfn, pfn, false, + direct_mmio); return shadow_ent; } +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr); + + /* * Page fault handler. There are several causes for a page fault: * - there is no shadow pte for the guest pte @@ -380,6 +385,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, int r; pfn_t pfn; int largepage = 0; + int direct_mmio = 0; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); kvm_mmu_audit(vcpu, "pre page fault"); @@ -413,10 +419,10 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, largepage = 1; } } - pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); + pfn = gfn_to_pfn(vcpu->kvm, walker.gfn, &direct_mmio); up_read(&current->mm->mmap_sem); - /* mmio */ + /* handle emulated mmio */ if (is_error_pfn(pfn)) { pgprintk("gfn %x is mmio\n", walker.gfn); kvm_release_pfn_clean(pfn); @@ -426,7 +432,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, - largepage, &write_pt, pfn); + largepage, &write_pt, pfn, + direct_mmio); pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__, shadow_pte, *shadow_pte, write_pt); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 578c363..0910cc1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -173,7 +173,7 @@ void kvm_release_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio); void kvm_release_pfn_dirty(pfn_t); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a52c08..07b95f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -526,20 +526,33 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) /* * Requires current->mm->mmap_sem to be held */ -pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) +pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn, int *direct_mmio) { struct page *page[1]; unsigned long addr; int npages; + struct vm_area_struct *vma; might_sleep(); + if (direct_mmio) + *direct_mmio = 0; + addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) { get_page(bad_page); return page_to_pfn(bad_page); } + /* handle mmio */ + vma = find_vma(current->mm, addr); + if (vma->vm_flags & VM_IO) { + if (direct_mmio) + *direct_mmio = 1; + + return ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + } + npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, NULL); @@ -555,7 +568,7 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { - return pfn_to_page(gfn_to_pfn(kvm, gfn)); + return pfn_to_page(gfn_to_pfn(kvm, gfn, NULL)); } EXPORT_SYMBOL_GPL(gfn_to_page); -- 1.5.4.5 |