From: Zhang, X. <xia...@in...> - 2008-04-01 10:54:29
>From e984cdcbabd5fe7ede6f6fdb7266b7e7f0e7e66a Mon Sep 17 00:00:00 2001 From: Xiantao Zhang <xia...@in...> Date: Tue, 1 Apr 2008 15:29:29 +0800 Subject: [PATCH] KVM: IA64: Add kvm arch-specific core code for kvm/ia64. kvm_ia64.c is created to handle kvm ia64-specific core logic. Signed-off-by: Xiantao Zhang <xia...@in...> --- arch/ia64/kvm/kvm-ia64.c | 1790 ++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 1790 insertions(+), 0 deletions(-) create mode 100644 arch/ia64/kvm/kvm-ia64.c diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c new file mode 100644 index 0000000..0ae8f61 --- /dev/null +++ b/arch/ia64/kvm/kvm-ia64.c @@ -0,0 +1,1790 @@ + +/* + * kvm_ia64.c: Basic KVM suppport On Itanium series processors + * + * + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xia...@in...) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/bitops.h> +#include <linux/hrtimer.h> +#include <linux/uaccess.h> + +#include <asm/pgtable.h> +#include <asm/gcc_intrin.h> +#include <asm/pal.h> +#include <asm/cacheflush.h> +#include <asm/div64.h> +#include <asm/tlb.h> + +#include "misc.h" +#include "vti.h" +#include "iodev.h" +#include "ioapic.h" +#include "lapic.h" + +static unsigned long kvm_vmm_base; +static unsigned long kvm_vsa_base; +static unsigned long kvm_vm_buffer; +static unsigned long kvm_vm_buffer_size; +unsigned long kvm_vmm_gp; + +static long vp_env_info; + +static struct kvm_vmm_info *kvm_vmm_info; + +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + + +struct fdesc{ + unsigned long ip; + unsigned long gp; +}; + +static void kvm_flush_icache(unsigned long start, unsigned long len) +{ + int l; + + for (l = 0; l < (len + 32); l += 32) + ia64_fc(start + l); + + ia64_sync_i(); + ia64_srlz_i(); +} + +static void kvm_flush_tlb_all(void) +{ + unsigned long i, j, count0, count1, stride0, stride1, addr; + long flags; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, + (u64)opt_handler); + + return iprv.status; +} + +static DEFINE_SPINLOCK(vp_lock); + +void kvm_arch_hardware_enable(void *garbage) +{ + 
long status; + long tmp_base; + unsigned long pte; + unsigned long saved_psr; + int slot; + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + spin_lock(&vp_lock); + status = ia64_pal_vp_init_env(kvm_vsa_base ? + VP_INIT_ENV : VP_INIT_ENV_INITALIZE, + __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); + if (status != 0) { + printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); + return ; + } + + if (!kvm_vsa_base) { + kvm_vsa_base = tmp_base; + printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); + } + spin_unlock(&vp_lock); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_hardware_disable(void *garbage) +{ + + long status; + int slot; + unsigned long pte; + unsigned long saved_psr; + unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + status = ia64_pal_vp_exit_env(host_iva); + if (status) + printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", + status); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_USER_MEMORY: + + r = 1; + break; + default: + r = 0; + } + return r; + +} + +static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + struct kvm_io_device *dev; + + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + + return dev; +} + +static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 1; + return 0; +} + +static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct kvm_mmio_req *p; + struct kvm_io_device *mmio_dev; + + p = kvm_get_vcpu_ioreq(vcpu); + + if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) + goto mmio; + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; + vcpu->mmio_size = kvm_run->mmio.len = p->size; + vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; + + if (vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, &p->data, p->size); + memcpy(kvm_run->mmio.data, &p->data, p->size); + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; +mmio: + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + if (mmio_dev) { + if (!p->dir) + kvm_iodevice_write(mmio_dev, p->addr, p->size, + &p->data); + else + kvm_iodevice_read(mmio_dev, p->addr, p->size, + &p->data); + + } else + printk(KERN_ERR"kvm: No iodevice found! 
addr:%lx\n", p->addr); + p->state = STATE_IORESP_READY; + + return 1; +} + +static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_PAL_CALL) + return kvm_pal_emul(vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 2; + return 0; + } +} + +static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + kvm_sal_emul(vcpu); + return 1; + } else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 3; + return 0; + } + +} + +/* + * offset: address offset to IPI space. + * value: deliver value. + */ +static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, + uint64_t vector) +{ + switch (dm) { + case SAPIC_FIXED: + kvm_apic_set_irq(vcpu, vector, 0); + break; + case SAPIC_NMI: + kvm_apic_set_irq(vcpu, 2, 0); + break; + case SAPIC_EXTINT: + kvm_apic_set_irq(vcpu, 0, 0); + break; + case SAPIC_INIT: + case SAPIC_PMI: + default: + printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); + break; + } +} + +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, + unsigned long eid) +{ + union ia64_lid lid; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (kvm->vcpus[i]) { + lid.val = VCPU_LID(kvm->vcpus[i]); + if (lid.id == id && lid.eid == eid) + return kvm->vcpus[i]; + } + } + + return NULL; +} + +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm_vcpu *target_vcpu; + struct kvm_pt_regs *regs; + union ia64_ipi_a addr = p->u.ipi_data.addr; + union ia64_ipi_d data = p->u.ipi_data.data; + + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); + + if (!target_vcpu->arch.launched) { + regs = vcpu_regs(target_vcpu); + + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + + target_vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + if (waitqueue_active(&target_vcpu->wq)) + wake_up_interruptible(&target_vcpu->wq); + } else { + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); + } + + return 1; +} + +struct call_data { + struct kvm_ptc_g ptc_g_data; + struct kvm_vcpu *vcpu; +}; + +static void vcpu_global_purge(void *info) +{ + struct call_data *p = (struct call_data *)info; + struct kvm_vcpu *vcpu = p->vcpu; + + if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + return; + + set_bit(KVM_REQ_PTC_G, &vcpu->requests); + if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { + vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = + p->ptc_g_data; + } else { + clear_bit(KVM_REQ_PTC_G, &vcpu->requests); + vcpu->arch.ptc_g_count = 0; + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); + } +} + +static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm *kvm = vcpu->kvm; + struct call_data call_data; + int i; + call_data.ptc_g_data = p->u.ptc_g_data; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == + VCPU_MP_STATE_UNINITIALIZED || + vcpu == kvm->vcpus[i]) + continue; + + if (waitqueue_active(&kvm->vcpus[i]->wq)) + wake_up_interruptible(&kvm->vcpus[i]->wq); + + if (kvm->vcpus[i]->cpu 
!= -1) { + call_data.vcpu = kvm->vcpus[i]; + smp_call_function_single(kvm->vcpus[i]->cpu, + vcpu_global_purge, &call_data, 0, 1); + } else + printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); + + } + return 1; +} + +static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + return 1; +} + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + + ktime_t kt; + long itc_diff; + unsigned long vcpu_now_itc; + + unsigned long expires; + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; + + if (time_after(vcpu_now_itc, vpd->itm)) { + vcpu->arch.timer_check = 1; + return 1; + } + itc_diff = vpd->itm - vcpu_now_itc; + if (itc_diff < 0) + itc_diff = -itc_diff; + + expires = div64_64(itc_diff, cyc_per_usec); + kt = ktime_set(0, 1000 * expires); + vcpu->arch.ht_active = 1; + hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.mp_state = VCPU_MP_STATE_HALTED; + kvm_vcpu_block(vcpu); + hrtimer_cancel(p_ht); + vcpu->arch.ht_active = 0; + + if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE) + return -EINTR; + return 1; + } else { + printk(KERN_ERR"kvm: Unsupported userspace halt!"); + return 0; + } +} + +static int handle_vm_shutdown(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return 1; +} + +static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_VM_PANIC] = handle_vm_error, + [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, + [EXIT_REASON_PAL_CALL] = handle_pal_call, + [EXIT_REASON_SAL_CALL] = handle_sal_call, + [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, + [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IPI] = handle_ipi, + [EXIT_REASON_PTC_G] = handle_global_purge, + +}; + +static const int kvm_vti_max_exit_handlers = + sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); + +static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu) +{ +} + +static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_exit_data; + + p_exit_data = kvm_get_exit_data(vcpu); + return p_exit_data->exit_reason; +} + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. 
+ */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 exit_reason = kvm_get_exit_reason(vcpu); + vcpu->arch.last_exit = exit_reason; + + if (exit_reason < kvm_vti_max_exit_handlers + && kvm_vti_exit_handlers[exit_reason]) + return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + +static inline void vti_set_rr6(unsigned long rr6) +{ + ia64_set_rr(RR6, rr6); + ia64_srlz_i(); +} + +static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) +{ + unsigned long pte; + struct kvm *kvm = vcpu->kvm; + int r; + + /*Insert a pair of tr to map vmm*/ + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vmm_tr_slot = r; + /*Insert a pairt of tr to map data of vm*/ + pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, + pte, KVM_VM_DATA_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vm_tr_slot = r; + r = 0; +out: + return r; + +} + +static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) +{ + + ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); + ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); + +} + +static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + if (vcpu->arch.last_run_cpu != cpu || + per_cpu(last_vcpu, cpu) != vcpu) { + per_cpu(last_vcpu, cpu) = vcpu; + vcpu->arch.last_run_cpu = cpu; + kvm_flush_tlb_all(); + } + + vcpu->arch.host_rr6 = ia64_get_rr(RR6); + vti_set_rr6(vcpu->arch.vmm_rr); + return kvm_insert_vmm_mapping(vcpu); +} +static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) +{ + kvm_purge_vmm_mapping(vcpu); + vti_set_rr6(vcpu->arch.host_rr6); +} + +static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + union context *host_ctx, *guest_ctx; + int r; + + /*Get host and guest context with guest address space.*/ + host_ctx = kvm_get_host_context(vcpu); + guest_ctx = kvm_get_guest_context(vcpu); + + r = kvm_vcpu_pre_transition(vcpu); + if (r < 0) + goto out; + kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); + kvm_vcpu_post_transition(vcpu); + r = 0; +out: + return r; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + +again: + preempt_disable(); + + kvm_prepare_guest_switch(vcpu); + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + goto out; + } + + vcpu->guest_mode = 1; + kvm_guest_enter(); + + r = vti_vcpu_run(vcpu, kvm_run); + if (r < 0) { + local_irq_enable(); + preempt_enable(); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + goto out; + } + + vcpu->arch.launched = 1; + vcpu->guest_mode = 0; + local_irq_enable(); + + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. 
+ * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + + preempt_enable(); + + r = kvm_handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (!need_resched()) + goto again; + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto again; + } + + return r; +} + +static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) +{ + struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); + + if (!vcpu->mmio_is_write) + memcpy(&p->data, vcpu->mmio_data, 8); + p->state = STATE_IORESP_READY; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + kvm_set_mmio_data(vcpu); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + r = __vcpu_run(vcpu, kvm_run); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + return r; +} + +/* + * Allocate 16M memory for every vm to hold its specific data. + * Its memory map is defined in kvm_host.h. + */ +static struct kvm *kvm_alloc_kvm(void) +{ + + struct kvm *kvm; + uint64_t vm_base; + + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); + + if (!vm_base) + return ERR_PTR(-ENOMEM); + printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); + + /* Zero all pages before use! */ + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + + kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm->arch.vm_base = vm_base; + + return kvm; +} + +struct kvm_io_range { + unsigned long start; + unsigned long size; + unsigned long type; +}; + +static const struct kvm_io_range io_ranges[] = { + {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, + {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, + {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, + {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, + {PIB_START, PIB_SIZE, GPFN_PIB}, +}; + +static void kvm_build_io_pmt(struct kvm *kvm) +{ + unsigned long i, j; + + /* Mark I/O ranges */ + for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); + i++) { + for (j = io_ranges[i].start; + j < io_ranges[i].start + io_ranges[i].size; + j += PAGE_SIZE) + kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, + io_ranges[i].type, 0); + } + +} + +/*Use unused rids to virtualize guest rid.*/ +#define GUEST_PHYSICAL_RR0 0x1739 +#define GUEST_PHYSICAL_RR4 0x2739 +#define VMM_INIT_RR 0x1660 + +static void kvm_init_vm(struct kvm *kvm) +{ + long vm_base; + + BUG_ON(!kvm); + + kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; + kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; + kvm->arch.vmm_init_rr = VMM_INIT_RR; + + vm_base = kvm->arch.vm_base; + if (vm_base) { + kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; + kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; + kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; + } + + /* + *Fill P2M entries for MMIO/IO ranges + */ + kvm_build_io_pmt(kvm); + +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm = kvm_alloc_kvm(); + + if (IS_ERR(kvm)) + return ERR_PTR(-ENOMEM); + kvm_init_vm(kvm); + + return kvm; + +} + +static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, + struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), + sizeof(struct kvm_ioapic_state)); + break; 
+ default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(ioapic_irqchip(kvm), + &chip->chip.ioapic, + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int r; + + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + vpd->vgr[i] = regs->vpd.vgr[i]; + vpd->vbgr[i] = regs->vpd.vbgr[i]; + } + for (i = 0; i < 128; i++) + vpd->vcr[i] = regs->vpd.vcr[i]; + vpd->vhpi = regs->vpd.vhpi; + vpd->vnat = regs->vpd.vnat; + vpd->vbnat = regs->vpd.vbnat; + vpd->vpsr = regs->vpd.vpsr; + + vpd->vpr = regs->vpd.vpr; + + r = -EFAULT; + r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, + sizeof(union context)); + if (r) + goto out; + r = copy_from_user(vcpu + 1, regs->saved_stack + + sizeof(struct kvm_vcpu), + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); + if (r) + goto out; + vcpu->arch.exit_data = + ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + + RESTORE_REGS(mp_state); + RESTORE_REGS(vmm_rr); + memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); + memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); + RESTORE_REGS(itr_regions); + RESTORE_REGS(dtr_regions); + RESTORE_REGS(tc_regions); + RESTORE_REGS(irq_check); + RESTORE_REGS(itc_check); + RESTORE_REGS(timer_check); + RESTORE_REGS(timer_pending); + RESTORE_REGS(last_itc); + for (i = 0; i < 8; i++) { + vcpu->arch.vrr[i] = regs->vrr[i]; + vcpu->arch.ibr[i] = regs->ibr[i]; + vcpu->arch.dbr[i] = regs->dbr[i]; + } + for (i = 0; i < 4; i++) + vcpu->arch.insvc[i] = regs->insvc[i]; + RESTORE_REGS(xtp); + RESTORE_REGS(metaphysical_rr0); + RESTORE_REGS(metaphysical_rr4); + RESTORE_REGS(metaphysical_saved_rr0); + RESTORE_REGS(metaphysical_saved_rr4); + RESTORE_REGS(fp_psr); + RESTORE_REGS(saved_gp); + + vcpu->arch.irq_new_pending = 1; + vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); + set_bit(KVM_REQ_RESUME, &vcpu->requests); + + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_SET_MEMORY_REGION: { + struct kvm_memory_region kvm_mem; + struct kvm_userspace_memory_region kvm_userspace_mem; + + r = -EFAULT; + if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) + goto out; + kvm_userspace_mem.slot = kvm_mem.slot; + kvm_userspace_mem.flags = kvm_mem.flags; + kvm_userspace_mem.guest_phys_addr = + kvm_mem.guest_phys_addr; + kvm_userspace_mem.memory_size = kvm_mem.memory_size; + r = kvm_vm_ioctl_set_memory_region(kvm, + &kvm_userspace_mem, 0); + if (r) + goto out; + break; + } + case KVM_CREATE_IRQCHIP: + r = -EFAULT; + r = kvm_ioapic_init(kvm); + if (r) + goto out; + break; + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + if (irqchip_in_kernel(kvm)) { + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, + irq_event.irq, + irq_event.level); + mutex_unlock(&kvm->lock); + r = 0; + } + break; + } + case KVM_GET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ 
+ struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_get_irqchip(kvm, &chip); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &chip, sizeof chip)) + goto out; + r = 0; + break; + } + case KVM_SET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_set_irqchip(kvm, &chip); + if (r) + goto out; + r = 0; + break; + } + default: + ; + } +out: + return r; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; + +} +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + + return -EINVAL; +} + +static int kvm_alloc_vmm_area(void) +{ + if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { + kvm_vmm_base = __get_free_pages(GFP_KERNEL, + get_order(KVM_VMM_SIZE)); + if (!kvm_vmm_base) + return -ENOMEM; + + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; + + printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", + kvm_vmm_base, kvm_vm_buffer); + } + + return 0; +} + +static void kvm_free_vmm_area(void) +{ + if (kvm_vmm_base) { + /*Zero this area before free to avoid bits leak!!*/ + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); + kvm_vmm_base = 0; + kvm_vm_buffer = 0; + kvm_vsa_base = 0; + } +} + +/* + * Make sure that a cpu that is being hot-unplugged does not have any vcpus + * cached on it. Leave it as blank for IA64. 
+ */ +void decache_vcpus_on_cpu(int cpu) +{ +} + +static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +static int vti_init_vpd(struct kvm_vcpu *vcpu) +{ + int i; + union cpuid3_t cpuid3; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (IS_ERR(vpd)) + return PTR_ERR(vpd); + + /* CPUID init */ + for (i = 0; i < 5; i++) + vpd->vcpuid[i] = ia64_get_cpuid(i); + + /* Limit the CPUID number to 5 */ + cpuid3.value = vpd->vcpuid[3]; + cpuid3.number = 4; /* 5 - 1 */ + vpd->vcpuid[3] = cpuid3.value; + + /*Set vac and vdc fields*/ + vpd->vac.a_from_int_cr = 1; + vpd->vac.a_to_int_cr = 1; + vpd->vac.a_from_psr = 1; + vpd->vac.a_from_cpuid = 1; + vpd->vac.a_cover = 1; + vpd->vac.a_bsw = 1; + vpd->vac.a_int = 1; + vpd->vdc.d_vmsw = 1; + + /*Set virtual buffer*/ + vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; + + return 0; +} + +static int vti_create_vp(struct kvm_vcpu *vcpu) +{ + long ret; + struct vpd *vpd = vcpu->arch.vpd; + unsigned long vmm_ivt; + + vmm_ivt = kvm_vmm_info->vmm_ivt; + + printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); + + ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); + + if (ret) { + printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); + return -EINVAL; + } + return 0; +} + +static void init_ptce_info(struct kvm_vcpu *vcpu) +{ + ia64_ptce_info_t ptce = {0}; + + ia64_get_ptce(&ptce); + vcpu->arch.ptce_base = ptce.base; + vcpu->arch.ptce_count[0] = ptce.count[0]; + vcpu->arch.ptce_count[1] = ptce.count[1]; + vcpu->arch.ptce_stride[0] = ptce.stride[0]; + vcpu->arch.ptce_stride[1] = ptce.stride[1]; +} + +static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) +{ + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + + if (hrtimer_cancel(p_ht)) + hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) +{ + struct kvm_vcpu *vcpu; + wait_queue_head_t *q; + + vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); + if (vcpu->arch.mp_state != VCPU_MP_STATE_HALTED) + goto out; + + q = &vcpu->wq; + if (waitqueue_active(q)) { + vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + wake_up_interruptible(q); + } +out: + vcpu->arch.timer_check = 1; + return HRTIMER_NORESTART; +} + +#define PALE_RESET_ENTRY 0x80000000ffffffb0UL + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *v; + int r; + int i; + long itc_offset; + struct kvm *kvm = vcpu->kvm; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + union context *p_ctx = &vcpu->arch.guest; + struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); + + /*Init vcpu context for first run.*/ + if (IS_ERR(vmm_vcpu)) + return PTR_ERR(vmm_vcpu); + + if (vcpu->vcpu_id == 0) { + vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + + /*Set entry address for first run.*/ + regs->cr_iip = PALE_RESET_ENTRY; + + /*Initilize itc offset for vcpus*/ + itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + v->arch.itc_offset = itc_offset; + v->arch.last_itc = 0; + } + } else + vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED; + + r = -ENOMEM; + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); + if (!vcpu->arch.apic) + goto out; + vcpu->arch.apic->vcpu = vcpu; + + p_ctx->gr[1] = 0; + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[13] = (unsigned long)vmm_vcpu; + p_ctx->psr = 0x1008522000UL; + p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ + p_ctx->caller_unat = 0; + p_ctx->pr = 0x0; + p_ctx->ar[36] = 0x0; /*unat*/ 
+ p_ctx->ar[19] = 0x0; /*rnat*/ + p_ctx->ar[18] = (unsigned long)vmm_vcpu + + ((sizeof(struct kvm_vcpu)+15) & ~15); + p_ctx->ar[64] = 0x0; /*pfs*/ + p_ctx->cr[0] = 0x7e04UL; + p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; + p_ctx->cr[8] = 0x3c; + + /*Initilize region register*/ + p_ctx->rr[0] = 0x30; + p_ctx->rr[1] = 0x30; + p_ctx->rr[2] = 0x30; + p_ctx->rr[3] = 0x30; + p_ctx->rr[4] = 0x30; + p_ctx->rr[5] = 0x30; + p_ctx->rr[7] = 0x30; + + /*Initilize branch register 0*/ + p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; + + vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; + vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; + vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; + + hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + vcpu->arch.hlt_timer.function = hlt_timer_fn; + + vcpu->arch.last_run_cpu = -1; + vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vsa_base = kvm_vsa_base; + vcpu->arch.__gp = kvm_vmm_gp; + vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + init_ptce_info(vcpu); + + r = 0; +out: + return r; +} + +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) +{ + unsigned long psr; + int r; + + local_irq_save(psr); + r = kvm_insert_vmm_mapping(vcpu); + if (r) + goto fail; + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); + if (r) + goto fail; + + r = vti_init_vpd(vcpu); + if (r) { + printk(KERN_DEBUG"kvm: vpd init error!!\n"); + goto uninit; + } + + r = vti_create_vp(vcpu); + if (r) + goto uninit; + + kvm_purge_vmm_mapping(vcpu); + local_irq_restore(psr); + + return 0; +uninit: + kvm_vcpu_uninit(vcpu); +fail: + return r; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + unsigned long vm_base = kvm->arch.vm_base; + int r; + int cpu; + + r = -ENOMEM; + if (!vm_base) { + printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); + goto fail; + } + vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu->kvm = kvm; + + cpu = get_cpu(); + vti_vcpu_load(vcpu, cpu); + r = vti_vcpu_setup(vcpu, id); + put_cpu(); + + if (r) { + printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); + goto fail; + } + + return vcpu; +fail: + return ERR_PTR(r); +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; +} + +static void free_kvm(struct kvm *kvm) +{ + unsigned long vm_base = kvm->arch.vm_base; + + if (vm_base) { + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); + } + +} + +static void kvm_release_vm_pages(struct kvm *kvm) +{ + struct kvm_memory_slot *memslot; + int i, j; + unsigned long base_gfn; + + for (i = 0; i < kvm->nmemslots; i++) { + memslot = &kvm->memslots[i]; + base_gfn = memslot->base_gfn; + + for (j = 0; j < memslot->npages; j++) { + if (memslot->rmap[j]) + put_page((struct page *)memslot->rmap[j]); + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kfree(kvm->arch.vioapic); + kvm_release_vm_pages(kvm); + kvm_free_physmem(kvm); + free_kvm(kvm); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + 
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + if (cpu != vcpu->cpu) { + vcpu->cpu = cpu; + if (vcpu->arch.ht_active) + kvm_migrate_hlt_timer(vcpu); + } +} + +#define SAVE_REGS(_x) regs->_x = vcpu->arch._x + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + int r; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + regs->vpd.vgr[i] = vpd->vgr[i]; + regs->vpd.vbgr[i] = vpd->vbgr[i]; + } + for (i = 0; i < 128; i++) + regs->vpd.vcr[i] = vpd->vcr[i]; + regs->vpd.vhpi = vpd->vhpi; + regs->vpd.vnat = vpd->vnat; + regs->vpd.vbnat = vpd->vbnat; + regs->vpd.vpsr = vpd->vpsr; + regs->vpd.vpr = vpd->vpr; + + r = -EFAULT; + r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, + sizeof(union context)); + if (r) + goto out; + r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); + if (r) + goto out; + SAVE_REGS(mp_state); + SAVE_REGS(vmm_rr); + memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); + memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); + SAVE_REGS(itr_regions); + SAVE_REGS(dtr_regions); + SAVE_REGS(tc_regions); + SAVE_REGS(irq_check); + SAVE_REGS(itc_check); + SAVE_REGS(timer_check); + SAVE_REGS(timer_pending); + SAVE_REGS(last_itc); + for (i = 0; i < 8; i++) { + regs->vrr[i] = vcpu->arch.vrr[i]; + regs->ibr[i] = vcpu->arch.ibr[i]; + regs->dbr[i] = vcpu->arch.dbr[i]; + } + for (i = 0; i < 4; i++) + regs->insvc[i] = vcpu->arch.insvc[i]; + regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); + SAVE_REGS(xtp); + SAVE_REGS(metaphysical_rr0); + SAVE_REGS(metaphysical_rr4); + SAVE_REGS(metaphysical_saved_rr0); + SAVE_REGS(metaphysical_saved_rr4); + SAVE_REGS(fp_psr); + SAVE_REGS(saved_gp); + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + + hrtimer_cancel(&vcpu->arch.hlt_timer); + kfree(vcpu->arch.apic); +} + + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + unsigned long i; + struct page *page; + int npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + unsigned long base_gfn = memslot->base_gfn; + + for (i = 0; i < npages; i++) { + page = gfn_to_page(kvm, base_gfn + i); + kvm_set_pmt_entry(kvm, base_gfn + i, + page_to_pfn(page) << PAGE_SHIFT, + _PAGE_AR_RWX|_PAGE_MA_WB); + memslot->rmap[i] = (unsigned long)page; + } + + return 0; +} + + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); +} + +static int vti_cpu_has_kvm_support(void) +{ + long avail = 1, status = 1, control = 1; + long ret; + + ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); + if (ret) + goto out; + + if (!(avail & PAL_PROC_VM_BIT)) + goto out; + + printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); + + ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); + if (ret) + goto out; + printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); + + if (!(vp_env_info & VP_OPCODE)) { + printk(KERN_WARNING"kvm: No opcode ability on hardware, " + "vm_env_info:0x%lx\n", vp_env_info); + } + + return 1; +out: + return 0; +} + +static int kvm_relocate_vmm(struct kvm_vmm_info 
*vmm_info, + struct module *module) +{ + unsigned long module_base; + unsigned long vmm_size; + + unsigned long vmm_offset, func_offset, fdesc_offset; + struct fdesc *p_fdesc; + + BUG_ON(!module); + + if (!kvm_vmm_base) { + printk("kvm: kvm area hasn't been initilized yet!!\n"); + return -EFAULT; + } + + /*Calculate new position of relocated vmm module.*/ + module_base = (unsigned long)module->module_core; + vmm_size = module->core_size; + if (unlikely(vmm_size > KVM_VMM_SIZE)) + return -EFAULT; + + memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); + kvm_flush_icache(kvm_vmm_base, vmm_size); + + /*Recalculate kvm_vmm_info based on new VMM*/ + vmm_offset = vmm_info->vmm_ivt - module_base; + kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; + printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", + kvm_vmm_info->vmm_ivt); + + fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; + kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); + + printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", + KVM_VMM_BASE+func_offset); + + fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; + kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); + + kvm_vmm_gp = p_fdesc->gp; + + printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", + kvm_vmm_info->vmm_entry); + printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", + KVM_VMM_BASE + func_offset); + + return 0; +} + +int kvm_arch_init(void *opaque) +{ + int r; + struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; + + if (!vti_cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); + r = -EOPNOTSUPP; + goto out; + } + + if (kvm_vmm_info) { + printk(KERN_ERR "kvm: Already loaded VMM module!\n"); + r = -EEXIST; + goto out; + } + + r = -ENOMEM; + kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); + if (!kvm_vmm_info) + goto out; + + if (kvm_alloc_vmm_area()) + goto out_free0; + + r = kvm_relocate_vmm(vmm_info, vmm_info->module); + if (r) + goto out_free1; + + return 0; + +out_free1: + kvm_free_vmm_area(); +out_free0: + kfree(kvm_vmm_info); +out: + return r; +} + +void kvm_arch_exit(void) +{ + kvm_free_vmm_area(); + kfree(kvm_vmm_info); + kvm_vmm_info = NULL; +} + +static int kvm_ia64_sync_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r, i; + long n, base; + unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS + + KVM_MEM_DIRTY_LOG_OFS); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + base = memslot->base_gfn / BITS_PER_LONG; + + for (i = 0; i < n/sizeof(long); ++i) { + memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; + dirty_bitmap[base + i] = 0; + } + r = 0; +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + int n; + struct 
kvm_memory_slot *memslot; + int is_dirty = 0; + + spin_lock(&kvm->arch.dirty_log_lock); + + r = kvm_ia64_sync_dirty_log(kvm, log); + if (r) + goto out; + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + spin_unlock(&kvm->arch.dirty_log_lock); + return r; +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + + if (vcpu->guest_mode) + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); +} + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +{ + + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vec, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE) + kvm_vcpu_kick(vcpu); + else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) { + vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } + return 1; + } + return 0; +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +{ + return apic->vcpu->vcpu_id == dest; +} + +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +{ + return 0; +} + +struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, + unsigned long bitmap) +{ + struct kvm_vcpu *lvcpu = kvm->vcpus[0]; + int i; + + for (i = 1; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i]) + continue; + if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) + lvcpu = kvm->vcpus[i]; + } + + return lvcpu; +} + +static int find_highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + + return -1; +} + +int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (vpd->irr[0] & (1UL << NMI_VECTOR)) + return NMI_VECTOR; + if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return find_highest_bits((int *)&vpd->irr[0]); +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + if (kvm_highest_pending_irq(vcpu) != -1) + return 1; + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE; +} + -- 1.5.2 |
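[Editorial note on the patch above] For context on how the exits produced by this code reach userspace: handle_mmio() fills kvm_run->mmio and returns 0 so that KVM_RUN drops back to the VMM, which emulates the access and copies any read data back before re-entering (kvm_arch_vcpu_ioctl_run() pushes it into the guest via kvm_set_mmio_data()). A rough userspace-side sketch of that loop follows, assuming only the standard <linux/kvm.h> ioctl interface; the emulate_mmio_read()/emulate_mmio_write() helpers and run_vcpu() are hypothetical placeholders, not part of the patch.

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical device-emulation hooks, for illustration only. */
extern void emulate_mmio_read(__u64 addr, void *data, __u32 len);
extern void emulate_mmio_write(__u64 addr, const void *data, __u32 len);

static int run_vcpu(int vcpu_fd, struct kvm_run *run)
{
        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                        return -1;

                switch (run->exit_reason) {
                case KVM_EXIT_MMIO:
                        /* Mirrors the fields handle_mmio() fills in. */
                        if (run->mmio.is_write)
                                emulate_mmio_write(run->mmio.phys_addr,
                                                   run->mmio.data,
                                                   run->mmio.len);
                        else
                                emulate_mmio_read(run->mmio.phys_addr,
                                                  run->mmio.data,
                                                  run->mmio.len);
                        break;
                case KVM_EXIT_INTR:
                        break;          /* interrupted by a signal, re-enter */
                case KVM_EXIT_SHUTDOWN:
                        return 0;       /* guest requested shutdown */
                default:
                        return -1;      /* KVM_EXIT_UNKNOWN and friends */
                }
        }
}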
From: Zhang, X. <xia...@in...> - 2008-04-01 10:44:40
>From 7f1714377e6d5812b4557bb3ccd8268b57865952 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang <xia...@in...> Date: Tue, 1 Apr 2008 14:42:00 +0800 Subject: [PATCH] KVM: IA64 : Prepare some structure definitions and routines for kvm use. Register structures are defined per SDM. Add three small routines for kernel: ia64_ttag, ia64_loadrs, ia64_flushrs Signed-off-by: Xiantao Zhang <xia...@in...> --- include/asm-ia64/gcc_intrin.h | 12 ++++++++ include/asm-ia64/processor.h | 63 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 0 deletions(-) diff --git a/include/asm-ia64/gcc_intrin.h b/include/asm-ia64/gcc_intrin.h index de2ed2c..2fe292c 100644 --- a/include/asm-ia64/gcc_intrin.h +++ b/include/asm-ia64/gcc_intrin.h @@ -21,6 +21,10 @@ #define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") + +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") + extern void ia64_bad_param_for_setreg (void); extern void ia64_bad_param_for_getreg (void); @@ -517,6 +521,14 @@ do { \ #define ia64_ptrd(addr, size) \ asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") +#define ia64_ttag(addr) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + + /* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ #define ia64_lfhint_none 0 diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 741f7ec..6aff126 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -119,6 +119,69 @@ struct ia64_psr { __u64 reserved4 : 19; }; +union ia64_isr { + __u64 val; + struct { + __u64 code : 16; + __u64 vector : 8; + __u64 reserved1 : 8; + __u64 x : 1; + __u64 w : 1; + __u64 r : 1; + __u64 na : 1; + __u64 sp : 1; + __u64 rs : 1; + __u64 ir : 1; + __u64 ni : 1; + __u64 so : 1; + __u64 ei : 2; + __u64 ed : 1; + __u64 reserved2 : 20; + }; +}; + +union ia64_lid { + __u64 val; + struct { + __u64 rv : 16; + __u64 eid : 8; + __u64 id : 8; + __u64 ig : 32; + }; +}; + +union ia64_tpr { + __u64 val; + struct { + __u64 ig0 : 4; + __u64 mic : 4; + __u64 rsv : 8; + __u64 mmi : 1; + __u64 ig1 : 47; + }; +}; + +union ia64_itir { + __u64 val; + struct { + __u64 rv3 : 2; /* 0-1 */ + __u64 ps : 6; /* 2-7 */ + __u64 key : 24; /* 8-31 */ + __u64 rv4 : 32; /* 32-63 */ + }; +}; + +union ia64_rr { + __u64 val; + struct { + __u64 ve : 1; /* enable hw walker */ + __u64 reserved0: 1; /* reserved */ + __u64 ps : 6; /* log page size */ + __u64 rid : 24; /* region id */ + __u64 reserved1: 32; /* reserved */ + }; +}; + /* * CPU type, hardware bug flags, and per-CPU state. Frequently used * state comes earlier: -- 1.5.2 |
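[Editorial note on the patch above] A small illustration of how the new bitfield unions read compared with open-coded shifts -- here updating a region-register value through union ia64_rr. This sketch is not part of the patch; demo_make_rr() is a made-up helper name.

#include <asm/processor.h>

/* Hypothetical helper: rebuild an rr value with a new rid/page size. */
static unsigned long demo_make_rr(unsigned long old_val,
                                  unsigned long rid, unsigned long log_ps)
{
        union ia64_rr rr;

        rr.val = old_val;       /* e.g. the raw value from ia64_get_rr() */
        rr.rid = rid;           /* 24-bit region id */
        rr.ps  = log_ps;        /* log2 of the preferred page size */
        rr.ve  = 1;             /* let the hardware VHPT walker run */
        return rr.val;
}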
From: Zhang, X. <xia...@in...> - 2008-04-01 10:44:18
Hi, all

According to the comments on V8, I refined the code and worked out a new patchset. Please help to review it. Thanks! :-)

In this new version, most of the typedefs are removed to comply with the coding-style requirements, and the issues found by reviewers are fixed. Thanks for your effort!

The whole patchset has been checked with the script checkpatch.pl. Except for one file containing assembly code, which reports some warnings, the rest should be good to check in.

Xiantao
From: Zhang, X. <xia...@in...> - 2008-04-01 10:40:51
>From b0c5c7fc45bbe0f56efba28e814ccb6bbbb7b8c8 Mon Sep 17 00:00:00 2001 From: Xiantao Zhang <xia...@in...> Date: Tue, 1 Apr 2008 14:34:50 +0800 Subject: [PATCH] IA64: Add API for allocating Dynamic TR resource. Dynamic TR resource should be managed in the uniform way. Add two interfaces for kernel: ia64_itr_entry: Allocate a (pair of) TR for caller. ia64_ptr_entry: Purge a (pair of ) TR by caller. Signed-off-by: Xiantao Zhang <xia...@in...> Signed-off-by: Anthony Xu<ant...@in...> --- arch/ia64/kernel/mca.c | 49 +++++++++++ arch/ia64/kernel/mca_asm.S | 5 + arch/ia64/mm/tlb.c | 198 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-ia64/kregs.h | 3 + include/asm-ia64/tlb.h | 26 ++++++ 5 files changed, 281 insertions(+), 0 deletions(-) diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6c18221..607006a 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -97,6 +97,7 @@ #include <asm/irq.h> #include <asm/hw_irq.h> +#include <asm/tlb.h> #include "mca_drv.h" #include "entry.h" @@ -112,6 +113,7 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ +DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */ unsigned long __per_cpu_mca[NR_CPUS]; @@ -1182,6 +1184,49 @@ all_in: return; } +/* mca_insert_tr + * + * Switch rid when TR reload and needed! + * iord: 1: itr, 2: itr; + * +*/ +static void mca_insert_tr(u64 iord) +{ + + int i; + u64 old_rr; + struct ia64_tr_entry *p; + unsigned long psr; + int cpu = smp_processor_id(); + + psr = ia64_clear_ic(); + for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { + p = &__per_cpu_idtrs[cpu][iord-1][i]; + if (p->pte & 0x1) { + old_rr = ia64_get_rr(p->ifa); + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, p->rr); + ia64_srlz_d(); + } + ia64_ptr(iord, p->ifa, p->itir >> 2); + ia64_srlz_i(); + if (iord & 0x1) { + ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (iord & 0x2) { + ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, old_rr); + ia64_srlz_d(); + } + } + } + ia64_set_psr(psr); +} + /* * ia64_mca_handler * @@ -1271,6 +1316,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, monarch_cpu = -1; #endif } + if (__get_cpu_var(ia64_mca_tr_reload)) { + mca_insert_tr(0x1); /*Reload dynamic itrs*/ + mca_insert_tr(0x2); /*Reload dynamic itrs*/ + } if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) == NOTIFY_STOP) ia64_mca_spin(__func__); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 8bc7d25..a06d465 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -219,8 +219,13 @@ ia64_reload_tr: mov r20=IA64_TR_CURRENT_STACK ;; itr.d dtr[r20]=r16 + GET_THIS_PADDR(r2, ia64_mca_tr_reload) + mov r18 = 1 ;; srlz.d + ;; + st8 [r2] =r18 + ;; done_tlb_purge_and_reload: diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 655da24..626100c 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -26,6 +26,8 @@ #include <asm/pal.h> #include <asm/tlbflush.h> #include <asm/dma.h> +#include <asm/processor.h> +#include <asm/tlb.h> static struct { unsigned long mask; /* mask of supported purge page-sizes */ @@ -39,6 +41,10 @@ struct ia64_ctx ia64_ctx = { }; DEFINE_PER_CPU(u8, ia64_need_tlb_flush); +DEFINE_PER_CPU(u8, 
ia64_tr_num); /*Number of TR slots in current processor*/ +DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ + +struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; /* * Initializes the ia64_ctx.bitmap array based on max_ctx+1. @@ -190,6 +196,9 @@ ia64_tlb_init (void) ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */ unsigned long tr_pgbits; long status; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int cpu = smp_processor_id(); if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; " @@ -206,4 +215,193 @@ ia64_tlb_init (void) local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ + status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2); + + if (status) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + per_cpu(ia64_tr_num, cpu) = 8; + return; + } + per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > + (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1)) + per_cpu(ia64_tr_num, cpu) = + vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { + per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; + printk(KERN_DEBUG"TR register number exceeds IA64_TR_ALLOC_MAX!" + "IA64_TR_ALLOC_MAX should be extended\n"); + } +} + +/* + * is_tr_overlap + * + * Check overlap with inserted TRs. + */ +static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size) +{ + u64 tr_log_size; + u64 tr_end; + u64 va_rr = ia64_get_rr(va); + u64 va_rid = RR_TO_RID(va_rr); + u64 va_end = va + (1<<log_size) - 1; + + if (va_rid != RR_TO_RID(p->rr)) + return 0; + tr_log_size = (p->itir & 0xff) >> 2; + tr_end = p->ifa + (1<<tr_log_size) - 1; + + if (va > tr_end || p->ifa > va_end) + return 0; + return 1; + +} + +/* + * ia64_insert_tr in virtual mode. Allocate a TR slot + * + * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr + * + * va : virtual address. + * pte : pte entries inserted. + * log_size: range to be covered. + * + * Return value: <0 : error No. + * + * >=0 : slot number allocated for TR. + * Called with preemption disabled. 
+ */ +int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) +{ + int i, r; + unsigned long psr; + struct ia64_tr_entry *p; + int cpu = smp_processor_id(); + + r = -EINVAL; + /*Check overlap with existing TR entries*/ + if (target_mask & 0x1) { + p = &__per_cpu_idtrs[cpu][0][0]; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG"Overlapped Entry" + "Inserted for TR Reigster!!\n"); + goto out; + } + } + } + if (target_mask & 0x2) { + p = &__per_cpu_idtrs[cpu][1][0]; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG"Overlapped Entry" + "Inserted for TR Reigster!!\n"); + goto out; + } + } + } + + for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { + switch (target_mask & 0x3) { + case 1: + if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1)) + goto found; + continue; + case 2: + if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + goto found; + continue; + case 3: + if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) && + !(__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + goto found; + continue; + default: + r = -EINVAL; + goto out; + } + } +found: + if (i >= per_cpu(ia64_tr_num, cpu)) { + r = -EBUSY; + goto out; + } + + /*Record tr info for mca hander use!*/ + if (i > per_cpu(ia64_tr_used, cpu)) + per_cpu(ia64_tr_used, cpu) = i; + + psr = ia64_clear_ic(); + if (target_mask & 0x1) { + ia64_itr(0x1, i, va, pte, log_size); + ia64_srlz_i(); + p = &__per_cpu_idtrs[cpu][0][i]; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + if (target_mask & 0x2) { + ia64_itr(0x2, i, va, pte, log_size); + ia64_srlz_i(); + p = &__per_cpu_idtrs[cpu][1][i]; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + ia64_set_psr(psr); + r = i; +out: + return r; +} +EXPORT_SYMBOL_GPL(ia64_itr_entry); + +/* + * ia64_purge_tr + * + * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. + * slot: slot number to be freed. + * + * Called with preemption disabled. 
+ */ +void ia64_ptr_entry(u64 target_mask, int slot) +{ + int cpu = smp_processor_id(); + int i; + struct ia64_tr_entry *p; + + if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu)) + return; + + if (target_mask & 0x1) { + p = &__per_cpu_idtrs[cpu][0][slot]; + if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x1, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + if (target_mask & 0x2) { + p = &__per_cpu_idtrs[cpu][1][slot]; + if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x2, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { + if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) || + (__per_cpu_idtrs[cpu][1][i].pte & 0x1)) + break; + } + per_cpu(ia64_tr_used, cpu) = i; } +EXPORT_SYMBOL_GPL(ia64_ptr_entry); diff --git a/include/asm-ia64/kregs.h b/include/asm-ia64/kregs.h index 7e55a58..aefcdfe 100644 --- a/include/asm-ia64/kregs.h +++ b/include/asm-ia64/kregs.h @@ -31,6 +31,9 @@ #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */ #define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */ +#define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/ +#define IA64_TR_ALLOC_MAX 32 /* Max number for dynamic use*/ + /* Processor status register bits: */ #define IA64_PSR_BE_BIT 1 #define IA64_PSR_UP_BIT 2 diff --git a/include/asm-ia64/tlb.h b/include/asm-ia64/tlb.h index 26edcb7..20d8a39 100644 --- a/include/asm-ia64/tlb.h +++ b/include/asm-ia64/tlb.h @@ -64,6 +64,32 @@ struct mmu_gather { struct page *pages[FREE_PTE_NR]; }; +struct ia64_tr_entry { + u64 ifa; + u64 itir; + u64 pte; + u64 rr; +}; /*Record for tr entry!*/ + +extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); +extern void ia64_ptr_entry(u64 target_mask, int slot); + +extern struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX]; + +/* + region register macros +*/ +#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001) +#define RR_VE(val) (((val) & 0x0000000000000001) << 0) +#define RR_VE_MASK 0x0000000000000001L +#define RR_VE_SHIFT 0 +#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f) +#define RR_PS(val) (((val) & 0x000000000000003f) << 2) +#define RR_PS_MASK 0x00000000000000fcL +#define RR_PS_SHIFT 2 +#define RR_RID_MASK 0x00000000ffffff00L +#define RR_TO_RID(val) ((val >> 8) & 0xffffff) + /* Users of the generic TLB shootdown code must declare this storage space. */ DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); -- 1.5.2 |
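[Editorial note on the patch above] The KVM core patch earlier in this thread is the first consumer of this interface (see kvm_arch_hardware_enable() and kvm_insert_vmm_mapping()). A minimal caller sketch follows; the my_pin_area()/my_unpin_area() names and the static slot variable are illustrative only and appear in neither patch.

#include <asm/pgtable.h>
#include <asm/tlb.h>

static int my_slot = -1;

/* Pin a kernel buffer at target_va with a matched itr/dtr pair (0x3). */
static int my_pin_area(unsigned long target_va, void *buf,
                       unsigned long log_size)
{
        unsigned long pte = pte_val(mk_pte_phys(__pa(buf), PAGE_KERNEL));
        unsigned long psr;
        int slot;

        local_irq_save(psr);    /* keeps us on this CPU, as required */
        slot = ia64_itr_entry(0x3, target_va, pte, log_size);
        local_irq_restore(psr);

        if (slot < 0)           /* -EINVAL on overlap, -EBUSY if no free slot */
                return slot;
        my_slot = slot;
        return 0;
}

static void my_unpin_area(void)
{
        if (my_slot >= 0) {
                ia64_ptr_entry(0x3, my_slot);   /* purge both itr and dtr */
                my_slot = -1;
        }
}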
From: Zhang, X. <xia...@in...> - 2008-04-01 09:49:03
Carsten Otte wrote:
> Zhang, Xiantao wrote:
>> Hi, Carsten
>> Why do you think it is racy? In this function,
>> target_vcpu->arch.launched should be set to 1 for the first run, and
>> it keeps its value all the time. Except for the first IPI that wakes
>> up the vcpu, all IPIs received by the target vcpu should go into the
>> "else" branch. So do you mean the race condition exists in the "else"
>> code?
> For example, to lock against destroying that vcpu. Or, the waitqueue
> may become active after if (waitqueue_active()) and before
> wake_up_interruptible(). In that case, the target vcpu might sleep and
> not get woken up by the IPI.

I don't think it causes a real issue, because the target vcpu can at least be woken up by the timer interrupt. But as you say, wouldn't the x86 side have the same race issue?

Xiantao
From: Zhang, X. <xia...@in...> - 2008-04-01 08:58:00
Jes Sorensen wrote:
> Zhang, Xiantao wrote:
>>> From 6b731c15afa8cec84f16408c421c286f1dd1b7d3 Mon Sep 17 00:00:00 2001
>> From: Xiantao Zhang <xia...@in...>
>> Date: Wed, 12 Mar 2008 13:45:40 +0800
>> Subject: [PATCH] KVM: IA64: Add TLB virtualization support.
>>
>> vtlb.c includes tlb/VHPT virtualization.
>> Signed-off-by: Anthony Xu <ant...@in...>
>> Signed-off-by: Xiantao Zhang <xia...@in...>
>
> Hi Xiantao,
>
> Just a clarification question on this one:
>
>> +void machine_tlb_purge(u64 va, u64 ps)
>> +{
>> +	ia64_ptcl(va, ps << 2);
>> +}
>
> What is the purpose of machine_tlb_purge()? Is it supposed to do a
> global purge of the TLB on the host machine? If so, how does this
> macro differ from platform_global_tlb_purge()?

Hi, Jes

It is not a global purge; it just purges the local processor's TLB entries covered by the parameters. :-)

Xiantao

> I am mentioning this because it's very important to keep in mind that
> the regular TLB purging instructions are not functional on all ia64
> platforms, which is why we have special implementations via the
> machine vector interface.

For a global purge, we would indeed need to consider the machine vector.
From: Jes S. <je...@sg...> - 2008-04-01 08:49:41
|
Zhang, Xiantao wrote: >>From 6b731c15afa8cec84f16408c421c286f1dd1b7d3 Mon Sep 17 00:00:00 2001 > From: Xiantao Zhang <xia...@in...> > Date: Wed, 12 Mar 2008 13:45:40 +0800 > Subject: [PATCH] KVM:IA64 : Add TLB virtulization support. > > vtlb.c includes tlb/VHPT virtulization. > Signed-off-by: Anthony Xu <ant...@in...> > Signed-off-by: Xiantao Zhang <xia...@in...> Hi Xiantao, Just a clarification question on this one: > +void machine_tlb_purge(u64 va, u64 ps) > +{ > + ia64_ptcl(va, ps << 2); > +} What is the purpose of machine_tlb_purge()? Is it supposed to do a global purge of the tlb on the host machine? If so, how does this macro differ from platform_global_tlb_purge()? I am mentioning this because it's very important to keep in mind that the regular tlb purging instructions are not functional on all ia64 platforms, which is why we have special implementations via the machine vector interface. Cheers, Jes |
From: Jes S. <je...@sg...> - 2008-04-01 08:35:01
|
Jeremy Fitzhardinge wrote: > Jes Sorensen wrote: > This change has been on the x86 side for ages, and not even Ingo made a > peep about it ;) Mmmm, last time I looked, x86 didn't scale to any interesting number of CPUs :-) >> Why not keep smp_call_function() the way it was before, rather than >> implementing it via the call to smp_call_function_mask()? > > Because Xen needs a different core implementation (because of its > different IPI implementation), and it would be better to just have to do > one of them rather than N. I wasn't suggesting we shouldn't have both interfaces, merely questioning why adding what to me seems like an unnecessary performance hit for the classic case of the call. Cheers, Jes |
From: Carsten O. <co...@de...> - 2008-04-01 07:55:03
|
Zhang, Xiantao wrote:
> Hi, Carsten
> Why do you think it is racy? In this function,
> target_vcpu->arch.launched should be set to 1 for the first run, and
> keep its value all the time. Except the first IPI to wake up the vcpu,
> all IPIs received by target vcpu should go into "else" condition. So you
> mean the race condition exist in "else" code ?

For example to lock against destroying that vcpu. Or, the waitqueue may
become active after the if (waitqueue_active()) check and before
wake_up_interruptible(). In that case, the target vcpu might sleep and
not get woken up by the IPI. |
From: Carsten O. <co...@de...> - 2008-04-01 07:48:37
|
Zhang, Xiantao wrote: > Carsten Otte wrote: >> Zhang, Xiantao wrote: >>> +/* mca_insert_tr >>> + * >>> + * Switch rid when TR reload and needed! >>> + * iord: 1: itr, 2: itr; >>> + * >>> +*/ >>> +static void mca_insert_tr(u64 iord) >>> +{ >>> + >>> + int i; >>> + u64 old_rr; >>> + struct ia64_tr_entry *p; >>> + unsigned long psr; >>> + int cpu = smp_processor_id(); >> What if CONFIG_PREEMPT is on, and we're being preempted and scheduled >> to a different CPU here? Are we running preempt disabled here? If so, >> the function header should state that this function needs to be called >> preempt_disabled. > > The function insert one TR to local TLB, and doesn't allow preempt > before and after the call, so the caller should be with preempt_disable > before calling into this routine. > Maybe the descripiton of this function should contain "Called with > preempt disabled!". Does it make sense ? Yea, I think a comment would help in that case :-). |
From: Zhang, X. <xia...@in...> - 2008-04-01 02:17:52
|
Carsten Otte wrote: > Zhang, Xiantao wrote: >> +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long >> id, + unsigned long eid) +{ >> + ia64_lid_t lid; >> + int i; >> + >> + for (i = 0; i < KVM_MAX_VCPUS; i++) { >> + if (kvm->vcpus[i]) { >> + lid.val = VCPU_LID(kvm->vcpus[i]); >> + if (lid.id == id && lid.eid == eid) >> + return kvm->vcpus[i]; >> + } >> + } >> + >> + return NULL; >> +} >> + >> +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run >> *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); >> + struct kvm_vcpu *target_vcpu; >> + struct kvm_pt_regs *regs; >> + ia64_ipi_a addr = p->u.ipi_data.addr; >> + ia64_ipi_d data = p->u.ipi_data.data; >> + >> + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if >> (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); >> + >> + if (!target_vcpu->arch.launched) { >> + regs = vcpu_regs(target_vcpu); >> + >> + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; >> + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + >> + target_vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; >> + if (waitqueue_active(&target_vcpu->wq)) >> + wake_up_interruptible(&target_vcpu->wq); >> + } else { >> + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if >> (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); >> + } >> + >> + return 1; >> +} > *Shrug*. This looks highly racy to me. You do access various values in > target_vcpu without any lock! I know that taking the target vcpu's > lock does'nt work because that one is held all the time during > KVM_VCPU_RUN. My solution to that was struct local_interrupt, which > has its own lock, and has the waitqueue plus everything I need to send > a sigp [that's our flavor of ipi]. ex Hi, Carsten Why do you think it is racy? In this function, target_vcpu->arch.launched should be set to 1 for the first run, and keep its value all the time. Except the first IPI to wake up the vcpu, all IPIs received by target vcpu should go into "else" condition. So you mean the race condition exist in "else" code ? Xiantao |
From: Zhang, X. <xia...@in...> - 2008-04-01 01:53:37
|
Carsten Otte wrote: > Zhang, Xiantao wrote: >> +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long >> id, + unsigned long eid) +{ >> + ia64_lid_t lid; >> + int i; >> + >> + for (i = 0; i < KVM_MAX_VCPUS; i++) { >> + if (kvm->vcpus[i]) { >> + lid.val = VCPU_LID(kvm->vcpus[i]); >> + if (lid.id == id && lid.eid == eid) >> + return kvm->vcpus[i]; >> + } >> + } >> + >> + return NULL; >> +} >> + >> +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run >> *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); >> + struct kvm_vcpu *target_vcpu; >> + struct kvm_pt_regs *regs; >> + ia64_ipi_a addr = p->u.ipi_data.addr; >> + ia64_ipi_d data = p->u.ipi_data.data; >> + >> + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if >> (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); >> + >> + if (!target_vcpu->arch.launched) { >> + regs = vcpu_regs(target_vcpu); >> + >> + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; >> + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + >> + target_vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; >> + if (waitqueue_active(&target_vcpu->wq)) >> + wake_up_interruptible(&target_vcpu->wq); >> + } else { >> + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if >> (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); >> + } >> + >> + return 1; >> +} > *Shrug*. This looks highly racy to me. You do access various values in > target_vcpu without any lock! I know that taking the target vcpu's > lock does'nt work because that one is held all the time during > KVM_VCPU_RUN. My solution to that was struct local_interrupt, which > has its own lock, and has the waitqueue plus everything I need to send > a sigp [that's our flavor of ipi]. > >> +int kvm_emulate_halt(struct kvm_vcpu *vcpu) >> +{ >> + >> + ktime_t kt; >> + long itc_diff; >> + unsigned long vcpu_now_itc; >> + >> + unsigned long expires; >> + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; > That makes me jealous, I'd love to have hrtimer on s390 for this. I've > got to round up to the next jiffie. *Sigh* > >> +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct >> kvm_sregs *sregs) +{ >> + printk(KERN_WARNING"kvm:kvm_arch_vcpu_ioctl_set_sregs >> called!!\n"); >> + return 0; >> +} >> + >> +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct >> kvm_sregs *sregs) +{ >> + printk(KERN_WARNING"kvm:kvm_arch_vcpu_ioctl_get_sregs >> called!!\n"); >> + return 0; >> + >> +} > Suggestion: if get/set sregs does'nt seem useful on ia64, why not > return -EINVAL? In that case, you could also not print a kern warning, > the user will either handle that situation or complain. > >> +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) >> +{ > <snip> >> + /*FIXME:Need to removed it later!!\n*/ >> + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); >> + vcpu->arch.apic->vcpu = vcpu; > Fixme! Removed! >> +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) +{ >> + unsigned long psr; >> + int r; >> + >> + local_irq_save(psr); >> + r = kvm_insert_vmm_mapping(vcpu); >> + if (r) >> + goto fail; >> + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); >> + if (r) >> + goto fail; > Maybe change to return r, rather then goto fail? It should be same. 
>> +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct >> kvm_fpu *fpu) +{ >> + printk(KERN_WARNING"kvm:IA64 doesn't need to export" + "fpu to >> userspace!\n"); + return 0; >> +} >> + >> +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct >> kvm_fpu *fpu) +{ >> + printk(KERN_WARNING"kvm:IA64 doesn't need to export" >> + "fpu to userspace !\n"); >> + return 0; >> +} > maybe -EINVAL? Good suggestion! >> +static int find_highest_bits(int *dat) >> +{ >> + u32 bits, bitnum; >> + int i; >> + >> + /* loop for all 256 bits */ >> + for (i = 7; i >= 0 ; i--) { >> + bits = dat[i]; >> + if (bits) { >> + bitnum = fls(bits); >> + return i * 32 + bitnum - 1; >> + } >> + } >> + >> + return -1; >> +} > Should be in asm/bitops.h. Look at find_first_bit() and friends, this > is duplicate. Seems find_first_bit only can be used to find lowest bit? Xiantao |
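On the bitops point: the generic helpers of this era scan upwards only, so a downward search has to be open-coded (a generic find_last_bit() style helper only arrived in mainline later). A sketch of an equivalent of find_highest_bits(), assuming the 256-bit IRR is viewed as an array of unsigned longs; the function name is illustrative:

#include <linux/bitops.h>

/* Return the highest set bit in a 256-bit bitmap, or -1 if it is empty.
 * Scan whole words from the top; fls64() finds the top bit of the first
 * non-zero word, mirroring what find_highest_bits() does per 32-bit word. */
static int highest_pending_vector(const unsigned long *irr)
{
        int i;

        for (i = 256 / BITS_PER_LONG - 1; i >= 0; i--) {
                if (irr[i])
                        return i * BITS_PER_LONG + fls64(irr[i]) - 1;
        }
        return -1;
}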
From: Zhang, X. <xia...@in...> - 2008-04-01 01:37:38
|
Carsten Otte wrote: > Zhang, Xiantao wrote: >> +typedef union context { >> + /* 8K size */ >> + char dummy[KVM_CONTEXT_SIZE]; >> + struct { >> + unsigned long psr; >> + unsigned long pr; >> + unsigned long caller_unat; >> + unsigned long pad; >> + unsigned long gr[32]; >> + unsigned long ar[128]; >> + unsigned long br[8]; >> + unsigned long cr[128]; >> + unsigned long rr[8]; >> + unsigned long ibr[8]; >> + unsigned long dbr[8]; >> + unsigned long pkr[8]; >> + struct ia64_fpreg fr[128]; >> + }; >> +} context_t; > This looks ugly to me. I'd rather prefer to have a straight struct > with elements psr...fr[], and cast the pointer to char* when needed. > KVM_CONTEXT_SIZE can be used as parameter to kzalloc() on allocation, > it's too large to be on stack anyway. We need to allocate enough memory fix area, considering back-ward compabitility. In migration or save/restore case, we need to save this area. If migration happens in different kvm versions, and the size of different, it may cause issues. For example, we added a new field in new kvm, and restore a new snapshot to old versions, it may fail. >> +typedef struct thash_data { >> + union { >> + struct { >> + unsigned long p : 1; /* 0 */ >> + unsigned long rv1 : 1; /* 1 */ >> + unsigned long ma : 3; /* 2-4 */ >> + unsigned long a : 1; /* 5 */ >> + unsigned long d : 1; /* 6 */ >> + unsigned long pl : 2; /* 7-8 */ >> + unsigned long ar : 3; /* 9-11 */ >> + unsigned long ppn : 38; /* 12-49 */ >> + unsigned long rv2 : 2; /* 50-51 */ >> + unsigned long ed : 1; /* 52 */ >> + unsigned long ig1 : 11; /* 53-63 */ >> + }; >> + struct { >> + unsigned long __rv1 : 53; /* 0-52 */ >> + unsigned long contiguous : 1; /*53 */ >> + unsigned long tc : 1; /* 54 TR or TC */ + unsigned >> long cl : 1; + /* 55 I side or D side cache line */ >> + unsigned long len : 4; /* 56-59 */ >> + unsigned long io : 1; /* 60 entry is for io or >> not */ >> + unsigned long nomap : 1; >> + /* 61 entry cann't be inserted into machine >> TLB.*/ >> + unsigned long checked : 1; >> + /* 62 for VTLB/VHPT sanity check */ >> + unsigned long invalid : 1; >> + /* 63 invalid entry */ >> + }; >> + unsigned long page_flags; >> + }; /* same for VHPT and TLB */ >> + >> + union { >> + struct { >> + unsigned long rv3 : 2; >> + unsigned long ps : 6; >> + unsigned long key : 24; >> + unsigned long rv4 : 32; >> + }; >> + unsigned long itir; >> + }; >> + union { >> + struct { >> + unsigned long ig2 : 12; >> + unsigned long vpn : 49; >> + unsigned long vrn : 3; >> + }; >> + unsigned long ifa; >> + unsigned long vadr; >> + struct { >> + unsigned long tag : 63; >> + unsigned long ti : 1; >> + }; >> + unsigned long etag; >> + }; >> + union { >> + struct thash_data *next; >> + unsigned long rid; >> + unsigned long gpaddr; >> + }; >> +} thash_data_t; > A matter of taste, but I'd prefer unsigned long mask, and > #define MASK_BIT_FOR_PURPUSE over bitfields. This structure could be > much smaller that way. Yes, but it may be not so flexible to use. >> +struct kvm_regs { >> + char *saved_guest; >> + char *saved_stack; >> + struct saved_vpd vpd; >> + /*Arch-regs*/ >> + int mp_state; >> + unsigned long vmm_rr; >> + /* TR and TC. */ >> + struct thash_data itrs[NITRS]; >> + struct thash_data dtrs[NDTRS]; >> + /* Bit is set if there is a tr/tc for the region. 
*/ + unsigned >> char itr_regions; + unsigned char dtr_regions; >> + unsigned char tc_regions; >> + >> + char irq_check; >> + unsigned long saved_itc; >> + unsigned long itc_check; >> + unsigned long timer_check; >> + unsigned long timer_pending; >> + unsigned long last_itc; >> + >> + unsigned long vrr[8]; >> + unsigned long ibr[8]; >> + unsigned long dbr[8]; >> + unsigned long insvc[4]; /* Interrupt in service. */ + unsigned >> long xtp; + >> + unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */ >> + unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */ >> + unsigned long metaphysical_saved_rr0; /* from kvm_arch */ >> + unsigned long metaphysical_saved_rr4; /* from kvm_arch */ >> + unsigned long fp_psr; /*used for lazy float register */ >> + unsigned long saved_gp; + /*for phycial emulation */ >> +}; > This looks like it does'nt just have guest register content in it. It > seems to me preferable to have another ioctl different from > KVM_GET_REGS/KVM_SET_REGS to read and set the rest of the content and > seperate it from struct kvm_regs. We want to add a ioctl for that later. |
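One way to reconcile Carsten's dislike of the padded union with the compatibility concern raised above is to keep the saved image at a fixed size but let the compiler check that the real structure still fits. In the sketch below the field list and the 8K figure come from the quoted code; the check itself is only an illustration.

#include <linux/kernel.h>
#include <asm/fpu.h>

#define KVM_CONTEXT_SIZE        (8 * 1024)      /* fixed save-area size */

struct context {
        unsigned long   psr;
        unsigned long   pr;
        unsigned long   caller_unat;
        unsigned long   pad;
        unsigned long   gr[32];
        unsigned long   ar[128];
        unsigned long   br[8];
        unsigned long   cr[128];
        unsigned long   rr[8];
        unsigned long   ibr[8];
        unsigned long   dbr[8];
        unsigned long   pkr[8];
        struct ia64_fpreg fr[128];
};

static inline void check_context_layout(void)
{
        /* The saved image stays KVM_CONTEXT_SIZE bytes across versions,
         * so growing the struct past that must break the build rather
         * than silently corrupt save/restore or migration. */
        BUILD_BUG_ON(sizeof(struct context) > KVM_CONTEXT_SIZE);
}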
From: Zhang, X. <xia...@in...> - 2008-04-01 01:31:49
|
Jes Sorensen wrote: > Zhang, Xiantao wrote: >>> From 62895ff991d48398a77afdbf7f2bef127e802230 Mon Sep 17 00:00:00 >>> 2001 >> From: Xiantao Zhang <xia...@in...> >> Date: Fri, 28 Mar 2008 09:49:57 +0800 >> Subject: [PATCH] KVM: IA64: Add kvm arch-specific core code for >> kvm/ia64. >> >> kvm_ia64.c is created to handle kvm ia64-specific core logic. >> Signed-off-by: Xiantao Zhang <xia...@in...> > > More comments, a couple of bugs in this one. > >> +#include <linux/module.h> >> +#include <linux/vmalloc.h> > > Don't think you need vmalloc.h here. Originally, we called vmalloc, but removed later. Maybe we can remove it now. >> +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct >> kvm_regs *regs) +{ > [snip] >> + copy_from_user(&vcpu->arch.guest, regs->saved_guest, >> + sizeof(union context)); >> + copy_from_user(vcpu + 1, regs->saved_stack + sizeof(struct >> kvm_vcpu), >> + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); > > You need to check the return values from copy_from_user() here and > deal with possible failure. > >> + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); >> + vcpu->arch.apic->vcpu = vcpu; > > Whoops! Missing NULL pointer check here after the kzalloc. Good catch. Fixed! >> + copy_to_user(regs->saved_guest, &vcpu->arch.guest, >> + sizeof(union context)); + copy_to_user(regs->saved_stack, >> (void *)vcpu, IA64_STK_OFFSET); > > Same problem as above - check the return values. |
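For completeness, the shape of the allocation fix being agreed on here is roughly the following; the helper name is made up, while the types come from the patch under review. The caller then has to propagate the error instead of assuming success.

#include <linux/slab.h>

static int alloc_vcpu_apic(struct kvm_vcpu *vcpu)
{
        vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
        if (!vcpu->arch.apic)
                return -ENOMEM;         /* the missing NULL check */
        vcpu->arch.apic->vcpu = vcpu;
        return 0;
}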
From: Zhang, X. <xia...@in...> - 2008-04-01 01:15:26
|
Jes Sorensen wrote: > Hi Xiantao, Hi, Jes I fixed the coding style issues. Thanks! > More comments. > > Zhang, Xiantao wrote: >>> From 696b9eea9f5001a7b7a07c0e58514aa10306b91a Mon Sep 17 00:00:00 >>> 2001 >> From: Xiantao Zhang <xia...@in...> >> Date: Fri, 28 Mar 2008 09:51:36 +0800 >> Subject: [PATCH] KVM:IA64 : Add head files for kvm/ia64 >> >> ia64_regs: some defintions for special registers >> which aren't defined in asm-ia64/ia64regs. > > Please put missing definitions of registers into asm-ia64/ia64regs.h > if they are official definitions from the spec. Moved! >> kvm_minstate.h : Marcos about Min save routines. >> lapic.h: apic structure definition. >> vcpu.h : routions related to vcpu virtualization. >> vti.h : Some macros or routines for VT support on Itanium. >> Signed-off-by: Xiantao Zhang <xia...@in...> > >> +/* >> + * Flushrs instruction stream. >> + */ >> +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") + >> +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") > > Please put these into include/asm-ia64/gcc_intrin.h OK. >> +#define ia64_get_rsc() >> \ >> +({ >> \ >> + unsigned long val; >> \ >> + asm volatile ("mov %0=ar.rsc;;" : "=r"(val) :: "memory"); \ >> + val; >> \ >> +}) >> + >> +#define ia64_set_rsc(val) \ >> + asm volatile ("mov ar.rsc=%0;;" :: "r"(val) : "memory") > > Please update the ia64_get/set_reg macros to handle the RSC register > and use those macros. Moved. >> +#define ia64_get_bspstore() >> \ >> +({ >> \ >> + unsigned long val; >> \ >> + asm volatile ("mov %0=ar.bspstore;;" : "=r"(val) :: "memory"); \ >> + val; >> \ >> +}) > > Ditto for for AR.BSPSTORE > >> +#define ia64_get_rnat() >> \ >> +({ >> \ >> + unsigned long val; >> \ >> + asm volatile ("mov %0=ar.rnat;" : "=r"(val) :: "memory"); \ >> + val; >> \ >> +}) > > Ditto for AR.RNAT > >> +static inline unsigned long ia64_get_itc(void) >> +{ >> + unsigned long result; >> + result = ia64_getreg(_IA64_REG_AR_ITC); >> + return result; >> +} > > This exists in include/asm-ia64/delay.h > >> +static inline void ia64_set_dcr(unsigned long dcr) +{ >> + ia64_setreg(_IA64_REG_CR_DCR, dcr); >> +} > > Please just call ia64_setreg() in your code rather than defining a > wrapper for it. Sure. >> +#define ia64_ttag(addr) >> \ >> +({ >> \ >> + __u64 ia64_intri_res; >> \ >> + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ >> + ia64_intri_res; >> \ >> +}) > > Please add to include/asm-ia64/gcc_intrin.h instead. > >> diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h >> new file mode 100644 >> index 0000000..152cbdc >> --- /dev/null >> +++ b/arch/ia64/kvm/lapic.h >> @@ -0,0 +1,27 @@ >> +#ifndef __KVM_IA64_LAPIC_H >> +#define __KVM_IA64_LAPIC_H >> + >> +#include "iodev.h" > > I don't understand why iodev.h is included here? It is inherited from x86 side, and forget to remove it. Seems redundant. >> --- /dev/null >> +++ b/arch/ia64/kvm/vcpu.h > > The formatting of this file is dodgy, please try and make it comply > with the Linux standards in Documentation/CodingStyle > >> +#define _vmm_raw_spin_lock(x) >> \ > [snip] >> + >> +#define _vmm_raw_spin_unlock(x) \ > > Could you explain the reasoning behind these two macros? Whenever I > see open coded spin lock modifications like these, I have to admit I > get a bit worried. In the architecture of kvm/ia64, gvmm and host are in the two different worlds, and gvmm can't call host's interface. In migration case, we need to take a lock to sync the status of dirty memory. In order to make it work, this spin_lock is defined and used. 
>> +typedef struct kvm_vcpu VCPU; >> +typedef struct kvm_pt_regs REGS; >> +typedef enum { DATA_REF, NA_REF, INST_REF, RSE_REF } vhpt_ref_t; >> +typedef enum { INSTRUCTION, DATA, REGISTER } miss_type; > > ARGH! Please see previous mail about typedefs! I suspect this is code > inherited from Xen ? Xen has a lot of really nasty and pointless > typedefs like these :-( Removed. >> +static inline void vcpu_set_dbr(VCPU *vcpu, u64 reg, u64 val) +{ >> + /* TODO: need to virtualize */ >> + __ia64_set_dbr(reg, val); >> +} >> + >> +static inline void vcpu_set_ibr(VCPU *vcpu, u64 reg, u64 val) +{ >> + /* TODO: need to virtualize */ >> + ia64_set_ibr(reg, val); >> +} >> + >> +static inline u64 vcpu_get_dbr(VCPU *vcpu, u64 reg) +{ >> + /* TODO: need to virtualize */ >> + return ((u64)__ia64_get_dbr(reg)); >> +} >> + >> +static inline u64 vcpu_get_ibr(VCPU *vcpu, u64 reg) +{ >> + /* TODO: need to virtualize */ >> + return ((u64)ia64_get_ibr(reg)); >> +} > > More wrapper macros that really should just use ia64_get/set_reg() > directly in the code. Removed, and used the one without wrapper. > >> diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h >> new file mode 100644 >> index 0000000..591ab22 > [ship] >> +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil >> -*- */ > > Evil formatting again! > > Cheers, > Jes |
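The register-wrapper comments above come down to using the stock ia64_getreg()/ia64_setreg() accessors once the missing _IA64_REG_* constants are in place. A hedged illustration, assuming ar.rsc is reachable through those macros as the review requests:

#include <asm/intrinsics.h>

/* Put the RSE into enforced-lazy mode and hand back the old ar.rsc so
 * the caller can restore it; the two low bits of ar.rsc are the mode. */
static inline unsigned long rse_enforce_lazy(void)
{
        unsigned long old_rsc = ia64_getreg(_IA64_REG_AR_RSC);

        ia64_setreg(_IA64_REG_AR_RSC, old_rsc & ~0x3UL);
        return old_rsc;
}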
From: Zhang, X. <xia...@in...> - 2008-04-01 01:01:05
|
Carsten Otte wrote:
> Zhang, Xiantao wrote:
>> +/* mca_insert_tr
>> + *
>> + * Switch rid when TR reload and needed!
>> + * iord: 1: itr, 2: itr;
>> + *
>> +*/
>> +static void mca_insert_tr(u64 iord)
>> +{
>> +
>> +	int i;
>> +	u64 old_rr;
>> +	struct ia64_tr_entry *p;
>> +	unsigned long psr;
>> +	int cpu = smp_processor_id();
> What if CONFIG_PREEMPT is on, and we're being preempted and scheduled
> to a different CPU here? Are we running preempt disabled here? If so,
> the function header should state that this function needs to be called
> preempt_disabled.

The function inserts one TR into the local CPU's TLB and must not be
preempted around the call, so the caller has to disable preemption
before calling into this routine.
Maybe the description of this function should say "Called with preempt
disabled!". Does that make sense?
Xiantao

>> +/*
>> + * ia64_insert_tr in virtual mode. Allocate a TR slot
>> + *
>> + * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
>> + *
>> + * va 	: virtual address.
>> + * pte 	: pte entries inserted.
>> + * log_size: range to be covered.
>> + *
>> + * Return value: <0 : error No.
>> + *
>> + *		 >=0 : slot number allocated for TR.
>> + */
>> +int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
>> +{
>> +	int i, r;
>> +	unsigned long psr;
>> +	struct ia64_tr_entry *p;
>> +	int cpu = smp_processor_id();
> Same here.
>
>> +/*
>> + * ia64_purge_tr
>> + *
>> + * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
>> + *
>> + * slot: slot number to be freed.
>> + */
>> +void ia64_ptr_entry(u64 target_mask, int slot)
>> +{
>> +	int cpu = smp_processor_id();
>> +	int i;
>> +	struct ia64_tr_entry *p;
> Here again. |
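The agreed outcome, then, is simply a stronger function header. Something along these lines, where the wording is illustrative and the "2: dtr" correction assumes the original "2: itr" was a typo:

/* mca_insert_tr
 *
 * Switch rid when TR reload and needed!
 * iord: 1: itr, 2: dtr
 *
 * Called with preempt disabled - the function uses smp_processor_id()
 * and programs the local CPU's TLB, so the caller must keep the task
 * from migrating for as long as the mapping matters.
 */
static void mca_insert_tr(u64 iord);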
From: Yang, F. <fre...@in...> - 2008-04-01 00:34:54
|
FYI, Here is some assessment on on shipping open GFW from http://xenbits.xensource.com/ext/efi-vfirmware.hg for HVM guests on IA64 environment. -Fred Daniel P. Berrange wrote: > On Fri, Mar 28, 2008 at 10:26:37AM -0700, Yang, Fred wrote: >> Dan, >> >> This mail is to follow up with >> "https://bugzilla.redhat.com/show_bug.cgi?id=420421 FEAT: IA64 >> RHEL5.2 Xen to use Open Guest Firmware" to make it for RHEL5.3. >> In BZ, we have updated following information in addressing your 3 >> concerns, > >> 1. The build process requires tools which are not part of RHEL-5 eg >> the XML Beans Java libraries. This means that it is not currently >> possible to produce an RPM of the firmware source which will build >> [Status] open GFW was originally derived from >> https://www.tianocore.org/. The build infrastructure is also derived >> from Tiano core, which is a very unique in its own way. Can Red Hat >> release binary GFW (similar to the current RHEL5.2 in releasing Intel >> proprietary GFW) associated with source code described in item#2 if >> no short term tool change is available? > > When we ship open source software we need to be able to guarentee that > the binary and source code we ship are matching. ie, if someone gets > the > source code, they will be able to build it and get the same result as > the binary we built. This is neccessary for us to comply with the > terms > of licenses such as the GPL. It is also neccessary for our support and > maintainence procedures - if we patch something to fix a bug we need > to > sure that we are building new updated RPM the same way. > > The way we guarentee this is via our RPM build system. Every open > source > package has a source RPM. This is fed into the build system to produce > the binary RPM. As such, we need to be able to build the binary RPM > using > the tools available in RHEL. We cannot simply ship a pre-built > binary > and a collection of source code. It has to go via the RPM build > system. > >> 2. The is no upstream official release. The build instructions are >> just telling us to take a HG snapshot of the Xen patches, and a SVN >> snapshot of the EDK sources. There really needs to be a properly >> versioned, formal release of the firmware - preferably as a >> self-contained tar.gz of all the source code [Status] The open GFW >> site http://xenbits.xensource.com/ext/efi-vfirmware.hg is now also >> building binary as part of it release now. Please see Changeset99 >> "Binary for CS 92" > > This is not exactly what I meant. In fact, including and distributing > the pre-built binary in the efi-vfirmware.hg would be a violation of > the GPL > because you are not including any of the source from the tiancore.org > Subversion repository that is used to build it. > > What we require is a single tar.gz file containing *all* the source > code neccessary to build the firmware image - this will be a > combination > of the source from efi-vfirmeware.hg, and the neccessary bits from the > tianocore Subversion repository in one tar.gz file. This must *not* > include any pre-built binary images - it must be all source code. > >> 3. The is no clear statement of the licensing of the open source >> code. I've picked a random selection of source files and found a >> couple of different license headers - some BSD, some public domain, >> and some referring to external license files which don't exist. The >> source will need auditing to make sure its all consistent in terms >> of licensing. 
[Status] The code checked into >> http://xenbits.xensource.com/ext/efi-vfirmware.hg should all have >> <signed-off> by community developers. This would need Red Hat >> address/sanitize from legal/license aspect. > > The 'signed-off-by' lines indicate that the developer had the right > to submit > the code. They do not, however, specify the license for files. Most > of the > source code files contain a comment at the top of the file describing > what license they are under. A number of source code files do not > have any > comment describing the license. These need to be updated to have > explicit > license information. Second, the complete text of all the license > should > be included in top level directory of the source code. Many of the > files > simply say > > "All rights reserved. This program and the accompanying materials > are licensed and made available under the terms and conditions of the > BSD License which accompanies this distribution. The full text of > the license may be found at > http://opensource.org/licenses/bsd-license.php " > > It is not sufficient to point to a website URL since this URL / site > can disappear/change at any time. The actual text of the license > should be > included in the .tar.gz file along with all the source code. There > seems > to be a mixture of GPL, BSD and Apache licensed files, so you'll > probably > need to include multiple license files in the tar.gz. > > This should all the pretty straightforward, since most of the files > are > correct - only a small number are missing comments about their > license. > > Regards, > Daniel. >>> Red Hat, Engineering, Boston -o- >>> http://people.redhat.com/berrange/ :| http://libvirt.org -o- >>> http://virt-manager.org -o- http://ovirt.org :| >>> http://autobuild.org -o- >>> http://search.cpan.org/~danberr/ :| GnuPG: 7D3B9505 -o- F3C9 553F >>> A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| |
From: Carsten O. <co...@de...> - 2008-03-31 15:37:40
|
> +/********************************************************************** > **** > + VCPU control register access routines > + > ************************************************************************ > **/ > +static inline u64 vcpu_get_itir(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, itir)); > +} > + > +static inline void vcpu_set_itir(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, itir) = val; > +} > + > +static inline u64 vcpu_get_ifa(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, ifa)); > +} > + > +static inline void vcpu_set_ifa(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, ifa) = val; > +} > + > +static inline u64 vcpu_get_iva(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, iva)); > +} > + > +static inline u64 vcpu_get_pta(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, pta)); > +} > + > +static inline u64 vcpu_get_lid(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, lid)); > +} > + > +static inline u64 vcpu_get_tpr(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, tpr)); > +} > + > +static inline u64 vcpu_get_eoi(VCPU *vcpu) > +{ > + return (0UL); /*reads of eoi always return 0 */ > +} > + > +static inline u64 vcpu_get_irr0(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, irr[0])); > +} > + > +static inline u64 vcpu_get_irr1(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, irr[1])); > +} > + > +static inline u64 vcpu_get_irr2(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, irr[2])); > +} > + > +static inline u64 vcpu_get_irr3(VCPU *vcpu) > +{ > + return ((u64)VCPU(vcpu, irr[3])); > +} > + > +static inline void vcpu_set_dcr(VCPU *vcpu, u64 val) > +{ > + ia64_set_dcr(val); > +} > + > +static inline void vcpu_set_isr(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, isr) = val; > +} > + > +static inline void vcpu_set_lid(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, lid) = val; > +} > + > +static inline void vcpu_set_ipsr(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, ipsr) = val; > +} > + > +static inline void vcpu_set_iip(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, iip) = val; > +} > + > +static inline void vcpu_set_ifs(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, ifs) = val; > +} > + > +static inline void vcpu_set_iipa(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, iipa) = val; > +} > + > +static inline void vcpu_set_iha(VCPU *vcpu, u64 val) > +{ > + VCPU(vcpu, iha) = val; > +} > + > + > +static inline u64 vcpu_get_rr(VCPU *vcpu, u64 reg) > +{ > + return vcpu->arch.vrr[reg>>61]; > +} Looks to me like most of them can be replaced by a few macros using macro_##. > +static inline int highest_bits(int *dat) > +{ > + u32 bits, bitnum; > + int i; > + > + /* loop for all 256 bits */ > + for (i = 7; i >= 0 ; i --) { > + bits = dat[i]; > + if (bits) { > + bitnum = fls(bits); > + return i * 32 + bitnum - 1; > + } > + } > + return NULL_VECTOR; > +} duplicate to asm/bitops.h find_first_bit(). |
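Carsten's macro_## remark means generating the trivial accessors with token pasting instead of spelling each one out. A sketch of that approach, reusing the VCPU() accessor from the quoted code; only a few registers are shown, and registers with side effects such as eoi, dcr or rr still need hand-written versions, which is why "most", not all, collapse into the macro.

#define VCPU_CR_ACCESSORS(name)                                         \
static inline u64 vcpu_get_##name(struct kvm_vcpu *vcpu)               \
{                                                                       \
        return VCPU(vcpu, name);                                        \
}                                                                       \
static inline void vcpu_set_##name(struct kvm_vcpu *vcpu, u64 val)     \
{                                                                       \
        VCPU(vcpu, name) = val;                                         \
}

VCPU_CR_ACCESSORS(itir)
VCPU_CR_ACCESSORS(ifa)
VCPU_CR_ACCESSORS(iipa)
VCPU_CR_ACCESSORS(iha)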
From: Jeremy F. <je...@go...> - 2008-03-31 15:03:14
|
Jes Sorensen wrote: > I'm a little wary of the performance impact of this change. Doing a > cpumask compare on all smp_call_function calls seems a little expensive. > Maybe it's just noise in the big picture compared to the actual cost of > the IPIs, but I thought I'd bring it up. > > Keep in mind that a cpumask can be fairly big these days, max NR_CPUS > is currently 4096. For those booting a kernel with NR_CPUS at 4096 on > a dual CPU machine, it would be a bit expensive. > Unless your hardware has remarkably fast IPIs, I think really the cost of scanning 512 bytes is going to be in the noise... This change has been on the x86 side for ages, and not even Ingo made a peep about it ;) > Why not keep smp_call_function() the way it was before, rather than > implementing it via the call to smp_call_function_mask()? > Because Xen needs a different core implementation (because of its different IPI implementation), and it would be better to just have to do one of them rather than N. J |
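For reference, the change under discussion makes the plain call a thin wrapper over the mask-based one, roughly as below. This is a sketch against the four-argument prototype of that era, not Jeremy's actual patch; the real series also deals with locking and CPU-hotplug details. The cpumask copy and clear are the per-call cost Jes is pointing at; with NR_CPUS=4096 that is a 512-byte structure.

#include <linux/cpumask.h>
#include <linux/smp.h>

int smp_call_function(void (*func)(void *info), void *info, int nonatomic,
                      int wait)
{
        cpumask_t mask = cpu_online_map;
        int ret;

        preempt_disable();
        cpu_clear(smp_processor_id(), mask);    /* everyone but ourselves */
        ret = smp_call_function_mask(mask, func, info, wait);
        preempt_enable();

        return ret;
}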
From: Carsten O. <co...@de...> - 2008-03-31 14:58:17
|
Zhang, Xiantao wrote: > +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, > + unsigned long eid) > +{ > + ia64_lid_t lid; > + int i; > + > + for (i = 0; i < KVM_MAX_VCPUS; i++) { > + if (kvm->vcpus[i]) { > + lid.val = VCPU_LID(kvm->vcpus[i]); > + if (lid.id == id && lid.eid == eid) > + return kvm->vcpus[i]; > + } > + } > + > + return NULL; > +} > + > +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > +{ > + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); > + struct kvm_vcpu *target_vcpu; > + struct kvm_pt_regs *regs; > + ia64_ipi_a addr = p->u.ipi_data.addr; > + ia64_ipi_d data = p->u.ipi_data.data; > + > + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); > + if (!target_vcpu) > + return handle_vm_error(vcpu, kvm_run); > + > + if (!target_vcpu->arch.launched) { > + regs = vcpu_regs(target_vcpu); > + > + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; > + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; > + > + target_vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE; > + if (waitqueue_active(&target_vcpu->wq)) > + wake_up_interruptible(&target_vcpu->wq); > + } else { > + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); > + if (target_vcpu != vcpu) > + kvm_vcpu_kick(target_vcpu); > + } > + > + return 1; > +} *Shrug*. This looks highly racy to me. You do access various values in target_vcpu without any lock! I know that taking the target vcpu's lock does'nt work because that one is held all the time during KVM_VCPU_RUN. My solution to that was struct local_interrupt, which has its own lock, and has the waitqueue plus everything I need to send a sigp [that's our flavor of ipi]. > +int kvm_emulate_halt(struct kvm_vcpu *vcpu) > +{ > + > + ktime_t kt; > + long itc_diff; > + unsigned long vcpu_now_itc; > + > + unsigned long expires; > + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; That makes me jealous, I'd love to have hrtimer on s390 for this. I've got to round up to the next jiffie. *Sigh* > +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, > + struct kvm_sregs *sregs) > +{ > + printk(KERN_WARNING"kvm:kvm_arch_vcpu_ioctl_set_sregs > called!!\n"); > + return 0; > +} > + > +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, > + struct kvm_sregs *sregs) > +{ > + printk(KERN_WARNING"kvm:kvm_arch_vcpu_ioctl_get_sregs > called!!\n"); > + return 0; > + > +} Suggestion: if get/set sregs does'nt seem useful on ia64, why not return -EINVAL? In that case, you could also not print a kern warning, the user will either handle that situation or complain. > +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > +{ <snip> > + /*FIXME:Need to removed it later!!\n*/ > + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); > + vcpu->arch.apic->vcpu = vcpu; Fixme! > +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) > +{ > + unsigned long psr; > + int r; > + > + local_irq_save(psr); > + r = kvm_insert_vmm_mapping(vcpu); > + if (r) > + goto fail; > + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); > + if (r) > + goto fail; Maybe change to return r, rather then goto fail? > +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu > *fpu) > +{ > + printk(KERN_WARNING"kvm:IA64 doesn't need to export" > + "fpu to userspace!\n"); > + return 0; > +} > + > +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu > *fpu) > +{ > + printk(KERN_WARNING"kvm:IA64 doesn't need to export" > + "fpu to userspace !\n"); > + return 0; > +} maybe -EINVAL? 
> +static int find_highest_bits(int *dat) > +{ > + u32 bits, bitnum; > + int i; > + > + /* loop for all 256 bits */ > + for (i = 7; i >= 0 ; i--) { > + bits = dat[i]; > + if (bits) { > + bitnum = fls(bits); > + return i * 32 + bitnum - 1; > + } > + } > + > + return -1; > +} Should be in asm/bitops.h. Look at find_first_bit() and friends, this is duplicate. |
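The -EINVAL suggestion amounts to stubs like these for ioctls that have no meaning on ia64, instead of a printk plus a fake success:

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        return -EINVAL;         /* no sregs on ia64 */
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        return -EINVAL;
}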
From: Carsten O. <co...@de...> - 2008-03-31 13:46:41
|
Zhang, Xiantao wrote: > +typedef union context { > + /* 8K size */ > + char dummy[KVM_CONTEXT_SIZE]; > + struct { > + unsigned long psr; > + unsigned long pr; > + unsigned long caller_unat; > + unsigned long pad; > + unsigned long gr[32]; > + unsigned long ar[128]; > + unsigned long br[8]; > + unsigned long cr[128]; > + unsigned long rr[8]; > + unsigned long ibr[8]; > + unsigned long dbr[8]; > + unsigned long pkr[8]; > + struct ia64_fpreg fr[128]; > + }; > +} context_t; This looks ugly to me. I'd rather prefer to have a straight struct with elements psr...fr[], and cast the pointer to char* when needed. KVM_CONTEXT_SIZE can be used as parameter to kzalloc() on allocation, it's too large to be on stack anyway. > +typedef struct thash_data { > + union { > + struct { > + unsigned long p : 1; /* 0 */ > + unsigned long rv1 : 1; /* 1 */ > + unsigned long ma : 3; /* 2-4 */ > + unsigned long a : 1; /* 5 */ > + unsigned long d : 1; /* 6 */ > + unsigned long pl : 2; /* 7-8 */ > + unsigned long ar : 3; /* 9-11 */ > + unsigned long ppn : 38; /* 12-49 */ > + unsigned long rv2 : 2; /* 50-51 */ > + unsigned long ed : 1; /* 52 */ > + unsigned long ig1 : 11; /* 53-63 */ > + }; > + struct { > + unsigned long __rv1 : 53; /* 0-52 */ > + unsigned long contiguous : 1; /*53 */ > + unsigned long tc : 1; /* 54 TR or TC */ > + unsigned long cl : 1; > + /* 55 I side or D side cache line */ > + unsigned long len : 4; /* 56-59 */ > + unsigned long io : 1; /* 60 entry is for io or > not */ > + unsigned long nomap : 1; > + /* 61 entry cann't be inserted into machine > TLB.*/ > + unsigned long checked : 1; > + /* 62 for VTLB/VHPT sanity check */ > + unsigned long invalid : 1; > + /* 63 invalid entry */ > + }; > + unsigned long page_flags; > + }; /* same for VHPT and TLB */ > + > + union { > + struct { > + unsigned long rv3 : 2; > + unsigned long ps : 6; > + unsigned long key : 24; > + unsigned long rv4 : 32; > + }; > + unsigned long itir; > + }; > + union { > + struct { > + unsigned long ig2 : 12; > + unsigned long vpn : 49; > + unsigned long vrn : 3; > + }; > + unsigned long ifa; > + unsigned long vadr; > + struct { > + unsigned long tag : 63; > + unsigned long ti : 1; > + }; > + unsigned long etag; > + }; > + union { > + struct thash_data *next; > + unsigned long rid; > + unsigned long gpaddr; > + }; > +} thash_data_t; A matter of taste, but I'd prefer unsigned long mask, and #define MASK_BIT_FOR_PURPUSE over bitfields. This structure could be much smaller that way. > +struct kvm_regs { > + char *saved_guest; > + char *saved_stack; > + struct saved_vpd vpd; > + /*Arch-regs*/ > + int mp_state; > + unsigned long vmm_rr; > + /* TR and TC. */ > + struct thash_data itrs[NITRS]; > + struct thash_data dtrs[NDTRS]; > + /* Bit is set if there is a tr/tc for the region. */ > + unsigned char itr_regions; > + unsigned char dtr_regions; > + unsigned char tc_regions; > + > + char irq_check; > + unsigned long saved_itc; > + unsigned long itc_check; > + unsigned long timer_check; > + unsigned long timer_pending; > + unsigned long last_itc; > + > + unsigned long vrr[8]; > + unsigned long ibr[8]; > + unsigned long dbr[8]; > + unsigned long insvc[4]; /* Interrupt in service. 
*/ > + unsigned long xtp; > + > + unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) > */ > + unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) > */ > + unsigned long metaphysical_saved_rr0; /* from kvm_arch > */ > + unsigned long metaphysical_saved_rr4; /* from kvm_arch > */ > + unsigned long fp_psr; /*used for lazy float register */ > + unsigned long saved_gp; > + /*for phycial emulation */ > +}; This looks like it does'nt just have guest register content in it. It seems to me preferable to have another ioctl different from KVM_GET_REGS/KVM_SET_REGS to read and set the rest of the content and seperate it from struct kvm_regs. |
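The "masks over bitfields" preference would turn the first union of thash_data into flag macros on the single page_flags word, along these lines. The macro names are invented; the bit positions are taken from the quoted bitfields.

#define THASH_PTE_P             (1UL << 0)              /* present */
#define THASH_PTE_MA_MASK       (0x7UL << 2)            /* memory attribute */
#define THASH_PTE_D             (1UL << 6)              /* dirty */
#define THASH_PTE_PPN_MASK      (((1UL << 38) - 1) << 12)
#define THASH_TC                (1UL << 54)             /* TC entry, not TR */
#define THASH_IO                (1UL << 60)             /* MMIO mapping */
#define THASH_INVALID           (1UL << 63)             /* entry not valid */

static inline int thash_present(unsigned long page_flags)
{
        return (page_flags & THASH_PTE_P) && !(page_flags & THASH_INVALID);
}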
From: Carsten O. <co...@de...> - 2008-03-31 13:44:44
|
Zhang, Xiantao wrote: > +/* mca_insert_tr > + * > + * Switch rid when TR reload and needed! > + * iord: 1: itr, 2: itr; > + * > +*/ > +static void mca_insert_tr(u64 iord) > +{ > + > + int i; > + u64 old_rr; > + struct ia64_tr_entry *p; > + unsigned long psr; > + int cpu = smp_processor_id(); What if CONFIG_PREEMPT is on, and we're being preempted and scheduled to a different CPU here? Are we running preempt disabled here? If so, the function header should state that this function needs to be called preempt_disabled. > +/* > + * ia64_insert_tr in virtual mode. Allocate a TR slot > + * > + * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr > + * > + * va : virtual address. > + * pte : pte entries inserted. > + * log_size: range to be covered. > + * > + * Return value: <0 : error No. > + * > + * >=0 : slot number allocated for TR. > + */ > +int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) > +{ > + int i, r; > + unsigned long psr; > + struct ia64_tr_entry *p; > + int cpu = smp_processor_id(); Same here. > +/* > + * ia64_purge_tr > + * > + * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. > + * > + * slot: slot number to be freed. > + */ > +void ia64_ptr_entry(u64 target_mask, int slot) > +{ > + int cpu = smp_processor_id(); > + int i; > + struct ia64_tr_entry *p; Here again. |
From: Jes S. <je...@sg...> - 2008-03-31 12:33:46
|
Hi Xiantao, More comments. Zhang, Xiantao wrote: >>From 696b9eea9f5001a7b7a07c0e58514aa10306b91a Mon Sep 17 00:00:00 2001 > From: Xiantao Zhang <xia...@in...> > Date: Fri, 28 Mar 2008 09:51:36 +0800 > Subject: [PATCH] KVM:IA64 : Add head files for kvm/ia64 > > ia64_regs: some defintions for special registers > which aren't defined in asm-ia64/ia64regs. Please put missing definitions of registers into asm-ia64/ia64regs.h if they are official definitions from the spec. > kvm_minstate.h : Marcos about Min save routines. > lapic.h: apic structure definition. > vcpu.h : routions related to vcpu virtualization. > vti.h : Some macros or routines for VT support on Itanium. > Signed-off-by: Xiantao Zhang <xia...@in...> > +/* > + * Flushrs instruction stream. > + */ > +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") > + > +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") Please put these into include/asm-ia64/gcc_intrin.h > +#define ia64_get_rsc() > \ > +({ > \ > + unsigned long val; > \ > + asm volatile ("mov %0=ar.rsc;;" : "=r"(val) :: "memory"); > \ > + val; > \ > +}) > + > +#define ia64_set_rsc(val) \ > + asm volatile ("mov ar.rsc=%0;;" :: "r"(val) : "memory") Please update the ia64_get/set_reg macros to handle the RSC register and use those macros. > +#define ia64_get_bspstore() > \ > +({ > \ > + unsigned long val; > \ > + asm volatile ("mov %0=ar.bspstore;;" : "=r"(val) :: "memory"); > \ > + val; > \ > +}) Ditto for for AR.BSPSTORE > +#define ia64_get_rnat() > \ > +({ > \ > + unsigned long val; > \ > + asm volatile ("mov %0=ar.rnat;" : "=r"(val) :: "memory"); > \ > + val; > \ > +}) Ditto for AR.RNAT > +static inline unsigned long ia64_get_itc(void) > +{ > + unsigned long result; > + result = ia64_getreg(_IA64_REG_AR_ITC); > + return result; > +} This exists in include/asm-ia64/delay.h > +static inline void ia64_set_dcr(unsigned long dcr) > +{ > + ia64_setreg(_IA64_REG_CR_DCR, dcr); > +} Please just call ia64_setreg() in your code rather than defining a wrapper for it. > +#define ia64_ttag(addr) > \ > +({ > \ > + __u64 ia64_intri_res; > \ > + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); > \ > + ia64_intri_res; > \ > +}) Please add to include/asm-ia64/gcc_intrin.h instead. > diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h > new file mode 100644 > index 0000000..152cbdc > --- /dev/null > +++ b/arch/ia64/kvm/lapic.h > @@ -0,0 +1,27 @@ > +#ifndef __KVM_IA64_LAPIC_H > +#define __KVM_IA64_LAPIC_H > + > +#include "iodev.h" I don't understand why iodev.h is included here? > --- /dev/null > +++ b/arch/ia64/kvm/vcpu.h The formatting of this file is dodgy, please try and make it comply with the Linux standards in Documentation/CodingStyle > +#define _vmm_raw_spin_lock(x) > \ [snip] > + > +#define _vmm_raw_spin_unlock(x) \ Could you explain the reasoning behind these two macros? Whenever I see open coded spin lock modifications like these, I have to admit I get a bit worried. > +typedef struct kvm_vcpu VCPU; > +typedef struct kvm_pt_regs REGS; > +typedef enum { DATA_REF, NA_REF, INST_REF, RSE_REF } vhpt_ref_t; > +typedef enum { INSTRUCTION, DATA, REGISTER } miss_type; ARGH! Please see previous mail about typedefs! I suspect this is code inherited from Xen ? 
Xen has a lot of really nasty and pointless typedefs like these :-( > +static inline void vcpu_set_dbr(VCPU *vcpu, u64 reg, u64 val) > +{ > + /* TODO: need to virtualize */ > + __ia64_set_dbr(reg, val); > +} > + > +static inline void vcpu_set_ibr(VCPU *vcpu, u64 reg, u64 val) > +{ > + /* TODO: need to virtualize */ > + ia64_set_ibr(reg, val); > +} > + > +static inline u64 vcpu_get_dbr(VCPU *vcpu, u64 reg) > +{ > + /* TODO: need to virtualize */ > + return ((u64)__ia64_get_dbr(reg)); > +} > + > +static inline u64 vcpu_get_ibr(VCPU *vcpu, u64 reg) > +{ > + /* TODO: need to virtualize */ > + return ((u64)ia64_get_ibr(reg)); > +} More wrapper macros that really should just use ia64_get/set_reg() directly in the code. > diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h > new file mode 100644 > index 0000000..591ab22 [ship] > +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- > */ Evil formatting again! Cheers, Jes |
From: Jes S. <je...@sg...> - 2008-03-31 12:02:06
|
Zhang, Xiantao wrote: >>From 62895ff991d48398a77afdbf7f2bef127e802230 Mon Sep 17 00:00:00 2001 > From: Xiantao Zhang <xia...@in...> > Date: Fri, 28 Mar 2008 09:49:57 +0800 > Subject: [PATCH] KVM: IA64: Add kvm arch-specific core code for > kvm/ia64. > > kvm_ia64.c is created to handle kvm ia64-specific core logic. > Signed-off-by: Xiantao Zhang <xia...@in...> More comments, a couple of bugs in this one. > +#include <linux/module.h> > +#include <linux/vmalloc.h> Don't think you need vmalloc.h here. > +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs > *regs) > +{ [snip] > + copy_from_user(&vcpu->arch.guest, regs->saved_guest, > + sizeof(union context)); > + copy_from_user(vcpu + 1, regs->saved_stack + sizeof(struct > kvm_vcpu), > + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); You need to check the return values from copy_from_user() here and deal with possible failure. > + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); > + vcpu->arch.apic->vcpu = vcpu; Whoops! Missing NULL pointer check here after the kzalloc. > + copy_to_user(regs->saved_guest, &vcpu->arch.guest, > + sizeof(union context)); > + copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); Same problem as above - check the return values. Cheers, Jes |
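The uaccess fix being asked for is of this shape: copy_to_user()/copy_from_user() return the number of bytes left uncopied, so any non-zero result must turn into -EFAULT. The helper below is illustrative and mirrors the quoted get_regs path.

#include <linux/uaccess.h>

static int export_guest_context(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        if (copy_to_user(regs->saved_guest, &vcpu->arch.guest,
                         sizeof(union context)))
                return -EFAULT;
        if (copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET))
                return -EFAULT;
        return 0;
}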
From: Jes S. <je...@sg...> - 2008-03-31 11:41:45
|
Hi Xiantao, Some more nit-picking, though some of this is a bit more important to fixup. Cheers, Jes > +typedef struct thash_data { Urgh! argh! Please avoid typedefs unless you really need them, see Chapter 5 of Documentation/CodingStyle for details. > diff --git a/include/asm-ia64/kvm_host.h b/include/asm-ia64/kvm_host.h > new file mode 100644 > index 0000000..522bde0 > --- /dev/null > +++ b/include/asm-ia64/kvm_host.h > @@ -0,0 +1,530 @@ > +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- > */ The standard indentation for Linux is 8 characters using tabs. If possible it's preferred to comply with that to make the entire kernel tree easier for everybody to deal with. See CodingStyle for details. > +struct kvm_mmio_req { > + uint64_t addr; /* physical address */ > + uint64_t size; /* size in bytes */ > + uint64_t data; /* data (or paddr of data) */ > + uint8_t state:4; > + uint8_t dir:1; /* 1=read, 0=write */ > +}; > +typedef struct kvm_mmio_req mmio_req_t; More typedefs > +/*Pal data struct */ > +typedef struct pal_call{ and again. > + /*In area*/ > + uint64_t gr28; > + uint64_t gr29; > + uint64_t gr30; > + uint64_t gr31; > + /*Out area*/ > + struct ia64_pal_retval ret; > +} pal_call_t; > + > +/* Sal data structure */ > +typedef struct sal_call{ and again... > + /*In area*/ > + uint64_t in0; > + uint64_t in1; > + uint64_t in2; > + uint64_t in3; > + uint64_t in4; > + uint64_t in5; > + uint64_t in6; > + uint64_t in7; > + /*Our area*/ > + struct sal_ret_values ret; > +} sal_call_t; |
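Concretely, the CodingStyle objection is to names such as mmio_req_t and pal_call_t; the struct tags are meant to be used directly. Taking the quoted request structure as the example (the small helper is illustrative):

#include <linux/types.h>

struct kvm_mmio_req {
        uint64_t        addr;           /* physical address */
        uint64_t        size;           /* size in bytes */
        uint64_t        data;           /* data (or paddr of data) */
        uint8_t         state:4;
        uint8_t         dir:1;          /* 1=read, 0=write */
};

/* callers spell the type out instead of hiding it behind a typedef */
static void mmio_req_complete(struct kvm_mmio_req *req)
{
        req->state = 0;                 /* illustrative: mark the slot idle */
}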