From: Kay, A. M <all...@in...> - 2008-05-05 21:36:34
|
Kvm kernel changes. Signed-off-by: Allen M Kay <all...@in...> ------ arch/x86/kvm/Makefile | 2 arch/x86/kvm/vtd.c | 183 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 7 + include/asm-x86/kvm_host.h | 3 include/asm-x86/kvm_para.h | 1 include/linux/kvm_host.h | 6 + virt/kvm/kvm_main.c | 3 7 files changed, 204 insertions(+), 1 deletion(-) ------ diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c97d35c..b1057fb 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,7 +12,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ i8254.o obj-$(CONFIG_KVM) += kvm.o -kvm-intel-objs = vmx.o +kvm-intel-objs = vmx.o vtd.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o kvm-amd-objs = svm.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c new file mode 100644 index 0000000..9a080b5 --- /dev/null +++ b/arch/x86/kvm/vtd.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Allen M. 
Kay <all...@in...> + * Author: Weidong Han <wei...@in...> + */ + +#include <linux/list.h> +#include <linux/kvm_host.h> +#include <linux/pci.h> +#include <linux/dmar.h> +#include <linux/intel-iommu.h> + +//#define DEBUG + +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 + +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu); +void iommu_free_domain(struct dmar_domain *domain); +int domain_init(struct dmar_domain *domain, int guest_width); +int domain_context_mapping(struct dmar_domain *d, + struct pci_dev *pdev); +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn); +struct dmar_domain * find_domain(struct pci_dev *pdev); + + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + unsigned long gpa; + struct page *page; + hpa_t hpa; + int j, write; + struct vm_area_struct *vma; + + if (!kvm->arch.domain) + return 1; + + gpa = base_gfn << PAGE_SHIFT; + page = gfn_to_page(kvm, base_gfn); + hpa = page_to_phys(page); + + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n", gpa); + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %llx\n", hpa); + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n", + npages*PAGE_SIZE); + + for (j = 0; j < npages; j++) { + gpa += PAGE_SIZE; + page = gfn_to_page(kvm, gpa >> PAGE_SHIFT); + hpa = page_to_phys(page); + domain_page_mapping(kvm->arch.domain, gpa, hpa, PAGE_SIZE, + DMA_PTE_READ | DMA_PTE_WRITE); + vma = find_vma(current->mm, gpa); + if (!vma) + return 1; + write = (vma->vm_flags & VM_WRITE) != 0; + get_user_pages(current, current->mm, gpa, + PAGE_SIZE, write, 0, NULL, NULL); + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages); + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, status; + for (i = 0; i < kvm->nmemslots; i++) { + status = kvm_iommu_map_pages(kvm, 
kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + if (status) + return status; + } + return 0; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_pci_passthrough_dev *pci_pt_dev) +{ + struct dmar_drhd_unit *drhd; + struct dmar_domain *domain; + struct intel_iommu *iommu; + struct pci_dev *pdev = NULL; + + printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n", + pci_pt_dev->host.busnr, + PCI_SLOT(pci_pt_dev->host.devfn), + PCI_FUNC(pci_pt_dev->host.devfn)); + + for_each_pci_dev(pdev) { + if ((pdev->bus->number == pci_pt_dev->host.busnr) && + (pdev->devfn == pci_pt_dev->host.devfn)) + goto found; + } + goto not_found; +found: + pci_pt_dev->pdev = pdev; + + drhd = dmar_find_matched_drhd_unit(pdev); + if (!drhd) { + printk(KERN_ERR "kvm_iommu_map_guest: drhd == NULL\n"); + goto not_found; + } + + printk(KERN_DEBUG "kvm_iommu_map_guest: reg_base_addr = %llx\n", + drhd->reg_base_addr); + + iommu = drhd->iommu; + if (!iommu) { + printk(KERN_ERR "kvm_iommu_map_guest: iommu == NULL\n"); + goto not_found; + } + domain = iommu_alloc_domain(iommu); + if (!domain) { + printk(KERN_ERR "kvm_iommu_map_guest: domain == NULL\n"); + goto not_found; + } + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + printk(KERN_ERR "kvm_iommu_map_guest: domain_init() failed\n"); + goto not_found; + } + kvm->arch.domain = domain; + kvm_iommu_map_memslots(kvm); + domain_context_mapping(kvm->arch.domain, pdev); + return 0; +not_found: + return 1; +} +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest); + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct dmar_domain *domain; + struct kvm_pci_pt_dev_list *entry; + struct pci_dev *pdev; + + list_for_each_entry(entry, &kvm->arch.domain->devices, list) { + printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n", + entry->pt_dev.host.busnr, + PCI_SLOT(entry->pt_dev.host.devfn), + PCI_FUNC(entry->pt_dev.host.devfn)); + + pdev = entry->pt_dev.pdev; + + if (pdev == NULL) { + printk("kvm_iommu_unmap_guest: pdev == NULL\n"); + return 
1; + } + + /* detach kvm dmar domain */ + detach_domain_for_dev(kvm->arch.domain, + pdev->bus->number, pdev->devfn); + + /* now restore back linux iommu domain */ + domain = find_domain(pdev); + if (domain) + domain_context_mapping(domain, pdev); + else + printk(KERN_DEBUG + "kvm_iommu_unmap_guest: domain == NULL\n"); + } + /* unmap guest memory in vt-d page table */ + iommu_free_domain(kvm->arch.domain); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_iommu_unmap_guest); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a97d2e2..a877db2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -257,6 +257,7 @@ static void kvm_free_pci_passthrough(struct kvm *kvm) list_del(&pci_pt_dev->list); } + kvm->arch.domain = NULL; } unsigned long segment_base(u16 selector) @@ -1846,6 +1847,10 @@ long kvm_arch_vm_ioctl(struct file *filp, if (copy_from_user(&pci_pt_dev, argp, sizeof pci_pt_dev)) goto out; + r = kvm_iommu_map_guest(kvm, &pci_pt_dev); + if (r) + goto out; + r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev); if (r) goto out; @@ -4088,6 +4093,8 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + if (kvm->arch.domain) + kvm_iommu_unmap_guest(kvm); kvm_free_pci_passthrough(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 4662d49..70248cb 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -19,6 +19,8 @@ #include <linux/kvm_types.h> #include <asm/desc.h> +#include <linux/dmar.h> +#include <linux/intel-iommu.h> #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -318,6 +320,7 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head pci_pt_dev_head; + struct dmar_domain *domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index 5f93b78..6202ed1 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ 
-170,5 +170,6 @@ struct kvm_pci_pt_info { struct kvm_pci_passthrough_dev { struct kvm_pci_pt_info guest; struct kvm_pci_pt_info host; + struct pci_dev *pdev; /* kernel device pointer for host dev */ }; #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4e16682..bcfcf78 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -276,6 +276,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, + unsigned long npages); +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_pci_passthrough_dev *pci_pt_dev); +int kvm_iommu_unmap_guest(struct kvm *kvm); + static inline void kvm_guest_enter(void) { account_system_vtime(current); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d3cb4cc..e46614a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -309,6 +309,9 @@ int __kvm_set_memory_region(struct kvm *kvm, new.npages = npages; new.flags = mem->flags; + /* map the pages in iommu page table */ + kvm_iommu_map_pages(kvm, base_gfn, npages); + /* Disallow changing a memory slot's size. */ r = -EINVAL; if (npages && old.npa |
From: Amit S. <ami...@qu...> - 2008-05-06 07:36:04
|
On Tuesday 06 May 2008 03:06:23 Kay, Allen M wrote: > Kvm kernel changes. > > Signed-off-by: Allen M Kay <all...@in...> > --- /dev/null > +++ b/arch/x86/kvm/vtd.c > @@ -0,0 +1,183 @@ > + > +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 > + > +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev > *dev); > +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu); > +void iommu_free_domain(struct dmar_domain *domain); > +int domain_init(struct dmar_domain *domain, int guest_width); > +int domain_context_mapping(struct dmar_domain *d, > + struct pci_dev *pdev); > +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, > + u64 hpa, size_t size, int prot); > +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 > devfn); > +struct dmar_domain * find_domain(struct pci_dev *pdev); Please move these to a .h file and also prefix appropriate keywords: domain_context_mapping is confusing and since it's an intel iommu-only thing, use something like intel_iommu_domain_context_mapping > +int kvm_iommu_map_pages(struct kvm *kvm, > + gfn_t base_gfn, unsigned long npages) > +{ > + unsigned long gpa; > + struct page *page; > + hpa_t hpa; > + int j, write; > + struct vm_area_struct *vma; > + > + if (!kvm->arch.domain) > + return 1; > + > + gpa = base_gfn << PAGE_SHIFT; > + page = gfn_to_page(kvm, base_gfn); > + hpa = page_to_phys(page); > + > + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n", gpa); > + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %llx\n", hpa); > + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n", > + npages*PAGE_SIZE); > + > + for (j = 0; j < npages; j++) { > + gpa += PAGE_SIZE; > + page = gfn_to_page(kvm, gpa >> PAGE_SHIFT); > + hpa = page_to_phys(page); > + domain_page_mapping(kvm->arch.domain, gpa, hpa, > PAGE_SIZE, > + DMA_PTE_READ | DMA_PTE_WRITE); > + vma = find_vma(current->mm, gpa); > + if (!vma) > + return 1; * > + write = (vma->vm_flags & VM_WRITE) != 0; > + get_user_pages(current, current->mm, gpa, > + 
PAGE_SIZE, write, 0, NULL, NULL); You should put_page each of the user pages when freeing or exiting (in unmap_guest), else a ref is held on each page and that's a lot of memory leaked. Also, this rules out any form of guest swapping. You should put_page in case a balloon driver in the guest tries to free some pages for the host. > + } > + return 0; > +} > +EXPORT_SYMBOL_GPL(kvm_iommu_map_pages); > + > +static int kvm_iommu_map_memslots(struct kvm *kvm) > +{ > + int i, status; > + for (i = 0; i < kvm->nmemslots; i++) { > + status = kvm_iommu_map_pages(kvm, > kvm->memslots[i].base_gfn, > + kvm->memslots[i].npages); > + if (status) > + return status; * > + } > + return 0; > +} > + > +int kvm_iommu_map_guest(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev) > +{ > + struct dmar_drhd_unit *drhd; > + struct dmar_domain *domain; > + struct intel_iommu *iommu; > + struct pci_dev *pdev = NULL; > + > + printk(KERN_DEBUG "kvm_iommu_map_guest: host bdf = %x:%x:%x\n", > + pci_pt_dev->host.busnr, > + PCI_SLOT(pci_pt_dev->host.devfn), > + PCI_FUNC(pci_pt_dev->host.devfn)); > + > + for_each_pci_dev(pdev) { > + if ((pdev->bus->number == pci_pt_dev->host.busnr) && > + (pdev->devfn == pci_pt_dev->host.devfn)) > + goto found; > + } You can use pci_get_device instead of going through the list yourself. 
> + goto not_found; > +found: > + pci_pt_dev->pdev = pdev; > + > + drhd = dmar_find_matched_drhd_unit(pdev); > + if (!drhd) { > + printk(KERN_ERR "kvm_iommu_map_guest: drhd == NULL\n"); > + goto not_found; > + } > + > + printk(KERN_DEBUG "kvm_iommu_map_guest: reg_base_addr = %llx\n", > + drhd->reg_base_addr); > + > + iommu = drhd->iommu; > + if (!iommu) { > + printk(KERN_ERR "kvm_iommu_map_guest: iommu == NULL\n"); > + goto not_found; > + } > + domain = iommu_alloc_domain(iommu); > + if (!domain) { > + printk(KERN_ERR "kvm_iommu_map_guest: domain == > NULL\n"); > + goto not_found; > + } > + if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { > + printk(KERN_ERR "kvm_iommu_map_guest: domain_init() > failed\n"); > + goto not_found; Memory allocated in iommu_alloc_domain is leaked in this case > + } > + kvm->arch.domain = domain; > + kvm_iommu_map_memslots(kvm); *: You don't check for failure in mapping > + domain_context_mapping(kvm->arch.domain, pdev); > + return 0; > +not_found: > + return 1; > +} > +EXPORT_SYMBOL_GPL(kvm_iommu_map_guest); > + > +int kvm_iommu_unmap_guest(struct kvm *kvm) > +{ > + struct dmar_domain *domain; > + struct kvm_pci_pt_dev_list *entry; > + struct pci_dev *pdev; > + > + list_for_each_entry(entry, &kvm->arch.domain->devices, list) { > + printk(KERN_DEBUG "kvm_iommu_unmap_guest: %x:%x:%x\n", > + entry->pt_dev.host.busnr, > + PCI_SLOT(entry->pt_dev.host.devfn), > + PCI_FUNC(entry->pt_dev.host.devfn)); > + > + pdev = entry->pt_dev.pdev; > + > + if (pdev == NULL) { > + printk("kvm_iommu_unmap_guest: pdev == NULL\n"); > + return 1; > + } > + > + /* detach kvm dmar domain */ > + detach_domain_for_dev(kvm->arch.domain, > + pdev->bus->number, pdev->devfn); > + > + /* now restore back linux iommu domain */ > + domain = find_domain(pdev); > + if (domain) > + domain_context_mapping(domain, pdev); > + else > + printk(KERN_DEBUG > + "kvm_iommu_unmap_guest: domain == > NULL\n"); > + } > + /* unmap guest memory in vt-d page table */ > + 
iommu_free_domain(kvm->arch.domain); > + return 0; > +} > +EXPORT_SYMBOL_GPL(kvm_iommu_unmap_guest); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index a97d2e2..a877db2 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -257,6 +257,7 @@ static void kvm_free_pci_passthrough(struct kvm > *kvm) > > list_del(&pci_pt_dev->list); > } > + kvm->arch.domain = NULL; > } > > unsigned long segment_base(u16 selector) > @@ -1846,6 +1847,10 @@ long kvm_arch_vm_ioctl(struct file *filp, > if (copy_from_user(&pci_pt_dev, argp, sizeof > pci_pt_dev)) > goto out; > > + r = kvm_iommu_map_guest(kvm, &pci_pt_dev); > + if (r) > + goto out; > + > r = kvm_vm_ioctl_pci_pt_dev(kvm, &pci_pt_dev); > if (r) > goto out; If the ioctl fails, you don't "unmap" the guest (and also leak memory). I suggest you call the map guest routine after the ioctl since the ioctl also checks if a similar device has already been added and populates the structure. In that case, you should call kvm_free_pci_passthrough() on unsuccessful iommu mapping Looks like you're leaking memory and failing to unmap or free the domain in all the error paths. > @@ -4088,6 +4093,8 @@ static void kvm_free_vcpus(struct kvm *kvm) > > void kvm_arch_destroy_vm(struct kvm *kvm) > { > + if (kvm->arch.domain) > + kvm_iommu_unmap_guest(kvm); This condition can be checked for inside the unmap guest routine. iommu_unmap_guest can be called unconditionally. 
> kvm_free_pci_passthrough(kvm); > kvm_free_pit(kvm); > kfree(kvm->arch.vpic); > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h > index 4662d49..70248cb 100644 > --- a/include/asm-x86/kvm_host.h > +++ b/include/asm-x86/kvm_host.h > @@ -19,6 +19,8 @@ > #include <linux/kvm_types.h> > > #include <asm/desc.h> > +#include <linux/dmar.h> > +#include <linux/intel-iommu.h> > > #define KVM_MAX_VCPUS 16 > #define KVM_MEMORY_SLOTS 32 > @@ -318,6 +320,7 @@ struct kvm_arch{ > */ > struct list_head active_mmu_pages; > struct list_head pci_pt_dev_head; > + struct dmar_domain *domain; Use a descriptive name, like intel_iommu_domain. > struct kvm_pic *vpic; > struct kvm_ioapic *vioapic; > struct kvm_pit *vpit; > diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h > index 5f93b78..6202ed1 100644 > --- a/include/asm-x86/kvm_para.h > +++ b/include/asm-x86/kvm_para.h > @@ -170,5 +170,6 @@ struct kvm_pci_pt_info { > struct kvm_pci_passthrough_dev { > struct kvm_pci_pt_info guest; > struct kvm_pci_pt_info host; > + struct pci_dev *pdev; /* kernel device pointer for host dev > */ > }; > #endif > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 4e16682..bcfcf78 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -276,6 +276,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); > int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); > void kvm_vcpu_kick(struct kvm_vcpu *vcpu); > > +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, > + unsigned long npages); > +int kvm_iommu_map_guest(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev); > +int kvm_iommu_unmap_guest(struct kvm *kvm); > + Why can't these be put in asm-x86/kvm_host.h? 
> static inline void kvm_guest_enter(void) > { > account_system_vtime(current); > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index d3cb4cc..e46614a 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -309,6 +309,9 @@ int __kvm_set_memory_region(struct kvm *kvm, > new.npages = npages; > new.flags = mem->flags; > > + /* map the pages in iommu page table */ (if one exists) > + kvm_iommu_map_pages(kvm, base_gfn, npages); > + You should do this once all the memory is set up properly (which is just before returning 0) > /* Disallow changing a memory slot's size. */ > r = -EINVAL; > if (npages && old.npa Amit |
From: Kay, A. M <all...@in...> - 2008-05-06 22:01:49
|
>> + >> +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 >> + >> +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev >> *dev); >> +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu); >> +void iommu_free_domain(struct dmar_domain *domain); >> +int domain_init(struct dmar_domain *domain, int guest_width); >> +int domain_context_mapping(struct dmar_domain *d, >> + struct pci_dev *pdev); >> +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, >> + u64 hpa, size_t size, int prot); >> +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 >> devfn); >> +struct dmar_domain * find_domain(struct pci_dev *pdev); > >Please move these to a .h file and also prefix appropriate keywords: > >domain_context_mapping is confusing and since it's an intel >iommu-only thing, >use something like > >intel_iommu_domain_context_mapping > These functions currently are just direct calls into existing functions in drivers/pci/intel-iommu.c - hence the lack of more descriptive name in KVM environment. To get more relevant names in KVM environment, we can either create wrappers for these functions or use an iommu function table. Allen |
From: Avi K. <av...@qu...> - 2008-05-06 10:34:33
|
Kay, Allen M wrote: > + > +#include <linux/list.h> > +#include <linux/kvm_host.h> > +#include <linux/pci.h> > +#include <linux/dmar.h> > +#include <linux/intel-iommu.h> > + > +//#define DEBUG > + > +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 > The name "domain" is too generic; please use dma_domain or io_domain or something similar. > +static int kvm_iommu_map_memslots(struct kvm *kvm) > +{ > + int i, status; > + for (i = 0; i < kvm->nmemslots; i++) { > + status = kvm_iommu_map_pages(kvm, > kvm->memslots[i].base_gfn, > + kvm->memslots[i].npages); > + if (status) > + return status; > Need to undo in case of partial completion. > diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h > index 5f93b78..6202ed1 100644 > --- a/include/asm-x86/kvm_para.h > +++ b/include/asm-x86/kvm_para.h > @@ -170,5 +170,6 @@ struct kvm_pci_pt_info { > struct kvm_pci_passthrough_dev { > struct kvm_pci_pt_info guest; > struct kvm_pci_pt_info host; > + struct pci_dev *pdev; /* kernel device pointer for host dev > */ > This should be stored somewhere private (not sure, but I think kvm_pci_passthrough_dev is a public interface). -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. |
From: Anthony L. <an...@co...> - 2008-05-07 00:30:59
|
Kay, Allen M wrote: > Kvm kernel changes. > > Signed-off-by: Allen M Kay <all...@in...> > > ------ > arch/x86/kvm/Makefile | 2 > arch/x86/kvm/vtd.c | 183 > +++++++++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/x86.c | 7 + > include/asm-x86/kvm_host.h | 3 > include/asm-x86/kvm_para.h | 1 > include/linux/kvm_host.h | 6 + > virt/kvm/kvm_main.c | 3 > 7 files changed, 204 insertions(+), 1 deletion(-) > > ------ > > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile > index c97d35c..b1057fb 100644 > --- a/arch/x86/kvm/Makefile > +++ b/arch/x86/kvm/Makefile > @@ -12,7 +12,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm > kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o > lapic.o \ > i8254.o > obj-$(CONFIG_KVM) += kvm.o > -kvm-intel-objs = vmx.o > +kvm-intel-objs = vmx.o vtd.o > obj-$(CONFIG_KVM_INTEL) += kvm-intel.o > kvm-amd-objs = svm.o > obj-$(CONFIG_KVM_AMD) += kvm-amd.o > diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c > new file mode 100644 > index 0000000..9a080b5 > --- /dev/null > +++ b/arch/x86/kvm/vtd.c > @@ -0,0 +1,183 @@ > +/* > + * Copyright (c) 2006, Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or modify > it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but > WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY > or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public > License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License > along with > + * this program; if not, write to the Free Software Foundation, Inc., > 59 Temple > + * Place - Suite 330, Boston, MA 02111-1307 USA. > + * > + * Copyright (C) 2006-2008 Intel Corporation > + * Author: Allen M. 
Kay <all...@in...> > + * Author: Weidong Han <wei...@in...> > + */ > + > +#include <linux/list.h> > +#include <linux/kvm_host.h> > +#include <linux/pci.h> > +#include <linux/dmar.h> > +#include <linux/intel-iommu.h> > + > +//#define DEBUG > + > +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 > + > +struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev > *dev); > +struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu); > +void iommu_free_domain(struct dmar_domain *domain); > +int domain_init(struct dmar_domain *domain, int guest_width); > +int domain_context_mapping(struct dmar_domain *d, > + struct pci_dev *pdev); > +int domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, > + u64 hpa, size_t size, int prot); > +void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 > devfn); > +struct dmar_domain * find_domain(struct pci_dev *pdev); > These definitely need to be moved to a common header. > + > +int kvm_iommu_map_pages(struct kvm *kvm, > + gfn_t base_gfn, unsigned long npages) > +{ > + unsigned long gpa; > + struct page *page; > + hpa_t hpa; > + int j, write; > + struct vm_area_struct *vma; > + > + if (!kvm->arch.domain) > + return 1; > In the kernel, we should be using -errno to return error codes. > + gpa = base_gfn << PAGE_SHIFT; > + page = gfn_to_page(kvm, base_gfn); > + hpa = page_to_phys(page); > Please use gfn_to_pfn(). Keep in mind, by using gfn_to_page/gfn_to_pfn, you take a reference to a page. You're leaking that reference here. > + printk(KERN_DEBUG "kvm_iommu_map_page: gpa = %lx\n", gpa); > + printk(KERN_DEBUG "kvm_iommu_map_page: hpa = %llx\n", hpa); > + printk(KERN_DEBUG "kvm_iommu_map_page: size = %lx\n", > + npages*PAGE_SIZE); > + > + for (j = 0; j < npages; j++) { > + gpa += PAGE_SIZE; > + page = gfn_to_page(kvm, gpa >> PAGE_SHIFT); > + hpa = page_to_phys(page); > Again, gfn_to_pfn() and you're taking a reference that I never see you releasing. 
> + domain_page_mapping(kvm->arch.domain, gpa, hpa, > PAGE_SIZE, > + DMA_PTE_READ | DMA_PTE_WRITE); > + vma = find_vma(current->mm, gpa); > + if (!vma) > + return 1; > + write = (vma->vm_flags & VM_WRITE) != 0; > + get_user_pages(current, current->mm, gpa, > + PAGE_SIZE, write, 0, NULL, NULL); > I don't quite see what you're doing here. It looks like you're trying to pre-fault the page in? gfn_to_pfn will do that for you. You're taking a bunch of references here that are never getting released. I think the general approach here is a bit faulty. I think what we want to do is mlock() from userspace to ensure all the memory is present for the guest. We should combine this with MMU-notifiers such that whenever the userspace mapping changes, we can reprogram the IOMMU. In the case where we don't have MMU-notifiers, we simply hold on to the memory forever and never program the IOMMU. The initial mlock() in userspace is somewhat of a nop here but it's important with MMU-notifiers because we will no longer be holding a reference for guest memory. We have to ensure we don't swap KVM guest memory while using hardware pass-through, but AFAICT, we do not need to make the memory non-reclaimable As long as we reprogram the IOMMU with a new, valid, mapping everything should be fine. mlock() really gives us the right semantics. Semantically, a PV API that supports DMA window registration simply mlock()s the DMA regions on behalf of the guest. No special logic should be needed. Regards, Anthony Liguori |
From: Kay, A. M <all...@in...> - 2008-05-07 00:47:07
|
>We have to ensure we don't swap KVM guest memory while using hardware >pass-through, but AFAICT, we do not need to make the memory >non-reclaimable As long as we reprogram the IOMMU with a new, valid, >mapping everything should be fine. mlock() really gives us the right >semantics. > >Semantically, a PV API that supports DMA window registration simply >mlock()s the DMA regions on behalf of the guest. No special logic >should be needed. > What should be done for unmodified guest where there is no PV driver in the guest? Would a call to mlock() from qemu/hw/pci-passthrough.c/add_pci_passthrough_device() be a reasonable thing to do? Allen |
From: Anthony L. <an...@co...> - 2008-05-07 01:56:31
|
Kay, Allen M wrote: >> We have to ensure we don't swap KVM guest memory while using hardware >> pass-through, but AFAICT, we do not need to make the memory >> non-reclaimable As long as we reprogram the IOMMU with a new, valid, >> mapping everything should be fine. mlock() really gives us the right >> semantics. >> >> Semantically, a PV API that supports DMA window registration simply >> mlock()s the DMA regions on behalf of the guest. No special logic >> should be needed. >> >> > > What should be done for unmodified guest where there is no PV driver in > the guest? Would a call to mlock() from > qemu/hw/pci-passthrough.c/add_pci_passthrough_device() a reasonable > thing to do? > Yup. The idea is to ensure that the memory is always present, without necessarily taking a reference to it. This allows for memory reclaiming which should allow for things like NUMA page migration. We can't swap of course but that doesn't mean reclaimation isn't useful. Regards, Anthony Liguori > Allen > |
From: Anthony L. <an...@co...> - 2008-05-07 19:22:24
|
Avi Kivity wrote: > Anthony Liguori wrote: > > >>> What should be done for unmodified guest where there is no PV driver in >>> the guest? Would a call to mlock() from >>> qemu/hw/pci-passthrough.c/add_pci_passthrough_device() a reasonable >>> thing to do? >>> >> >> Yup. The idea is to ensure that the memory is always present, >> without necessarily taking a reference to it. This allows for memory >> reclaiming which should allow for things like NUMA page migration. >> We can't swap of course but that doesn't mean reclaimation isn't useful. >> > > I don't think we can do page migration with VT-d. You need to be able > to detect whether the page has been changed by dma after you've copied > it but before you changed the pte, but VT-d doesn't allow that AFAICT. Hrm, I would have to look at the VT-d but I suspect you're right. That's unfortunate. That means mlock() isn't sufficient. It also means that the VMAs can't be updated while the guest is running. Is there any way to lock a vma region such that things like madvise/mmap(MAP_FIXED) will always fail? Regards, Anthony Liguori |
From: Avi K. <av...@qu...> - 2008-05-11 15:31:22
|
Anthony Liguori wrote: >> I don't think we can do page migration with VT-d. You need to be able >> to detect whether the page has been changed by dma after you've copied >> it but before you changed the pte, but VT-d doesn't allow that AFAICT. >> > > Hrm, I would have to look at the VT-d but I suspect you're right. > That's unfortunate. > > That means mlock() isn't sufficient. It also means that the VMAs can't > be updated while the guest is running. Is there any way to lock a vma > region such that things like madvise/mmap(MAP_FIXED) will always fail? > Userspace can simply not issue them. Page migration is also controlled by userspace. Once active memory defragmentation goes in, we need a way to tell the kernel that the allocation is unreclaimable, perhaps with a new mmap flag. -- error compiling committee.c: too many arguments to function |
From: Avi K. <av...@qu...> - 2008-05-07 05:51:12
|
Anthony Liguori wrote: >> What should be done for unmodified guest where there is no PV driver in >> the guest? Would a call to mlock() from >> qemu/hw/pci-passthrough.c/add_pci_passthrough_device() a reasonable >> thing to do? >> >> > > Yup. The idea is to ensure that the memory is always present, without > necessarily taking a reference to it. This allows for memory reclaiming > which should allow for things like NUMA page migration. We can't swap > of course but that doesn't mean reclaimation isn't useful. > I don't think we can do page migration with VT-d. You need to be able to detect whether the page has been changed by dma after you've copied it but before you changed the pte, but VT-d doesn't allow that AFAICT. -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. |
From: Muli Ben-Y. <mu...@il...> - 2008-05-11 08:49:21
|
On Mon, May 05, 2008 at 02:36:23PM -0700, Kay, Allen M wrote: > + for (j = 0; j < npages; j++) { > + gpa += PAGE_SIZE; > + page = gfn_to_page(kvm, gpa >> PAGE_SHIFT); > + hpa = page_to_phys(page); > + domain_page_mapping(kvm->arch.domain, gpa, hpa, > PAGE_SIZE, > + DMA_PTE_READ | DMA_PTE_WRITE); > + vma = find_vma(current->mm, gpa); > + if (!vma) > + return 1; > + write = (vma->vm_flags & VM_WRITE) != 0; > + get_user_pages(current, current->mm, gpa, > + PAGE_SIZE, write, 0, NULL, NULL); > + } > + return 0; > +} get_user_pages can fail. We should first try to fault in the pages and only if successful map them in the IOMMU. Also, you need to protect against the vma going away here. Cheers, Muli |