From: Marcelo T. <ma...@kv...> - 2008-01-08 15:32:05
|
Following patch introduces a KVM guest balloon driver. Communication to/from the host is performed via virtio. Next patch implements the QEMU driver and handling. Signed-off-by: Marcelo Tosatti <mto...@re...> Index: linux-2.6-nv/drivers/virtio/Kconfig =================================================================== --- linux-2.6-nv.orig/drivers/virtio/Kconfig +++ linux-2.6-nv/drivers/virtio/Kconfig @@ -23,3 +23,12 @@ config VIRTIO_PCI If unsure, say M. +config KVM_BALLOON + tristate "KVM balloon driver (EXPERIMENTAL)" + depends on VIRTIO_PCI + ---help--- + This driver provides support for ballooning memory in/out of a + KVM paravirt guest. + + If unsure, say M. + Index: linux-2.6-nv/drivers/virtio/Makefile =================================================================== --- linux-2.6-nv.orig/drivers/virtio/Makefile +++ linux-2.6-nv/drivers/virtio/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_VIRTIO) += virtio.o obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o +obj-$(CONFIG_KVM_BALLOON) += kvm_balloon.o Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c =================================================================== --- /dev/null +++ linux-2.6-nv/drivers/virtio/kvm_balloon.c @@ -0,0 +1,577 @@ +/* + * KVM guest balloon driver + * + * Copyright (C) 2007, Qumranet, Inc., Dor Laor <dor...@qu...> + * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti <mto...@re...> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <asm/uaccess.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/wait.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/version.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_pci.h> +#include <linux/preempt.h> +#include <linux/kvm_types.h> +#include <linux/kvm_host.h> + +MODULE_AUTHOR ("Dor Laor"); +MODULE_DESCRIPTION ("Implements guest ballooning support"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1"); + +#define CMD_BALLOON_INFLATE 0x1 +#define CMD_BALLOON_DEFLATE 0x2 + +static int kvm_balloon_debug; + +#define DEBUG +#ifdef DEBUG +#define dprintk(str...) if (kvm_balloon_debug) printk(str) +#else +#define dprintk(str...) +#endif + +static LIST_HEAD(balloon_plist); +static LIST_HEAD(balloon_work); +static int balloon_size = 0; +static DEFINE_SPINLOCK(balloon_plist_lock); +static DEFINE_SPINLOCK(balloon_queue_lock); + +struct virtio_balloon_hdr { + uint8_t cmd; + uint8_t status; +}; + +#define BALLOON_DATA_SIZE 200 + +struct balloon_buf { + struct virtio_balloon_hdr hdr; + u8 data[BALLOON_DATA_SIZE]; +}; + +struct balloon_work { + struct balloon_buf *buf; + struct list_head list; +}; + +#define VIRTIO_MAX_SG 2 + +struct virtballoon { + struct virtio_device *dev; + struct virtqueue *vq; + struct task_struct *balloon_thread; + wait_queue_head_t balloon_wait; + wait_queue_head_t rmmod_wait; + uint32_t target_nrpages; + atomic_t inflight_bufs; +}; + +struct balloon_page { + struct page *bpage; + struct list_head bp_list; +}; + +struct virtballoon virtballoon; + +struct balloon_buf *alloc_balloon_buf(void) +{ + struct balloon_buf *buf; + + buf = kzalloc(sizeof(struct balloon_buf), GFP_KERNEL); + if (!buf) + printk(KERN_ERR "%s: allocation failed\n", __func__); + + return buf; +} + +static int send_balloon_buf(uint8_t cmd, struct balloon_buf *buf) +{ + struct scatterlist sg[VIRTIO_MAX_SG]; + struct virtqueue *vq = virtballoon.vq; + int err = 0; + + buf->hdr.cmd = cmd; + + sg_init_table(sg, VIRTIO_MAX_SG); + sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr)); + sg_set_buf(&sg[1], &buf->data, sizeof(buf->data)); + + spin_lock_irq(&balloon_queue_lock); + err = vq->vq_ops->add_buf(vq, sg, 0, 2, buf); + if (err) { + printk("%s: add_buf err\n", __func__); + goto out; + } + atomic_inc(&virtballoon.inflight_bufs); + + /* TODO: kick several balloon buffers at once */ + vq->vq_ops->kick(vq); +out: + spin_unlock_irq(&balloon_queue_lock); + return err; +} + +static int kvm_balloon_inflate(int32_t npages) +{ + LIST_HEAD(tmp_list); + struct balloon_page *node, *tmp; + struct balloon_buf *buf; + u32 *pfn; + int allocated = 0; + int i, r = -ENOMEM; + + buf = alloc_balloon_buf(); + if (!buf) + return r; + + pfn = (u32 *)&buf->data; + *pfn++ = (u32)npages; + + for (i = 0; i < npages; i++) { + node = kzalloc(sizeof(struct balloon_page), GFP_KERNEL); + if (!node) + goto out_free; + + node->bpage = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); + if (!node->bpage) { + kfree(node); + goto out_free; + } + + list_add(&node->bp_list, &tmp_list); + allocated++; + *pfn = page_to_pfn(node->bpage); + pfn++; + } + + r = send_balloon_buf(CMD_BALLOON_INFLATE, buf); + if (r) + goto out_free; + + spin_lock(&balloon_plist_lock); + list_splice(&tmp_list, &balloon_plist); + balloon_size += allocated; + totalram_pages -= allocated; + dprintk("%s: current balloon size=%d\n", __FUNCTION__, + balloon_size); + spin_unlock(&balloon_plist_lock); + return allocated; + +out_free: + list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) { + __free_page(node->bpage); + list_del(&node->bp_list); + kfree(node); + } + return r; +} + +static int kvm_balloon_deflate(int32_t npages) +{ + LIST_HEAD(tmp_list); + struct balloon_page *node, *tmp; + struct balloon_buf *buf; + u32 *pfn; + int deallocated = 0; + int r = 0; + + buf = alloc_balloon_buf(); + if (!buf) + return r; + + spin_lock(&balloon_plist_lock); + + if (balloon_size < npages) { + printk("%s: balloon=%d while deflate rq=%d\n", + __FUNCTION__, balloon_size, npages); + npages = balloon_size; + if (!npages) + goto out; + } + + pfn = (u32 *)&buf->data; + *pfn++ = (u32)-npages; + + /* + * Move the balloon pages to tmp list before issuing + * the virtio buffer + */ + list_for_each_entry_safe(node, tmp, &balloon_plist, bp_list) { + *pfn++ = page_to_pfn(node->bpage); + list_move(&node->bp_list, &tmp_list); + if (++deallocated == npages) + break; + } + + r = send_balloon_buf(CMD_BALLOON_DEFLATE, buf); + if (r) + goto out; + + list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) { + list_del(&node->bp_list); + kfree(node); + } + balloon_size -= npages; + totalram_pages += npages; + dprintk("%s: current balloon size=%d\n", __FUNCTION__, + balloon_size); + + spin_unlock(&balloon_plist_lock); + return deallocated; + +out: + list_splice(&tmp_list, &balloon_plist); + spin_unlock(&balloon_plist_lock); + return r; +} + +#define MAX_BALLOON_PAGES_PER_OP (BALLOON_DATA_SIZE/sizeof(u32)) \ + - sizeof(int32_t) +#define MAX_BALLOON_XFLATE_OP 1000000 + +static int kvm_balloon_xflate(int32_t npages) +{ + int r = -EINVAL, i; + int iterations; + int abspages; + int curr_pages = 0; + int gfns_per_buf; + + abspages = abs(npages); + + if (abspages > MAX_BALLOON_XFLATE_OP) { + printk("%s: bad npages=%d\n", __func__, + npages); + return -EINVAL; + } + + dprintk("%s: got %s, npages=%d\n", __FUNCTION__, + (npages > 0)? "inflate":"deflate", npages); + + gfns_per_buf = MAX_BALLOON_PAGES_PER_OP; + + /* + * Call the balloon in PAGE_SIZE*pfns-per-buf + * iterations + */ + iterations = DIV_ROUND_UP(abspages, gfns_per_buf); + dprintk("%s: iterations=%d\n", __FUNCTION__, iterations); + + for (i = 0; i < iterations; i++) { + int32_t pages_in_iteration = + min(abspages - curr_pages, gfns_per_buf); + + if (npages > 0) + r = kvm_balloon_inflate(pages_in_iteration); + else + r = kvm_balloon_deflate(pages_in_iteration); + + if (r < 0) + return r; + curr_pages += r; + if (r != pages_in_iteration) + break; + cond_resched(); + } + + return curr_pages; +} + +static void inflate_done(struct balloon_buf *buf) +{ + uint8_t status = buf->hdr.status; + + /* error inflating, return pages to the system */ + if (status) { + struct balloon_page *node, *tmp; + + spin_lock(&balloon_plist_lock); + list_for_each_entry_safe(node, tmp, &balloon_plist, bp_list) { + u32 *pfn = (u32 *)&buf->data; + int npages = (int)*pfn++; + int i; + + for (i=0;i<npages;i++) { + if (page_to_pfn(node->bpage) == *pfn) { + list_del(&node->bp_list); + kfree(node); + __free_page(pfn_to_page(*pfn)); + balloon_size--; + totalram_pages++; + virtballoon.target_nrpages++; + break; + } + pfn++; + } + } + spin_unlock(&balloon_plist_lock); + virtballoon.dev->config->set(virtballoon.dev, 0, + &virtballoon.target_nrpages, + sizeof(virtballoon.target_nrpages)); + } +} + +static void deflate_done(struct balloon_buf *buf) +{ + uint8_t status = buf->hdr.status; + + /* deflate OK, return pages to the system */ + if (!status) { + u32 *pfn = (u32 *)&buf->data; + int npages, i; + + npages = (int)*pfn++; + npages = abs(npages); + + for (i = 0; i<npages; i++) { + __free_page(pfn_to_page(*pfn)); + pfn++; + } + /* deflate error, add pages back to ballooned list */ + } else { + u32 *pfn = (u32 *)&buf->data; + int npages, i; + struct balloon_page *node; + + npages = (int)*pfn++; + npages = abs(npages); + + spin_lock(&balloon_plist_lock); + for (i = 0; i < npages; i++) { + node = kzalloc(sizeof(struct balloon_page), GFP_KERNEL); + if (!node) { + spin_unlock(&balloon_plist_lock); + printk(KERN_ERR "%s: allocation failure\n", + __func__); + goto out; + } + + node->bpage = pfn_to_page(*pfn++); + list_add(&node->bp_list, &balloon_plist); + balloon_size++; + totalram_pages--; + virtballoon.target_nrpages--; + } + spin_unlock(&balloon_plist_lock); + virtballoon.dev->config->set(virtballoon.dev, 0, + &virtballoon.target_nrpages, + sizeof(virtballoon.target_nrpages)); + } +out: + return; +} + +static int balloon_thread(void *p) +{ + struct virtballoon *v = p; + DEFINE_WAIT(wait); + + set_freezable(); + for (;;) { + prepare_to_wait(&v->balloon_wait, &wait, TASK_UNINTERRUPTIBLE); + schedule(); + finish_wait(&v->balloon_wait, &wait); + + try_to_freeze(); + + if (kthread_should_stop()) + break; + + if (v->target_nrpages != totalram_pages) { + int delta = totalram_pages - v->target_nrpages; + kvm_balloon_xflate(delta); + } + + spin_lock_irq(&balloon_queue_lock); + while (!list_empty(&balloon_work)) { + struct balloon_work *work; + struct balloon_buf *buf; + + work = list_entry(balloon_work.next, + struct balloon_work, list); + list_del(&work->list); + spin_unlock_irq(&balloon_queue_lock); + buf = work->buf; + kfree(work); + + switch(buf->hdr.cmd) { + case CMD_BALLOON_DEFLATE: + deflate_done(buf); + break; + case CMD_BALLOON_INFLATE: + inflate_done(buf); + break; + default: + printk("%s: unknown cmd 0x%x\n", __func__, + buf->hdr.cmd); + } + kfree(buf); + if (atomic_dec_and_test(&v->inflight_bufs)) { + if (waitqueue_active(&v->rmmod_wait)) { + wake_up(&v->rmmod_wait); + return 0; + } + } + cond_resched(); + spin_lock_irq(&balloon_queue_lock); + } + spin_unlock_irq(&balloon_queue_lock); + } + return 0; +} + +static bool balloon_tx_done(struct virtqueue *vq) +{ + struct balloon_buf *buf; + unsigned int len; + + spin_lock(&balloon_queue_lock); + while ((buf = vq->vq_ops->get_buf(vq, &len)) != NULL) { + struct balloon_work *work; + + work = kzalloc(sizeof(struct balloon_work), GFP_ATOMIC); + if (!work) + continue; + INIT_LIST_HEAD(&work->list); + work->buf = buf; + + list_add(&work->list, &balloon_work); + } + spin_unlock(&balloon_queue_lock); + wake_up(&virtballoon.balloon_wait); + + return true; +} + +static irqreturn_t balloon_irq(int irq, void *opaque) +{ + struct virtballoon *vb = opaque; + uint32_t target_nrpages; + + __virtio_config_val(vb->dev, 0, &target_nrpages); + + dprintk("%s: target_nrpages = %d, vb->target_nrpages = %d\n", + __func__, target_nrpages, vb->target_nrpages); + + if (target_nrpages != vb->target_nrpages) { + vb->target_nrpages = target_nrpages; + wake_up(&vb->balloon_wait); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +#define VIRTIO_ID_BALLOON 3 + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID}, + { 0 }, +}; + +static int balloon_probe(struct virtio_device *vdev) +{ + struct virtio_pci_device *pvdev = to_vp_device(vdev); + int err = -EBUSY; + + if (virtballoon.dev) { + printk("kvm_ballon: error, already registered\n"); + return -EBUSY; + } + + virtballoon.vq = vdev->config->find_vq(vdev, 0, balloon_tx_done); + if (IS_ERR(virtballoon.vq)) { + printk("%s: error finding vq\n", __func__); + return -EINVAL; + } + + virtballoon.dev = vdev; + init_waitqueue_head(&virtballoon.balloon_wait); + init_waitqueue_head(&virtballoon.rmmod_wait); + atomic_set(&virtballoon.inflight_bufs, 0); + + err = request_irq(pvdev->pci_dev->irq, balloon_irq, IRQF_SHARED, + pvdev->vdev.dev.bus_id, &virtballoon); + if (err) + goto out_free_vq; + + virtballoon.balloon_thread = kthread_run(balloon_thread, + &virtballoon, + "kvm_balloond"); + printk("kvm_balloon: registered\n"); + + return 0; + +out_free_vq: + vdev->config->del_vq(virtballoon.vq); + virtballoon.dev = NULL; + return err; +} + +static void balloon_remove(struct virtio_device *vdev) +{ + kthread_stop(virtballoon.balloon_thread); + vdev->config->del_vq(virtballoon.vq); +} + +static struct virtio_driver virtio_balloon = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = balloon_probe, + .remove = __devexit_p(balloon_remove), +}; + +module_param(kvm_balloon_debug, int, 0); + +static int __init kvm_balloon_init(void) +{ + virtballoon.dev = NULL; + + return register_virtio_driver(&virtio_balloon); +} + +static void __exit kvm_balloon_exit(void) +{ + spin_lock(&balloon_plist_lock); + if (balloon_size) { + DEFINE_WAIT(wait); + + virtballoon.target_nrpages += balloon_size; + spin_unlock(&balloon_plist_lock); + virtballoon.dev->config->set(virtballoon.dev, 0, + &virtballoon.target_nrpages, + sizeof(virtballoon.target_nrpages)); + wake_up(&virtballoon.balloon_wait); + prepare_to_wait(&virtballoon.rmmod_wait, &wait, + TASK_UNINTERRUPTIBLE); + schedule(); + finish_wait(&virtballoon.rmmod_wait, &wait); + spin_lock(&balloon_plist_lock); + } + + if (balloon_size) + printk(KERN_ERR "%s: exit while balloon not empty!\n", + __FUNCTION__); + + spin_unlock(&balloon_plist_lock); + + unregister_virtio_driver(&virtio_balloon); +} + +module_init(kvm_balloon_init); +module_exit(kvm_balloon_exit); Index: linux-2.6-nv/drivers/virtio/virtio_pci.c =================================================================== --- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c +++ linux-2.6-nv/drivers/virtio/virtio_pci.c @@ -30,20 +30,6 @@ MODULE_DESCRIPTION("virtio-pci"); MODULE_LICENSE("GPL"); MODULE_VERSION("1"); -/* Our device structure */ -struct virtio_pci_device -{ - struct virtio_device vdev; - struct pci_dev *pci_dev; - - /* the IO mapping for the PCI config space */ - void *ioaddr; - - /* a list of queues so we can dispatch IRQs */ - spinlock_t lock; - struct list_head virtqueues; -}; - struct virtio_pci_vq_info { /* the actual virtqueue */ @@ -67,6 +53,7 @@ static struct pci_device_id virtio_pci_i { 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ { 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ { 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ + { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */ { 0 }, }; @@ -89,12 +76,6 @@ static struct device virtio_pci_root = { /* Unique numbering for devices under the kvm root */ static unsigned int dev_index; -/* Convert a generic virtio device to our structure */ -static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) -{ - return container_of(vdev, struct virtio_pci_device, vdev); -} - /* virtio config->feature() implementation */ static bool vp_feature(struct virtio_device *vdev, unsigned bit) { Index: linux-2.6-nv/include/linux/virtio_pci.h =================================================================== --- linux-2.6-nv.orig/include/linux/virtio_pci.h +++ linux-2.6-nv/include/linux/virtio_pci.h @@ -19,6 +19,26 @@ #include <linux/virtio_config.h> +/* Our device structure */ +struct virtio_pci_device +{ + struct virtio_device vdev; + struct pci_dev *pci_dev; + + /* the IO mapping for the PCI config space */ + void *ioaddr; + + /* a list of queues so we can dispatch IRQs */ + spinlock_t lock; + struct list_head virtqueues; +}; + +/* Convert a generic virtio device to our structure */ +struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) +{ + return container_of(vdev, struct virtio_pci_device, vdev); +} + /* A 32-bit r/o bitmask of the features supported by the host */ #define VIRTIO_PCI_HOST_FEATURES 0 |
From: Anthony L. <an...@co...> - 2008-01-08 15:42:06
|
Marcelo Tosatti wrote: > Following patch introduces a KVM guest balloon driver. Communication > to/from the host is performed via virtio. > > Next patch implements the QEMU driver and handling. > > Signed-off-by: Marcelo Tosatti <mto...@re...> > > Index: linux-2.6-nv/drivers/virtio/Kconfig > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/Kconfig > +++ linux-2.6-nv/drivers/virtio/Kconfig > @@ -23,3 +23,12 @@ config VIRTIO_PCI > > If unsure, say M. > > +config KVM_BALLOON > + tristate "KVM balloon driver (EXPERIMENTAL)" > + depends on VIRTIO_PCI > + ---help--- > + This driver provides support for ballooning memory in/out of a > + KVM paravirt guest. > + > + If unsure, say M. > + > Index: linux-2.6-nv/drivers/virtio/Makefile > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/Makefile > +++ linux-2.6-nv/drivers/virtio/Makefile > @@ -1,3 +1,4 @@ > obj-$(CONFIG_VIRTIO) += virtio.o > obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o > obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o > +obj-$(CONFIG_KVM_BALLOON) += kvm_balloon.o > Index: linux-2.6-nv/drivers/virtio/kvm_balloon.c > =================================================================== > --- /dev/null > +++ linux-2.6-nv/drivers/virtio/kvm_balloon.c > @@ -0,0 +1,577 @@ > +/* > + * KVM guest balloon driver > + * > + * Copyright (C) 2007, Qumranet, Inc., Dor Laor <dor...@qu...> > + * Copyright (C) 2007, Red Hat, Inc., Marcelo Tosatti <mto...@re...> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + */ > + > +#include <asm/uaccess.h> > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/percpu.h> > +#include <linux/init.h> > +#include <linux/interrupt.h> > +#include <linux/pci.h> > +#include <linux/mm.h> > +#include <linux/swap.h> > +#include <linux/wait.h> > +#include <linux/kthread.h> > +#include <linux/freezer.h> > +#include <linux/version.h> > +#include <linux/virtio.h> > +#include <linux/virtio_config.h> > +#include <linux/virtio_pci.h> > +#include <linux/preempt.h> > +#include <linux/kvm_types.h> > +#include <linux/kvm_host.h> > + > +MODULE_AUTHOR ("Dor Laor"); > +MODULE_DESCRIPTION ("Implements guest ballooning support"); > +MODULE_LICENSE("GPL"); > +MODULE_VERSION("1"); > + > +#define CMD_BALLOON_INFLATE 0x1 > +#define CMD_BALLOON_DEFLATE 0x2 > Anything that's part of the ABI needs to be in a header file. This is how the rest of the virtio devices work. > +static int kvm_balloon_debug; > + > +#define DEBUG > +#ifdef DEBUG > +#define dprintk(str...) if (kvm_balloon_debug) printk(str) > +#else > +#define dprintk(str...) > +#endif > I think pr_debug is the more appropriate thing to use. > +static LIST_HEAD(balloon_plist); > +static LIST_HEAD(balloon_work); > +static int balloon_size = 0; > +static DEFINE_SPINLOCK(balloon_plist_lock); > +static DEFINE_SPINLOCK(balloon_queue_lock); > So far, all the drivers hang their state off of the virtio_device instead of using statics. While it's probably arguable that you will never have multiple balloon drivers, I think it's a good idea to at least be consistent and not use all of these globals. > +struct virtio_balloon_hdr { > + uint8_t cmd; > + uint8_t status; > +}; > + > +#define BALLOON_DATA_SIZE 200 > + > +struct balloon_buf { > + struct virtio_balloon_hdr hdr; > + u8 data[BALLOON_DATA_SIZE]; > +}; > + > +struct balloon_work { > + struct balloon_buf *buf; > + struct list_head list; > +}; > + > +#define VIRTIO_MAX_SG 2 > + > +struct virtballoon { > + struct virtio_device *dev; > + struct virtqueue *vq; > + struct task_struct *balloon_thread; > + wait_queue_head_t balloon_wait; > + wait_queue_head_t rmmod_wait; > + uint32_t target_nrpages; > + atomic_t inflight_bufs; > +}; > + > +struct balloon_page { > + struct page *bpage; > + struct list_head bp_list; > +}; > + > +struct virtballoon virtballoon; > + > +struct balloon_buf *alloc_balloon_buf(void) > +{ > + struct balloon_buf *buf; > + > + buf = kzalloc(sizeof(struct balloon_buf), GFP_KERNEL); > + if (!buf) > + printk(KERN_ERR "%s: allocation failed\n", __func__); > + > + return buf; > +} > + > +static int send_balloon_buf(uint8_t cmd, struct balloon_buf *buf) > +{ > + struct scatterlist sg[VIRTIO_MAX_SG]; > + struct virtqueue *vq = virtballoon.vq; > + int err = 0; > + > + buf->hdr.cmd = cmd; > + > + sg_init_table(sg, VIRTIO_MAX_SG); > + sg_set_buf(&sg[0], &buf->hdr, sizeof(buf->hdr)); > + sg_set_buf(&sg[1], &buf->data, sizeof(buf->data)); > + > + spin_lock_irq(&balloon_queue_lock); > + err = vq->vq_ops->add_buf(vq, sg, 0, 2, buf); > + if (err) { > + printk("%s: add_buf err\n", __func__); > + goto out; > + } > + atomic_inc(&virtballoon.inflight_bufs); > + > + /* TODO: kick several balloon buffers at once */ > + vq->vq_ops->kick(vq); > +out: > + spin_unlock_irq(&balloon_queue_lock); > + return err; > +} > + > +static int kvm_balloon_inflate(int32_t npages) > +{ > + LIST_HEAD(tmp_list); > + struct balloon_page *node, *tmp; > + struct balloon_buf *buf; > + u32 *pfn; > + int allocated = 0; > + int i, r = -ENOMEM; > + > + buf = alloc_balloon_buf(); > + if (!buf) > + return r; > + > + pfn = (u32 *)&buf->data; > + *pfn++ = (u32)npages; > + > + for (i = 0; i < npages; i++) { > + node = kzalloc(sizeof(struct balloon_page), GFP_KERNEL); > + if (!node) > + goto out_free; > Instead of allocating a node for each page, you could use page->private to create a linked list. You're potentially burning a lot of memory when ballooning down by a large amount otherwise. > + node->bpage = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); > + if (!node->bpage) { > + kfree(node); > + goto out_free; > + } > + > + list_add(&node->bp_list, &tmp_list); > + allocated++; > + *pfn = page_to_pfn(node->bpage); > + pfn++; > + } > + > + r = send_balloon_buf(CMD_BALLOON_INFLATE, buf); > + if (r) > + goto out_free; > + > + spin_lock(&balloon_plist_lock); > + list_splice(&tmp_list, &balloon_plist); > + balloon_size += allocated; > + totalram_pages -= allocated; > + dprintk("%s: current balloon size=%d\n", __FUNCTION__, > + balloon_size); > + spin_unlock(&balloon_plist_lock); > + return allocated; > + > +out_free: > + list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) { > + __free_page(node->bpage); > + list_del(&node->bp_list); > + kfree(node); > + } > + return r; > +} > <snip> > + > +static int balloon_probe(struct virtio_device *vdev) > +{ > + struct virtio_pci_device *pvdev = to_vp_device(vdev); > + int err = -EBUSY; > + > + if (virtballoon.dev) { > + printk("kvm_ballon: error, already registered\n"); > + return -EBUSY; > + } > + > + virtballoon.vq = vdev->config->find_vq(vdev, 0, balloon_tx_done); > + if (IS_ERR(virtballoon.vq)) { > + printk("%s: error finding vq\n", __func__); > + return -EINVAL; > + } > + > + virtballoon.dev = vdev; > + init_waitqueue_head(&virtballoon.balloon_wait); > + init_waitqueue_head(&virtballoon.rmmod_wait); > + atomic_set(&virtballoon.inflight_bufs, 0); > + > + err = request_irq(pvdev->pci_dev->irq, balloon_irq, IRQF_SHARED, > + pvdev->vdev.dev.bus_id, &virtballoon); > + if (err) > + goto out_free_vq; > Why is it taking over the irq? This is very, very wrong. A virtio device cannot be dependent on being used on top of the virtio-pci backend. > + > + virtballoon.balloon_thread = kthread_run(balloon_thread, > + &virtballoon, > + "kvm_balloond"); > + printk("kvm_balloon: registered\n"); > + > + return 0; > + > +out_free_vq: > + vdev->config->del_vq(virtballoon.vq); > + virtballoon.dev = NULL; > + return err; > +} > + > +static void balloon_remove(struct virtio_device *vdev) > +{ > + kthread_stop(virtballoon.balloon_thread); > + vdev->config->del_vq(virtballoon.vq); > +} > + > +static struct virtio_driver virtio_balloon = { > + .driver.name = KBUILD_MODNAME, > + .driver.owner = THIS_MODULE, > + .id_table = id_table, > + .probe = balloon_probe, > + .remove = __devexit_p(balloon_remove), > +}; > + > +module_param(kvm_balloon_debug, int, 0); > + > +static int __init kvm_balloon_init(void) > +{ > + virtballoon.dev = NULL; > + > + return register_virtio_driver(&virtio_balloon); > +} > + > +static void __exit kvm_balloon_exit(void) > +{ > + spin_lock(&balloon_plist_lock); > + if (balloon_size) { > + DEFINE_WAIT(wait); > + > + virtballoon.target_nrpages += balloon_size; > + spin_unlock(&balloon_plist_lock); > + virtballoon.dev->config->set(virtballoon.dev, 0, > + &virtballoon.target_nrpages, > + sizeof(virtballoon.target_nrpages)); > + wake_up(&virtballoon.balloon_wait); > + prepare_to_wait(&virtballoon.rmmod_wait, &wait, > + TASK_UNINTERRUPTIBLE); > + schedule(); > + finish_wait(&virtballoon.rmmod_wait, &wait); > + spin_lock(&balloon_plist_lock); > + } > + > + if (balloon_size) > + printk(KERN_ERR "%s: exit while balloon not empty!\n", > + __FUNCTION__); > + > + spin_unlock(&balloon_plist_lock); > + > + unregister_virtio_driver(&virtio_balloon); > +} > + > +module_init(kvm_balloon_init); > +module_exit(kvm_balloon_exit); > Index: linux-2.6-nv/drivers/virtio/virtio_pci.c > =================================================================== > --- linux-2.6-nv.orig/drivers/virtio/virtio_pci.c > +++ linux-2.6-nv/drivers/virtio/virtio_pci.c > @@ -30,20 +30,6 @@ MODULE_DESCRIPTION("virtio-pci"); > MODULE_LICENSE("GPL"); > MODULE_VERSION("1"); > > -/* Our device structure */ > -struct virtio_pci_device > -{ > - struct virtio_device vdev; > - struct pci_dev *pci_dev; > - > - /* the IO mapping for the PCI config space */ > - void *ioaddr; > - > - /* a list of queues so we can dispatch IRQs */ > - spinlock_t lock; > - struct list_head virtqueues; > -}; > - > struct virtio_pci_vq_info > { > /* the actual virtqueue */ > @@ -67,6 +53,7 @@ static struct pci_device_id virtio_pci_i > { 0x1AF4, 0x1000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > { 0x1AF4, 0x1001, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > { 0x1AF4, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Dummy entry */ > + { 0x1AF4, 0x1003, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Balloon */ > { 0 }, > }; > > @@ -89,12 +76,6 @@ static struct device virtio_pci_root = { > /* Unique numbering for devices under the kvm root */ > static unsigned int dev_index; > > -/* Convert a generic virtio device to our structure */ > -static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) > -{ > - return container_of(vdev, struct virtio_pci_device, vdev); > -} > - > /* virtio config->feature() implementation */ > static bool vp_feature(struct virtio_device *vdev, unsigned bit) > { > Index: linux-2.6-nv/include/linux/virtio_pci.h > =================================================================== > --- linux-2.6-nv.orig/include/linux/virtio_pci.h > +++ linux-2.6-nv/include/linux/virtio_pci.h > @@ -19,6 +19,26 @@ > > #include <linux/virtio_config.h> > > +/* Our device structure */ > +struct virtio_pci_device > +{ > + struct virtio_device vdev; > + struct pci_dev *pci_dev; > + > + /* the IO mapping for the PCI config space */ > + void *ioaddr; > + > + /* a list of queues so we can dispatch IRQs */ > + spinlock_t lock; > + struct list_head virtqueues; > +}; > + > +/* Convert a generic virtio device to our structure */ > +struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) > +{ > + return container_of(vdev, struct virtio_pci_device, vdev); > +} > + > This should be a big indicator that something is wrong. You should not have to change the encapsulation of the virtio-pci backend. Regards, Anthony Liguori > /* A 32-bit r/o bitmask of the features supported by the host */ > #define VIRTIO_PCI_HOST_FEATURES 0 > > > ------------------------------------------------------------------------- > Check out the new SourceForge.net Marketplace. > It's the best place to buy or sell services for > just about anything Open Source. > http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace > _______________________________________________ > kvm-devel mailing list > kvm...@li... > https://lists.sourceforge.net/lists/listinfo/kvm-devel > |
From: Andrea A. <an...@qu...> - 2008-01-09 10:06:42
|
On Tue, Jan 08, 2008 at 09:42:13AM -0600, Anthony Liguori wrote: > Instead of allocating a node for each page, you could use page->private page->lru is probably better for this so splice still works etc... (the struct page isn't visible to the guest VM so it's free to use) |
From: Marcelo T. <ma...@kv...> - 2008-01-09 11:12:09
|
On Wed, Jan 09, 2008 at 11:06:21AM +0100, Andrea Arcangeli wrote: > On Tue, Jan 08, 2008 at 09:42:13AM -0600, Anthony Liguori wrote: > > Instead of allocating a node for each page, you could use page->private > > page->lru is probably better for this so splice still works > etc... (the struct page isn't visible to the guest VM so it's free to > use) Yeap, that works. |
From: Marcelo T. <ma...@kv...> - 2008-01-08 16:07:45
|
On Tue, Jan 08, 2008 at 09:42:13AM -0600, Anthony Liguori wrote: > Marcelo Tosatti wrote: > >Following patch introduces a KVM guest balloon driver. Communication > >to/from the host is performed via virtio. > > I'll address the other comments. > >+ virtballoon.dev = vdev; > >+ init_waitqueue_head(&virtballoon.balloon_wait); > >+ init_waitqueue_head(&virtballoon.rmmod_wait); > >+ atomic_set(&virtballoon.inflight_bufs, 0); > >+ > >+ err = request_irq(pvdev->pci_dev->irq, balloon_irq, IRQF_SHARED, > >+ pvdev->vdev.dev.bus_id, &virtballoon); > >+ if (err) > >+ goto out_free_vq; > > > > Why is it taking over the irq? This is very, very wrong. A virtio > device cannot be dependent on being used on top of the virtio-pci backend. A notification is necessary whenever the host changes the target value in the config space. So right now this notification is sharing the same IRQ as the main PCI device. Do you have any suggestion on how to retrieve the IRQ of the virtio device, or some other notification mechanism? |
From: Avi K. <av...@qu...> - 2008-01-08 16:18:11
|
Marcelo Tosatti wrote: >> Why is it taking over the irq? This is very, very wrong. A virtio >> device cannot be dependent on being used on top of the virtio-pci backend. >> > > A notification is necessary whenever the host changes the target value > in the config space. So right now this notification is sharing the > same IRQ as the main PCI device. > > Do you have any suggestion on how to retrieve the IRQ of the virtio > device, or some other notification mechanism? > One way would be to send a "look at config" queue message, but it seems that it is a fairly generic operation. Maybe virtio can add support for it, with a new callback. -- error compiling committee.c: too many arguments to function |
From: Rusty R. <ru...@ru...> - 2008-01-09 03:20:23
|
On Wednesday 09 January 2008 03:18:13 Avi Kivity wrote: > Marcelo Tosatti wrote: > > Do you have any suggestion on how to retrieve the IRQ of the virtio > > device, or some other notification mechanism? Unfortunately, irqs are logically assigned to virtio queues, not devices. Yet configuration info is per-device. > One way would be to send a "look at config" queue message, but it seems > that it is a fairly generic operation. Maybe virtio can add support for > it, with a new callback. This is the first time we've had to do this, but I don't think it's the last, so we should consider it carefully. Chatted with Anthony, and this is what we came up with: === virtio: configuration change callback Various drivers want to know when their configuration information changes: the balloon driver is the immediate user, but the network driver may one day have a "carrier" status as well. This introduces that callback, and adds it to the virtio PCI implementation. Signed-off-by: Rusty Russell <ru...@ru...> --- drivers/virtio/virtio_pci.c | 10 ++++++++++ include/linux/virtio.h | 3 +++ include/linux/virtio_pci.h | 3 +++ 3 files changed, 16 insertions(+) diff -r 7494c7702462 drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c Wed Jan 09 11:00:21 2008 +1100 +++ b/drivers/virtio/virtio_pci.c Wed Jan 09 11:18:19 2008 +1100 @@ -184,6 +184,16 @@ static irqreturn_t vp_interrupt(int irq, /* It's definitely not us if the ISR was not high */ if (!isr) return IRQ_NONE; + + /* Configuration change? Tell driver if it wants to know. */ + if (isr & VIRTIO_PCI_ISR_CONFIG) { + struct virtio_driver *drv; + drv = container_of(vp_dev->vdev.dev.driver, + struct virtio_driver, driver); + + if (drv->config_changed) + drv->config_changed(&vp_dev->vdev); + } spin_lock(&vp_dev->lock); list_for_each_entry(info, &vp_dev->virtqueues, node) { diff -r 7494c7702462 include/linux/virtio.h --- a/include/linux/virtio.h Wed Jan 09 11:00:21 2008 +1100 +++ b/include/linux/virtio.h Wed Jan 09 11:18:19 2008 +1100 @@ -98,12 +98,15 @@ void unregister_virtio_device(struct vir * @probe: the function to call when a device is found. Returns a token for * remove, or PTR_ERR(). * @remove: the function when a device is removed. + * @config_changed: optional function to call when the device configuration + * changes; may be called in interrupt context. */ struct virtio_driver { struct device_driver driver; const struct virtio_device_id *id_table; int (*probe)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); + void (*config_changed)(struct virtio_device *dev); }; int register_virtio_driver(struct virtio_driver *drv); diff -r 7494c7702462 include/linux/virtio_pci.h --- a/include/linux/virtio_pci.h Wed Jan 09 11:00:21 2008 +1100 +++ b/include/linux/virtio_pci.h Wed Jan 09 11:18:19 2008 +1100 @@ -45,6 +45,9 @@ * a read-and-acknowledge. */ #define VIRTIO_PCI_ISR 19 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 + /* The remaining space is defined by each driver as the per-driver * configuration space */ #define VIRTIO_PCI_CONFIG 20 |
From: Anthony L. <an...@co...> - 2008-01-08 16:14:13
|
Marcelo Tosatti wrote: > On Tue, Jan 08, 2008 at 09:42:13AM -0600, Anthony Liguori wrote: > >> Marcelo Tosatti wrote: >> >>> Following patch introduces a KVM guest balloon driver. Communication >>> to/from the host is performed via virtio. >>> >>> > > I'll address the other comments. > > >>> + virtballoon.dev = vdev; >>> + init_waitqueue_head(&virtballoon.balloon_wait); >>> + init_waitqueue_head(&virtballoon.rmmod_wait); >>> + atomic_set(&virtballoon.inflight_bufs, 0); >>> + >>> + err = request_irq(pvdev->pci_dev->irq, balloon_irq, IRQF_SHARED, >>> + pvdev->vdev.dev.bus_id, &virtballoon); >>> + if (err) >>> + goto out_free_vq; >>> >>> >> Why is it taking over the irq? This is very, very wrong. A virtio >> device cannot be dependent on being used on top of the virtio-pci backend. >> > > A notification is necessary whenever the host changes the target value > in the config space. So right now this notification is sharing the > same IRQ as the main PCI device. > A message should be sent over a virtqueue to indicate that the other need needs to reread a config value. You really need two virtqueues. The virtqueue you have now for the guest to send messages to the host, and another virtqueue that the guest adds buffers too that the host can use to send messages to the guest. A good example of using two queues for bidirectional communication would be the virtio_console driver. BTW, I don't think the target should be a config value. You don't gain anything from it being in the config space and it's somewhat unnatural for a virtio device. It makes more sense as a message to the guest. The PCI config space is not automatically saved/restored during migration. > Do you have any suggestion on how to retrieve the IRQ of the virtio > device, or some other notification mechanism? > A virtio device does not necessarily have an interrupt or it may have multiple interrupts. Regards, Anthony Liguori |
From: Avi K. <av...@qu...> - 2008-01-08 16:36:27
|
Anthony Liguori wrote: > BTW, I don't think the target should be a config value. You don't gain > anything from it being in the config space and it's somewhat unnatural > for a virtio device. It makes more sense as a message to the guest. > I disagree. The target is state, not an individual item that needs to be acted on. Having it as a single variable means that multiple changes are collapsed automatically, and that the the setting survives module reload. It's like a volume control, it doesn't send messages when you turn it, it just sets a value. (maybe a thermostat knob is a better analogy, with the driver being the circuitry around the knob that tries to control the temperature to match the setting) I believe state-like controls will be useful for other settings, like ethernet link state. -- error compiling committee.c: too many arguments to function |
From: Anthony L. <an...@co...> - 2008-01-08 16:43:06
|
Avi Kivity wrote: > Anthony Liguori wrote: >> BTW, I don't think the target should be a config value. You don't >> gain anything from it being in the config space and it's somewhat >> unnatural for a virtio device. It makes more sense as a message to >> the guest. >> > > I disagree. The target is state, not an individual item that needs to > be acted on. Having it as a single variable means that multiple > changes are collapsed automatically, and that the the setting survives > module reload. > > It's like a volume control, it doesn't send messages when you turn it, > it just sets a value. It's a value that is meant to be acted upon though, not something to be polled. You want to tell the driver that it should now try to balloon to a certain value. Or maybe not. Maybe the driver should read the target from the config space but then have a "kick" message that tells it, hey, something's probably changed. > (maybe a thermostat knob is a better analogy, with the driver being > the circuitry around the knob that tries to control the temperature to > match the setting) FWIW, I'm pretty sure that most modern volume controls are actually button presses for either direction instead of a variable resistor but point taken ;-) > I believe state-like controls will be useful for other settings, like > ethernet link state. It's difficult with the current config interface since it treats everything as a discrete blob that is updated all at once. I'm not really sure how to proceed. Rusty: do you have an opinion here? Regards, Anthony Liguori |