You can subscribe to this list here.
2006 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
|
Oct
(33) |
Nov
(325) |
Dec
(320) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2007 |
Jan
(484) |
Feb
(438) |
Mar
(407) |
Apr
(713) |
May
(831) |
Jun
(806) |
Jul
(1023) |
Aug
(1184) |
Sep
(1118) |
Oct
(1461) |
Nov
(1224) |
Dec
(1042) |
2008 |
Jan
(1449) |
Feb
(1110) |
Mar
(1428) |
Apr
(1643) |
May
(682) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Anthony L. <ali...@us...> - 2008-04-15 22:11:36
|
This patch implements the virtio network driver backend. In KVM, this driver can achieve 1gbit tx/rx performance. More patches are required to improve the network IO infrastructure to achieve better performance in QEMU. Since v1, I've updated the patch based on the IOVector refactoring. Signed-off-by: Anthony Liguori <ali...@us...> diff --git a/Makefile.target b/Makefile.target index 3e9f7b1..ea632fa 100644 --- a/Makefile.target +++ b/Makefile.target @@ -535,7 +535,7 @@ OBJS += rtl8139.o OBJS += e1000.o # virtio devices -OBJS += virtio.o +OBJS += virtio.o virtio-net.o ifeq ($(TARGET_BASE_ARCH), i386) # Hardware support diff --git a/hw/pci.c b/hw/pci.c index 3282478..94452d3 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -652,9 +652,11 @@ void pci_nic_init(PCIBus *bus, NICInfo *nd, int devfn) pci_e1000_init(bus, nd, devfn); } else if (strcmp(nd->model, "pcnet") == 0) { pci_pcnet_init(bus, nd, devfn); + } else if (strcmp(nd->model, "virtio") == 0) { + virtio_net_init(bus, nd, devfn); } else if (strcmp(nd->model, "?") == 0) { fprintf(stderr, "qemu: Supported PCI NICs: i82551 i82557b i82559er" - " ne2k_pci pcnet rtl8139 e1000\n"); + " ne2k_pci pcnet rtl8139 e1000 virtio\n"); exit (1); } else { fprintf(stderr, "qemu: Unsupported NIC: %s\n", nd->model); diff --git a/hw/pci.h b/hw/pci.h index c885cc5..e9e5ed3 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -145,4 +145,7 @@ PCIBus *pci_prep_init(qemu_irq *pic); PCIBus *pci_apb_init(target_phys_addr_t special_base, target_phys_addr_t mem_base, qemu_irq *pic); +/* virtio.c */ +PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn); + #endif diff --git a/hw/virtio-net.c b/hw/virtio-net.c new file mode 100644 index 0000000..e21aa1e --- /dev/null +++ b/hw/virtio-net.c @@ -0,0 +1,162 @@ +/* + * Virtio Network Device + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <ali...@us...> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include "virtio.h" +#include "net.h" +#include "pc.h" +#include "qemu-timer.h" +#include "virtio-net.h" + +#define TX_TIMER_INTERVAL (1000 / 500) + +typedef struct VirtIONet +{ + VirtIODevice vdev; + uint8_t mac[6]; + VirtQueue *rx_vq; + VirtQueue *tx_vq; + VLANClientState *vc; + QEMUTimer *tx_timer; + int tx_timer_active; +} VirtIONet; + +static VirtIONet *to_virtio_net(VirtIODevice *vdev) +{ + return (VirtIONet *)vdev; +} + +static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIONet *n = to_virtio_net(vdev); + struct virtio_net_config netcfg; + + memcpy(netcfg.mac, n->mac, 6); + memcpy(config, &netcfg, sizeof(netcfg)); +} + +static uint32_t virtio_net_get_features(VirtIODevice *vdev) +{ + return (1 << VIRTIO_NET_F_MAC); +} + +/* RX */ + +static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) +{ +} + +static void virtio_net_receive(void *opaque, const uint8_t *buf, int size) +{ + VirtIONet *n = opaque; + VirtQueueElement *elem; + struct virtio_net_hdr hdr; + + /* FIXME: the drivers really need to set their status better */ + if (!virtio_ring_inited(n->rx_vq)) + return; + + if ((elem = virtqueue_pop(n->rx_vq)) == NULL) + /* wait until the guest adds some rx bufs */ + return; + + memset(&hdr, 0, sizeof(hdr)); + hdr.flags = 0; + hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + + memcpy_to_iovector(elem->in, 0, sizeof(hdr), &hdr); + memcpy_to_iovector(elem->in, sizeof(hdr), size, buf); + + /* signal other side */ + virtqueue_push(n->rx_vq, elem, sizeof(hdr) + size); + virtio_notify(&n->vdev, n->rx_vq); +} + +/* TX */ +static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq) +{ + VirtQueueElement *elem; + + if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) + return; + + while ((elem = virtqueue_pop(vq))) { + IOVector *sg; + size_t out_size; + + /* ignore the header for now */ + out_size = iovector_size(elem->out); + + sg = iovector_trim(elem->out, sizeof(struct virtio_net_hdr), + out_size - sizeof(struct 
virtio_net_hdr)); + + qemu_sendv_packet(n->vc, sg); + + iovector_free(sg); + + virtqueue_push(vq, elem, out_size); + virtio_notify(&n->vdev, vq); + } +} + +static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIONet *n = to_virtio_net(vdev); + + if (n->tx_timer_active && + virtio_ring_avail_size(vq) == 64) { + virtio_ring_set_used_no_notify(vq, 0); + qemu_del_timer(n->tx_timer); + n->tx_timer_active = 0; + virtio_net_flush_tx(n, vq); + } else { + qemu_mod_timer(n->tx_timer, + qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL); + n->tx_timer_active = 1; + virtio_ring_set_used_no_notify(vq, 1); + } +} + +static void virtio_net_tx_timer(void *opaque) +{ + VirtIONet *n = opaque; + + n->tx_timer_active = 0; + + /* Just in case the driver is not ready on more */ + if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) + return; + + virtio_ring_set_used_no_notify(n->tx_vq, 0); + virtio_net_flush_tx(n, n->tx_vq); +} + +PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn) +{ + VirtIONet *n; + + n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000, + 0, VIRTIO_ID_NET, + 0x02, 0x00, 0x00, + 6, sizeof(VirtIONet)); + + n->vdev.get_config = virtio_net_get_config; + n->vdev.get_features = virtio_net_get_features; + n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx); + n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx); + memcpy(n->mac, nd->macaddr, 6); + n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive, NULL, n); + n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n); + n->tx_timer_active = 0; + + return (PCIDevice *)n; +} diff --git a/hw/virtio-net.h b/hw/virtio-net.h new file mode 100644 index 0000000..2959198 --- /dev/null +++ b/hw/virtio-net.h @@ -0,0 +1,54 @@ +/* + * Virtio-net Support + * + * Copyright IBM, Corp. 2007-2008 + * + * Authors: + * Anthony Liguori <ali...@us...> + * Rusty Russell <ru...@ru...> + * + * This work is licensed under the terms of the GNU GPL, version 2. 
See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_VIRTIO_NET_H +#define _QEMU_VIRTIO_NET_H + +/* from Linux's virtio_net.h */ + +/* The ID for virtio_net */ +#define VIRTIO_ID_NET 1 + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_NO_CSUM 0 +#define VIRTIO_NET_F_MAC 5 +#define VIRTIO_NET_F_GS0 6 + +/* The config defining mac address (6 bytes) */ +struct virtio_net_config +{ + uint8_t mac[6]; +} __attribute__((packed)); + +/* This is the first element of the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. */ +struct virtio_net_hdr +{ +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset + uint8_t flags; +#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame +#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO) +/* FIXME: Do we need this? If they said they can handle ECN, do they care? */ +#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 // GSO frame, IPv4 TCP w/ ECN +#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO) +#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP +#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set + uint8_t gso_type; + uint16_t hdr_len; + uint16_t gso_size; + uint16_t csum_start; + uint16_t csum_offset; +}; + +#endif |
From: Anthony L. <ali...@us...> - 2008-04-15 22:11:36
|
This patch introduces virtio support over PCI. virtio is a generic virtual IO framework for Linux first introduced in 2.6.23. Since 2.6.25, virtio has supported a PCI transport which this patch implements. Since the last time these patches were posted to qemu-devel, I've reworked it to use the proper access functions to manipulate guest memory. Signed-off-by: Anthony Liguori <ali...@us...> diff --git a/Makefile.target b/Makefile.target index 8470164..3e9f7b1 100644 --- a/Makefile.target +++ b/Makefile.target @@ -534,6 +534,9 @@ OBJS += pcnet.o OBJS += rtl8139.o OBJS += e1000.o +# virtio devices +OBJS += virtio.o + ifeq ($(TARGET_BASE_ARCH), i386) # Hardware support OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o diff --git a/cutils.c b/cutils.c index 9ef2fa6..814b3c4 100644 --- a/cutils.c +++ b/cutils.c @@ -95,3 +95,14 @@ time_t mktimegm(struct tm *tm) t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec; return t; } + +int fls(int i) +{ + int bit; + + for (bit=31; bit >= 0; bit--) + if (i & (1 << bit)) + return bit+1; + + return 0; +} diff --git a/hw/virtio-pci.h b/hw/virtio-pci.h new file mode 100644 index 0000000..9262e49 --- /dev/null +++ b/hw/virtio-pci.h @@ -0,0 +1,65 @@ +/* + * Virtio Support + * + * Copyright IBM, Corp. 2007-2008 + * + * Authors: + * Anthony Liguori <ali...@us...> + * Rusty Russell <ru...@ru...> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _VIRTIO_PCI_H +#define _VIRTIO_PCI_H + +/* from Linux's linux/virtio_ring.h */ + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 + +/* This means don't notify other side when buffer added. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* This means don't interrupt guest when buffer consumed. 
*/ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +#define VIRTIO_PCI_QUEUE_MAX 16 + +/* from Linux's linux/virtio_pci.h */ + +/* A 32-bit r/o bitmask of the features supported by the host */ +#define VIRTIO_PCI_HOST_FEATURES 0 + +/* A 32-bit r/w bitmask of features activated by the guest */ +#define VIRTIO_PCI_GUEST_FEATURES 4 + +/* A 32-bit r/w PFN for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_PFN 8 + +/* A 16-bit r/o queue size for the currently selected queue */ +#define VIRTIO_PCI_QUEUE_NUM 12 + +/* A 16-bit r/w queue selector */ +#define VIRTIO_PCI_QUEUE_SEL 14 + +/* A 16-bit r/w queue notifier */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 + +/* An 8-bit device status register. */ +#define VIRTIO_PCI_STATUS 18 + +/* An 8-bit r/o interrupt status register. Reading the value will return the + * current contents of the ISR and will also clear it. This is effectively + * a read-and-acknowledge. */ +#define VIRTIO_PCI_ISR 19 + +#define VIRTIO_PCI_CONFIG 20 + +/* Virtio ABI version, if we increment this, we break the guest driver. */ +#define VIRTIO_PCI_ABI_VERSION 0 + +#endif diff --git a/hw/virtio.c b/hw/virtio.c new file mode 100644 index 0000000..f364ef3 --- /dev/null +++ b/hw/virtio.c @@ -0,0 +1,592 @@ +/* + * Virtio Support + * + * Copyright IBM, Corp. 2007-2008 + * + * Authors: + * Anthony Liguori <ali...@us...> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include <inttypes.h> +#include <err.h> + +#include "virtio.h" +#include "sysemu.h" +#include "virtio-pci.h" + +typedef struct VRingDesc +{ + /* Address (guest-physical). */ + uint64_t addr; + /* Length. */ + uint32_t len; + /* The flags as indicated above. 
*/ + uint16_t flags; + /* We chain unused descriptors via this, too */ + uint16_t next; +} VRingDesc; + +typedef struct VRingAvail +{ + uint16_t flags; + uint16_t idx; + uint16_t ring[]; +} VRingAvail; + +typedef struct VRingUsedElem +{ + /* Index of start of used descriptor chain. */ + uint32_t id; + /* Total length of the descriptor chain which was used (written to) */ + uint32_t len; +} VRingUsedElem; + +typedef struct VRingUsed +{ + uint16_t flags; + uint16_t idx; + VRingUsedElem ring[]; +} VRingUsed; + +typedef struct VRing +{ + unsigned int num; + target_phys_addr_t desc; + target_phys_addr_t avail; + target_phys_addr_t used; +} VRing; + +struct VirtQueue +{ + VRing vring; + uint32_t pfn; + uint16_t last_avail_idx; + void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq); + int index; + VirtIODevice *vdev; +}; + +/* QEMU doesn't strictly need write barriers since everything runs in + * lock-step. We'll leave the calls to wmb() in though to make it obvious for + * KVM or if kqemu gets SMP support. 
+ */ + +#define wmb() do { } while (0) + +/* FIXME put this somewhere generic */ +#define offsetof(type, member) ((unsigned long)(&((type *)0)->member)) + +/* virt queue functions */ + +static void virtqueue_init(VirtQueue *vq, target_phys_addr_t p) +{ + vq->vring.desc = p; + vq->vring.avail = p + vq->vring.num * 16; + vq->vring.used = vq->vring.avail + 2 * (2 + vq->vring.num); + vq->vring.used = TARGET_PAGE_ALIGN(vq->vring.used); +} + +static uint64_t vring_desc_addr(VirtQueue *vq, unsigned int i) +{ + return ldq_phys(vq->vring.desc + i * sizeof(VRingDesc) + + offsetof(VRingDesc, addr)); +} + +static uint32_t vring_desc_len(VirtQueue *vq, unsigned int i) +{ + return ldl_phys(vq->vring.desc + i * sizeof(VRingDesc) + + offsetof(VRingDesc, len)); +} + +static uint16_t vring_desc_flags(VirtQueue *vq, unsigned int i) +{ + return lduw_phys(vq->vring.desc + i * sizeof(VRingDesc) + + offsetof(VRingDesc, flags)); +} + +static uint16_t vring_desc_next(VirtQueue *vq, unsigned int i) +{ + return lduw_phys(vq->vring.desc + i * sizeof(VRingDesc) + + offsetof(VRingDesc, next)); +} + +static uint16_t vring_avail_flags(VirtQueue *vq) +{ + return lduw_phys(vq->vring.avail + offsetof(VRingAvail, flags)); +} + +static uint16_t vring_avail_idx(VirtQueue *vq) +{ + return lduw_phys(vq->vring.avail + offsetof(VRingAvail, idx)); +} + +static uint16_t vring_avail_ring(VirtQueue *vq, unsigned int i) +{ + return lduw_phys(vq->vring.avail + offsetof(VRingAvail, ring[i])); +} + +static void vring_used_set_flag(VirtQueue *vq, uint16_t flag) +{ + stw_phys(vq->vring.used + offsetof(VRingUsed, flags), + lduw_phys(vq->vring.used + offsetof(VRingUsed, flags)) | flag); +} + +static void vring_used_unset_flag(VirtQueue *vq, uint16_t flag) +{ + stw_phys(vq->vring.used + offsetof(VRingUsed, flags), + lduw_phys(vq->vring.used + offsetof(VRingUsed, flags)) & ~flag); +} + +static uint16_t vring_used_get_idx(VirtQueue *vq) +{ + return lduw_phys(vq->vring.used + offsetof(VRingUsed, idx)); +} + +static void 
vring_used_set_idx(VirtQueue *vq, uint16_t value) +{ + stw_phys(vq->vring.used + offsetof(VRingUsed, idx), value); +} + +static void vring_used_set_ring(VirtQueue *vq, unsigned int i, + uint32_t id, uint32_t len) +{ + stl_phys(vq->vring.used + offsetof(VRingUsed, ring[i].id), id); + stl_phys(vq->vring.used + offsetof(VRingUsed, ring[i].len), len); +} + +static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT)) + return vq->vring.num; + + /* Check they're not leading us off end of descriptors. */ + next = vring_desc_next(vq, i); + /* Make sure compiler knows to grab that: we don't want it changing! */ + wmb(); + + if (next >= vq->vring.num) + errx(1, "Desc next is %u", next); + + return next; +} + +void virtqueue_push(VirtQueue *vq, VirtQueueElement *elem, unsigned int len) +{ + uint16_t idx; + + idx = vring_used_get_idx(vq); + vring_used_set_ring(vq, idx % vq->vring.num, elem->index, len); + wmb(); + vring_used_set_idx(vq, idx + 1); + + iovector_free(elem->in); + iovector_free(elem->out); + qemu_free(elem); +} + +VirtQueueElement *virtqueue_pop(VirtQueue *vq) +{ + unsigned int i, head; + unsigned int position; + VirtQueueElement *elem; + + /* Check it isn't doing very strange things with descriptor numbers. */ + if ((uint16_t)(vring_avail_idx(vq) - vq->last_avail_idx) > vq->vring.num) + errx(1, "Guest moved used index from %u to %u", + vq->last_avail_idx, vring_avail_idx(vq)); + + /* If there's nothing new since last we looked, return invalid. */ + if (vring_avail_idx(vq) == vq->last_avail_idx) + return NULL; + + /* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ + head = vring_avail_ring(vq, vq->last_avail_idx++ % vq->vring.num); + + /* If their number is silly, that's a fatal mistake. 
*/ + if (head >= vq->vring.num) + errx(1, "Guest says index %u is available", head); + + /* When we start there are none of either input nor output. */ + position = 0; + + elem = qemu_mallocz(sizeof(VirtQueueElement)); + + elem->in = iovector_new(vq->vring.num, + (DMAReadHandler *)pci_device_dma_read, + (DMAWriteHandler *)pci_device_dma_write, + &vq->vdev->pci_dev); + elem->out = iovector_new(vq->vring.num, + (DMAReadHandler *)pci_device_dma_read, + (DMAWriteHandler *)pci_device_dma_write, + &vq->vdev->pci_dev); + + elem->in->num = elem->out->num = 0; + + i = head; + do { + IOVectorElement *sge; + + if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) + sge = &elem->in->sg[elem->in->num++]; + else + sge = &elem->out->sg[elem->out->num++]; + + /* Grab the first descriptor, and check it's OK. */ + sge->len = vring_desc_len(vq, i); + sge->base = vring_desc_addr(vq, i); + + /* If we've got too many, that implies a descriptor loop. */ + if ((elem->in->num + elem->out->num) > vq->vring.num) + errx(1, "Looped descriptor"); + } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num); + + elem->index = head; + + return elem; +} + +/* virtio device */ + +static VirtIODevice *to_virtio_device(PCIDevice *pci_dev) +{ + return (VirtIODevice *)pci_dev; +} + +static void virtio_update_irq(VirtIODevice *vdev) +{ + qemu_set_irq(vdev->pci_dev.irq[0], vdev->isr & 1); +} + +void virtio_reset(void *opaque) +{ + VirtIODevice *vdev = opaque; + int i; + + vdev->features = 0; + vdev->queue_sel = 0; + vdev->status = 0; + vdev->isr = 0; + + for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { + vdev->vq[i].vring.desc = 0; + vdev->vq[i].vring.avail = 0; + vdev->vq[i].vring.used = 0; + vdev->vq[i].last_avail_idx = 0; + vdev->vq[i].pfn = 0; + } +} + +static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) +{ + VirtIODevice *vdev = to_virtio_device(opaque); + ram_addr_t pa; + + addr -= vdev->addr; + + switch (addr) { + case VIRTIO_PCI_GUEST_FEATURES: + if (vdev->set_features) + 
vdev->set_features(vdev, val); + vdev->features = val; + break; + case VIRTIO_PCI_QUEUE_PFN: + pa = (ram_addr_t)val << TARGET_PAGE_BITS; + vdev->vq[vdev->queue_sel].pfn = val; + if (pa == 0) + virtio_reset(vdev); + else + virtqueue_init(&vdev->vq[vdev->queue_sel], pa); + break; + case VIRTIO_PCI_QUEUE_SEL: + if (val < VIRTIO_PCI_QUEUE_MAX) + vdev->queue_sel = val; + break; + case VIRTIO_PCI_QUEUE_NOTIFY: + if (val < VIRTIO_PCI_QUEUE_MAX && vdev->vq[val].vring.desc) + vdev->vq[val].handle_output(vdev, &vdev->vq[val]); + break; + case VIRTIO_PCI_STATUS: + vdev->status = val & 0xFF; + if (vdev->status == 0) + virtio_reset(vdev); + break; + } +} + +static uint32_t virtio_ioport_read(void *opaque, uint32_t addr) +{ + VirtIODevice *vdev = to_virtio_device(opaque); + uint32_t ret = 0xFFFFFFFF; + + addr -= vdev->addr; + + switch (addr) { + case VIRTIO_PCI_HOST_FEATURES: + ret = vdev->get_features(vdev); + break; + case VIRTIO_PCI_GUEST_FEATURES: + ret = vdev->features; + break; + case VIRTIO_PCI_QUEUE_PFN: + ret = vdev->vq[vdev->queue_sel].pfn; + break; + case VIRTIO_PCI_QUEUE_NUM: + ret = vdev->vq[vdev->queue_sel].vring.num; + break; + case VIRTIO_PCI_QUEUE_SEL: + ret = vdev->queue_sel; + break; + case VIRTIO_PCI_STATUS: + ret = vdev->status; + break; + case VIRTIO_PCI_ISR: + /* reading from the ISR also clears it. 
*/ + ret = vdev->isr; + vdev->isr = 0; + virtio_update_irq(vdev); + break; + default: + break; + } + + return ret; +} + +static uint32_t virtio_config_readb(void *opaque, uint32_t addr) +{ + VirtIODevice *vdev = opaque; + uint8_t val; + + addr -= vdev->addr + VIRTIO_PCI_CONFIG; + if (addr > (vdev->config_len - sizeof(val))) + return (uint32_t)-1; + + memcpy(&val, vdev->config + addr, sizeof(val)); + return val; +} + +static uint32_t virtio_config_readw(void *opaque, uint32_t addr) +{ + VirtIODevice *vdev = opaque; + uint16_t val; + + addr -= vdev->addr + VIRTIO_PCI_CONFIG; + if (addr > (vdev->config_len - sizeof(val))) + return (uint32_t)-1; + + memcpy(&val, vdev->config + addr, sizeof(val)); + return val; +} + +static uint32_t virtio_config_readl(void *opaque, uint32_t addr) +{ + VirtIODevice *vdev = opaque; + uint32_t val; + + addr -= vdev->addr + VIRTIO_PCI_CONFIG; + if (addr > (vdev->config_len - sizeof(val))) + return (uint32_t)-1; + + memcpy(&val, vdev->config + addr, sizeof(val)); + return val; +} + +static void virtio_config_writeb(void *opaque, uint32_t addr, uint32_t data) +{ + VirtIODevice *vdev = opaque; + uint8_t val = data; + + addr -= vdev->addr + VIRTIO_PCI_CONFIG; + if (addr > (vdev->config_len - sizeof(val))) + return; + + memcpy(vdev->config + addr, &val, sizeof(val)); + + if (vdev->set_config) + vdev->set_config(vdev, vdev->config); +} + +static void virtio_config_writew(void *opaque, uint32_t addr, uint32_t data) +{ + VirtIODevice *vdev = opaque; + uint16_t val = data; + + addr -= vdev->addr + VIRTIO_PCI_CONFIG; + if (addr > (vdev->config_len - sizeof(val))) + return; + + memcpy(vdev->config + addr, &val, sizeof(val)); + + if (vdev->set_config) + vdev->set_config(vdev, vdev->config); +} + +static void virtio_config_writel(void *opaque, uint32_t addr, uint32_t data) +{ + VirtIODevice *vdev = opaque; + uint32_t val = data; + + addr -= vdev->addr + VIRTIO_PCI_CONFIG; + if (addr > (vdev->config_len - sizeof(val))) + return; + + memcpy(vdev->config 
+ addr, &val, sizeof(val)); + + if (vdev->set_config) + vdev->set_config(vdev, vdev->config); +} + +static void virtio_map(PCIDevice *pci_dev, int region_num, + uint32_t addr, uint32_t size, int type) +{ + VirtIODevice *vdev = to_virtio_device(pci_dev); + int i; + + vdev->addr = addr; + for (i = 0; i < 3; i++) { + register_ioport_write(addr, 20, 1 << i, virtio_ioport_write, vdev); + register_ioport_read(addr, 20, 1 << i, virtio_ioport_read, vdev); + } + + if (vdev->config_len) { + register_ioport_write(addr + 20, vdev->config_len, 1, + virtio_config_writeb, vdev); + register_ioport_write(addr + 20, vdev->config_len, 2, + virtio_config_writew, vdev); + register_ioport_write(addr + 20, vdev->config_len, 4, + virtio_config_writel, vdev); + register_ioport_read(addr + 20, vdev->config_len, 1, + virtio_config_readb, vdev); + register_ioport_read(addr + 20, vdev->config_len, 2, + virtio_config_readw, vdev); + register_ioport_read(addr + 20, vdev->config_len, 4, + virtio_config_readl, vdev); + + vdev->get_config(vdev, vdev->config); + } +} + +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + void (*handle_output)(VirtIODevice *, VirtQueue *)) +{ + int i; + + for (i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) { + if (vdev->vq[i].vring.num == 0) + break; + } + + if (i == VIRTIO_PCI_QUEUE_MAX) + abort(); + + vdev->vq[i].vring.num = queue_size; + vdev->vq[i].handle_output = handle_output; + vdev->vq[i].index = i; + vdev->vq[i].vdev = vdev; + + return &vdev->vq[i]; +} + +void virtio_notify_config(VirtIODevice *vdev) +{ + /* make sure we have the latest config */ + vdev->get_config(vdev, vdev->config); + vdev->isr = 3; + virtio_update_irq(vdev); +} + +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Always notify when queue is empty */ + if (vring_avail_idx(vq) != vq->last_avail_idx && + (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT)) + return; + + vdev->isr = 1; + virtio_update_irq(vdev); +} + +void virtio_ring_set_used_no_notify(VirtQueue *vq, int 
enable) +{ + if (enable) + vring_used_set_flag(vq, VRING_USED_F_NO_NOTIFY); + else + vring_used_unset_flag(vq, VRING_USED_F_NO_NOTIFY); +} + +size_t virtio_ring_avail_size(VirtQueue *vq) +{ + return vring_avail_idx(vq) - vq->last_avail_idx; +} + +int virtio_ring_inited(VirtQueue *vq) +{ + return (vq->vring.avail != 0); +} + +VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, + uint16_t vendor, uint16_t device, + uint16_t subvendor, uint16_t subdevice, + uint8_t class_code, uint8_t subclass_code, + uint8_t pif, size_t config_size, + size_t struct_size) +{ + VirtIODevice *vdev; + PCIDevice *pci_dev; + uint8_t *config; + uint32_t size; + + pci_dev = pci_register_device(bus, name, struct_size, + -1, NULL, NULL); + vdev = to_virtio_device(pci_dev); + + vdev->status = 0; + vdev->isr = 0; + vdev->queue_sel = 0; + vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX); + + config = pci_dev->config; + config[0x00] = vendor & 0xFF; + config[0x01] = (vendor >> 8) & 0xFF; + config[0x02] = device & 0xFF; + config[0x03] = (device >> 8) & 0xFF; + + config[0x08] = VIRTIO_PCI_ABI_VERSION; + + config[0x09] = pif; + config[0x0a] = subclass_code; + config[0x0b] = class_code; + config[0x0e] = 0x00; + + config[0x2c] = subvendor & 0xFF; + config[0x2d] = (subvendor >> 8) & 0xFF; + config[0x2e] = subdevice & 0xFF; + config[0x2f] = (subdevice >> 8) & 0xFF; + + config[0x3d] = 1; + + vdev->name = name; + vdev->config_len = config_size; + if (vdev->config_len) + vdev->config = qemu_mallocz(config_size); + else + vdev->config = NULL; + + size = 20 + config_size; + if (size & (size-1)) + size = 1 << fls(size); + + pci_register_io_region(pci_dev, 0, size, PCI_ADDRESS_SPACE_IO, + virtio_map); + qemu_register_reset(virtio_reset, vdev); + + return vdev; +} diff --git a/hw/virtio.h b/hw/virtio.h new file mode 100644 index 0000000..4b991d0 --- /dev/null +++ b/hw/virtio.h @@ -0,0 +1,88 @@ +/* + * Virtio Support + * + * Copyright IBM, Corp. 
2007-2008 + * + * Authors: + * Anthony Liguori <ali...@us...> + * Rusty Russell <ru...@ru...> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_VIRTIO_H +#define _QEMU_VIRTIO_H + +#include "hw.h" +#include "pci.h" +#include "iovector.h" + +/* from Linux's linux/virtio_config.h */ + +/* Status byte for guest to report progress, and synchronize features. */ +/* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_S_DRIVER 2 +/* Driver has used its parts of the config, and is happy */ +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +/* We've given up on this device. */ +#define VIRTIO_CONFIG_S_FAILED 0x80 + +typedef struct VirtQueue VirtQueue; +typedef struct VirtIODevice VirtIODevice; + +typedef struct VirtQueueElement +{ + unsigned int index; + IOVector *in, *out; +} VirtQueueElement; + +struct VirtIODevice +{ + PCIDevice pci_dev; + const char *name; + uint32_t addr; + uint16_t vendor; + uint16_t device; + uint8_t status; + uint8_t isr; + uint16_t queue_sel; + uint32_t features; + size_t config_len; + void *config; + uint32_t (*get_features)(VirtIODevice *vdev); + void (*set_features)(VirtIODevice *vdev, uint32_t val); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); + VirtQueue *vq; +}; + +VirtIODevice *virtio_init_pci(PCIBus *bus, const char *name, + uint16_t vendor, uint16_t device, + uint16_t subvendor, uint16_t subdevice, + uint8_t class_code, uint8_t subclass_code, + uint8_t pif, size_t config_size, + size_t struct_size); + +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + void (*handle_output)(VirtIODevice *, + VirtQueue *)); + +void virtqueue_push(VirtQueue *vq, VirtQueueElement *elem, unsigned int len); + +VirtQueueElement *virtqueue_pop(VirtQueue 
*vq); + +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); + +void virtio_ring_set_used_no_notify(VirtQueue *vq, int enable); + +size_t virtio_ring_avail_size(VirtQueue *vq); + +int virtio_ring_inited(VirtQueue *vq); + +void virtio_notify_config(VirtIODevice *vdev); + +#endif diff --git a/qemu-common.h b/qemu-common.h index 746dcc5..cd387b1 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -85,6 +85,7 @@ char *pstrcat(char *buf, int buf_size, const char *s); int strstart(const char *str, const char *val, const char **ptr); int stristart(const char *str, const char *val, const char **ptr); time_t mktimegm(struct tm *tm); +int fls(int i); /* Error handling. */ |
From: Hollis B. <ho...@us...> - 2008-04-15 21:06:59
|
3 files changed, 7 insertions(+), 7 deletions(-) include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 8 ++++---- virt/kvm/kvm_trace.c | 4 ++-- # HG changeset patch # User Hollis Blanchard <ho...@us...> # Date 1208293411 18000 # Node ID 524092a595b246f17ab56199e3afebded1e987a6 # Parent 8ad2f90233993539c3c919c2c303041611ecdcb4 [KVM] Rename debugfs_dir to kvm_debugfs_dir. It's a globally exported symbol now. Signed-off-by: Hollis Blanchard <ho...@us...> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -315,7 +315,7 @@ struct dentry *dentry; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; -extern struct dentry *debugfs_dir; +extern struct dentry *kvm_debugfs_dir; #ifdef CONFIG_KVM_TRACE int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -60,7 +60,7 @@ static __read_mostly struct preempt_ops kvm_preempt_ops; -struct dentry *debugfs_dir; +struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); @@ -1392,9 +1392,9 @@ { struct kvm_stats_debugfs_item *p; - debugfs_dir = debugfs_create_dir("kvm", NULL); + kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); for (p = debugfs_entries; p->name; ++p) - p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir, + p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); } @@ -1405,7 +1405,7 @@ for (p = debugfs_entries; p->name; ++p) debugfs_remove(p->dentry); - debugfs_remove(debugfs_dir); + debugfs_remove(kvm_debugfs_dir); } static int kvm_suspend(struct sys_device *dev, pm_message_t state) diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -159,12 +159,12 @@ r = -EIO; atomic_set(&kt->lost_records, 0); - kt->lost_file = 
debugfs_create_file("lost_records", 0444, debugfs_dir, + kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, kt, &kvm_trace_lost_ops); if (!kt->lost_file) goto err; - kt->rchan = relay_open("trace", debugfs_dir, kuts->buf_size, + kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, kuts->buf_nr, &kvm_relay_callbacks, kt); if (!kt->rchan) goto err; |
From: Hollis B. <ho...@us...> - 2008-04-15 20:58:22
|
On Wednesday 09 April 2008 05:01:36 Liu, Eric E wrote: > +/* This structure represents a single trace buffer record. */ > +struct kvm_trace_rec { > + __u32 event:28; > + __u32 extra_u32:3; > + __u32 cycle_in:1; > + __u32 pid; > + __u32 vcpu_id; > + union { > + struct { > + __u32 cycle_lo, cycle_hi; > + __u32 extra_u32[KVM_TRC_EXTRA_MAX]; > + } cycle; > + struct { > + __u32 extra_u32[KVM_TRC_EXTRA_MAX]; > + } nocycle; > + } u; > +}; Do we really need bitfields here? They are notoriously non-portable. Practically speaking, this will prevent me from copying a trace file from my big-endian target to my little-endian workstation for analysis, at least without some ugly hacking in the userland tool. -- Hollis Blanchard IBM Linux Technology Center |
From: Jerone Y. <jy...@us...> - 2008-04-15 20:22:42
|
1 file changed, 31 insertions(+), 17 deletions(-) kernel/Makefile | 48 +++++++++++++++++++++++++++++++----------------- - fix where $(ARCH_DIR) is lower case and for -DCONFIG_? in _unifdef macro it needs to be upper case. This patch adds the ability for make sync in the kernel directory to work for multiple architectures and not just x86. Signed-off-by: Jerone Young <jy...@us...> diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -1,5 +1,10 @@ include ../config.mak include ../config.mak +ARCH_DIR=$(ARCH) +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' + ARCH_DIR=x86 +endif + KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) DESTDIR= @@ -18,11 +23,25 @@ _hack = mv $1 $1.orig && \ | sed '/\#include/! s/\blapic\b/l_apic/g' > $1 && rm $1.orig _unifdef = mv $1 $1.orig && \ - unifdef -DCONFIG_X86 $1.orig > $1; \ + unifdef -DCONFIG_$(shell echo $(ARCH_DIR)|tr '[:lower:]' '[:upper:]') $1.orig > $1; \ [ $$? -le 1 ] && rm $1.orig hack = $(call _hack,tmp/$(strip $1)) unifdef = $(call _unifdef,tmp/$(strip $1)) + +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' +UNIFDEF_FILES = include/linux/kvm.h \ + include/linux/kvm_para.h \ + include/asm-$(ARCH_DIR)/kvm.h \ + include/asm-x86/kvm_para.h + +HACK_FILES = kvm_main.c \ + mmu.c \ + vmx.c \ + svm.c \ + x86.c \ + irq.h +endif all:: # include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat @@ -34,26 +53,21 @@ sync: sync: rm -rf tmp include rsync --exclude='*.mod.c' -R \ - "$(LINUX)"/arch/x86/kvm/./*.[ch] \ + "$(LINUX)"/arch/$(ARCH_DIR)/kvm/./*.[ch] \ "$(LINUX)"/virt/kvm/./*.[ch] \ "$(LINUX)"/./include/linux/kvm*.h \ - "$(LINUX)"/./include/asm-x86/kvm*.h \ + "$(LINUX)"/./include/asm-$(ARCH_DIR)/kvm*.h \ tmp/ - mkdir -p include/linux include/asm-x86 - ln -s asm-x86 include/asm - ln -sf asm-x86 include-compat/asm + mkdir -p include/linux include/asm-$(ARCH_DIR) + ln -s asm-$(ARCH_DIR) include/asm + ln -sf asm-$(ARCH_DIR) include-compat/asm - $(call unifdef, include/linux/kvm.h) 
- $(call unifdef, include/linux/kvm_para.h) - $(call unifdef, include/asm-x86/kvm.h) - $(call unifdef, include/asm-x86/kvm_para.h) - $(call hack, include/linux/kvm.h) - $(call hack, kvm_main.c) - $(call hack, mmu.c) - $(call hack, vmx.c) - $(call hack, svm.c) - $(call hack, x86.c) - $(call hack, irq.h) + for i in $(UNIFDEF_FILES); \ + do $(call unifdef, $$i); done + + for i in $(HACK_FILES); \ + do $(call hack, $$i); done + for i in $$(find tmp -type f -printf '%P '); \ do cmp -s $$i tmp/$$i || cp tmp/$$i $$i; done rm -rf tmp |
From: Jerone Y. <jy...@us...> - 2008-04-15 20:10:30
|
1 file changed, 31 insertions(+), 17 deletions(-) kernel/Makefile | 48 +++++++++++++++++++++++++++++++----------------- This patch adds the ability for make sync in the kernel directory to work for multiple architectures and not just x86. Signed-off-by: Jerone Young <jy...@us...> diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -1,5 +1,10 @@ include ../config.mak include ../config.mak +ARCH_DIR=$(ARCH) +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' + ARCH_DIR=x86 +endif + KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) DESTDIR= @@ -18,11 +23,25 @@ _hack = mv $1 $1.orig && \ | sed '/\#include/! s/\blapic\b/l_apic/g' > $1 && rm $1.orig _unifdef = mv $1 $1.orig && \ - unifdef -DCONFIG_X86 $1.orig > $1; \ + unifdef -DCONFIG_$(ARCH_DIR) $1.orig > $1; \ [ $$? -le 1 ] && rm $1.orig hack = $(call _hack,tmp/$(strip $1)) unifdef = $(call _unifdef,tmp/$(strip $1)) + +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' +UNIFDEF_FILES = include/linux/kvm.h \ + include/linux/kvm_para.h \ + include/asm-$(ARCH_DIR)/kvm.h \ + include/asm-x86/kvm_para.h + +HACK_FILES = kvm_main.c \ + mmu.c \ + vmx.c \ + svm.c \ + x86.c \ + irq.h +endif all:: # include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat @@ -34,26 +53,21 @@ sync: sync: rm -rf tmp include rsync --exclude='*.mod.c' -R \ - "$(LINUX)"/arch/x86/kvm/./*.[ch] \ + "$(LINUX)"/arch/$(ARCH_DIR)/kvm/./*.[ch] \ "$(LINUX)"/virt/kvm/./*.[ch] \ "$(LINUX)"/./include/linux/kvm*.h \ - "$(LINUX)"/./include/asm-x86/kvm*.h \ + "$(LINUX)"/./include/asm-$(ARCH_DIR)/kvm*.h \ tmp/ - mkdir -p include/linux include/asm-x86 - ln -s asm-x86 include/asm - ln -sf asm-x86 include-compat/asm + mkdir -p include/linux include/asm-$(ARCH_DIR) + ln -s asm-$(ARCH_DIR) include/asm + ln -sf asm-$(ARCH_DIR) include-compat/asm - $(call unifdef, include/linux/kvm.h) - $(call unifdef, include/linux/kvm_para.h) - $(call unifdef, include/asm-x86/kvm.h) - $(call unifdef, include/asm-x86/kvm_para.h) - $(call
hack, include/linux/kvm.h) - $(call hack, kvm_main.c) - $(call hack, mmu.c) - $(call hack, vmx.c) - $(call hack, svm.c) - $(call hack, x86.c) - $(call hack, irq.h) + for i in $(UNIFDEF_FILES); \ + do $(call unifdef, $$i); done + + for i in $(HACK_FILES); \ + do $(call hack, $$i); done + for i in $$(find tmp -type f -printf '%P '); \ do cmp -s $$i tmp/$$i || cp tmp/$$i $$i; done rm -rf tmp |
From: Hollis B. <ho...@us...> - 2008-04-15 19:58:12
|
On Tuesday 15 April 2008 14:43:12 Jerone Young wrote: > 1 file changed, 31 insertions(+), 17 deletions(-) > kernel/Makefile | 48 +++++++++++++++++++++++++++++++----------------- > > > This patch add the ability for make sync in the kernel directory to work > for mulitiple architectures and not just x86. > > Signed-off-by: Jerone Young <jy...@us...> > > diff --git a/kernel/Makefile b/kernel/Makefile > --- a/kernel/Makefile > +++ b/kernel/Makefile > @@ -1,5 +1,10 @@ include ../config.mak > include ../config.mak > > +ARCH_DIR=$(ARCH) > +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' > + ARCH_DIR=x86 > +endif > + > KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) > > DESTDIR= > @@ -18,11 +23,25 @@ _hack = mv $1 $1.orig && \ > > | sed '/\#include/! s/\blapic\b/l_apic/g' > $1 && rm $1.orig > > _unifdef = mv $1 $1.orig && \ > - unifdef -DCONFIG_X86 $1.orig > $1; \ > + unifdef -DCONFIG_$(ARCH_DIR) $1.orig > $1; \ > [ $$? -le 1 ] && rm $1.orig This isn't going to work because you've changed -DCONFIG_X86 to -DCONFIG_x86 . -- Hollis Blanchard IBM Linux Technology Center |
From: Jerone Y. <jy...@us...> - 2008-04-15 19:43:15
|
1 file changed, 31 insertions(+), 17 deletions(-) kernel/Makefile | 48 +++++++++++++++++++++++++++++++----------------- This patch adds the ability for make sync in the kernel directory to work for multiple architectures and not just x86. Signed-off-by: Jerone Young <jy...@us...> diff --git a/kernel/Makefile b/kernel/Makefile --- a/kernel/Makefile +++ b/kernel/Makefile @@ -1,5 +1,10 @@ include ../config.mak include ../config.mak +ARCH_DIR=$(ARCH) +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' + ARCH_DIR=x86 +endif + KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) DESTDIR= @@ -18,11 +23,25 @@ _hack = mv $1 $1.orig && \ | sed '/\#include/! s/\blapic\b/l_apic/g' > $1 && rm $1.orig _unifdef = mv $1 $1.orig && \ - unifdef -DCONFIG_X86 $1.orig > $1; \ + unifdef -DCONFIG_$(ARCH_DIR) $1.orig > $1; \ [ $$? -le 1 ] && rm $1.orig hack = $(call _hack,tmp/$(strip $1)) unifdef = $(call _unifdef,tmp/$(strip $1)) + +ifneq '$(filter $(ARCH_DIR), x86_64 i386)' '' +UNIFDEF_FILES = include/linux/kvm.h \ + include/linux/kvm_para.h \ + include/asm-$(ARCH_DIR)/kvm.h \ + include/asm-x86/kvm_para.h + +HACK_FILES = kvm_main.c \ + mmu.c \ + vmx.c \ + svm.c \ + x86.c \ + irq.h +endif all:: # include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat @@ -34,26 +53,21 @@ sync: sync: rm -rf tmp include rsync --exclude='*.mod.c' -R \ - "$(LINUX)"/arch/x86/kvm/./*.[ch] \ + "$(LINUX)"/arch/$(ARCH_DIR)/kvm/./*.[ch] \ "$(LINUX)"/virt/kvm/./*.[ch] \ "$(LINUX)"/./include/linux/kvm*.h \ - "$(LINUX)"/./include/asm-x86/kvm*.h \ + "$(LINUX)"/./include/asm-$(ARCH_DIR)/kvm*.h \ tmp/ - mkdir -p include/linux include/asm-x86 - ln -s asm-x86 include/asm - ln -sf asm-x86 include-compat/asm + mkdir -p include/linux include/asm-$(ARCH_DIR) + ln -s asm-$(ARCH_DIR) include/asm + ln -sf asm-$(ARCH_DIR) include-compat/asm - $(call unifdef, include/linux/kvm.h) - $(call unifdef, include/linux/kvm_para.h) - $(call unifdef, include/asm-x86/kvm.h) - $(call unifdef, include/asm-x86/kvm_para.h) - $(call
hack, include/linux/kvm.h) - $(call hack, kvm_main.c) - $(call hack, mmu.c) - $(call hack, vmx.c) - $(call hack, svm.c) - $(call hack, x86.c) - $(call hack, irq.h) + for i in $(UNIFDEF_FILES); \ + do $(call unifdef, $$i); done + + for i in $(HACK_FILES); \ + do $(call hack, $$i); done + for i in $$(find tmp -type f -printf '%P '); \ do cmp -s $$i tmp/$$i || cp tmp/$$i $$i; done rm -rf tmp |
From: Jerone Y. <jy...@us...> - 2008-04-15 19:24:49
|
This patch apparently fell through the cracks or I didn't send the revised version to the list. These patches fix CPU initialization for PowerPC. Without them a guest cannot be launched. Signed-off-by: Jerone Young <jy...@us...> 2 files changed, 6 insertions(+), 3 deletions(-) qemu/hw/ppc440_bamboo.c | 3 --- qemu/qemu-kvm-powerpc.c | 6 ++++++ |
From: Jerone Y. <jy...@us...> - 2008-04-15 19:23:10
|
1 file changed, 6 insertions(+) qemu/qemu-kvm-powerpc.c | 6 ++++++ This patch adds a call to load_kvm_registers after creation of vcpu. This is required for ppc since we are required to set certain registers before boot. Signed-off-by: Jerone Young <jy...@us...> diff --git a/qemu/qemu-kvm-powerpc.c b/qemu/qemu-kvm-powerpc.c --- a/qemu/qemu-kvm-powerpc.c +++ b/qemu/qemu-kvm-powerpc.c @@ -121,6 +121,12 @@ void kvm_arch_save_regs(CPUState *env) int kvm_arch_qemu_init_env(CPUState *cenv) { + if (cenv->cpu_index == 0) { + /* load any registers set in env into + kvm for the first guest vcpu */ + kvm_load_registers(cenv); + } + return 0; } |
From: Jerone Y. <jy...@us...> - 2008-04-15 18:54:31
|
1 file changed, 3 deletions(-) qemu/hw/ppc440_bamboo.c | 3 --- This patch removes the call to kvm_load_registers while in board platform setup code. This must now be done later in vcpu initialization. Signed-off-by: Jerone Young <jy...@us...> diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -174,9 +174,6 @@ void bamboo_init(ram_addr_t ram_size, in env->gpr[3] = dt_base; #endif env->nip = ep; - - printf("%s: loading kvm registers\n", __func__); - kvm_load_registers(env); } for (i = 0; i < nb_nics; i++) { |
From: Hollis B. <ho...@us...> - 2008-04-15 18:21:41
|
On Tuesday 15 April 2008 11:20:58 Jerone Young wrote: > > What happened to my suggestion of creating a per-arch HACK_FILES and > > UNIFDEF_FILES variables, and looping over those? > > These macros are only for x86. We don't want them or need them. So I > just left them be as not to accidentally miss or break anything. Right, they are only used for x86. So as I said before, create arch-specific HACK_FILES and UNIFDEF_FILES variables, and use those instead. -- Hollis Blanchard IBM Linux Technology Center |
From: Anthony L. <an...@co...> - 2008-04-15 18:14:22
|
Avi Kivity wrote: > Anthony Liguori wrote: >> >> With the IO thread, we don't have to worry about lost signals like we >> do in a VCPU thread so it's fine to just use select() and install >> signal handlers IIUC. >> > > What about aio completions? The only race-free way to handle both > posix aio completion and fd readiness is signals AFAIK. We poll aio completion after the select, don't we? Worst case scenario we miss a signal and wait to poll after the next select event. That's going to occur very often because of the timer. Regards, Anthony Liguori |
From: Alberto T. <al...@by...> - 2008-04-15 16:30:04
|
On Tuesday 15 April 2008 12:57:45 am Jun Koi wrote: > Looks like a problem, however. In his instruction, part 3: > >3. Shut down the VM. This time, don't include the new temporary > > image from step 1 and define your disk(s) as SCSI disks: > > > > qemu-system-x86_64 -m 256 \ > > -drive file=hda.img,if=scsi,bus=0,index=0,media=disk,boot=off > > ... > > I think above should have "boot=on", rather than "boot=off". > Otherwise, you cannot boot from scsi disk, right? You are right! That's what happens when you copy and paste code. :-) > I tried the above instructions with WinXP, but WinXP cannot boot > successfully: it stops somewhere in the middle, and hang there. I tried it on a vanilla install of WinXP SP2, no updates. I guess I should try using a fully patched version of XP and see what that does. In step #2, did the Device Manager show the SCSI disk? -- Alberto Treviño al...@by... Testing Center Brigham Young University |
From: Jerone Y. <jy...@us...> - 2008-04-15 16:21:06
|
On Tue, 2008-04-15 at 09:08 -0500, Hollis Blanchard wrote: > On Monday 14 April 2008 21:46:43 Jerone Young wrote: > > 1 file changed, 13 insertions(+), 5 deletions(-) > > kernel/Makefile | 18 +++++++++++++----- > > > > > > This patch add the ability for make sync in the kernel directory to work > > for mulitiple architectures and not just x86. > > > > Signed-off-by: Jerone Young <jy...@us...> > > > > diff --git a/kernel/Makefile b/kernel/Makefile > > --- a/kernel/Makefile > > +++ b/kernel/Makefile > > @@ -1,5 +1,10 @@ include ../config.mak > > include ../config.mak > > > > +ASM_DIR=$(ARCH) > > +ifneq '$(filter $(ASM_DIR), x86_64 i386 ia64)' '' > > + ASM_DIR=x86 > > +endif > > Minor complaint: "ASM_DIR" really isn't. You use it as arch/$(ASM_DIR) and > also as include/asm-$(ASM_DIR). I think what you really meant is "ARCH_DIR" > (or similar). I can change it. Not that big of a deal. Oh, I left the ia64 on there by accident. > > > +ifneq '$(filter $(ASM_DIR), x86_64 i386 ia64)' '' > > $(call unifdef, include/linux/kvm.h) > > $(call unifdef, include/linux/kvm_para.h) > > $(call unifdef, include/asm-x86/kvm.h) > > @@ -54,6 +60,8 @@ sync: > > $(call hack, svm.c) > > $(call hack, x86.c) > > $(call hack, irq.h) > > +endif > > + > > Why are you keeping IA64 touching asm-x86? Accident. Cut-and-paste error from the first mistake. > > What happened to my suggestion of creating a per-arch HACK_FILES and > UNIFDEF_FILES variables, and looping over those? These macros are only for x86. We don't want them or need them. So I just left them be so as not to accidentally miss or break anything. > |
From: Avi K. <av...@qu...> - 2008-04-15 15:43:13
|
Anthony Liguori wrote: > > With the IO thread, we don't have to worry about lost signals like we > do in a VCPU thread so it's fine to just use select() and install > signal handlers IIUC. > What about aio completions? The only race-free way to handle both posix aio completion and fd readiness is signals AFAIK. -- error compiling committee.c: too many arguments to function |
From: Anthony L. <an...@co...> - 2008-04-15 15:34:15
|
Marcelo Tosatti wrote: > On Tue, Apr 15, 2008 at 05:45:28PM +0300, Avi Kivity wrote: > >> Anthony Liguori wrote: >> >>> Why did we ever need sigtimedwait() anyway? Even if we were >>> select()ing within the VCPU context, we should break out of the >>> select() on signal delivery. >>> >>> >> select() is no good since if the signal is delivered after the select(), >> but before entry into guest mode, it is lost. pselect() might work, but >> its is not supported on all hosts, and it (AFAICT) delivers the signals >> by calling their handlers, which is slow and unnecessary. >> > > Anthony tested a patch using signalfd: > > http://people.redhat.com/~mtosatti/io-thread-select-timeout > > Which is only available on newer hosts. I guess the signals will have to > stay for older hosts. > With the IO thread, we don't have to worry about lost signals like we do in a VCPU thread so it's fine to just use select() and install signal handlers IIUC. Regards, Anthony Liguori |
From: Marcelo T. <mto...@re...> - 2008-04-15 15:08:54
|
On Tue, Apr 15, 2008 at 08:40:09AM -0500, Anthony Liguori wrote: > Avi Kivity wrote: > >Anthony Liguori wrote: > >> > >>BTW, when we set O_ASYNC on the tap fd, we're eliminating > >>O_NONBLOCK. This means that we have to poll loop select() when > >>readv()'ing packets instead of just reading until hitting AGAIN. > >>This means at least an extra syscall per packet. Yeah, I noticed that problem too. > > > >I didn't know that O_ASYNC and O_NONBLOCK were mutually exclusive. > >Can you point me at the relevant documentation? > > I don't know that they are, but we're doing an: > > fcntl(fd, F_SETFL, O_ASYNC); > > F_SETFL is not additive so the previous O_NONBLOCK gets dropped. Fortunately read() will only be issued for the tap fd when select() returns with its fd set. And when that happens there is always a packet available for reading... |
From: Marcelo T. <mto...@re...> - 2008-04-15 15:01:16
|
On Tue, Apr 15, 2008 at 05:45:28PM +0300, Avi Kivity wrote: > Anthony Liguori wrote: > > > >Why did we ever need sigtimedwait() anyway? Even if we were > >select()ing within the VCPU context, we should break out of the > >select() on signal delivery. > > > > select() is no good since if the signal is delivered after the select(), > but before entry into guest mode, it is lost. pselect() might work, but > its is not supported on all hosts, and it (AFAICT) delivers the signals > by calling their handlers, which is slow and unnecessary. Anthony tested a patch using signalfd: http://people.redhat.com/~mtosatti/io-thread-select-timeout Which is only available on newer hosts. I guess the signals will have to stay for older hosts. |
From: Avi K. <av...@qu...> - 2008-04-15 14:45:41
|
Anthony Liguori wrote: > > Why did we ever need sigtimedwait() anyway? Even if we were > select()ing within the VCPU context, we should break out of the > select() on signal delivery. > select() is no good since if the signal is delivered after the select(), but before entry into guest mode, it is lost. pselect() might work, but it is not supported on all hosts, and it (AFAICT) delivers the signals by calling their handlers, which is slow and unnecessary. -- error compiling committee.c: too many arguments to function |
From: Anthony L. <an...@co...> - 2008-04-15 14:21:12
|
Nguyen Anh Quynh wrote: > On 4/15/08, Nguyen Anh Quynh <aq...@gm...> wrote: > > >> > You should be able to get very far along the Windows boot process with >> > extboot. If you just install the LSI driver in Win2k, you can boot up >> > completely. Someone just posted instructions for doing scsi boot with >> > Windows XP assuming you installed the VM from IDE. >> > >> >> >> I searched around for those scsi boot instructions, but dont see any. >> Any pointer? >> > > Is that the post from Alberto Treviño? Will try it soon. > Yup. Regards, Anthony Liguori > Thanks, > Q > > ------------------------------------------------------------------------- > This SF.net email is sponsored by the 2008 JavaOne(SM) Conference > Don't miss this year's exciting event. There's still time to save $100. > Use priority code J8TL2D2. > http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone > _______________________________________________ > kvm-devel mailing list > kvm...@li... > https://lists.sourceforge.net/lists/listinfo/kvm-devel > |
From: Anthony L. <an...@co...> - 2008-04-15 14:20:43
|
Avi Kivity wrote: > Anders wrote: > >>>> Why not enable SIGIO on stdio input, like the rest of the fd handling in >>>> qemu? >>>> >>>> >>> Thats a possibility, but I think we've now agreed that doing select() with >>> a timeout is cleaner and possibly half a cent faster. >>> >>> >> Since I can only follow this list as a hobby, I managed to miss that >> discussion. Can somebody point me to the relevant thread, as I would find >> it interesting? >> >> >> > > This was off-list. The point is, that with the iothread we don't need > to rely on signals at all (qemu needs signals to break out of the > emulation loop, kvm without iothread needs them to exit guest mode, but > the iothread can simply sit in select() waiting for an fd to become > active (or for aio to complete via a signal). > Why did we ever need sigtimedwait() anyway? Even if we were select()ing within the VCPU context, we should break out of the select() on signal delivery. Regards, Anthony Liguori |
From: Hollis B. <ho...@us...> - 2008-04-15 14:10:45
|
On Monday 14 April 2008 21:46:43 Jerone Young wrote: > 1 file changed, 13 insertions(+), 5 deletions(-) > kernel/Makefile | 18 +++++++++++++----- > > > This patch add the ability for make sync in the kernel directory to work > for mulitiple architectures and not just x86. > > Signed-off-by: Jerone Young <jy...@us...> > > diff --git a/kernel/Makefile b/kernel/Makefile > --- a/kernel/Makefile > +++ b/kernel/Makefile > @@ -1,5 +1,10 @@ include ../config.mak > include ../config.mak > > +ASM_DIR=$(ARCH) > +ifneq '$(filter $(ASM_DIR), x86_64 i386 ia64)' '' > + ASM_DIR=x86 > +endif Minor complaint: "ASM_DIR" really isn't. You use it as arch/$(ASM_DIR) and also as include/asm-$(ASM_DIR). I think what you really meant is "ARCH_DIR" (or similar). > +ifneq '$(filter $(ASM_DIR), x86_64 i386 ia64)' '' > $(call unifdef, include/linux/kvm.h) > $(call unifdef, include/linux/kvm_para.h) > $(call unifdef, include/asm-x86/kvm.h) > @@ -54,6 +60,8 @@ sync: > $(call hack, svm.c) > $(call hack, x86.c) > $(call hack, irq.h) > +endif > + Why are you keeping IA64 touching asm-x86? What happened to my suggestion of creating a per-arch HACK_FILES and UNIFDEF_FILES variables, and looping over those? -- Hollis Blanchard IBM Linux Technology Center |
From: Anthony L. <ali...@us...> - 2008-04-15 14:00:27
|
Avi Kivity wrote: > Anthony Liguori wrote: >> >> BTW, when we set O_ASYNC on the tap fd, we're eliminating >> O_NONBLOCK. This means that we have to poll loop select() when >> readv()'ing packets instead of just reading until hitting AGAIN. >> This means at least an extra syscall per packet. > > I didn't know that O_ASYNC and O_NONBLOCK were mutually exclusive. > Can you point me at the relevant documentation? I don't know that they are, but we're doing an: fcntl(fd, F_SETFL, O_ASYNC); F_SETFL is not additive so the previous O_NONBLOCK gets dropped. Regards, Anthony Liguori |
From: Avi K. <av...@qu...> - 2008-04-15 13:47:59
|
Anthony Liguori wrote: > Avi Kivity wrote: >> Anthony Liguori wrote: >>> >>> BTW, when we set O_ASYNC on the tap fd, we're eliminating >>> O_NONBLOCK. This means that we have to poll loop select() when >>> readv()'ing packets instead of just reading until hitting AGAIN. >>> This means at least an extra syscall per packet. >> >> I didn't know that O_ASYNC and O_NONBLOCK were mutually exclusive. >> Can you point me at the relevant documentation? > > I don't know that they are, but we're doing an: > > fcntl(fd, F_SETFL, O_ASYNC); > > F_SETFL is not additive so the previous O_NONBLOCK gets dropped. > Ah, I thought it's something fundamental I'm missing. The above is just a bug. -- error compiling committee.c: too many arguments to function |