#include <linux/virtio_ring.h>
#include <linux/types.h>
#include <sys/uio.h>
#include <stdlib.h>

#include "kvm/guest_compat.h"
#include "kvm/barrier.h"
#include "kvm/virtio.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio-mmio.h"
#include "kvm/util.h"
#include "kvm/kvm.h"

const char *virtio_trans_name(enum virtio_trans trans)
{
	if (trans == VIRTIO_PCI || trans == VIRTIO_PCI_LEGACY)
		return "pci";
	else if (trans == VIRTIO_MMIO || trans == VIRTIO_MMIO_LEGACY)
		return "mmio";
	return "unknown";
}

int virtio_transport_parser(const struct option *opt, const char *arg, int unset)
{
	enum virtio_trans *type = opt->value;
	struct kvm *kvm;

	if (!strcmp(opt->long_name, "virtio-transport")) {
		if (!strcmp(arg, "pci")) {
			*type = VIRTIO_PCI;
		} else if (!strcmp(arg, "pci-legacy")) {
			*type = VIRTIO_PCI_LEGACY;
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
		} else if (!strcmp(arg, "mmio")) {
			*type = VIRTIO_MMIO;
		} else if (!strcmp(arg, "mmio-legacy")) {
			*type = VIRTIO_MMIO_LEGACY;
#endif
		} else {
			pr_err("virtio-transport: unknown type \"%s\"\n", arg);
			return -1;
		}
	} else if (!strcmp(opt->long_name, "virtio-legacy")) {
		*type = VIRTIO_PCI_LEGACY;
	} else if (!strcmp(opt->long_name, "force-pci")) {
		kvm = opt->ptr;
		kvm->cfg.virtio_transport = VIRTIO_PCI;
	}

	return 0;
}

void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
	u16 idx = virtio_guest_to_host_u16(queue->endian, queue->vring.used->idx);

	/*
	 * Use a wmb to ensure the used element has been updated with head
	 * and len before we advance idx: we must not publish idx to the
	 * guest until the used element is ready to be consumed.
	 */
	wmb();
	idx += jump;
	queue->vring.used->idx = virtio_host_to_guest_u16(queue->endian, idx);
}

struct vring_used_elem *
virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
				    u32 len, u16 offset)
{
	struct vring_used_elem *used_elem;
	u16 idx = virtio_guest_to_host_u16(queue->endian, queue->vring.used->idx);

	idx += offset;
	used_elem	= &queue->vring.used->ring[idx % queue->vring.num];
	used_elem->id	= virtio_host_to_guest_u32(queue->endian, head);
	used_elem->len	= virtio_host_to_guest_u32(queue->endian, len);

	return used_elem;
}

struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue,
						  u32 head, u32 len)
{
	struct vring_used_elem *used_elem;

	used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
	virt_queue__used_idx_advance(queue, 1);

	return used_elem;
}
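/*
 * Illustrative sketch (not called from this file): a device completing
 * several buffers in one go can stage used elements with
 * virt_queue__set_used_elem_no_update() and publish them with a single
 * index update, paying for one wmb() instead of one per buffer:
 *
 *	for (i = 0; i < done; i++)
 *		virt_queue__set_used_elem_no_update(vq, heads[i], lens[i], i);
 *	virt_queue__used_idx_advance(vq, done);
 */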
static inline bool virt_desc__test_flag(struct virt_queue *vq,
					struct vring_desc *desc, u16 flag)
{
	return !!(virtio_guest_to_host_u16(vq->endian, desc->flags) & flag);
}

/*
 * Each buffer in the virtqueues is actually a chain of descriptors.  This
 * function returns the next descriptor in the chain, or max if we're at the
 * end.
 */
static unsigned next_desc(struct virt_queue *vq, struct vring_desc *desc,
			  unsigned int i, unsigned int max)
{
	unsigned int next;

	/* If this descriptor says it doesn't chain, we're done. */
	if (!virt_desc__test_flag(vq, &desc[i], VRING_DESC_F_NEXT))
		return max;

	next = virtio_guest_to_host_u16(vq->endian, desc[i].next);

	/* Ensure they're not leading us off the end of the descriptor table. */
	return min(next, max);
}

u16 virt_queue__get_head_iov(struct virt_queue *vq, struct iovec iov[],
			     u16 *out, u16 *in, u16 head, struct kvm *kvm)
{
	struct vring_desc *desc;
	u16 idx;
	u16 max;

	idx = head;
	*out = *in = 0;
	max = vq->vring.num;
	desc = vq->vring.desc;

	if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_INDIRECT)) {
		max = virtio_guest_to_host_u32(vq->endian, desc[idx].len) /
		      sizeof(struct vring_desc);
		desc = guest_flat_to_host(kvm, virtio_guest_to_host_u64(vq->endian,
									desc[idx].addr));
		idx = 0;
	}

	do {
		/* Translate this descriptor into an iovec entry. */
		iov[*out + *in].iov_len = virtio_guest_to_host_u32(vq->endian,
								   desc[idx].len);
		iov[*out + *in].iov_base = guest_flat_to_host(kvm,
					   virtio_guest_to_host_u64(vq->endian,
								    desc[idx].addr));
		/* If this is an input descriptor, increment that count. */
		if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_WRITE))
			(*in)++;
		else
			(*out)++;
	} while ((idx = next_desc(vq, desc, idx, max)) != max);

	return head;
}

u16 virt_queue__get_iov(struct virt_queue *vq, struct iovec iov[],
			u16 *out, u16 *in, struct kvm *kvm)
{
	u16 head;

	head = virt_queue__pop(vq);

	return virt_queue__get_head_iov(vq, iov, out, in, head, kvm);
}

/* in and out are relative to guest */
u16 virt_queue__get_inout_iov(struct kvm *kvm, struct virt_queue *queue,
			      struct iovec in_iov[], struct iovec out_iov[],
			      u16 *in, u16 *out)
{
	struct vring_desc *desc;
	u16 head, idx;

	idx = head = virt_queue__pop(queue);
	*out = *in = 0;
	do {
		u64 addr;

		desc = virt_queue__get_desc(queue, idx);
		addr = virtio_guest_to_host_u64(queue->endian, desc->addr);
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_WRITE)) {
			in_iov[*in].iov_base = guest_flat_to_host(kvm, addr);
			in_iov[*in].iov_len = virtio_guest_to_host_u32(queue->endian,
								       desc->len);
			(*in)++;
		} else {
			out_iov[*out].iov_base = guest_flat_to_host(kvm, addr);
			out_iov[*out].iov_len = virtio_guest_to_host_u32(queue->endian,
									 desc->len);
			(*out)++;
		}
		if (virt_desc__test_flag(queue, desc, VRING_DESC_F_NEXT))
			idx = virtio_guest_to_host_u16(queue->endian, desc->next);
		else
			break;
	} while (1);

	return head;
}
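/*
 * Illustrative sketch (process_request() and QUEUE_SIZE are hypothetical,
 * supplied by the device): the usual consume/complete cycle a backend
 * builds from the helpers above.
 *
 *	struct iovec iov[QUEUE_SIZE];
 *	u16 out, in, head;
 *	u32 len;
 *
 *	while (virt_queue__available(vq)) {
 *		head = virt_queue__get_iov(vq, iov, &out, &in, kvm);
 *		len = process_request(iov, out, in);
 *		virt_queue__set_used_elem(vq, head, len);
 *	}
 *	if (virtio_queue__should_signal(vq))
 *		vdev->ops->signal_vq(kvm, vdev, vq_id);
 */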
void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
			   struct virt_queue *vq, size_t nr_descs)
{
	struct vring_addr *addr = &vq->vring_addr;

	vq->endian		= vdev->endian;
	vq->use_event_idx	= (vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
	vq->enabled		= true;
	vq->vdev		= vdev;

	if (addr->legacy) {
		unsigned long base = (u64)addr->pfn * addr->pgsize;
		void *p = guest_flat_to_host(kvm, base);

		vring_init(&vq->vring, nr_descs, p, addr->align);
	} else {
		u64 desc = (u64)addr->desc_hi << 32 | addr->desc_lo;
		u64 avail = (u64)addr->avail_hi << 32 | addr->avail_lo;
		u64 used = (u64)addr->used_hi << 32 | addr->used_lo;

		vq->vring = (struct vring) {
			.desc	= guest_flat_to_host(kvm, desc),
			.used	= guest_flat_to_host(kvm, used),
			.avail	= guest_flat_to_host(kvm, avail),
			.num	= nr_descs,
		};
	}
}

void virtio_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
		    void *dev, int num)
{
	struct virt_queue *vq = vdev->ops->get_vq(kvm, dev, num);

	if (vq->enabled && vdev->ops->exit_vq)
		vdev->ops->exit_vq(kvm, dev, num);
	memset(vq, 0, sizeof(*vq));
}

int virtio__get_dev_specific_field(int offset, bool msix, u32 *config_off)
{
	if (msix) {
		if (offset < 4)
			return VIRTIO_PCI_O_MSIX;
		else
			offset -= 4;
	}

	*config_off = offset;

	return VIRTIO_PCI_O_CONFIG;
}

bool virtio_queue__should_signal(struct virt_queue *vq)
{
	u16 old_idx, new_idx, event_idx;

	/*
	 * Use a mb to ensure the used idx has been increased before we
	 * signal the guest, and that we don't read a stale value for
	 * used_event.  Without it we might fail to send a notification
	 * we owe the guest, or the guest may ignore the queue because
	 * it doesn't yet see the updated idx.
	 */
	mb();

	if (!vq->use_event_idx) {
		/*
		 * When VIRTIO_RING_F_EVENT_IDX isn't negotiated, interrupt the
		 * guest if it didn't explicitly request to be left alone.
		 */
		return !(virtio_guest_to_host_u16(vq->endian, vq->vring.avail->flags) &
			 VRING_AVAIL_F_NO_INTERRUPT);
	}

	old_idx		= vq->last_used_signalled;
	new_idx		= virtio_guest_to_host_u16(vq->endian, vq->vring.used->idx);
	event_idx	= virtio_guest_to_host_u16(vq->endian,
						   vring_used_event(&vq->vring));

	if (vring_need_event(event_idx, new_idx, old_idx)) {
		vq->last_used_signalled = new_idx;
		return true;
	}

	return false;
}

void virtio_set_guest_features(struct kvm *kvm, struct virtio_device *vdev,
			       void *dev, u64 features)
{
	/* TODO: fail negotiation if features & ~host_features */

	vdev->features |= features;
}

void virtio_notify_status(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, u8 status)
{
	u32 ext_status = status;

	vdev->status &= ~VIRTIO_CONFIG_S_MASK;
	vdev->status |= status;

	/* Add a few hints to help devices */
	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
	    !(vdev->status & VIRTIO__STATUS_START)) {
		vdev->status |= VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_START;
	} else if (!status && (vdev->status & VIRTIO__STATUS_START)) {
		vdev->status &= ~VIRTIO__STATUS_START;
		ext_status |= VIRTIO__STATUS_STOP;

		/*
		 * Reset virtqueues and stop all traffic now, so that the
		 * device can safely reset the backend in notify_status().
		 */
		vdev->ops->reset(kvm, vdev);
	}
	if (!status)
		ext_status |= VIRTIO__STATUS_CONFIG;

	if (vdev->ops->notify_status)
		vdev->ops->notify_status(kvm, dev, ext_status);
}
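/*
 * Illustrative sketch (hypothetical backend callback): the extended status
 * bits set above let a device react to lifecycle edges without tracking
 * them itself.
 *
 *	static void example_notify_status(struct kvm *kvm, void *dev, u32 status)
 *	{
 *		if (status & VIRTIO__STATUS_START)
 *			start_backend_threads(dev);	// hypothetical
 *		if (status & VIRTIO__STATUS_STOP)
 *			quiesce_io(dev);		// vqs were already reset
 *		if (status & VIRTIO__STATUS_CONFIG)
 *			init_config_space(dev);		// hypothetical
 *	}
 */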
bool virtio_access_config(struct kvm *kvm, struct virtio_device *vdev,
			  void *dev, unsigned long offset, void *data,
			  size_t size, bool is_write)
{
	void *in, *out, *config;
	size_t config_size = vdev->ops->get_config_size(kvm, dev);

	if (WARN_ONCE(offset + size > config_size,
		      "Config access offset (%lu) is beyond config size (%zu)\n",
		      offset, config_size))
		return false;

	config = vdev->ops->get_config(kvm, dev) + offset;

	in = is_write ? data : config;
	out = is_write ? config : data;

	switch (size) {
	case 1:
		*(u8 *)out = *(u8 *)in;
		break;
	case 2:
		*(u16 *)out = *(u16 *)in;
		break;
	case 4:
		*(u32 *)out = *(u32 *)in;
		break;
	case 8:
		*(u64 *)out = *(u64 *)in;
		break;
	default:
		WARN_ONCE(1, "%s: invalid access size\n", __func__);
		return false;
	}

	return true;
}

int virtio_init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		struct virtio_ops *ops, enum virtio_trans trans,
		int device_id, int subsys_id, int class)
{
	void *virtio;
	int r;

	switch (trans) {
	case VIRTIO_PCI_LEGACY:
		vdev->legacy			= true;
		/* fall through */
	case VIRTIO_PCI:
		virtio = calloc(1, sizeof(struct virtio_pci));
		if (!virtio)
			return -ENOMEM;
		vdev->virtio			= virtio;
		vdev->ops			= ops;
		vdev->ops->signal_vq		= virtio_pci__signal_vq;
		vdev->ops->signal_config	= virtio_pci__signal_config;
		vdev->ops->init			= virtio_pci__init;
		vdev->ops->exit			= virtio_pci__exit;
		vdev->ops->reset		= virtio_pci__reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	case VIRTIO_MMIO_LEGACY:
		vdev->legacy			= true;
		/* fall through */
	case VIRTIO_MMIO:
		virtio = calloc(1, sizeof(struct virtio_mmio));
		if (!virtio)
			return -ENOMEM;
		vdev->virtio			= virtio;
		vdev->ops			= ops;
		vdev->ops->signal_vq		= virtio_mmio_signal_vq;
		vdev->ops->signal_config	= virtio_mmio_signal_config;
		vdev->ops->init			= virtio_mmio_init;
		vdev->ops->exit			= virtio_mmio_exit;
		vdev->ops->reset		= virtio_mmio_reset;
		r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
		break;
	default:
		r = -1;
	}

	return r;
}

void virtio_exit(struct kvm *kvm, struct virtio_device *vdev)
{
	if (vdev->ops && vdev->ops->exit)
		vdev->ops->exit(kvm, vdev);
}

int virtio_compat_add_message(const char *device, const char *config)
{
	int len = 1024;
	int compat_id;
	char *title;
	char *desc;

	title = malloc(len);
	if (!title)
		return -ENOMEM;

	desc = malloc(len);
	if (!desc) {
		free(title);
		return -ENOMEM;
	}

	snprintf(title, len, "%s device was not detected.", device);
	snprintf(desc, len, "While you have requested a %s device, "
		 "the guest kernel did not initialize it.\n"
		 "\tPlease make sure that the guest kernel was "
		 "compiled with %s=y enabled in .config.",
		 device, config);

	compat_id = compat__add_message(title, desc);

	free(desc);
	free(title);

	return compat_id;
}
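/*
 * Illustrative sketch (hypothetical "foo" device, names invented): a device
 * registers itself by filling a struct virtio_ops with its callbacks and
 * handing it to virtio_init(), which plugs the transport ops in on top:
 *
 *	static struct virtio_ops foo_ops = {
 *		.get_config		= foo_get_config,
 *		.get_config_size	= foo_get_config_size,
 *		.get_vq			= foo_get_vq,
 *		...
 *	};
 *
 *	r = virtio_init(kvm, fdev, &fdev->vdev, &foo_ops,
 *			kvm->cfg.virtio_transport, PCI_DEVICE_ID_VIRTIO_FOO,
 *			VIRTIO_ID_FOO, PCI_CLASS_FOO);
 */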