diff options
author | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2015-10-23 03:29:33 +0200 |
---|---|---|
committer | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2015-10-23 03:29:33 +0200 |
commit | 15dfd0df63ce6847081d09b2bbd567cc0cc4eae1 (patch) | |
tree | 3b73f24fcef970bfcace3cbb297cfa57f3994682 /virt | |
parent | 328aa7a45af61bc0060c80847daa67fef7b9c0d0 (diff) | |
parent | 0149138c4142da287d23f9d5c6038f7fb5e30ac2 (diff) | |
download | kernel_samsung_smdk4412-15dfd0df63ce6847081d09b2bbd567cc0cc4eae1.zip kernel_samsung_smdk4412-15dfd0df63ce6847081d09b2bbd567cc0cc4eae1.tar.gz kernel_samsung_smdk4412-15dfd0df63ce6847081d09b2bbd567cc0cc4eae1.tar.bz2 |
initial merge with 3.2.72
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/assigned-dev.c | 159 | ||||
-rw-r--r-- | virt/kvm/async_pf.c | 14 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 129 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.h | 7 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 3 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 3 | ||||
-rw-r--r-- | virt/kvm/iommu.c | 78 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 260 |
8 files changed, 478 insertions, 175 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 6cc4b97..758e3b3 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -17,6 +17,8 @@ #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/namei.h> +#include <linux/fs.h> #include "irq.h" static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, @@ -58,8 +60,6 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - u32 vector; - int index; if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { spin_lock(&assigned_dev->intx_lock); @@ -68,31 +68,35 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) spin_unlock(&assigned_dev->intx_lock); } - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - index = find_index_from_host_irq(assigned_dev, irq); - if (index >= 0) { - vector = assigned_dev-> - guest_msix_entries[index].vector; - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1); - } - } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + return IRQ_HANDLED; +} + +#ifdef __KVM_HAVE_MSIX +static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int index = find_index_from_host_irq(assigned_dev, irq); + u32 vector; + + if (index >= 0) { + vector = assigned_dev->guest_msix_entries[index].vector; kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); + vector, 1); + } return IRQ_HANDLED; } +#endif /* Ack the irq line for an assigned device */ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) { - struct kvm_assigned_dev_kernel *dev; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); + struct kvm_assigned_dev_kernel *dev = + container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); @@ -110,8 +114,9 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) static void deassign_guest_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - assigned_dev->ack_notifier.gsi = -1; + if (assigned_dev->ack_notifier.gsi != -1) + kvm_unregister_irq_ack_notifier(kvm, + &assigned_dev->ack_notifier); kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, assigned_dev->guest_irq, 0); @@ -143,7 +148,7 @@ static void deassign_host_irq(struct kvm *kvm, for (i = 0; i < assigned_dev->entries_nr; i++) free_irq(assigned_dev->host_msix_entries[i].vector, - (void *)assigned_dev); + assigned_dev); assigned_dev->entries_nr = 0; kfree(assigned_dev->host_msix_entries); @@ -153,7 +158,7 @@ static void deassign_host_irq(struct kvm *kvm, /* Deal with MSI and INTx */ disable_irq(assigned_dev->host_irq); - free_irq(assigned_dev->host_irq, (void *)assigned_dev); + free_irq(assigned_dev->host_irq, assigned_dev); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) pci_disable_msi(assigned_dev->dev); @@ -205,6 +210,8 @@ static void kvm_free_assigned_device(struct kvm *kvm, else pci_restore_state(assigned_dev->dev); + assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); pci_dev_put(assigned_dev->dev); @@ -237,7 +244,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, * are going to be long delays in accepting, acking, etc. */ if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - IRQF_ONESHOT, dev->irq_name, (void *)dev)) + IRQF_ONESHOT, dev->irq_name, dev)) return -EIO; return 0; } @@ -256,7 +263,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, dev->host_irq = dev->dev->irq; if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, (void *)dev)) { + 0, dev->irq_name, dev)) { pci_disable_msi(dev->dev); return -EIO; } @@ -282,8 +289,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, for (i = 0; i < dev->entries_nr; i++) { r = request_threaded_irq(dev->host_msix_entries[i].vector, - NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, (void *)dev); + NULL, kvm_assigned_dev_thread_msix, + 0, dev->irq_name, dev); if (r) goto err; } @@ -291,7 +298,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, return 0; err: for (i -= 1; i >= 0; i--) - free_irq(dev->host_msix_entries[i].vector, (void *)dev); + free_irq(dev->host_msix_entries[i].vector, dev); pci_disable_msix(dev->dev); return r; } @@ -404,7 +411,8 @@ static int assign_guest_irq(struct kvm *kvm, if (!r) { dev->irq_requested_type |= guest_irq_type; - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + if (dev->ack_notifier.gsi != -1) + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); } else kvm_free_irq_source_id(kvm, dev->irq_source_id); @@ -474,12 +482,76 @@ out: return r; } +/* + * We want to test whether the caller has been granted permissions to + * use this device. To be able to configure and control the device, + * the user needs access to PCI configuration space and BAR resources. + * These are accessed through PCI sysfs. PCI config space is often + * passed to the process calling this ioctl via file descriptor, so we + * can't rely on access to that file. We can check for permissions + * on each of the BAR resource files, which is a pretty clear + * indicator that the user has been granted access to the device. + */ +static int probe_sysfs_permissions(struct pci_dev *dev) +{ +#ifdef CONFIG_SYSFS + int i; + bool bar_found = false; + + for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { + char *kpath, *syspath; + struct path path; + struct inode *inode; + int r; + + if (!pci_resource_len(dev, i)) + continue; + + kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); + if (!kpath) + return -ENOMEM; + + /* Per sysfs-rules, sysfs is always at /sys */ + syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); + kfree(kpath); + if (!syspath) + return -ENOMEM; + + r = kern_path(syspath, LOOKUP_FOLLOW, &path); + kfree(syspath); + if (r) + return r; + + inode = path.dentry->d_inode; + + r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); + path_put(&path); + if (r) + return r; + + bar_found = true; + } + + /* If no resources, probably something special */ + if (!bar_found) + return -EPERM; + + return 0; +#else + return -EINVAL; /* No way to control the device without sysfs */ +#endif +} + static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { int r = 0, idx; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + u8 header_type; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) + return -EINVAL; mutex_lock(&kvm->lock); idx = srcu_read_lock(&kvm->srcu); @@ -507,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, r = -EINVAL; goto out_free; } + + /* Don't allow bridges to be assigned */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) { + r = -EPERM; + goto out_put; + } + + r = probe_sysfs_permissions(dev); + if (r) + goto out_put; + if (pci_enable_device(dev)) { printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); r = -EBUSY; @@ -538,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); if (r) goto out_list_del; } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; out: srcu_read_unlock(&kvm->srcu, idx); @@ -587,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) - kvm_deassign_device(kvm, match); + kvm_deassign_device(kvm, match); kvm_free_assigned_device(kvm, match); @@ -617,7 +698,7 @@ static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, if (adev->entries_nr == 0) { adev->entries_nr = entry_nr->entry_nr; if (adev->entries_nr == 0 || - adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { + adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { r = -EINVAL; goto msix_nr_out; } diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 74268b4..bdd2c0d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); apf->page = page; - apf->done = true; spin_unlock(&vcpu->async_pf.lock); /* @@ -88,7 +87,7 @@ static void async_pf_execute(struct work_struct *work) if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - mmdrop(mm); + mmput(mm); kvm_put_kvm(vcpu->kvm); } @@ -99,10 +98,12 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) struct kvm_async_pf *work = list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); - cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) /* work was canceled */ + if (cancel_work_sync(&work->work)) { + mmput(work->mm); + kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); + } } spin_lock(&vcpu->async_pf.lock); @@ -163,13 +164,12 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, return 0; work->page = NULL; - work->done = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); work->arch = *arch; work->mm = current->mm; - atomic_inc(&work->mm->mm_count); + atomic_inc(&work->mm->mm_users); kvm_get_kvm(work->vcpu->kvm); /* this can't really happen otherwise gfn_to_pfn_async @@ -187,7 +187,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, return 1; retry_sync: kvm_put_kvm(work->vcpu->kvm); - mmdrop(work->mm); + mmput(work->mm); kmem_cache_free(async_pf_cache, work); return 0; } diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index fc84875..e56b240 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -24,10 +24,19 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, gpa_t addr, int len) { - struct kvm_coalesced_mmio_zone *zone; + /* is it in a batchable area ? + * (addr,len) is fully included in + * (zone->addr, zone->size) + */ + + return (dev->zone.addr <= addr && + addr + len <= dev->zone.addr + dev->zone.size); +} + +static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) +{ struct kvm_coalesced_mmio_ring *ring; unsigned avail; - int i; /* Are we able to batch it ? */ @@ -37,25 +46,12 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, */ ring = dev->kvm->coalesced_mmio_ring; avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; - if (avail < KVM_MAX_VCPUS) { + if (avail == 0) { /* full */ return 0; } - /* is it in a batchable area ? */ - - for (i = 0; i < dev->nb_zones; i++) { - zone = &dev->zone[i]; - - /* (addr,len) is fully included in - * (zone->addr, zone->size) - */ - - if (zone->addr <= addr && - addr + len <= zone->addr + zone->size) - return 1; - } - return 0; + return 1; } static int coalesced_mmio_write(struct kvm_io_device *this, @@ -63,10 +59,16 @@ static int coalesced_mmio_write(struct kvm_io_device *this, { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; + if (!coalesced_mmio_in_range(dev, addr, len)) return -EOPNOTSUPP; - spin_lock(&dev->lock); + spin_lock(&dev->kvm->ring_lock); + + if (!coalesced_mmio_has_room(dev)) { + spin_unlock(&dev->kvm->ring_lock); + return -EOPNOTSUPP; + } /* copy data in first free entry of the ring */ @@ -75,7 +77,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this, memcpy(ring->coalesced_mmio[ring->last].data, val, len); smp_wmb(); ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; - spin_unlock(&dev->lock); + spin_unlock(&dev->kvm->ring_lock); return 0; } @@ -83,6 +85,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this) { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); + list_del(&dev->list); + kfree(dev); } @@ -93,7 +97,6 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { int kvm_coalesced_mmio_init(struct kvm *kvm) { - struct kvm_coalesced_mmio_dev *dev; struct page *page; int ret; @@ -101,31 +104,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto out_err; - kvm->coalesced_mmio_ring = page_address(page); - ret = -ENOMEM; - dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); - if (!dev) - goto out_free_page; - spin_lock_init(&dev->lock); - kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); - dev->kvm = kvm; - kvm->coalesced_mmio_dev = dev; - - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret < 0) - goto out_free_dev; + ret = 0; + kvm->coalesced_mmio_ring = page_address(page); - return ret; + /* + * We're using this spinlock to sync access to the coalesced ring. + * The list doesn't need it's own lock since device registration and + * unregistration should only happen when kvm->slots_lock is held. + */ + spin_lock_init(&kvm->ring_lock); + INIT_LIST_HEAD(&kvm->coalesced_zones); -out_free_dev: - kvm->coalesced_mmio_dev = NULL; - kfree(dev); -out_free_page: - kvm->coalesced_mmio_ring = NULL; - __free_page(page); out_err: return ret; } @@ -139,51 +129,46 @@ void kvm_coalesced_mmio_free(struct kvm *kvm) int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone) { - struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; + int ret; + struct kvm_coalesced_mmio_dev *dev; - if (dev == NULL) - return -ENXIO; + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); + dev->kvm = kvm; + dev->zone = *zone; mutex_lock(&kvm->slots_lock); - if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { - mutex_unlock(&kvm->slots_lock); - return -ENOBUFS; - } + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, + zone->size, &dev->dev); + if (ret < 0) + goto out_free_dev; + list_add_tail(&dev->list, &kvm->coalesced_zones); + mutex_unlock(&kvm->slots_lock); - dev->zone[dev->nb_zones] = *zone; - dev->nb_zones++; + return 0; +out_free_dev: mutex_unlock(&kvm->slots_lock); - return 0; + kfree(dev); + + return ret; } int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone) { - int i; - struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; - struct kvm_coalesced_mmio_zone *z; - - if (dev == NULL) - return -ENXIO; + struct kvm_coalesced_mmio_dev *dev, *tmp; mutex_lock(&kvm->slots_lock); - i = dev->nb_zones; - while (i) { - z = &dev->zone[i - 1]; - - /* unregister all zones - * included in (zone->addr, zone->size) - */ - - if (zone->addr <= z->addr && - z->addr + z->size <= zone->addr + zone->size) { - dev->nb_zones--; - *z = dev->zone[dev->nb_zones]; + list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) + if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); + kvm_iodevice_destructor(&dev->dev); } - i--; - } mutex_unlock(&kvm->slots_lock); diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 8a5959e..b280c20 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -12,14 +12,13 @@ #ifdef CONFIG_KVM_MMIO -#define KVM_COALESCED_MMIO_ZONE_MAX 100 +#include <linux/list.h> struct kvm_coalesced_mmio_dev { + struct list_head list; struct kvm_io_device dev; struct kvm *kvm; - spinlock_t lock; - int nb_zones; - struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; + struct kvm_coalesced_mmio_zone zone; }; int kvm_coalesced_mmio_init(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 73358d2..f59c1e8 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) kvm_iodevice_init(&p->dev, &ioeventfd_ops); - ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); + ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, + &p->dev); if (ret < 0) goto unlock_fail; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index e99257c..79647cd 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -397,7 +397,8 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, + IOAPIC_MEM_LENGTH, &ioapic->dev); mutex_unlock(&kvm->slots_lock); if (ret < 0) { kvm->arch.vioapic = NULL; diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index fb0f6e4..e32c93c 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -25,23 +25,31 @@ #include <linux/list.h> #include <linux/kvm_host.h> +#include <linux/module.h> #include <linux/pci.h> +#include <linux/stat.h> #include <linux/dmar.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> +static int allow_unsafe_assigned_interrupts; +module_param_named(allow_unsafe_assigned_interrupts, + allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, + "Enable device assignment on platforms without interrupt remapping support."); + static int kvm_iommu_unmap_memslots(struct kvm *kvm); static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, unsigned long size) + gfn_t gfn, unsigned long npages) { gfn_t end_gfn; pfn_t pfn; pfn = gfn_to_pfn_memslot(kvm, slot, gfn); - end_gfn = gfn + (size >> PAGE_SHIFT); + end_gfn = gfn + npages; gfn += 1; if (is_error_pfn(pfn)) @@ -53,6 +61,14 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, return pfn; } +static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) +{ + unsigned long i; + + for (i = 0; i < npages; ++i) + kvm_release_pfn_clean(pfn + i); +} + int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) { gfn_t gfn, end_gfn; @@ -93,11 +109,15 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) while ((gfn << PAGE_SHIFT) & (page_size - 1)) page_size >>= 1; + /* Make sure hva is aligned to the page size we want to map */ + while (gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) + page_size >>= 1; + /* * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. */ - pfn = kvm_pin_pages(kvm, slot, gfn, page_size); + pfn = kvm_pin_pages(kvm, slot, gfn, page_size >> PAGE_SHIFT); if (is_error_pfn(pfn)) { gfn += 1; continue; @@ -109,6 +129,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%llx\n", pfn); + kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); goto unmap_pages; } @@ -120,7 +141,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) return 0; unmap_pages: - kvm_iommu_put_pages(kvm, slot->base_gfn, gfn); + kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); return r; } @@ -181,6 +202,8 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } + pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", assigned_dev->host_segnr, assigned_dev->host_busnr, @@ -209,6 +232,8 @@ int kvm_deassign_device(struct kvm *kvm, iommu_detach_device(domain, &pdev->dev); + pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", assigned_dev->host_segnr, assigned_dev->host_busnr, @@ -222,34 +247,41 @@ int kvm_iommu_map_guest(struct kvm *kvm) { int r; - if (!iommu_found()) { + if (!iommu_present(&pci_bus_type)) { printk(KERN_ERR "%s: iommu not found\n", __func__); return -ENODEV; } - kvm->arch.iommu_domain = iommu_domain_alloc(); - if (!kvm->arch.iommu_domain) - return -ENOMEM; + mutex_lock(&kvm->slots_lock); + + kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); + if (!kvm->arch.iommu_domain) { + r = -ENOMEM; + goto out_unlock; + } + + if (!allow_unsafe_assigned_interrupts && + !iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_INTR_REMAP)) { + printk(KERN_WARNING "%s: No interrupt remapping support," + " disallowing device assignment." + " Re-enble with \"allow_unsafe_assigned_interrupts=1\"" + " module option.\n", __func__); + iommu_domain_free(kvm->arch.iommu_domain); + kvm->arch.iommu_domain = NULL; + r = -EPERM; + goto out_unlock; + } r = kvm_iommu_map_memslots(kvm); if (r) - goto out_unmap; - - return 0; + kvm_iommu_unmap_memslots(kvm); -out_unmap: - kvm_iommu_unmap_memslots(kvm); +out_unlock: + mutex_unlock(&kvm->slots_lock); return r; } -static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) - kvm_release_pfn_clean(pfn + i); -} - static void kvm_iommu_put_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages) { @@ -314,7 +346,11 @@ int kvm_iommu_unmap_guest(struct kvm *kvm) if (!domain) return 0; + mutex_lock(&kvm->slots_lock); kvm_iommu_unmap_memslots(kvm); + kvm->arch.iommu_domain = NULL; + mutex_unlock(&kvm->slots_lock); + iommu_domain_free(domain); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index caa3bb1..53c1746 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,9 +47,12 @@ #include <linux/srcu.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/sort.h> +#include <linux/bsearch.h> #include <asm/processor.h> #include <asm/io.h> +#include <asm/ioctl.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -84,6 +87,10 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +#ifdef CONFIG_COMPAT +static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, + unsigned long arg); +#endif static int hardware_enable_all(void); static void hardware_disable_all(void); @@ -97,8 +104,8 @@ static bool largepages_enabled = true; static struct page *hwpoison_page; static pfn_t hwpoison_pfn; -static struct page *fault_page; -static pfn_t fault_pfn; +struct page *fault_page; +pfn_t fault_pfn; inline int kvm_is_mmio_pfn(pfn_t pfn) { @@ -283,15 +290,15 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, */ idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); + kvm->mmu_notifier_seq++; need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); - /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, @@ -329,12 +336,12 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, for (; start < end; start += PAGE_SIZE) need_tlb_flush |= kvm_unmap_hva(kvm, start); need_tlb_flush |= kvm->tlbs_dirty; - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); - /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush) kvm_flush_remote_tlbs(kvm); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -372,13 +379,14 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - young = kvm_age_hva(kvm, address); - spin_unlock(&kvm->mmu_lock); - srcu_read_unlock(&kvm->srcu, idx); + young = kvm_age_hva(kvm, address); if (young) kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + return young; } @@ -767,7 +775,7 @@ skip_lpage: new.userspace_addr = mem->userspace_addr; #endif /* not defined CONFIG_S390 */ - if (!npages) { + if (!npages || base_gfn != old.base_gfn) { r = -ENOMEM; slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!slots) @@ -781,8 +789,10 @@ skip_lpage: old_memslots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); - /* From this point no new shadow pages pointing to a deleted - * memslot will be created. + /* slot was deleted or moved, clear iommu mapping */ + kvm_iommu_unmap_pages(kvm, &old); + /* From this point no new shadow pages pointing to a deleted, + * or moved, memslot will be created. * * validation of sp->gfn happens in: * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) @@ -813,6 +823,13 @@ skip_lpage: slots->nmemslots = mem->slot + 1; slots->generation++; + /* map new memory slot into the iommu */ + if (npages) { + r = kvm_iommu_map_pages(kvm, &new); + if (r) + goto out_slots; + } + /* actual memory is freed via old in kvm_free_physmem_slot below */ if (!npages) { new.rmap = NULL; @@ -828,11 +845,20 @@ skip_lpage: kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + /* + * If the new memory slot is created, we need to clear all + * mmio sptes. + */ + if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) + kvm_arch_flush_shadow(kvm); + kvm_free_physmem_slot(&old, &new); kfree(old_memslots); return 0; +out_slots: + kfree(slots); out_free: kvm_free_physmem_slot(&new, &old); out: @@ -928,6 +954,18 @@ int is_fault_pfn(pfn_t pfn) } EXPORT_SYMBOL_GPL(is_fault_pfn); +int is_noslot_pfn(pfn_t pfn) +{ + return pfn == bad_pfn; +} +EXPORT_SYMBOL_GPL(is_noslot_pfn); + +int is_invalid_pfn(pfn_t pfn) +{ + return pfn == hwpoison_pfn || pfn == fault_pfn; +} +EXPORT_SYMBOL_GPL(is_invalid_pfn); + static inline unsigned long bad_hva(void) { return PAGE_OFFSET; @@ -1346,7 +1384,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; - r = copy_to_user((void __user *)addr + offset, data, len); + r = __copy_to_user((void __user *)addr + offset, data, len); if (r) return -EFAULT; mark_page_dirty(kvm, gfn); @@ -1407,7 +1445,6 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, /* Use the slow path for cross page reads and writes. */ ghc->memslot = NULL; } - return 0; } EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); @@ -1429,7 +1466,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (kvm_is_error_hva(ghc->hva)) return -EFAULT; - r = copy_to_user((void __user *)ghc->hva, data, len); + r = __copy_to_user((void __user *)ghc->hva, data, len); if (r) return -EFAULT; mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); @@ -1438,6 +1475,31 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, } EXPORT_SYMBOL_GPL(kvm_write_guest_cached); +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + int r; + + BUG_ON(len > ghc->len); + + if (slots->generation != ghc->generation) + kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len); + + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + + if (kvm_is_error_hva(ghc->hva)) + return -EFAULT; + + r = __copy_from_user(data, (void __user *)ghc->hva, len); + if (r) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_read_guest_cached); + int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page, @@ -1610,7 +1672,9 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, - .compat_ioctl = kvm_vcpu_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = kvm_vcpu_compat_ioctl, +#endif .mmap = kvm_vcpu_mmap, .llseek = noop_llseek, }; @@ -1631,6 +1695,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) int r; struct kvm_vcpu *vcpu, *v; + if (id >= KVM_MAX_VCPUS) + return -EINVAL; + vcpu = kvm_arch_vcpu_create(kvm, id); if (IS_ERR(vcpu)) return PTR_ERR(vcpu); @@ -1708,6 +1775,9 @@ static long kvm_vcpu_ioctl(struct file *filp, if (vcpu->kvm->mm != current->mm) return -EIO; + if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) + return -EINVAL; + #if defined(CONFIG_S390) || defined(CONFIG_PPC) /* * Special cases: vcpu ioctls that are asynchronous to vcpu execution, @@ -1903,6 +1973,50 @@ out: return r; } +#ifdef CONFIG_COMPAT +static long kvm_vcpu_compat_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = compat_ptr(arg); + int r; + + if (vcpu->kvm->mm != current->mm) + return -EIO; + + switch (ioctl) { + case KVM_SET_SIGNAL_MASK: { + struct kvm_signal_mask __user *sigmask_arg = argp; + struct kvm_signal_mask kvm_sigmask; + compat_sigset_t csigset; + sigset_t sigset; + + if (argp) { + r = -EFAULT; + if (copy_from_user(&kvm_sigmask, argp, + sizeof kvm_sigmask)) + goto out; + r = -EINVAL; + if (kvm_sigmask.len != sizeof csigset) + goto out; + r = -EFAULT; + if (copy_from_user(&csigset, sigmask_arg->sigset, + sizeof csigset)) + goto out; + } + sigset_from_compat(&sigset, &csigset); + r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); + break; + } + default: + r = kvm_vcpu_ioctl(filp, ioctl, arg); + } + +out: + return r; +} +#endif + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2330,24 +2444,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) int i; for (i = 0; i < bus->dev_count; i++) { - struct kvm_io_device *pos = bus->devs[i]; + struct kvm_io_device *pos = bus->range[i].dev; kvm_iodevice_destructor(pos); } kfree(bus); } +int kvm_io_bus_sort_cmp(const void *p1, const void *p2) +{ + const struct kvm_io_range *r1 = p1; + const struct kvm_io_range *r2 = p2; + + if (r1->addr < r2->addr) + return -1; + if (r1->addr + r1->len > r2->addr + r2->len) + return 1; + return 0; +} + +int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, + gpa_t addr, int len) +{ + if (bus->dev_count == NR_IOBUS_DEVS) + return -ENOSPC; + + bus->range[bus->dev_count++] = (struct kvm_io_range) { + .addr = addr, + .len = len, + .dev = dev, + }; + + sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), + kvm_io_bus_sort_cmp, NULL); + + return 0; +} + +int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, + gpa_t addr, int len) +{ + struct kvm_io_range *range, key; + int off; + + key = (struct kvm_io_range) { + .addr = addr, + .len = len, + }; + + range = bsearch(&key, bus->range, bus->dev_count, + sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); + if (range == NULL) + return -ENOENT; + + off = range - bus->range; + + while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) + off--; + + return off; +} + /* kvm_io_bus_write - called under kvm->slots_lock */ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { - int i; + int idx; struct kvm_io_bus *bus; + struct kvm_io_range range; + + range = (struct kvm_io_range) { + .addr = addr, + .len = len, + }; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - for (i = 0; i < bus->dev_count; i++) - if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) + idx = kvm_io_bus_get_first_dev(bus, addr, len); + if (idx < 0) + return -EOPNOTSUPP; + + while (idx < bus->dev_count && + kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { + if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) return 0; + idx++; + } + return -EOPNOTSUPP; } @@ -2355,19 +2537,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { - int i; + int idx; struct kvm_io_bus *bus; + struct kvm_io_range range; + + range = (struct kvm_io_range) { + .addr = addr, + .len = len, + }; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - for (i = 0; i < bus->dev_count; i++) - if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) + idx = kvm_io_bus_get_first_dev(bus, addr, len); + if (idx < 0) + return -EOPNOTSUPP; + + while (idx < bus->dev_count && + kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { + if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) return 0; + idx++; + } + return -EOPNOTSUPP; } /* Caller must hold slots_lock. */ -int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + int len, struct kvm_io_device *dev) { struct kvm_io_bus *new_bus, *bus; @@ -2379,7 +2575,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, if (!new_bus) return -ENOMEM; memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); - new_bus->devs[new_bus->dev_count++] = dev; + kvm_io_bus_insert_dev(new_bus, dev, addr, len); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); @@ -2403,9 +2599,13 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, r = -ENOENT; for (i = 0; i < new_bus->dev_count; i++) - if (new_bus->devs[i] == dev) { + if (new_bus->range[i].dev == dev) { r = 0; - new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; + new_bus->dev_count--; + new_bus->range[i] = new_bus->range[new_bus->dev_count]; + sort(new_bus->range, new_bus->dev_count, + sizeof(struct kvm_io_range), + kvm_io_bus_sort_cmp, NULL); break; } |