From 98fda169290b3b28c0f2db2b8f02290c13da50ef Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Wed, 4 Sep 2013 22:32:24 +0200 Subject: kvm: remove .done from struct kvm_async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '.done' is used to mark the completion of 'async_pf_execute()', but 'cancel_work_sync()' returns true when the work was canceled, so we use it instead. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- virt/kvm/async_pf.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8a39dda7a325..b197950ac4d5 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); apf->page = page; - apf->done = true; spin_unlock(&vcpu->async_pf.lock); /* @@ -99,9 +98,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) struct kvm_async_pf *work = list_entry(vcpu->async_pf.queue.next, typeof(*work), queue); - cancel_work_sync(&work->work); list_del(&work->queue); - if (!work->done) { /* work was canceled */ + if (cancel_work_sync(&work->work)) { mmdrop(work->mm); kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */ kmem_cache_free(async_pf_cache, work); @@ -166,7 +164,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, return 0; work->page = NULL; - work->done = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); -- cgit v1.2.3-70-g09d2 From 4fa92fb25ae5a2d79d872ab54df511c831b1f363 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Sep 2013 12:57:17 +0200 Subject: KVM: cleanup (physical) CPU hotplug Remove the useless argument, and do not do anything if there are no VMs running at the time of the hotplug. Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 979bff485fb0..426051d66173 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2681,10 +2681,11 @@ static void hardware_enable_nolock(void *junk) } } -static void hardware_enable(void *junk) +static void hardware_enable(void) { raw_spin_lock(&kvm_lock); - hardware_enable_nolock(junk); + if (kvm_usage_count) + hardware_enable_nolock(NULL); raw_spin_unlock(&kvm_lock); } @@ -2698,10 +2699,11 @@ static void hardware_disable_nolock(void *junk) kvm_arch_hardware_disable(NULL); } -static void hardware_disable(void *junk) +static void hardware_disable(void) { raw_spin_lock(&kvm_lock); - hardware_disable_nolock(junk); + if (kvm_usage_count) + hardware_disable_nolock(NULL); raw_spin_unlock(&kvm_lock); } @@ -2748,20 +2750,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, { int cpu = (long)v; - if (!kvm_usage_count) - return NOTIFY_OK; - val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); - hardware_disable(NULL); + hardware_disable(); break; case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - hardware_enable(NULL); + hardware_enable(); break; } return NOTIFY_OK; -- cgit v1.2.3-70-g09d2 From 4a937f96f3a29c58b7edd349d2e4dfac371efdf2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Sep 2013 12:58:35 +0200 Subject: KVM: protect kvm_usage_count with its own spinlock The VM list need not be protected by a raw spinlock. Separate the two so that kvm_lock can be made non-raw. Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 6 +++++- virt/kvm/kvm_main.c | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index a9f366e73c8e..ba9e1c2150c2 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -135,7 +135,11 @@ Name: kvm_lock Type: raw_spinlock Arch: any Protects: - vm_list - - hardware virtualization enable/disable + +Name: kvm_count_lock +Type: raw_spinlock_t +Arch: any +Protects: - hardware virtualization enable/disable Comment: 'raw' because hardware enabling/disabling must be atomic /wrt migration. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 426051d66173..eb94343c2ed2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -71,6 +71,7 @@ MODULE_LICENSE("GPL"); */ DEFINE_RAW_SPINLOCK(kvm_lock); +static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; @@ -2683,10 +2684,10 @@ static void hardware_enable_nolock(void *junk) static void hardware_enable(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) hardware_enable_nolock(NULL); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_nolock(void *junk) @@ -2701,10 +2702,10 @@ static void hardware_disable_nolock(void *junk) static void hardware_disable(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); if (kvm_usage_count) hardware_disable_nolock(NULL); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static void hardware_disable_all_nolock(void) @@ -2718,16 +2719,16 @@ static void hardware_disable_all_nolock(void) static void hardware_disable_all(void) { - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); hardware_disable_all_nolock(); - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); } static int hardware_enable_all(void) { int r = 0; - raw_spin_lock(&kvm_lock); + raw_spin_lock(&kvm_count_lock); kvm_usage_count++; if (kvm_usage_count == 1) { @@ -2740,7 +2741,7 @@ static int hardware_enable_all(void) } } - raw_spin_unlock(&kvm_lock); + raw_spin_unlock(&kvm_count_lock); return r; } @@ -3130,7 +3131,7 @@ static int kvm_suspend(void) static void kvm_resume(void) { if (kvm_usage_count) { - WARN_ON(raw_spin_is_locked(&kvm_lock)); + WARN_ON(raw_spin_is_locked(&kvm_count_lock)); hardware_enable_nolock(NULL); } } -- cgit v1.2.3-70-g09d2 From 2f303b74a62fb74983c0a66e2df353be963c527c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Sep 2013 13:53:07 +0200 Subject: KVM: Convert kvm_lock back to non-raw spinlock In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"), the kvm_lock was made a raw lock. However, the kvm mmu_shrink() function tries to grab the (non-raw) mmu_lock within the scope of the raw locked kvm_lock being held. This leads to the following: BUG: sleeping function called from invalid context at kernel/rtmutex.c:659 in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0 Preemption disabled at:[] mmu_shrink+0x5c/0x1b0 [kvm] Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt Call Trace: [] __might_sleep+0xfd/0x160 [] rt_spin_lock+0x24/0x50 [] mmu_shrink+0xec/0x1b0 [kvm] [] shrink_slab+0x17d/0x3a0 [] ? mem_cgroup_iter+0x130/0x260 [] balance_pgdat+0x54a/0x730 [] ? set_pgdat_percpu_threshold+0xa7/0xd0 [] kswapd+0x18f/0x490 [] ? get_parent_ip+0x11/0x50 [] ? __init_waitqueue_head+0x50/0x50 [] ? balance_pgdat+0x730/0x730 [] kthread+0xdb/0xe0 [] ? finish_task_switch+0x52/0x100 [] kernel_thread_helper+0x4/0x10 [] ? __init_kthread_worker+0x After the previous patch, kvm_lock need not be a raw spinlock anymore, so change it back. Reported-by: Paul Gortmaker Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 2 +- arch/x86/kvm/mmu.c | 5 ++--- arch/x86/kvm/x86.c | 8 ++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 18 +++++++++--------- 5 files changed, 17 insertions(+), 18 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index ba9e1c2150c2..f8869410d40c 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update(). ------------ Name: kvm_lock -Type: raw_spinlock +Type: spinlock_t Arch: any Protects: - vm_list diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dce0df8150df..cf95cfe050a6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4428,7 +4428,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -4478,9 +4478,8 @@ unlock: break; } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return freed; - } static unsigned long diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5ca72a5cdb6..187f824b1454 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5263,7 +5263,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) @@ -5273,7 +5273,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va send_ipi = 1; } } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -5426,12 +5426,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); atomic_set(&kvm_guest_has_master_clock, 0); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 749bdb12cd15..7c961e1e9270 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -142,7 +142,7 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -extern raw_spinlock_t kvm_lock; +extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eb94343c2ed2..d469114aff09 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -70,7 +70,7 @@ MODULE_LICENSE("GPL"); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_RAW_SPINLOCK(kvm_lock); +DEFINE_SPINLOCK(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return kvm; @@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm) struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_del(&kvm->vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) kvm_io_bus_destroy(kvm->buses[i]); @@ -3054,10 +3054,10 @@ static int vm_stat_get(void *_offset, u64 *val) struct kvm *kvm; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) *val += *(u32 *)((void *)kvm + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } @@ -3071,12 +3071,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) int i; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) *val += *(u32 *)((void *)vcpu + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } -- cgit v1.2.3-70-g09d2 From 29242cb5c63b1f8e12e8055ba1a6c3e0004fa86d Mon Sep 17 00:00:00 2001 From: Andre Richter Date: Wed, 2 Oct 2013 12:23:26 +0200 Subject: virt/kvm/iommu.c: Add leading zeros to device's BDF notation in debug messages When KVM (de)assigns PCI(e) devices to VMs, a debug message is printed including the BDF notation of the respective device. Currently, the BDF notation does not have the commonly used leading zeros. This produces messages like "assign device 0:1:8.0", which look strange at first sight. The patch fixes this by exchanging the printk(KERN_DEBUG ...) with dev_info() and also inserts "kvm" into the debug message, so that it is obvious where the message comes from. Also reduces LoC. Acked-by: Alex Williamson Signed-off-by: Andre Richter Signed-off-by: Gleb Natapov --- virt/kvm/iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 72a130bc448a..a3b14109049b 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm, pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm assign device\n"); return 0; out_unmap: @@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm, pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; - printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", - assigned_dev->host_segnr, - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); + dev_info(&pdev->dev, "kvm deassign device\n"); return 0; } -- cgit v1.2.3-70-g09d2 From f2e106692d5189303997ad7b96de8d8123aa5613 Mon Sep 17 00:00:00 2001 From: chai wen Date: Mon, 14 Oct 2013 22:22:33 +0800 Subject: KVM: Drop FOLL_GET in GUP when doing async page fault Page pinning is not mandatory in kvm async page fault processing since after async page fault event is delivered to a guest it accesses page once again and does its own GUP. Drop the FOLL_GET flag in GUP in async_pf code, and do some simplifying in check/clear processing. Suggested-by: Gleb Natapov Signed-off-by: Gu zheng Signed-off-by: chai wen Signed-off-by: Gleb Natapov --- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- include/trace/events/kvm.h | 10 ++++------ virt/kvm/async_pf.c | 17 +++++------------ 4 files changed, 12 insertions(+), 21 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c951c71dc80b..edf2a07df3a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7298,7 +7298,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) int r; if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || - is_error_page(work->page)) + work->wakeup_all) return; r = kvm_mmu_reload(vcpu); @@ -7408,7 +7408,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct x86_exception fault; trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (is_error_page(work->page)) + if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6dccde755f6..c9d4236ab442 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -189,7 +189,7 @@ struct kvm_async_pf { gva_t gva; unsigned long addr; struct kvm_arch_async_pf arch; - struct page *page; + bool wakeup_all; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7005d1109ec9..131a0bda7aec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TRACE_EVENT( kvm_async_pf_completed, - TP_PROTO(unsigned long address, struct page *page, u64 gva), - TP_ARGS(address, page, gva), + TP_PROTO(unsigned long address, u64 gva), + TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) - __field(pfn_t, pfn) __field(u64, gva) ), TP_fast_assign( __entry->address = address; - __entry->pfn = page ? page_to_pfn(page) : 0; __entry->gva = gva; ), - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, - __entry->address, __entry->pfn) + TP_printk("gva %#llx address %#lx", __entry->gva, + __entry->address) ); #endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index b197950ac4d5..8631d9c14320 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) static void async_pf_execute(struct work_struct *work) { - struct page *page = NULL; struct kvm_async_pf *apf = container_of(work, struct kvm_async_pf, work); struct mm_struct *mm = apf->mm; @@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work) use_mm(mm); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); - apf->page = page; spin_unlock(&vcpu->async_pf.lock); /* @@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, page, gva); + trace_kvm_async_pf_completed(addr, gva); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); @@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.done.next, typeof(*work), link); list_del(&work->link); - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } spin_unlock(&vcpu->async_pf.lock); @@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->link); spin_unlock(&vcpu->async_pf.lock); - if (work->page) - kvm_arch_async_page_ready(vcpu, work); + kvm_arch_async_page_ready(vcpu, work); kvm_arch_async_page_present(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } } @@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, if (!work) return 0; - work->page = NULL; + work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); @@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) if (!work) return -ENOMEM; - work->page = KVM_ERR_PTR_BAD_PAGE; + work->wakeup_all = true; INIT_LIST_HEAD(&work->queue); /* for list_del to work */ spin_lock(&vcpu->async_pf.lock); -- cgit v1.2.3-70-g09d2 From 2ba9f0d8875073a2ed802fca0c25c9bfc4338439 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:17:59 +0530 Subject: kvm: powerpc: book3s: Support building HV and PR KVM as module Signed-off-by: Aneesh Kumar K.V [agraf: squash in compile fix] Signed-off-by: Alexander Graf --- arch/powerpc/kvm/Kconfig | 6 +++--- arch/powerpc/kvm/Makefile | 11 ++++++++--- arch/powerpc/kvm/book3s.c | 12 +++++++++++- arch/powerpc/kvm/book3s_emulate.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 ++ arch/powerpc/kvm/book3s_pr.c | 5 ++++- arch/powerpc/kvm/book3s_rtas.c | 1 + arch/powerpc/kvm/book3s_xics.c | 1 + arch/powerpc/kvm/emulate.c | 1 + arch/powerpc/kvm/powerpc.c | 10 ++++++++++ virt/kvm/kvm_main.c | 4 ++++ 11 files changed, 46 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index a96d7c32f785..8aeeda1ff42a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -73,7 +73,7 @@ config KVM_BOOK3S_64 If unsure, say N. config KVM_BOOK3S_64_HV - bool "KVM support for POWER7 and PPC970 using hypervisor mode in host" + tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER @@ -94,8 +94,8 @@ config KVM_BOOK3S_64_HV If unsure, say N. config KVM_BOOK3S_64_PR - bool "KVM support without using hypervisor mode in host" - depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV + tristate "KVM support without using hypervisor mode in host" + depends on KVM_BOOK3S_64 select KVM_BOOK3S_PR_POSSIBLE ---help--- Support running guest kernels in virtual machines on processors diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index fa17b337bb96..ce569b6bf4d8 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -56,7 +56,7 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ book3s_64_vio_hv.o -kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ +kvm-pr-y := \ fpu.o \ book3s_paired_singles.o \ book3s_pr.o \ @@ -76,7 +76,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_rmhandlers.o endif -kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) += \ +kvm-hv-y += \ book3s_hv.o \ book3s_hv_interrupts.o \ book3s_64_mmu_hv.o @@ -84,13 +84,15 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_HV) += \ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ book3s_hv_rm_xics.o -kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) += \ +ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ book3s_hv_builtin.o \ book3s_hv_cma.o \ $(kvm-book3s_64-builtin-xics-objs-y) +endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o @@ -131,4 +133,7 @@ obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o +obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o +obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o + obj-y += $(kvm-book3s_64-builtin-objs-y) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 493aff77f84e..39d2994f9d27 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -170,28 +170,32 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) printk(KERN_INFO "Queueing interrupt %x\n", vec); #endif } - +EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) { /* might as well deliver this straight away */ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); } +EXPORT_SYMBOL_GPL(kvmppc_core_queue_dec); int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) { return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); } +EXPORT_SYMBOL_GPL(kvmppc_core_pending_dec); void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); } +EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec); void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) @@ -329,6 +333,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return 0; } +EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, bool *writable) @@ -354,6 +359,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } +EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, bool iswrite, struct kvmppc_pte *pte) @@ -421,6 +427,7 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; } +EXPORT_SYMBOL_GPL(kvmppc_st); int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) @@ -451,6 +458,7 @@ nopte: mmio: return EMULATE_DO_MMIO; } +EXPORT_SYMBOL_GPL(kvmppc_ld); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { @@ -706,6 +714,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { kvmppc_ops->set_msr(vcpu, msr); } +EXPORT_SYMBOL_GPL(kvmppc_set_msr); int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { @@ -787,6 +796,7 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { return kvmppc_ops->unmap_hva(kvm, hva); } +EXPORT_SYMBOL_GPL(kvm_unmap_hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index b9841ad844fb..99d40f8977e8 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -172,7 +172,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmu.tlbie(vcpu, addr, large); break; } -#ifdef CONFIG_KVM_BOOK3S_64_PR +#ifdef CONFIG_PPC_BOOK3S_64 case OP_31_XOP_FAKE_SC1: { /* SC 1 papr hypercalls */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0782c8688c8b..9e954a81c078 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "book3s.h" @@ -2215,3 +2216,4 @@ static void kvmppc_book3s_exit_hv(void) module_init(kvmppc_book3s_init_hv); module_exit(kvmppc_book3s_exit_hv); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ca6c73d2ee77..7f583a482161 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "book3s.h" @@ -918,7 +919,7 @@ program_interrupt: ulong cmd = kvmppc_get_gpr(vcpu, 3); int i; -#ifdef CONFIG_KVM_BOOK3S_64_PR +#ifdef CONFIG_PPC_BOOK3S_64 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { r = RESUME_GUEST; break; @@ -1582,3 +1583,5 @@ static void kvmppc_book3s_exit_pr(void) module_init(kvmppc_book3s_init_pr); module_exit(kvmppc_book3s_exit_pr); + +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 3219ba895246..cf95cdef73c9 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -260,6 +260,7 @@ fail: */ return rc; } +EXPORT_SYMBOL_GPL(kvmppc_rtas_hcall); void kvmppc_rtas_tokens_free(struct kvm *kvm) { diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index cef3de96ff6c..c3c832b27ee5 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -840,6 +840,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) return rc; } +EXPORT_SYMBOL_GPL(kvmppc_xics_hcall); /* -- Initialisation code etc. -- */ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 81762eb7957e..de9a340d22ed 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -483,3 +483,4 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) return emulated; } +EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 874a2a5e6cec..9aaa07efa4b6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -126,6 +126,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } +EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter); int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { @@ -179,6 +180,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) return r; } +EXPORT_SYMBOL_GPL(kvmppc_kvm_pv); int kvmppc_sanity_check(struct kvm_vcpu *vcpu) { @@ -207,6 +209,7 @@ out: vcpu->arch.sane = r; return r ? 0 : -EINVAL; } +EXPORT_SYMBOL_GPL(kvmppc_sanity_check); int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { @@ -241,6 +244,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } +EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); int kvm_arch_hardware_enable(void *garbage) { @@ -665,6 +669,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } +EXPORT_SYMBOL_GPL(kvmppc_handle_load); /* Same as above, but sign extends */ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -726,6 +731,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } +EXPORT_SYMBOL_GPL(kvmppc_handle_store); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -1074,22 +1080,26 @@ long kvmppc_alloc_lpid(void) return lpid; } +EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid); void kvmppc_claim_lpid(long lpid) { set_bit(lpid, lpid_inuse); } +EXPORT_SYMBOL_GPL(kvmppc_claim_lpid); void kvmppc_free_lpid(long lpid) { clear_bit(lpid, lpid_inuse); } +EXPORT_SYMBOL_GPL(kvmppc_free_lpid); void kvmppc_init_lpid(unsigned long nr_lpids_param) { nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); memset(lpid_inuse, 0, sizeof(lpid_inuse)); } +EXPORT_SYMBOL_GPL(kvmppc_init_lpid); int kvm_arch_init(void *opaque) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d469114aff09..94c6e3f6f244 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -187,6 +187,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) ++kvm->stat.remote_tlb_flush; cmpxchg(&kvm->tlbs_dirty, dirty_count, 0); } +EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); void kvm_reload_remote_mmus(struct kvm *kvm) { @@ -965,6 +966,7 @@ int kvm_get_dirty_log(struct kvm *kvm, out: return r; } +EXPORT_SYMBOL_GPL(kvm_get_dirty_log); bool kvm_largepages_enabled(void) { @@ -1653,6 +1655,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot(kvm, gfn); mark_page_dirty_in_slot(kvm, memslot, gfn); } +EXPORT_SYMBOL_GPL(mark_page_dirty); /* * The vCPU has executed a HLT instruction with in-kernel mode enabled. @@ -1678,6 +1681,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); } +EXPORT_SYMBOL_GPL(kvm_vcpu_block); #ifndef CONFIG_S390 /* -- cgit v1.2.3-70-g09d2 From 5587027ce9d59a57aecaa190be1c8e560aaff45d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:00 +0530 Subject: kvm: Add struct kvm arg to memslot APIs We will use that in the later patch to find the kvm ops handler Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf --- arch/arm/kvm/arm.c | 5 +++-- arch/ia64/kvm/kvm-ia64.c | 5 +++-- arch/mips/kvm/kvm_mips.c | 5 +++-- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++-- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 9 +++++---- arch/s390/kvm/kvm-s390.c | 5 +++-- arch/x86/kvm/x86.c | 5 +++-- include/linux/kvm_host.h | 5 +++-- virt/kvm/kvm_main.c | 12 ++++++------ 11 files changed, 37 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc5adb9349ef..e312e4a53f8d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bdfd8789b376..985bf80c622e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b044536de4..73b34827826c 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c13f15db476c..20f461637090 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -134,9 +134,11 @@ extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); -extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +extern void kvmppc_core_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +extern int kvmppc_core_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 39d2994f9d27..130fe1d75bac 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -761,13 +761,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return kvmppc_ops->get_dirty_log(kvm, log); } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { kvmppc_ops->free_memslot(free, dont); } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return kvmppc_ops->create_memslot(slot, npages); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1769354d0bf8..cb2d986a3382 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1662,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9aaa07efa4b6..b103d747934a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -417,15 +417,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_core_free_memslot(free, dont); + kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { - return kvmppc_core_create_memslot(slot, npages); + return kvmppc_core_create_memslot(kvm, slot, npages); } void kvm_arch_memslots_updated(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1e4e7b97337a..bedda67cc222 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1089,12 +1089,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edf2a07df3a3..666526a55c46 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7080,7 +7080,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { int i; @@ -7101,7 +7101,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, } } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { int i; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..8b0107dc2067 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -507,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 94c6e3f6f244..0932c3b64155 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -542,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) /* * Free any memory in @free but not in @dont. */ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - kvm_arch_free_memslot(free, dont); + kvm_arch_free_memslot(kvm, free, dont); free->npages = 0; } @@ -559,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm_for_each_memslot(memslot, slots) - kvm_free_physmem_slot(memslot, NULL); + kvm_free_physmem_slot(kvm, memslot, NULL); kfree(kvm->memslots); } @@ -823,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (change == KVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; - if (kvm_arch_create_memslot(&new, npages)) + if (kvm_arch_create_memslot(kvm, &new, npages)) goto out_free; } @@ -899,7 +899,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); kfree(old_memslots); return 0; @@ -907,7 +907,7 @@ int __kvm_set_memory_region(struct kvm *kvm, out_slots: kfree(slots); out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; } -- cgit v1.2.3-70-g09d2 From e0230e1327fb862c9b6cde24ae62d55f9db62c9b Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Thu, 24 Oct 2013 09:56:39 +0800 Subject: KVM: Mapping IOMMU pages after updating memslot In kvm_iommu_map_pages(), we need to know the page size via call kvm_host_page_size(). And it will check whether the target slot is valid before return the right page size. Currently, we will map the iommu pages when creating a new slot. But we call kvm_iommu_map_pages() during preparing the new slot. At that time, the new slot is not visible by domain(still in preparing). So we cannot get the right page size from kvm_host_page_size() and this will break the IOMMU super page logic. The solution is to map the iommu pages after we insert the new slot into domain. Signed-off-by: Yang Zhang Tested-by: Patrick Lu Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0d20c320a33d..9ca014da134d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -873,21 +873,6 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - /* - * IOMMU mapping: New slots need to be mapped. Old slots need to be - * un-mapped and re-mapped if their base changes. Since base change - * unmapping is handled above with slot deletion, mapping alone is - * needed here. Anything else the iommu might care about for existing - * slots (size changes, userspace addr changes and read-only flag - * changes) is disallowed above, so any other attribute changes getting - * here can be skipped. - */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { - r = kvm_iommu_map_pages(kvm, &new); - if (r) - goto out_slots; - } - /* actual memory is freed via old in kvm_free_physmem_slot below */ if (change == KVM_MR_DELETE) { new.dirty_bitmap = NULL; @@ -901,6 +886,20 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_physmem_slot(&old, &new); kfree(old_memslots); + /* + * IOMMU mapping: New slots need to be mapped. Old slots need to be + * un-mapped and re-mapped if their base changes. Since base change + * unmapping is handled above with slot deletion, mapping alone is + * needed here. Anything else the iommu might care about for existing + * slots (size changes, userspace addr changes and read-only flag + * changes) is disallowed above, so any other attribute changes getting + * here can be skipped. + */ + if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + r = kvm_iommu_map_pages(kvm, &new); + return r; + } + return 0; out_slots: -- cgit v1.2.3-70-g09d2 From ec53500fae421e07c5d035918ca454a429732ef4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/devices/vfio.txt | 22 +++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/Makefile | 2 +- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 4 + virt/kvm/Kconfig | 3 + virt/kvm/kvm_main.c | 5 + virt/kvm/vfio.c | 220 +++++++++++++++++++++++++++++ 8 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 Documentation/virtual/kvm/devices/vfio.txt create mode 100644 virt/kvm/vfio.c (limited to 'virt') diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt new file mode 100644 index 000000000000..ef51740c67ca --- /dev/null +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -0,0 +1,22 @@ +VFIO virtual device +=================== + +Device types supported: + KVM_DEV_TYPE_VFIO + +Only one VFIO instance may be created per VM. The created device +tracks VFIO groups in use by the VM and features of those groups +important to the correctness and acceleration of the VM. As groups +are enabled and disabled for use by the VM, KVM should be updated +about their presence. When registered with KVM, a reference to the +VFIO-group is held by KVM. + +Groups: + KVM_DEV_VFIO_GROUP + +KVM_DEV_VFIO_GROUP attributes: + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + +For each, kvm_device_attr.addr points to an int32_t file descriptor +for the VFIO group. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..b89c5db2b832 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index bf4fb04d0112..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -9,7 +9,7 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..7beddbd38ac7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1058,6 +1058,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 32c60b98ddf8..509cfbfe9658 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -845,6 +845,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 779262f59e25..fbe1a48bd629 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -27,3 +27,6 @@ config HAVE_KVM_MSI config HAVE_KVM_CPU_RELAX_INTERCEPT bool + +config KVM_VFIO + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9ca014da134d..82c4047aa0e3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_XICS: ops = &kvm_xics_ops; break; +#endif +#ifdef CONFIG_KVM_VFIO + case KVM_DEV_TYPE_VFIO: + ops = &kvm_vfio_ops; + break; #endif default: return -ENODEV; diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c new file mode 100644 index 000000000000..597c258245ea --- /dev/null +++ b/virt/kvm/vfio.c @@ -0,0 +1,220 @@ +/* + * VFIO-KVM bridge pseudo device + * + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_vfio_group { + struct list_head node; + struct vfio_group *vfio_group; +}; + +struct kvm_vfio { + struct list_head group_list; + struct mutex lock; +}; + +static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) +{ + struct vfio_group *vfio_group; + struct vfio_group *(*fn)(struct file *); + + fn = symbol_get(vfio_group_get_external_user); + if (!fn) + return ERR_PTR(-EINVAL); + + vfio_group = fn(filep); + + symbol_put(vfio_group_get_external_user); + + return vfio_group; +} + +static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) +{ + void (*fn)(struct vfio_group *); + + fn = symbol_get(vfio_group_put_external_user); + if (!fn) + return; + + fn(vfio_group); + + symbol_put(vfio_group_put_external_user); +} + +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) +{ + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + void __user *argp = (void __user *)arg; + struct fd f; + int32_t fd; + int ret; + + switch (attr) { + case KVM_DEV_VFIO_GROUP_ADD: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group == vfio_group) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -EEXIST; + } + } + + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + if (!kvg) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -ENOMEM; + } + + list_add_tail(&kvg->node, &kv->group_list); + kvg->vfio_group = vfio_group; + + mutex_unlock(&kv->lock); + + return 0; + + case KVM_DEV_VFIO_GROUP_DEL: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + list_del(&kvg->node); + kvm_vfio_group_put_external_user(kvg->vfio_group); + kfree(kvg); + ret = 0; + break; + } + + mutex_unlock(&kv->lock); + + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } + + return -ENXIO; +} + +static int kvm_vfio_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + return kvm_vfio_set_group(dev, attr->attr, attr->addr); + } + + return -ENXIO; +} + +static int kvm_vfio_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + switch (attr->attr) { + case KVM_DEV_VFIO_GROUP_ADD: + case KVM_DEV_VFIO_GROUP_DEL: + return 0; + } + + break; + } + + return -ENXIO; +} + +static void kvm_vfio_destroy(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_group *kvg, *tmp; + + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_put_external_user(kvg->vfio_group); + list_del(&kvg->node); + kfree(kvg); + } + + kfree(kv); + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int kvm_vfio_create(struct kvm_device *dev, u32 type) +{ + struct kvm_device *tmp; + struct kvm_vfio *kv; + + /* Only one VFIO "device" per VM */ + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) + if (tmp->ops == &kvm_vfio_ops) + return -EBUSY; + + kv = kzalloc(sizeof(*kv), GFP_KERNEL); + if (!kv) + return -ENOMEM; + + INIT_LIST_HEAD(&kv->group_list); + mutex_init(&kv->lock); + + dev->private = kv; + + return 0; +} + +struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; -- cgit v1.2.3-70-g09d2 From d96eb2c6f480769bff32054e78b964860dae4d56 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:23 -0600 Subject: kvm/x86: Convert iommu_flags to iommu_noncoherent Default to operating in coherent mode. This simplifies the logic when we switch to a model of registering and unregistering noncoherent I/O with KVM. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- arch/ia64/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 3 --- virt/kvm/iommu.c | 16 ++++++++-------- 6 files changed, 12 insertions(+), 15 deletions(-) (limited to 'virt') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 95a3ff93777c..db95f570705f 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -476,7 +476,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; - int iommu_flags; + bool iommu_noncoherent; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5cbf3166257c..91b35e4005d3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -564,7 +564,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; - int iommu_flags; + bool iommu_noncoherent; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6850b0f1d52b..727a5e980c43 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7446,7 +7446,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) if (is_mmio) ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; else if (vcpu->kvm->arch.iommu_domain && - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) + vcpu->kvm->arch.iommu_noncoherent) ret = kvm_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT; else diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 15f9540a2b1f..92ad83e5b132 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2719,7 +2719,7 @@ static void wbinvd_ipi(void *garbage) static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.iommu_domain && - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); + vcpu->kvm->arch.iommu_noncoherent; } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7beddbd38ac7..ed64880e4915 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -746,9 +746,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -/* For vcpu->arch.iommu_flags */ -#define KVM_IOMMU_CACHE_COHERENCY 0x1 - #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index a3b14109049b..d32d156a423a 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) flags = IOMMU_READ; if (!(slot->flags & KVM_MEM_READONLY)) flags |= IOMMU_WRITE; - if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) + if (!kvm->arch.iommu_noncoherent) flags |= IOMMU_CACHE; @@ -158,7 +158,8 @@ int kvm_assign_device(struct kvm *kvm, { struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; - int r, last_flags; + int r; + bool noncoherent; /* check if iommu exists and in use */ if (!domain) @@ -174,15 +175,13 @@ int kvm_assign_device(struct kvm *kvm, return r; } - last_flags = kvm->arch.iommu_flags; - if (iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_CACHE_COHERENCY)) - kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; + noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_CACHE_COHERENCY); /* Check if need to update IOMMU page table for guest memory */ - if ((last_flags ^ kvm->arch.iommu_flags) == - KVM_IOMMU_CACHE_COHERENCY) { + if (noncoherent != kvm->arch.iommu_noncoherent) { kvm_iommu_unmap_memslots(kvm); + kvm->arch.iommu_noncoherent = noncoherent; r = kvm_iommu_map_memslots(kvm); if (r) goto out_unmap; @@ -342,6 +341,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm) mutex_lock(&kvm->slots_lock); kvm_iommu_unmap_memslots(kvm); kvm->arch.iommu_domain = NULL; + kvm->arch.iommu_noncoherent = false; mutex_unlock(&kvm->slots_lock); iommu_domain_free(domain); -- cgit v1.2.3-70-g09d2 From e0f0bbc527f6e9c0261f1d16b2a0b47612b7f235 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:30 -0600 Subject: kvm: Create non-coherent DMA registeration We currently use some ad-hoc arch variables tied to legacy KVM device assignment to manage emulation of instructions that depend on whether non-coherent DMA is present. Create an interface for this, adapting legacy KVM device assignment and adding VFIO via the KVM-VFIO device. For now we assume that non-coherent DMA is possible any time we have a VFIO group. Eventually an interface can be developed as part of the VFIO external user interface to query the coherency of a group. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/vmx.c | 3 +-- arch/x86/kvm/x86.c | 22 +++++++++++++++++++-- include/linux/kvm_host.h | 19 ++++++++++++++++++ virt/kvm/iommu.c | 6 ++++++ virt/kvm/vfio.c | 44 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 91b35e4005d3..de388c55e7ec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -565,6 +565,8 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; bool iommu_noncoherent; +#define __KVM_HAVE_ARCH_NONCOHERENT_DMA + atomic_t noncoherent_dma_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 727a5e980c43..fabf7421ec18 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7445,8 +7445,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) */ if (is_mmio) ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; - else if (vcpu->kvm->arch.iommu_domain && - vcpu->kvm->arch.iommu_noncoherent) + else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) ret = kvm_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT; else diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92ad83e5b132..ec35d09937da 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2718,8 +2718,7 @@ static void wbinvd_ipi(void *garbage) static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.iommu_domain && - vcpu->kvm->arch.iommu_noncoherent; + return kvm_arch_has_noncoherent_dma(vcpu->kvm); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -6998,6 +6997,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + atomic_set(&kvm->arch.noncoherent_dma_count, 0); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); @@ -7437,6 +7437,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); + +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); + +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ed64880e4915..92aae88756db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -670,6 +670,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif +#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA +void kvm_arch_register_noncoherent_dma(struct kvm *kvm); +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); +#else +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return false; +} +#endif + static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index d32d156a423a..c7d9ce122529 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -140,6 +140,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + if (kvm->arch.iommu_noncoherent) + kvm_arch_register_noncoherent_dma(kvm); + idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); @@ -327,6 +330,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); + if (kvm->arch.iommu_noncoherent) + kvm_arch_unregister_noncoherent_dma(kvm); + return 0; } diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 597c258245ea..ca4260e35037 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -27,6 +27,7 @@ struct kvm_vfio_group { struct kvm_vfio { struct list_head group_list; struct mutex lock; + bool noncoherent; }; static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) @@ -58,6 +59,43 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) symbol_put(vfio_group_put_external_user); } +/* + * Groups can use the same or different IOMMU domains. If the same then + * adding a new group may change the coherency of groups we've previously + * been told about. We don't want to care about any of that so we retest + * each group and bail as soon as we find one that's noncoherent. This + * means we only ever [un]register_noncoherent_dma once for the whole device. + */ +static void kvm_vfio_update_coherency(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + bool noncoherent = false; + struct kvm_vfio_group *kvg; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + /* + * TODO: We need an interface to check the coherency of + * the IOMMU domain this group is using. For now, assume + * it's always noncoherent. + */ + noncoherent = true; + break; + } + + if (noncoherent != kv->noncoherent) { + kv->noncoherent = noncoherent; + + if (kv->noncoherent) + kvm_arch_register_noncoherent_dma(dev->kvm); + else + kvm_arch_unregister_noncoherent_dma(dev->kvm); + } + + mutex_unlock(&kv->lock); +} + static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) { struct kvm_vfio *kv = dev->private; @@ -105,6 +143,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); + kvm_vfio_update_coherency(dev); + return 0; case KVM_DEV_VFIO_GROUP_DEL: @@ -140,6 +180,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) kvm_vfio_group_put_external_user(vfio_group); + kvm_vfio_update_coherency(dev); + return ret; } @@ -185,6 +227,8 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kfree(kvg); } + kvm_vfio_update_coherency(dev); + kfree(kv); kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ } -- cgit v1.2.3-70-g09d2 From 27ef63c7e97d1e5dddd85051c03f8d44cc887f34 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Mon, 4 Nov 2013 09:08:12 -0700 Subject: KVM: IOMMU: hva align mapping page size When determining the page size we could use to map with the IOMMU, the page size should also be aligned with the hva, not just the gfn. The gfn may not reflect the real alignment within the hugetlbfs file. Most of the time, this works fine. However, if the hugetlbfs file is backed by non-contiguous huge pages, a multi-huge page memslot starts at an unaligned offset within the hugetlbfs file, and the gfn is aligned with respect to the huge page size, kvm_host_page_size() will return the huge page size and we will use that to map with the IOMMU. When we later unpin that same memslot, the IOMMU returns the unmap size as the huge page size, and we happily unpin that many pfns in monotonically increasing order, not realizing we are spanning non-contiguous huge pages and partially unpin the wrong huge page. Ensure the IOMMU mapping page size is aligned with the hva corresponding to the gfn, which does reflect the alignment within the hugetlbfs file. Reviewed-by: Marcelo Tosatti Signed-off-by: Greg Edwards Cc: stable@vger.kernel.org Signed-off-by: Gleb Natapov --- virt/kvm/iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index c7d9ce122529..0df7d4b34dfe 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -103,6 +103,10 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) while ((gfn << PAGE_SHIFT) & (page_size - 1)) page_size >>= 1; + /* Make sure hva is aligned to the page size we want to map */ + while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) + page_size >>= 1; + /* * Pin all pages we are about to map in memory. This is * important because we unmap and unpin in 4kb steps later. -- cgit v1.2.3-70-g09d2 From 80f5b5e700fa9c58480eafce0d47367bafb70006 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 5 Nov 2013 16:04:18 +0200 Subject: KVM: remove vm mmap method It was used in conjunction with KVM_SET_MEMORY_REGION ioctl which was removed by b74a07beed0 in 2010, QEMU stopped using it in 2008, so it is time to remove the code finally. Signed-off-by: Gleb Natapov --- virt/kvm/kvm_main.c | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7a317a7f9025..10015d6e4d08 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2528,44 +2528,12 @@ out: } #endif -static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *page[1]; - unsigned long addr; - int npages; - gfn_t gfn = vmf->pgoff; - struct kvm *kvm = vma->vm_file->private_data; - - addr = gfn_to_hva(kvm, gfn); - if (kvm_is_error_hva(addr)) - return VM_FAULT_SIGBUS; - - npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, - NULL); - if (unlikely(npages != 1)) - return VM_FAULT_SIGBUS; - - vmf->page = page[0]; - return 0; -} - -static const struct vm_operations_struct kvm_vm_vm_ops = { - .fault = kvm_vm_fault, -}; - -static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_vm_vm_ops; - return 0; -} - static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = kvm_vm_compat_ioctl, #endif - .mmap = kvm_vm_mmap, .llseek = noop_llseek, }; -- cgit v1.2.3-70-g09d2