summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/svm.h2
-rw-r--r--arch/x86/kernel/apic/io_apic.c6
-rw-r--r--arch/x86/kernel/apic/msi.c11
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c27
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kvm/cpuid.c28
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c28
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c35
-rw-r--r--arch/x86/kvm/svm/nested.c13
-rw-r--r--arch/x86/kvm/svm/svm.c9
-rw-r--r--arch/x86/kvm/vmx/nested.c56
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/tools/chkobjdump.awk1
15 files changed, 149 insertions, 80 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 974cbfb1eefe..af6ce8d4c86a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1038,6 +1038,13 @@ struct kvm_arch {
struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
+ /*
+ * Protects marking pages unsync during page faults, as TDP MMU page
+ * faults only take mmu_lock for read. For simplicity, the unsync
+ * pages lock is always taken when marking pages unsync regardless of
+ * whether mmu_lock is held for read or write.
+ */
+ spinlock_t mmu_unsync_pages_lock;
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e322676039f4..b00dbc5fac2b 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -184,6 +184,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define V_IGN_TPR_SHIFT 20
#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
+#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK)
+
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d5c691a3208b..39224e035e47 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static struct irq_chip ioapic_ir_chip __read_mostly = {
@@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static inline void init_IO_APIC_traps(void)
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 44ebe25e7703..dbacb9ec8843 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
* The quirk bit is not set in this case.
* - The new vector is the same as the old vector
* - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The interrupt is not yet started up
* - The new destination CPU is the same as the old destination CPU
*/
if (!irqd_msi_nomask_quirk(irqd) ||
cfg->vector == old_cfg.vector ||
old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ !irqd_is_started(irqd) ||
cfg->dest_apicid == old_cfg.dest_apicid) {
irq_msi_update_msg(irqd, cfg);
return ret;
@@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_affinity = msi_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
@@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = {
.irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static struct msi_domain_info pci_msi_ir_domain_info = {
@@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = {
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = dmar_msi_compose_msg,
.irq_write_msi_msg = dmar_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP,
};
static int dmar_msi_init(struct irq_domain *domain,
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f07c10b87a87..57e4bb695ff9 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -285,15 +285,14 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >>= shift;
}
-static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
{
struct mbm_state *m;
u64 chunks, tval;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
- rr->val = tval;
- return -EINVAL;
+ return tval;
}
switch (rr->evtid) {
case QOS_L3_OCCUP_EVENT_ID:
@@ -305,12 +304,6 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
case QOS_L3_MBM_LOCAL_EVENT_ID:
m = &rr->d->mbm_local[rmid];
break;
- default:
- /*
- * Code would never reach here because
- * an invalid event id would fail the __rmid_read.
- */
- return -EINVAL;
}
if (rr->first) {
@@ -361,23 +354,29 @@ void mon_event_count(void *info)
struct rdtgroup *rdtgrp, *entry;
struct rmid_read *rr = info;
struct list_head *head;
+ u64 ret_val;
rdtgrp = rr->rgrp;
- if (__mon_event_count(rdtgrp->mon.rmid, rr))
- return;
+ ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);
/*
- * For Ctrl groups read data from child monitor groups.
+ * For Ctrl groups read data from child monitor groups and
+ * add them together. Count events which are read successfully.
+ * Discard the rmid_read's reporting errors.
*/
head = &rdtgrp->mon.crdtgrp_list;
if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->mon.rmid, rr))
- return;
+ if (__mon_event_count(entry->mon.rmid, rr) == 0)
+ ret_val = 0;
}
}
+
+ /* Report error if none of rmid_reads are successful */
+ if (ret_val)
+ rr->val = ret_val;
}
/*
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 08651a4e6aa0..42fc41dd0e1f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = {
.irq_set_affinity = msi_domain_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_write_msi_msg = hpet_msi_write_msg,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP,
};
static int hpet_msi_init(struct irq_domain *domain,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 739be5da3bca..fe03bd978761 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_mmu_after_set_cpuid(vcpu);
}
-static int is_efer_nx(void)
-{
- return host_efer & EFER_NX;
-}
-
-static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
-{
- int i;
- struct kvm_cpuid_entry2 *e, *entry;
-
- entry = NULL;
- for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- e = &vcpu->arch.cpuid_entries[i];
- if (e->function == 0x80000001) {
- entry = e;
- break;
- }
- }
- if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
- cpuid_entry_clear(entry, X86_FEATURE_NX);
- printk(KERN_INFO "kvm: guest NX capability removed\n");
- }
-}
-
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = cpuid->nent;
- cpuid_fix_nx_cap(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
@@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
void kvm_set_cpu_caps(void)
{
- unsigned int f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
unsigned int f_gbpages = F(GBPAGES);
unsigned int f_lm = F(LM);
@@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void)
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* Reserved */ |
- f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+ F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 0b38f944c6b6..41d2a53c5dea 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1933,7 +1933,7 @@ ret_success:
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *entry;
- struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ struct kvm_vcpu_hv *hv_vcpu;
entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c4f4fa23320e..47b765270239 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
{
struct kvm_mmu_page *sp;
+ bool locked = false;
/*
* Force write-protection if the page is being tracked. Note, the page
@@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
if (sp->unsync)
continue;
+ /*
+ * TDP MMU page faults require an additional spinlock as they
+ * run with mmu_lock held for read, not write, and the unsync
+ * logic is not thread safe. Take the spinklock regardless of
+ * the MMU type to avoid extra conditionals/parameters, there's
+ * no meaningful penalty if mmu_lock is held for write.
+ */
+ if (!locked) {
+ locked = true;
+ spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+
+ /*
+ * Recheck after taking the spinlock, a different vCPU
+ * may have since marked the page unsync. A false
+ * positive on the unprotected check above is not
+ * possible as clearing sp->unsync _must_ hold mmu_lock
+ * for write, i.e. unsync cannot transition from 0->1
+ * while this CPU holds mmu_lock for read (or write).
+ */
+ if (READ_ONCE(sp->unsync))
+ continue;
+ }
+
WARN_ON(sp->role.level != PG_LEVEL_4K);
kvm_unsync_page(vcpu, sp);
}
+ if (locked)
+ spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
/*
* We need to ensure that the marking of unsync pages is visible
@@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+ spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
+
if (!kvm_mmu_init_tdp_mmu(kvm))
/*
* No smp_load/store wrappers needed here as we are in
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 0853370bd811..d80cb122b5f3 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
if (!kvm->arch.tdp_mmu_enabled)
return;
+ WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
@@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
-
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
@@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
list_del_rcu(&root->link);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
- zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
+ zap_gfn_range(kvm, root, 0, -1ull, false, false, shared);
call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
}
@@ -724,13 +723,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end, bool can_yield, bool flush,
bool shared)
{
+ gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
+ bool zap_all = (start == 0 && end >= max_gfn_host);
struct tdp_iter iter;
+ /*
+ * No need to try to step down in the iterator when zapping all SPTEs,
+ * zapping the top-level non-leaf SPTEs will recurse on their children.
+ */
+ int min_level = zap_all ? root->role.level : PG_LEVEL_4K;
+
+ /*
+ * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
+ * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
+ * and so KVM will never install a SPTE for such addresses.
+ */
+ end = min(end, max_gfn_host);
+
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
rcu_read_lock();
- tdp_root_for_each_pte(iter, root, start, end) {
+ for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+ min_level, start, end) {
retry:
if (can_yield &&
tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) {
@@ -744,9 +759,10 @@ retry:
/*
* If this is a non-last-level SPTE that covers a larger range
* than should be zapped, continue, and zap the mappings at a
- * lower level.
+ * lower level, except when zapping all SPTEs.
*/
- if ((iter.gfn < start ||
+ if (!zap_all &&
+ (iter.gfn < start ||
iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
!is_last_spte(iter.old_spte, iter.level))
continue;
@@ -794,12 +810,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
bool flush = false;
int i;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
+ flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
flush, false);
if (flush)
@@ -838,7 +853,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
*/
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
struct kvm_mmu_page *next_root;
struct kvm_mmu_page *root;
bool flush = false;
@@ -854,8 +868,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
rcu_read_unlock();
- flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
- true);
+ flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true);
/*
* Put the reference acquired in
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 61738ff8ef33..e5515477c30a 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -158,6 +158,9 @@ void recalc_intercepts(struct vcpu_svm *svm)
/* If SMI is not intercepted, ignore guest SMI intercept as well */
if (!intercept_smi)
vmcb_clr_intercept(c, INTERCEPT_SMI);
+
+ vmcb_set_intercept(c, INTERCEPT_VMLOAD);
+ vmcb_set_intercept(c, INTERCEPT_VMSAVE);
}
static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -503,7 +506,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
{
- const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+ const u32 int_ctl_vmcb01_bits =
+ V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
+
+ const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
+
struct kvm_vcpu *vcpu = &svm->vcpu;
/*
@@ -535,8 +542,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
svm->vmcb->control.int_ctl =
- (svm->nested.ctl.int_ctl & ~mask) |
- (svm->vmcb01.ptr->control.int_ctl & mask);
+ (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
+ (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext;
svm->vmcb->control.int_vector = svm->nested.ctl.int_vector;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e8ccab50ebf6..69639f9624f5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1589,17 +1589,18 @@ static void svm_set_vintr(struct vcpu_svm *svm)
static void svm_clear_vintr(struct vcpu_svm *svm)
{
- const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
svm_clr_intercept(svm, INTERCEPT_VINTR);
/* Drop int_ctl fields related to VINTR injection. */
- svm->vmcb->control.int_ctl &= mask;
+ svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
if (is_guest_mode(&svm->vcpu)) {
- svm->vmcb01.ptr->control.int_ctl &= mask;
+ svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
(svm->nested.ctl.int_ctl & V_TPR_MASK));
- svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
+
+ svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
+ V_IRQ_INJECTION_BITS_MASK;
}
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1a52134b0c42..b3f77d18eb5a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
}
+#define EPTP_PA_MASK GENMASK_ULL(51, 12)
+
+static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
+{
+ return VALID_PAGE(root_hpa) &&
+ ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
+}
+
+static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
+ gpa_t addr)
+{
+ uint i;
+ struct kvm_mmu_root_info *cached_root;
+
+ WARN_ON_ONCE(!mmu_is_nested(vcpu));
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ cached_root = &vcpu->arch.mmu->prev_roots[i];
+
+ if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
+ eptp))
+ vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
+ }
+}
+
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
@@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
vm_exit_reason = EXIT_REASON_PML_FULL;
vmx->nested.pml_full = false;
exit_qualification &= INTR_INFO_UNBLOCK_NMI;
- } else if (fault->error_code & PFERR_RSVD_MASK)
- vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
- else
- vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+ } else {
+ if (fault->error_code & PFERR_RSVD_MASK)
+ vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+ else
+ vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
+
+ /*
+ * Although the caller (kvm_inject_emulated_page_fault) would
+ * have already synced the faulting address in the shadow EPT
+ * tables for the current EPTP12, we also need to sync it for
+ * any other cached EPTP02s based on the same EP4TA, since the
+ * TLB associates mappings to the EP4TA rather than the full EPTP.
+ */
+ nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
+ fault->address);
+ }
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
vmcs12->guest_physical_address = fault->address;
@@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return nested_vmx_succeed(vcpu);
}
-#define EPTP_PA_MASK GENMASK_ULL(51, 12)
-
-static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
-{
- return VALID_PAGE(root_hpa) &&
- ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
-}
-
/* Emulate the INVEPT instruction */
static int handle_invept(struct kvm_vcpu *vcpu)
{
@@ -5826,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
if (is_nmi(intr_info))
return true;
else if (is_page_fault(intr_info))
- return vcpu->arch.apf.host_apf_flags || !enable_ept;
+ return vcpu->arch.apf.host_apf_flags ||
+ vmx_need_pf_intercept(vcpu);
else if (is_debug(intr_info) &&
vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index db88ed4f2121..17a1cb4b059d 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
{
- return vmx->secondary_exec_control &
+ return secondary_exec_controls_get(vmx) &
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}
diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk
index fd1ab80be0de..a4cf678cf5c8 100644
--- a/arch/x86/tools/chkobjdump.awk
+++ b/arch/x86/tools/chkobjdump.awk
@@ -10,6 +10,7 @@ BEGIN {
/^GNU objdump/ {
verstr = ""
+ gsub(/\(.*\)/, "");
for (i = 3; i <= NF; i++)
if (match($(i), "^[0-9]")) {
verstr = $(i);