diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5d80a0faa0961cd3a45a97f6d8ef44cd8963bf9a..2a9746fe6c4ab64638d2caca2cf648a5f0f35c83 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2356,7 +2356,9 @@
 			KVM_DIRTY_LOG_INITIALLY_SET is enabled or disabled. If
 			disabled, all huge pages in a memslot will be eagerly
 			split when dirty logging is enabled on that memslot. If
-			enabled, huge pages will not be eagerly split.
+			enabled, eager page splitting will be performed during
+			the KVM_CLEAR_DIRTY_LOG ioctl, and only for the pages
+			being cleared.
 
 			Eager page splitting currently only supports splitting
 			huge pages mapped by the TDP MMU.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8bfb069fb3df3872e777684ad24cfe54f2f2794e..10815b672a26962529e53bb3ed250d471f97491b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1590,6 +1590,10 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
 				       const struct kvm_memory_slot *memslot,
 				       int target_level);
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  u64 start, u64 end,
+				  int target_level);
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
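Note: the documentation hunk above ties eager splitting to KVM_CLEAR_DIRTY_LOG. As a non-authoritative illustration of the userspace side, the sketch below clears a range of the dirty log; with kvm.eager_page_split enabled, KVM now splits the huge pages backing exactly that range before write-protecting it. The vm_fd, slot, range, and bitmap are caller-supplied placeholders, and KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 must have been enabled on the VM.

/*
 * Illustrative only, not part of the patch: a VMM-side sketch of the
 * KVM_CLEAR_DIRTY_LOG call that now drives eager page splitting when
 * KVM_DIRTY_LOG_INITIALLY_SET is in use. Per the KVM API, first_page
 * must be a multiple of 64, and num_pages a multiple of 64 unless the
 * range reaches the end of the memslot.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int clear_dirty_range(int vm_fd, uint32_t slot, uint64_t first_page,
			     uint32_t num_pages, void *dirty_bitmap)
{
	struct kvm_clear_dirty_log clear = {
		.slot = slot,
		.num_pages = num_pages,
		.first_page = first_page,
		.dirty_bitmap = dirty_bitmap,	/* one bit per page to clear */
	};

	/*
	 * KVM write-protects (and, with eager_page_split, eagerly splits)
	 * only the pages being cleared here.
	 */
	return ioctl(vm_fd, KVM_CLEAR_DIRTY_LOG, &clear);
}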
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 308c8b21f9b19d53fe651eb493bfc84d26ab8cb1..296f8723f9ae92653b4c6023bd9bb0343b6212c2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1358,6 +1358,9 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 		gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
 		gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
 
+		if (READ_ONCE(eager_page_split))
+			kvm_mmu_try_split_huge_pages(kvm, slot, start, end, PG_LEVEL_4K);
+
 		kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
 
 		/* Cross two large pages? */
@@ -5830,16 +5833,32 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
+/* Must be called with the mmu_lock held in write-mode. */
+void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  u64 start, u64 end,
+				  int target_level)
+{
+	if (is_tdp_mmu_enabled(kvm))
+		kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end,
+						 target_level, false);
+
+	/*
+	 * A TLB flush is unnecessary at this point for the same reasons as in
+	 * kvm_mmu_slot_try_split_huge_pages().
+	 */
+}
+
 void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
-				       const struct kvm_memory_slot *memslot,
-				       int target_level)
+					const struct kvm_memory_slot *memslot,
+					int target_level)
 {
 	u64 start = memslot->base_gfn;
 	u64 end = start + memslot->npages;
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
-		kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level);
+		kvm_tdp_mmu_try_split_huge_pages(kvm, memslot, start, end, target_level, true);
 		read_unlock(&kvm->mmu_lock);
 	}
 
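Note: kvm_arch_mmu_enable_log_dirty_pt_masked() runs with mmu_lock held for write, so the new kvm_mmu_try_split_huge_pages() passes shared=false, while the memslot-wide kvm_mmu_slot_try_split_huge_pages() keeps taking the lock for read and passes shared=true. The standalone C11 sketch below (illustrative only; set_entry and its types are invented, not KVM code) shows the update contract the tdp_mmu.c changes that follow implement: an atomic compare-exchange that may fail under a shared lock, a plain store under an exclusive one.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Generic sketch of the "shared" contract: under a read (shared) lock,
 * concurrent updaters may race, so installation must be an atomic
 * compare-exchange that can fail; under a write (exclusive) lock a
 * plain store cannot race and never fails.
 */
static bool set_entry(_Atomic uint64_t *entry, uint64_t old_val,
		      uint64_t new_val, bool shared)
{
	if (shared)
		/* Fails (returns false) if the entry changed under us. */
		return atomic_compare_exchange_strong(entry, &old_val, new_val);

	/* Caller holds the lock exclusively: no race is possible. */
	atomic_store(entry, new_val);
	return true;
}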
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6dfd6db154d8624614b1853ab7c064ed93fcdde5..dae2cebcf8b5c41d6f66f40d2635d8a2f318e78d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -963,27 +963,33 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 }
 
 /*
- * tdp_mmu_link_sp_atomic - Atomically replace the given spte with an spte
- * pointing to the provided page table.
+ * tdp_mmu_link_sp - Replace the given spte with an spte pointing to the
+ * provided page table.
  *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
  * @sp: The new TDP page table to install.
  * @account_nx: True if this page table is being installed to split a
  *              non-executable huge page.
+ * @shared: True if this operation is running under the MMU lock in read mode.
  *
  * Returns: 0 if the new page table was installed. Non-0 if the page table
  *          could not be installed (e.g. the atomic compare-exchange failed).
  */
-static int tdp_mmu_link_sp_atomic(struct kvm *kvm, struct tdp_iter *iter,
-				  struct kvm_mmu_page *sp, bool account_nx)
+static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
+			   struct kvm_mmu_page *sp, bool account_nx,
+			   bool shared)
 {
 	u64 spte = make_nonleaf_spte(sp->spt, !shadow_accessed_mask);
-	int ret;
+	int ret = 0;
 
-	ret = tdp_mmu_set_spte_atomic(kvm, iter, spte);
-	if (ret)
-		return ret;
+	if (shared) {
+		ret = tdp_mmu_set_spte_atomic(kvm, iter, spte);
+		if (ret)
+			return ret;
+	} else {
+		tdp_mmu_set_spte(kvm, iter, spte);
+	}
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
@@ -1051,7 +1057,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		sp = tdp_mmu_alloc_sp(vcpu);
 		tdp_mmu_init_child_sp(sp, &iter);
 
-		if (tdp_mmu_link_sp_atomic(vcpu->kvm, &iter, sp, account_nx)) {
+		if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
 			tdp_mmu_free_sp(sp);
 			break;
 		}
@@ -1277,12 +1283,11 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
 }
 
 static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
-						       struct tdp_iter *iter)
+						       struct tdp_iter *iter,
+						       bool shared)
 {
 	struct kvm_mmu_page *sp;
 
-	lockdep_assert_held_read(&kvm->mmu_lock);
-
 	/*
 	 * Since we are allocating while under the MMU lock we have to be
 	 * careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
@@ -1297,20 +1302,27 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 		return sp;
 
 	rcu_read_unlock();
-	read_unlock(&kvm->mmu_lock);
+
+	if (shared)
+		read_unlock(&kvm->mmu_lock);
+	else
+		write_unlock(&kvm->mmu_lock);
 
 	iter->yielded = true;
 	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
 
-	read_lock(&kvm->mmu_lock);
+	if (shared)
+		read_lock(&kvm->mmu_lock);
+	else
+		write_lock(&kvm->mmu_lock);
+
 	rcu_read_lock();
 
 	return sp;
 }
 
-static int tdp_mmu_split_huge_page_atomic(struct kvm *kvm,
-					  struct tdp_iter *iter,
-					  struct kvm_mmu_page *sp)
+static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
+				   struct kvm_mmu_page *sp, bool shared)
 {
 	const u64 huge_spte = iter->old_spte;
 	const int level = iter->level;
@@ -1333,7 +1345,7 @@ static int tdp_mmu_split_huge_page_atomic(struct kvm *kvm,
 	 * correctness standpoint since the translation will be the same either
 	 * way.
 	 */
-	ret = tdp_mmu_link_sp_atomic(kvm, iter, sp, false);
+	ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
 	if (ret)
 		return ret;
 
@@ -1350,7 +1362,7 @@
 static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 					 struct kvm_mmu_page *root,
 					 gfn_t start, gfn_t end,
-					 int target_level)
+					 int target_level, bool shared)
 {
 	struct kvm_mmu_page *sp = NULL;
 	struct tdp_iter iter;
@@ -1371,14 +1383,14 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 	 */
 	for_each_tdp_pte_min_level(iter, root, target_level + 1, start, end) {
retry:
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared))
 			continue;
 
 		if (!is_shadow_present_pte(iter.old_spte) || !is_large_pte(iter.old_spte))
 			continue;
 
 		if (!sp) {
-			sp = tdp_mmu_alloc_sp_for_split(kvm, &iter);
+			sp = tdp_mmu_alloc_sp_for_split(kvm, &iter, shared);
 			if (!sp) {
 				ret = -ENOMEM;
 				break;
@@ -1388,7 +1400,7 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 			continue;
 		}
 
-		if (tdp_mmu_split_huge_page_atomic(kvm, &iter, sp))
+		if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared))
 			goto retry;
 
 		sp = NULL;
@@ -1408,23 +1420,24 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 	return ret;
 }
 
+
 /*
  * Try to split all huge pages mapped by the TDP MMU down to the target level.
 
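Note: the hunk above swaps lockdep_assert_held_read() for kvm_lockdep_assert_mmu_lock_held(kvm, shared) so the assertion matches whichever way the caller took mmu_lock. For reference, that helper already exists in tdp_mmu.c and looks roughly like this:

static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
							     bool shared)
{
	if (shared)
		lockdep_assert_held_read(&kvm->mmu_lock);
	else
		lockdep_assert_held_write(&kvm->mmu_lock);

	return true;
}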
 */
 void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 				      const struct kvm_memory_slot *slot,
 				      gfn_t start, gfn_t end,
-				      int target_level)
+				      int target_level, bool shared)
 {
 	struct kvm_mmu_page *root;
 	int r = 0;
 
-	lockdep_assert_held_read(&kvm->mmu_lock);
+	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true) {
-		r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level);
+	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, shared) {
+		r = tdp_mmu_split_huge_pages_root(kvm, root, start, end, target_level, shared);
 		if (r) {
-			kvm_tdp_mmu_put_root(kvm, root, true);
+			kvm_tdp_mmu_put_root(kvm, root, shared);
 			break;
 		}
 	}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index fdb3a886e50f94de799b98570d6c289826d52b0e..3f987785702a4b1938c67319a07a41fe469976c2 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -70,7 +70,7 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 				      const struct kvm_memory_slot *slot,
 				      gfn_t start, gfn_t end,
-				      int target_level);
+				      int target_level, bool shared);
 
 static inline void kvm_tdp_mmu_walk_lockless_begin(void)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ffef31feac3a882b5835efff00ee4f4d13bcb4cd..803b2e4c7b758ffe5df8e5b387b4a3a9fd3a4ea8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -192,7 +192,7 @@ bool __read_mostly enable_pmu = true;
 EXPORT_SYMBOL_GPL(enable_pmu);
 module_param(enable_pmu, bool, 0444);
 
-static bool __read_mostly eager_page_split = true;
+bool __read_mostly eager_page_split = true;
 module_param(eager_page_split, bool, 0644);
 
 /*
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 767ec7f9951608f984b4ac69a1c3205ce0a93ebc..aa86abad914d2f6e2d6d8ce14920d1c5cad20dfa 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -307,6 +307,8 @@ extern int pi_inject_timer;
 
 extern bool report_ignored_msrs;
 
+extern bool eager_page_split;
+
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 {
 	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
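Note: with eager_page_split exported above and consulted via READ_ONCE() on every clear, the 0644 module parameter can be flipped at runtime and takes effect on the next KVM_CLEAR_DIRTY_LOG. A hypothetical userspace sketch follows; the sysfs path assumes the parameter lives in kvm.ko, as the kvm.eager_page_split documentation entry suggests.

#include <fcntl.h>
#include <stdbool.h>
#include <unistd.h>

/* Hypothetical helper: toggle kvm.eager_page_split through sysfs. */
static int set_eager_page_split(bool enable)
{
	int fd = open("/sys/module/kvm/parameters/eager_page_split", O_WRONLY);
	ssize_t ret;

	if (fd < 0)
		return -1;

	/* Module parameter bools accept "Y"/"N" (or "1"/"0"). */
	ret = write(fd, enable ? "Y" : "N", 1);
	close(fd);
	return ret == 1 ? 0 : -1;
}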