Commit b30c14cd authored by James Houghton, committed by Andrew Morton
Browse files

hugetlb: unshare some PMDs when splitting VMAs

PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however,
it is possible that HugeTLB VMAs are split without unsharing the PMDs
first.

Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE
in hugetlb_change_protection [1].  The key there is that
hugetlb_unshare_all_pmds will not attempt to unshare PMDs in
non-PUD_SIZE-aligned sections of the VMA.

It might seem ideal to unshare in hugetlb_vm_op_open, but we need to
unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split
seems natural.

[1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/

Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com


Fixes: 6dfeaff9 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp")
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent a1193de5
Loading
Loading
Loading
Loading
+35 −9
Original line number Diff line number Diff line
@@ -94,6 +94,8 @@ static int hugetlb_acct_memory(struct hstate *h, long delta);
static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
		unsigned long start, unsigned long end);

static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
@@ -4834,6 +4836,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Refuse the split with -EINVAL when addr has bits set outside the
	 * huge page mask of this VMA's hstate, i.e. when addr is not aligned
	 * to the VMA's huge page size.
	 */
	if (addr & ~(huge_page_mask(hstate_vma(vma))))
		return -EINVAL;

	/*
	 * PMD sharing is only possible for PUD_SIZE-aligned address ranges
	 * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
	 * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
	 */
	if (addr & ~PUD_MASK) {
		/*
		 * hugetlb_vm_op_split is called right before we attempt to
		 * split the VMA. We will need to unshare PMDs in the old and
		 * new VMAs, so let's unshare before we split.
		 */
		/* floor/ceil bound the PUD_SIZE-aligned interval containing addr. */
		unsigned long floor = addr & PUD_MASK;
		unsigned long ceil = floor + PUD_SIZE;

		/*
		 * Unshare only when that whole interval lies inside this VMA;
		 * an interval the VMA covers only partly is left alone.
		 */
		if (floor >= vma->vm_start && ceil <= vma->vm_end)
			hugetlb_unshare_pmds(vma, floor, ceil);
	}

	/* The split is allowed. */
	return 0;
}

@@ -7322,26 +7343,21 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
	}
}

/*
 * This function will unconditionally remove all the shared pmd pgtable entries
 * within the specific vma for a hugetlbfs memory range.
 */
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
				   unsigned long start,
				   unsigned long end)
{
	struct hstate *h = hstate_vma(vma);
	unsigned long sz = huge_page_size(h);
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_notifier_range range;
	unsigned long address, start, end;
	unsigned long address;
	spinlock_t *ptl;
	pte_t *ptep;

	if (!(vma->vm_flags & VM_MAYSHARE))
		return;

	start = ALIGN(vma->vm_start, PUD_SIZE);
	end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);

	if (start >= end)
		return;

@@ -7373,6 +7389,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
	mmu_notifier_invalidate_range_end(&range);
}

/*
 * Unconditionally unshare every shared PMD page table entry within @vma,
 * which maps a hugetlbfs memory range.
 *
 * The VMA's start is rounded up and its end rounded down to PUD_SIZE before
 * the range is handed to hugetlb_unshare_pmds(), so only whole
 * PUD_SIZE-aligned pieces of the VMA are passed on.
 */
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
{
	unsigned long aligned_start = ALIGN(vma->vm_start, PUD_SIZE);
	unsigned long aligned_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);

	hugetlb_unshare_pmds(vma, aligned_start, aligned_end);
}

#ifdef CONFIG_CMA
static bool cma_reserve_called __initdata;