mm: fix memory ordering for mm_lock_seq and vm_lock_seq (b1f02b95) · Commits · jan.koester / Linux

include/linux/mm.h

+23 −6

Original line number	Diff line number	Diff line
		@@ -641,8 +641,14 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
		*/
		static inline bool vma_start_read(struct vm_area_struct *vma)
		{
		/* Check before locking. A race might cause false locked result. */
		if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
		/*
		* Check before locking. A race might cause false locked result.
		* We can use READ_ONCE() for the mm_lock_seq here, and don't need
		* ACQUIRE semantics, because this is just a lockless check whose result
		* we don't rely on for anything - the mm_lock_seq read against which we
		* need ordering is below.
		*/
		if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
		return false;

		if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
		@@ -653,8 +659,13 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
		* False unlocked result is impossible because we modify and check
		* vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
		* modification invalidates all existing locks.
		*
		* We must use ACQUIRE semantics for the mm_lock_seq so that if we are
		* racing with vma_end_write_all(), we only start reading from the VMA
		* after it has been unlocked.
		* This pairs with RELEASE semantics in vma_end_write_all().
		*/
		if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
		if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
		up_read(&vma->vm_lock->lock);
		return false;
		}
		@@ -676,7 +687,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
		* current task is holding mmap_write_lock, both vma->vm_lock_seq and
		* mm->mm_lock_seq can't be concurrently modified.
		*/
		*mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
		*mm_lock_seq = vma->vm_mm->mm_lock_seq;
		return (vma->vm_lock_seq == *mm_lock_seq);
		}

		@@ -688,7 +699,13 @@ static inline void vma_start_write(struct vm_area_struct *vma)
		return;

		down_write(&vma->vm_lock->lock);
		vma->vm_lock_seq = mm_lock_seq;
		/*
		* We should use WRITE_ONCE() here because we can have concurrent reads
		* from the early lockless pessimistic check in vma_start_read().
		* We don't really care about the correctness of that early check, but
		* we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
		*/
		WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
		up_write(&vma->vm_lock->lock);
		}

		@@ -702,7 +719,7 @@ static inline bool vma_try_start_write(struct vm_area_struct *vma)
		if (!down_write_trylock(&vma->vm_lock->lock))
		return false;

		vma->vm_lock_seq = mm_lock_seq;
		WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
		up_write(&vma->vm_lock->lock);
		return true;
		}

include/linux/mm_types.h

+28 −0

Original line number	Diff line number	Diff line
		@@ -514,6 +514,20 @@ struct vm_area_struct {
		};

		#ifdef CONFIG_PER_VMA_LOCK
		/*
		* Can only be written (using WRITE_ONCE()) while holding both:
		* - mmap_lock (in write mode)
		* - vm_lock->lock (in write mode)
		* Can be read reliably while holding one of:
		* - mmap_lock (in read or write mode)
		* - vm_lock->lock (in read or write mode)
		* Can be read unreliably (using READ_ONCE()) for pessimistic bailout
		* while holding nothing (except RCU to keep the VMA struct allocated).
		*
		* This sequence counter is explicitly allowed to overflow; sequence
		* counter reuse can only lead to occasional unnecessary use of the
		* slowpath.
		*/
		int vm_lock_seq;
		struct vma_lock *vm_lock;

		@@ -679,6 +693,20 @@ struct mm_struct {
		* by mmlist_lock
		*/
		#ifdef CONFIG_PER_VMA_LOCK
		/*
		* This field has lock-like semantics, meaning it is sometimes
		* accessed with ACQUIRE/RELEASE semantics.
		* Roughly speaking, incrementing the sequence number is
		* equivalent to releasing locks on VMAs; reading the sequence
		* number can be part of taking a read lock on a VMA.
		*
		* Can be modified under write mmap_lock using RELEASE
		* semantics.
		* Can be read with no other protection when holding write
		* mmap_lock.
		* Can be read with ACQUIRE semantics if not holding write
		* mmap_lock.
		*/
		int mm_lock_seq;
		#endif

include/linux/mmap_lock.h

+8 −2

Original line number	Diff line number	Diff line
		@@ -76,8 +76,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
		static inline void vma_end_write_all(struct mm_struct *mm)
		{
		mmap_assert_write_locked(mm);
		/* No races during update due to exclusive mmap_lock being held */
		WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1);
		/*
		* Nobody can concurrently modify mm->mm_lock_seq due to exclusive
		* mmap_lock being held.
		* We need RELEASE semantics here to ensure that preceding stores into
		* the VMA take effect before we unlock it with this store.
		* Pairs with ACQUIRE semantics in vma_start_read().
		*/
		smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
		}
		#else
		static inline void vma_end_write_all(struct mm_struct *mm) {}