Commit d930ff03 authored by Vlastimil Babka's avatar Vlastimil Babka Committed by Linus Torvalds
Browse files

mm, slub: splice cpu and page freelists in deactivate_slab()

In deactivate_slab() we currently move all but one objects on the cpu
freelist to the page freelist one by one using the costly cmpxchg_double()
operation.  Then we unfreeze the page while moving the last object on page
freelist, with a final cmpxchg_double().

This can be optimized to avoid the cmpxchg_double() per object.  Just
count the objects on cpu freelist (to adjust page->inuse properly) and
also remember the last object in the chain.  Then splice page->freelist to
the last object and effectively add the whole cpu freelist to
page->freelist while unfreezing the page, with a single cmpxchg_double().

Link: https://lkml.kernel.org/r/20210115183543.15097-1-vbabka@suse.cz


Signed-off-by: default avatarVlastimil Babka <vbabka@suse.cz>
Reviewed-by: default avatarJann Horn <jannh@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 59450bbc
Loading
Loading
Loading
Loading
+24 −35
Original line number Diff line number Diff line
@@ -2167,9 +2167,9 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
{
	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
	int lock = 0;
	int lock = 0, free_delta = 0;
	enum slab_modes l = M_NONE, m = M_NONE;
	void *nextfree;
	void *nextfree, *freelist_iter, *freelist_tail;
	int tail = DEACTIVATE_TO_HEAD;
	struct page new;
	struct page old;
@@ -2180,45 +2180,34 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
	}

	/*
	 * Stage one: Free all available per cpu objects back
	 * to the page freelist while it is still frozen. Leave the
	 * last one.
	 *
	 * There is no need to take the list->lock because the page
	 * is still frozen.
	 * Stage one: Count the objects on cpu's freelist as free_delta and
	 * remember the last object in freelist_tail for later splicing.
	 */
	while (freelist && (nextfree = get_freepointer(s, freelist))) {
		void *prior;
		unsigned long counters;
	freelist_tail = NULL;
	freelist_iter = freelist;
	while (freelist_iter) {
		nextfree = get_freepointer(s, freelist_iter);

		/*
		 * If 'nextfree' is invalid, it is possible that the object at
		 * 'freelist' is already corrupted.  So isolate all objects
		 * starting at 'freelist'.
		 * 'freelist_iter' is already corrupted.  So isolate all objects
		 * starting at 'freelist_iter' by skipping them.
		 */
		if (freelist_corrupted(s, page, &freelist, nextfree))
		if (freelist_corrupted(s, page, &freelist_iter, nextfree))
			break;

		do {
			prior = page->freelist;
			counters = page->counters;
			set_freepointer(s, freelist, prior);
			new.counters = counters;
			new.inuse--;
			VM_BUG_ON(!new.frozen);

		} while (!__cmpxchg_double_slab(s, page,
			prior, counters,
			freelist, new.counters,
			"drain percpu freelist"));
		freelist_tail = freelist_iter;
		free_delta++;

		freelist = nextfree;
		freelist_iter = nextfree;
	}

	/*
	 * Stage two: Ensure that the page is unfrozen while the
	 * list presence reflects the actual number of objects
	 * during unfreeze.
	 * Stage two: Unfreeze the page while splicing the per-cpu
	 * freelist to the head of page's freelist.
	 *
	 * Ensure that the page is unfrozen while the list presence
	 * reflects the actual number of objects during unfreeze.
	 *
	 * We setup the list membership and then perform a cmpxchg
	 * with the count. If there is a mismatch then the page
@@ -2231,15 +2220,15 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
	 */
redo:

	old.freelist = page->freelist;
	old.counters = page->counters;
	old.freelist = READ_ONCE(page->freelist);
	old.counters = READ_ONCE(page->counters);
	VM_BUG_ON(!old.frozen);

	/* Determine target state of the slab */
	new.counters = old.counters;
	if (freelist) {
		new.inuse--;
		set_freepointer(s, freelist, old.freelist);
	if (freelist_tail) {
		new.inuse -= free_delta;
		set_freepointer(s, freelist_tail, old.freelist);
		new.freelist = freelist;
	} else
		new.freelist = old.freelist;