Commit 63638450 authored by Tejun Heo's avatar Tejun Heo
Browse files

workqueue: Report work funcs that trigger automatic CPU_INTENSIVE mechanism



Workqueue now automatically marks per-cpu work items that hog CPU for too
long as CPU_INTENSIVE, which excludes them from concurrency management and
prevents stalling other concurrency-managed work items. If a work function
keeps running over the thershold, it likely needs to be switched to use an
unbound workqueue.

This patch adds a debug mechanism which tracks the work functions which
trigger the automatic CPU_INTENSIVE mechanism and report them using
pr_warn() with exponential backoff.

v3: Documentation update.

v2: Drop bouncing to kthread_worker for printing messages. It was to avoid
    introducing circular locking dependency through printk but not effective
    as it still had pool lock -> wci_lock -> printk -> pool lock loop. Let's
    just print directly using printk_deferred().

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Suggested-by: default avatarPeter Zijlstra <peterz@infradead.org>
parent 616db877
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -6938,6 +6938,11 @@
			them from noticeably delaying other per-cpu work
			items. Default is 10000 (10ms).

			If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
			will report the work functions which violate this
			threshold repeatedly. They are likely good
			candidates for using WQ_UNBOUND workqueues instead.

	workqueue.disable_numa
			By default, all work items queued to unbound
			workqueues are affine to the NUMA nodes they're
+93 −0
Original line number Diff line number Diff line
@@ -948,6 +948,98 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
			pool->nr_running++;
}

#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT

/*
 * Concurrency-managed per-cpu work items that hog CPU for longer than
 * wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism,
 * which prevents them from stalling other concurrency-managed work items. If a
 * work function keeps triggering this mechanism, it's likely that the work item
 * should be using an unbound workqueue instead.
 *
 * wq_cpu_intensive_report() tracks work functions which trigger such conditions
 * and report them so that they can be examined and converted to use unbound
 * workqueues as appropriate. To avoid flooding the console, each violating work
 * function is tracked and reported with exponential backoff.
 */
#define WCI_MAX_ENTS 128

struct wci_ent {
	work_func_t		func;
	atomic64_t		cnt;
	struct hlist_node	hash_node;
};

static struct wci_ent wci_ents[WCI_MAX_ENTS];
static int wci_nr_ents;
static DEFINE_RAW_SPINLOCK(wci_lock);
static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS));

static struct wci_ent *wci_find_ent(work_func_t func)
{
	struct wci_ent *ent;

	hash_for_each_possible_rcu(wci_hash, ent, hash_node,
				   (unsigned long)func) {
		if (ent->func == func)
			return ent;
	}
	return NULL;
}

static void wq_cpu_intensive_report(work_func_t func)
{
	struct wci_ent *ent;

restart:
	ent = wci_find_ent(func);
	if (ent) {
		u64 cnt;

		/*
		 * Start reporting from the fourth time and back off
		 * exponentially.
		 */
		cnt = atomic64_inc_return_relaxed(&ent->cnt);
		if (cnt >= 4 && is_power_of_2(cnt))
			printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
					ent->func, wq_cpu_intensive_thresh_us,
					atomic64_read(&ent->cnt));
		return;
	}

	/*
	 * @func is a new violation. Allocate a new entry for it. If wcn_ents[]
	 * is exhausted, something went really wrong and we probably made enough
	 * noise already.
	 */
	if (wci_nr_ents >= WCI_MAX_ENTS)
		return;

	raw_spin_lock(&wci_lock);

	if (wci_nr_ents >= WCI_MAX_ENTS) {
		raw_spin_unlock(&wci_lock);
		return;
	}

	if (wci_find_ent(func)) {
		raw_spin_unlock(&wci_lock);
		goto restart;
	}

	ent = &wci_ents[wci_nr_ents++];
	ent->func = func;
	atomic64_set(&ent->cnt, 1);
	hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);

	raw_spin_unlock(&wci_lock);
}

#else	/* CONFIG_WQ_CPU_INTENSIVE_REPORT */
static void wq_cpu_intensive_report(work_func_t func) {}
#endif	/* CONFIG_WQ_CPU_INTENSIVE_REPORT */

/**
 * wq_worker_running - a worker is running again
 * @task: task waking up
@@ -1057,6 +1149,7 @@ void wq_worker_tick(struct task_struct *task)
	raw_spin_lock(&pool->lock);

	worker_set_flags(worker, WORKER_CPU_INTENSIVE);
	wq_cpu_intensive_report(worker->current_func);
	pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;

	if (need_more_worker(pool)) {
+13 −0
Original line number Diff line number Diff line
@@ -1134,6 +1134,19 @@ config WQ_WATCHDOG
	  state.  This can be configured through kernel parameter
	  "workqueue.watchdog_thresh" and its sysfs counterpart.

config WQ_CPU_INTENSIVE_REPORT
	bool "Report per-cpu work items which hog CPU for too long"
	depends on DEBUG_KERNEL
	help
	  Say Y here to enable reporting of concurrency-managed per-cpu work
	  items that hog CPUs for longer than
	  workqueue.cpu_intensive_threshold_us. Workqueue automatically
	  detects and excludes them from concurrency management to prevent
	  them from stalling other per-cpu work items. Occassional
	  triggering may not necessarily indicate a problem. Repeated
	  triggering likely indicates that the work item should be switched
	  to use an unbound workqueue.

config TEST_LOCKUP
	tristate "Test module to generate lockups"
	depends on m