workqueue: Report work funcs that trigger automatic CPU_INTENSIVE mechanism (63638450) · Commits · jan.koester / Linux

Documentation/admin-guide/kernel-parameters.txt

+5 −0

Original line number	Diff line number	Diff line
		@@ -6938,6 +6938,11 @@
		them from noticeably delaying other per-cpu work
		items. Default is 10000 (10ms).

		If CONFIG_WQ_CPU_INTENSIVE_REPORT is set, the kernel
		will report the work functions which violate this
		threshold repeatedly. They are likely good
		candidates for using WQ_UNBOUND workqueues instead.

		workqueue.disable_numa
		By default, all work items queued to unbound
		workqueues are affine to the NUMA nodes they're

kernel/workqueue.c

+93 −0

Original line number	Diff line number	Diff line
		@@ -948,6 +948,98 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
		pool->nr_running++;
		}

		#ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT

		/*
		* Concurrency-managed per-cpu work items that hog CPU for longer than
		* wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism,
		* which prevents them from stalling other concurrency-managed work items. If a
		* work function keeps triggering this mechanism, it's likely that the work item
		* should be using an unbound workqueue instead.
		*
		* wq_cpu_intensive_report() tracks work functions which trigger such conditions
		* and reports them so that they can be examined and converted to use unbound
		* workqueues as appropriate. To avoid flooding the console, each violating work
		* function is tracked and reported with exponential backoff.
		*/
		/* Capacity of wci_ents[]; also sizes wci_hash via ilog2(). */
		#define WCI_MAX_ENTS 128

		/* Per-work-function record of how often it tripped the CPU_INTENSIVE mechanism. */
		struct wci_ent {
		/* work function this entry tracks; compared by address on lookup */
		work_func_t func;
		/* number of recorded violations; starts at 1 when the entry is created */
		atomic64_t cnt;
		/* linkage into wci_hash, keyed by the address of @func */
		struct hlist_node hash_node;
		};

		/* Static pool that entries are handed out from; entries are never returned. */
		static struct wci_ent wci_ents[WCI_MAX_ENTS];
		/* Number of wci_ents[] slots already handed out; incremented under wci_lock. */
		static int wci_nr_ents;
		/* Held while allocating a new entry and inserting it into wci_hash. */
		static DEFINE_RAW_SPINLOCK(wci_lock);
		/* Lookup table from work function address to its wci_ent. */
		static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS));

		/*
		 * Look up the tracking entry for @func in wci_hash.
		 *
		 * Returns the matching entry, or NULL if @func has not been recorded yet.
		 */
		static struct wci_ent *wci_find_ent(work_func_t func)
		{
			unsigned long key = (unsigned long)func;
			struct wci_ent *match = NULL;
			struct wci_ent *iter;

			/* Several functions may share a bucket, so compare the pointer itself. */
			hash_for_each_possible_rcu(wci_hash, iter, hash_node, key) {
				if (iter->func != func)
					continue;
				match = iter;
				break;
			}

			return match;
		}

		/**
		 * wq_cpu_intensive_report - record and report a CPU-hogging work function
		 * @func: work function that triggered the automatic CPU_INTENSIVE mechanism
		 *
		 * Bump the violation count of @func and emit a warning when the count is a
		 * power of two that is at least four. If @func has no entry yet, one is
		 * taken from wci_ents[] with its count set to 1. Once wci_ents[] is
		 * exhausted, functions without an entry are silently ignored.
		 */
		static void wq_cpu_intensive_report(work_func_t func)
		{
			struct wci_ent *ent;

		restart:
			/* Fast path: the lookup is done without taking wci_lock. */
			ent = wci_find_ent(func);
			if (ent) {
				u64 cnt;

				/*
				 * Start reporting from the fourth time and back off
				 * exponentially.
				 *
				 * Print the value returned by the increment instead of
				 * re-reading the counter: a concurrent increment could
				 * otherwise make the message show a count that is not
				 * the one which passed the check below.
				 */
				cnt = atomic64_inc_return_relaxed(&ent->cnt);
				if (cnt >= 4 && is_power_of_2(cnt))
					printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
							ent->func, wq_cpu_intensive_thresh_us,
							cnt);
				return;
			}

			/*
			 * @func is a new violation. Allocate a new entry for it. If wci_ents[]
			 * is exhausted, something went really wrong and we probably made enough
			 * noise already.
			 */
			if (wci_nr_ents >= WCI_MAX_ENTS)
				return;

			raw_spin_lock(&wci_lock);

			/* Recheck under the lock; the check above was done without it. */
			if (wci_nr_ents >= WCI_MAX_ENTS) {
				raw_spin_unlock(&wci_lock);
				return;
			}

			/*
			 * Someone else may have added @func between the lookup above and
			 * taking the lock. If so, retry so that the existing entry is
			 * counted instead of creating a duplicate.
			 */
			if (wci_find_ent(func)) {
				raw_spin_unlock(&wci_lock);
				goto restart;
			}

			ent = &wci_ents[wci_nr_ents++];
			ent->func = func;
			atomic64_set(&ent->cnt, 1);
			hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);

			raw_spin_unlock(&wci_lock);
		}

		#else /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
		/* Reporting is compiled out: accept @func and do nothing. */
		static void wq_cpu_intensive_report(work_func_t func)
		{
		}
		#endif /* CONFIG_WQ_CPU_INTENSIVE_REPORT */

		/**
		* wq_worker_running - a worker is running again
		* @task: task waking up
		@@ -1057,6 +1149,7 @@ void wq_worker_tick(struct task_struct *task)
		raw_spin_lock(&pool->lock);

		worker_set_flags(worker, WORKER_CPU_INTENSIVE);
		wq_cpu_intensive_report(worker->current_func);
		pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;

		if (need_more_worker(pool)) {

lib/Kconfig.debug

+13 −0

Original line number	Diff line number	Diff line
		@@ -1134,6 +1134,19 @@ config WQ_WATCHDOG
		state. This can be configured through kernel parameter
		"workqueue.watchdog_thresh" and its sysfs counterpart.

		config WQ_CPU_INTENSIVE_REPORT
		bool "Report per-cpu work items which hog CPU for too long"
		depends on DEBUG_KERNEL
		help
		Say Y here to enable reporting of concurrency-managed per-cpu work
		items that hog CPUs for longer than
		workqueue.cpu_intensive_threshold_us. Workqueue automatically
		detects and excludes them from concurrency management to prevent
		them from stalling other per-cpu work items. Occasional
		triggering may not necessarily indicate a problem. Repeated
		triggering likely indicates that the work item should be switched
		to use an unbound workqueue.

		config TEST_LOCKUP
		tristate "Test module to generate lockups"
		depends on m