diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1ca0b1d54e701644f0c714bc01fa999aab304a97..586ad91300b0f3924b41bd417cda9cbb076f1a9d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -332,6 +332,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); static int worker_thread(void *__worker); static void copy_workqueue_attrs(struct workqueue_attrs *to, const struct workqueue_attrs *from); +static void workqueue_sysfs_unregister(struct workqueue_struct *wq); #define CREATE_TRACE_POINTS #include @@ -3001,792 +3002,475 @@ int execute_in_process_context(work_func_t fn, struct execute_work *ew) } EXPORT_SYMBOL_GPL(execute_in_process_context); -#ifdef CONFIG_SYSFS -/* - * Workqueues with WQ_SYSFS flag set is visible to userland via - * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the - * following attributes. - * - * per_cpu RO bool : whether the workqueue is per-cpu or unbound - * max_active RW int : maximum number of in-flight work items - * - * Unbound workqueues have the following extra attributes. +/** + * free_workqueue_attrs - free a workqueue_attrs + * @attrs: workqueue_attrs to free * - * id RO int : the associated pool ID - * nice RW int : nice value of the workers - * cpumask RW mask : bitmask of allowed CPUs for the workers + * Undo alloc_workqueue_attrs(). */ -struct wq_device { - struct workqueue_struct *wq; - struct device dev; -}; - -static struct workqueue_struct *dev_to_wq(struct device *dev) +void free_workqueue_attrs(struct workqueue_attrs *attrs) { - struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); - - return wq_dev->wq; + if (attrs) { + free_cpumask_var(attrs->cpumask); + kfree(attrs); + } } -static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr, - char *buf) +/** + * alloc_workqueue_attrs - allocate a workqueue_attrs + * @gfp_mask: allocation mask to use + * + * Allocate a new workqueue_attrs, initialize with default settings and + * return it. + * + * Return: The allocated new workqueue_attr on success. %NULL on failure. + */ +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) { - struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; - return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); + attrs = kzalloc(sizeof(*attrs), gfp_mask); + if (!attrs) + goto fail; + if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) + goto fail; + + cpumask_copy(attrs->cpumask, cpu_possible_mask); + return attrs; +fail: + free_workqueue_attrs(attrs); + return NULL; } -static DEVICE_ATTR_RO(per_cpu); -static ssize_t max_active_show(struct device *dev, - struct device_attribute *attr, char *buf) +static void copy_workqueue_attrs(struct workqueue_attrs *to, + const struct workqueue_attrs *from) { - struct workqueue_struct *wq = dev_to_wq(dev); - - return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); + to->nice = from->nice; + cpumask_copy(to->cpumask, from->cpumask); + /* + * Unlike hash and equality test, this function doesn't ignore + * ->no_numa as it is used for both pool and wq attrs. Instead, + * get_unbound_pool() explicitly clears ->no_numa after copying. 
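+	 * For example, two unbound wq attrs that differ only in ->no_numa
+	 * hash and compare equal, so they end up sharing the same unbound
+	 * pool, and that pool's own attrs always have ->no_numa cleared.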
+ */ + to->no_numa = from->no_numa; } -static ssize_t max_active_store(struct device *dev, - struct device_attribute *attr, const char *buf, - size_t count) +/* hash value of the content of @attr */ +static u32 wqattrs_hash(const struct workqueue_attrs *attrs) { - struct workqueue_struct *wq = dev_to_wq(dev); - int val; - - if (sscanf(buf, "%d", &val) != 1 || val <= 0) - return -EINVAL; + u32 hash = 0; - workqueue_set_max_active(wq, val); - return count; + hash = jhash_1word(attrs->nice, hash); + hash = jhash(cpumask_bits(attrs->cpumask), + BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); + return hash; } -static DEVICE_ATTR_RW(max_active); - -static struct attribute *wq_sysfs_attrs[] = { - &dev_attr_per_cpu.attr, - &dev_attr_max_active.attr, - NULL, -}; -ATTRIBUTE_GROUPS(wq_sysfs); -static ssize_t wq_pool_ids_show(struct device *dev, - struct device_attribute *attr, char *buf) +/* content equality test */ +static bool wqattrs_equal(const struct workqueue_attrs *a, + const struct workqueue_attrs *b) { - struct workqueue_struct *wq = dev_to_wq(dev); - const char *delim = ""; - int node, written = 0; - - rcu_read_lock_sched(); - for_each_node(node) { - written += scnprintf(buf + written, PAGE_SIZE - written, - "%s%d:%d", delim, node, - unbound_pwq_by_node(wq, node)->pool->id); - delim = " "; - } - written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); - rcu_read_unlock_sched(); - - return written; + if (a->nice != b->nice) + return false; + if (!cpumask_equal(a->cpumask, b->cpumask)) + return false; + return true; } -static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, - char *buf) +/** + * init_worker_pool - initialize a newly zalloc'd worker_pool + * @pool: worker_pool to initialize + * + * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs. + * + * Return: 0 on success, -errno on failure. Even on failure, all fields + * inside @pool proper are initialized and put_unbound_pool() can be called + * on @pool safely to release it. 
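+ *
+ * Typical usage mirrors get_unbound_pool() (shown purely as an
+ * illustration):
+ *
+ *	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ *	if (!pool || init_worker_pool(pool) < 0)
+ *		goto fail;
+ *
+ * where the fail path ends up calling put_unbound_pool(pool).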
+ */ +static int init_worker_pool(struct worker_pool *pool) { - struct workqueue_struct *wq = dev_to_wq(dev); - int written; + spin_lock_init(&pool->lock); + pool->id = -1; + pool->cpu = -1; + pool->node = NUMA_NO_NODE; + pool->flags |= POOL_DISASSOCIATED; + INIT_LIST_HEAD(&pool->worklist); + INIT_LIST_HEAD(&pool->idle_list); + hash_init(pool->busy_hash); - mutex_lock(&wq->mutex); - written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); - mutex_unlock(&wq->mutex); + init_timer_deferrable(&pool->idle_timer); + pool->idle_timer.function = idle_worker_timeout; + pool->idle_timer.data = (unsigned long)pool; - return written; -} + setup_timer(&pool->mayday_timer, pool_mayday_timeout, + (unsigned long)pool); -/* prepare workqueue_attrs for sysfs store operations */ -static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) -{ - struct workqueue_attrs *attrs; + mutex_init(&pool->manager_arb); + mutex_init(&pool->attach_mutex); + INIT_LIST_HEAD(&pool->workers); - attrs = alloc_workqueue_attrs(GFP_KERNEL); - if (!attrs) - return NULL; + ida_init(&pool->worker_ida); + INIT_HLIST_NODE(&pool->hash_node); + pool->refcnt = 1; - mutex_lock(&wq->mutex); - copy_workqueue_attrs(attrs, wq->unbound_attrs); - mutex_unlock(&wq->mutex); - return attrs; + /* shouldn't fail above this point */ + pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!pool->attrs) + return -ENOMEM; + return 0; } -static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +static void rcu_free_wq(struct rcu_head *rcu) { - struct workqueue_struct *wq = dev_to_wq(dev); - struct workqueue_attrs *attrs; - int ret; - - attrs = wq_sysfs_prep_attrs(wq); - if (!attrs) - return -ENOMEM; + struct workqueue_struct *wq = + container_of(rcu, struct workqueue_struct, rcu); - if (sscanf(buf, "%d", &attrs->nice) == 1 && - attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) - ret = apply_workqueue_attrs(wq, attrs); + if (!(wq->flags & WQ_UNBOUND)) + free_percpu(wq->cpu_pwqs); else - ret = -EINVAL; + free_workqueue_attrs(wq->unbound_attrs); - free_workqueue_attrs(attrs); - return ret ?: count; + kfree(wq->rescuer); + kfree(wq); } -static ssize_t wq_cpumask_show(struct device *dev, - struct device_attribute *attr, char *buf) +static void rcu_free_pool(struct rcu_head *rcu) { - struct workqueue_struct *wq = dev_to_wq(dev); - int written; + struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); - mutex_lock(&wq->mutex); - written = scnprintf(buf, PAGE_SIZE, "%*pb\n", - cpumask_pr_args(wq->unbound_attrs->cpumask)); - mutex_unlock(&wq->mutex); - return written; + ida_destroy(&pool->worker_ida); + free_workqueue_attrs(pool->attrs); + kfree(pool); } -static ssize_t wq_cpumask_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +/** + * put_unbound_pool - put a worker_pool + * @pool: worker_pool to put + * + * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU + * safe manner. get_unbound_pool() calls this function on its failure path + * and this function should be able to release pools which went through, + * successfully or not, init_worker_pool(). + * + * Should be called with wq_pool_mutex held. 
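+ *
+ * For example (illustration only), pwq_unbound_release_workfn() drops
+ * its pool reference as:
+ *
+ *	mutex_lock(&wq_pool_mutex);
+ *	put_unbound_pool(pool);
+ *	mutex_unlock(&wq_pool_mutex);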
+ */ +static void put_unbound_pool(struct worker_pool *pool) { - struct workqueue_struct *wq = dev_to_wq(dev); - struct workqueue_attrs *attrs; - int ret; + DECLARE_COMPLETION_ONSTACK(detach_completion); + struct worker *worker; - attrs = wq_sysfs_prep_attrs(wq); - if (!attrs) - return -ENOMEM; + lockdep_assert_held(&wq_pool_mutex); - ret = cpumask_parse(buf, attrs->cpumask); - if (!ret) - ret = apply_workqueue_attrs(wq, attrs); + if (--pool->refcnt) + return; - free_workqueue_attrs(attrs); - return ret ?: count; -} + /* sanity checks */ + if (WARN_ON(!(pool->cpu < 0)) || + WARN_ON(!list_empty(&pool->worklist))) + return; -static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct workqueue_struct *wq = dev_to_wq(dev); - int written; - - mutex_lock(&wq->mutex); - written = scnprintf(buf, PAGE_SIZE, "%d\n", - !wq->unbound_attrs->no_numa); - mutex_unlock(&wq->mutex); - - return written; -} - -static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct workqueue_struct *wq = dev_to_wq(dev); - struct workqueue_attrs *attrs; - int v, ret; - - attrs = wq_sysfs_prep_attrs(wq); - if (!attrs) - return -ENOMEM; + /* release id and unhash */ + if (pool->id >= 0) + idr_remove(&worker_pool_idr, pool->id); + hash_del(&pool->hash_node); - ret = -EINVAL; - if (sscanf(buf, "%d", &v) == 1) { - attrs->no_numa = !v; - ret = apply_workqueue_attrs(wq, attrs); - } + /* + * Become the manager and destroy all workers. Grabbing + * manager_arb prevents @pool's workers from blocking on + * attach_mutex. + */ + mutex_lock(&pool->manager_arb); - free_workqueue_attrs(attrs); - return ret ?: count; -} + spin_lock_irq(&pool->lock); + while ((worker = first_idle_worker(pool))) + destroy_worker(worker); + WARN_ON(pool->nr_workers || pool->nr_idle); + spin_unlock_irq(&pool->lock); -static struct device_attribute wq_sysfs_unbound_attrs[] = { - __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), - __ATTR(nice, 0644, wq_nice_show, wq_nice_store), - __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), - __ATTR(numa, 0644, wq_numa_show, wq_numa_store), - __ATTR_NULL, -}; + mutex_lock(&pool->attach_mutex); + if (!list_empty(&pool->workers)) + pool->detach_completion = &detach_completion; + mutex_unlock(&pool->attach_mutex); -static struct bus_type wq_subsys = { - .name = "workqueue", - .dev_groups = wq_sysfs_groups, -}; + if (pool->detach_completion) + wait_for_completion(pool->detach_completion); -static int __init wq_sysfs_init(void) -{ - return subsys_virtual_register(&wq_subsys, NULL); -} -core_initcall(wq_sysfs_init); + mutex_unlock(&pool->manager_arb); -static void wq_device_release(struct device *dev) -{ - struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + /* shut down the timers */ + del_timer_sync(&pool->idle_timer); + del_timer_sync(&pool->mayday_timer); - kfree(wq_dev); + /* sched-RCU protected to allow dereferences from get_work_pool() */ + call_rcu_sched(&pool->rcu, rcu_free_pool); } /** - * workqueue_sysfs_register - make a workqueue visible in sysfs - * @wq: the workqueue to register + * get_unbound_pool - get a worker_pool with the specified attributes + * @attrs: the attributes of the worker_pool to get * - * Expose @wq in sysfs under /sys/bus/workqueue/devices. - * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set - * which is the preferred method. + * Obtain a worker_pool which has the same attributes as @attrs, bump the + * reference count and return it. 
If there already is a matching + * worker_pool, it will be used; otherwise, this function attempts to + * create a new one. * - * Workqueue user should use this function directly iff it wants to apply - * workqueue_attrs before making the workqueue visible in sysfs; otherwise, - * apply_workqueue_attrs() may race against userland updating the - * attributes. + * Should be called with wq_pool_mutex held. * - * Return: 0 on success, -errno on failure. + * Return: On success, a worker_pool with the same attributes as @attrs. + * On failure, %NULL. */ -int workqueue_sysfs_register(struct workqueue_struct *wq) +static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) { - struct wq_device *wq_dev; - int ret; + u32 hash = wqattrs_hash(attrs); + struct worker_pool *pool; + int node; - /* - * Adjusting max_active or creating new pwqs by applyting - * attributes breaks ordering guarantee. Disallow exposing ordered - * workqueues. - */ - if (WARN_ON(wq->flags & __WQ_ORDERED)) - return -EINVAL; + lockdep_assert_held(&wq_pool_mutex); - wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); - if (!wq_dev) - return -ENOMEM; + /* do we already have a matching pool? */ + hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { + if (wqattrs_equal(pool->attrs, attrs)) { + pool->refcnt++; + return pool; + } + } - wq_dev->wq = wq; - wq_dev->dev.bus = &wq_subsys; - wq_dev->dev.init_name = wq->name; - wq_dev->dev.release = wq_device_release; + /* nope, create a new one */ + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool || init_worker_pool(pool) < 0) + goto fail; + + lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ + copy_workqueue_attrs(pool->attrs, attrs); /* - * unbound_attrs are created separately. Suppress uevent until - * everything is ready. + * no_numa isn't a worker_pool attribute, always clear it. See + * 'struct workqueue_attrs' comments for detail. */ - dev_set_uevent_suppress(&wq_dev->dev, true); - - ret = device_register(&wq_dev->dev); - if (ret) { - kfree(wq_dev); - wq->wq_dev = NULL; - return ret; - } - - if (wq->flags & WQ_UNBOUND) { - struct device_attribute *attr; + pool->attrs->no_numa = false; - for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { - ret = device_create_file(&wq_dev->dev, attr); - if (ret) { - device_unregister(&wq_dev->dev); - wq->wq_dev = NULL; - return ret; + /* if cpumask is contained inside a NUMA node, we belong to that node */ + if (wq_numa_enabled) { + for_each_node(node) { + if (cpumask_subset(pool->attrs->cpumask, + wq_numa_possible_cpumask[node])) { + pool->node = node; + break; } } } - dev_set_uevent_suppress(&wq_dev->dev, false); - kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); - return 0; -} + if (worker_pool_assign_id(pool) < 0) + goto fail; -/** - * workqueue_sysfs_unregister - undo workqueue_sysfs_register() - * @wq: the workqueue to unregister - * - * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. 
- */ -static void workqueue_sysfs_unregister(struct workqueue_struct *wq) -{ - struct wq_device *wq_dev = wq->wq_dev; + /* create and start the initial worker */ + if (!create_worker(pool)) + goto fail; - if (!wq->wq_dev) - return; + /* install */ + hash_add(unbound_pool_hash, &pool->hash_node, hash); - wq->wq_dev = NULL; - device_unregister(&wq_dev->dev); + return pool; +fail: + if (pool) + put_unbound_pool(pool); + return NULL; } -#else /* CONFIG_SYSFS */ -static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } -#endif /* CONFIG_SYSFS */ -/** - * free_workqueue_attrs - free a workqueue_attrs - * @attrs: workqueue_attrs to free - * - * Undo alloc_workqueue_attrs(). - */ -void free_workqueue_attrs(struct workqueue_attrs *attrs) +static void rcu_free_pwq(struct rcu_head *rcu) { - if (attrs) { - free_cpumask_var(attrs->cpumask); - kfree(attrs); - } + kmem_cache_free(pwq_cache, + container_of(rcu, struct pool_workqueue, rcu)); } -/** - * alloc_workqueue_attrs - allocate a workqueue_attrs - * @gfp_mask: allocation mask to use - * - * Allocate a new workqueue_attrs, initialize with default settings and - * return it. - * - * Return: The allocated new workqueue_attr on success. %NULL on failure. +/* + * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt + * and needs to be destroyed. */ -struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) +static void pwq_unbound_release_workfn(struct work_struct *work) { - struct workqueue_attrs *attrs; + struct pool_workqueue *pwq = container_of(work, struct pool_workqueue, + unbound_release_work); + struct workqueue_struct *wq = pwq->wq; + struct worker_pool *pool = pwq->pool; + bool is_last; - attrs = kzalloc(sizeof(*attrs), gfp_mask); - if (!attrs) - goto fail; - if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) - goto fail; + if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) + return; - cpumask_copy(attrs->cpumask, cpu_possible_mask); - return attrs; -fail: - free_workqueue_attrs(attrs); - return NULL; -} + mutex_lock(&wq->mutex); + list_del_rcu(&pwq->pwqs_node); + is_last = list_empty(&wq->pwqs); + mutex_unlock(&wq->mutex); + + mutex_lock(&wq_pool_mutex); + put_unbound_pool(pool); + mutex_unlock(&wq_pool_mutex); + + call_rcu_sched(&pwq->rcu, rcu_free_pwq); -static void copy_workqueue_attrs(struct workqueue_attrs *to, - const struct workqueue_attrs *from) -{ - to->nice = from->nice; - cpumask_copy(to->cpumask, from->cpumask); /* - * Unlike hash and equality test, this function doesn't ignore - * ->no_numa as it is used for both pool and wq attrs. Instead, - * get_unbound_pool() explicitly clears ->no_numa after copying. + * If we're the last pwq going away, @wq is already dead and no one + * is gonna access it anymore. Schedule RCU free. */ - to->no_numa = from->no_numa; -} - -/* hash value of the content of @attr */ -static u32 wqattrs_hash(const struct workqueue_attrs *attrs) -{ - u32 hash = 0; - - hash = jhash_1word(attrs->nice, hash); - hash = jhash(cpumask_bits(attrs->cpumask), - BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); - return hash; -} - -/* content equality test */ -static bool wqattrs_equal(const struct workqueue_attrs *a, - const struct workqueue_attrs *b) -{ - if (a->nice != b->nice) - return false; - if (!cpumask_equal(a->cpumask, b->cpumask)) - return false; - return true; + if (is_last) + call_rcu_sched(&wq->rcu, rcu_free_wq); } /** - * init_worker_pool - initialize a newly zalloc'd worker_pool - * @pool: worker_pool to initialize - * - * Initiailize a newly zalloc'd @pool. 
It also allocates @pool->attrs. + * pwq_adjust_max_active - update a pwq's max_active to the current setting + * @pwq: target pool_workqueue * - * Return: 0 on success, -errno on failure. Even on failure, all fields - * inside @pool proper are initialized and put_unbound_pool() can be called - * on @pool safely to release it. + * If @pwq isn't freezing, set @pwq->max_active to the associated + * workqueue's saved_max_active and activate delayed work items + * accordingly. If @pwq is freezing, clear @pwq->max_active to zero. */ -static int init_worker_pool(struct worker_pool *pool) +static void pwq_adjust_max_active(struct pool_workqueue *pwq) { - spin_lock_init(&pool->lock); - pool->id = -1; - pool->cpu = -1; - pool->node = NUMA_NO_NODE; - pool->flags |= POOL_DISASSOCIATED; - INIT_LIST_HEAD(&pool->worklist); - INIT_LIST_HEAD(&pool->idle_list); - hash_init(pool->busy_hash); - - init_timer_deferrable(&pool->idle_timer); - pool->idle_timer.function = idle_worker_timeout; - pool->idle_timer.data = (unsigned long)pool; + struct workqueue_struct *wq = pwq->wq; + bool freezable = wq->flags & WQ_FREEZABLE; - setup_timer(&pool->mayday_timer, pool_mayday_timeout, - (unsigned long)pool); + /* for @wq->saved_max_active */ + lockdep_assert_held(&wq->mutex); - mutex_init(&pool->manager_arb); - mutex_init(&pool->attach_mutex); - INIT_LIST_HEAD(&pool->workers); + /* fast exit for non-freezable wqs */ + if (!freezable && pwq->max_active == wq->saved_max_active) + return; - ida_init(&pool->worker_ida); - INIT_HLIST_NODE(&pool->hash_node); - pool->refcnt = 1; + spin_lock_irq(&pwq->pool->lock); - /* shouldn't fail above this point */ - pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); - if (!pool->attrs) - return -ENOMEM; - return 0; -} + /* + * During [un]freezing, the caller is responsible for ensuring that + * this function is called at least once after @workqueue_freezing + * is updated and visible. + */ + if (!freezable || !workqueue_freezing) { + pwq->max_active = wq->saved_max_active; -static void rcu_free_wq(struct rcu_head *rcu) -{ - struct workqueue_struct *wq = - container_of(rcu, struct workqueue_struct, rcu); + while (!list_empty(&pwq->delayed_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); - if (!(wq->flags & WQ_UNBOUND)) - free_percpu(wq->cpu_pwqs); - else - free_workqueue_attrs(wq->unbound_attrs); + /* + * Need to kick a worker after thawed or an unbound wq's + * max_active is bumped. It's a slow path. Do it always. + */ + wake_up_worker(pwq->pool); + } else { + pwq->max_active = 0; + } - kfree(wq->rescuer); - kfree(wq); + spin_unlock_irq(&pwq->pool->lock); } -static void rcu_free_pool(struct rcu_head *rcu) +/* initialize newly alloced @pwq which is associated with @wq and @pool */ +static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, + struct worker_pool *pool) { - struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); + BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); - ida_destroy(&pool->worker_ida); - free_workqueue_attrs(pool->attrs); - kfree(pool); + memset(pwq, 0, sizeof(*pwq)); + + pwq->pool = pool; + pwq->wq = wq; + pwq->flush_color = -1; + pwq->refcnt = 1; + INIT_LIST_HEAD(&pwq->delayed_works); + INIT_LIST_HEAD(&pwq->pwqs_node); + INIT_LIST_HEAD(&pwq->mayday_node); + INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); } -/** - * put_unbound_pool - put a worker_pool - * @pool: worker_pool to put - * - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU - * safe manner. 
get_unbound_pool() calls this function on its failure path - * and this function should be able to release pools which went through, - * successfully or not, init_worker_pool(). - * - * Should be called with wq_pool_mutex held. - */ -static void put_unbound_pool(struct worker_pool *pool) +/* sync @pwq with the current state of its associated wq and link it */ +static void link_pwq(struct pool_workqueue *pwq) { - DECLARE_COMPLETION_ONSTACK(detach_completion); - struct worker *worker; + struct workqueue_struct *wq = pwq->wq; - lockdep_assert_held(&wq_pool_mutex); + lockdep_assert_held(&wq->mutex); - if (--pool->refcnt) + /* may be called multiple times, ignore if already linked */ + if (!list_empty(&pwq->pwqs_node)) return; - /* sanity checks */ - if (WARN_ON(!(pool->cpu < 0)) || - WARN_ON(!list_empty(&pool->worklist))) - return; + /* set the matching work_color */ + pwq->work_color = wq->work_color; - /* release id and unhash */ - if (pool->id >= 0) - idr_remove(&worker_pool_idr, pool->id); - hash_del(&pool->hash_node); + /* sync max_active to the current setting */ + pwq_adjust_max_active(pwq); - /* - * Become the manager and destroy all workers. Grabbing - * manager_arb prevents @pool's workers from blocking on - * attach_mutex. - */ - mutex_lock(&pool->manager_arb); + /* link in @pwq */ + list_add_rcu(&pwq->pwqs_node, &wq->pwqs); +} - spin_lock_irq(&pool->lock); - while ((worker = first_idle_worker(pool))) - destroy_worker(worker); - WARN_ON(pool->nr_workers || pool->nr_idle); - spin_unlock_irq(&pool->lock); +/* obtain a pool matching @attr and create a pwq associating the pool and @wq */ +static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct worker_pool *pool; + struct pool_workqueue *pwq; - mutex_lock(&pool->attach_mutex); - if (!list_empty(&pool->workers)) - pool->detach_completion = &detach_completion; - mutex_unlock(&pool->attach_mutex); + lockdep_assert_held(&wq_pool_mutex); - if (pool->detach_completion) - wait_for_completion(pool->detach_completion); + pool = get_unbound_pool(attrs); + if (!pool) + return NULL; - mutex_unlock(&pool->manager_arb); + pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); + if (!pwq) { + put_unbound_pool(pool); + return NULL; + } - /* shut down the timers */ - del_timer_sync(&pool->idle_timer); - del_timer_sync(&pool->mayday_timer); + init_pwq(pwq, wq, pool); + return pwq; +} - /* sched-RCU protected to allow dereferences from get_work_pool() */ - call_rcu_sched(&pool->rcu, rcu_free_pool); +/* undo alloc_unbound_pwq(), used only in the error path */ +static void free_unbound_pwq(struct pool_workqueue *pwq) +{ + lockdep_assert_held(&wq_pool_mutex); + + if (pwq) { + put_unbound_pool(pwq->pool); + kmem_cache_free(pwq_cache, pwq); + } } /** - * get_unbound_pool - get a worker_pool with the specified attributes - * @attrs: the attributes of the worker_pool to get + * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node + * @attrs: the wq_attrs of interest + * @node: the target NUMA node + * @cpu_going_down: if >= 0, the CPU to consider as offline + * @cpumask: outarg, the resulting cpumask * - * Obtain a worker_pool which has the same attributes as @attrs, bump the - * reference count and return it. If there already is a matching - * worker_pool, it will be used; otherwise, this function attempts to - * create a new one. + * Calculate the cpumask a workqueue with @attrs should use on @node. 
If + * @cpu_going_down is >= 0, that cpu is considered offline during + * calculation. The result is stored in @cpumask. * - * Should be called with wq_pool_mutex held. + * If NUMA affinity is not enabled, @attrs->cpumask is always used. If + * enabled and @node has online CPUs requested by @attrs, the returned + * cpumask is the intersection of the possible CPUs of @node and + * @attrs->cpumask. * - * Return: On success, a worker_pool with the same attributes as @attrs. - * On failure, %NULL. + * The caller is responsible for ensuring that the cpumask of @node stays + * stable. + * + * Return: %true if the resulting @cpumask is different from @attrs->cpumask, + * %false if equal. */ -static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) +static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, + int cpu_going_down, cpumask_t *cpumask) { - u32 hash = wqattrs_hash(attrs); - struct worker_pool *pool; - int node; + if (!wq_numa_enabled || attrs->no_numa) + goto use_dfl; - lockdep_assert_held(&wq_pool_mutex); + /* does @node have any online CPUs @attrs wants? */ + cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask); + if (cpu_going_down >= 0) + cpumask_clear_cpu(cpu_going_down, cpumask); - /* do we already have a matching pool? */ - hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { - if (wqattrs_equal(pool->attrs, attrs)) { - pool->refcnt++; - return pool; - } - } - - /* nope, create a new one */ - pool = kzalloc(sizeof(*pool), GFP_KERNEL); - if (!pool || init_worker_pool(pool) < 0) - goto fail; - - lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ - copy_workqueue_attrs(pool->attrs, attrs); - - /* - * no_numa isn't a worker_pool attribute, always clear it. See - * 'struct workqueue_attrs' comments for detail. - */ - pool->attrs->no_numa = false; - - /* if cpumask is contained inside a NUMA node, we belong to that node */ - if (wq_numa_enabled) { - for_each_node(node) { - if (cpumask_subset(pool->attrs->cpumask, - wq_numa_possible_cpumask[node])) { - pool->node = node; - break; - } - } - } - - if (worker_pool_assign_id(pool) < 0) - goto fail; - - /* create and start the initial worker */ - if (!create_worker(pool)) - goto fail; - - /* install */ - hash_add(unbound_pool_hash, &pool->hash_node, hash); - - return pool; -fail: - if (pool) - put_unbound_pool(pool); - return NULL; -} - -static void rcu_free_pwq(struct rcu_head *rcu) -{ - kmem_cache_free(pwq_cache, - container_of(rcu, struct pool_workqueue, rcu)); -} - -/* - * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt - * and needs to be destroyed. - */ -static void pwq_unbound_release_workfn(struct work_struct *work) -{ - struct pool_workqueue *pwq = container_of(work, struct pool_workqueue, - unbound_release_work); - struct workqueue_struct *wq = pwq->wq; - struct worker_pool *pool = pwq->pool; - bool is_last; - - if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) - return; - - mutex_lock(&wq->mutex); - list_del_rcu(&pwq->pwqs_node); - is_last = list_empty(&wq->pwqs); - mutex_unlock(&wq->mutex); - - mutex_lock(&wq_pool_mutex); - put_unbound_pool(pool); - mutex_unlock(&wq_pool_mutex); - - call_rcu_sched(&pwq->rcu, rcu_free_pwq); - - /* - * If we're the last pwq going away, @wq is already dead and no one - * is gonna access it anymore. Schedule RCU free. 
- */ - if (is_last) - call_rcu_sched(&wq->rcu, rcu_free_wq); -} - -/** - * pwq_adjust_max_active - update a pwq's max_active to the current setting - * @pwq: target pool_workqueue - * - * If @pwq isn't freezing, set @pwq->max_active to the associated - * workqueue's saved_max_active and activate delayed work items - * accordingly. If @pwq is freezing, clear @pwq->max_active to zero. - */ -static void pwq_adjust_max_active(struct pool_workqueue *pwq) -{ - struct workqueue_struct *wq = pwq->wq; - bool freezable = wq->flags & WQ_FREEZABLE; - - /* for @wq->saved_max_active */ - lockdep_assert_held(&wq->mutex); - - /* fast exit for non-freezable wqs */ - if (!freezable && pwq->max_active == wq->saved_max_active) - return; - - spin_lock_irq(&pwq->pool->lock); - - /* - * During [un]freezing, the caller is responsible for ensuring that - * this function is called at least once after @workqueue_freezing - * is updated and visible. - */ - if (!freezable || !workqueue_freezing) { - pwq->max_active = wq->saved_max_active; - - while (!list_empty(&pwq->delayed_works) && - pwq->nr_active < pwq->max_active) - pwq_activate_first_delayed(pwq); - - /* - * Need to kick a worker after thawed or an unbound wq's - * max_active is bumped. It's a slow path. Do it always. - */ - wake_up_worker(pwq->pool); - } else { - pwq->max_active = 0; - } - - spin_unlock_irq(&pwq->pool->lock); -} - -/* initialize newly alloced @pwq which is associated with @wq and @pool */ -static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq, - struct worker_pool *pool) -{ - BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); - - memset(pwq, 0, sizeof(*pwq)); - - pwq->pool = pool; - pwq->wq = wq; - pwq->flush_color = -1; - pwq->refcnt = 1; - INIT_LIST_HEAD(&pwq->delayed_works); - INIT_LIST_HEAD(&pwq->pwqs_node); - INIT_LIST_HEAD(&pwq->mayday_node); - INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn); -} - -/* sync @pwq with the current state of its associated wq and link it */ -static void link_pwq(struct pool_workqueue *pwq) -{ - struct workqueue_struct *wq = pwq->wq; - - lockdep_assert_held(&wq->mutex); - - /* may be called multiple times, ignore if already linked */ - if (!list_empty(&pwq->pwqs_node)) - return; - - /* set the matching work_color */ - pwq->work_color = wq->work_color; - - /* sync max_active to the current setting */ - pwq_adjust_max_active(pwq); - - /* link in @pwq */ - list_add_rcu(&pwq->pwqs_node, &wq->pwqs); -} - -/* obtain a pool matching @attr and create a pwq associating the pool and @wq */ -static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, - const struct workqueue_attrs *attrs) -{ - struct worker_pool *pool; - struct pool_workqueue *pwq; - - lockdep_assert_held(&wq_pool_mutex); - - pool = get_unbound_pool(attrs); - if (!pool) - return NULL; - - pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); - if (!pwq) { - put_unbound_pool(pool); - return NULL; - } - - init_pwq(pwq, wq, pool); - return pwq; -} - -/* undo alloc_unbound_pwq(), used only in the error path */ -static void free_unbound_pwq(struct pool_workqueue *pwq) -{ - lockdep_assert_held(&wq_pool_mutex); - - if (pwq) { - put_unbound_pool(pwq->pool); - kmem_cache_free(pwq_cache, pwq); - } -} - -/** - * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node - * @attrs: the wq_attrs of interest - * @node: the target NUMA node - * @cpu_going_down: if >= 0, the CPU to consider as offline - * @cpumask: outarg, the resulting cpumask - * - * Calculate the cpumask a workqueue 
with @attrs should use on @node. If - * @cpu_going_down is >= 0, that cpu is considered offline during - * calculation. The result is stored in @cpumask. - * - * If NUMA affinity is not enabled, @attrs->cpumask is always used. If - * enabled and @node has online CPUs requested by @attrs, the returned - * cpumask is the intersection of the possible CPUs of @node and - * @attrs->cpumask. - * - * The caller is responsible for ensuring that the cpumask of @node stays - * stable. - * - * Return: %true if the resulting @cpumask is different from @attrs->cpumask, - * %false if equal. - */ -static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, - int cpu_going_down, cpumask_t *cpumask) -{ - if (!wq_numa_enabled || attrs->no_numa) - goto use_dfl; - - /* does @node have any online CPUs @attrs wants? */ - cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask); - if (cpu_going_down >= 0) - cpumask_clear_cpu(cpu_going_down, cpumask); - - if (cpumask_empty(cpumask)) - goto use_dfl; + if (cpumask_empty(cpumask)) + goto use_dfl; /* yeap, return possible CPUs in @node that @attrs wants */ cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]); @@ -4817,202 +4501,519 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, else if (pool->cpu < 0) restore_unbound_workers_cpumask(pool, cpu); - mutex_unlock(&pool->attach_mutex); - } + mutex_unlock(&pool->attach_mutex); + } + + /* update NUMA affinity of unbound workqueues */ + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, true); + + mutex_unlock(&wq_pool_mutex); + break; + } + return NOTIFY_OK; +} + +/* + * Workqueues should be brought down after normal priority CPU notifiers. + * This will be registered as low priority CPU notifier. + */ +static int workqueue_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct work_struct unbind_work; + struct workqueue_struct *wq; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + /* unbinding per-cpu workers should happen on the local CPU */ + INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); + queue_work_on(cpu, system_highpri_wq, &unbind_work); + + /* update NUMA affinity of unbound workqueues */ + mutex_lock(&wq_pool_mutex); + list_for_each_entry(wq, &workqueues, list) + wq_update_unbound_numa(wq, cpu, false); + mutex_unlock(&wq_pool_mutex); + + /* wait for per-cpu unbinding to finish */ + flush_work(&unbind_work); + destroy_work_on_stack(&unbind_work); + break; + } + return NOTIFY_OK; +} + +#ifdef CONFIG_SMP + +struct work_for_cpu { + struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; +}; + +static void work_for_cpu_fn(struct work_struct *work) +{ + struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); + + wfc->ret = wfc->fn(wfc->arg); +} + +/** + * work_on_cpu - run a function in user context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * @arg: the function arg + * + * It is up to the caller to ensure that the cpu doesn't go offline. + * The caller must not hold any locks which would prevent @fn from completing. + * + * Return: The value @fn returns. 
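+ *
+ * Example (illustrative only; my_init_fn and my_arg are hypothetical):
+ *
+ *	get_online_cpus();
+ *	ret = work_on_cpu(cpu, my_init_fn, my_arg);
+ *	put_online_cpus();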
+ */ +long work_on_cpu(int cpu, long (*fn)(void *), void *arg) +{ + struct work_for_cpu wfc = { .fn = fn, .arg = arg }; + + INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); + destroy_work_on_stack(&wfc.work); + return wfc.ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu); +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_FREEZER + +/** + * freeze_workqueues_begin - begin freezing workqueues + * + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their delayed_works list instead of + * pool->worklist. + * + * CONTEXT: + * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. + */ +void freeze_workqueues_begin(void) +{ + struct workqueue_struct *wq; + struct pool_workqueue *pwq; + + mutex_lock(&wq_pool_mutex); + + WARN_ON_ONCE(workqueue_freezing); + workqueue_freezing = true; + + list_for_each_entry(wq, &workqueues, list) { + mutex_lock(&wq->mutex); + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); + mutex_unlock(&wq->mutex); + } + + mutex_unlock(&wq_pool_mutex); +} + +/** + * freeze_workqueues_busy - are freezable workqueues still busy? + * + * Check whether freezing is complete. This function must be called + * between freeze_workqueues_begin() and thaw_workqueues(). + * + * CONTEXT: + * Grabs and releases wq_pool_mutex. + * + * Return: + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. + */ +bool freeze_workqueues_busy(void) +{ + bool busy = false; + struct workqueue_struct *wq; + struct pool_workqueue *pwq; + + mutex_lock(&wq_pool_mutex); + + WARN_ON_ONCE(!workqueue_freezing); + + list_for_each_entry(wq, &workqueues, list) { + if (!(wq->flags & WQ_FREEZABLE)) + continue; + /* + * nr_active is monotonically decreasing. It's safe + * to peek without lock. + */ + rcu_read_lock_sched(); + for_each_pwq(pwq, wq) { + WARN_ON_ONCE(pwq->nr_active < 0); + if (pwq->nr_active) { + busy = true; + rcu_read_unlock_sched(); + goto out_unlock; + } + } + rcu_read_unlock_sched(); + } +out_unlock: + mutex_unlock(&wq_pool_mutex); + return busy; +} + +/** + * thaw_workqueues - thaw workqueues + * + * Thaw workqueues. Normal queueing is restored and all collected + * frozen works are transferred to their respective pool worklists. + * + * CONTEXT: + * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. + */ +void thaw_workqueues(void) +{ + struct workqueue_struct *wq; + struct pool_workqueue *pwq; + + mutex_lock(&wq_pool_mutex); + + if (!workqueue_freezing) + goto out_unlock; + + workqueue_freezing = false; + + /* restore max_active and repopulate worklist */ + list_for_each_entry(wq, &workqueues, list) { + mutex_lock(&wq->mutex); + for_each_pwq(pwq, wq) + pwq_adjust_max_active(pwq); + mutex_unlock(&wq->mutex); + } + +out_unlock: + mutex_unlock(&wq_pool_mutex); +} +#endif /* CONFIG_FREEZER */ + +#ifdef CONFIG_SYSFS +/* + * Workqueues with WQ_SYSFS flag set is visible to userland via + * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the + * following attributes. + * + * per_cpu RO bool : whether the workqueue is per-cpu or unbound + * max_active RW int : maximum number of in-flight work items + * + * Unbound workqueues have the following extra attributes. 
+ * + * id RO int : the associated pool ID + * nice RW int : nice value of the workers + * cpumask RW mask : bitmask of allowed CPUs for the workers + */ +struct wq_device { + struct workqueue_struct *wq; + struct device dev; +}; + +static struct workqueue_struct *dev_to_wq(struct device *dev) +{ + struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); + + return wq_dev->wq; +} + +static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); +} +static DEVICE_ATTR_RO(per_cpu); + +static ssize_t max_active_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); +} + +static ssize_t max_active_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int val; + + if (sscanf(buf, "%d", &val) != 1 || val <= 0) + return -EINVAL; + + workqueue_set_max_active(wq, val); + return count; +} +static DEVICE_ATTR_RW(max_active); + +static struct attribute *wq_sysfs_attrs[] = { + &dev_attr_per_cpu.attr, + &dev_attr_max_active.attr, + NULL, +}; +ATTRIBUTE_GROUPS(wq_sysfs); + +static ssize_t wq_pool_ids_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + const char *delim = ""; + int node, written = 0; + + rcu_read_lock_sched(); + for_each_node(node) { + written += scnprintf(buf + written, PAGE_SIZE - written, + "%s%d:%d", delim, node, + unbound_pwq_by_node(wq, node)->pool->id); + delim = " "; + } + written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); + rcu_read_unlock_sched(); + + return written; +} + +static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + mutex_lock(&wq->mutex); + written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); + mutex_unlock(&wq->mutex); + + return written; +} + +/* prepare workqueue_attrs for sysfs store operations */ +static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) +{ + struct workqueue_attrs *attrs; + + attrs = alloc_workqueue_attrs(GFP_KERNEL); + if (!attrs) + return NULL; + + mutex_lock(&wq->mutex); + copy_workqueue_attrs(attrs, wq->unbound_attrs); + mutex_unlock(&wq->mutex); + return attrs; +} + +static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int ret; + + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + if (sscanf(buf, "%d", &attrs->nice) == 1 && + attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) + ret = apply_workqueue_attrs(wq, attrs); + else + ret = -EINVAL; + + free_workqueue_attrs(attrs); + return ret ?: count; +} + +static ssize_t wq_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; + + mutex_lock(&wq->mutex); + written = scnprintf(buf, PAGE_SIZE, "%*pb\n", + cpumask_pr_args(wq->unbound_attrs->cpumask)); + mutex_unlock(&wq->mutex); + return written; +} + +static ssize_t wq_cpumask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct 
workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int ret; + + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; + + ret = cpumask_parse(buf, attrs->cpumask); + if (!ret) + ret = apply_workqueue_attrs(wq, attrs); + + free_workqueue_attrs(attrs); + return ret ?: count; +} + +static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct workqueue_struct *wq = dev_to_wq(dev); + int written; - /* update NUMA affinity of unbound workqueues */ - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, true); + mutex_lock(&wq->mutex); + written = scnprintf(buf, PAGE_SIZE, "%d\n", + !wq->unbound_attrs->no_numa); + mutex_unlock(&wq->mutex); - mutex_unlock(&wq_pool_mutex); - break; - } - return NOTIFY_OK; + return written; } -/* - * Workqueues should be brought down after normal priority CPU notifiers. - * This will be registered as low priority CPU notifier. - */ -static int workqueue_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { - int cpu = (unsigned long)hcpu; - struct work_struct unbind_work; - struct workqueue_struct *wq; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - /* unbinding per-cpu workers should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); - queue_work_on(cpu, system_highpri_wq, &unbind_work); + struct workqueue_struct *wq = dev_to_wq(dev); + struct workqueue_attrs *attrs; + int v, ret; - /* update NUMA affinity of unbound workqueues */ - mutex_lock(&wq_pool_mutex); - list_for_each_entry(wq, &workqueues, list) - wq_update_unbound_numa(wq, cpu, false); - mutex_unlock(&wq_pool_mutex); + attrs = wq_sysfs_prep_attrs(wq); + if (!attrs) + return -ENOMEM; - /* wait for per-cpu unbinding to finish */ - flush_work(&unbind_work); - destroy_work_on_stack(&unbind_work); - break; + ret = -EINVAL; + if (sscanf(buf, "%d", &v) == 1) { + attrs->no_numa = !v; + ret = apply_workqueue_attrs(wq, attrs); } - return NOTIFY_OK; + + free_workqueue_attrs(attrs); + return ret ?: count; } -#ifdef CONFIG_SMP +static struct device_attribute wq_sysfs_unbound_attrs[] = { + __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL), + __ATTR(nice, 0644, wq_nice_show, wq_nice_store), + __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), + __ATTR(numa, 0644, wq_numa_show, wq_numa_store), + __ATTR_NULL, +}; -struct work_for_cpu { - struct work_struct work; - long (*fn)(void *); - void *arg; - long ret; +static struct bus_type wq_subsys = { + .name = "workqueue", + .dev_groups = wq_sysfs_groups, }; -static void work_for_cpu_fn(struct work_struct *work) +static int __init wq_sysfs_init(void) { - struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work); - - wfc->ret = wfc->fn(wfc->arg); + return subsys_virtual_register(&wq_subsys, NULL); } +core_initcall(wq_sysfs_init); -/** - * work_on_cpu - run a function in user context on a particular cpu - * @cpu: the cpu to run on - * @fn: the function to run - * @arg: the function arg - * - * It is up to the caller to ensure that the cpu doesn't go offline. - * The caller must not hold any locks which would prevent @fn from completing. - * - * Return: The value @fn returns. 
- */ -long work_on_cpu(int cpu, long (*fn)(void *), void *arg) +static void wq_device_release(struct device *dev) { - struct work_for_cpu wfc = { .fn = fn, .arg = arg }; + struct wq_device *wq_dev = container_of(dev, struct wq_device, dev); - INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); - schedule_work_on(cpu, &wfc.work); - flush_work(&wfc.work); - destroy_work_on_stack(&wfc.work); - return wfc.ret; + kfree(wq_dev); } -EXPORT_SYMBOL_GPL(work_on_cpu); -#endif /* CONFIG_SMP */ - -#ifdef CONFIG_FREEZER /** - * freeze_workqueues_begin - begin freezing workqueues + * workqueue_sysfs_register - make a workqueue visible in sysfs + * @wq: the workqueue to register * - * Start freezing workqueues. After this function returns, all freezable - * workqueues will queue new works to their delayed_works list instead of - * pool->worklist. + * Expose @wq in sysfs under /sys/bus/workqueue/devices. + * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set + * which is the preferred method. * - * CONTEXT: - * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. + * Workqueue user should use this function directly iff it wants to apply + * workqueue_attrs before making the workqueue visible in sysfs; otherwise, + * apply_workqueue_attrs() may race against userland updating the + * attributes. + * + * Return: 0 on success, -errno on failure. */ -void freeze_workqueues_begin(void) +int workqueue_sysfs_register(struct workqueue_struct *wq) { - struct workqueue_struct *wq; - struct pool_workqueue *pwq; - - mutex_lock(&wq_pool_mutex); + struct wq_device *wq_dev; + int ret; - WARN_ON_ONCE(workqueue_freezing); - workqueue_freezing = true; + /* + * Adjusting max_active or creating new pwqs by applyting + * attributes breaks ordering guarantee. Disallow exposing ordered + * workqueues. + */ + if (WARN_ON(wq->flags & __WQ_ORDERED)) + return -EINVAL; - list_for_each_entry(wq, &workqueues, list) { - mutex_lock(&wq->mutex); - for_each_pwq(pwq, wq) - pwq_adjust_max_active(pwq); - mutex_unlock(&wq->mutex); - } + wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); + if (!wq_dev) + return -ENOMEM; - mutex_unlock(&wq_pool_mutex); -} + wq_dev->wq = wq; + wq_dev->dev.bus = &wq_subsys; + wq_dev->dev.init_name = wq->name; + wq_dev->dev.release = wq_device_release; -/** - * freeze_workqueues_busy - are freezable workqueues still busy? - * - * Check whether freezing is complete. This function must be called - * between freeze_workqueues_begin() and thaw_workqueues(). - * - * CONTEXT: - * Grabs and releases wq_pool_mutex. - * - * Return: - * %true if some freezable workqueues are still busy. %false if freezing - * is complete. - */ -bool freeze_workqueues_busy(void) -{ - bool busy = false; - struct workqueue_struct *wq; - struct pool_workqueue *pwq; + /* + * unbound_attrs are created separately. Suppress uevent until + * everything is ready. + */ + dev_set_uevent_suppress(&wq_dev->dev, true); - mutex_lock(&wq_pool_mutex); + ret = device_register(&wq_dev->dev); + if (ret) { + kfree(wq_dev); + wq->wq_dev = NULL; + return ret; + } - WARN_ON_ONCE(!workqueue_freezing); + if (wq->flags & WQ_UNBOUND) { + struct device_attribute *attr; - list_for_each_entry(wq, &workqueues, list) { - if (!(wq->flags & WQ_FREEZABLE)) - continue; - /* - * nr_active is monotonically decreasing. It's safe - * to peek without lock. 
- */ - rcu_read_lock_sched(); - for_each_pwq(pwq, wq) { - WARN_ON_ONCE(pwq->nr_active < 0); - if (pwq->nr_active) { - busy = true; - rcu_read_unlock_sched(); - goto out_unlock; + for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { + ret = device_create_file(&wq_dev->dev, attr); + if (ret) { + device_unregister(&wq_dev->dev); + wq->wq_dev = NULL; + return ret; } } - rcu_read_unlock_sched(); } -out_unlock: - mutex_unlock(&wq_pool_mutex); - return busy; + + dev_set_uevent_suppress(&wq_dev->dev, false); + kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); + return 0; } /** - * thaw_workqueues - thaw workqueues - * - * Thaw workqueues. Normal queueing is restored and all collected - * frozen works are transferred to their respective pool worklists. + * workqueue_sysfs_unregister - undo workqueue_sysfs_register() + * @wq: the workqueue to unregister * - * CONTEXT: - * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's. + * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister. */ -void thaw_workqueues(void) +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { - struct workqueue_struct *wq; - struct pool_workqueue *pwq; - - mutex_lock(&wq_pool_mutex); - - if (!workqueue_freezing) - goto out_unlock; - - workqueue_freezing = false; + struct wq_device *wq_dev = wq->wq_dev; - /* restore max_active and repopulate worklist */ - list_for_each_entry(wq, &workqueues, list) { - mutex_lock(&wq->mutex); - for_each_pwq(pwq, wq) - pwq_adjust_max_active(pwq); - mutex_unlock(&wq->mutex); - } + if (!wq->wq_dev) + return; -out_unlock: - mutex_unlock(&wq_pool_mutex); + wq->wq_dev = NULL; + device_unregister(&wq_dev->dev); } -#endif /* CONFIG_FREEZER */ +#else /* CONFIG_SYSFS */ +static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { } +#endif /* CONFIG_SYSFS */ static void __init wq_numa_init(void) {