Newer
Older
ret = restart_syscall();
goto out_free;
* No such thing, create a new one. name= matching without subsys
* specification is allowed for already existing hierarchies but we
* can't create new one without subsys specification.
if (!opts.subsys_mask && !opts.none) {
ret = -EINVAL;
goto out_unlock;
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root) {
ret = -ENOMEM;
init_cgroup_root(root, &opts);
ret = cgroup_setup_root(root, opts.subsys_mask);
out_unlock:
mutex_unlock(&cgroup_mutex);
kfree(opts.release_agent);
kfree(opts.name);
return ERR_PTR(ret);
dentry = kernfs_mount(fs_type, flags, root->kf_root,
CGROUP_SUPER_MAGIC, &new_sb);
cgroup_put(&root->cgrp);
/*
* If @pinned_sb, we're reusing an existing root and holding an
* extra ref on its sb. Mount is complete. Put the extra ref.
*/
if (pinned_sb) {
WARN_ON(new_sb);
deactivate_super(pinned_sb);
}
return dentry;
}
static void cgroup_kill_sb(struct super_block *sb)
{
struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
/*
* If @root doesn't have any mounts or children, start killing it.
* This prevents new mounts by disabling percpu_ref_tryget_live().
* cgroup_mount() may wait for @root's release.
*
* And don't kill the default root.
if (!list_empty(&root->cgrp.self.children) ||
cgroup_put(&root->cgrp);
else
percpu_ref_kill(&root->cgrp.self.refcnt);
}
static struct file_system_type cgroup_fs_type = {
.name = "cgroup",
.kill_sb = cgroup_kill_sb,
};
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Determine @task's cgroup on the first (the one with the lowest non-zero
* hierarchy_id) cgroup hierarchy and copy its path into @buf. This
* function grabs cgroup_mutex and shouldn't be used inside locks used by
* cgroup controller callbacks.
*
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
struct cgroup_root *root;
struct cgroup *cgrp;
mutex_lock(&cgroup_mutex);
down_read(&css_set_rwsem);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
if (root) {
cgrp = task_cgroup_from_root(task, root);
} else {
/* if no hierarchy exists, everyone is in "/" */
if (strlcpy(buf, "/", buflen) < buflen)
path = buf;
up_read(&css_set_rwsem);
mutex_unlock(&cgroup_mutex);
EXPORT_SYMBOL_GPL(task_cgroup_path);
/* used to track tasks and other necessary states during migration */
Tejun Heo
committed
struct cgroup_taskset {
/* the src and dst cset list running through cset->mg_node */
struct list_head src_csets;
struct list_head dst_csets;
/*
* Fields for cgroup_taskset_*() iteration.
*
* Before migration is committed, the target migration tasks are on
* ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of
* the csets on ->dst_csets. ->csets point to either ->src_csets
* or ->dst_csets depending on whether migration is committed.
*
* ->cur_csets and ->cur_task point to the current task position
* during iteration.
*/
struct list_head *csets;
struct css_set *cur_cset;
struct task_struct *cur_task;
Tejun Heo
committed
};
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;
return cgroup_taskset_next(tset);
Tejun Heo
committed
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
Tejun Heo
committed
while (&cset->mg_node != tset->csets) {
if (!task)
task = list_first_entry(&cset->mg_tasks,
struct task_struct, cg_list);
else
task = list_next_entry(task, cg_list);
Tejun Heo
committed
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;
return task;
}
Tejun Heo
committed
cset = list_next_entry(cset, mg_node);
task = NULL;
}
Tejun Heo
committed
return NULL;
Tejun Heo
committed
}
* cgroup_task_migrate - move a task from one cgroup to another.
* @old_cgrp: the cgroup @tsk is being migrated from
* @tsk: the task being migrated
* @new_cset: the new css_set @tsk is being attached to
* Must be called with cgroup_mutex, threadgroup and css_set_rwsem locked.
static void cgroup_task_migrate(struct cgroup *old_cgrp,
struct task_struct *tsk,
struct css_set *new_cset)
struct css_set *old_cset;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_rwsem);
* We are synchronized through cgroup_threadgroup_rwsem against
* PF_EXITING setting such that we can't race against cgroup_exit()
* changing the css_set to init_css_set and dropping the old one.
Frederic Weisbecker
committed
WARN_ON_ONCE(tsk->flags & PF_EXITING);
old_cset = task_css_set(tsk);
get_css_set(new_cset);
rcu_assign_pointer(tsk->cgroups, new_cset);
list_move_tail(&tsk->cg_list, &new_cset->mg_tasks);
* We just gained a reference on old_cset by taking it from the
* task. As trading it for new_cset is protected by cgroup_mutex,
* we're safe to drop it here; it will be freed under RCU.
* cgroup_migrate_finish - cleanup after attach
* @preloaded_csets: list of preloaded css_sets
* Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See
* those functions for details.
static void cgroup_migrate_finish(struct list_head *preloaded_csets)
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
down_write(&css_set_rwsem);
list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cset = NULL;
list_del_init(&cset->mg_preload_node);
}
up_write(&css_set_rwsem);
}
/**
* cgroup_migrate_add_src - add a migration source css_set
* @src_cset: the source css_set to add
* @dst_cgrp: the destination cgroup
* @preloaded_csets: list of preloaded css_sets
*
* Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin
* @src_cset and add it to @preloaded_csets, which should later be cleaned
* up by cgroup_migrate_finish().
*
* This function may be called without holding cgroup_threadgroup_rwsem
* even if the target is a process. Threads may be created and destroyed
* but as long as cgroup_mutex is not dropped, no new css_set can be put
* into play and the preloaded css_sets are guaranteed to cover all
* migrations.
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
*/
static void cgroup_migrate_add_src(struct css_set *src_cset,
struct cgroup *dst_cgrp,
struct list_head *preloaded_csets)
{
struct cgroup *src_cgrp;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_rwsem);
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
if (!list_empty(&src_cset->mg_preload_node))
return;
WARN_ON(src_cset->mg_src_cgrp);
WARN_ON(!list_empty(&src_cset->mg_tasks));
WARN_ON(!list_empty(&src_cset->mg_node));
src_cset->mg_src_cgrp = src_cgrp;
get_css_set(src_cset);
list_add(&src_cset->mg_preload_node, preloaded_csets);
}
/**
* cgroup_migrate_prepare_dst - prepare destination css_sets for migration
* @dst_cgrp: the destination cgroup (may be %NULL)
* @preloaded_csets: list of preloaded source css_sets
*
* Tasks are about to be moved to @dst_cgrp and all the source css_sets
* have been preloaded to @preloaded_csets. This function looks up and
* pins all destination css_sets, links each to its source, and append them
* to @preloaded_csets. If @dst_cgrp is %NULL, the destination of each
* source css_set is assumed to be its cgroup on the default hierarchy.
*
* This function must be called after cgroup_migrate_add_src() has been
* called on each migration source css_set. After migration is performed
* using cgroup_migrate(), cgroup_migrate_finish() must be called on
* @preloaded_csets.
*/
static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
struct list_head *preloaded_csets)
{
LIST_HEAD(csets);
struct css_set *src_cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
Tejun Heo
committed
/*
* Except for the root, child_subsys_mask must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) &&
Tejun Heo
committed
dst_cgrp->child_subsys_mask)
return -EBUSY;
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
struct css_set *dst_cset;
dst_cset = find_css_set(src_cset,
dst_cgrp ?: src_cset->dfl_cgrp);
if (!dst_cset)
goto err;
WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
/*
* If src cset equals dst, it's noop. Drop the src.
* cgroup_migrate() will skip the cset too. Note that we
* can't handle src == dst as some nodes are used by both.
*/
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
list_del_init(&src_cset->mg_preload_node);
put_css_set(src_cset);
put_css_set(dst_cset);
src_cset->mg_dst_cset = dst_cset;
if (list_empty(&dst_cset->mg_preload_node))
list_add(&dst_cset->mg_preload_node, &csets);
else
list_splice_tail(&csets, preloaded_csets);
return 0;
err:
cgroup_migrate_finish(&csets);
return -ENOMEM;
}
/**
* cgroup_migrate - migrate a process or task to a cgroup
* @leader: the leader of the process or the task to migrate
* @threadgroup: whether @leader points to the whole process or a single task
* @cgrp: the destination cgroup
*
* Migrate a process or task denoted by @leader to @cgrp. If migrating a
* process, the caller must be holding cgroup_threadgroup_rwsem. The
* caller is also responsible for invoking cgroup_migrate_add_src() and
* cgroup_migrate_prepare_dst() on the targets before invoking this
* function and following up with cgroup_migrate_finish().
*
* As long as a controller's ->can_attach() doesn't fail, this function is
* guaranteed to succeed. This means that, excluding ->can_attach()
* failure, when migrating multiple targets, the success or failure can be
* decided for all targets by invoking group_migrate_prepare_dst() before
* actually starting migrating.
*/
static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
struct cgroup *cgrp)
struct cgroup_taskset tset = {
.src_csets = LIST_HEAD_INIT(tset.src_csets),
.dst_csets = LIST_HEAD_INIT(tset.dst_csets),
.csets = &tset.src_csets,
};
struct cgroup_subsys_state *css, *failed_css = NULL;
struct css_set *cset, *tmp_cset;
struct task_struct *task, *tmp_task;
int i, ret;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
down_write(&css_set_rwsem);
rcu_read_lock();
/* @task either already exited or can't exit until the end */
if (task->flags & PF_EXITING)
goto next;
/* leave @task alone if post_fork() hasn't linked it yet */
if (list_empty(&task->cg_list))
goto next;
cset = task_css_set(task);
if (!cset->mg_src_cgrp)
goto next;
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset.src_csets);
if (list_empty(&cset->mg_dst_cset->mg_node))
list_move_tail(&cset->mg_dst_cset->mg_node,
&tset.dst_csets);
next:
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset.src_csets))
return 0;
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, cgrp) {
ret = css->ss->can_attach(css, &tset);
if (ret) {
goto out_cancel_attach;
}
}
}
/*
* Now that we're guaranteed success, proceed to move all tasks to
* the new cgroup. There are no failure cases after here, so this
* is the commit point.
down_write(&css_set_rwsem);
list_for_each_entry(cset, &tset.src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
cgroup_task_migrate(cset->mg_src_cgrp, task,
cset->mg_dst_cset);
up_write(&css_set_rwsem);
* Migration is committed, all target tasks are now on dst_csets.
* Nothing is sensitive to fork() after this point. Notify
* controllers that migration is complete.
tset.csets = &tset.dst_csets;
for_each_e_css(css, i, cgrp)
if (css->ss->attach)
css->ss->attach(css, &tset);
goto out_release_tset;
for_each_e_css(css, i, cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, &tset);
out_release_tset:
down_write(&css_set_rwsem);
list_splice_init(&tset.dst_csets, &tset.src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
up_write(&css_set_rwsem);
/**
* cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
* @dst_cgrp: the cgroup to attach to
* @leader: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
*
* Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
*/
static int cgroup_attach_task(struct cgroup *dst_cgrp,
struct task_struct *leader, bool threadgroup)
{
LIST_HEAD(preloaded_csets);
struct task_struct *task;
int ret;
/* look up all src csets */
down_read(&css_set_rwsem);
rcu_read_lock();
task = leader;
do {
cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
&preloaded_csets);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_read(&css_set_rwsem);
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
if (!ret)
ret = cgroup_migrate(leader, threadgroup, dst_cgrp);
cgroup_migrate_finish(&preloaded_csets);
return ret;
Tejun Heo
committed
static int cgroup_procs_write_permission(struct task_struct *task,
struct cgroup *dst_cgrp,
struct kernfs_open_file *of)
Tejun Heo
committed
{
const struct cred *cred = current_cred();
const struct cred *tcred = get_task_cred(task);
int ret = 0;
/*
* even if we're attaching all tasks in the thread group, we only
* need to check permissions on one of them.
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid))
ret = -EACCES;
Tejun Heo
committed
if (!ret && cgroup_on_dfl(dst_cgrp)) {
struct super_block *sb = of->file->f_path.dentry->d_sb;
struct cgroup *cgrp;
struct inode *inode;
down_read(&css_set_rwsem);
cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
up_read(&css_set_rwsem);
while (!cgroup_is_descendant(dst_cgrp, cgrp))
cgrp = cgroup_parent(cgrp);
ret = -ENOMEM;
inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
Tejun Heo
committed
if (inode) {
ret = inode_permission(inode, MAY_WRITE);
iput(inode);
}
}
Tejun Heo
committed
put_cred(tcred);
return ret;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup.
static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
struct cgroup *cgrp;
pid_t pid;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
percpu_down_write(&cgroup_threadgroup_rwsem);
rcu_read_lock();
Tejun Heo
committed
} else {
tsk = current;
Tejun Heo
committed
}
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
}
get_task_struct(tsk);
rcu_read_unlock();
Tejun Heo
committed
ret = cgroup_procs_write_permission(tsk, cgrp, of);
Tejun Heo
committed
if (!ret)
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
goto out_unlock_threadgroup;
out_unlock_rcu:
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroup_root *root;
int retval = 0;
for_each_root(root) {
struct cgroup *from_cgrp;
if (root == &cgrp_dfl_root)
continue;
down_read(&css_set_rwsem);
from_cgrp = task_cgroup_from_root(from, root);
up_read(&css_set_rwsem);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
return __cgroup_procs_write(of, buf, nbytes, off, false);
static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
return __cgroup_procs_write(of, buf, nbytes, off, true);
}
static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
struct cgroup *cgrp;
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
return -ENODEV;
spin_lock(&release_agent_path_lock);
strlcpy(cgrp->root->release_agent_path, strstrip(buf),
sizeof(cgrp->root->release_agent_path));
spin_unlock(&release_agent_path_lock);
cgroup_kn_unlock(of->kn);
static int cgroup_release_agent_show(struct seq_file *seq, void *v)
struct cgroup *cgrp = seq_css(seq)->cgroup;
spin_lock(&release_agent_path_lock);
seq_puts(seq, cgrp->root->release_agent_path);
spin_unlock(&release_agent_path_lock);
seq_putc(seq, '\n');
return 0;
}
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
seq_puts(seq, "0\n");
return 0;
}
static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask)
Tejun Heo
committed
struct cgroup_subsys *ss;
bool printed = false;
int ssid;
for_each_subsys_which(ss, ssid, &ss_mask) {
if (printed)
seq_putc(seq, ' ');
seq_printf(seq, "%s", ss->name);
printed = true;
Tejun Heo
committed
if (printed)
seq_putc(seq, '\n');
Tejun Heo
committed
/* show controllers which are currently attached to the default hierarchy */
static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->root->subsys_mask &
~cgrp_dfl_root_inhibit_ss_mask);
Tejun Heo
committed
return 0;
Tejun Heo
committed
/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control);
Tejun Heo
committed
return 0;
Tejun Heo
committed
/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->subtree_control);
Tejun Heo
committed
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
return 0;
}
/**
* cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
* @cgrp: root of the subtree to update csses for
*
* @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
* css associations need to be updated accordingly. This function looks up
* all css_sets which are attached to the subtree, creates the matching
* updated css_sets and migrates the tasks to the new ones.
*/
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
LIST_HEAD(preloaded_csets);
struct cgroup_subsys_state *css;
struct css_set *src_cset;
int ret;
lockdep_assert_held(&cgroup_mutex);
percpu_down_write(&cgroup_threadgroup_rwsem);
Tejun Heo
committed
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
/* look up all csses currently attached to @cgrp's subtree */
down_read(&css_set_rwsem);
css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
struct cgrp_cset_link *link;
/* self is not affected by child_subsys_mask change */
if (css->cgroup == cgrp)
continue;
list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
cgroup_migrate_add_src(link->cset, cgrp,
&preloaded_csets);
}
up_read(&css_set_rwsem);
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
if (ret)
goto out_finish;
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
struct task_struct *last_task = NULL, *task;
/* src_csets precede dst_csets, break on the first dst_cset */
if (!src_cset->mg_src_cgrp)
break;
/*
* All tasks in src_cset need to be migrated to the
* matching dst_cset. Empty it process by process. We
* walk tasks but migrate processes. The leader might even
* belong to a different cset but such src_cset would also
* be among the target src_csets because the default
* hierarchy enforces per-process membership.
*/
while (true) {
down_read(&css_set_rwsem);
task = list_first_entry_or_null(&src_cset->tasks,
struct task_struct, cg_list);
if (task) {
task = task->group_leader;
WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
get_task_struct(task);
}
up_read(&css_set_rwsem);
if (!task)
break;
/* guard against possible infinite loop */
if (WARN(last_task == task,
"cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
goto out_finish;
last_task = task;
ret = cgroup_migrate(task, true, src_cset->dfl_cgrp);
Tejun Heo
committed
put_task_struct(task);
if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
goto out_finish;
}
}
out_finish:
cgroup_migrate_finish(&preloaded_csets);
percpu_up_write(&cgroup_threadgroup_rwsem);
Tejun Heo
committed
return ret;
}
/* change the enabled child controllers for a cgroup in the default hierarchy */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
loff_t off)
Tejun Heo
committed
{
unsigned long enable = 0, disable = 0;
unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
struct cgroup *cgrp, *child;
Tejun Heo
committed
struct cgroup_subsys *ss;
Tejun Heo
committed
int ssid, ret;
/*
* Parse input - space separated list of subsystem names prefixed
* with either + or -.
Tejun Heo
committed
*/
buf = strstrip(buf);
while ((tok = strsep(&buf, " "))) {
unsigned long tmp_ss_mask = ~cgrp_dfl_root_inhibit_ss_mask;
if (tok[0] == '\0')
continue;
for_each_subsys_which(ss, ssid, &tmp_ss_mask) {
if (!cgroup_ssid_enabled(ssid) ||
strcmp(tok + 1, ss->name))
Tejun Heo
committed
continue;
if (*tok == '+') {
Tejun Heo
committed
enable |= 1 << ssid;
disable &= ~(1 << ssid);
Tejun Heo
committed
} else if (*tok == '-') {
Tejun Heo
committed
disable |= 1 << ssid;
enable &= ~(1 << ssid);
Tejun Heo
committed
} else {
return -EINVAL;
}
break;
}
if (ssid == CGROUP_SUBSYS_COUNT)
return -EINVAL;
}
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
return -ENODEV;
Tejun Heo
committed
for_each_subsys(ss, ssid) {
if (enable & (1 << ssid)) {
if (cgrp->subtree_control & (1 << ssid)) {
Tejun Heo
committed
enable &= ~(1 << ssid);
continue;
}
/* unavailable or not enabled on the parent? */
if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
(cgroup_parent(cgrp) &&
!(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) {
ret = -ENOENT;
goto out_unlock;
}
Tejun Heo
committed
} else if (disable & (1 << ssid)) {
if (!(cgrp->subtree_control & (1 << ssid))) {
Tejun Heo
committed
disable &= ~(1 << ssid);
continue;
}
/* a child has it enabled? */
cgroup_for_each_live_child(child, cgrp) {
if (child->subtree_control & (1 << ssid)) {
Tejun Heo
committed
ret = -EBUSY;
goto out_unlock;
Tejun Heo
committed
}
}
}
}
if (!enable && !disable) {
ret = 0;
goto out_unlock;
Tejun Heo
committed
}
/*
* Except for the root, subtree_control must be zero for a cgroup
Tejun Heo
committed
* with tasks so that child cgroups don't compete against tasks.
*/
if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
Tejun Heo
committed
ret = -EBUSY;
goto out_unlock;
}
/*
* Update subsys masks and calculate what needs to be done. More
* subsystems than specified may need to be enabled or disabled
* depending on subsystem dependencies.
*/
Tejun Heo
committed
old_sc = cgrp->subtree_control;
old_ss = cgrp->child_subsys_mask;
new_sc = (old_sc | enable) & ~disable;
new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc);
Tejun Heo
committed
css_enable = ~old_ss & new_ss;
css_disable = old_ss & ~new_ss;
enable |= css_enable;
disable |= css_disable;
/*
* Because css offlining is asynchronous, userland might try to
* re-enable the same controller while the previous instance is
* still around. In such cases, wait till it's gone using
* offline_waitq.
*/
for_each_subsys_which(ss, ssid, &css_enable) {
cgroup_for_each_live_child(child, cgrp) {
DEFINE_WAIT(wait);
if (!cgroup_css(child, ss))
continue;
cgroup_get(child);
prepare_to_wait(&child->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
cgroup_kn_unlock(of->kn);
schedule();
finish_wait(&child->offline_waitq, &wait);
cgroup_put(child);
return restart_syscall();
}
}
Tejun Heo
committed
cgrp->subtree_control = new_sc;
cgrp->child_subsys_mask = new_ss;
/*
* Create new csses or make the existing ones visible. A css is
* created invisible if it's being implicitly enabled through
* dependency. An invisible css is made visible when the userland
* explicitly enables it.
Tejun Heo
committed
*/
for_each_subsys(ss, ssid) {
if (!(enable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp) {
if (css_enable & (1 << ssid))
ret = create_css(child, ss,
cgrp->subtree_control & (1 << ssid));
else
ret = css_populate_dir(cgroup_css(child, ss),
NULL);
Tejun Heo
committed
if (ret)
goto err_undo_css;
}
}