	/*
	 * Nothing can fail from this point on.  Remove files for the
	 * removed subsystems and rebind each subsystem.
	 */
mutex_unlock(&cgroup_mutex);
	cgroup_clear_dir(cgrp, removed_mask);
	mutex_lock(&cgroup_mutex);

	for_each_subsys(ss, i) {
		unsigned long bit = 1UL << i;

		if (bit & added_mask) {
/* We're binding this subsystem to this hierarchy */
BUG_ON(cgroup_css(cgrp, ss));
BUG_ON(!cgroup_css(cgroup_dummy_top, ss));
BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top);
rcu_assign_pointer(cgrp->subsys[i],
cgroup_css(cgroup_dummy_top, ss));
cgroup_css(cgrp, ss)->cgroup = cgrp;
ss->root = root;
ss->bind(cgroup_css(cgrp, ss));
/* refcount was already taken, and we're keeping it */
} else if (bit & removed_mask) {
/* We're removing this subsystem */
BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss));
BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp);
ss->bind(cgroup_css(cgroup_dummy_top, ss));
cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top;
RCU_INIT_POINTER(cgrp->subsys[i], NULL);
cgroup_subsys[i]->root = &cgroup_dummy_root;
static int cgroup_show_options(struct seq_file *seq,
			       struct kernfs_root *kf_root)
{
	struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
	struct cgroup_subsys *ss;
	int ssid;
for_each_subsys(ss, ssid)
if (root->subsys_mask & (1 << ssid))
seq_printf(seq, ",%s", ss->name);
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
seq_puts(seq, ",sane_behavior");
if (root->flags & CGRP_ROOT_NOPREFIX)
seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");

spin_lock(&release_agent_path_lock);
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
spin_unlock(&release_agent_path_lock);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
seq_puts(seq, ",clone_children");
if (strlen(root->name))
seq_printf(seq, ",name=%s", root->name);
return 0;
}
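/*
 * Illustrative example (not part of the original source): for a hierarchy
 * mounted with the cpuset controller, noprefix and a name, the options
 * emitted above show up in /proc/mounts roughly as
 *
 *	cgroup /sys/fs/cgroup/mygrp cgroup rw,cpuset,noprefix,name=mygrp 0 0
 *
 * The mount point and the name "mygrp" are made-up values.
 */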
struct cgroup_sb_opts {
	unsigned long subsys_mask;
	unsigned long flags;
	char *release_agent;
	bool cpuset_clone_children;
	char *name;
	/* User explicitly requested empty subsystem */
	bool none;
};

/*
 * Convert a hierarchy specifier into a bitmask of subsystems and
 * flags.  Call with cgroup_mutex held to protect the cgroup_subsys[]
 * array.  This function takes refcounts on subsystems to be used, unless it
 * returns error, in which case no refcounts are taken.
 */
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
char *token, *o = data;
bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
struct cgroup_subsys *ss;
int i;
#ifdef CONFIG_CPUSETS
	mask = ~(1UL << cpuset_cgrp_id);
#endif

memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
return -EINVAL;
if (!strcmp(token, "none")) {
/* Explicitly have no subsystems */
opts->none = true;
continue;
}
if (!strcmp(token, "all")) {
/* Mutually exclusive option 'all' + subsystem name */
if (one_ss)
return -EINVAL;
all_ss = true;
continue;
}
if (!strcmp(token, "__DEVEL__sane_behavior")) {
opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
continue;
}
if (!strcmp(token, "noprefix")) {
opts->flags |= CGRP_ROOT_NOPREFIX;
continue;
}
if (!strcmp(token, "clone_children")) {
opts->cpuset_clone_children = true;
opts->flags |= CGRP_ROOT_XATTR;
if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
			opts->release_agent =
				kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
continue;
}
if (!strncmp(token, "name=", 5)) {
const char *name = token + 5;
/* Can't specify an empty name */
if (!strlen(name))
return -EINVAL;
/* Must match [\w.-]+ */
for (i = 0; i < strlen(name); i++) {
char c = name[i];
if (isalnum(c))
continue;
if ((c == '.') || (c == '-') || (c == '_'))
continue;
return -EINVAL;
}
/* Specifying two names is forbidden */
if (opts->name)
return -EINVAL;
			opts->name = kstrndup(name,
					      MAX_CGROUP_ROOT_NAMELEN - 1,
					      GFP_KERNEL);
if (!opts->name)
return -ENOMEM;
continue;
}
		for_each_subsys(ss, i) {
			if (strcmp(token, ss->name))
continue;
if (ss->disabled)
continue;
/* Mutually exclusive option 'all' + subsystem name */
if (all_ss)
return -EINVAL;
set_bit(i, &opts->subsys_mask);
one_ss = true;
break;
}
if (i == CGROUP_SUBSYS_COUNT)
return -ENOENT;
}
/*
	 * If the 'all' option was specified select all the subsystems,
	 * otherwise if 'none', 'name=' and a subsystem name options
	 * were not specified, let's default to 'all'
	 */
if (all_ss || (!one_ss && !opts->none && !opts->name))
for_each_subsys(ss, i)
if (!ss->disabled)
set_bit(i, &opts->subsys_mask);
/* Consistency checks */
if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) ||
opts->cpuset_clone_children || opts->release_agent ||
opts->name) {
pr_err("cgroup: sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
return -EINVAL;
}
}
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
* the cpuset subsystem.
*/
if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
return -EINVAL;
/* Can't specify "none" and some subsystems */
if (opts->subsys_mask && opts->none)
return -EINVAL;
/*
* We either have to specify by name or by subsystems. (So all
* empty hierarchies must have a name).
*/
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
return 0;
}
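/*
 * Worked example (illustrative, not from the original source): parsing the
 * mount data "cpuset,noprefix,name=mygrp" sets the cpuset bit in
 * opts->subsys_mask, CGRP_ROOT_NOPREFIX in opts->flags and
 * opts->name = "mygrp".  "all,cpuset" fails with -EINVAL because 'all' and
 * an explicit subsystem name are mutually exclusive, and "none,cpuset"
 * fails the "none plus subsystems" consistency check above.
 */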
static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
{
	struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
	struct cgroup_sb_opts opts;
	unsigned long added_mask, removed_mask;
	int ret;
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
pr_err("cgroup: sane_behavior: remount is not allowed\n");
return -EINVAL;
}
mutex_lock(&cgroup_mutex);
/* See what subsystems are wanted */
ret = parse_cgroupfs_options(data, &opts);
if (ret)
goto out_unlock;
if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
task_tgid_nr(current), current->comm);
added_mask = opts.subsys_mask & ~root->subsys_mask;
removed_mask = root->subsys_mask & ~opts.subsys_mask;
/* Don't allow flags or name to change at remount */
if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
(opts.name && strcmp(opts.name, root->name))) {
pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
root->flags & CGRP_ROOT_OPTION_MASK, root->name);
ret = -EINVAL;
goto out_unlock;
}
	/* remounting is not allowed for populated hierarchies */
	if (!list_empty(&root->top_cgroup.children)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ret = rebind_subsystems(root, added_mask, removed_mask);
	if (ret)
		goto out_unlock;
if (opts.release_agent) {
spin_lock(&release_agent_path_lock);
strcpy(root->release_agent_path, opts.release_agent);
spin_unlock(&release_agent_path_lock);
}
 out_unlock:
	kfree(opts.release_agent);
	kfree(opts.name);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
/*
* To reduce the fork() overhead for systems that are not actually using
* their cgroups capability, we don't maintain the lists running through
* each css_set to its tasks until we see the list actually used - in other
* words after the first mount.
*/
static bool use_task_css_set_links __read_mostly;
static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
down_write(&css_set_rwsem);
if (use_task_css_set_links)
goto out_unlock;
use_task_css_set_links = true;
/*
* We need tasklist_lock because RCU is not safe against
* while_each_thread(). Besides, a forking task that has passed
* cgroup_post_fork() without seeing use_task_css_set_links = 1
* is not guaranteed to have its child immediately visible in the
* tasklist if we walk through it with RCU.
*/
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
WARN_ON_ONCE(!list_empty(&p->cg_list) ||
task_css_set(p) != &init_css_set);
/*
* We should check if the process is exiting, otherwise
* it will race with cgroup_exit() in that the list
* entry won't be deleted though the process has exited.
* Do it while holding siglock so that we don't end up
* racing against cgroup_exit().
		 */
		spin_lock_irq(&p->sighand->siglock);
if (!(p->flags & PF_EXITING))
list_add(&p->cg_list, &task_css_set(p)->tasks);
spin_unlock_irq(&p->sighand->siglock);
task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
out_unlock:
	up_write(&css_set_rwsem);
}
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
INIT_LIST_HEAD(&cgrp->sibling);
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->release_list);
INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
}
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
INIT_LIST_HEAD(&root->root_list);
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
	init_cgroup_housekeeping(cgrp);
	idr_init(&root->cgroup_idr);
}
static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
{
struct cgroupfs_root *root;
	if (!opts->subsys_mask && !opts->none)
		return ERR_PTR(-EINVAL);

root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
init_cgroup_root(root);
root->flags = opts->flags;
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
if (opts->cpuset_clone_children)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
	return root;
}
static int cgroup_setup_root(struct cgroupfs_root *root, unsigned long ss_mask)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->top_cgroup;
struct css_set *cset;
int i, ret;
lockdep_assert_held(&cgroup_tree_mutex);
lockdep_assert_held(&cgroup_mutex);
ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
	if (ret < 0)
		goto out;
	root_cgrp->id = ret;
/*
* We're accessing css_set_count without locking css_set_rwsem here,
* but that's OK - it can only be increased by someone holding
* cgroup_lock, and that's us. The worst that can happen is that we
* have some link structures left over
*/
ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
	if (ret)
		goto out;

/* ID 0 is reserved for dummy root, 1 for unified hierarchy */
ret = cgroup_init_root_id(root, 2, 0);
	if (ret)
		goto out;

root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
KERNFS_ROOT_CREATE_DEACTIVATED,
root_cgrp);
if (IS_ERR(root->kf_root)) {
ret = PTR_ERR(root->kf_root);
goto exit_root_id;
}
root_cgrp->kn = root->kf_root->kn;
ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
	if (ret)
		goto destroy_root;

	ret = rebind_subsystems(root, ss_mask, 0);
	if (ret)
		goto destroy_root;
/*
* There must be no failure case after here, since rebinding takes
* care of subsystems' refcounts, which are explicitly dropped in
* the failure exit path.
*/
list_add(&root->root_list, &cgroup_roots);
cgroup_root_count++;
/*
* Link the top cgroup in this hierarchy into all the css_set
* objects.
*/
down_write(&css_set_rwsem);
hash_for_each(css_set_table, i, cset, hlist)
link_css_set(&tmp_links, cset, root_cgrp);
up_write(&css_set_rwsem);
BUG_ON(!list_empty(&root_cgrp->children));
	BUG_ON(atomic_read(&root->nr_cgrps) != 1);

	kernfs_activate(root_cgrp->kn);
	ret = 0;
	goto out;

destroy_root:
kernfs_destroy_root(root->kf_root);
root->kf_root = NULL;
exit_root_id:
cgroup_exit_root_id(root);
out:
	free_cgrp_cset_links(&tmp_links);
return ret;
}
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
				   int flags, const char *unused_dev_name,
				   void *data)
{
	struct cgroupfs_root *root;
	struct cgroup_sb_opts opts;
	struct dentry *dentry;
	int ret;
/*
* The first time anyone tries to mount a cgroup, enable the list
* linking each css_set to its tasks and fix up all existing tasks.
*/
if (!use_task_css_set_links)
cgroup_enable_task_cg_lists();
retry:
	mutex_lock(&cgroup_tree_mutex);
	mutex_lock(&cgroup_mutex);
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
	if (ret)
		goto out_unlock;

/* look for a matching existing root */
for_each_active_root(root) {
bool name_match = false;
/*
* If we asked for a name then it must match. Also, if
		 * name matches but subsys_mask doesn't, we should fail.
* Remember whether name matched.
*/
if (opts.name) {
if (strcmp(opts.name, root->name))
continue;
name_match = true;
}
		/*
		 * If we asked for subsystems (or explicitly for no
		 * subsystems) then they must match.
		 */
if ((opts.subsys_mask || opts.none) &&
(opts.subsys_mask != root->subsys_mask)) {
if (!name_match)
continue;
ret = -EBUSY;
goto out_unlock;
}
if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
ret = -EINVAL;
goto out_unlock;
} else {
pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
			}
		}

/*
* A root's lifetime is governed by its top cgroup. Zero
* ref indicate that the root is being destroyed. Wait for
* destruction to complete so that the subsystems are free.
* We can use wait_queue for the wait but this path is
* super cold. Let's just sleep for a bit and retry.
*/
if (!atomic_inc_not_zero(&root->top_cgroup.refcnt)) {
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgroup_tree_mutex);
kfree(opts.release_agent);
kfree(opts.name);
msleep(10);
goto retry;
}
		ret = 0;
		goto out_unlock;
	}

/* no such thing, create a new one */
root = cgroup_root_from_opts(&opts);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_unlock;
}
ret = cgroup_setup_root(root, opts.subsys_mask);
out_unlock:
	mutex_unlock(&cgroup_mutex);
	mutex_unlock(&cgroup_tree_mutex);

	kfree(opts.release_agent);
	kfree(opts.name);

	if (ret)
		return ERR_PTR(ret);
dentry = kernfs_mount(fs_type, flags, root->kf_root);
	if (IS_ERR(dentry))
		cgroup_put(&root->top_cgroup);
	return dentry;
}
static void cgroup_kill_sb(struct super_block *sb)
{
struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
	struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);

	cgroup_put(&root->top_cgroup);
	kernfs_kill_sb(sb);
}
static struct file_system_type cgroup_fs_type = {
	.name = "cgroup",
	.mount = cgroup_mount,
	.kill_sb = cgroup_kill_sb,
};
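/*
 * Usage sketch (assumed userspace invocation, not part of this file): a
 * hierarchy backed by cgroup_fs_type is typically created with e.g.
 *
 *	mount -t cgroup -o cpuset,name=mygrp none /sys/fs/cgroup/mygrp
 *
 * which reaches cgroup_mount() with data "cpuset,name=mygrp"; unmounting
 * the last user of the superblock ends up in cgroup_kill_sb().
 */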
static struct kobject *cgroup_kobj;
/**
 * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Determine @task's cgroup on the first (the one with the lowest non-zero
* hierarchy_id) cgroup hierarchy and copy its path into @buf. This
* function grabs cgroup_mutex and shouldn't be used inside locks used by
* cgroup controller callbacks.
*
 */
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
	char *path = NULL;
	struct cgroupfs_root *root;
	struct cgroup *cgrp;
	int hierarchy_id = 1;
mutex_lock(&cgroup_mutex);
down_read(&css_set_rwsem);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
if (root) {
		cgrp = task_cgroup_from_root(task, root);
		path = cgroup_path(cgrp, buf, buflen);
} else {
/* if no hierarchy exists, everyone is in "/" */
if (strlcpy(buf, "/", buflen) < buflen)
			path = buf;
	}

up_read(&css_set_rwsem);
	mutex_unlock(&cgroup_mutex);
	return path;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
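/*
 * Usage sketch (hypothetical caller, for illustration only): debug code
 * could print a task's position in the first hierarchy like
 *
 *	char *buf = kmalloc(PATH_MAX, GFP_KERNEL);
 *
 *	if (buf && task_cgroup_path(current, buf, PATH_MAX))
 *		pr_info("pid %d cgroup: %s\n", task_pid_nr(current), buf);
 *	kfree(buf);
 *
 * The function takes cgroup_mutex, so such a caller must be in process
 * context and must not already hold it.
 */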
/* used to track tasks and other necessary states during migration */
struct cgroup_taskset {
/* the src and dst cset list running through cset->mg_node */
struct list_head src_csets;
struct list_head dst_csets;
/*
* Fields for cgroup_taskset_*() iteration.
*
* Before migration is committed, the target migration tasks are on
* ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of
* the csets on ->dst_csets. ->csets point to either ->src_csets
* or ->dst_csets depending on whether migration is committed.
*
* ->cur_csets and ->cur_task point to the current task position
* during iteration.
*/
struct list_head *csets;
struct css_set *cur_cset;
struct task_struct *cur_task;
};
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;
return cgroup_taskset_next(tset);
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
while (&cset->mg_node != tset->csets) {
if (!task)
task = list_first_entry(&cset->mg_tasks,
struct task_struct, cg_list);
else
task = list_next_entry(task, cg_list);
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;
return task;
}
cset = list_next_entry(cset, mg_node);
task = NULL;
}
return NULL;
}
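/*
 * Usage sketch (assumed controller-side pattern, not part of this file): a
 * subsystem's ->attach() callback typically walks the migrating tasks with
 * the two iterators above:
 *
 *	static void example_attach(struct cgroup_subsys_state *css,
 *				   struct cgroup_taskset *tset)
 *	{
 *		struct task_struct *task;
 *
 *		for (task = cgroup_taskset_first(tset); task;
 *		     task = cgroup_taskset_next(tset))
 *			apply_example_config(css, task);
 *	}
 *
 * example_attach() and apply_example_config() are hypothetical names used
 * only for illustration.
 */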
/**
 * cgroup_task_migrate - move a task from one cgroup to another.
 * @old_cgrp: the cgroup @tsk is being migrated from
 * @tsk: the task being migrated
 * @new_cset: the new css_set @tsk is being attached to
 *
 * Must be called with cgroup_mutex, threadgroup and css_set_rwsem locked.
 */
static void cgroup_task_migrate(struct cgroup *old_cgrp,
struct task_struct *tsk,
				struct css_set *new_cset)
{
struct css_set *old_cset;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_rwsem);
	/*
	 * We are synchronized through threadgroup_lock() against PF_EXITING
	 * setting such that we can't race against cgroup_exit() changing the
	 * css_set to init_css_set and dropping the old one.
	 */
WARN_ON_ONCE(tsk->flags & PF_EXITING);
old_cset = task_css_set(tsk);
get_css_set(new_cset);
rcu_assign_pointer(tsk->cgroups, new_cset);
list_move(&tsk->cg_list, &new_cset->mg_tasks);
	/*
	 * We just gained a reference on old_cset by taking it from the
	 * task.  As trading it for new_cset is protected by cgroup_mutex,
	 * we're safe to drop it here; it will be freed under RCU.
	 */
set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
	put_css_set_locked(old_cset, false);
}
/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
 * task_lock of @tsk or each thread in the threadgroup individually in turn.
 */
static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader,
			      bool threadgroup)
{
struct cgroup_taskset tset = {
.src_csets = LIST_HEAD_INIT(tset.src_csets),
.dst_csets = LIST_HEAD_INIT(tset.dst_csets),
.csets = &tset.src_csets,
};
struct cgroup_subsys_state *css, *failed_css = NULL;
struct css_set *cset, *tmp_cset;
struct task_struct *task, *tmp_task;
int i, ret;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
down_write(&css_set_rwsem);
	rcu_read_lock();

	task = leader;
	do {
struct cgroup *src_cgrp;
/* @task either already exited or can't exit until the end */
if (task->flags & PF_EXITING)
goto next;
cset = task_css_set(task);
src_cgrp = task_cgroup_from_root(task, cgrp->root);
/* nothing to do if this task is already in the cgroup */
if (src_cgrp == cgrp)
goto next;
if (!cset->mg_src_cgrp) {
WARN_ON(!list_empty(&cset->mg_tasks));
WARN_ON(!list_empty(&cset->mg_node));
cset->mg_src_cgrp = src_cgrp;
list_add(&cset->mg_node, &tset.src_csets);
get_css_set(cset);
}
list_move(&task->cg_list, &cset->mg_tasks);
next:
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset.src_csets))
return 0;
/*
* step 1: check that we can legitimately attach to the cgroup.
*/
for_each_css(css, i, cgrp) {
if (css->ss->can_attach) {
ret = css->ss->can_attach(css, &tset);
			if (ret) {
				failed_css = css;
				goto out_cancel_attach;
}
}
}
/*
* step 2: make sure css_sets exist for all threads to be migrated.
* we use find_css_set, which allocates a new one if necessary.
*/
list_for_each_entry(cset, &tset.src_csets, mg_node) {
struct css_set *dst_cset;
dst_cset = find_css_set(cset, cgrp);
		if (!dst_cset) {
			ret = -ENOMEM;
			goto out_release_tset;
		}

if (list_empty(&dst_cset->mg_node))
list_add(&dst_cset->mg_node, &tset.dst_csets);
else
put_css_set(dst_cset, false);
		cset->mg_dst_cset = dst_cset;
	}

	/*
	 * step 3: now that we're guaranteed success wrt the css_sets,
	 * proceed to move all tasks to the new cgroup.  There are no
	 * failure cases after here, so this is the commit point.
	 */
down_write(&css_set_rwsem);
list_for_each_entry(cset, &tset.src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
cgroup_task_migrate(cset->mg_src_cgrp, task,
cset->mg_dst_cset);
	}
	up_write(&css_set_rwsem);
/* migration is committed, all target tasks are now on dst_csets */
tset.csets = &tset.dst_csets;
/* nothing is sensitive to fork() after this point */

	/*
	 * step 4: do subsystem attach callbacks.
	 */
for_each_css(css, i, cgrp)
if (css->ss->attach)
css->ss->attach(css, &tset);

	ret = 0;
	goto out_release_tset;

out_cancel_attach:
for_each_css(css, i, cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
			css->ss->cancel_attach(css, &tset);
	}
out_release_tset:
down_write(&css_set_rwsem);
list_splice_init(&tset.dst_csets, &tset.src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
list_splice_init(&cset->mg_tasks, &cset->tasks);
cset->mg_dst_cset = NULL;
cset->mg_src_cgrp = NULL;
list_del_init(&cset->mg_node);
put_css_set_locked(cset, false);
}
	up_write(&css_set_rwsem);
	return ret;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup; may take task_lock of task.
 */
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
	struct task_struct *tsk;
	const struct cred *cred = current_cred(), *tcred;
	int ret;

if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
retry_find_task:
rcu_read_lock();
	if (pid) {
		tsk = find_task_by_vpid(pid);
		if (!tsk) {
			rcu_read_unlock();
			ret = -ESRCH;
			goto out_unlock_cgroup;
		}
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
		}
	} else
		tsk = current;

	if (threadgroup)
		tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
	put_task_struct(tsk);
out_unlock_cgroup:
	mutex_unlock(&cgroup_mutex);
	return ret;
}
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroupfs_root *root;
int retval = 0;
for_each_active_root(root) {
struct cgroup *from_cgrp;
down_read(&css_set_rwsem);
from_cgrp = task_cgroup_from_root(from, root);
up_read(&css_set_rwsem);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)