Newer
Older
Tejun Heo
committed
*/
mutex_unlock(&cgroup_mutex);
Tejun Heo
committed
cgroup_clear_dir(cgrp, removed_mask);
unsigned long bit = 1UL << i;
/* We're binding this subsystem to this hierarchy */
BUG_ON(cgroup_css(cgrp, ss));
BUG_ON(!cgroup_css(cgroup_dummy_top, ss));
BUG_ON(cgroup_css(cgroup_dummy_top, ss)->cgroup != cgroup_dummy_top);
rcu_assign_pointer(cgrp->subsys[i],
cgroup_css(cgroup_dummy_top, ss));
cgroup_css(cgrp, ss)->cgroup = cgrp;
ss->root = root;
ss->bind(cgroup_css(cgrp, ss));
/* refcount was already taken, and we're keeping it */
} else if (bit & removed_mask) {
/* We're removing this subsystem */
BUG_ON(cgroup_css(cgrp, ss) != cgroup_css(cgroup_dummy_top, ss));
BUG_ON(cgroup_css(cgrp, ss)->cgroup != cgrp);
ss->bind(cgroup_css(cgroup_dummy_top, ss));
cgroup_css(cgroup_dummy_top, ss)->cgroup = cgroup_dummy_top;
RCU_INIT_POINTER(cgrp->subsys[i], NULL);
cgroup_subsys[i]->root = &cgroup_dummy_root;
static int cgroup_show_options(struct seq_file *seq,
struct kernfs_root *kf_root)
struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
struct cgroup_subsys *ss;
for_each_subsys(ss, ssid)
if (root->subsys_mask & (1 << ssid))
seq_printf(seq, ",%s", ss->name);
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
seq_puts(seq, ",sane_behavior");
if (root->flags & CGRP_ROOT_NOPREFIX)
seq_puts(seq, ",noprefix");
if (root->flags & CGRP_ROOT_XATTR)
spin_lock(&release_agent_path_lock);
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
spin_unlock(&release_agent_path_lock);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
seq_puts(seq, ",clone_children");
if (strlen(root->name))
seq_printf(seq, ",name=%s", root->name);
return 0;
}
struct cgroup_sb_opts {
unsigned long subsys_mask;
char *release_agent;
bool cpuset_clone_children;
/* User explicitly requested empty subsystem */
bool none;
* Convert a hierarchy specifier into a bitmask of subsystems and
* flags. Call with cgroup_mutex held to protect the cgroup_subsys[]
* array. This function takes refcounts on subsystems to be used, unless it
* returns error, in which case no refcounts are taken.
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
char *token, *o = data;
bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
struct cgroup_subsys *ss;
int i;
#ifdef CONFIG_CPUSETS
mask = ~(1UL << cpuset_cgrp_id);
memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
return -EINVAL;
if (!strcmp(token, "none")) {
/* Explicitly have no subsystems */
opts->none = true;
continue;
}
if (!strcmp(token, "all")) {
/* Mutually exclusive option 'all' + subsystem name */
if (one_ss)
return -EINVAL;
all_ss = true;
continue;
}
if (!strcmp(token, "__DEVEL__sane_behavior")) {
opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
continue;
}
if (!strcmp(token, "noprefix")) {
opts->flags |= CGRP_ROOT_NOPREFIX;
continue;
}
if (!strcmp(token, "clone_children")) {
opts->cpuset_clone_children = true;
opts->flags |= CGRP_ROOT_XATTR;
if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
continue;
}
if (!strncmp(token, "name=", 5)) {
const char *name = token + 5;
/* Can't specify an empty name */
if (!strlen(name))
return -EINVAL;
/* Must match [\w.-]+ */
for (i = 0; i < strlen(name); i++) {
char c = name[i];
if (isalnum(c))
continue;
if ((c == '.') || (c == '-') || (c == '_'))
continue;
return -EINVAL;
}
/* Specifying two names is forbidden */
if (opts->name)
return -EINVAL;
opts->name = kstrndup(name,
GFP_KERNEL);
if (!opts->name)
return -ENOMEM;
continue;
}
if (strcmp(token, ss->name))
continue;
if (ss->disabled)
continue;
/* Mutually exclusive option 'all' + subsystem name */
if (all_ss)
return -EINVAL;
set_bit(i, &opts->subsys_mask);
one_ss = true;
break;
}
if (i == CGROUP_SUBSYS_COUNT)
return -ENOENT;
}
/*
* If the 'all' option was specified select all the subsystems,
* otherwise if 'none', 'name=' and a subsystem name options
* were not specified, let's default to 'all'
if (all_ss || (!one_ss && !opts->none && !opts->name))
for_each_subsys(ss, i)
if (!ss->disabled)
set_bit(i, &opts->subsys_mask);
/* Consistency checks */
if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
if ((opts->flags & (CGRP_ROOT_NOPREFIX | CGRP_ROOT_XATTR)) ||
opts->cpuset_clone_children || opts->release_agent ||
opts->name) {
pr_err("cgroup: sane_behavior: noprefix, xattr, clone_children, release_agent and name are not allowed\n");
return -EINVAL;
}
}
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
* the cpuset subsystem.
*/
if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
return -EINVAL;
/* Can't specify "none" and some subsystems */
if (opts->subsys_mask && opts->none)
return -EINVAL;
/*
* We either have to specify by name or by subsystems. (So all
* empty hierarchies must have a name).
*/
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
return 0;
}
static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
struct cgroup_sb_opts opts;
unsigned long added_mask, removed_mask;
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
pr_err("cgroup: sane_behavior: remount is not allowed\n");
return -EINVAL;
}
mutex_lock(&cgroup_mutex);
/* See what subsystems are wanted */
ret = parse_cgroupfs_options(data, &opts);
if (ret)
goto out_unlock;
if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
task_tgid_nr(current), current->comm);
added_mask = opts.subsys_mask & ~root->subsys_mask;
removed_mask = root->subsys_mask & ~opts.subsys_mask;
/* Don't allow flags or name to change at remount */
if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
(opts.name && strcmp(opts.name, root->name))) {
pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
root->flags & CGRP_ROOT_OPTION_MASK, root->name);
ret = -EINVAL;
goto out_unlock;
}
Tejun Heo
committed
/* remounting is not allowed for populated hierarchies */
Tejun Heo
committed
if (!list_empty(&root->top_cgroup.children)) {
Tejun Heo
committed
ret = -EBUSY;
ret = rebind_subsystems(root, added_mask, removed_mask);
Tejun Heo
committed
if (ret)
if (opts.release_agent) {
spin_lock(&release_agent_path_lock);
strcpy(root->release_agent_path, opts.release_agent);
spin_unlock(&release_agent_path_lock);
}
mutex_unlock(&cgroup_mutex);
/*
* To reduce the fork() overhead for systems that are not actually using
* their cgroups capability, we don't maintain the lists running through
* each css_set to its tasks until we see the list actually used - in other
* words after the first mount.
*/
static bool use_task_css_set_links __read_mostly;
static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
down_write(&css_set_rwsem);
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
if (use_task_css_set_links)
goto out_unlock;
use_task_css_set_links = true;
/*
* We need tasklist_lock because RCU is not safe against
* while_each_thread(). Besides, a forking task that has passed
* cgroup_post_fork() without seeing use_task_css_set_links = 1
* is not guaranteed to have its child immediately visible in the
* tasklist if we walk through it with RCU.
*/
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
WARN_ON_ONCE(!list_empty(&p->cg_list) ||
task_css_set(p) != &init_css_set);
/*
* We should check if the process is exiting, otherwise
* it will race with cgroup_exit() in that the list
* entry won't be deleted though the process has exited.
*/
if (!(p->flags & PF_EXITING))
list_add(&p->cg_list, &task_css_set(p)->tasks);
task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
out_unlock:
up_write(&css_set_rwsem);
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
INIT_LIST_HEAD(&cgrp->sibling);
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->release_list);
Ben Blum
committed
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
INIT_LIST_HEAD(&root->root_list);
Tejun Heo
committed
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
init_cgroup_housekeeping(cgrp);
static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
{
struct cgroupfs_root *root;
if (!opts->subsys_mask && !opts->none)
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
init_cgroup_root(root);
root->flags = opts->flags;
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
if (opts->cpuset_clone_children)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
static int cgroup_setup_root(struct cgroupfs_root *root, unsigned long ss_mask)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->top_cgroup;
struct css_set *cset;
int i, ret;
lockdep_assert_held(&cgroup_tree_mutex);
lockdep_assert_held(&cgroup_mutex);
ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
if (ret < 0)
root_cgrp->id = ret;
/*
* We're accessing css_set_count without locking css_set_rwsem here,
* but that's OK - it can only be increased by someone holding
* cgroup_lock, and that's us. The worst that can happen is that we
* have some link structures left over
*/
ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
if (ret)
/* ID 0 is reserved for dummy root, 1 for unified hierarchy */
ret = cgroup_init_root_id(root, 2, 0);
if (ret)
root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
KERNFS_ROOT_CREATE_DEACTIVATED,
root_cgrp);
if (IS_ERR(root->kf_root)) {
ret = PTR_ERR(root->kf_root);
goto exit_root_id;
}
root_cgrp->kn = root->kf_root->kn;
ret = cgroup_addrm_files(root_cgrp, cgroup_base_files, true);
if (ret)
ret = rebind_subsystems(root, ss_mask, 0);
/*
* There must be no failure case after here, since rebinding takes
* care of subsystems' refcounts, which are explicitly dropped in
* the failure exit path.
*/
list_add(&root->root_list, &cgroup_roots);
cgroup_root_count++;
/*
* Link the top cgroup in this hierarchy into all the css_set
* objects.
*/
down_write(&css_set_rwsem);
hash_for_each(css_set_table, i, cset, hlist)
link_css_set(&tmp_links, cset, root_cgrp);
up_write(&css_set_rwsem);
BUG_ON(!list_empty(&root_cgrp->children));
Tejun Heo
committed
BUG_ON(atomic_read(&root->nr_cgrps) != 1);
destroy_root:
kernfs_destroy_root(root->kf_root);
root->kf_root = NULL;
exit_root_id:
cgroup_exit_root_id(root);
free_cgrp_cset_links(&tmp_links);
return ret;
}
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
struct cgroup_sb_opts opts;
/*
* The first time anyone tries to mount a cgroup, enable the list
* linking each css_set to its tasks and fix up all existing tasks.
*/
if (!use_task_css_set_links)
cgroup_enable_task_cg_lists();
mutex_lock(&cgroup_tree_mutex);
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
goto out_unlock;
/* look for a matching existing root */
for_each_active_root(root) {
bool name_match = false;
/*
* If we asked for a name then it must match. Also, if
* name matches but sybsys_mask doesn't, we should fail.
* Remember whether name matched.
*/
if (opts.name) {
if (strcmp(opts.name, root->name))
continue;
name_match = true;
}
* If we asked for subsystems (or explicitly for no
* subsystems) then they must match.
if ((opts.subsys_mask || opts.none) &&
(opts.subsys_mask != root->subsys_mask)) {
if (!name_match)
continue;
ret = -EBUSY;
goto out_unlock;
}
Tejun Heo
committed
if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
ret = -EINVAL;
goto out_unlock;
} else {
pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
}
/*
* A root's lifetime is governed by its top cgroup. Zero
* ref indicate that the root is being destroyed. Wait for
* destruction to complete so that the subsystems are free.
* We can use wait_queue for the wait but this path is
* super cold. Let's just sleep for a bit and retry.
*/
if (!atomic_inc_not_zero(&root->top_cgroup.refcnt)) {
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgroup_tree_mutex);
kfree(opts.release_agent);
kfree(opts.name);
msleep(10);
goto retry;
}
ret = 0;
/* no such thing, create a new one */
root = cgroup_root_from_opts(&opts);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
goto out_unlock;
}
ret = cgroup_setup_root(root, opts.subsys_mask);
out_unlock:
kfree(opts.release_agent);
kfree(opts.name);
return ERR_PTR(ret);
dentry = kernfs_mount(fs_type, flags, root->kf_root);
if (IS_ERR(dentry))
return dentry;
}
static void cgroup_kill_sb(struct super_block *sb)
{
struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
struct cgroupfs_root *root = cgroup_root_from_kf(kf_root);
}
static struct file_system_type cgroup_fs_type = {
.name = "cgroup",
.kill_sb = cgroup_kill_sb,
};
static struct kobject *cgroup_kobj;
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Determine @task's cgroup on the first (the one with the lowest non-zero
* hierarchy_id) cgroup hierarchy and copy its path into @buf. This
* function grabs cgroup_mutex and shouldn't be used inside locks used by
* cgroup controller callbacks.
*
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroupfs_root *root;
struct cgroup *cgrp;
mutex_lock(&cgroup_mutex);
down_read(&css_set_rwsem);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
if (root) {
cgrp = task_cgroup_from_root(task, root);
} else {
/* if no hierarchy exists, everyone is in "/" */
if (strlcpy(buf, "/", buflen) < buflen)
path = buf;
up_read(&css_set_rwsem);
mutex_unlock(&cgroup_mutex);
EXPORT_SYMBOL_GPL(task_cgroup_path);
Tejun Heo
committed
/*
* Control Group taskset
*/
struct task_and_cgroup {
struct task_struct *task;
struct cgroup *cgrp;
Tejun Heo
committed
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
struct cgroup_taskset {
struct task_and_cgroup single;
struct flex_array *tc_array;
int tc_array_len;
int idx;
};
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
{
if (tset->tc_array) {
tset->idx = 0;
return cgroup_taskset_next(tset);
} else {
return tset->single.task;
}
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
{
struct task_and_cgroup *tc;
if (!tset->tc_array || tset->idx >= tset->tc_array_len)
return NULL;
tc = flex_array_get(tset->tc_array, tset->idx++);
return tc->task;
}
* cgroup_task_migrate - move a task from one cgroup to another.
* @old_cgrp; the cgroup @tsk is being migrated from
* @tsk: the task being migrated
* @new_cset: the new css_set @tsk is being attached to
* Must be called with cgroup_mutex, threadgroup and css_set_rwsem locked.
static void cgroup_task_migrate(struct cgroup *old_cgrp,
struct task_struct *tsk,
struct css_set *new_cset)
struct css_set *old_cset;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_rwsem);
* We are synchronized through threadgroup_lock() against PF_EXITING
* setting such that we can't race against cgroup_exit() changing the
* css_set to init_css_set and dropping the old one.
Frederic Weisbecker
committed
WARN_ON_ONCE(tsk->flags & PF_EXITING);
old_cset = task_css_set(tsk);
rcu_assign_pointer(tsk->cgroups, new_cset);
list_move(&tsk->cg_list, &new_cset->tasks);
* We just gained a reference on old_cset by taking it from the
* task. As trading it for new_cset is protected by cgroup_mutex,
* we're safe to drop it here; it will be freed under RCU.
set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
put_css_set_locked(old_cset, false);
* cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
* @leader: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
* Call holding cgroup_mutex and the group_rwsem of the leader. Will take
* task_lock of @tsk or each thread in the threadgroup individually in turn.
static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *leader,
struct cgroup_subsys_state *css, *failed_css = NULL;
struct task_and_cgroup *tc;
Tejun Heo
committed
struct cgroup_taskset tset = { };
/*
* step 0: in order to do expensive, possibly blocking operations for
* every thread, we cannot iterate the thread group list, since it needs
* rcu or tasklist locked. instead, build an array of all threads in the
* group - group_rwsem prevents new threads from appearing, and if
* threads exit, this will just be an over-estimate.
if (threadgroup)
group_size = get_nr_threads(leader);
else
group_size = 1;
/* flex_array supports very large thread-groups better than kmalloc. */
group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
/* pre-allocate to guarantee space while iterating in rcu read-side. */
ret = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
if (ret)
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
down_read(&css_set_rwsem);
rcu_read_lock();
struct task_and_cgroup ent;
/* @task either already exited or can't exit until the end */
if (task->flags & PF_EXITING)
goto next;
/* as per above, nr_threads may decrease, but not increase. */
BUG_ON(i >= group_size);
ent.task = task;
ent.cgrp = task_cgroup_from_root(task, root);
/* nothing to do if this task is already in the cgroup */
if (ent.cgrp == cgrp)
goto next;
/*
* saying GFP_ATOMIC has no effect here because we did prealloc
* earlier, but it's good form to communicate our expectations.
*/
ret = flex_array_put(group, i, &ent, GFP_ATOMIC);
BUG_ON(ret != 0);
next:
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_read(&css_set_rwsem);
/* remember the number of threads in the array for later. */
group_size = i;
Tejun Heo
committed
tset.tc_array = group;
tset.tc_array_len = group_size;
/* methods shouldn't be called if no task is actually migrating */
if (!group_size)
goto out_free_group_list;
/*
* step 1: check that we can legitimately attach to the cgroup.
*/
for_each_css(css, i, cgrp) {
if (css->ss->can_attach) {
ret = css->ss->can_attach(css, &tset);
if (ret) {
goto out_cancel_attach;
}
}
}
/*
* step 2: make sure css_sets exist for all threads to be migrated.
* we use find_css_set, which allocates a new one if necessary.
*/
for (i = 0; i < group_size; i++) {
struct css_set *old_cset;
tc = flex_array_get(group, i);
old_cset = task_css_set(tc->task);
tc->cset = find_css_set(old_cset, cgrp);
if (!tc->cset) {
goto out_put_css_set_refs;
* step 3: now that we're guaranteed success wrt the css_sets,
* proceed to move all tasks to the new cgroup. There are no
* failure cases after here, so this is the commit point.
down_write(&css_set_rwsem);
tc = flex_array_get(group, i);
cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
up_write(&css_set_rwsem);
/* nothing is sensitive to fork() after this point. */
/*
* step 4: do subsystem attach callbacks.
for_each_css(css, i, cgrp)
if (css->ss->attach)
css->ss->attach(css, &tset);
/*
* step 5: success! and cleanup
*/
out_put_css_set_refs:
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
put_css_set(tc->cset, false);
for_each_css(css, i, cgrp) {
if (css == failed_css)
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, &tset);
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup; may take task_lock of task.
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
struct task_struct *tsk;
David Howells
committed
const struct cred *cred = current_cred(), *tcred;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
retry_find_task:
rcu_read_lock();
goto out_unlock_cgroup;
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
David Howells
committed
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
David Howells
committed
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
} else
tsk = current;
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
out_unlock_cgroup:
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroupfs_root *root;
int retval = 0;
for_each_active_root(root) {
struct cgroup *from_cgrp;
down_read(&css_set_rwsem);
from_cgrp = task_cgroup_from_root(from, root);
up_read(&css_set_rwsem);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
static int cgroup_tasks_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 pid)