Newer
Older
static int rebind_subsystems(struct cgroupfs_root *root,
unsigned long added_mask, unsigned removed_mask)
struct cgroup *cgrp = &root->top_cgroup;
unsigned long pinned = 0;
Tejun Heo
committed
int i, ret;
BUG_ON(!mutex_is_locked(&cgroup_root_mutex));
/* Check that any added subsystems are currently free */
if (!(added_mask & (1 << i)))
/* is the subsystem mounted elsewhere? */
if (ss->root != &cgroup_dummy_root) {
ret = -EBUSY;
goto out_put;
}
/* pin the module */
if (!try_module_get(ss->module)) {
ret = -ENOENT;
goto out_put;
pinned |= 1 << i;
}
/* subsys could be missing if unloaded between parsing and here */
if (added_mask != pinned) {
ret = -ENOENT;
goto out_put;
Tejun Heo
committed
ret = cgroup_populate_dir(cgrp, added_mask);
if (ret)
Tejun Heo
committed
/*
* Nothing can fail from this point on. Remove files for the
* removed subsystems and rebind each subsystem.
*/
cgroup_clear_dir(cgrp, removed_mask);
unsigned long bit = 1UL << i;
/* We're binding this subsystem to this hierarchy */
BUG_ON(cgrp->subsys[i]);
BUG_ON(!cgroup_dummy_top->subsys[i]);
BUG_ON(cgroup_dummy_top->subsys[i]->cgroup != cgroup_dummy_top);
cgrp->subsys[i] = cgroup_dummy_top->subsys[i];
cgrp->subsys[i]->cgroup = cgrp;
list_move(&ss->sibling, &root->subsys_list);
ss->root = root;
/* refcount was already taken, and we're keeping it */
} else if (bit & removed_mask) {
/* We're removing this subsystem */
BUG_ON(cgrp->subsys[i] != cgroup_dummy_top->subsys[i]);
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
ss->bind(cgroup_dummy_top);
cgroup_dummy_top->subsys[i]->cgroup = cgroup_dummy_top;
cgrp->subsys[i] = NULL;
cgroup_subsys[i]->root = &cgroup_dummy_root;
list_move(&ss->sibling, &cgroup_dummy_root.subsys_list);
/* subsystem is now free - drop reference on module */
module_put(ss->module);
/*
* Mark @root has finished binding subsystems. @root->subsys_mask
* now matches the bound subsystems.
*/
root->flags |= CGRP_ROOT_SUBSYS_BOUND;
out_put:
for_each_subsys(ss, i)
if (pinned & (1 << i))
module_put(ss->module);
return ret;
static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
struct cgroup_subsys *ss;
for_each_root_subsys(root, ss)
seq_printf(seq, ",%s", ss->name);
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
seq_puts(seq, ",sane_behavior");
if (root->flags & CGRP_ROOT_NOPREFIX)
seq_puts(seq, ",noprefix");
if (root->flags & CGRP_ROOT_XATTR)
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags))
seq_puts(seq, ",clone_children");
if (strlen(root->name))
seq_printf(seq, ",name=%s", root->name);
return 0;
}
struct cgroup_sb_opts {
unsigned long subsys_mask;
char *release_agent;
bool cpuset_clone_children;
/* User explicitly requested empty subsystem */
bool none;
struct cgroupfs_root *new_root;
* Convert a hierarchy specifier into a bitmask of subsystems and
* flags. Call with cgroup_mutex held to protect the cgroup_subsys[]
* array. This function takes refcounts on subsystems to be used, unless it
* returns error, in which case no refcounts are taken.
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
char *token, *o = data;
bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
struct cgroup_subsys *ss;
int i;
#ifdef CONFIG_CPUSETS
mask = ~(1UL << cpuset_subsys_id);
#endif
memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
return -EINVAL;
if (!strcmp(token, "none")) {
/* Explicitly have no subsystems */
opts->none = true;
continue;
}
if (!strcmp(token, "all")) {
/* Mutually exclusive option 'all' + subsystem name */
if (one_ss)
return -EINVAL;
all_ss = true;
continue;
}
if (!strcmp(token, "__DEVEL__sane_behavior")) {
opts->flags |= CGRP_ROOT_SANE_BEHAVIOR;
continue;
}
if (!strcmp(token, "noprefix")) {
opts->flags |= CGRP_ROOT_NOPREFIX;
continue;
}
if (!strcmp(token, "clone_children")) {
opts->cpuset_clone_children = true;
opts->flags |= CGRP_ROOT_XATTR;
if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
continue;
}
if (!strncmp(token, "name=", 5)) {
const char *name = token + 5;
/* Can't specify an empty name */
if (!strlen(name))
return -EINVAL;
/* Must match [\w.-]+ */
for (i = 0; i < strlen(name); i++) {
char c = name[i];
if (isalnum(c))
continue;
if ((c == '.') || (c == '-') || (c == '_'))
continue;
return -EINVAL;
}
/* Specifying two names is forbidden */
if (opts->name)
return -EINVAL;
opts->name = kstrndup(name,
GFP_KERNEL);
if (!opts->name)
return -ENOMEM;
continue;
}
if (strcmp(token, ss->name))
continue;
if (ss->disabled)
continue;
/* Mutually exclusive option 'all' + subsystem name */
if (all_ss)
return -EINVAL;
set_bit(i, &opts->subsys_mask);
one_ss = true;
break;
}
if (i == CGROUP_SUBSYS_COUNT)
return -ENOENT;
}
/*
* If the 'all' option was specified select all the subsystems,
* otherwise if 'none', 'name=' and a subsystem name options
* were not specified, let's default to 'all'
if (all_ss || (!one_ss && !opts->none && !opts->name))
for_each_subsys(ss, i)
if (!ss->disabled)
set_bit(i, &opts->subsys_mask);
/* Consistency checks */
if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
if (opts->flags & CGRP_ROOT_NOPREFIX) {
pr_err("cgroup: sane_behavior: noprefix is not allowed\n");
return -EINVAL;
}
if (opts->cpuset_clone_children) {
pr_err("cgroup: sane_behavior: clone_children is not allowed\n");
return -EINVAL;
}
}
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
* the cpuset subsystem.
*/
if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
return -EINVAL;
/* Can't specify "none" and some subsystems */
if (opts->subsys_mask && opts->none)
return -EINVAL;
/*
* We either have to specify by name or by subsystems. (So all
* empty hierarchies must have a name).
*/
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
return 0;
}
static int cgroup_remount(struct super_block *sb, int *flags, char *data)
{
int ret = 0;
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup_sb_opts opts;
unsigned long added_mask, removed_mask;
if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
pr_err("cgroup: sane_behavior: remount is not allowed\n");
return -EINVAL;
}
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
/* See what subsystems are wanted */
ret = parse_cgroupfs_options(data, &opts);
if (ret)
goto out_unlock;
if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
task_tgid_nr(current), current->comm);
added_mask = opts.subsys_mask & ~root->subsys_mask;
removed_mask = root->subsys_mask & ~opts.subsys_mask;
/* Don't allow flags or name to change at remount */
if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
(opts.name && strcmp(opts.name, root->name))) {
pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
root->flags & CGRP_ROOT_OPTION_MASK, root->name);
ret = -EINVAL;
goto out_unlock;
}
Tejun Heo
committed
/* remounting is not allowed for populated hierarchies */
if (root->number_of_cgroups > 1) {
ret = -EBUSY;
goto out_unlock;
}
ret = rebind_subsystems(root, added_mask, removed_mask);
Tejun Heo
committed
if (ret)
if (opts.release_agent)
strcpy(root->release_agent_path, opts.release_agent);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
static const struct super_operations cgroup_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = cgroup_show_options,
.remount_fs = cgroup_remount,
};
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
INIT_LIST_HEAD(&cgrp->sibling);
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->cset_links);
INIT_LIST_HEAD(&cgrp->release_list);
Ben Blum
committed
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
INIT_LIST_HEAD(&cgrp->event_list);
spin_lock_init(&cgrp->event_list_lock);
static void init_cgroup_root(struct cgroupfs_root *root)
{
struct cgroup *cgrp = &root->top_cgroup;
INIT_LIST_HEAD(&root->subsys_list);
INIT_LIST_HEAD(&root->root_list);
root->number_of_cgroups = 1;
cgrp->root = root;
RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
init_cgroup_housekeeping(cgrp);
static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end)
{
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&cgroup_root_mutex);
id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, start, end,
GFP_KERNEL);
if (id < 0)
return id;
root->hierarchy_id = id;
return 0;
}
static void cgroup_exit_root_id(struct cgroupfs_root *root)
{
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&cgroup_root_mutex);
idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}
static int cgroup_test_super(struct super_block *sb, void *data)
{
struct cgroup_sb_opts *opts = data;
struct cgroupfs_root *root = sb->s_fs_info;
/* If we asked for a name then it must match */
if (opts->name && strcmp(opts->name, root->name))
return 0;
/*
* If we asked for subsystems (or explicitly for no
* subsystems) then they must match
*/
if ((opts->subsys_mask || opts->none)
&& (opts->subsys_mask != root->subsys_mask))
return 0;
return 1;
}
static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
{
struct cgroupfs_root *root;
if (!opts->subsys_mask && !opts->none)
return NULL;
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
init_cgroup_root(root);
/*
* We need to set @root->subsys_mask now so that @root can be
* matched by cgroup_test_super() before it finishes
* initialization; otherwise, competing mounts with the same
* options may try to bind the same subsystems instead of waiting
* for the first one leading to unexpected mount errors.
* SUBSYS_BOUND will be set once actual binding is complete.
*/
root->subsys_mask = opts->subsys_mask;
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
if (opts->cpuset_clone_children)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags);
static void cgroup_free_root(struct cgroupfs_root *root)
{
if (root) {
/* hierarhcy ID shoulid already have been released */
WARN_ON_ONCE(root->hierarchy_id);
ida_destroy(&root->cgroup_ida);
kfree(root);
}
}
static int cgroup_set_super(struct super_block *sb, void *data)
{
int ret;
struct cgroup_sb_opts *opts = data;
/* If we don't have a new root, we can't set up a new sb */
if (!opts->new_root)
return -EINVAL;
BUG_ON(!opts->subsys_mask && !opts->none);
ret = set_anon_super(sb, NULL);
if (ret)
return ret;
sb->s_fs_info = opts->new_root;
opts->new_root->sb = sb;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = CGROUP_SUPER_MAGIC;
sb->s_op = &cgroup_ops;
return 0;
}
static int cgroup_get_rootdir(struct super_block *sb)
{
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
struct inode *inode =
cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
if (!inode)
return -ENOMEM;
inode->i_fop = &simple_dir_operations;
inode->i_op = &cgroup_dir_inode_operations;
/* directories start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
sb->s_root = d_make_root(inode);
if (!sb->s_root)
/* for everything else we want ->d_op set */
sb->s_d_op = &cgroup_dops;
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
{
struct cgroup_sb_opts opts;
int ret = 0;
struct super_block *sb;
struct cgroupfs_root *new_root;
Tejun Heo
committed
struct list_head tmp_links;
Tejun Heo
committed
const struct cred *cred;
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
/*
* Allocate a new cgroup root. We may not need it if we're
* reusing an existing hierarchy.
*/
new_root = cgroup_root_from_opts(&opts);
if (IS_ERR(new_root)) {
ret = PTR_ERR(new_root);
/* Locate an existing or new sb for this hierarchy */
sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
root = sb->s_fs_info;
BUG_ON(!root);
if (root == opts.new_root) {
/* We used the new root structure, so this is a new hierarchy */
struct cgroup *root_cgrp = &root->top_cgroup;
struct cgroupfs_root *existing_root;
struct css_set *cset;
BUG_ON(sb->s_root != NULL);
ret = cgroup_get_rootdir(sb);
if (ret)
goto drop_new_super;
inode = sb->s_root->d_inode;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
/* Check for name clashes with existing mounts */
ret = -EBUSY;
if (strlen(root->name))
for_each_active_root(existing_root)
if (!strcmp(existing_root->name, root->name))
goto unlock_drop;
/*
* We're accessing css_set_count without locking
* css_set_lock here, but that's OK - it can only be
* increased by someone holding cgroup_lock, and
* that's us. The worst that can happen is that we
* have some link structures left over
*/
ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
/* ID 0 is reserved for dummy root, 1 for unified hierarchy */
ret = cgroup_init_root_id(root, 2, 0);
if (ret)
goto unlock_drop;
Tejun Heo
committed
sb->s_root->d_fsdata = root_cgrp;
root_cgrp->dentry = sb->s_root;
/*
* We're inside get_sb() and will call lookup_one_len() to
* create the root files, which doesn't work if SELinux is
* in use. The following cred dancing somehow works around
* it. See 2ce9738ba ("cgroupfs: use init_cred when
* populating new cgroupfs mount") for more details.
*/
cred = override_creds(&init_cred);
ret = cgroup_addrm_files(root_cgrp, NULL, cgroup_base_files, true);
if (ret)
goto rm_base_files;
ret = rebind_subsystems(root, root->subsys_mask, 0);
Tejun Heo
committed
if (ret)
goto rm_base_files;
revert_creds(cred);
/*
* There must be no failure case after here, since rebinding
* takes care of subsystems' refcounts, which are explicitly
* dropped in the failure exit path.
*/
list_add(&root->root_list, &cgroup_roots);
cgroup_root_count++;
/* Link the top cgroup in this hierarchy into all
* the css_set objects */
write_lock(&css_set_lock);
hash_for_each(css_set_table, i, cset, hlist)
link_css_set(&tmp_links, cset, root_cgrp);
write_unlock(&css_set_lock);
free_cgrp_cset_links(&tmp_links);
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
} else {
/*
* We re-used an existing hierarchy - the new root (if
* any) is not needed
*/
Tejun Heo
committed
if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
ret = -EINVAL;
goto drop_new_super;
} else {
pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
}
kfree(opts.release_agent);
kfree(opts.name);
Tejun Heo
committed
rm_base_files:
free_cgrp_cset_links(&tmp_links);
cgroup_addrm_files(&root->top_cgroup, NULL, cgroup_base_files, false);
revert_creds(cred);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
deactivate_locked_super(sb);
out_err:
kfree(opts.release_agent);
kfree(opts.name);
}
static void cgroup_kill_sb(struct super_block *sb) {
struct cgroupfs_root *root = sb->s_fs_info;
struct cgroup *cgrp = &root->top_cgroup;
struct cgrp_cset_link *link, *tmp_link;
int ret;
BUG_ON(!root);
BUG_ON(root->number_of_cgroups != 1);
BUG_ON(!list_empty(&cgrp->children));
Tejun Heo
committed
mutex_lock(&cgrp->dentry->d_inode->i_mutex);
mutex_lock(&cgroup_mutex);
/* Rebind all subsystems back to the default hierarchy */
if (root->flags & CGRP_ROOT_SUBSYS_BOUND) {
ret = rebind_subsystems(root, 0, root->subsys_mask);
/* Shouldn't be able to fail ... */
BUG_ON(ret);
}
* Release all the links from cset_links to this hierarchy's
* root cgroup
*/
write_lock(&css_set_lock);
list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
list_del(&link->cset_link);
list_del(&link->cgrp_link);
kfree(link);
}
write_unlock(&css_set_lock);
if (!list_empty(&root->root_list)) {
list_del(&root->root_list);
mutex_unlock(&cgroup_mutex);
Tejun Heo
committed
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
}
static struct file_system_type cgroup_fs_type = {
.name = "cgroup",
.kill_sb = cgroup_kill_sb,
};
static struct kobject *cgroup_kobj;
/**
* cgroup_path - generate the path of a cgroup
* @cgrp: the cgroup in question
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Writes path of cgroup into buf. Returns 0 on success, -errno on error.
*
* We can't generate cgroup path using dentry->d_name, as accessing
* dentry->name must be protected by irq-unsafe dentry->d_lock or parent
* inode's i_mutex, while on the other hand cgroup_path() can be called
* with some irq-safe spinlocks held.
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
if (!cgrp->parent) {
if (strlcpy(buf, "/", buflen) >= buflen)
return -ENAMETOOLONG;
start = buf + buflen - 1;
*start = '\0';
const char *name = cgroup_name(cgrp);
int len;
len = strlen(name);
if ((start -= len) < buf)
goto out;
memcpy(start, name, len);
} while (cgrp->parent);
memmove(buf, start, buf + buflen - start);
out:
rcu_read_unlock();
return ret;
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
/**
* task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
* @task: target task
* @hierarchy_id: the hierarchy to look up @task's cgroup from
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
* copy its path into @buf. This function grabs cgroup_mutex and shouldn't
* be used inside locks used by cgroup controller callbacks.
*/
int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
char *buf, size_t buflen)
{
struct cgroupfs_root *root;
struct cgroup *cgrp = NULL;
int ret = -ENOENT;
mutex_lock(&cgroup_mutex);
root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
if (root) {
cgrp = task_cgroup_from_root(task, root);
ret = cgroup_path(cgrp, buf, buflen);
}
mutex_unlock(&cgroup_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
Tejun Heo
committed
/*
* Control Group taskset
*/
struct task_and_cgroup {
struct task_struct *task;
struct cgroup *cgrp;
Tejun Heo
committed
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
struct cgroup_taskset {
struct task_and_cgroup single;
struct flex_array *tc_array;
int tc_array_len;
int idx;
struct cgroup *cur_cgrp;
};
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
*
* @tset iteration is initialized and the first task is returned.
*/
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
{
if (tset->tc_array) {
tset->idx = 0;
return cgroup_taskset_next(tset);
} else {
tset->cur_cgrp = tset->single.cgrp;
return tset->single.task;
}
}
EXPORT_SYMBOL_GPL(cgroup_taskset_first);
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
{
struct task_and_cgroup *tc;
if (!tset->tc_array || tset->idx >= tset->tc_array_len)
return NULL;
tc = flex_array_get(tset->tc_array, tset->idx++);
tset->cur_cgrp = tc->cgrp;
return tc->task;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_next);
/**
* cgroup_taskset_cur_cgroup - return the matching cgroup for the current task
* @tset: taskset of interest
*
* Return the cgroup for the current (last returned) task of @tset. This
* function must be preceded by either cgroup_taskset_first() or
* cgroup_taskset_next().
*/
struct cgroup *cgroup_taskset_cur_cgroup(struct cgroup_taskset *tset)
{
return tset->cur_cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_cur_cgroup);
/**
* cgroup_taskset_size - return the number of tasks in taskset
* @tset: taskset of interest
*/
int cgroup_taskset_size(struct cgroup_taskset *tset)
{
return tset->tc_array ? tset->tc_array_len : 1;
}
EXPORT_SYMBOL_GPL(cgroup_taskset_size);
/*
* cgroup_task_migrate - move a task from one cgroup to another.
*
* Must be called with cgroup_mutex and threadgroup locked.
static void cgroup_task_migrate(struct cgroup *old_cgrp,
struct task_struct *tsk,
struct css_set *new_cset)
struct css_set *old_cset;
* We are synchronized through threadgroup_lock() against PF_EXITING
* setting such that we can't race against cgroup_exit() changing the
* css_set to init_css_set and dropping the old one.
Frederic Weisbecker
committed
WARN_ON_ONCE(tsk->flags & PF_EXITING);
old_cset = task_css_set(tsk);
rcu_assign_pointer(tsk->cgroups, new_cset);
task_unlock(tsk);
/* Update the css_set linked lists if we're using them */
write_lock(&css_set_lock);
if (!list_empty(&tsk->cg_list))
list_move(&tsk->cg_list, &new_cset->tasks);
* We just gained a reference on old_cset by taking it from the
* task. As trading it for new_cset is protected by cgroup_mutex,
* we're safe to drop it here; it will be freed under RCU.
set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
put_css_set(old_cset);
* cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
* @tsk: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
* Call holding cgroup_mutex and the group_rwsem of the leader. Will take
* task_lock of @tsk or each thread in the threadgroup individually in turn.
static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
bool threadgroup)
{
int retval, i, group_size;
struct cgroup_subsys *ss, *failed_ss = NULL;
struct cgroupfs_root *root = cgrp->root;
/* threadgroup list cursor and array */
struct task_struct *leader = tsk;
struct task_and_cgroup *tc;
Tejun Heo
committed
struct cgroup_taskset tset = { };
/*
* step 0: in order to do expensive, possibly blocking operations for
* every thread, we cannot iterate the thread group list, since it needs
* rcu or tasklist locked. instead, build an array of all threads in the
* group - group_rwsem prevents new threads from appearing, and if
* threads exit, this will just be an over-estimate.
if (threadgroup)
group_size = get_nr_threads(tsk);
else
group_size = 1;
/* flex_array supports very large thread-groups better than kmalloc. */
group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
/* pre-allocate to guarantee space while iterating in rcu read-side. */
retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
if (retval)
goto out_free_group_list;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/