Newer
Older
next:
if (!threadgroup)
break;
rcu_read_unlock();
/* remember the number of threads in the array for later. */
group_size = i;
Tejun Heo
committed
tset.tc_array = group;
tset.tc_array_len = group_size;
/* methods shouldn't be called if no task is actually migrating */
retval = 0;
if (!group_size)
goto out_free_group_list;
/*
* step 1: check that we can legitimately attach to the cgroup.
*/
for_each_root_subsys(root, ss) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
retval = ss->can_attach(css, &tset);
if (retval) {
failed_ss = ss;
goto out_cancel_attach;
}
}
}
/*
* step 2: make sure css_sets exist for all threads to be migrated.
* we use find_css_set, which allocates a new one if necessary.
*/
for (i = 0; i < group_size; i++) {
struct css_set *old_cset;
tc = flex_array_get(group, i);
old_cset = task_css_set(tc->task);
tc->cset = find_css_set(old_cset, cgrp);
if (!tc->cset) {
retval = -ENOMEM;
goto out_put_css_set_refs;
* step 3: now that we're guaranteed success wrt the css_sets,
* proceed to move all tasks to the new cgroup. There are no
* failure cases after here, so this is the commit point.
*/
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
cgroup_task_migrate(tc->cgrp, tc->task, tc->cset);
}
/* nothing is sensitive to fork() after this point. */
/*
* step 4: do subsystem attach callbacks.
for_each_root_subsys(root, ss) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
ss->attach(css, &tset);
}
/*
* step 5: success! and cleanup
*/
retval = 0;
out_put_css_set_refs:
if (retval) {
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
}
out_cancel_attach:
if (retval) {
for_each_root_subsys(root, ss) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
if (ss == failed_ss)
ss->cancel_attach(css, &tset);
return retval;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup; may take task_lock of task.
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
struct task_struct *tsk;
David Howells
committed
const struct cred *cred = current_cred(), *tcred;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
retry_find_task:
rcu_read_lock();
ret= -ESRCH;
goto out_unlock_cgroup;
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
David Howells
committed
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
David Howells
committed
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
} else
tsk = current;
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
out_unlock_cgroup:
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroupfs_root *root;
int retval = 0;
for_each_active_root(root) {
struct cgroup *from_cgrp = task_cgroup_from_root(from, root);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
static int cgroup_tasks_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 pid)
return attach_task_by_pid(css->cgroup, pid, false);
static int cgroup_procs_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 tgid)
{
return attach_task_by_pid(css->cgroup, tgid, true);
}
static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
struct cftype *cft, const char *buffer)
BUILD_BUG_ON(sizeof(css->cgroup->root->release_agent_path) < PATH_MAX);
if (strlen(buffer) >= PATH_MAX)
return -EINVAL;
if (!cgroup_lock_live_group(css->cgroup))
return -ENODEV;
strcpy(css->cgroup->root->release_agent_path, buffer);
return 0;
}
static int cgroup_release_agent_show(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *seq)
struct cgroup *cgrp = css->cgroup;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
seq_puts(seq, cgrp->root->release_agent_path);
seq_putc(seq, '\n');
return 0;
}
static int cgroup_sane_behavior_show(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *seq)
seq_printf(seq, "%d\n", cgroup_sane_behavior(css->cgroup));
return 0;
}
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
static ssize_t cgroup_write_X64(struct cgroup_subsys_state *css,
struct cftype *cft, struct file *file,
const char __user *userbuf, size_t nbytes,
loff_t *unused_ppos)
char buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
char *end;
if (!nbytes)
return -EINVAL;
if (nbytes >= sizeof(buffer))
return -E2BIG;
if (copy_from_user(buffer, userbuf, nbytes))
return -EFAULT;
buffer[nbytes] = 0; /* nul-terminate */
if (cft->write_u64) {
u64 val = simple_strtoull(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
retval = cft->write_u64(css, cft, val);
s64 val = simple_strtoll(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
retval = cft->write_s64(css, cft, val);
if (!retval)
retval = nbytes;
return retval;
}
static ssize_t cgroup_write_string(struct cgroup_subsys_state *css,
struct cftype *cft, struct file *file,
const char __user *userbuf, size_t nbytes,
loff_t *unused_ppos)
char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
size_t max_bytes = cft->max_write_len;
char *buffer = local_buffer;
if (!max_bytes)
max_bytes = sizeof(local_buffer) - 1;
if (nbytes >= max_bytes)
return -E2BIG;
/* Allocate a dynamic buffer if we need one */
if (nbytes >= sizeof(local_buffer)) {
buffer = kmalloc(nbytes + 1, GFP_KERNEL);
if (buffer == NULL)
return -ENOMEM;
}
if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
retval = -EFAULT;
goto out;
}
buffer[nbytes] = 0; /* nul-terminate */
retval = cft->write_string(css, cft, strstrip(buffer));
if (!retval)
retval = nbytes;
if (buffer != local_buffer)
kfree(buffer);
return retval;
}
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
Tejun Heo
committed
struct cgroup_subsys_state *css = cfe->css;
if (cft->write_u64 || cft->write_s64)
return cgroup_write_X64(css, cft, file, buf, nbytes, ppos);
if (cft->write_string)
return cgroup_write_string(css, cft, file, buf, nbytes, ppos);
if (cft->trigger) {
int ret = cft->trigger(css, (unsigned int)cft->private);
return ret ? ret : nbytes;
}
static ssize_t cgroup_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft, struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos)
char tmp[CGROUP_LOCAL_BUFFER_SIZE];
u64 val = cft->read_u64(css, cft);
int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static ssize_t cgroup_read_s64(struct cgroup_subsys_state *css,
struct cftype *cft, struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos)
char tmp[CGROUP_LOCAL_BUFFER_SIZE];
s64 val = cft->read_s64(css, cft);
int len = sprintf(tmp, "%lld\n", (long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
Tejun Heo
committed
struct cgroup_subsys_state *css = cfe->css;
if (cft->read_u64)
return cgroup_read_u64(css, cft, file, buf, nbytes, ppos);
if (cft->read_s64)
return cgroup_read_s64(css, cft, file, buf, nbytes, ppos);
return -EINVAL;
}
/*
* seqfile ops/methods for returning structured data. Currently just
* supports string->u64 maps, but can be extended in future.
*/
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
struct cfent *cfe = m->private;
struct cftype *cft = cfe->type;
Tejun Heo
committed
struct cgroup_subsys_state *css = cfe->css;
return cft->read_seq_string(css, cft, m);
static const struct file_operations cgroup_seqfile_operations = {
.write = cgroup_file_write,
.release = cgroup_file_release,
static int cgroup_file_open(struct inode *inode, struct file *file)
{
struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
Tejun Heo
committed
struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
struct cgroup_subsys_state *css;
int err;
err = generic_file_open(inode, file);
if (err)
return err;
/*
* If the file belongs to a subsystem, pin the css. Will be
* unpinned either on open failure or release. This ensures that
* @css stays alive for all file operations.
*/
Tejun Heo
committed
rcu_read_lock();
css = cgroup_css(cgrp, cft->ss);
if (cft->ss && !css_tryget(css))
css = NULL;
Tejun Heo
committed
rcu_read_unlock();
return -ENODEV;
/*
* @cfe->css is used by read/write/close to determine the
* associated css. @file->private_data would be a better place but
* that's already used by seqfile. Multiple accessors may use it
* simultaneously which is okay as the association never changes.
*/
WARN_ON_ONCE(cfe->css && cfe->css != css);
cfe->css = css;
if (cft->read_seq_string) {
file->f_op = &cgroup_seqfile_operations;
err = single_open(file, cgroup_seqfile_show, cfe);
} else if (cft->open) {
err = cft->open(inode, file);
return err;
}
static int cgroup_file_release(struct inode *inode, struct file *file)
{
struct cfent *cfe = __d_cfe(file->f_dentry);
Tejun Heo
committed
struct cgroup_subsys_state *css = cfe->css;
if (file->f_op == &cgroup_seqfile_operations)
single_release(inode, file);
}
/*
* cgroup_rename - Only allow simple rename of directories in place.
*/
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
int ret;
struct cgroup_name *name, *old_name;
struct cgroup *cgrp;
/*
* It's convinient to use parent dir's i_mutex to protected
* cgrp->name.
*/
lockdep_assert_held(&old_dir->i_mutex);
if (!S_ISDIR(old_dentry->d_inode->i_mode))
return -ENOTDIR;
if (new_dentry->d_inode)
return -EEXIST;
if (old_dir != new_dir)
return -EIO;
cgrp = __d_cgrp(old_dentry);
/*
* This isn't a proper migration and its usefulness is very
* limited. Disallow if sane_behavior.
*/
if (cgroup_sane_behavior(cgrp))
return -EPERM;
name = cgroup_alloc_name(new_dentry);
if (!name)
return -ENOMEM;
ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
if (ret) {
kfree(name);
return ret;
}
old_name = rcu_dereference_protected(cgrp->name, true);
rcu_assign_pointer(cgrp->name, name);
kfree_rcu(old_name, rcu_head);
return 0;
static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
{
if (S_ISDIR(dentry->d_inode->i_mode))
return &__d_cgrp(dentry)->xattrs;
else
}
static inline int xattr_enabled(struct dentry *dentry)
{
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
return root->flags & CGRP_ROOT_XATTR;
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
}
static bool is_valid_xattr(const char *name)
{
if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
return true;
return false;
}
static int cgroup_setxattr(struct dentry *dentry, const char *name,
const void *val, size_t size, int flags)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
if (!is_valid_xattr(name))
return -EINVAL;
return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
}
static int cgroup_removexattr(struct dentry *dentry, const char *name)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
if (!is_valid_xattr(name))
return -EINVAL;
return simple_xattr_remove(__d_xattrs(dentry), name);
}
static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
void *buf, size_t size)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
if (!is_valid_xattr(name))
return -EINVAL;
return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
}
static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
return simple_xattr_list(__d_xattrs(dentry), buf, size);
}
static const struct file_operations cgroup_file_operations = {
.read = cgroup_file_read,
.write = cgroup_file_write,
.llseek = generic_file_llseek,
.open = cgroup_file_open,
.release = cgroup_file_release,
};
static const struct inode_operations cgroup_file_inode_operations = {
.setxattr = cgroup_setxattr,
.getxattr = cgroup_getxattr,
.listxattr = cgroup_listxattr,
.removexattr = cgroup_removexattr,
};
static const struct inode_operations cgroup_dir_inode_operations = {
.mkdir = cgroup_mkdir,
.rmdir = cgroup_rmdir,
.rename = cgroup_rename,
.setxattr = cgroup_setxattr,
.getxattr = cgroup_getxattr,
.listxattr = cgroup_listxattr,
.removexattr = cgroup_removexattr,
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
struct super_block *sb)
{
struct inode *inode;
if (!dentry)
return -ENOENT;
if (dentry->d_inode)
return -EEXIST;
inode = cgroup_new_inode(mode, sb);
if (!inode)
return -ENOMEM;
if (S_ISDIR(mode)) {
inode->i_op = &cgroup_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
inc_nlink(dentry->d_parent->d_inode);
/*
* Control reaches here with cgroup_mutex held.
* @inode->i_mutex should nest outside cgroup_mutex but we
* want to populate it immediately without releasing
* cgroup_mutex. As @inode isn't visible to anyone else
* yet, trylock will always succeed without affecting
* lockdep checks.
*/
WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
} else if (S_ISREG(mode)) {
inode->i_size = 0;
inode->i_fop = &cgroup_file_operations;
inode->i_op = &cgroup_file_inode_operations;
}
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
return 0;
}
/**
* cgroup_file_mode - deduce file mode of a control file
* @cft: the control file in question
*
* returns cft->mode if ->mode is not 0
* returns S_IRUGO|S_IWUSR if it has both a read and a write handler
* returns S_IRUGO if it has only a read handler
* returns S_IWUSR if it has only a write hander
*/
static umode_t cgroup_file_mode(const struct cftype *cft)
if (cft->mode)
return cft->mode;
if (cft->read_u64 || cft->read_s64 || cft->read_seq_string)
if (cft->write_u64 || cft->write_s64 || cft->write_string ||
cft->trigger)
mode |= S_IWUSR;
return mode;
}
static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
struct dentry *dir = cgrp->dentry;
char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
!(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
strcat(name, ".");
}
strcat(name, cft->name);
BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
if (!cfe)
return -ENOMEM;
dentry = lookup_one_len(name, dir, strlen(name));
error = PTR_ERR(dentry);
cfe->type = (void *)cft;
cfe->dentry = dentry;
dentry->d_fsdata = cfe;
simple_xattrs_init(&cfe->xattrs);
mode = cgroup_file_mode(cft);
error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
if (!error) {
list_add_tail(&cfe->node, &parent->files);
cfe = NULL;
}
dput(dentry);
out:
kfree(cfe);
return error;
}
/**
* cgroup_addrm_files - add or remove files to a cgroup directory
* @cgrp: the target cgroup
* @cfts: array of cftypes to be added
* @is_add: whether to add or remove
*
* Depending on @is_add, add or remove files defined by @cfts on @cgrp.
* For removals, this function never fails. If addition fails, this
* function doesn't remove files already added. The caller is responsible
* for cleaning up.
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add)
int ret;
lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
continue;
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
continue;
ret = cgroup_add_file(cgrp, cft);
pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
cft->name, ret);
return ret;
}
} else {
cgroup_rm_file(cgrp, cft);
static void cgroup_cfts_prepare(void)
__acquires(&cgroup_mutex)
{
/*
* Thanks to the entanglement with vfs inode locking, we can't walk
* the existing cgroups under cgroup_mutex and create files.
Tejun Heo
committed
* Instead, we use css_for_each_descendant_pre() and drop RCU read
* lock before calling cgroup_addrm_files().
*/
mutex_lock(&cgroup_mutex);
}
static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
__releases(&cgroup_mutex)
{
LIST_HEAD(pending);
struct cgroup_subsys *ss = cfts[0].ss;
Tejun Heo
committed
struct cgroup *root = &ss->root->top_cgroup;
struct super_block *sb = ss->root->sb;
struct dentry *prev = NULL;
struct inode *inode;
Tejun Heo
committed
struct cgroup_subsys_state *css;
/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
if (!cfts || ss->root == &cgroup_dummy_root ||
!atomic_inc_not_zero(&sb->s_active)) {
mutex_unlock(&cgroup_mutex);
* All cgroups which are created after we drop cgroup_mutex will
* have the updated set of files, so we only need to update the
* cgroups created before the current @cgroup_serial_nr_next.
update_before = cgroup_serial_nr_next;
mutex_unlock(&cgroup_mutex);
/* add/rm files for all cgroups created before */
rcu_read_lock();
css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
Tejun Heo
committed
struct cgroup *cgrp = css->cgroup;
if (cgroup_is_dead(cgrp))
continue;
inode = cgrp->dentry->d_inode;
dget(cgrp->dentry);
rcu_read_unlock();
dput(prev);
prev = cgrp->dentry;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))
ret = cgroup_addrm_files(cgrp, cfts, is_add);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
rcu_read_lock();
rcu_read_unlock();
dput(prev);
deactivate_super(sb);
}
/**
* cgroup_add_cftypes - add an array of cftypes to a subsystem
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Register @cfts to @ss. Files described by @cfts are created for all
* existing cgroups to which @ss is attached and all future cgroups will
* have them too. This function can be called anytime whether @ss is
* attached or not.
*
* Returns 0 on successful registration, -errno on failure. Note that this
* function currently returns 0 as long as @cfts registration is successful
* even if some file creation attempts on existing cgroups fail.
*/
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype_set *set;
set = kzalloc(sizeof(*set), GFP_KERNEL);
if (!set)
return -ENOMEM;
for (cft = cfts; cft->name[0] != '\0'; cft++)
cft->ss = ss;
cgroup_cfts_prepare();
set->cfts = cfts;
list_add_tail(&set->node, &ss->cftsets);
ret = cgroup_cfts_commit(cfts, true);
}
EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
/**
* cgroup_rm_cftypes - remove an array of cftypes from a subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Unregister @cfts. Files described by @cfts are removed from all
* existing cgroups and all future cgroups won't have them either. This
* function can be called anytime whether @cfts' subsys is attached or not.
*
* Returns 0 on successful unregistration, -ENOENT if @cfts is not
int cgroup_rm_cftypes(struct cftype *cfts)
if (!cfts || !cfts[0].ss)
return -ENOENT;
list_for_each_entry(set, &cfts[0].ss->cftsets, node) {
list_del(&set->node);
kfree(set);
cgroup_cfts_commit(cfts, false);
cgroup_cfts_commit(NULL, false);
/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
* Return the number of tasks in the cgroup.
*/
int cgroup_task_count(const struct cgroup *cgrp)
struct cgrp_cset_link *link;
read_lock(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += atomic_read(&link->cset->refcount);
read_unlock(&css_set_lock);
* To reduce the fork() overhead for systems that are not actually using
* their cgroups capability, we don't maintain the lists running through
* each css_set to its tasks until we see the list actually used - in other
* words after the first call to css_task_iter_start().
static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
write_lock(&css_set_lock);
use_task_css_set_links = 1;
Frederic Weisbecker
committed
/*
* We need tasklist_lock because RCU is not safe against
* while_each_thread(). Besides, a forking task that has passed
* cgroup_post_fork() without seeing use_task_css_set_links = 1
* is not guaranteed to have its child immediately visible in the
* tasklist if we walk through it with RCU.
*/
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
/*
* We should check if the process is exiting, otherwise
* it will race with cgroup_exit() in that the list
* entry won't be deleted though the process has exited.
*/
if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
list_add(&p->cg_list, &task_css_set(p)->tasks);
task_unlock(p);
} while_each_thread(g, p);
Frederic Weisbecker
committed
read_unlock(&tasklist_lock);
write_unlock(&css_set_lock);
}
Tejun Heo
committed
* css_next_child - find the next child of a given css
* @pos_css: the current position (%NULL to initiate traversal)
* @parent_css: css whose children to walk
Tejun Heo
committed
* This function returns the next child of @parent_css and should be called
* under RCU read lock. The only requirement is that @parent_css and
* @pos_css are accessible. The next sibling is guaranteed to be returned
* regardless of their states.
Tejun Heo
committed
struct cgroup_subsys_state *
css_next_child(struct cgroup_subsys_state *pos_css,
struct cgroup_subsys_state *parent_css)
Tejun Heo
committed
struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
struct cgroup *cgrp = parent_css->cgroup;
struct cgroup *next;
WARN_ON_ONCE(!rcu_read_lock_held());
/*
* @pos could already have been removed. Once a cgroup is removed,
* its ->sibling.next is no longer updated when its next sibling
* changes. As CGRP_DEAD assertion is serialized and happens
* before the cgroup is taken off the ->sibling list, if we see it
* unasserted, it's guaranteed that the next sibling hasn't
* finished its grace period even if it's already removed, and thus
* safe to dereference from this RCU critical section. If
* ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
* to be visible as %true here.
*
* If @pos is dead, its next pointer can't be dereferenced;
* however, as each cgroup is given a monotonically increasing
* unique serial number and always appended to the sibling list,
* the next one can be found by walking the parent's children until
* we see a cgroup with higher serial number than @pos's. While
* this path can be slower, it's taken only when either the current
* cgroup is removed or iteration and removal race.
if (!pos) {
next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
} else if (likely(!cgroup_is_dead(pos))) {
next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
} else {
list_for_each_entry_rcu(next, &cgrp->children, sibling)
if (next->serial_nr > pos->serial_nr)
break;
Tejun Heo
committed
if (&next->sibling == &cgrp->children)
return NULL;
return cgroup_css(next, parent_css->ss);
Tejun Heo
committed
EXPORT_SYMBOL_GPL(css_next_child);
Tejun Heo
committed
* css_next_descendant_pre - find the next descendant for pre-order walk
* @pos: the current position (%NULL to initiate traversal)
Tejun Heo
committed
* @root: css whose descendants to walk
Tejun Heo
committed
* To be used by css_for_each_descendant_pre(). Find the next descendant
Tejun Heo
committed
* to visit for pre-order traversal of @root's descendants. @root is