Newer
Older
/* pre-allocate to guarantee space while iterating in rcu read-side. */
retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
if (retval)
goto out_free_group_list;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
rcu_read_lock();
struct task_and_cgroup ent;
/* @tsk either already exited or can't exit until the end */
if (tsk->flags & PF_EXITING)
continue;
/* as per above, nr_threads may decrease, but not increase. */
BUG_ON(i >= group_size);
ent.task = tsk;
ent.cgrp = task_cgroup_from_root(tsk, root);
/* nothing to do if this task is already in the cgroup */
if (ent.cgrp == cgrp)
continue;
/*
* saying GFP_ATOMIC has no effect here because we did prealloc
* earlier, but it's good form to communicate our expectations.
*/
retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
if (!threadgroup)
break;
rcu_read_unlock();
/* remember the number of threads in the array for later. */
group_size = i;
Tejun Heo
committed
tset.tc_array = group;
tset.tc_array_len = group_size;
/* methods shouldn't be called if no task is actually migrating */
retval = 0;
if (!group_size)
goto out_free_group_list;
/*
* step 1: check that we can legitimately attach to the cgroup.
*/
for_each_subsys(root, ss) {
if (ss->can_attach) {
retval = ss->can_attach(cgrp, &tset);
if (retval) {
failed_ss = ss;
goto out_cancel_attach;
}
}
}
/*
* step 2: make sure css_sets exist for all threads to be migrated.
* we use find_css_set, which allocates a new one if necessary.
*/
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
tc->cg = find_css_set(tc->task->cgroups, cgrp);
if (!tc->cg) {
retval = -ENOMEM;
goto out_put_css_set_refs;
* step 3: now that we're guaranteed success wrt the css_sets,
* proceed to move all tasks to the new cgroup. There are no
* failure cases after here, so this is the commit point.
*/
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
}
/* nothing is sensitive to fork() after this point. */
/*
* step 4: do subsystem attach callbacks.
*/
for_each_subsys(root, ss) {
if (ss->attach)
ss->attach(cgrp, &tset);
}
/*
* step 5: success! and cleanup
*/
retval = 0;
out_put_css_set_refs:
if (retval) {
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
if (!tc->cg)
break;
put_css_set(tc->cg);
}
}
out_cancel_attach:
if (retval) {
for_each_subsys(root, ss) {
if (ss == failed_ss)
ss->cancel_attach(cgrp, &tset);
return retval;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup; may take task_lock of task.
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
struct task_struct *tsk;
David Howells
committed
const struct cred *cred = current_cred(), *tcred;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
retry_find_task:
rcu_read_lock();
ret= -ESRCH;
goto out_unlock_cgroup;
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
David Howells
committed
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
David Howells
committed
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
} else
tsk = current;
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
out_unlock_cgroup:
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroupfs_root *root;
int retval = 0;
for_each_active_root(root) {
struct cgroup *from_cg = task_cgroup_from_root(from, root);
retval = cgroup_attach_task(from_cg, tsk, false);
if (retval)
break;
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
{
return attach_task_by_pid(cgrp, pid, false);
}
static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
{
return attach_task_by_pid(cgrp, tgid, true);
}
static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
const char *buffer)
{
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
if (strlen(buffer) >= PATH_MAX)
return -EINVAL;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
strcpy(cgrp->root->release_agent_path, buffer);
return 0;
}
static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
struct seq_file *seq)
{
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
seq_puts(seq, cgrp->root->release_agent_path);
seq_putc(seq, '\n');
return 0;
}
static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft,
struct seq_file *seq)
{
seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
return 0;
}
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
char buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
char *end;
if (!nbytes)
return -EINVAL;
if (nbytes >= sizeof(buffer))
return -E2BIG;
if (copy_from_user(buffer, userbuf, nbytes))
return -EFAULT;
buffer[nbytes] = 0; /* nul-terminate */
if (cft->write_u64) {
u64 val = simple_strtoull(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
retval = cft->write_u64(cgrp, cft, val);
} else {
s64 val = simple_strtoll(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
retval = cft->write_s64(cgrp, cft, val);
}
if (!retval)
retval = nbytes;
return retval;
}
static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
int retval = 0;
size_t max_bytes = cft->max_write_len;
char *buffer = local_buffer;
if (!max_bytes)
max_bytes = sizeof(local_buffer) - 1;
if (nbytes >= max_bytes)
return -E2BIG;
/* Allocate a dynamic buffer if we need one */
if (nbytes >= sizeof(local_buffer)) {
buffer = kmalloc(nbytes + 1, GFP_KERNEL);
if (buffer == NULL)
return -ENOMEM;
}
if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
retval = -EFAULT;
goto out;
}
buffer[nbytes] = 0; /* nul-terminate */
retval = cft->write_string(cgrp, cft, strstrip(buffer));
if (!retval)
retval = nbytes;
if (buffer != local_buffer)
kfree(buffer);
return retval;
}
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
if (cft->write_u64 || cft->write_s64)
return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
if (cft->write_string)
return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
if (cft->trigger) {
int ret = cft->trigger(cgrp, (unsigned int)cft->private);
return ret ? ret : nbytes;
}
static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes,
loff_t *ppos)
char tmp[CGROUP_LOCAL_BUFFER_SIZE];
u64 val = cft->read_u64(cgrp, cft);
int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes,
loff_t *ppos)
{
char tmp[CGROUP_LOCAL_BUFFER_SIZE];
s64 val = cft->read_s64(cgrp, cft);
int len = sprintf(tmp, "%lld\n", (long long) val);
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
return -ENODEV;
if (cft->read)
return cft->read(cgrp, cft, file, buf, nbytes, ppos);
if (cft->read_u64)
return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
if (cft->read_s64)
return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
return -EINVAL;
}
/*
* seqfile ops/methods for returning structured data. Currently just
* supports string->u64 maps, but can be extended in future.
*/
struct cgroup_seqfile_state {
struct cftype *cft;
struct cgroup *cgroup;
};
static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
{
struct seq_file *sf = cb->state;
return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
}
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
struct cgroup_seqfile_state *state = m->private;
struct cftype *cft = state->cft;
if (cft->read_map) {
struct cgroup_map_cb cb = {
.fill = cgroup_map_add,
.state = m,
};
return cft->read_map(state->cgroup, cft, &cb);
}
return cft->read_seq_string(state->cgroup, cft, m);
static int cgroup_seqfile_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
kfree(seq->private);
return single_release(inode, file);
}
static const struct file_operations cgroup_seqfile_operations = {
.write = cgroup_file_write,
.llseek = seq_lseek,
.release = cgroup_seqfile_release,
};
static int cgroup_file_open(struct inode *inode, struct file *file)
{
int err;
struct cftype *cft;
err = generic_file_open(inode, file);
if (err)
return err;
cft = __d_cft(file->f_dentry);
if (cft->read_map || cft->read_seq_string) {
struct cgroup_seqfile_state *state;
state = kzalloc(sizeof(*state), GFP_USER);
if (!state)
return -ENOMEM;
state->cft = cft;
state->cgroup = __d_cgrp(file->f_dentry->d_parent);
file->f_op = &cgroup_seqfile_operations;
err = single_open(file, cgroup_seqfile_show, state);
if (err < 0)
kfree(state);
} else if (cft->open)
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
err = cft->open(inode, file);
else
err = 0;
return err;
}
static int cgroup_file_release(struct inode *inode, struct file *file)
{
struct cftype *cft = __d_cft(file->f_dentry);
if (cft->release)
return cft->release(inode, file);
return 0;
}
/*
* cgroup_rename - Only allow simple rename of directories in place.
*/
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
int ret;
struct cgroup_name *name, *old_name;
struct cgroup *cgrp;
/*
* It's convinient to use parent dir's i_mutex to protected
* cgrp->name.
*/
lockdep_assert_held(&old_dir->i_mutex);
if (!S_ISDIR(old_dentry->d_inode->i_mode))
return -ENOTDIR;
if (new_dentry->d_inode)
return -EEXIST;
if (old_dir != new_dir)
return -EIO;
cgrp = __d_cgrp(old_dentry);
name = cgroup_alloc_name(new_dentry);
if (!name)
return -ENOMEM;
ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
if (ret) {
kfree(name);
return ret;
}
old_name = cgrp->name;
rcu_assign_pointer(cgrp->name, name);
kfree_rcu(old_name, rcu_head);
return 0;
static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
{
if (S_ISDIR(dentry->d_inode->i_mode))
return &__d_cgrp(dentry)->xattrs;
else
}
static inline int xattr_enabled(struct dentry *dentry)
{
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
return root->flags & CGRP_ROOT_XATTR;
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
}
static bool is_valid_xattr(const char *name)
{
if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
return true;
return false;
}
static int cgroup_setxattr(struct dentry *dentry, const char *name,
const void *val, size_t size, int flags)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
if (!is_valid_xattr(name))
return -EINVAL;
return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags);
}
static int cgroup_removexattr(struct dentry *dentry, const char *name)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
if (!is_valid_xattr(name))
return -EINVAL;
return simple_xattr_remove(__d_xattrs(dentry), name);
}
static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
void *buf, size_t size)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
if (!is_valid_xattr(name))
return -EINVAL;
return simple_xattr_get(__d_xattrs(dentry), name, buf, size);
}
static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
{
if (!xattr_enabled(dentry))
return -EOPNOTSUPP;
return simple_xattr_list(__d_xattrs(dentry), buf, size);
}
static const struct file_operations cgroup_file_operations = {
.read = cgroup_file_read,
.write = cgroup_file_write,
.llseek = generic_file_llseek,
.open = cgroup_file_open,
.release = cgroup_file_release,
};
static const struct inode_operations cgroup_file_inode_operations = {
.setxattr = cgroup_setxattr,
.getxattr = cgroup_getxattr,
.listxattr = cgroup_listxattr,
.removexattr = cgroup_removexattr,
};
static const struct inode_operations cgroup_dir_inode_operations = {
.mkdir = cgroup_mkdir,
.rmdir = cgroup_rmdir,
.rename = cgroup_rename,
.setxattr = cgroup_setxattr,
.getxattr = cgroup_getxattr,
.listxattr = cgroup_listxattr,
.removexattr = cgroup_removexattr,
static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
d_add(dentry, NULL);
return NULL;
}
/*
* Check if a file is a control file
*/
static inline struct cftype *__file_cft(struct file *file)
{
if (file_inode(file)->i_fop != &cgroup_file_operations)
return ERR_PTR(-EINVAL);
return __d_cft(file->f_dentry);
}
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
struct super_block *sb)
{
struct inode *inode;
if (!dentry)
return -ENOENT;
if (dentry->d_inode)
return -EEXIST;
inode = cgroup_new_inode(mode, sb);
if (!inode)
return -ENOMEM;
if (S_ISDIR(mode)) {
inode->i_op = &cgroup_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
inc_nlink(dentry->d_parent->d_inode);
/*
* Control reaches here with cgroup_mutex held.
* @inode->i_mutex should nest outside cgroup_mutex but we
* want to populate it immediately without releasing
* cgroup_mutex. As @inode isn't visible to anyone else
* yet, trylock will always succeed without affecting
* lockdep checks.
*/
WARN_ON_ONCE(!mutex_trylock(&inode->i_mutex));
} else if (S_ISREG(mode)) {
inode->i_size = 0;
inode->i_fop = &cgroup_file_operations;
inode->i_op = &cgroup_file_inode_operations;
}
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
return 0;
}
/**
* cgroup_file_mode - deduce file mode of a control file
* @cft: the control file in question
*
* returns cft->mode if ->mode is not 0
* returns S_IRUGO|S_IWUSR if it has both a read and a write handler
* returns S_IRUGO if it has only a read handler
* returns S_IWUSR if it has only a write hander
*/
static umode_t cgroup_file_mode(const struct cftype *cft)
if (cft->mode)
return cft->mode;
if (cft->read || cft->read_u64 || cft->read_s64 ||
cft->read_map || cft->read_seq_string)
mode |= S_IRUGO;
if (cft->write || cft->write_u64 || cft->write_s64 ||
cft->write_string || cft->trigger)
mode |= S_IWUSR;
return mode;
}
static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
struct dentry *dir = cgrp->dentry;
char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
strcpy(name, subsys->name);
strcat(name, ".");
}
strcat(name, cft->name);
BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
if (!cfe)
return -ENOMEM;
dentry = lookup_one_len(name, dir, strlen(name));
error = PTR_ERR(dentry);
cfe->type = (void *)cft;
cfe->dentry = dentry;
dentry->d_fsdata = cfe;
simple_xattrs_init(&cfe->xattrs);
mode = cgroup_file_mode(cft);
error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
if (!error) {
list_add_tail(&cfe->node, &parent->files);
cfe = NULL;
}
dput(dentry);
out:
kfree(cfe);
return error;
}
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
int err, ret = 0;
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
continue;
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
continue;
err = cgroup_add_file(cgrp, subsys, cft);
if (err)
pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
cft->name, err);
} else {
cgroup_rm_file(cgrp, cft);
static DEFINE_MUTEX(cgroup_cft_mutex);
static void cgroup_cfts_prepare(void)
__acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
{
/*
* Thanks to the entanglement with vfs inode locking, we can't walk
* the existing cgroups under cgroup_mutex and create files.
* Instead, we increment reference on all cgroups and build list of
* them using @cgrp->cft_q_node. Grab cgroup_cft_mutex to ensure
* exclusive access to the field.
*/
mutex_lock(&cgroup_cft_mutex);
mutex_lock(&cgroup_mutex);
}
static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
__releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
{
LIST_HEAD(pending);
struct cgroup *cgrp, *n;
/* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
if (cfts && ss->root != &rootnode) {
list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
dget(cgrp->dentry);
list_add_tail(&cgrp->cft_q_node, &pending);
}
}
mutex_unlock(&cgroup_mutex);
/*
* All new cgroups will see @cfts update on @ss->cftsets. Add/rm
* files for all cgroups which were created before.
*/
list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
struct inode *inode = cgrp->dentry->d_inode;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
cgroup_addrm_files(cgrp, ss, cfts, is_add);
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
list_del_init(&cgrp->cft_q_node);
dput(cgrp->dentry);
}
mutex_unlock(&cgroup_cft_mutex);
}
/**
* cgroup_add_cftypes - add an array of cftypes to a subsystem
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Register @cfts to @ss. Files described by @cfts are created for all
* existing cgroups to which @ss is attached and all future cgroups will
* have them too. This function can be called anytime whether @ss is
* attached or not.
*
* Returns 0 on successful registration, -errno on failure. Note that this
* function currently returns 0 as long as @cfts registration is successful
* even if some file creation attempts on existing cgroups fail.
*/
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype_set *set;
set = kzalloc(sizeof(*set), GFP_KERNEL);
if (!set)
return -ENOMEM;
cgroup_cfts_prepare();
set->cfts = cfts;
list_add_tail(&set->node, &ss->cftsets);
return 0;
}
EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
/**
* cgroup_rm_cftypes - remove an array of cftypes from a subsystem
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Unregister @cfts from @ss. Files described by @cfts are removed from
* all existing cgroups to which @ss is attached and all future cgroups
* won't have them either. This function can be called anytime whether @ss
* is attached or not.
*
* Returns 0 on successful unregistration, -ENOENT if @cfts is not
* registered with @ss.
*/
int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype_set *set;
cgroup_cfts_prepare();
list_for_each_entry(set, &ss->cftsets, node) {
if (set->cfts == cfts) {
list_del_init(&set->node);
cgroup_cfts_commit(ss, cfts, false);
return 0;
}
}
cgroup_cfts_commit(ss, NULL, false);
return -ENOENT;
}
/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
* Return the number of tasks in the cgroup.
*/
int cgroup_task_count(const struct cgroup *cgrp)
struct cgrp_cset_link *link;
read_lock(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += atomic_read(&link->cset->refcount);
read_unlock(&css_set_lock);
/*
* Advance a list_head iterator. The iterator should be positioned at
* the start of a css_set
*/
static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it)
struct list_head *l = it->cset_link;
struct cgrp_cset_link *link;
struct css_set *cset;
/* Advance to the next non-empty css_set */
do {
l = l->next;
if (l == &cgrp->cset_links) {
it->cset_link = NULL;
link = list_entry(l, struct cgrp_cset_link, cset_link);
cset = link->cset;
} while (list_empty(&cset->tasks));
it->cset_link = l;
it->task = cset->tasks.next;
/*
* To reduce the fork() overhead for systems that are not actually
* using their cgroups capability, we don't maintain the lists running
* through each css_set to its tasks until we see the list actually
* used - in other words after the first call to cgroup_iter_start().
*/
static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
write_lock(&css_set_lock);
use_task_css_set_links = 1;
Frederic Weisbecker
committed
/*
* We need tasklist_lock because RCU is not safe against
* while_each_thread(). Besides, a forking task that has passed
* cgroup_post_fork() without seeing use_task_css_set_links = 1
* is not guaranteed to have its child immediately visible in the
* tasklist if we walk through it with RCU.
*/
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
/*
* We should check if the process is exiting, otherwise
* it will race with cgroup_exit() in that the list
* entry won't be deleted though the process has exited.
*/
if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
list_add(&p->cg_list, &p->cgroups->tasks);
task_unlock(p);
} while_each_thread(g, p);
Frederic Weisbecker
committed
read_unlock(&tasklist_lock);
write_unlock(&css_set_lock);
}
/**
* cgroup_next_sibling - find the next sibling of a given cgroup
* @pos: the current cgroup
*
* This function returns the next sibling of @pos and should be called
* under RCU read lock. The only requirement is that @pos is accessible.
* The next sibling is guaranteed to be returned regardless of @pos's
* state.
*/
struct cgroup *cgroup_next_sibling(struct cgroup *pos)
{
struct cgroup *next;
WARN_ON_ONCE(!rcu_read_lock_held());
/*
* @pos could already have been removed. Once a cgroup is removed,
* its ->sibling.next is no longer updated when its next sibling
* changes. As CGRP_DEAD assertion is serialized and happens
* before the cgroup is taken off the ->sibling list, if we see it
* unasserted, it's guaranteed that the next sibling hasn't
* finished its grace period even if it's already removed, and thus
* safe to dereference from this RCU critical section. If
* ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
* to be visible as %true here.
if (likely(!cgroup_is_dead(pos))) {
next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
if (&next->sibling != &pos->parent->children)
return next;
return NULL;
}
/*
* Can't dereference the next pointer. Each cgroup is given a
* monotonically increasing unique serial number and always
* appended to the sibling list, so the next one can be found by