Newer
Older
static void css_free_rcu_fn(struct rcu_head *rcu_head)
{
struct cgroup_subsys_state *css =
container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
/*
* css holds an extra ref to @cgrp->dentry which is put on the last
* css_put(). dput() requires process context which we don't have.
*/
INIT_WORK(&css->destroy_work, css_free_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
static void css_release(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
call_rcu(&css->rcu_head, css_free_rcu_fn);
static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
struct cgroup *cgrp)
css->cgroup = cgrp;
css->parent = cgroup_css(cgrp->parent, ss);
css->flags |= CSS_ROOT;
BUG_ON(cgroup_css(cgrp, ss));
/* invoke ->css_online() on a new CSS and mark it online if successful */
static int online_css(struct cgroup_subsys_state *css)
struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
Tejun Heo
committed
if (ss->css_online)
ret = ss->css_online(css);
css->flags |= CSS_ONLINE;
rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css);
}
/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys_state *css)
struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
if (!(css->flags & CSS_ONLINE))
return;
ss->css_offline(css);
css->flags &= ~CSS_ONLINE;
css->cgroup->nr_css--;
RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
* @dentry: dentry of the new cgroup
* @mode: mode to set on new inode
* Must be called with the mutex on the parent inode held
*/
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
struct cgroup_subsys_state *css = NULL;
struct cgroup *cgrp;
struct cgroupfs_root *root = parent->root;
int err = 0;
struct cgroup_subsys *ss;
struct super_block *sb = root->sb;
/* allocate the cgroup and its ID, 0 is reserved for the root */
cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
if (!cgrp)
name = cgroup_alloc_name(dentry);
if (!name)
goto err_free_cgrp;
rcu_assign_pointer(cgrp->name, name);
/*
* Temporarily set the pointer to NULL, so idr_find() won't return
* a half-baked cgroup.
*/
cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
/*
* Only live parents can have children. Note that the liveliness
* check isn't strictly necessary because cgroup_mkdir() and
* cgroup_rmdir() are fully synchronized by i_mutex; however, do it
* anyway so that locking is contained inside cgroup proper and we
* don't get nasty surprises if we ever grow another caller.
*/
if (!cgroup_lock_live_group(parent)) {
err = -ENODEV;
/* Grab a reference on the superblock so the hierarchy doesn't
* get deleted on unmount if there are child cgroups. This
* can be done outside cgroup_mutex, since the sb can't
* disappear while someone has an open control file on the
* fs */
atomic_inc(&sb->s_active);
init_cgroup_housekeeping(cgrp);
dentry->d_fsdata = cgrp;
cgrp->dentry = dentry;
cgrp->parent = parent;
cgrp->dummy_css.parent = &parent->dummy_css;
cgrp->root = parent->root;
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
/*
* Create directory. cgroup_create_file() returns with the new
* directory locked on success so that it can be populated without
* dropping cgroup_mutex.
*/
err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
lockdep_assert_held(&dentry->d_inode->i_mutex);
cgrp->serial_nr = cgroup_serial_nr_next++;
/* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
root->number_of_cgroups++;
/* hold a ref to the parent's dentry */
dget(parent->dentry);
/*
* @cgrp is now fully operational. If something fails after this
* point, it'll be released via the normal destruction path.
*/
idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
if (err)
goto err_destroy;
/* let's create and online css's */
for_each_root_subsys(root, ss) {
css = ss->css_alloc(cgroup_css(parent, ss));
if (IS_ERR(css)) {
err = PTR_ERR(css);
goto err_destroy;
}
err = percpu_ref_init(&css->refcnt, css_release);
if (err)
goto err_destroy;
init_css(css, ss, cgrp);
err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id);
if (err)
goto err_destroy;
dget(dentry);
css_get(css->parent);
/* mark it consumed for error path */
if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
parent->parent) {
pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
current->comm, current->pid, ss->name);
if (!strcmp(ss->name, "memory"))
pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
ss->warned_broken_hierarchy = true;
}
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
idr_remove(&root->cgroup_idr, cgrp->id);
err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
if (css) {
percpu_ref_cancel_init(&css->refcnt);
css->ss->css_free(css);
cgroup_destroy_locked(cgrp);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&dentry->d_inode->i_mutex);
return err;
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct cgroup *c_parent = dentry->d_parent->d_fsdata;
/* the vfs holds inode->i_mutex already */
return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
/*
* This is called when the refcnt of a css is confirmed to be killed.
* css_tryget() is now guaranteed to fail.
*/
static void css_killed_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup *cgrp = css->cgroup;
mutex_lock(&cgroup_mutex);
/*
* css_tryget() is guaranteed to fail now. Tell subsystems to
* initate destruction.
*/
offline_css(css);
/*
* If @cgrp is marked dead, it's waiting for refs of all css's to
* be disabled before proceeding to the second phase of cgroup
* destruction. If we are the last one, kick it off.
*/
if (!cgrp->nr_css && cgroup_is_dead(cgrp))
cgroup_destroy_css_killed(cgrp);
mutex_unlock(&cgroup_mutex);
/*
* Put the css refs from kill_css(). Each css holds an extra
* reference to the cgroup's dentry and cgroup removal proceeds
* regardless of css refs. On the last put of each css, whenever
* that may be, the extra dentry ref is put so that dentry
* destruction happens only after all css's are released.
*/
css_put(css);
/* css kill confirmation processing requires process context, bounce */
static void css_killed_ref_fn(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_killed_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
/**
* kill_css - destroy a css
* @css: css to destroy
*
* This function initiates destruction of @css by removing cgroup interface
* files and putting its base reference. ->css_offline() will be invoked
* asynchronously once css_tryget() is guaranteed to fail and when the
* reference count reaches zero, @css will be released.
*/
static void kill_css(struct cgroup_subsys_state *css)
{
cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);
/*
* Killing would put the base ref, but we need to keep it alive
* until after ->css_offline().
*/
css_get(css);
/*
* cgroup core guarantees that, by the time ->css_offline() is
* invoked, no new css reference will be given out via
* css_tryget(). We can't simply call percpu_ref_kill() and
* proceed to offlining css's because percpu_ref_kill() doesn't
* guarantee that the ref is seen as killed on all CPUs on return.
*
* Use percpu_ref_kill_and_confirm() to get notifications as each
* css is confirmed to be seen as killed on all CPUs.
*/
percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
}
/**
* cgroup_destroy_locked - the first stage of cgroup destruction
* @cgrp: cgroup to be destroyed
*
* css's make use of percpu refcnts whose killing latency shouldn't be
* exposed to userland and are RCU protected. Also, cgroup core needs to
* guarantee that css_tryget() won't succeed by the time ->css_offline() is
* invoked. To satisfy all the requirements, destruction is implemented in
* the following two steps.
*
* s1. Verify @cgrp can be destroyed and mark it dying. Remove all
* userland visible parts and start killing the percpu refcnts of
* css's. Set up so that the next stage will be kicked off once all
* the percpu refcnts are confirmed to be killed.
*
* s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
* rest of destruction. Once all cgroup references are gone, the
* cgroup is RCU-freed.
*
* This function implements s1. After this step, @cgrp is gone as far as
* the userland is concerned and a new cgroup with the same name may be
* created. As cgroup doesn't care about the names internally, this
* doesn't cause any problem.
*/
static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
struct dentry *d = cgrp->dentry;
struct cgroup_subsys *ss;
lockdep_assert_held(&d->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
* css_set_lock synchronizes access to ->cset_links and prevents
* @cgrp from being removed while __put_css_set() is in progress.
*/
read_lock(&css_set_lock);
empty = list_empty(&cgrp->cset_links);
read_unlock(&css_set_lock);
if (!empty)
/*
* Make sure there's no live children. We can't test ->children
* emptiness as dead children linger on it while being destroyed;
* otherwise, "rmdir parent/child parent" may fail with -EBUSY.
*/
empty = true;
rcu_read_lock();
list_for_each_entry_rcu(child, &cgrp->children, sibling) {
empty = cgroup_is_dead(child);
if (!empty)
break;
}
rcu_read_unlock();
if (!empty)
return -EBUSY;
* Initiate massacre of all css's. cgroup_destroy_css_killed()
* will be invoked to perform the rest of destruction once the
* percpu refs of all css's are confirmed to be killed.
for_each_root_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
if (css)
kill_css(css);
}
/*
* Mark @cgrp dead. This prevents further task migration and child
* creation by disabling cgroup_lock_live_group(). Note that
Tejun Heo
committed
* CGRP_DEAD assertion is depended upon by css_next_child() to
* resume iteration after dropping RCU read lock. See
Tejun Heo
committed
* css_next_child() for details.
/* CGRP_DEAD is set, remove from ->release_list for the last time */
raw_spin_lock(&release_list_lock);
if (!list_empty(&cgrp->release_list))
list_del_init(&cgrp->release_list);
raw_spin_unlock(&release_list_lock);
/*
* If @cgrp has css's attached, the second stage of cgroup
* destruction is kicked off from css_killed_work_fn() after the
* refs of all attached css's are killed. If @cgrp doesn't have
* any css, we kick it off here.
*/
if (!cgrp->nr_css)
cgroup_destroy_css_killed(cgrp);
* Clear the base files and remove @cgrp directory. The removal
* puts the base ref but we aren't quite done with @cgrp yet, so
* hold onto it.
cgroup_addrm_files(cgrp, cgroup_base_files, false);
dget(d);
cgroup_d_remove_dir(d);
* cgroup_destroy_css_killed - the second step of cgroup destruction
* @work: cgroup->destroy_free_work
*
* This function is invoked from a work item for a cgroup which is being
* destroyed after all css's are offlined and performs the rest of
* destruction. This is the second step of destruction described in the
* comment above cgroup_destroy_locked().
static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{
struct cgroup *parent = cgrp->parent;
struct dentry *d = cgrp->dentry;
lockdep_assert_held(&cgroup_mutex);
/* delete this cgroup from parent->children */
* We should remove the cgroup object from idr before its grace
* period starts, so we won't be looking up a cgroup while the
* cgroup is being freed.
idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
int ret;
mutex_lock(&cgroup_mutex);
ret = cgroup_destroy_locked(dentry->d_fsdata);
mutex_unlock(&cgroup_mutex);
return ret;
}
static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
{
INIT_LIST_HEAD(&ss->cftsets);
/*
* base_cftset is embedded in subsys itself, no need to worry about
* deregistration.
*/
if (ss->base_cftypes) {
struct cftype *cft;
for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++)
cft->ss = ss;
ss->base_cftset.cfts = ss->base_cftypes;
list_add_tail(&ss->base_cftset.node, &ss->cftsets);
}
}
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
mutex_lock(&cgroup_mutex);
/* init base cftset */
cgroup_init_cftsets(ss);
/* Create the top cgroup state for this subsystem */
list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_css(css, ss, cgroup_dummy_top);
/* Update the init_css_set to contain a subsys
* pointer to this state - since the subsystem is
* newly registered, all tasks and hence the
* init_css_set is in the subsystem's top cgroup. */
init_css_set.subsys[ss->subsys_id] = css;
need_forkexit_callback |= ss->fork || ss->exit;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
BUG_ON(online_css(css));
mutex_unlock(&cgroup_mutex);
/* this function shouldn't be used with modular subsystems, since they
* need to register a subsys_id, among other things */
BUG_ON(ss->module);
}
/**
* cgroup_load_subsys: load and register a modular subsystem at runtime
* @ss: the subsystem to load
*
* This function should be called in a modular subsystem's initcall. If the
* subsystem is built as a module, it will be assigned a new subsys_id and set
* up for use. If the subsystem is built-in anyway, work is delegated to the
* simpler cgroup_init_subsys.
*/
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
struct css_set *cset;
/* check name and function validity */
if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
Tejun Heo
committed
ss->css_alloc == NULL || ss->css_free == NULL)
return -EINVAL;
/*
* we don't support callbacks in modular subsystems. this check is
* before the ss->module check for consistency; a subsystem that could
* be a module should still have no callbacks even if the user isn't
* compiling it as one.
*/
if (ss->fork || ss->exit)
return -EINVAL;
/*
* an optionally modular subsystem is built-in: we want to do nothing,
* since cgroup_init_subsys will have already taken care of it.
*/
if (ss->module == NULL) {
BUG_ON(cgroup_subsys[ss->subsys_id] != ss);
/* init base cftset */
cgroup_init_cftsets(ss);
mutex_lock(&cgroup_root_mutex);
cgroup_subsys[ss->subsys_id] = ss;
Tejun Heo
committed
* no ss->css_alloc seems to need anything important in the ss
* struct, so this can happen first (i.e. before the dummy root
Tejun Heo
committed
* attachment).
css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* failure case - need to deassign the cgroup_subsys[] slot. */
cgroup_subsys[ss->subsys_id] = NULL;
mutex_unlock(&cgroup_mutex);
return PTR_ERR(css);
}
list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
/* our new subsystem will be attached to the dummy hierarchy. */
init_css(css, ss, cgroup_dummy_top);
/*
* Now we need to entangle the css into the existing css_sets. unlike
* in cgroup_init_subsys, there are now multiple css_sets, so each one
* will need a new pointer to it; done by iterating the css_set_table.
* furthermore, modifying the existing css_sets will corrupt the hash
* table state, so each changed css_set will need its hash recomputed.
* this is all done under the css_set_lock.
*/
write_lock(&css_set_lock);
hash_for_each_safe(css_set_table, i, tmp, cset, hlist) {
/* skip entries that we already rehashed */
if (cset->subsys[ss->subsys_id])
continue;
/* remove existing entry */
hash_del(&cset->hlist);
cset->subsys[ss->subsys_id] = css;
/* recompute hash and restore entry */
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
}
write_unlock(&css_set_lock);
ret = online_css(css);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
return 0;
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
/* @ss can't be mounted here as try_module_get() would fail */
cgroup_unload_subsys(ss);
return ret;
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
/**
* cgroup_unload_subsys: unload a modular subsystem
* @ss: the subsystem to unload
*
* This function should be called in a modular subsystem's exitcall. When this
* function is invoked, the refcount on the subsystem's module will be 0, so
* the subsystem will not be attached to any hierarchy.
*/
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
struct cgrp_cset_link *link;
BUG_ON(ss->module == NULL);
/*
* we shouldn't be called if the subsystem is in use, and the use of
* try_module_get() in rebind_subsystems() should ensure that it
* doesn't start being used while we're killing it off.
*/
BUG_ON(ss->root != &cgroup_dummy_root);
mutex_lock(&cgroup_root_mutex);
Tejun Heo
committed
offline_css(cgroup_css(cgroup_dummy_top, ss));
Tejun Heo
committed
cgroup_subsys[ss->subsys_id] = NULL;
/* remove subsystem from the dummy root's list of subsystems */
list_del_init(&ss->sibling);
* disentangle the css from all css_sets attached to the dummy
* top. as in loading, we need to pay our respects to the hashtable
* gods.
list_for_each_entry(link, &cgroup_dummy_top->cset_links, cset_link) {
struct css_set *cset = link->cset;
hash_del(&cset->hlist);
cset->subsys[ss->subsys_id] = NULL;
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
}
write_unlock(&css_set_lock);
/*
* remove subsystem's css from the cgroup_dummy_top and free it -
* need to free before marking as null because ss->css_free needs
ss->css_free(cgroup_css(cgroup_dummy_top, ss));
RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*/
int __init cgroup_init_early(void)
{
atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cgrp_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&cgroup_dummy_root);
cgroup_root_count = 1;
RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
init_cgrp_cset_link.cset = &init_css_set;
init_cgrp_cset_link.cgrp = cgroup_dummy_top;
list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
/* at bootup time, we don't worry about modular subsystems */
for_each_builtin_subsys(ss, i) {
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
Tejun Heo
committed
BUG_ON(!ss->css_alloc);
BUG_ON(!ss->css_free);
if (ss->subsys_id != i) {
ss->name, ss->subsys_id);
BUG();
}
if (ss->early_init)
cgroup_init_subsys(ss);
}
return 0;
}
/**
* cgroup_init - cgroup initialization
*
* Register cgroup filesystem and /proc file, and initialize
* any subsystems that didn't request early init.
*/
int __init cgroup_init(void)
{
err = bdi_init(&cgroup_backing_dev_info);
if (err)
return err;
for_each_builtin_subsys(ss, i) {
if (!ss->early_init)
cgroup_init_subsys(ss);
}
/* allocate id for the dummy hierarchy */
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
/* Add init_css_set to the hash table */
key = css_set_hash(init_css_set.subsys);
hash_add(css_set_table, &init_css_set.hlist, key);
BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));
err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top,
0, 1, GFP_KERNEL);
BUG_ON(err < 0);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
if (!cgroup_kobj) {
err = -ENOMEM;
goto out;
}
err = register_filesystem(&cgroup_fs_type);
if (err < 0) {
kobject_put(cgroup_kobj);
proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
if (err)
bdi_destroy(&cgroup_backing_dev_info);
static int __init cgroup_wq_init(void)
{
/*
* There isn't much point in executing destruction path in
* parallel. Good chunk is serialized with cgroup_mutex anyway.
* Use 1 for @max_active.
*
* We would prefer to do this in cgroup_init() above, but that
* is called before init_workqueues(): so leave this until after.
*/
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
BUG_ON(!cgroup_destroy_wq);
/*
* Used to destroy pidlists and separate to serve as flush domain.
* Cap @max_active to 1 too.
*/
cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
0, 1);
BUG_ON(!cgroup_pidlist_destroy_wq);
return 0;
}
core_initcall(cgroup_wq_init);
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
* - Used for /proc/<pid>/cgroup.
* - No need to task_lock(tsk) on this tsk->cgroup reference, as it
* doesn't really matter if tsk->cgroup changes after we read it,
* and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
* anyway. No need to check that tsk->cgroup != NULL, thanks to
* the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
* cgroup to top_cgroup.
*/
/* TODO: Use a proper seq_file iterator */
int proc_cgroup_show(struct seq_file *m, void *v)
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
{
struct pid *pid;
struct task_struct *tsk;
char *buf;
int retval;
struct cgroupfs_root *root;
retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto out;
retval = -ESRCH;
pid = m->private;
tsk = get_pid_task(pid, PIDTYPE_PID);
if (!tsk)
goto out_free;
retval = 0;
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
struct cgroup *cgrp;
seq_printf(m, "%d:", root->hierarchy_id);
for_each_root_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
if (strlen(root->name))
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
seq_puts(m, buf);
seq_putc(m, '\n');
}
out_unlock:
mutex_unlock(&cgroup_mutex);
put_task_struct(tsk);
out_free:
kfree(buf);
out:
return retval;
}
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
* subsys/hierarchy state.
*/
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->name, ss->root->hierarchy_id,
ss->root->number_of_cgroups, !ss->disabled);
mutex_unlock(&cgroup_mutex);
return 0;
}
static int cgroupstats_open(struct inode *inode, struct file *file)
{
return single_open(file, proc_cgroupstats_show, NULL);
static const struct file_operations proc_cgroupstats_operations = {
.open = cgroupstats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/**
* cgroup_fork - attach newly forked task to its parents cgroup.
* @child: pointer to task_struct of forking parent process.
*
* Description: A task inherits its parent's cgroup at fork().
*
* A pointer to the shared css_set was automatically copied in
* fork.c by dup_task_struct(). However, we ignore that copy, since
* it was not made under the protection of RCU or cgroup_mutex, so
* might no longer be a valid cgroup pointer. cgroup_attach_task() might
* have already changed current->cgroups, allowing the previously
* referenced cgroup group to be removed and freed.
*
* At the point that cgroup_fork() is called, 'current' is the parent
* task, and the passed argument 'child' points to the child task.
*/
void cgroup_fork(struct task_struct *child)
{
task_lock(current);
get_css_set(task_css_set(current));
child->cgroups = current->cgroups;
task_unlock(current);
INIT_LIST_HEAD(&child->cg_list);
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
*
Tejun Heo
committed
* Adds the task to the list running through its css_set if necessary and
* call the subsystem fork() callbacks. Has to be after the task is
* visible on the task list in case we race with the first call to
* cgroup_task_iter_start() - to guarantee that the new task ends up on its
Tejun Heo
committed
* list.
void cgroup_post_fork(struct task_struct *child)
{
Tejun Heo
committed
int i;
Frederic Weisbecker
committed
/*
* use_task_css_set_links is set to 1 before we walk the tasklist
* under the tasklist_lock and we read it here after we added the child
* to the tasklist under the tasklist_lock as well. If the child wasn't
* yet in the tasklist when we walked through it from
* cgroup_enable_task_cg_lists(), then use_task_css_set_links value
* should be visible now due to the paired locking and barriers implied
* by LOCK/UNLOCK: it is written before the tasklist_lock unlock
* in cgroup_enable_task_cg_lists() and read here after the tasklist_lock
* lock on fork.
*/
if (use_task_css_set_links) {
write_lock(&css_set_lock);
task_lock(child);
if (list_empty(&child->cg_list))
list_add(&child->cg_list, &task_css_set(child)->tasks);
task_unlock(child);
write_unlock(&css_set_lock);
}