Newer
Older
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup *cgrp = css->cgroup;
if (css->parent)
css_put(css->parent);
css->ss->css_free(css);
cgroup_dput(cgrp);
static void css_free_rcu_fn(struct rcu_head *rcu_head)
{
struct cgroup_subsys_state *css =
container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
/*
* css holds an extra ref to @cgrp->dentry which is put on the last
* css_put(). dput() requires process context which we don't have.
*/
INIT_WORK(&css->destroy_work, css_free_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
static void css_release(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
call_rcu(&css->rcu_head, css_free_rcu_fn);
static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
struct cgroup *cgrp)
css->cgroup = cgrp;
css->parent = cgroup_css(cgrp->parent, ss);
css->flags |= CSS_ROOT;
BUG_ON(cgroup_css(cgrp, ss));
/* invoke ->css_online() on a new CSS and mark it online if successful */
static int online_css(struct cgroup_subsys_state *css)
struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
Tejun Heo
committed
if (ss->css_online)
ret = ss->css_online(css);
css->flags |= CSS_ONLINE;
rcu_assign_pointer(css->cgroup->subsys[ss->subsys_id], css);
}
/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys_state *css)
struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
if (!(css->flags & CSS_ONLINE))
return;
ss->css_offline(css);
css->flags &= ~CSS_ONLINE;
css->cgroup->nr_css--;
RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
/**
* create_css - create a cgroup_subsys_state
* @cgrp: the cgroup new css will be associated with
* @ss: the subsys of new css
*
* Create a new css associated with @cgrp - @ss pair. On success, the new
* css is online and installed in @cgrp with all interface files created.
* Returns 0 on success, -errno on failure.
*/
static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
{
struct cgroup *parent = cgrp->parent;
struct cgroup_subsys_state *css;
int err;
lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
css = ss->css_alloc(cgroup_css(parent, ss));
if (IS_ERR(css))
return PTR_ERR(css);
err = percpu_ref_init(&css->refcnt, css_release);
if (err)
goto err_free;
init_css(css, ss, cgrp);
err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id);
if (err)
goto err_free;
err = online_css(css);
if (err)
goto err_free;
dget(cgrp->dentry);
css_get(css->parent);
if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
parent->parent) {
pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
current->comm, current->pid, ss->name);
if (!strcmp(ss->name, "memory"))
pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
ss->warned_broken_hierarchy = true;
}
return 0;
err_free:
percpu_ref_cancel_init(&css->refcnt);
ss->css_free(css);
return err;
}
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
* @dentry: dentry of the new cgroup
* @mode: mode to set on new inode
* Must be called with the mutex on the parent inode held
*/
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
struct cgroup *cgrp;
struct cgroupfs_root *root = parent->root;
int err = 0;
struct cgroup_subsys *ss;
struct super_block *sb = root->sb;
/* allocate the cgroup and its ID, 0 is reserved for the root */
cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
if (!cgrp)
name = cgroup_alloc_name(dentry);
if (!name)
goto err_free_cgrp;
rcu_assign_pointer(cgrp->name, name);
/*
* Temporarily set the pointer to NULL, so idr_find() won't return
* a half-baked cgroup.
*/
cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
/*
* Only live parents can have children. Note that the liveliness
* check isn't strictly necessary because cgroup_mkdir() and
* cgroup_rmdir() are fully synchronized by i_mutex; however, do it
* anyway so that locking is contained inside cgroup proper and we
* don't get nasty surprises if we ever grow another caller.
*/
if (!cgroup_lock_live_group(parent)) {
err = -ENODEV;
/* Grab a reference on the superblock so the hierarchy doesn't
* get deleted on unmount if there are child cgroups. This
* can be done outside cgroup_mutex, since the sb can't
* disappear while someone has an open control file on the
* fs */
atomic_inc(&sb->s_active);
init_cgroup_housekeeping(cgrp);
dentry->d_fsdata = cgrp;
cgrp->dentry = dentry;
cgrp->parent = parent;
cgrp->dummy_css.parent = &parent->dummy_css;
cgrp->root = parent->root;
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
/*
* Create directory. cgroup_create_file() returns with the new
* directory locked on success so that it can be populated without
* dropping cgroup_mutex.
*/
err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
lockdep_assert_held(&dentry->d_inode->i_mutex);
cgrp->serial_nr = cgroup_serial_nr_next++;
/* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
root->number_of_cgroups++;
/* hold a ref to the parent's dentry */
dget(parent->dentry);
/*
* @cgrp is now fully operational. If something fails after this
* point, it'll be released via the normal destruction path.
*/
idr_replace(&root->cgroup_idr, cgrp, cgrp->id);
err = cgroup_addrm_files(cgrp, cgroup_base_files, true);
if (err)
goto err_destroy;
/* let's create and online css's */
for_each_root_subsys(root, ss) {
err = create_css(cgrp, ss);
if (err)
goto err_destroy;
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
idr_remove(&root->cgroup_idr, cgrp->id);
err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
err_destroy:
cgroup_destroy_locked(cgrp);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&dentry->d_inode->i_mutex);
return err;
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct cgroup *c_parent = dentry->d_parent->d_fsdata;
/* the vfs holds inode->i_mutex already */
return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
/*
* This is called when the refcnt of a css is confirmed to be killed.
* css_tryget() is now guaranteed to fail.
*/
static void css_killed_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup *cgrp = css->cgroup;
mutex_lock(&cgroup_mutex);
/*
* css_tryget() is guaranteed to fail now. Tell subsystems to
* initate destruction.
*/
offline_css(css);
/*
* If @cgrp is marked dead, it's waiting for refs of all css's to
* be disabled before proceeding to the second phase of cgroup
* destruction. If we are the last one, kick it off.
*/
if (!cgrp->nr_css && cgroup_is_dead(cgrp))
cgroup_destroy_css_killed(cgrp);
mutex_unlock(&cgroup_mutex);
/*
* Put the css refs from kill_css(). Each css holds an extra
* reference to the cgroup's dentry and cgroup removal proceeds
* regardless of css refs. On the last put of each css, whenever
* that may be, the extra dentry ref is put so that dentry
* destruction happens only after all css's are released.
*/
css_put(css);
/* css kill confirmation processing requires process context, bounce */
static void css_killed_ref_fn(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_killed_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
/**
* kill_css - destroy a css
* @css: css to destroy
*
* This function initiates destruction of @css by removing cgroup interface
* files and putting its base reference. ->css_offline() will be invoked
* asynchronously once css_tryget() is guaranteed to fail and when the
* reference count reaches zero, @css will be released.
*/
static void kill_css(struct cgroup_subsys_state *css)
{
cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id);
/*
* Killing would put the base ref, but we need to keep it alive
* until after ->css_offline().
*/
css_get(css);
/*
* cgroup core guarantees that, by the time ->css_offline() is
* invoked, no new css reference will be given out via
* css_tryget(). We can't simply call percpu_ref_kill() and
* proceed to offlining css's because percpu_ref_kill() doesn't
* guarantee that the ref is seen as killed on all CPUs on return.
*
* Use percpu_ref_kill_and_confirm() to get notifications as each
* css is confirmed to be seen as killed on all CPUs.
*/
percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
}
/**
* cgroup_destroy_locked - the first stage of cgroup destruction
* @cgrp: cgroup to be destroyed
*
* css's make use of percpu refcnts whose killing latency shouldn't be
* exposed to userland and are RCU protected. Also, cgroup core needs to
* guarantee that css_tryget() won't succeed by the time ->css_offline() is
* invoked. To satisfy all the requirements, destruction is implemented in
* the following two steps.
*
* s1. Verify @cgrp can be destroyed and mark it dying. Remove all
* userland visible parts and start killing the percpu refcnts of
* css's. Set up so that the next stage will be kicked off once all
* the percpu refcnts are confirmed to be killed.
*
* s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
* rest of destruction. Once all cgroup references are gone, the
* cgroup is RCU-freed.
*
* This function implements s1. After this step, @cgrp is gone as far as
* the userland is concerned and a new cgroup with the same name may be
* created. As cgroup doesn't care about the names internally, this
* doesn't cause any problem.
*/
static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
struct dentry *d = cgrp->dentry;
lockdep_assert_held(&d->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
* css_set_lock synchronizes access to ->cset_links and prevents
* @cgrp from being removed while __put_css_set() is in progress.
*/
read_lock(&css_set_lock);
empty = list_empty(&cgrp->cset_links);
read_unlock(&css_set_lock);
if (!empty)
/*
* Make sure there's no live children. We can't test ->children
* emptiness as dead children linger on it while being destroyed;
* otherwise, "rmdir parent/child parent" may fail with -EBUSY.
*/
empty = true;
rcu_read_lock();
list_for_each_entry_rcu(child, &cgrp->children, sibling) {
empty = cgroup_is_dead(child);
if (!empty)
break;
}
rcu_read_unlock();
if (!empty)
return -EBUSY;
* Initiate massacre of all css's. cgroup_destroy_css_killed()
* will be invoked to perform the rest of destruction once the
* percpu refs of all css's are confirmed to be killed.
for_each_css(css, ssid, cgrp)
kill_css(css);
/*
* Mark @cgrp dead. This prevents further task migration and child
* creation by disabling cgroup_lock_live_group(). Note that
Tejun Heo
committed
* CGRP_DEAD assertion is depended upon by css_next_child() to
* resume iteration after dropping RCU read lock. See
Tejun Heo
committed
* css_next_child() for details.
/* CGRP_DEAD is set, remove from ->release_list for the last time */
raw_spin_lock(&release_list_lock);
if (!list_empty(&cgrp->release_list))
list_del_init(&cgrp->release_list);
raw_spin_unlock(&release_list_lock);
/*
* If @cgrp has css's attached, the second stage of cgroup
* destruction is kicked off from css_killed_work_fn() after the
* refs of all attached css's are killed. If @cgrp doesn't have
* any css, we kick it off here.
*/
if (!cgrp->nr_css)
cgroup_destroy_css_killed(cgrp);
* Clear the base files and remove @cgrp directory. The removal
* puts the base ref but we aren't quite done with @cgrp yet, so
* hold onto it.
cgroup_addrm_files(cgrp, cgroup_base_files, false);
dget(d);
cgroup_d_remove_dir(d);
* cgroup_destroy_css_killed - the second step of cgroup destruction
* @work: cgroup->destroy_free_work
*
* This function is invoked from a work item for a cgroup which is being
* destroyed after all css's are offlined and performs the rest of
* destruction. This is the second step of destruction described in the
* comment above cgroup_destroy_locked().
static void cgroup_destroy_css_killed(struct cgroup *cgrp)
{
struct cgroup *parent = cgrp->parent;
struct dentry *d = cgrp->dentry;
lockdep_assert_held(&cgroup_mutex);
/* delete this cgroup from parent->children */
* We should remove the cgroup object from idr before its grace
* period starts, so we won't be looking up a cgroup while the
* cgroup is being freed.
idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
int ret;
mutex_lock(&cgroup_mutex);
ret = cgroup_destroy_locked(dentry->d_fsdata);
mutex_unlock(&cgroup_mutex);
return ret;
}
static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
{
INIT_LIST_HEAD(&ss->cftsets);
/*
* base_cftset is embedded in subsys itself, no need to worry about
* deregistration.
*/
if (ss->base_cftypes) {
struct cftype *cft;
for (cft = ss->base_cftypes; cft->name[0] != '\0'; cft++)
cft->ss = ss;
ss->base_cftset.cfts = ss->base_cftypes;
list_add_tail(&ss->base_cftset.node, &ss->cftsets);
}
}
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
mutex_lock(&cgroup_mutex);
/* init base cftset */
cgroup_init_cftsets(ss);
/* Create the top cgroup state for this subsystem */
list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_css(css, ss, cgroup_dummy_top);
/* Update the init_css_set to contain a subsys
* pointer to this state - since the subsystem is
* newly registered, all tasks and hence the
* init_css_set is in the subsystem's top cgroup. */
init_css_set.subsys[ss->subsys_id] = css;
need_forkexit_callback |= ss->fork || ss->exit;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
BUG_ON(online_css(css));
mutex_unlock(&cgroup_mutex);
/* this function shouldn't be used with modular subsystems, since they
* need to register a subsys_id, among other things */
BUG_ON(ss->module);
}
/**
* cgroup_load_subsys: load and register a modular subsystem at runtime
* @ss: the subsystem to load
*
* This function should be called in a modular subsystem's initcall. If the
* subsystem is built as a module, it will be assigned a new subsys_id and set
* up for use. If the subsystem is built-in anyway, work is delegated to the
* simpler cgroup_init_subsys.
*/
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
struct css_set *cset;
/* check name and function validity */
if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
Tejun Heo
committed
ss->css_alloc == NULL || ss->css_free == NULL)
return -EINVAL;
/*
* we don't support callbacks in modular subsystems. this check is
* before the ss->module check for consistency; a subsystem that could
* be a module should still have no callbacks even if the user isn't
* compiling it as one.
*/
if (ss->fork || ss->exit)
return -EINVAL;
/*
* an optionally modular subsystem is built-in: we want to do nothing,
* since cgroup_init_subsys will have already taken care of it.
*/
if (ss->module == NULL) {
BUG_ON(cgroup_subsys[ss->subsys_id] != ss);
/* init base cftset */
cgroup_init_cftsets(ss);
mutex_lock(&cgroup_root_mutex);
cgroup_subsys[ss->subsys_id] = ss;
Tejun Heo
committed
* no ss->css_alloc seems to need anything important in the ss
* struct, so this can happen first (i.e. before the dummy root
Tejun Heo
committed
* attachment).
css = ss->css_alloc(cgroup_css(cgroup_dummy_top, ss));
/* failure case - need to deassign the cgroup_subsys[] slot. */
cgroup_subsys[ss->subsys_id] = NULL;
mutex_unlock(&cgroup_mutex);
return PTR_ERR(css);
}
list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
ss->root = &cgroup_dummy_root;
/* our new subsystem will be attached to the dummy hierarchy. */
init_css(css, ss, cgroup_dummy_top);
/*
* Now we need to entangle the css into the existing css_sets. unlike
* in cgroup_init_subsys, there are now multiple css_sets, so each one
* will need a new pointer to it; done by iterating the css_set_table.
* furthermore, modifying the existing css_sets will corrupt the hash
* table state, so each changed css_set will need its hash recomputed.
* this is all done under the css_set_lock.
*/
write_lock(&css_set_lock);
hash_for_each_safe(css_set_table, i, tmp, cset, hlist) {
/* skip entries that we already rehashed */
if (cset->subsys[ss->subsys_id])
continue;
/* remove existing entry */
hash_del(&cset->hlist);
cset->subsys[ss->subsys_id] = css;
/* recompute hash and restore entry */
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
}
write_unlock(&css_set_lock);
ret = online_css(css);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
return 0;
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
/* @ss can't be mounted here as try_module_get() would fail */
cgroup_unload_subsys(ss);
return ret;
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
/**
* cgroup_unload_subsys: unload a modular subsystem
* @ss: the subsystem to unload
*
* This function should be called in a modular subsystem's exitcall. When this
* function is invoked, the refcount on the subsystem's module will be 0, so
* the subsystem will not be attached to any hierarchy.
*/
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
struct cgrp_cset_link *link;
BUG_ON(ss->module == NULL);
/*
* we shouldn't be called if the subsystem is in use, and the use of
* try_module_get() in rebind_subsystems() should ensure that it
* doesn't start being used while we're killing it off.
*/
BUG_ON(ss->root != &cgroup_dummy_root);
mutex_lock(&cgroup_root_mutex);
Tejun Heo
committed
offline_css(cgroup_css(cgroup_dummy_top, ss));
Tejun Heo
committed
cgroup_subsys[ss->subsys_id] = NULL;
/* remove subsystem from the dummy root's list of subsystems */
list_del_init(&ss->sibling);
* disentangle the css from all css_sets attached to the dummy
* top. as in loading, we need to pay our respects to the hashtable
* gods.
list_for_each_entry(link, &cgroup_dummy_top->cset_links, cset_link) {
struct css_set *cset = link->cset;
hash_del(&cset->hlist);
cset->subsys[ss->subsys_id] = NULL;
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
}
write_unlock(&css_set_lock);
/*
* remove subsystem's css from the cgroup_dummy_top and free it -
* need to free before marking as null because ss->css_free needs
ss->css_free(cgroup_css(cgroup_dummy_top, ss));
RCU_INIT_POINTER(cgroup_dummy_top->subsys[ss->subsys_id], NULL);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*/
int __init cgroup_init_early(void)
{
atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cgrp_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&cgroup_dummy_root);
cgroup_root_count = 1;
RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
init_cgrp_cset_link.cset = &init_css_set;
init_cgrp_cset_link.cgrp = cgroup_dummy_top;
list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
/* at bootup time, we don't worry about modular subsystems */
for_each_builtin_subsys(ss, i) {
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
Tejun Heo
committed
BUG_ON(!ss->css_alloc);
BUG_ON(!ss->css_free);
if (ss->subsys_id != i) {
ss->name, ss->subsys_id);
BUG();
}
if (ss->early_init)
cgroup_init_subsys(ss);
}
return 0;
}
/**
* cgroup_init - cgroup initialization
*
* Register cgroup filesystem and /proc file, and initialize
* any subsystems that didn't request early init.
*/
int __init cgroup_init(void)
{
err = bdi_init(&cgroup_backing_dev_info);
if (err)
return err;
for_each_builtin_subsys(ss, i) {
if (!ss->early_init)
cgroup_init_subsys(ss);
}
/* allocate id for the dummy hierarchy */
mutex_lock(&cgroup_mutex);
mutex_lock(&cgroup_root_mutex);
/* Add init_css_set to the hash table */
key = css_set_hash(init_css_set.subsys);
hash_add(css_set_table, &init_css_set.hlist, key);
BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));
err = idr_alloc(&cgroup_dummy_root.cgroup_idr, cgroup_dummy_top,
0, 1, GFP_KERNEL);
BUG_ON(err < 0);
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
if (!cgroup_kobj) {
err = -ENOMEM;
goto out;
}
err = register_filesystem(&cgroup_fs_type);
if (err < 0) {
kobject_put(cgroup_kobj);
proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
if (err)
bdi_destroy(&cgroup_backing_dev_info);
static int __init cgroup_wq_init(void)
{
/*
* There isn't much point in executing destruction path in
* parallel. Good chunk is serialized with cgroup_mutex anyway.
* Use 1 for @max_active.
*
* We would prefer to do this in cgroup_init() above, but that
* is called before init_workqueues(): so leave this until after.
*/
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
BUG_ON(!cgroup_destroy_wq);
/*
* Used to destroy pidlists and separate to serve as flush domain.
* Cap @max_active to 1 too.
*/
cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
0, 1);
BUG_ON(!cgroup_pidlist_destroy_wq);
return 0;
}
core_initcall(cgroup_wq_init);
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
* - Used for /proc/<pid>/cgroup.
* - No need to task_lock(tsk) on this tsk->cgroup reference, as it
* doesn't really matter if tsk->cgroup changes after we read it,
* and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
* anyway. No need to check that tsk->cgroup != NULL, thanks to
* the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
* cgroup to top_cgroup.
*/
/* TODO: Use a proper seq_file iterator */
int proc_cgroup_show(struct seq_file *m, void *v)
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
{
struct pid *pid;
struct task_struct *tsk;
char *buf;
int retval;
struct cgroupfs_root *root;
retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto out;
retval = -ESRCH;
pid = m->private;
tsk = get_pid_task(pid, PIDTYPE_PID);
if (!tsk)
goto out_free;
retval = 0;
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
struct cgroup *cgrp;
seq_printf(m, "%d:", root->hierarchy_id);
for_each_root_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
if (strlen(root->name))
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
seq_puts(m, buf);
seq_putc(m, '\n');
}
out_unlock:
mutex_unlock(&cgroup_mutex);
put_task_struct(tsk);
out_free:
kfree(buf);
out:
return retval;
}
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
* subsys/hierarchy state.
*/
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->name, ss->root->hierarchy_id,
ss->root->number_of_cgroups, !ss->disabled);
mutex_unlock(&cgroup_mutex);
return 0;
}
static int cgroupstats_open(struct inode *inode, struct file *file)
{
return single_open(file, proc_cgroupstats_show, NULL);
static const struct file_operations proc_cgroupstats_operations = {
.open = cgroupstats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/**
* cgroup_fork - attach newly forked task to its parents cgroup.
* @child: pointer to task_struct of forking parent process.
*
* Description: A task inherits its parent's cgroup at fork().
*
* A pointer to the shared css_set was automatically copied in
* fork.c by dup_task_struct(). However, we ignore that copy, since
* it was not made under the protection of RCU or cgroup_mutex, so
* might no longer be a valid cgroup pointer. cgroup_attach_task() might
* have already changed current->cgroups, allowing the previously
* referenced cgroup group to be removed and freed.
*
* At the point that cgroup_fork() is called, 'current' is the parent
* task, and the passed argument 'child' points to the child task.
*/
void cgroup_fork(struct task_struct *child)
{
task_lock(current);
get_css_set(task_css_set(current));
child->cgroups = current->cgroups;
task_unlock(current);
INIT_LIST_HEAD(&child->cg_list);
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
*
Tejun Heo
committed
* Adds the task to the list running through its css_set if necessary and
* call the subsystem fork() callbacks. Has to be after the task is
* visible on the task list in case we race with the first call to
* cgroup_task_iter_start() - to guarantee that the new task ends up on its
Tejun Heo
committed
* list.
void cgroup_post_fork(struct task_struct *child)