Newer
Older
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
goto fail;
}
ret = event->cft->register_event(cgrp, event->cft,
event->eventfd, buffer);
if (ret)
goto fail;
efile->f_op->poll(efile, &event->pt);
/*
* Events should be removed after rmdir of cgroup directory, but before
* destroying subsystem state objects. Let's take reference to cgroup
* directory dentry to do that.
*/
dget(cgrp->dentry);
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
spin_lock(&cgrp->event_list_lock);
list_add(&event->list, &cgrp->event_list);
spin_unlock(&cgrp->event_list_lock);
fput(cfile);
fput(efile);
return 0;
fail:
if (cfile)
fput(cfile);
if (event && event->eventfd && !IS_ERR(event->eventfd))
eventfd_ctx_put(event->eventfd);
if (!IS_ERR_OR_NULL(efile))
fput(efile);
kfree(event);
return ret;
}
/*
 * Read handler wired to "cgroup.clone_children": reports whether
 * CGRP_CPUSET_CLONE_CHILDREN is set in @cgrp->flags.  @cft is unused.
 */
static u64 cgroup_clone_children_read(struct cgroup *cgrp,
				      struct cftype *cft)
{
	u64 enabled = test_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);

	return enabled;
}
/*
 * Write handler wired to "cgroup.clone_children": a non-zero @val sets
 * CGRP_CPUSET_CLONE_CHILDREN on @cgrp, zero clears it.  @cft is unused.
 *
 * Always returns 0.
 */
static int cgroup_clone_children_write(struct cgroup *cgrp,
				       struct cftype *cft,
				       u64 val)
{
	/* the two branches are exclusive; clearing must not follow setting */
	if (val)
		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
	else
		clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
	return 0;
}
static struct cftype cgroup_base_files[] = {
.name = "cgroup.procs",
Ben Blum
committed
.open = cgroup_procs_open,
Ben Blum
committed
.release = cgroup_pidlist_release,
Ben Blum
committed
},
.name = "cgroup.event_control",
.write_string = cgroup_write_event_control,
.mode = S_IWUGO,
},
{
.name = "cgroup.clone_children",
.read_u64 = cgroup_clone_children_read,
.write_u64 = cgroup_clone_children_write,
},
{
.name = "cgroup.sane_behavior",
.flags = CFTYPE_ONLY_ON_ROOT,
.read_seq_string = cgroup_sane_behavior_show,
},
/*
* Historical crazy stuff. These don't have "cgroup." prefix and
* don't exist if sane_behavior. If you're depending on these, be
* prepared to be burned.
*/
{
.name = "tasks",
.flags = CFTYPE_INSANE, /* use "procs" instead */
.open = cgroup_tasks_open,
.write_u64 = cgroup_tasks_write,
.release = cgroup_pidlist_release,
.mode = S_IRUGO | S_IWUSR,
},
{
.name = "notify_on_release",
.flags = CFTYPE_INSANE,
.read_u64 = cgroup_read_notify_on_release,
.write_u64 = cgroup_write_notify_on_release,
},
{
.name = "release_agent",
.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
.read_seq_string = cgroup_release_agent_show,
.write_string = cgroup_release_agent_write,
.max_write_len = PATH_MAX,
},
Tejun Heo
committed
* cgroup_populate_dir - create subsys files in a cgroup directory
* @cgrp: target cgroup
* @subsys_mask: mask of the subsystem ids whose files should be added
*
* On failure, no file is added.
Tejun Heo
committed
static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
struct cgroup_subsys *ss;
Tejun Heo
committed
int i, ret = 0;
/* process cftsets of each subsystem */
Tejun Heo
committed
for_each_subsys(ss, i) {
Tejun Heo
committed
if (!test_bit(i, &subsys_mask))
list_for_each_entry(set, &ss->cftsets, node) {
ret = cgroup_addrm_files(cgrp, ss, set->cfts, true);
if (ret < 0)
goto err;
}
for_each_root_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
struct css_id *id = rcu_dereference_protected(css->id, true);
/*
* Update id->css pointer and make this css visible from
* CSS ID functions. This pointer will be dereferened
* from RCU-read-side without locks.
*/
if (id)
rcu_assign_pointer(id->css, css);
err:
cgroup_clear_dir(cgrp, subsys_mask);
return ret;
/*
 * Work function installed on css->dput_work by init_cgroup_css(); it
 * recovers the css that embeds @work.
 *
 * NOTE(review): the statements that follow the declaration and the closing
 * brace of this function are not present in this listing.  According to
 * the comment in init_cgroup_css() this work item exists to drop the css's
 * extra dentry reference from process context - restore the missing part
 * from the authoritative source.
 */
static void css_dput_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, dput_work);
/*
 * Release callback registered with percpu_ref_init() for css->refcnt: map
 * @ref back to the css embedding it and queue that css's dput_work.
 */
static void css_release(struct percpu_ref *ref)
{
	struct cgroup_subsys_state *released;

	released = container_of(ref, struct cgroup_subsys_state, refcnt);
	schedule_work(&released->dput_work);
}
static void init_cgroup_css(struct cgroup_subsys_state *css,
struct cgroup_subsys *ss,
struct cgroup *cgrp)
css->cgroup = cgrp;
if (cgrp == cgroup_dummy_top)
css->flags |= CSS_ROOT;
BUG_ON(cgrp->subsys[ss->subsys_id]);
cgrp->subsys[ss->subsys_id] = css;
* css holds an extra ref to @cgrp->dentry which is put on the last
* css_put(). dput() requires process context, which css_put() may
* be called without. @css->dput_work will be used to invoke
* dput() asynchronously from css_put().
*/
INIT_WORK(&css->dput_work, css_dput_fn);
/* invoke ->css_online() on a new CSS and mark it online if successful */
static int online_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
lockdep_assert_held(&cgroup_mutex);
Tejun Heo
committed
if (ss->css_online)
ret = ss->css_online(cgrp);
if (!ret)
cgrp->subsys[ss->subsys_id]->flags |= CSS_ONLINE;
return ret;
/*
 * If the CSS is online, invoke ->css_offline() on it and mark it offline.
 *
 * Does nothing when the css of @ss in @cgrp is not flagged CSS_ONLINE.
 * Must be called with cgroup_mutex held.
 */
static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

	lockdep_assert_held(&cgroup_mutex);

	if (!(css->flags & CSS_ONLINE))
		return;

	/* the callback is optional, mirroring the check in online_css() */
	if (ss->css_offline)
		ss->css_offline(cgrp);

	css->flags &= ~CSS_ONLINE;
}
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
* @dentry: dentry of the new cgroup
* @mode: mode to set on new inode
* Must be called with the mutex on the parent inode held
*/
static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
struct cgroup *cgrp;
struct cgroupfs_root *root = parent->root;
int err = 0;
struct cgroup_subsys *ss;
struct super_block *sb = root->sb;
/* allocate the cgroup and its ID, 0 is reserved for the root */
cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
if (!cgrp)
name = cgroup_alloc_name(dentry);
if (!name)
goto err_free_cgrp;
rcu_assign_pointer(cgrp->name, name);
cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
if (cgrp->id < 0)
/*
* Only live parents can have children. Note that the liveliness
* check isn't strictly necessary because cgroup_mkdir() and
* cgroup_rmdir() are fully synchronized by i_mutex; however, do it
* anyway so that locking is contained inside cgroup proper and we
* don't get nasty surprises if we ever grow another caller.
*/
if (!cgroup_lock_live_group(parent)) {
err = -ENODEV;
/* Grab a reference on the superblock so the hierarchy doesn't
* get deleted on unmount if there are child cgroups. This
* can be done outside cgroup_mutex, since the sb can't
* disappear while someone has an open control file on the
* fs */
atomic_inc(&sb->s_active);
init_cgroup_housekeeping(cgrp);
dentry->d_fsdata = cgrp;
cgrp->dentry = dentry;
cgrp->parent = parent;
cgrp->root = parent->root;
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
for_each_root_subsys(root, ss) {
Tejun Heo
committed
struct cgroup_subsys_state *css;
Tejun Heo
committed
css = ss->css_alloc(cgrp);
if (IS_ERR(css)) {
err = PTR_ERR(css);
err = percpu_ref_init(&css->refcnt, css_release);
if (err)
goto err_free_all;
init_cgroup_css(css, ss, cgrp);
if (ss->use_id) {
err = alloc_css_id(ss, parent, cgrp);
if (err)
/*
* Create directory. cgroup_create_file() returns with the new
* directory locked on success so that it can be populated without
* dropping cgroup_mutex.
*/
err = cgroup_create_file(dentry, S_IFDIR | mode, sb);
lockdep_assert_held(&dentry->d_inode->i_mutex);
cgrp->serial_nr = cgroup_serial_nr_next++;
/* allocation complete, commit to creation */
list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
root->number_of_cgroups++;
/* each css holds a ref to the cgroup's dentry */
for_each_root_subsys(root, ss)
/* hold a ref to the parent's dentry */
dget(parent->dentry);
/* creation succeeded, notify subsystems */
for_each_root_subsys(root, ss) {
err = online_css(ss, cgrp);
if (err)
goto err_destroy;
if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
parent->parent) {
pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
current->comm, current->pid, ss->name);
if (!strcmp(ss->name, "memory"))
pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n");
ss->warned_broken_hierarchy = true;
}
Tejun Heo
committed
err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
if (err)
goto err_destroy;
err = cgroup_populate_dir(cgrp, root->subsys_mask);
if (err)
goto err_destroy;
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
for_each_root_subsys(root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
if (css) {
percpu_ref_cancel_init(&css->refcnt);
Tejun Heo
committed
ss->css_free(cgrp);
}
mutex_unlock(&cgroup_mutex);
/* Release the reference count that we took on the superblock */
deactivate_super(sb);
err_free_id:
ida_simple_remove(&root->cgroup_ida, cgrp->id);
err_free_name:
kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
err_destroy:
cgroup_destroy_locked(cgrp);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&dentry->d_inode->i_mutex);
return err;
/*
 * Directory-creation entry point: looks up the cgroup stored in the
 * parent dentry's d_fsdata and creates a child cgroup for @dentry under
 * it.  @dir is unused.  Returns whatever cgroup_create() returns.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct dentry *parent_dentry = dentry->d_parent;
	struct cgroup *parent_cgrp = parent_dentry->d_fsdata;

	/* the vfs holds inode->i_mutex already */
	return cgroup_create(parent_cgrp, dentry, mode | S_IFDIR);
}
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
/*
 * Drop one count from @cgrp->css_kill_cnt.  The caller that brings the
 * counter to zero sets up @cgrp->destroy_work to run cgroup_offline_fn()
 * and schedules it; every other caller returns without doing anything.
 */
static void cgroup_css_killed(struct cgroup *cgrp)
{
	if (atomic_dec_and_test(&cgrp->css_kill_cnt)) {
		/* percpu ref's of all css's are killed, kick off the next step */
		INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
		schedule_work(&cgrp->destroy_work);
	}
}
/*
 * Confirmation callback passed to percpu_ref_kill_and_confirm() for
 * css->refcnt: resolve @ref to its css and account the kill on the
 * cgroup that css belongs to.
 */
static void css_ref_killed_fn(struct percpu_ref *ref)
{
	struct cgroup_subsys_state *killed;

	killed = container_of(ref, struct cgroup_subsys_state, refcnt);
	cgroup_css_killed(killed->cgroup);
}
/**
* cgroup_destroy_locked - the first stage of cgroup destruction
* @cgrp: cgroup to be destroyed
*
* css's make use of percpu refcnts whose killing latency shouldn't be
* exposed to userland and are RCU protected. Also, cgroup core needs to
* guarantee that css_tryget() won't succeed by the time ->css_offline() is
* invoked. To satisfy all the requirements, destruction is implemented in
* the following two steps.
*
* s1. Verify @cgrp can be destroyed and mark it dying. Remove all
* userland visible parts and start killing the percpu refcnts of
* css's. Set up so that the next stage will be kicked off once all
* the percpu refcnts are confirmed to be killed.
*
* s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
* rest of destruction. Once all cgroup references are gone, the
* cgroup is RCU-freed.
*
* This function implements s1. After this step, @cgrp is gone as far as
* the userland is concerned and a new cgroup with the same name may be
* created. As cgroup doesn't care about the names internally, this
* doesn't cause any problem.
*/
static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
struct dentry *d = cgrp->dentry;
struct cgroup_event *event, *tmp;
struct cgroup_subsys *ss;
lockdep_assert_held(&d->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
* css_set_lock synchronizes access to ->cset_links and prevents
* @cgrp from being removed while __put_css_set() is in progress.
*/
read_lock(&css_set_lock);
empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children);
read_unlock(&css_set_lock);
if (!empty)
* Block new css_tryget() by killing css refcnts. cgroup core
* guarantees that, by the time ->css_offline() is invoked, no new
* css reference will be given out via css_tryget(). We can't
* simply call percpu_ref_kill() and proceed to offlining css's
* because percpu_ref_kill() doesn't guarantee that the ref is seen
* as killed on all CPUs on return.
*
* Use percpu_ref_kill_and_confirm() to get notifications as each
* css is confirmed to be seen as killed on all CPUs. The
* notification callback keeps track of the number of css's to be
* killed and schedules cgroup_offline_fn() to perform the rest of
* destruction once the percpu refs of all css's are confirmed to
* be killed.
atomic_set(&cgrp->css_kill_cnt, 1);
for_each_root_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
/*
* Killing would put the base ref, but we need to keep it
* alive until after ->css_offline.
*/
percpu_ref_get(&css->refcnt);
atomic_inc(&cgrp->css_kill_cnt);
percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn);
cgroup_css_killed(cgrp);
/*
* Mark @cgrp dead. This prevents further task migration and child
* creation by disabling cgroup_lock_live_group(). Note that
* CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to
* resume iteration after dropping RCU read lock. See
* cgroup_next_sibling() for details.
*/
/* CGRP_DEAD is set, remove from ->release_list for the last time */
raw_spin_lock(&release_list_lock);
if (!list_empty(&cgrp->release_list))
list_del_init(&cgrp->release_list);
raw_spin_unlock(&release_list_lock);
/*
* Clear and remove @cgrp directory. The removal puts the base ref
* but we aren't quite done with @cgrp yet, so hold onto it.
Tejun Heo
committed
cgroup_clear_dir(cgrp, cgrp->root->subsys_mask);
cgroup_addrm_files(cgrp, NULL, cgroup_base_files, false);
dget(d);
cgroup_d_remove_dir(d);
/*
* Unregister events and notify userspace.
* Notify userspace about cgroup removing only after rmdir of cgroup
* directory to avoid race between userspace and kernelspace.
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del_init(&event->list);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);
/**
* cgroup_offline_fn - the second step of cgroup destruction
* @work: cgroup->destroy_free_work
*
* This function is invoked from a work item for a cgroup which is being
* destroyed after the percpu refcnts of all css's are guaranteed to be
* seen as killed on all CPUs, and performs the rest of destruction. This
* is the second step of destruction described in the comment above
* cgroup_destroy_locked().
*/
static void cgroup_offline_fn(struct work_struct *work)
{
struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
struct cgroup *parent = cgrp->parent;
struct dentry *d = cgrp->dentry;
struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex);
/*
* css_tryget() is guaranteed to fail now. Tell subsystems to
* initate destruction.
*/
for_each_root_subsys(cgrp->root, ss)
offline_css(ss, cgrp);
* Put the css refs from cgroup_destroy_locked(). Each css holds
* an extra reference to the cgroup's dentry and cgroup removal
* proceeds regardless of css refs. On the last put of each css,
* whenever that may be, the extra dentry ref is put so that dentry
* destruction happens only after all css's are released.
for_each_root_subsys(cgrp->root, ss)
/* delete this cgroup from parent->children */
set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent);
mutex_unlock(&cgroup_mutex);
/*
 * Directory-removal entry point: takes cgroup_mutex and runs the first
 * stage of destruction on the cgroup stored in @dentry->d_fsdata.
 * @unused_dir is ignored.  Returns cgroup_destroy_locked()'s result.
 */
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
	struct cgroup *victim;
	int err;

	mutex_lock(&cgroup_mutex);
	victim = dentry->d_fsdata;
	err = cgroup_destroy_locked(victim);
	mutex_unlock(&cgroup_mutex);

	return err;
}
/*
 * Reset @ss->cftsets to an empty list and, when the subsystem supplies
 * ->base_cftypes, queue them on that list through the embedded
 * ->base_cftset.
 */
static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
{
	INIT_LIST_HEAD(&ss->cftsets);

	if (!ss->base_cftypes)
		return;

	/*
	 * base_cftset is embedded in subsys itself, no need to worry about
	 * deregistration.
	 */
	ss->base_cftset.cfts = ss->base_cftypes;
	list_add_tail(&ss->base_cftset.node, &ss->cftsets);
}
/*
 * Boot-time initialization of subsystem @ss: under cgroup_mutex, set up
 * its base cftset, attach it to the dummy root, allocate and install its
 * css for the dummy top cgroup, publish that css in init_css_set and
 * bring it online.  Any failure along the way is a BUG.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *top_css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	mutex_lock(&cgroup_mutex);

	/* init base cftset */
	cgroup_init_cftsets(ss);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
	ss->root = &cgroup_dummy_root;
	top_css = ss->css_alloc(cgroup_dummy_top);

	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(top_css));
	init_cgroup_css(top_css, ss, cgroup_dummy_top);

	/*
	 * Update the init_css_set to contain a subsys pointer to this
	 * state - since the subsystem is newly registered, all tasks and
	 * hence the init_css_set is in the subsystem's top cgroup.
	 */
	init_css_set.subsys[ss->subsys_id] = top_css;

	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * At system boot, before all subsystems have been registered, no
	 * tasks have been forked, so we don't need to invoke fork
	 * callbacks here.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	BUG_ON(online_css(ss, cgroup_dummy_top));

	mutex_unlock(&cgroup_mutex);

	/*
	 * this function shouldn't be used with modular subsystems, since
	 * they need to register a subsys_id, among other things
	 */
	BUG_ON(ss->module);
}
/**
* cgroup_load_subsys: load and register a modular subsystem at runtime
* @ss: the subsystem to load
*
* This function should be called in a modular subsystem's initcall. If the
* subsystem is built as a module, it will be assigned a new subsys_id and set
* up for use. If the subsystem is built-in anyway, work is delegated to the
* simpler cgroup_init_subsys.
*/
/*
 * Returns 0 on success (including the built-in case, where nothing is
 * done), -EINVAL for an invalid @ss, or the error from ->css_alloc(),
 * cgroup_init_idr() or online_css().
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;
	int i, ret;
	struct hlist_node *tmp;
	struct css_set *cset;
	unsigned long key;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->css_alloc == NULL || ss->css_free == NULL)
		return -EINVAL;

	/*
	 * we don't support callbacks in modular subsystems. this check is
	 * before the ss->module check for consistency; a subsystem that could
	 * be a module should still have no callbacks even if the user isn't
	 * compiling it as one.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * an optionally modular subsystem is built-in: we want to do nothing,
	 * since cgroup_init_subsys will have already taken care of it.
	 */
	if (ss->module == NULL) {
		/* a sanity check */
		BUG_ON(cgroup_subsys[ss->subsys_id] != ss);
		return 0;
	}

	/* init base cftset */
	cgroup_init_cftsets(ss);

	mutex_lock(&cgroup_mutex);
	cgroup_subsys[ss->subsys_id] = ss;

	/*
	 * no ss->css_alloc seems to need anything important in the ss
	 * struct, so this can happen first (i.e. before the dummy root
	 * attachment).
	 */
	css = ss->css_alloc(cgroup_dummy_top);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the cgroup_subsys[] slot. */
		cgroup_subsys[ss->subsys_id] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &cgroup_dummy_root.subsys_list);
	ss->root = &cgroup_dummy_root;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, cgroup_dummy_top);
	/* init_idr must be after init_cgroup_css because it sets css->id. */
	if (ss->use_id) {
		ret = cgroup_init_idr(ss, css);
		if (ret)
			goto err_unload;
	}

	/*
	 * Now we need to entangle the css into the existing css_sets. unlike
	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
	 * will need a new pointer to it; done by iterating the css_set_table.
	 * furthermore, modifying the existing css_sets will corrupt the hash
	 * table state, so each changed css_set will need its hash recomputed.
	 * this is all done under the css_set_lock.
	 */
	write_lock(&css_set_lock);
	hash_for_each_safe(css_set_table, i, tmp, cset, hlist) {
		/* skip entries that we already rehashed */
		if (cset->subsys[ss->subsys_id])
			continue;
		/* remove existing entry */
		hash_del(&cset->hlist);
		/* set new value */
		cset->subsys[ss->subsys_id] = css;
		/* recompute hash and restore entry */
		key = css_set_hash(cset->subsys);
		hash_add(css_set_table, &cset->hlist, key);
	}
	write_unlock(&css_set_lock);

	/* a failed ->css_online() must not be reported as success */
	ret = online_css(ss, cgroup_dummy_top);
	if (ret)
		goto err_unload;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;

err_unload:
	mutex_unlock(&cgroup_mutex);
	/* @ss can't be mounted here as try_module_get() would fail */
	cgroup_unload_subsys(ss);
	return ret;
}
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
/**
* cgroup_unload_subsys: unload a modular subsystem
* @ss: the subsystem to unload
*
* This function should be called in a modular subsystem's exitcall. When this
* function is invoked, the refcount on the subsystem's module will be 0, so
* the subsystem will not be attached to any hierarchy.
*/
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
struct cgrp_cset_link *link;
BUG_ON(ss->module == NULL);
/*
* we shouldn't be called if the subsystem is in use, and the use of
* try_module_get() in rebind_subsystems() should ensure that it
* doesn't start being used while we're killing it off.
*/
BUG_ON(ss->root != &cgroup_dummy_root);
Tejun Heo
committed
offline_css(ss, cgroup_dummy_top);
Tejun Heo
committed
Tejun Heo
committed
idr_destroy(&ss->idr);
cgroup_subsys[ss->subsys_id] = NULL;
/* remove subsystem from the dummy root's list of subsystems */
list_del_init(&ss->sibling);
* disentangle the css from all css_sets attached to the dummy
* top. as in loading, we need to pay our respects to the hashtable
* gods.
list_for_each_entry(link, &cgroup_dummy_top->cset_links, cset_link) {
struct css_set *cset = link->cset;
hash_del(&cset->hlist);
cset->subsys[ss->subsys_id] = NULL;
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
}
write_unlock(&css_set_lock);
/*
* remove subsystem's css from the cgroup_dummy_top and free it -
* need to free before marking as null because ss->css_free needs
* the cgrp->subsys pointer to find their state. note that this
* also takes care of freeing the css_id.
ss->css_free(cgroup_dummy_top);
cgroup_dummy_top->subsys[ss->subsys_id] = NULL;
mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*/
int __init cgroup_init_early(void)
{
atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cgrp_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&cgroup_dummy_root);
cgroup_root_count = 1;
RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
init_cgrp_cset_link.cset = &init_css_set;
init_cgrp_cset_link.cgrp = cgroup_dummy_top;
list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links);
list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
/* at bootup time, we don't worry about modular subsystems */
for_each_builtin_subsys(ss, i) {
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
Tejun Heo
committed
BUG_ON(!ss->css_alloc);
BUG_ON(!ss->css_free);
if (ss->subsys_id != i) {
ss->name, ss->subsys_id);
BUG();
}
if (ss->early_init)
cgroup_init_subsys(ss);
}
return 0;
}
/**
* cgroup_init - cgroup initialization
*
* Register cgroup filesystem and /proc file, and initialize
* any subsystems that didn't request early init.
*/
/*
 * Returns 0 on success or a negative errno.  On failure after bdi_init()
 * succeeded, the backing_dev_info is destroyed again before returning.
 */
int __init cgroup_init(void)
{
	struct cgroup_subsys *ss;
	unsigned long key;
	int i, err;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	for_each_builtin_subsys(ss, i) {
		if (!ss->early_init)
			cgroup_init_subsys(ss);
		/* as in cgroup_load_subsys(), only ->use_id users need an idr */
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* allocate id for the dummy hierarchy */
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);

	/* Add init_css_set to the hash table */
	key = css_set_hash(init_css_set.subsys);
	hash_add(css_set_table, &init_css_set.hlist, key);

	BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1));

	mutex_unlock(&cgroup_root_mutex);
	mutex_unlock(&cgroup_mutex);

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
* - Used for /proc/<pid>/cgroup.
* - No need to task_lock(tsk) on this tsk->cgroup reference, as it
* doesn't really matter if tsk->cgroup changes after we read it,
* and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
* anyway. No need to check that tsk->cgroup != NULL, thanks to
* the_top_cgroup_hack in cgroup_exit(), which sets an exiting task's
* cgroup to top_cgroup.
*/
/* TODO: Use a proper seq_file iterator */
int proc_cgroup_show(struct seq_file *m, void *v)
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
{
struct pid *pid;
struct task_struct *tsk;
char *buf;
int retval;
struct cgroupfs_root *root;
retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto out;
retval = -ESRCH;
pid = m->private;
tsk = get_pid_task(pid, PIDTYPE_PID);
if (!tsk)
goto out_free;
retval = 0;
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
struct cgroup *cgrp;
seq_printf(m, "%d:", root->hierarchy_id);
for_each_root_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
if (strlen(root->name))
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
if (retval < 0)
goto out_unlock;
seq_puts(m, buf);
seq_putc(m, '\n');
}
out_unlock:
mutex_unlock(&cgroup_mutex);
put_task_struct(tsk);
out_free:
kfree(buf);
out:
return retval;
}
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
* subsys/hierarchy state.
*/