Newer
Older
/*
* Unregister events and notify userspace.
* Notify userspace about cgroup removing only after rmdir of cgroup
* directory to avoid race between userspace and kernelspace
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del(&event->list);
remove_wait_queue(event->wqh, &event->wait);
eventfd_signal(event->eventfd, 1);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);
mutex_unlock(&cgroup_mutex);
return 0;
}
/*
 * cgroup_init_subsys - register a built-in subsystem at boot
 * @ss: the subsystem to initialize
 *
 * Links @ss into rootnode, creates its top-level state on dummytop and
 * publishes that state in init_css_set. Failures are fatal (BUG), and the
 * function must not be used for modular subsystems.
 */
static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
{
	struct cgroup_subsys_state *css;

	printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);

	/* Create the top cgroup state for this subsystem */
	list_add(&ss->sibling, &rootnode.subsys_list);
	/*
	 * Attach to the dummy hierarchy, exactly as cgroup_load_subsys()
	 * does; proc_cgroupstats_show() dereferences ss->root for every
	 * registered subsystem.
	 */
	ss->root = &rootnode;
	css = ss->create(dummytop);
	/* We don't handle early failures gracefully */
	BUG_ON(IS_ERR(css));
	init_cgroup_css(css, ss, dummytop);

	/*
	 * Update the init_css_set to contain a subsys
	 * pointer to this state - since the subsystem is
	 * newly registered, all tasks and hence the
	 * init_css_set is in the subsystem's top cgroup.
	 */
	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];

	need_forkexit_callback |= ss->fork || ss->exit;

	/*
	 * At system boot, before all subsystems have been
	 * registered, no tasks have been forked, so we don't
	 * need to invoke fork callbacks here.
	 */
	BUG_ON(!list_empty(&init_task.tasks));

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	/* mark the subsystem usable, mirroring cgroup_load_subsys() */
	ss->active = 1;

	/*
	 * this function shouldn't be used with modular subsystems, since they
	 * need to register a subsys_id, among other things
	 */
	BUG_ON(ss->module);
}
/**
 * cgroup_load_subsys: load and register a modular subsystem at runtime
 * @ss: the subsystem to load
 *
 * This function should be called in a modular subsystem's initcall. If the
 * subsystem is built as a module, it will be assigned a new subsys_id and set
 * up for use. If the subsystem is built-in anyway, work is delegated to the
 * simpler cgroup_init_subsys.
 *
 * Returns 0 on success, -EINVAL for an invalid @ss (bad name, missing
 * create/destroy, or fork/exit callbacks present), -EBUSY when no subsys[]
 * slot is free, or the error reported by ss->create() / cgroup_init_idr().
 */
int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
{
	int i;
	struct cgroup_subsys_state *css;

	/* check name and function validity */
	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
	    ss->create == NULL || ss->destroy == NULL)
		return -EINVAL;

	/*
	 * we don't support callbacks in modular subsystems. this check is
	 * before the ss->module check for consistency; a subsystem that could
	 * be a module should still have no callbacks even if the user isn't
	 * compiling it as one.
	 */
	if (ss->fork || ss->exit)
		return -EINVAL;

	/*
	 * an optionally modular subsystem is built-in: we want to do nothing,
	 * since cgroup_init_subsys will have already taken care of it.
	 */
	if (ss->module == NULL) {
		/* a few sanity checks */
		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
		BUG_ON(subsys[ss->subsys_id] != ss);
		return 0;
	}

	/*
	 * need to register a subsys id before anything else - for example,
	 * init_cgroup_css needs it.
	 */
	mutex_lock(&cgroup_mutex);
	/* find the first empty slot in the array */
	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
		if (subsys[i] == NULL)
			break;
	}
	if (i == CGROUP_SUBSYS_COUNT) {
		/* maximum number of subsystems already registered! */
		mutex_unlock(&cgroup_mutex);
		return -EBUSY;
	}
	/* assign ourselves the subsys_id */
	ss->subsys_id = i;
	subsys[i] = ss;

	/*
	 * no ss->create seems to need anything important in the ss struct, so
	 * this can happen first (i.e. before the rootnode attachment).
	 */
	css = ss->create(dummytop);
	if (IS_ERR(css)) {
		/* failure case - need to deassign the subsys[] slot. */
		subsys[i] = NULL;
		mutex_unlock(&cgroup_mutex);
		return PTR_ERR(css);
	}

	list_add(&ss->sibling, &rootnode.subsys_list);
	ss->root = &rootnode;

	/* our new subsystem will be attached to the dummy hierarchy. */
	init_cgroup_css(css, ss, dummytop);
	/* init_idr must be after init_cgroup_css because it sets css->id. */
	if (ss->use_id) {
		int ret = cgroup_init_idr(ss, css);

		if (ret) {
			/*
			 * Roll back everything done above. The css is freed
			 * while dummytop->subsys[] still points at it (the
			 * same ordering cgroup_unload_subsys() documents),
			 * and ss is unlinked from rootnode so no stale list
			 * entry survives the failed load.
			 */
			ss->destroy(dummytop);
			dummytop->subsys[ss->subsys_id] = NULL;
			list_del_init(&ss->sibling);
			subsys[i] = NULL;
			mutex_unlock(&cgroup_mutex);
			return ret;
		}
	}

	/*
	 * Now we need to entangle the css into the existing css_sets. unlike
	 * in cgroup_init_subsys, there are now multiple css_sets, so each one
	 * will need a new pointer to it; done by iterating the css_set_table.
	 * furthermore, modifying the existing css_sets will corrupt the hash
	 * table state, so each changed css_set will need its hash recomputed.
	 * this is all done under the css_set_lock.
	 */
	write_lock(&css_set_lock);
	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
		struct css_set *cg;
		struct hlist_node *node, *tmp;
		struct hlist_head *bucket = &css_set_table[i], *new_bucket;

		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
			/* skip entries that we already rehashed */
			if (cg->subsys[ss->subsys_id])
				continue;
			/* remove existing entry */
			hlist_del(&cg->hlist);
			/* set new value */
			cg->subsys[ss->subsys_id] = css;
			/* recompute hash and restore entry */
			new_bucket = css_set_hash(cg->subsys);
			hlist_add_head(&cg->hlist, new_bucket);
		}
	}
	write_unlock(&css_set_lock);

	mutex_init(&ss->hierarchy_mutex);
	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
	ss->active = 1;

	/* success! */
	mutex_unlock(&cgroup_mutex);
	return 0;
}
EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
/**
* cgroup_unload_subsys: unload a modular subsystem
* @ss: the subsystem to unload
*
* This function should be called in a modular subsystem's exitcall. When this
* function is invoked, the refcount on the subsystem's module will be 0, so
* the subsystem will not be attached to any hierarchy.
*/
void cgroup_unload_subsys(struct cgroup_subsys *ss)
{
struct cg_cgroup_link *link;
struct hlist_head *hhead;
BUG_ON(ss->module == NULL);
/*
* we shouldn't be called if the subsystem is in use, and the use of
* try_module_get in parse_cgroupfs_options should ensure that it
* doesn't start being used while we're killing it off.
*/
BUG_ON(ss->root != &rootnode);
mutex_lock(&cgroup_mutex);
/* deassign the subsys_id */
BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
subsys[ss->subsys_id] = NULL;
/* remove subsystem from rootnode's list of subsystems */
list_del_init(&ss->sibling);
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
/*
* disentangle the css from all css_sets attached to the dummytop. as
* in loading, we need to pay our respects to the hashtable gods.
*/
write_lock(&css_set_lock);
list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
struct css_set *cg = link->cg;
hlist_del(&cg->hlist);
BUG_ON(!cg->subsys[ss->subsys_id]);
cg->subsys[ss->subsys_id] = NULL;
hhead = css_set_hash(cg->subsys);
hlist_add_head(&cg->hlist, hhead);
}
write_unlock(&css_set_lock);
/*
* remove subsystem's css from the dummytop and free it - need to free
* before marking as null because ss->destroy needs the cgrp->subsys
* pointer to find their state. note that this also takes care of
* freeing the css_id.
*/
dummytop->subsys[ss->subsys_id] = NULL;
mutex_unlock(&cgroup_mutex);
}
EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*/
int __init cgroup_init_early(void)
{
int i;
atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cg_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&rootnode);
root_count = 1;
init_task.cgroups = &init_css_set;
init_css_set_link.cg = &init_css_set;
init_css_set_link.cgrp = dummytop;
list_add(&init_css_set_link.cgrp_link_list,
&rootnode.top_cgroup.css_sets);
list_add(&init_css_set_link.cg_link_list,
&init_css_set.cg_links);
for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&css_set_table[i]);
/* at bootup time, we don't worry about modular subsystems */
for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
BUG_ON(!ss->create);
BUG_ON(!ss->destroy);
if (ss->subsys_id != i) {
ss->name, ss->subsys_id);
BUG();
}
if (ss->early_init)
cgroup_init_subsys(ss);
}
return 0;
}
/**
 * cgroup_init - cgroup initialization
 *
 * Register cgroup filesystem and /proc file, and initialize
 * any subsystems that didn't request early init.
 *
 * Returns 0 on success or a negative errno; on failure the backing dev
 * info set up at entry is destroyed again.
 */
int __init cgroup_init(void)
{
	int err;
	int i;
	struct hlist_head *hhead;

	err = bdi_init(&cgroup_backing_dev_info);
	if (err)
		return err;

	/* at bootup time, we don't worry about modular subsystems */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];

		if (!ss->early_init)
			cgroup_init_subsys(ss);
		/*
		 * Only subsystems that opted in get an idr;
		 * get_new_cssid() BUGs when use_id is not set.
		 */
		if (ss->use_id)
			cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
	}

	/* Add init_css_set to the hash table */
	hhead = css_set_hash(init_css_set.subsys);
	hlist_add_head(&init_css_set.hlist, hhead);
	BUG_ON(!init_root_id(&rootnode));

	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
	if (!cgroup_kobj) {
		err = -ENOMEM;
		goto out;
	}

	err = register_filesystem(&cgroup_fs_type);
	if (err < 0) {
		kobject_put(cgroup_kobj);
		goto out;
	}

	proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);

out:
	if (err)
		bdi_destroy(&cgroup_backing_dev_info);

	return err;
}
/*
 * proc_cgroup_show()
 *  - Print task's cgroup paths into seq_file, one line for each hierarchy
 *  - Used for /proc/<pid>/cgroup.
 *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
 *    doesn't really matter if tsk->cgroup changes after we read it,
 *    and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
 *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
 *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
 *    cgroup to top_cgroup.
 *
 * Returns 0 on success, -ENOMEM if the path buffer cannot be allocated,
 * -ESRCH if the task is gone, or the negative result of cgroup_path().
 */

/* TODO: Use a proper seq_file iterator */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
	struct pid *pid;
	struct task_struct *tsk;
	char *buf;
	int retval;
	struct cgroupfs_root *root;

	retval = -ENOMEM;
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!buf)
		goto out;

	retval = -ESRCH;
	pid = m->private;
	tsk = get_pid_task(pid, PIDTYPE_PID);
	if (!tsk)
		goto out_free;

	retval = 0;

	mutex_lock(&cgroup_mutex);

	for_each_active_root(root) {
		struct cgroup_subsys *ss;
		struct cgroup *cgrp;
		/* number of subsystem names printed so far on this line */
		int count = 0;

		seq_printf(m, "%d:", root->hierarchy_id);
		for_each_subsys(root, ss)
			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
		if (strlen(root->name))
			seq_printf(m, "%sname=%s", count ? "," : "",
				   root->name);
		/*
		 * NOTE(review): separator between the subsystem list and
		 * the path, giving "id:subsystems:path" lines - confirm
		 * against the documented /proc/<pid>/cgroup format.
		 */
		seq_putc(m, ':');
		cgrp = task_cgroup_from_root(tsk, root);
		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
		if (retval < 0)
			goto out_unlock;
		seq_puts(m, buf);
		seq_putc(m, '\n');
	}

out_unlock:
	mutex_unlock(&cgroup_mutex);
	put_task_struct(tsk);
out_free:
	kfree(buf);
out:
	return retval;
}
/*
 * open() handler for the per-task cgroup proc file: binds
 * proc_cgroup_show() to the struct pid stored in the proc inode.
 */
static int cgroup_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroup_show, PROC_I(inode)->pid);
}
/* seq_file based file operations backed by cgroup_open() */
const struct file_operations proc_cgroup_operations = {
	.open		= cgroup_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
int i;
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
* subsys/hierarchy state.
*/
mutex_lock(&cgroup_mutex);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->name, ss->root->hierarchy_id,
ss->root->number_of_cgroups, !ss->disabled);
}
mutex_unlock(&cgroup_mutex);
return 0;
}
/* open() handler for the "cgroups" proc file; no private data is needed */
static int cgroupstats_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_cgroupstats_show, NULL);
}
/* seq_file based file operations backed by cgroupstats_open() */
static const struct file_operations proc_cgroupstats_operations = {
	.open		= cgroupstats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
/**
 * cgroup_fork - attach newly forked task to its parents cgroup.
 * @child: pointer to task_struct of the newly forked child task.
 *
 * Description: A task inherits its parent's cgroup at fork().
 *
 * A pointer to the shared css_set was automatically copied in
 * fork.c by dup_task_struct().  However, we ignore that copy, since
 * it was not made under the protection of RCU, cgroup_mutex or
 * threadgroup_change_begin(), so it might no longer be a valid
 * cgroup pointer.  cgroup_attach_task() might have already changed
 * current->cgroups, allowing the previously referenced cgroup
 * group to be removed and freed.
 *
 * Outside the pointer validity we also need to process the css_set
 * inheritance between threadgroup_change_begin() and
 * threadgroup_change_end(), this way there is no leak in any process
 * wide migration performed by cgroup_attach_proc() that could otherwise
 * miss a thread because it is too early or too late in the fork stage.
 *
 * At the point that cgroup_fork() is called, 'current' is the parent
 * task, and the passed argument 'child' points to the child task.
 */
void cgroup_fork(struct task_struct *child)
{
	/*
	 * We don't need to task_lock() current because current->cgroups
	 * can't be changed concurrently here. The parent obviously hasn't
	 * exited and called cgroup_exit(), and we are synchronized against
	 * cgroup migration through threadgroup_change_begin().
	 */
	child->cgroups = current->cgroups;
	get_css_set(child->cgroups);
	INIT_LIST_HEAD(&child->cg_list);
}
* cgroup_fork_callbacks - run fork callbacks
* @child: the new task
*
* Called on a new task very soon before adding it to the
* tasklist. No need to take any locks since no-one can
* be operating on this task.
*/
void cgroup_fork_callbacks(struct task_struct *child)
{
if (need_forkexit_callback) {
int i;
/*
* forkexit callbacks are only supported for builtin
* subsystems, and the builtin section of the subsys array is
* immutable, so we don't need to lock the subsys array here.
*/
for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->fork)
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
*
* Adds the task to the list running through its css_set if necessary.
* Has to be after the task is visible on the task list in case we race
* with the first call to cgroup_iter_start() - to guarantee that the
* new task ends up on its list.
*/
void cgroup_post_fork(struct task_struct *child)
{
if (use_task_css_set_links) {
write_lock(&css_set_lock);
if (list_empty(&child->cg_list)) {
/*
* It's safe to use child->cgroups without task_lock()
* here because we are protected through
* threadgroup_change_begin() against concurrent
* css_set change in cgroup_task_migrate(). Also
* the task can't exit at that point until
* wake_up_new_task() is called, so we are protected
* against cgroup_exit() setting child->cgroup to
* init_css_set.
*/
list_add(&child->cg_list, &child->cgroups->tasks);
write_unlock(&css_set_lock);
}
}
/**
 * cgroup_exit - detach cgroup from exiting task
 * @tsk: pointer to task_struct of exiting process
 * @run_callbacks: run exit callbacks?
 *
 * Description: Detach cgroup from @tsk and release it.
 *
 * Note that cgroups marked notify_on_release force every task in
 * them to take the global cgroup_mutex mutex when exiting.
 * This could impact scaling on very large systems.  Be reluctant to
 * use notify_on_release cgroups where very high task exit scaling
 * is required on large systems.
 *
 * the_top_cgroup_hack:
 *
 *    Set the exiting tasks cgroup to the root cgroup (top_cgroup).
 *
 *    We call cgroup_exit() while the task is still competent to
 *    handle notify_on_release(), then leave the task attached to the
 *    root cgroup in each hierarchy for the remainder of its exit.
 *
 *    To do this properly, we would increment the reference count on
 *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
 *    code we would add a second cgroup function call, to drop that
 *    reference.  This would just create an unnecessary hot spot on
 *    the top_cgroup reference count, to no avail.
 *
 *    Normally, holding a reference to a cgroup without bumping its
 *    count is unsafe.   The cgroup could go away, or someone could
 *    attach us to a different cgroup, decrementing the count on
 *    the first cgroup that we never incremented.  But in this case,
 *    top_cgroup isn't going away, and either task has PF_EXITING set,
 *    which wards off any cgroup_attach_task() attempts, or task is a failed
 *    fork, never visible to cgroup_attach_task.
 */
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
	struct css_set *cg;
	int i;

	/*
	 * Unlink from the css_set task list if necessary.
	 * Optimistically check cg_list before taking
	 * css_set_lock
	 */
	if (!list_empty(&tsk->cg_list)) {
		write_lock(&css_set_lock);
		if (!list_empty(&tsk->cg_list))
			list_del_init(&tsk->cg_list);
		write_unlock(&css_set_lock);
	}

	/* Reassign the task to the init_css_set. */
	task_lock(tsk);
	cg = tsk->cgroups;
	tsk->cgroups = &init_css_set;

	if (run_callbacks && need_forkexit_callback) {
		/*
		 * modular subsystems can't use callbacks, so no need to lock
		 * the subsys array
		 */
		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
			struct cgroup_subsys *ss = subsys[i];

			if (ss->exit) {
				/* old_cgrp: where the task was; cgrp: where it is now */
				struct cgroup *old_cgrp =
					rcu_dereference_raw(cg->subsys[i])->cgroup;
				struct cgroup *cgrp = task_cgroup(tsk, i);

				ss->exit(cgrp, old_cgrp, tsk);
			}
		}
	}
	/* pairs with the task_lock() taken before the reassignment */
	task_unlock(tsk);

	/* drop the reference the task held on its previous css_set */
	if (cg)
		put_css_set_taskexit(cg);
}
Grzegorz Nosek
committed
* cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
Grzegorz Nosek
committed
* @task: the task in question
Grzegorz Nosek
committed
* See if @cgrp is a descendant of @task's cgroup in the appropriate
* hierarchy.
*
* If we are sending in dummytop, then presumably we are creating
* the top cgroup in the subsystem.
*
* Called only by the ns (nsproxy) cgroup.
*/
Grzegorz Nosek
committed
int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
{
int ret;
struct cgroup *target;
if (cgrp == dummytop)
target = task_cgroup_from_root(task, cgrp->root);
while (cgrp != target && cgrp!= cgrp->top_cgroup)
cgrp = cgrp->parent;
ret = (cgrp == target);
static void check_for_release(struct cgroup *cgrp)
{
/* All of these checks rely on RCU to keep the cgroup
* structure alive */
if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
&& list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
/* Control Group is currently removeable. If it's not
* already queued for a userspace notification, queue
* it now */
int need_schedule_work = 0;
raw_spin_lock(&release_list_lock);
if (!cgroup_is_removed(cgrp) &&
list_empty(&cgrp->release_list)) {
list_add(&cgrp->release_list, &release_list);
need_schedule_work = 1;
}
raw_spin_unlock(&release_list_lock);
if (need_schedule_work)
schedule_work(&release_agent_work);
}
}
/* Caller must verify that the css is not for root cgroup */
void __css_put(struct cgroup_subsys_state *css, int count)
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
val = atomic_sub_return(count, &css->refcnt);
if (notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
cgroup_wakeup_rmdir_waiter(cgrp);
}
rcu_read_unlock();
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
/*
 * Notify userspace when a cgroup is released, by running the
 * configured release agent with the name of the cgroup (path
 * relative to the root of cgroup file system) as the argument.
 *
 * Most likely, this user command will try to rmdir this cgroup.
 *
 * This races with the possibility that some other task will be
 * attached to this cgroup before it is removed, or that some other
 * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
 * The presumed 'rmdir' will fail quietly if this cgroup is no longer
 * unused, and this cgroup will be reprieved from its death sentence,
 * to continue to serve a useful existence.  Next time it's released,
 * we will get notified again, if it still has 'notify_on_release' set.
 *
 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
 * means only wait until the task is successfully execve()'d.  The
 * separate release agent task is forked by call_usermodehelper(),
 * then control in this thread returns here, without waiting for the
 * release agent task.  We don't bother to wait because the caller of
 * this routine has no use for the exit status of the release agent
 * task, so no sense holding our caller up for that.
 */
static void cgroup_release_agent(struct work_struct *work)
{
	BUG_ON(work != &release_agent_work);
	mutex_lock(&cgroup_mutex);
	raw_spin_lock(&release_list_lock);
	while (!list_empty(&release_list)) {
		char *argv[3], *envp[3];
		int i;
		char *pathbuf = NULL, *agentbuf = NULL;
		struct cgroup *cgrp = list_entry(release_list.next,
						 struct cgroup,
						 release_list);

		list_del_init(&cgrp->release_list);
		/* the allocations below may sleep, so drop the spinlock */
		raw_spin_unlock(&release_list_lock);
		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!pathbuf)
			goto continue_free;
		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
			goto continue_free;
		agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
		if (!agentbuf)
			goto continue_free;

		i = 0;
		argv[i++] = agentbuf;
		argv[i++] = pathbuf;
		argv[i] = NULL;

		i = 0;
		/* minimal command environment */
		envp[i++] = "HOME=/";
		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
		envp[i] = NULL;

		/*
		 * Drop the lock while we invoke the usermode helper,
		 * since the exec could involve hitting disk and hence
		 * be a slow process
		 */
		mutex_unlock(&cgroup_mutex);
		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		mutex_lock(&cgroup_mutex);
 continue_free:
		kfree(pathbuf);
		kfree(agentbuf);
		/* re-take the spinlock before re-testing the list */
		raw_spin_lock(&release_list_lock);
	}
	raw_spin_unlock(&release_list_lock);
	mutex_unlock(&cgroup_mutex);
}
/*
 * Handler for the "cgroup_disable=" boot parameter: @str is a
 * comma-separated list of subsystem names; every built-in subsystem whose
 * name matches a token is marked disabled. Always returns 1.
 */
static int __init cgroup_disable(char *str)
{
	char *name;
	int idx;

	for (name = strsep(&str, ","); name != NULL; name = strsep(&str, ",")) {
		/* ignore empty tokens such as those produced by ",," */
		if (*name == '\0')
			continue;
		/*
		 * cgroup_disable, being at boot time, can't know about module
		 * subsystems, so we don't worry about them.
		 */
		for (idx = 0; idx < CGROUP_BUILTIN_SUBSYS_COUNT; idx++) {
			struct cgroup_subsys *ss = subsys[idx];

			if (strcmp(name, ss->name) != 0)
				continue;

			ss->disabled = 1;
			printk(KERN_INFO "Disabling %s control group"
				" subsystem\n", ss->name);
			break;
		}
	}
	return 1;
}
__setup("cgroup_disable=", cgroup_disable);
/*
 * Functions for CSS ID.
 */
/*
*To get ID other than 0, this should be called when !cgroup_is_removed().
*/
unsigned short css_id(struct cgroup_subsys_state *css)
{
struct css_id *cssid;
/*
* This css_id() can return correct value when somone has refcnt
* on this or this is under rcu_read_lock(). Once css->id is allocated,
* it's unchanged until freed.
*/
Michal Hocko
committed
cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
if (cssid)
return cssid->id;
return 0;
}
unsigned short css_depth(struct cgroup_subsys_state *css)
{
struct css_id *cssid;
Michal Hocko
committed
cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt));
if (cssid)
return cssid->depth;
return 0;
}
/**
* css_is_ancestor - test "root" css is an ancestor of "child"
* @child: the css to be tested.
* @root: the css supporsed to be an ancestor of the child.
*
* Returns true if "root" is an ancestor of "child" in its hierarchy. Because
* this function reads css->id, this use rcu_dereference() and rcu_read_lock().
* But, considering usual usage, the csses should be valid objects after test.
* Assuming that the caller will do some action to the child if this returns
* returns true, the caller must take "child";s reference count.
* If "child" is valid object and this returns true, "root" is valid, too.
*/
bool css_is_ancestor(struct cgroup_subsys_state *child,
const struct cgroup_subsys_state *root)
struct css_id *child_id;
struct css_id *root_id;
bool ret = true;
rcu_read_lock();
child_id = rcu_dereference(child->id);
root_id = rcu_dereference(root->id);
if (!child_id
|| !root_id
|| (child_id->depth < root_id->depth)
|| (child_id->stack[root_id->depth] != root_id->id))
ret = false;
rcu_read_unlock();
return ret;
}
/*
 * free_css_id - detach and release the css_id attached to @css
 * @ss: the subsystem owning the idr the id was allocated from
 * @css: the css whose id is released
 *
 * The id structure itself is freed after an RCU grace period.
 */
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
	struct css_id *id = css->id;

	/* When this is called before css_id initialization, id can be NULL */
	if (!id)
		return;

	BUG_ON(!ss->use_id);

	/* break the css <-> id links first so RCU readers see NULL */
	rcu_assign_pointer(id->css, NULL);
	rcu_assign_pointer(css->id, NULL);
	/* give the number back to the idr under the id lock */
	write_lock(&ss->id_lock);
	idr_remove(&ss->idr, id->id);
	write_unlock(&ss->id_lock);
	kfree_rcu(id, rcu_head);
}
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
/*
 * get_new_cssid - allocate a css_id for a css at the given @depth
 *
 * This is called by init or create(). Then, calls to this function are
 * always serialized (By cgroup_mutex() at create()).
 *
 * Returns the new css_id with ->id and ->depth filled in (the rest zeroed),
 * or ERR_PTR(-ENOMEM) / ERR_PTR(-ENOSPC) on failure.
 */
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
	struct css_id *newid;
	int myid, error, size;

	BUG_ON(!ss->use_id);

	/* room for one stack slot per level, 0..depth inclusive */
	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
	newid = kzalloc(size, GFP_KERNEL);
	if (!newid)
		return ERR_PTR(-ENOMEM);
	/* get id */
	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
		error = -ENOMEM;
		goto err_out;
	}
	write_lock(&ss->id_lock);
	/* Don't use 0. allocates an ID of 1-65535 */
	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
	write_unlock(&ss->id_lock);

	/* Returns error when there are no free spaces for new ID.*/
	if (error) {
		error = -ENOSPC;
		goto err_out;
	}
	if (myid > CSS_ID_MAX)
		goto remove_idr;

	newid->id = myid;
	newid->depth = depth;
	return newid;
remove_idr:
	/* the id is out of range: hand it back to the idr before failing */
	error = -ENOSPC;
	write_lock(&ss->id_lock);
	idr_remove(&ss->idr, myid);
	write_unlock(&ss->id_lock);
err_out:
	kfree(newid);
	return ERR_PTR(error);
}
/*
 * cgroup_init_idr - set up the id allocator of @ss and give @rootcss an id
 *
 * Initializes ss->id_lock and ss->idr, then allocates the depth-0 id and
 * links it with @rootcss. Returns 0 on success or the negative errno from
 * get_new_cssid().
 */
static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
					    struct cgroup_subsys_state *rootcss)
{
	struct css_id *newid;

	rwlock_init(&ss->id_lock);
	idr_init(&ss->idr);

	newid = get_new_cssid(ss, 0);
	if (IS_ERR(newid))
		return PTR_ERR(newid);

	/* the root's ancestor stack holds only the root itself */
	newid->stack[0] = newid->id;
	newid->css = rootcss;
	rootcss->id = newid;
	return 0;
}
/*
 * alloc_css_id - allocate a css_id for @child's css under @parent
 * @ss: the subsystem the css belongs to
 * @parent: parent cgroup; its css must already carry an id
 * @child: cgroup whose css receives the new id
 *
 * The new id sits one level below the parent's and inherits the parent's
 * ancestor stack. Returns 0 on success or the negative errno from
 * get_new_cssid().
 */
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
			struct cgroup *child)
{
	int subsys_id, i, depth = 0;
	struct cgroup_subsys_state *parent_css, *child_css;
	struct css_id *child_id, *parent_id;

	subsys_id = ss->subsys_id;
	parent_css = parent->subsys[subsys_id];
	child_css = child->subsys[subsys_id];
	parent_id = parent_css->id;
	depth = parent_id->depth + 1;

	child_id = get_new_cssid(ss, depth);
	if (IS_ERR(child_id))
		return PTR_ERR(child_id);

	/* copy the ancestors' ids, then append our own at our depth */
	for (i = 0; i < depth; i++)
		child_id->stack[i] = parent_id->stack[i];
	child_id->stack[depth] = child_id->id;
	/*
	 * child_id->css pointer will be set after this cgroup is available
	 * see cgroup_populate_dir()
	 */
	rcu_assign_pointer(child_css->id, child_id);

	return 0;
}
/**
 * css_lookup - lookup css by id
 * @ss: cgroup subsys to be looked into.
 * @id: the id
 *
 * Returns pointer to cgroup_subsys_state if there is valid one with id.
 * NULL if not. Should be called under rcu_read_lock()
 */
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
{
	struct css_id *entry;

	BUG_ON(!ss->use_id);

	entry = idr_find(&ss->idr, id);
	if (likely(entry))
		return rcu_dereference(entry->css);

	/* nothing is registered under this id */
	return NULL;
}
/**
* css_get_next - lookup next cgroup under specified hierarchy.
* @ss: pointer to subsystem
* @id: current position of iteration.
* @root: pointer to css. search tree under this.
* @foundid: position of found object.
*
* Search next css under the specified hierarchy of rootid. Calling under
* rcu_read_lock() is necessary. Returns NULL if it reaches the end.