Newer
Older
struct cgroup_subsys_state *css;
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
/* Create the top cgroup state for this subsystem */
list_add(&ss->sibling, &rootnode.subsys_list);
ss->root = &rootnode;
css = ss->create(ss, dummytop);
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_cgroup_css(css, ss, dummytop);
/* Update the init_css_set to contain a subsys
* pointer to this state - since the subsystem is
* newly registered, all tasks and hence the
* init_css_set is in the subsystem's top cgroup. */
init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
need_forkexit_callback |= ss->fork || ss->exit;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
mutex_init(&ss->hierarchy_mutex);
lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
ss->active = 1;
}
/**
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*/
int __init cgroup_init_early(void)
{
int i;
atomic_set(&init_css_set.refcount, 1);
INIT_LIST_HEAD(&init_css_set.cg_links);
INIT_LIST_HEAD(&init_css_set.tasks);
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&rootnode);
root_count = 1;
init_task.cgroups = &init_css_set;
init_css_set_link.cg = &init_css_set;
init_css_set_link.cgrp = dummytop;
list_add(&init_css_set_link.cgrp_link_list,
&rootnode.top_cgroup.css_sets);
list_add(&init_css_set_link.cg_link_list,
&init_css_set.cg_links);
for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&css_set_table[i]);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
BUG_ON(!ss->name);
BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
BUG_ON(!ss->create);
BUG_ON(!ss->destroy);
if (ss->subsys_id != i) {
ss->name, ss->subsys_id);
BUG();
}
if (ss->early_init)
cgroup_init_subsys(ss);
}
return 0;
}
/**
* cgroup_init - cgroup initialization
*
* Register cgroup filesystem and /proc file, and initialize
* any subsystems that didn't request early init.
*/
int __init cgroup_init(void)
{
int err;
int i;
err = bdi_init(&cgroup_backing_dev_info);
if (err)
return err;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (!ss->early_init)
cgroup_init_subsys(ss);
if (ss->use_id)
cgroup_subsys_init_idr(ss);
/* Add init_css_set to the hash table */
hhead = css_set_hash(init_css_set.subsys);
hlist_add_head(&init_css_set.hlist, hhead);
BUG_ON(!init_root_id(&rootnode));
err = register_filesystem(&cgroup_fs_type);
if (err < 0)
goto out;
proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
if (err)
bdi_destroy(&cgroup_backing_dev_info);
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
* - Used for /proc/<pid>/cgroup.
* - No need to task_lock(tsk) on this tsk->cgroup reference, as it
* doesn't really matter if tsk->cgroup changes after we read it,
* and we take cgroup_mutex, keeping cgroup_attach_task() from changing it
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
* anyway. No need to check that tsk->cgroup != NULL, thanks to
* the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
* cgroup to top_cgroup.
*/
/* TODO: Use a proper seq_file iterator */
static int proc_cgroup_show(struct seq_file *m, void *v)
{
struct pid *pid;
struct task_struct *tsk;
char *buf;
int retval;
struct cgroupfs_root *root;
retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!buf)
goto out;
retval = -ESRCH;
pid = m->private;
tsk = get_pid_task(pid, PIDTYPE_PID);
if (!tsk)
goto out_free;
retval = 0;
mutex_lock(&cgroup_mutex);
for_each_active_root(root) {
struct cgroup *cgrp;
seq_printf(m, "%d:", root->hierarchy_id);
for_each_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
if (strlen(root->name))
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
if (retval < 0)
goto out_unlock;
seq_puts(m, buf);
seq_putc(m, '\n');
}
out_unlock:
mutex_unlock(&cgroup_mutex);
put_task_struct(tsk);
out_free:
kfree(buf);
out:
return retval;
}
static int cgroup_open(struct inode *inode, struct file *file)
{
struct pid *pid = PROC_I(inode)->pid;
return single_open(file, proc_cgroup_show, pid);
}
struct file_operations proc_cgroup_operations = {
.open = cgroup_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
int i;
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
mutex_lock(&cgroup_mutex);
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->name, ss->root->hierarchy_id,
ss->root->number_of_cgroups, !ss->disabled);
}
mutex_unlock(&cgroup_mutex);
return 0;
}
static int cgroupstats_open(struct inode *inode, struct file *file)
{
return single_open(file, proc_cgroupstats_show, NULL);
}
static struct file_operations proc_cgroupstats_operations = {
.open = cgroupstats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
/**
* cgroup_fork - attach newly forked task to its parents cgroup.
* @child: pointer to task_struct of forking parent process.
*
* Description: A task inherits its parent's cgroup at fork().
*
* A pointer to the shared css_set was automatically copied in
* fork.c by dup_task_struct(). However, we ignore that copy, since
* it was not made under the protection of RCU or cgroup_mutex, so
* might no longer be a valid cgroup pointer. cgroup_attach_task() might
* have already changed current->cgroups, allowing the previously
* referenced cgroup group to be removed and freed.
*
* At the point that cgroup_fork() is called, 'current' is the parent
* task, and the passed argument 'child' points to the child task.
*/
void cgroup_fork(struct task_struct *child)
{
task_lock(current);
child->cgroups = current->cgroups;
get_css_set(child->cgroups);
task_unlock(current);
INIT_LIST_HEAD(&child->cg_list);
* cgroup_fork_callbacks - run fork callbacks
* @child: the new task
*
* Called on a new task very soon before adding it to the
* tasklist. No need to take any locks since no-one can
* be operating on this task.
*/
void cgroup_fork_callbacks(struct task_struct *child)
{
if (need_forkexit_callback) {
int i;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->fork)
ss->fork(ss, child);
}
}
}
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
*
* Adds the task to the list running through its css_set if necessary.
* Has to be after the task is visible on the task list in case we race
* with the first call to cgroup_iter_start() - to guarantee that the
* new task ends up on its list.
*/
void cgroup_post_fork(struct task_struct *child)
{
if (use_task_css_set_links) {
write_lock(&css_set_lock);
task_lock(child);
if (list_empty(&child->cg_list))
list_add(&child->cg_list, &child->cgroups->tasks);
task_unlock(child);
write_unlock(&css_set_lock);
}
}
/**
* cgroup_exit - detach cgroup from exiting task
* @tsk: pointer to task_struct of exiting process
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
*
* Description: Detach cgroup from @tsk and release it.
*
* Note that cgroups marked notify_on_release force every task in
* them to take the global cgroup_mutex mutex when exiting.
* This could impact scaling on very large systems. Be reluctant to
* use notify_on_release cgroups where very high task exit scaling
* is required on large systems.
*
* the_top_cgroup_hack:
*
* Set the exiting tasks cgroup to the root cgroup (top_cgroup).
*
* We call cgroup_exit() while the task is still competent to
* handle notify_on_release(), then leave the task attached to the
* root cgroup in each hierarchy for the remainder of its exit.
*
* To do this properly, we would increment the reference count on
* top_cgroup, and near the very end of the kernel/exit.c do_exit()
* code we would add a second cgroup function call, to drop that
* reference. This would just create an unnecessary hot spot on
* the top_cgroup reference count, to no avail.
*
* Normally, holding a reference to a cgroup without bumping its
* count is unsafe. The cgroup could go away, or someone could
* attach us to a different cgroup, decrementing the count on
* the first cgroup that we never incremented. But in this case,
* top_cgroup isn't going away, and either task has PF_EXITING set,
* which wards off any cgroup_attach_task() attempts, or task is a failed
* fork, never visible to cgroup_attach_task.
*/
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
int i;
struct css_set *cg;
if (run_callbacks && need_forkexit_callback) {
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->exit)
ss->exit(ss, tsk);
}
}
/*
* Unlink from the css_set task list if necessary.
* Optimistically check cg_list before taking
* css_set_lock
*/
if (!list_empty(&tsk->cg_list)) {
write_lock(&css_set_lock);
if (!list_empty(&tsk->cg_list))
list_del(&tsk->cg_list);
write_unlock(&css_set_lock);
}
/* Reassign the task to the init_css_set. */
task_lock(tsk);
cg = tsk->cgroups;
tsk->cgroups = &init_css_set;
put_css_set_taskexit(cg);
* cgroup_clone - clone the cgroup the given subsystem is attached to
* @tsk: the task to be moved
* @subsys: the given subsystem
* @nodename: the name for the new cgroup
*
* Duplicate the current cgroup in the hierarchy that the given
* subsystem is attached to, and move this task into the new
* child.
int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
char *nodename)
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
{
struct dentry *dentry;
int ret = 0;
struct cgroup *parent, *child;
struct inode *inode;
struct css_set *cg;
struct cgroupfs_root *root;
struct cgroup_subsys *ss;
/* We shouldn't be called by an unregistered subsystem */
BUG_ON(!subsys->active);
/* First figure out what hierarchy and cgroup we're dealing
* with, and pin them so we can drop cgroup_mutex */
mutex_lock(&cgroup_mutex);
again:
root = subsys->root;
if (root == &rootnode) {
mutex_unlock(&cgroup_mutex);
return 0;
}
/* Pin the hierarchy */
if (!atomic_inc_not_zero(&root->sb->s_active)) {
/* We race with the final deactivate_super() */
mutex_unlock(&cgroup_mutex);
return 0;
}
/* Keep the cgroup alive */
task_lock(tsk);
parent = task_cgroup(tsk, subsys->subsys_id);
cg = tsk->cgroups;
get_css_set(cg);
task_unlock(tsk);
mutex_unlock(&cgroup_mutex);
/* Now do the VFS work to create a cgroup */
inode = parent->dentry->d_inode;
/* Hold the parent directory mutex across this operation to
* stop anyone else deleting the new cgroup */
mutex_lock(&inode->i_mutex);
dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
if (IS_ERR(dentry)) {
printk(KERN_INFO
"cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
PTR_ERR(dentry));
ret = PTR_ERR(dentry);
goto out_release;
}
/* Create the cgroup directory, which also creates the cgroup */
ret = vfs_mkdir(inode, dentry, 0755);
child = __d_cgrp(dentry);
dput(dentry);
if (ret) {
printk(KERN_INFO
"Failed to create cgroup %s: %d\n", nodename,
ret);
goto out_release;
}
/* The cgroup now exists. Retake cgroup_mutex and check
* that we're still in the same state that we thought we
* were. */
mutex_lock(&cgroup_mutex);
if ((root != subsys->root) ||
(parent != task_cgroup(tsk, subsys->subsys_id))) {
/* Aargh, we raced ... */
mutex_unlock(&inode->i_mutex);
put_css_set(cg);
deactivate_super(root->sb);
/* The cgroup is still accessible in the VFS, but
* we're not going to try to rmdir() it at this
* point. */
printk(KERN_INFO
"Race in cgroup_clone() - leaking cgroup %s\n",
nodename);
goto again;
}
/* do any required auto-setup */
for_each_subsys(root, ss) {
if (ss->post_clone)
ss->post_clone(ss, child);
}
/* All seems fine. Finish by moving the task into the new cgroup */
ret = cgroup_attach_task(child, tsk);
mutex_unlock(&cgroup_mutex);
out_release:
mutex_unlock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
put_css_set(cg);
mutex_unlock(&cgroup_mutex);
deactivate_super(root->sb);
return ret;
}
Grzegorz Nosek
committed
* cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
Grzegorz Nosek
committed
* @task: the task in question
Grzegorz Nosek
committed
* See if @cgrp is a descendant of @task's cgroup in the appropriate
* hierarchy.
*
* If we are sending in dummytop, then presumably we are creating
* the top cgroup in the subsystem.
*
* Called only by the ns (nsproxy) cgroup.
*/
Grzegorz Nosek
committed
int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
{
int ret;
struct cgroup *target;
if (cgrp == dummytop)
target = task_cgroup_from_root(task, cgrp->root);
while (cgrp != target && cgrp!= cgrp->top_cgroup)
cgrp = cgrp->parent;
ret = (cgrp == target);
static void check_for_release(struct cgroup *cgrp)
{
/* All of these checks rely on RCU to keep the cgroup
* structure alive */
if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
&& list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
/* Control Group is currently removeable. If it's not
* already queued for a userspace notification, queue
* it now */
int need_schedule_work = 0;
spin_lock(&release_list_lock);
if (!cgroup_is_removed(cgrp) &&
list_empty(&cgrp->release_list)) {
list_add(&cgrp->release_list, &release_list);
need_schedule_work = 1;
}
spin_unlock(&release_list_lock);
if (need_schedule_work)
schedule_work(&release_agent_work);
}
}
void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
if (atomic_dec_return(&css->refcnt) == 1) {
if (notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
cgroup_wakeup_rmdir_waiter(cgrp);
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
}
rcu_read_unlock();
}
/*
* Notify userspace when a cgroup is released, by running the
* configured release agent with the name of the cgroup (path
* relative to the root of cgroup file system) as the argument.
*
* Most likely, this user command will try to rmdir this cgroup.
*
* This races with the possibility that some other task will be
* attached to this cgroup before it is removed, or that some other
* user task will 'mkdir' a child cgroup of this cgroup. That's ok.
* The presumed 'rmdir' will fail quietly if this cgroup is no longer
* unused, and this cgroup will be reprieved from its death sentence,
* to continue to serve a useful existence. Next time it's released,
* we will get notified again, if it still has 'notify_on_release' set.
*
* The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
* means only wait until the task is successfully execve()'d. The
* separate release agent task is forked by call_usermodehelper(),
* then control in this thread returns here, without waiting for the
* release agent task. We don't bother to wait because the caller of
* this routine has no use for the exit status of the release agent
* task, so no sense holding our caller up for that.
*/
static void cgroup_release_agent(struct work_struct *work)
{
BUG_ON(work != &release_agent_work);
mutex_lock(&cgroup_mutex);
spin_lock(&release_list_lock);
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
int i;
char *pathbuf = NULL, *agentbuf = NULL;
struct cgroup *cgrp = list_entry(release_list.next,
struct cgroup,
release_list);
list_del_init(&cgrp->release_list);
spin_unlock(&release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!pathbuf)
goto continue_free;
if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
goto continue_free;
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
if (!agentbuf)
goto continue_free;
i = 0;
argv[i++] = agentbuf;
argv[i++] = pathbuf;
argv[i] = NULL;
i = 0;
/* minimal command environment */
envp[i++] = "HOME=/";
envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp[i] = NULL;
/* Drop the lock while we invoke the usermode helper,
* since the exec could involve hitting disk and hence
* be a slow process */
mutex_unlock(&cgroup_mutex);
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
mutex_lock(&cgroup_mutex);
continue_free:
kfree(pathbuf);
kfree(agentbuf);
spin_lock(&release_list_lock);
}
spin_unlock(&release_list_lock);
mutex_unlock(&cgroup_mutex);
}
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
static int __init cgroup_disable(char *str)
{
int i;
char *token;
while ((token = strsep(&str, ",")) != NULL) {
if (!*token)
continue;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (!strcmp(token, ss->name)) {
ss->disabled = 1;
printk(KERN_INFO "Disabling %s control group"
" subsystem\n", ss->name);
break;
}
}
}
return 1;
}
__setup("cgroup_disable=", cgroup_disable);
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
/*
* Functons for CSS ID.
*/
/*
*To get ID other than 0, this should be called when !cgroup_is_removed().
*/
unsigned short css_id(struct cgroup_subsys_state *css)
{
struct css_id *cssid = rcu_dereference(css->id);
if (cssid)
return cssid->id;
return 0;
}
unsigned short css_depth(struct cgroup_subsys_state *css)
{
struct css_id *cssid = rcu_dereference(css->id);
if (cssid)
return cssid->depth;
return 0;
}
bool css_is_ancestor(struct cgroup_subsys_state *child,
const struct cgroup_subsys_state *root)
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
{
struct css_id *child_id = rcu_dereference(child->id);
struct css_id *root_id = rcu_dereference(root->id);
if (!child_id || !root_id || (child_id->depth < root_id->depth))
return false;
return child_id->stack[root_id->depth] == root_id->id;
}
static void __free_css_id_cb(struct rcu_head *head)
{
struct css_id *id;
id = container_of(head, struct css_id, rcu_head);
kfree(id);
}
void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
{
struct css_id *id = css->id;
/* When this is called before css_id initialization, id can be NULL */
if (!id)
return;
BUG_ON(!ss->use_id);
rcu_assign_pointer(id->css, NULL);
rcu_assign_pointer(css->id, NULL);
spin_lock(&ss->id_lock);
idr_remove(&ss->idr, id->id);
spin_unlock(&ss->id_lock);
call_rcu(&id->rcu_head, __free_css_id_cb);
}
/*
* This is called by init or create(). Then, calls to this function are
* always serialized (By cgroup_mutex() at create()).
*/
static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
{
struct css_id *newid;
int myid, error, size;
BUG_ON(!ss->use_id);
size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
newid = kzalloc(size, GFP_KERNEL);
if (!newid)
return ERR_PTR(-ENOMEM);
/* get id */
if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
error = -ENOMEM;
goto err_out;
}
spin_lock(&ss->id_lock);
/* Don't use 0. allocates an ID of 1-65535 */
error = idr_get_new_above(&ss->idr, newid, 1, &myid);
spin_unlock(&ss->id_lock);
/* Returns error when there are no free spaces for new ID.*/
if (error) {
error = -ENOSPC;
goto err_out;
}
if (myid > CSS_ID_MAX)
goto remove_idr;
newid->id = myid;
newid->depth = depth;
return newid;
remove_idr:
error = -ENOSPC;
spin_lock(&ss->id_lock);
idr_remove(&ss->idr, myid);
spin_unlock(&ss->id_lock);
err_out:
kfree(newid);
return ERR_PTR(error);
}
static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
{
struct css_id *newid;
struct cgroup_subsys_state *rootcss;
spin_lock_init(&ss->id_lock);
idr_init(&ss->idr);
rootcss = init_css_set.subsys[ss->subsys_id];
newid = get_new_cssid(ss, 0);
if (IS_ERR(newid))
return PTR_ERR(newid);
newid->stack[0] = newid->id;
newid->css = rootcss;
rootcss->id = newid;
return 0;
}
static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
struct cgroup *child)
{
int subsys_id, i, depth = 0;
struct cgroup_subsys_state *parent_css, *child_css;
struct css_id *child_id, *parent_id = NULL;
subsys_id = ss->subsys_id;
parent_css = parent->subsys[subsys_id];
child_css = child->subsys[subsys_id];
depth = css_depth(parent_css) + 1;
parent_id = parent_css->id;
child_id = get_new_cssid(ss, depth);
if (IS_ERR(child_id))
return PTR_ERR(child_id);
for (i = 0; i < depth; i++)
child_id->stack[i] = parent_id->stack[i];
child_id->stack[depth] = child_id->id;
/*
* child_id->css pointer will be set after this cgroup is available
* see cgroup_populate_dir()
*/
rcu_assign_pointer(child_css->id, child_id);
return 0;
}
/**
* css_lookup - lookup css by id
* @ss: cgroup subsys to be looked into.
* @id: the id
*
* Returns pointer to cgroup_subsys_state if there is valid one with id.
* NULL if not. Should be called under rcu_read_lock()
*/
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
{
struct css_id *cssid = NULL;
BUG_ON(!ss->use_id);
cssid = idr_find(&ss->idr, id);
if (unlikely(!cssid))
return NULL;
return rcu_dereference(cssid->css);
}
/**
* css_get_next - lookup next cgroup under specified hierarchy.
* @ss: pointer to subsystem
* @id: current position of iteration.
* @root: pointer to css. search tree under this.
* @foundid: position of found object.
*
* Search next css under the specified hierarchy of rootid. Calling under
* rcu_read_lock() is necessary. Returns NULL if it reaches the end.
*/
struct cgroup_subsys_state *
css_get_next(struct cgroup_subsys *ss, int id,
struct cgroup_subsys_state *root, int *foundid)
{
struct cgroup_subsys_state *ret = NULL;
struct css_id *tmp;
int tmpid;
int rootid = css_id(root);
int depth = css_depth(root);
if (!rootid)
return NULL;
BUG_ON(!ss->use_id);
/* fill start point for scan */
tmpid = id;
while (1) {
/*
* scan next entry from bitmap(tree), tmpid is updated after
* idr_get_next().
*/
spin_lock(&ss->id_lock);
tmp = idr_get_next(&ss->idr, &tmpid);
spin_unlock(&ss->id_lock);
if (!tmp)
break;
if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
ret = rcu_dereference(tmp->css);
if (ret) {
*foundid = tmpid;
break;
}
}
/* continue to scan from next id */
tmpid = tmpid + 1;
}
return ret;
}
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
struct cgroup *cont)
{
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
if (!css)
return ERR_PTR(-ENOMEM);
return css;
}
static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
{
kfree(cont->subsys[debug_subsys_id]);
}
static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
{
return atomic_read(&cont->count);
}
static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
{
return cgroup_task_count(cont);
}
static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
{
return (u64)(unsigned long)current->cgroups;
}
static u64 current_css_set_refcount_read(struct cgroup *cont,
struct cftype *cft)
{
u64 count;
rcu_read_lock();
count = atomic_read(¤t->cgroups->refcount);
rcu_read_unlock();
return count;
}
static int current_css_set_cg_links_read(struct cgroup *cont,
struct cftype *cft,
struct seq_file *seq)
{
struct cg_cgroup_link *link;
struct css_set *cg;
read_lock(&css_set_lock);
rcu_read_lock();
cg = rcu_dereference(current->cgroups);
list_for_each_entry(link, &cg->cg_links, cg_link_list) {
struct cgroup *c = link->cgrp;
const char *name;
if (c->dentry)
name = c->dentry->d_name.name;
else
name = "?";
seq_printf(seq, "Root %d group %s\n",
c->root->hierarchy_id, name);
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
}
rcu_read_unlock();
read_unlock(&css_set_lock);
return 0;
}
#define MAX_TASKS_SHOWN_PER_CSS 25
static int cgroup_css_links_read(struct cgroup *cont,
struct cftype *cft,
struct seq_file *seq)
{
struct cg_cgroup_link *link;
read_lock(&css_set_lock);
list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
struct css_set *cg = link->cg;
struct task_struct *task;
int count = 0;
seq_printf(seq, "css_set %p\n", cg);
list_for_each_entry(task, &cg->tasks, cg_list) {
if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
seq_puts(seq, " ...\n");
break;
} else {
seq_printf(seq, " task %d\n",
task_pid_vnr(task));
}
}
}
read_unlock(&css_set_lock);
return 0;
}
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
{
return test_bit(CGRP_RELEASABLE, &cgrp->flags);
}
static struct cftype debug_files[] = {
{
.name = "cgroup_refcount",
.read_u64 = cgroup_refcount_read,
},
{
.name = "taskcount",
.read_u64 = debug_taskcount_read,
},
{
.name = "current_css_set",
.read_u64 = current_css_set_read,
},
{
.name = "current_css_set_refcount",
.read_u64 = current_css_set_refcount_read,
},
{
.name = "current_css_set_cg_links",
.read_seq_string = current_css_set_cg_links_read,
},
{
.name = "cgroup_css_links",
.read_seq_string = cgroup_css_links_read,
},
{
.name = "releasable",
.read_u64 = releasable_read,
},
};