/*
* Generic process-grouping system.
*
* Based originally on the cpuset system, extracted by Paul Menage
* Copyright (C) 2006 Google, Inc
*
* Copyright notices from the original cpuset code:
* --------------------------------------------------
* Copyright (C) 2003 BULL SA.
* Copyright (C) 2004-2006 Silicon Graphics, Inc.
*
* Portions derived from Patrick Mochel's sysfs code.
* sysfs is Copyright (c) 2001-3 Patrick Mochel
*
* 2003-10-10 Written by Simon Derr.
* 2003-10-22 Updates by Stephen Hemminger.
* 2004 May-July Rework by Paul Jackson.
* ---------------------------------------------------
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#include <linux/cgroup.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/kmod.h>
#include <linux/delayacct.h>
#include <linux/cgroupstats.h>
#include <asm/atomic.h>
/* The single global cgroup mutex; cgroup_lock()/cgroup_unlock() wrap it. */
static DEFINE_MUTEX(cgroup_mutex);
/*
 * Generate an array of cgroup subsystem pointers: while
 * <linux/cgroup_subsys.h> is being included, SUBSYS(x) expands to
 * "&x_subsys,", i.e. one array element per SUBSYS() use in that header
 * (presumably one line per subsystem -- header not shown here).
 */
#define SUBSYS(_x) &_x ## _subsys,
static struct cgroup_subsys *subsys[] = {
#include <linux/cgroup_subsys.h>
};
/* Size in bytes of the cgroupfs_root.name buffer. */
#define MAX_CGROUP_ROOT_NAMELEN 64
/*
 * A cgroupfs_root represents the root of a cgroup hierarchy,
 * and may be associated with a superblock to form an active
 * hierarchy.
 */
struct cgroupfs_root {
/* Superblock this hierarchy is associated with, if any */
struct super_block *sb;
/*
 * The bitmask of subsystems intended to be attached to this
 * hierarchy. Bit i corresponds to subsys[i].
 */
unsigned long subsys_bits;
/* The bitmask of subsystems currently attached to this hierarchy */
unsigned long actual_subsys_bits;
/*
 * A list running through the attached subsystems, linked through
 * cgroup_subsys.sibling (see for_each_subsys()).
 */
struct list_head subsys_list;
/* The root cgroup for this hierarchy */
struct cgroup top_cgroup;
/* Tracks how many cgroups are currently defined in this hierarchy. */
int number_of_cgroups;
/*
 * A list running through the active hierarchies, anchored on the
 * global "roots" list (see for_each_active_root()).
 */
struct list_head root_list;
/* Hierarchy-specific flags; bit numbers are the ROOT_* enum values */
unsigned long flags;
/* The path to use for release notifications; empty string if unset. */
char release_agent_path[PATH_MAX];
/* The name for this hierarchy - may be empty */
char name[MAX_CGROUP_ROOT_NAMELEN];
};
/*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 *
 * Its top cgroup (rootnode.top_cgroup) is what the "dummytop" shorthand
 * refers to.
 */
static struct cgroupfs_root rootnode;
/*
 * CSS ID -- ID per subsys's Cgroup Subsys State (CSS). Used only when
 * cgroup_subsys->use_id != 0.
 */
/* Upper bound for a css_id value; it fits the unsigned short "id" field. */
#define CSS_ID_MAX (65535)
/*
 * Per-css identifier record. The fixed-size header is followed by a
 * variable-length "stack" array (see the last member).
 */
struct css_id {
/*
 * The css to which this ID points. This pointer is set to a valid value
 * after the cgroup is populated. If the cgroup is removed, this will be
 * NULL. This pointer is expected to be RCU-safe because destroy()
 * is called after synchronize_rcu(). But for safe use, css_is_removed()
 * or css_tryget() should be used to avoid races.
 */
struct cgroup_subsys_state *css;
/*
 * ID of this css.
 */
unsigned short id;
/*
 * Depth in the hierarchy to which this ID belongs.
 */
unsigned short depth;
/*
 * ID is freed by RCU. (and lookup routine is RCU safe.)
 */
struct rcu_head rcu_head;
/*
 * Hierarchy to which this CSS ID belongs. Zero-length trailing array:
 * the allocation must reserve (depth + 1) entries after the header.
 */
unsigned short stack[0]; /* Array of Length (depth+1) */
};
/*
 * The list of hierarchy roots, linked through cgroupfs_root.root_list
 * (walked by for_each_active_root()).
 */
static LIST_HEAD(roots);
/*
 * Counter associated with the "roots" list. find_css_set() allocates this
 * many cg_cgroup_link objects for each new css_set.
 */
static int root_count;
/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)
/* This flag indicates whether tasks in the fork and exit paths should
* check for fork/exit handlers to call. This avoids us having to do
* extra work in the fork/exit path if none of the subsystems need to
* be called.
static int need_forkexit_callback __read_mostly;
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
return test_bit(CGRP_REMOVED, &cgrp->flags);
}
/*
 * Bits in the struct cgroupfs_root "flags" field. These are bit numbers
 * (used with test_bit()), not masks.
 */
enum {
ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
};
/*
 * Returns 1 only when both CGRP_RELEASABLE and CGRP_NOTIFY_ON_RELEASE
 * are set in @cgrp->flags, 0 otherwise.
 */
static int cgroup_is_releasable(const struct cgroup *cgrp)
{
	const int wanted =
		(1 << CGRP_NOTIFY_ON_RELEASE) |
		(1 << CGRP_RELEASABLE);

	if ((cgrp->flags & wanted) != wanted)
		return 0;
	return 1;
}
static int notify_on_release(const struct cgroup *cgrp)
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
/*
 * for_each_subsys() allows you to iterate on each subsystem attached to
 * an active hierarchy.
 *
 * @_root: pointer expression of type struct cgroupfs_root *; parenthesized
 *         in the expansion so that any pointer expression is safe to pass.
 * @_ss:   struct cgroup_subsys * cursor, advanced along ->sibling.
 */
#define for_each_subsys(_root, _ss) \
	list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/*
 * for_each_active_root() allows you to iterate across the active
 * hierarchies; @_root is the struct cgroupfs_root * cursor.
 */
#define for_each_active_root(_root) \
	list_for_each_entry(_root, &roots, root_list)
/*
 * The list of cgroups eligible for automatic release. Protected by
 * release_list_lock.
 */
static LIST_HEAD(release_list);
static DEFINE_SPINLOCK(release_list_lock);
/* Work item whose handler is cgroup_release_agent(). */
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
/* Forward declaration; called from __put_css_set(). */
static void check_for_release(struct cgroup *cgrp);
/*
 * Link structure for associating css_set objects with cgroups. Each link
 * sits on two lists at once: one anchored on the cgroup, one anchored on
 * the css_set.
 */
struct cg_cgroup_link {
/*
 * List running through cg_cgroup_links associated with a
 * cgroup, anchored on cgroup->css_sets
 */
struct list_head cgrp_link_list;
/* The cgroup end of this link */
struct cgroup *cgrp;
/*
 * List running through cg_cgroup_links pointing at a
 * single css_set object, anchored on css_set->cg_links
 */
struct list_head cg_link_list;
/* The css_set end of this link */
struct css_set *cg;
};
/*
 * The default css_set - used by init and its children prior to any
 * hierarchies being mounted. It contains a pointer to the root state
 * for each subsystem. Also used to anchor the list of css_sets. Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 */
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;
static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
/*
 * css_set_lock protects the list of css_set objects, and the
 * chain of tasks off each css_set. Nests outside task->alloc_lock
 * due to cgroup_iter_start().
 */
static DEFINE_RWLOCK(css_set_lock);
/* Incremented when a css_set is created, decremented when one is unlinked. */
static int css_set_count;
/*
 * Hash table for cgroup groups. This improves the performance to find
 * an existing css_set. This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 */
#define CSS_SET_HASH_BITS 7
#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
/*
 * css_set_hash - pick the hash bucket for a set of subsystem states
 * @css: array of CGROUP_SUBSYS_COUNT subsystem state pointers
 *
 * Sums the pointer values, folds the upper bits down, and hashes the
 * result into CSS_SET_HASH_BITS bits. Returns the matching bucket of
 * css_set_table.
 */
static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
{
	unsigned long sum = 0UL;
	int slot;
	int n;

	for (n = 0; n < CGROUP_SUBSYS_COUNT; n++)
		sum += (unsigned long)css[n];

	sum ^= sum >> 16;
	slot = hash_long(sum, CSS_SET_HASH_BITS);

	return css_set_table + slot;
}
/*
 * We don't maintain the lists running through each css_set to its
 * task until after the first call to cgroup_iter_start(). This
 * reduces the fork()/exit() overhead for people who have cgroups
 * compiled into their kernel but not actually in use.
 */
static int use_task_css_set_links __read_mostly;
/* When we create or destroy a css_set, the operation simply
* takes/releases a reference count on all the cgroups referenced
* by subsystems in this css_set. This can end up multiple-counting
* some cgroups, but that's OK - the ref-count is just a
* busy/not-busy indicator; ensuring that we only count each cgroup
* once would require taking a global lock to ensure that no
* subsystems moved between hierarchies while we were doing so.
*
* Possible TODO: decide at boot time based on the number of
* registered subsystems and the number of CPUs or NUMA nodes whether
* it's better for performance to ref-count every subsystem, or to
* take a global lock and only add one ref count to each hierarchy.
*/
/*
* unlink a css_set from the list and free it
*/
static void unlink_css_set(struct css_set *cg)
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
css_set_count--;
list_for_each_entry_safe(link, saved_link, &cg->cg_links,
cg_link_list) {
list_del(&link->cg_link_list);
list_del(&link->cgrp_link_list);
kfree(link);
}
}
static void __put_css_set(struct css_set *cg, int taskexit)
{
int i;
/*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
* rwlock
*/
if (atomic_add_unless(&cg->refcount, -1, 1))
return;
write_lock(&css_set_lock);
if (!atomic_dec_and_test(&cg->refcount)) {
write_unlock(&css_set_lock);
return;
}
unlink_css_set(cg);
write_unlock(&css_set_lock);
rcu_read_lock();
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
if (atomic_dec_and_test(&cgrp->count) &&
notify_on_release(cgrp)) {
if (taskexit)
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
}
rcu_read_unlock();
/*
 * refcounted get/put for css_set objects
 */

/* Take an additional reference on @cg by incrementing cg->refcount. */
static inline void get_css_set(struct css_set *cg)
{
atomic_inc(&cg->refcount);
}
/* Drop a reference on @cg (not on behalf of an exiting task). */
static inline void put_css_set(struct css_set *cg)
{
	__put_css_set(cg, 0);
}

/*
 * Drop a reference on @cg on behalf of an exiting task; passes
 * taskexit=1 to __put_css_set().
 */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	__put_css_set(cg, 1);
}
/*
 * compare_css_sets - helper function for find_existing_css_set().
 * @cg: candidate css_set being tested
 * @old_cg: existing css_set for a task
 * @new_cgrp: cgroup that's being entered by the task
 * @template: desired set of css pointers in css_set (pre-calculated)
 *
 * Returns true if "cg" matches "old_cg" except for the hierarchy
 * which "new_cgrp" belongs to, for which it should match "new_cgrp".
 */
static bool compare_css_sets(struct css_set *cg,
			     struct css_set *old_cg,
			     struct cgroup *new_cgrp,
			     struct cgroup_subsys_state *template[])
{
	struct list_head *cand_head = &cg->cg_links;
	struct list_head *old_head = &old_cg->cg_links;
	struct list_head *l1, *l2;

	/* Cheap filter first: every subsystem state pointer must match. */
	if (memcmp(template, cg->subsys, sizeof(cg->subsys)) != 0)
		return false;

	/*
	 * The memcmp cannot tell apart cgroups in hierarchies that have
	 * no subsystems, so also walk both link lists in lock-step and
	 * compare the cgroup pointers themselves. This walk alone would
	 * be sufficient, but the memcmp rejects nearly all candidates
	 * before we get to this more expensive check.
	 */
	for (l1 = cand_head->next, l2 = old_head->next;
	     l1 != cand_head;
	     l1 = l1->next, l2 = l2->next) {
		struct cgroup *cand_cgrp, *old_cgrp, *expected;

		/* Both lists are expected to have the same length. */
		BUG_ON(l2 == old_head);

		cand_cgrp = list_entry(l1, struct cg_cgroup_link,
				       cg_link_list)->cgrp;
		old_cgrp = list_entry(l2, struct cg_cgroup_link,
				      cg_link_list)->cgrp;

		/* Hierarchies should be linked in the same order. */
		BUG_ON(cand_cgrp->root != old_cgrp->root);

		/*
		 * In the hierarchy that is changing, the candidate must
		 * point at the new cgroup; in every other hierarchy it
		 * must point at the same cgroup as the old css_set.
		 */
		if (cand_cgrp->root == new_cgrp->root)
			expected = new_cgrp;
		else
			expected = old_cgrp;

		if (cand_cgrp != expected)
			return false;
	}

	/* The candidate list ended, so the old list must end here too. */
	BUG_ON(l2 != old_head);
	return true;
}
/*
* find_existing_css_set() is a helper for
* find_css_set(), and checks to see whether an existing
*
* oldcg: the cgroup group that we're using before the cgroup
* transition
*
* cgrp: the cgroup that we're moving into
*
* template: location in which to build the desired set of subsystem
* state objects for the new cgroup group
*/
static struct css_set *find_existing_css_set(
struct css_set *oldcg,
struct cgroup *cgrp,
struct cgroup_subsys_state *template[])
struct cgroupfs_root *root = cgrp->root;
struct hlist_head *hhead;
struct hlist_node *node;
struct css_set *cg;
/* Built the set of subsystem state objects that we want to
* see in the new css_set */
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
/* Subsystem is in this hierarchy. So we want
* the subsystem state from the new
* cgroup */
template[i] = cgrp->subsys[i];
} else {
/* Subsystem is not in this hierarchy, so we
* don't want to change the subsystem state */
template[i] = oldcg->subsys[i];
}
}
hhead = css_set_hash(template);
hlist_for_each_entry(cg, node, hhead, hlist) {
if (!compare_css_sets(cg, oldcg, cgrp, template))
continue;
/* This css_set matches what we need */
return cg;
/* No existing cgroup group matched */
return NULL;
}
static void free_cg_links(struct list_head *tmp)
{
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
list_del(&link->cgrp_link_list);
kfree(link);
}
}
/*
 * allocate_cg_links() allocates "count" cg_cgroup_link structures
 * and chains them on tmp through their cgrp_link_list fields.
 *
 * Returns 0 on success or -ENOMEM on allocation failure. On failure
 * every link allocated so far is freed again and tmp is left empty, so
 * the caller has nothing to clean up.
 */
static int allocate_cg_links(int count, struct list_head *tmp)
{
	struct cg_cgroup_link *link;
	int i;

	INIT_LIST_HEAD(tmp);
	for (i = 0; i < count; i++) {
		link = kmalloc(sizeof(*link), GFP_KERNEL);
		if (!link) {
			/* Don't leak the links already on the list. */
			free_cg_links(tmp);
			return -ENOMEM;
		}
		list_add(&link->cgrp_link_list, tmp);
	}
	return 0;
}
/**
 * link_css_set - a helper function to link a css_set to a cgroup
 * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
 * @cg: the css_set to be linked
 * @cgrp: the destination cgroup
 *
 * Consumes the first link on @tmp_cg_links; it is a BUG if that list
 * is empty.
 */
static void link_css_set(struct list_head *tmp_cg_links,
			 struct css_set *cg, struct cgroup *cgrp)
{
	struct cg_cgroup_link *link;

	BUG_ON(list_empty(tmp_cg_links));
	link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
				cgrp_link_list);
	link->cg = cg;
	link->cgrp = cgrp;
	/* Moves the link off the temporary list onto the cgroup's list. */
	list_move(&link->cgrp_link_list, &cgrp->css_sets);
	/*
	 * Always add links to the tail of the list so that the list
	 * is sorted by order of hierarchy creation
	 */
	list_add_tail(&link->cg_link_list, &cg->cg_links);
}
/*
 * find_css_set() takes an existing cgroup group and a
 * cgroup object, and returns a css_set object that's
 * equivalent to the old group, but with the given cgroup
 * substituted into the appropriate hierarchy. Must be called with
 * cgroup_mutex held.
 *
 * Returns the css_set with a reference held for the caller (either an
 * extra reference on an existing set, or the initial reference on a
 * newly created one), or NULL if memory allocation fails.
 */
static struct css_set *find_css_set(
	struct css_set *oldcg, struct cgroup *cgrp)
{
	struct css_set *res;
	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
	int i;
	struct list_head tmp_cg_links;
	struct hlist_head *hhead;
	struct cg_cgroup_link *link;

	/* First see if we already have a cgroup group that matches
	 * the desired set */
	read_lock(&css_set_lock);
	res = find_existing_css_set(oldcg, cgrp, template);
	if (res)
		get_css_set(res);
	read_unlock(&css_set_lock);

	if (res)
		return res;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return NULL;

	/* Allocate all the cg_cgroup_link objects that we'll need */
	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
		kfree(res);
		return NULL;
	}

	atomic_set(&res->refcount, 1);
	INIT_LIST_HEAD(&res->cg_links);
	INIT_LIST_HEAD(&res->tasks);

	/* Copy the set of subsystem state objects generated in
	 * find_existing_css_set() */
	memcpy(res->subsys, template, sizeof(res->subsys));

	write_lock(&css_set_lock);
	/* Add reference counts and links from the new css_set. */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup *ss_cgrp = res->subsys[i]->cgroup;

		atomic_inc(&ss_cgrp->count);
	}
	/*
	 * One link per entry of the old css_set; the entry for the
	 * hierarchy being changed points at the new cgroup instead.
	 */
	list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
		struct cgroup *c = link->cgrp;

		if (c->root == cgrp->root)
			c = cgrp;
		link_css_set(&tmp_cg_links, res, c);
	}

	/* Every pre-allocated link must have been consumed. */
	BUG_ON(!list_empty(&tmp_cg_links));

	css_set_count++;

	/* Add this cgroup group to the hash table */
	hhead = css_set_hash(res->subsys);
	hlist_add_head(&res->hlist, hhead);

	write_unlock(&css_set_lock);

	return res;
}
/*
 * Return the cgroup for "task" from the given hierarchy. Must be
 * called with cgroup_mutex held.
 */
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroupfs_root *root)
{
	struct cgroup *found = NULL;
	struct cg_cgroup_link *lnk;
	struct css_set *cset;

	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	read_lock(&css_set_lock);

	/*
	 * The task itself is not locked: with cgroup_mutex held it
	 * cannot change groups, so the only thing that can happen
	 * is that it exits and its css is set back to init_css_set.
	 */
	cset = task->cgroups;
	if (cset != &init_css_set) {
		/* Search this css_set's links for the one in @root. */
		list_for_each_entry(lnk, &cset->cg_links, cg_link_list) {
			if (lnk->cgrp->root != root)
				continue;
			found = lnk->cgrp;
			break;
		}
	} else {
		/* The default css_set maps to the top cgroup of @root. */
		found = &root->top_cgroup;
	}

	read_unlock(&css_set_lock);
	BUG_ON(!found);
	return found;
}
/*
* There is one global cgroup mutex. We also require taking
* task_lock() when dereferencing a task's cgroup subsys pointers.
* See "The task_lock() exception", at the end of this comment.
*
* A task must hold cgroup_mutex to modify cgroups.
*
* Any task can increment and decrement the count field without lock.
* So in general, code holding cgroup_mutex can't rely on the count
* field not changing. However, if the count goes to zero, then only
* cgroup_attach_task() can increment it again. Because a count of zero
* means that no tasks are currently attached, therefore there is no
* way a task attached to that cgroup can fork (the other way to
* increment the count). So code holding cgroup_mutex can safely
* assume that if the count is zero, it will stay zero. Similarly, if
* a task holds cgroup_mutex on a cgroup with zero count, it
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
* The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
* (usually) take cgroup_mutex. These are the two most performance
* critical pieces of code here. The exception occurs on cgroup_exit(),
* when a task in a notify_on_release cgroup exits. Then cgroup_mutex
* is taken, and if the cgroup count is zero, a usermode call made
* to the release agent with the name of the cgroup (path relative to
* the root of cgroup file system) as the argument.
*
* A cgroup can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cgroups is empty. Since all
* tasks in the system use _some_ cgroup, and since there is always at
* least one task in the system (init, pid == 1), therefore, top_cgroup
* always has either children cgroups and/or using tasks. So we don't
* need a special hack to ensure that top_cgroup cannot be deleted.
*
* The task_lock() exception
*
* The need for this exception arises from the action of
* cgroup_attach_task(), which overwrites one task's cgroup pointer with
* another. It does so using cgroup_mutex, however there are
* several performance critical places that need to reference
* task->cgroup without the expense of grabbing a system global
* mutex. Therefore except as noted below, when dereferencing or, as
* in cgroup_attach_task(), modifying a task's cgroup pointer we use
* task_lock(), which acts on a spinlock (task->alloc_lock) already in
* the task_struct routinely used for such matters.
*
* P.S. One more locking exception. RCU is used to guard the
* update of a task's cgroup pointer by cgroup_attach_task().
*/
/**
 * cgroup_lock - lock out any changes to cgroup structures
 *
 * Acquires the global cgroup_mutex. Must be paired with a later
 * cgroup_unlock() call.
 */
void cgroup_lock(void)
{
mutex_lock(&cgroup_mutex);
}
/**
 * cgroup_unlock - release lock on cgroup changes
 *
 * Undo the lock taken in a previous cgroup_lock() call by releasing
 * the global cgroup_mutex.
 */
void cgroup_unlock(void)
{
mutex_unlock(&cgroup_mutex);
}
/*
 * A couple of forward declarations required, due to cyclic reference loop:
 * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
 * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
 * -> cgroup_mkdir.
 */
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp);
static const struct inode_operations cgroup_dir_inode_operations;
static struct file_operations proc_cgroupstats_operations;

/* Backing device info installed on every inode by cgroup_new_inode(). */
static struct backing_dev_info cgroup_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
};

static int alloc_css_id(struct cgroup_subsys *ss,
			struct cgroup *parent, struct cgroup *child);
/*
 * cgroup_new_inode - allocate and initialise an inode
 * @mode: value stored in i_mode
 * @sb: superblock passed to new_inode()
 *
 * Ownership is taken from the current task's fsuid/fsgid, all three
 * timestamps are set to the same CURRENT_TIME value, and the mapping
 * uses cgroup_backing_dev_info. Returns NULL if new_inode() fails.
 */
static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
{
	struct inode *node = new_inode(sb);

	if (!node)
		return NULL;

	node->i_mode = mode;
	node->i_uid = current_fsuid();
	node->i_gid = current_fsgid();
	node->i_atime = node->i_mtime = node->i_ctime = CURRENT_TIME;
	node->i_mapping->backing_dev_info = &cgroup_backing_dev_info;

	return node;
}
/*
 * Call subsys's pre_destroy handler.
 * This is called before css refcnt check.
 *
 * Walks the subsystems attached to @cgrp's hierarchy and stops at the
 * first handler that returns nonzero. Returns that value, or 0 if every
 * handler succeeded or no subsystem has a pre_destroy handler.
 */
static int cgroup_call_pre_destroy(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ret = 0;

	for_each_subsys(cgrp->root, ss)
		if (ss->pre_destroy) {
			ret = ss->pre_destroy(ss, cgrp);
			if (ret)
				break;
		}
	return ret;
}
/*
 * RCU callback (queued via call_rcu() in cgroup_diput()): frees the
 * struct cgroup that embeds @obj as its rcu_head member.
 */
static void free_cgroup_rcu(struct rcu_head *obj)
{
	kfree(container_of(obj, struct cgroup, rcu_head));
}
/*
 * cgroup_diput - release a dentry's inode, tearing down a removed cgroup
 * @dentry: the dentry being released
 * @inode: its inode
 *
 * For a directory, @dentry->d_fsdata is the cgroup. It must already be
 * marked removed. Its subsystem state is destroyed, the hierarchy's
 * cgroup count is decremented, the superblock reference is dropped and
 * the cgroup is freed after an RCU grace period. In all cases the inode
 * reference is dropped with iput().
 */
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
	/* is dentry a directory ? if so, kfree() associated cgroup */
	if (S_ISDIR(inode->i_mode)) {
		struct cgroup *cgrp = dentry->d_fsdata;
		struct cgroup_subsys *ss;

		BUG_ON(!(cgroup_is_removed(cgrp)));
		/* It's possible for external users to be holding css
		 * reference counts on a cgroup; css_put() needs to
		 * be able to access the cgroup after decrementing
		 * the reference count in order to know if it needs to
		 * queue the cgroup to be handled by the release
		 * agent */
		synchronize_rcu();

		mutex_lock(&cgroup_mutex);
		/*
		 * Release the subsystem state objects.
		 */
		for_each_subsys(cgrp->root, ss)
			ss->destroy(ss, cgrp);

		cgrp->root->number_of_cgroups--;
		mutex_unlock(&cgroup_mutex);

		/*
		 * Drop the active superblock reference that we took when we
		 * created the cgroup
		 */
		deactivate_super(cgrp->root->sb);

		call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
	}
	iput(inode);
}
/*
 * remove_dir - delete directory dentry @d from its parent
 *
 * A reference on the parent dentry is held across the operation and
 * dropped at the end.
 */
static void remove_dir(struct dentry *d)
{
	struct dentry *dir;

	dir = dget(d->d_parent);

	d_delete(d);
	simple_rmdir(dir->d_inode, d);

	dput(dir);
}
/*
 * cgroup_clear_directory - remove every child entry of a cgroup directory
 * @dentry: the directory; the caller must hold its inode's i_mutex
 *
 * Each child is unhooked from d_subdirs; children that have an inode
 * are deleted and unlinked. Children must not be directories (BUG
 * otherwise).
 */
static void cgroup_clear_directory(struct dentry *dentry)
{
struct list_head *node;
BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
spin_lock(&dcache_lock);
node = dentry->d_subdirs.next;
while (node != &dentry->d_subdirs) {
struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
/* Unhook the child so the next pass sees a new first entry. */
list_del_init(node);
if (d->d_inode) {
/* This should never be called on a cgroup
 * directory with child cgroups */
BUG_ON(d->d_inode->i_mode & S_IFDIR);
/* Take a reference before dropping dcache_lock. */
d = dget_locked(d);
/* dcache_lock is released around the delete/unlink/dput calls. */
spin_unlock(&dcache_lock);
d_delete(d);
simple_unlink(dentry->d_inode, d);
dput(d);
spin_lock(&dcache_lock);
}
/* Always restart from the head; the lock may have been dropped above. */
node = dentry->d_subdirs.next;
}
spin_unlock(&dcache_lock);
}
/*
 * cgroup_d_remove_dir - empty a cgroup directory and then remove it
 *
 * Clears the directory's entries, unhooks the dentry from its parent's
 * child list under dcache_lock, then calls remove_dir() on it.
 *
 * NOTE : the dentry must have been dget()'ed
 */
static void cgroup_d_remove_dir(struct dentry *dentry)
{
cgroup_clear_directory(dentry);
spin_lock(&dcache_lock);
list_del_init(&dentry->d_u.d_child);
spin_unlock(&dcache_lock);
remove_dir(dentry);
}
/*
 * A queue for waiters doing rmdir() on a cgroup. A task will sleep when
 * cgroup->count == 0 && list_empty(&cgroup->children) && a subsystem
 * still holds some reference to css->refcnt. In general, this refcnt is
 * expected to go down to zero soon.
 *
 * The CGRP_WAIT_ON_RMDIR flag is set under the cgroup's inode->i_mutex.
 */
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
wake_up_all(&cgroup_rmdir_waitq);
}
/*
 * cgroup_exclude_rmdir - take a reference on @css via css_get().
 *
 * NOTE(review): presumably paired with cgroup_release_and_wakeup_rmdir(),
 * so that rmdir() waits while the reference is held -- confirm against
 * the rmdir path, which is not visible here.
 */
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
css_get(css);
}
/*
 * cgroup_release_and_wakeup_rmdir - wake rmdir waiters, then drop @css
 *
 * Wakes any task waiting on @css->cgroup (if CGRP_WAIT_ON_RMDIR is set)
 * and then releases one reference on @css with css_put().
 */
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
cgroup_wakeup_rmdir_waiter(css->cgroup);
css_put(css);
}
/*
 * rebind_subsystems - change the set of subsystems bound to a hierarchy
 * @root: the hierarchy being changed
 * @final_bits: bitmask (bit i <-> subsys[i]) of the subsystems that
 *              should be attached to @root afterwards
 *
 * Subsystems newly present in @final_bits are moved from the dummy
 * hierarchy (rootnode) to @root; subsystems no longer present are moved
 * back to the dummy hierarchy. A subsystem's optional ->bind() callback
 * is invoked for each move, under that subsystem's hierarchy_mutex.
 *
 * Returns 0 on success, or -EBUSY if a subsystem to be added is bound to
 * another hierarchy or if @root has any cgroup besides its top cgroup.
 * Nothing is modified when -EBUSY is returned.
 */
static int rebind_subsystems(struct cgroupfs_root *root,
			      unsigned long final_bits)
{
	unsigned long added_bits, removed_bits;
	struct cgroup *cgrp = &root->top_cgroup;
	int i;

	removed_bits = root->actual_subsys_bits & ~final_bits;
	added_bits = final_bits & ~root->actual_subsys_bits;
	/* Check that any added subsystems are currently free */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		unsigned long bit = 1UL << i;
		struct cgroup_subsys *ss = subsys[i];

		if (!(bit & added_bits))
			continue;
		if (ss->root != &rootnode) {
			/* Subsystem isn't free */
			return -EBUSY;
		}
	}

	/* Currently we don't handle adding/removing subsystems when
	 * any child cgroups exist. This is theoretically supportable
	 * but involves complex error handling, so it's being left until
	 * later */
	if (root->number_of_cgroups > 1)
		return -EBUSY;

	/* Process each subsystem */
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		unsigned long bit = 1UL << i;

		if (bit & added_bits) {
			/* We're binding this subsystem to this hierarchy */
			BUG_ON(cgrp->subsys[i]);
			BUG_ON(!dummytop->subsys[i]);
			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
			mutex_lock(&ss->hierarchy_mutex);
			cgrp->subsys[i] = dummytop->subsys[i];
			cgrp->subsys[i]->cgroup = cgrp;
			list_move(&ss->sibling, &root->subsys_list);
			ss->root = root;
			/* ->bind() is optional, as in the removal path. */
			if (ss->bind)
				ss->bind(ss, cgrp);
			mutex_unlock(&ss->hierarchy_mutex);
		} else if (bit & removed_bits) {
			/* We're removing this subsystem */
			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
			mutex_lock(&ss->hierarchy_mutex);
			if (ss->bind)
				ss->bind(ss, dummytop);
			dummytop->subsys[i]->cgroup = dummytop;
			cgrp->subsys[i] = NULL;
			subsys[i]->root = &rootnode;
			list_move(&ss->sibling, &rootnode.subsys_list);
			mutex_unlock(&ss->hierarchy_mutex);
		} else if (bit & final_bits) {
			/* Subsystem state should already exist */
			BUG_ON(!cgrp->subsys[i]);
		} else {
			/* Subsystem state shouldn't exist */
			BUG_ON(cgrp->subsys[i]);
		}
	}
	root->subsys_bits = root->actual_subsys_bits = final_bits;
	synchronize_rcu();

	return 0;
}
/*
 * cgroup_show_options - emit the mount options of a hierarchy
 * @seq: output seq_file
 * @vfs: the mount; its superblock's s_fs_info is the cgroupfs_root
 *
 * Writes, each prefixed by a comma: the name of every attached
 * subsystem, "noprefix" if ROOT_NOPREFIX is set, and the release agent
 * path and hierarchy name when they are non-empty. Runs under
 * cgroup_mutex. Always returns 0.
 */
static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
	struct cgroupfs_root *hier = vfs->mnt_sb->s_fs_info;
	struct cgroup_subsys *ss;

	mutex_lock(&cgroup_mutex);

	for_each_subsys(hier, ss)
		seq_printf(seq, ",%s", ss->name);

	if (test_bit(ROOT_NOPREFIX, &hier->flags))
		seq_puts(seq, ",noprefix");

	if (hier->release_agent_path[0] != '\0')
		seq_printf(seq, ",release_agent=%s",
			   hier->release_agent_path);

	if (hier->name[0] != '\0')
		seq_printf(seq, ",name=%s", hier->name);

	mutex_unlock(&cgroup_mutex);
	return 0;
}
struct cgroup_sb_opts {
unsigned long subsys_bits;
unsigned long flags;
char *release_agent;
char *name;
struct cgroupfs_root *new_root;
};
/* Convert a hierarchy specifier into a bitmask of subsystems and
* flags. */
static int parse_cgroupfs_options(char *data,
struct cgroup_sb_opts *opts)
{
char *token, *o = data ?: "all";
unsigned long mask = (unsigned long)-1;
#ifdef CONFIG_CPUSETS
mask = ~(1UL << cpuset_subsys_id);
#endif
memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
return -EINVAL;
if (!strcmp(token, "all")) {
/* Add all non-disabled subsystems */
int i;
opts->subsys_bits = 0;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {