Newer
Older
/*
* Generic process-grouping system.
*
* Based originally on the cpuset system, extracted by Paul Menage
* Copyright (C) 2006 Google, Inc
*
* Notifications support
* Copyright (C) 2009 Nokia Corporation
* Author: Kirill A. Shutemov
*
* Copyright notices from the original cpuset code:
* --------------------------------------------------
* Copyright (C) 2003 BULL SA.
* Copyright (C) 2004-2006 Silicon Graphics, Inc.
*
* Portions derived from Patrick Mochel's sysfs code.
* sysfs is Copyright (c) 2001-3 Patrick Mochel
*
* 2003-10-10 Written by Simon Derr.
* 2003-10-22 Updates by Stephen Hemminger.
* 2004 May-July Rework by Paul Jackson.
* ---------------------------------------------------
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#include <linux/cgroup.h>
#include <linux/cred.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/kmod.h>
#include <linux/delayacct.h>
#include <linux/cgroupstats.h>
#include <linux/idr.h>
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/flex_array.h> /* used in cgroup_attach_task */
#include <linux/kthread.h>
/* css deactivation bias, makes css->refcnt negative to deny new trygets */
#define CSS_DEACT_BIAS INT_MIN
/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
*
* cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
* cgroupfs_root of any cgroup hierarchy - subsys list, flags,
* release_agent_path and so on. Modifying requires both cgroup_mutex and
* cgroup_root_mutex. Readers can acquire either of the two. This is to
* break the following locking order cycle.
*
* A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
* B. namespace_sem -> cgroup_mutex
*
* B happens only through cgroup_show_options() and using cgroup_root_mutex
* breaks it.
*/
#ifdef CONFIG_PROVE_RCU
DEFINE_MUTEX(cgroup_mutex);
EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for task_subsys_state_check() */
#else
static DEFINE_MUTEX(cgroup_mutex);
/*
* Generate an array of cgroup subsystem pointers. At boot time, this is
* populated with the built in subsystems, and modular subsystems are
* registered after that. The mutable section of this array is protected by
* cgroup_mutex.
*/
#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
/*
* The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
* subsystems that are otherwise unattached - it never has more than a
* single cgroup, and all tasks are part of that cgroup.
*/
static struct cgroupfs_root rootnode;
/*
* cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
*/
struct cfent {
struct list_head node;
struct dentry *dentry;
struct cftype *type;
/* file xattrs */
struct simple_xattrs xattrs;
/*
* CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
* cgroup_subsys->use_id != 0.
*/
#define CSS_ID_MAX (65535)
struct css_id {
/*
* The css to which this ID points. This pointer is set to valid value
* after cgroup is populated. If cgroup is removed, this will be NULL.
* This pointer is expected to be RCU-safe because destroy()
* is called after synchronize_rcu(). But for safe use, css_tryget()
* should be used for avoiding race.
/*
* ID of this css.
*/
unsigned short id;
/*
* Depth in hierarchy which this ID belongs to.
*/
unsigned short depth;
/*
* ID is freed by RCU. (and lookup routine is RCU safe.)
*/
struct rcu_head rcu_head;
/*
* Hierarchy of CSS ID belongs to.
*/
unsigned short stack[0]; /* Array of Length (depth+1) */
};
* cgroup_event represents events which userspace want to receive.
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
*/
struct cgroup_event {
/*
* Cgroup which the event belongs to.
*/
struct cgroup *cgrp;
/*
* Control file which the event associated.
*/
struct cftype *cft;
/*
* eventfd to signal userspace about the event.
*/
struct eventfd_ctx *eventfd;
/*
* Each of these stored in a list by the cgroup.
*/
struct list_head list;
/*
* All fields below needed to unregister event when
* userspace closes eventfd.
*/
poll_table pt;
wait_queue_head_t *wqh;
wait_queue_t wait;
struct work_struct remove;
};
/* The list of hierarchy roots */
static LIST_HEAD(roots);
static int root_count;
/*
* Hierarchy ID allocation and mapping. It follows the same exclusion
* rules as other root ops - both cgroup_mutex and cgroup_root_mutex for
* writes, either for reads.
*/
static DEFINE_IDA(hierarchy_ida);
static int next_hierarchy_id;
/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
#define dummytop (&rootnode.top_cgroup)
static struct cgroup_name root_cgroup_name = { .name = "/" };
/* This flag indicates whether tasks in the fork and exit paths should
* check for fork/exit handlers to call. This avoids us having to do
* extra work in the fork/exit path if none of the subsystems need to
* be called.
static int need_forkexit_callback __read_mostly;
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
struct cftype cfts[], bool is_add);
static int css_unbias_refcnt(int refcnt)
{
return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
}
/* the current nr of refs, always >= 0 whether @css is deactivated or not */
static int css_refcnt(struct cgroup_subsys_state *css)
{
int v = atomic_read(&css->refcnt);
return css_unbias_refcnt(v);
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
return test_bit(CGRP_REMOVED, &cgrp->flags);
/**
* cgroup_is_descendant - test ancestry
* @cgrp: the cgroup to be tested
* @ancestor: possible ancestor of @cgrp
*
* Test whether @cgrp is a descendant of @ancestor. It also returns %true
* if @cgrp == @ancestor. This function is safe to call as long as @cgrp
* and @ancestor are accessible.
*/
bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
{
while (cgrp) {
if (cgrp == ancestor)
return true;
cgrp = cgrp->parent;
}
return false;
}
EXPORT_SYMBOL_GPL(cgroup_is_descendant);
static int cgroup_is_releasable(const struct cgroup *cgrp)
{
const int bits =
(1 << CGRP_RELEASABLE) |
(1 << CGRP_NOTIFY_ON_RELEASE);
return (cgrp->flags & bits) == bits;
}
static int notify_on_release(const struct cgroup *cgrp)
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
}
/*
* for_each_subsys() allows you to iterate on each subsystem attached to
* an active hierarchy
*/
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)
/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
static inline struct cgroup *__d_cgrp(struct dentry *dentry)
{
return dentry->d_fsdata;
}
static inline struct cfent *__d_cfe(struct dentry *dentry)
{
return dentry->d_fsdata;
}
static inline struct cftype *__d_cft(struct dentry *dentry)
{
return __d_cfe(dentry)->type;
}
/**
* cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
* @cgrp: the cgroup to be checked for liveness
*
* On success, returns true; the mutex should be later unlocked. On
* failure returns false with no lock held.
static bool cgroup_lock_live_group(struct cgroup *cgrp)
{
mutex_lock(&cgroup_mutex);
if (cgroup_is_removed(cgrp)) {
mutex_unlock(&cgroup_mutex);
return false;
}
return true;
}
/* the list of cgroups eligible for automatic release. Protected by
* release_list_lock */
static LIST_HEAD(release_list);
static DEFINE_RAW_SPINLOCK(release_list_lock);
static void cgroup_release_agent(struct work_struct *work);
static DECLARE_WORK(release_agent_work, cgroup_release_agent);
static void check_for_release(struct cgroup *cgrp);
/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
/*
* List running through cg_cgroup_links associated with a
* cgroup, anchored on cgroup->css_sets
*/
struct list_head cgrp_link_list;
struct cgroup *cgrp;
/*
* List running through cg_cgroup_links pointing at a
* single css_set object, anchored on css_set->cg_links
*/
struct list_head cg_link_list;
struct css_set *cg;
};
/* The default css_set - used by init and its children prior to any
* hierarchies being mounted. It contains a pointer to the root state
* for each subsystem. Also used to anchor the list of css_sets. Not
* reference-counted, to improve performance when child cgroups
* haven't been created.
*/
static struct css_set init_css_set;
static struct cg_cgroup_link init_css_set_link;
static int cgroup_init_idr(struct cgroup_subsys *ss,
struct cgroup_subsys_state *css);
/* css_set_lock protects the list of css_set objects, and the
* chain of tasks off each css_set. Nests outside task->alloc_lock
* due to cgroup_iter_start() */
static DEFINE_RWLOCK(css_set_lock);
static int css_set_count;
/*
* hash table for cgroup groups. This improves the performance to find
* an existing css_set. This hash doesn't (currently) take into
* account cgroups in empty hierarchies.
*/
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
key += (unsigned long)css[i];
key = (key >> 16) ^ key;
/* We don't maintain the lists running through each css_set to its
* task until after the first call to cgroup_iter_start(). This
* reduces the fork()/exit() overhead for people who have cgroups
* compiled into their kernel but not actually in use */
static int use_task_css_set_links __read_mostly;
static void __put_css_set(struct css_set *cg, int taskexit)
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
/*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
* rwlock
*/
if (atomic_add_unless(&cg->refcount, -1, 1))
return;
write_lock(&css_set_lock);
if (!atomic_dec_and_test(&cg->refcount)) {
write_unlock(&css_set_lock);
return;
}
/* This css_set is dead. unlink it and release cgroup refcounts */
css_set_count--;
list_for_each_entry_safe(link, saved_link, &cg->cg_links,
cg_link_list) {
struct cgroup *cgrp = link->cgrp;
list_del(&link->cg_link_list);
list_del(&link->cgrp_link_list);
/*
* We may not be holding cgroup_mutex, and if cgrp->count is
* dropped to 0 the cgroup can be destroyed at any time, hence
* rcu_read_lock is used to keep it alive.
*/
rcu_read_lock();
if (atomic_dec_and_test(&cgrp->count) &&
notify_on_release(cgrp)) {
if (taskexit)
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
kfree(link);
write_unlock(&css_set_lock);
kfree_rcu(cg, rcu_head);
/*
* refcounted get/put for css_set objects
*/
static inline void get_css_set(struct css_set *cg)
{
atomic_inc(&cg->refcount);
}
static inline void put_css_set(struct css_set *cg)
{
__put_css_set(cg, 0);
static inline void put_css_set_taskexit(struct css_set *cg)
{
__put_css_set(cg, 1);
}
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
/*
* compare_css_sets - helper function for find_existing_css_set().
* @cg: candidate css_set being tested
* @old_cg: existing css_set for a task
* @new_cgrp: cgroup that's being entered by the task
* @template: desired set of css pointers in css_set (pre-calculated)
*
* Returns true if "cg" matches "old_cg" except for the hierarchy
* which "new_cgrp" belongs to, for which it should match "new_cgrp".
*/
static bool compare_css_sets(struct css_set *cg,
struct css_set *old_cg,
struct cgroup *new_cgrp,
struct cgroup_subsys_state *template[])
{
struct list_head *l1, *l2;
if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
/* Not all subsystems matched */
return false;
}
/*
* Compare cgroup pointers in order to distinguish between
* different cgroups in heirarchies with no subsystems. We
* could get by with just this check alone (and skip the
* memcmp above) but on most setups the memcmp check will
* avoid the need for this more expensive check on almost all
* candidates.
*/
l1 = &cg->cg_links;
l2 = &old_cg->cg_links;
while (1) {
struct cg_cgroup_link *cgl1, *cgl2;
struct cgroup *cg1, *cg2;
l1 = l1->next;
l2 = l2->next;
/* See if we reached the end - both lists are equal length. */
if (l1 == &cg->cg_links) {
BUG_ON(l2 != &old_cg->cg_links);
break;
} else {
BUG_ON(l2 == &old_cg->cg_links);
}
/* Locate the cgroups associated with these links. */
cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
cg1 = cgl1->cgrp;
cg2 = cgl2->cgrp;
/* Hierarchies should be linked in the same order. */
BUG_ON(cg1->root != cg2->root);
/*
* If this hierarchy is the hierarchy of the cgroup
* that's changing, then we need to check that this
* css_set points to the new cgroup; if it's any other
* hierarchy, then this css_set should point to the
* same cgroup as the old css_set.
*/
if (cg1->root == new_cgrp->root) {
if (cg1 != new_cgrp)
return false;
} else {
if (cg1 != cg2)
return false;
}
}
return true;
}
/*
* find_existing_css_set() is a helper for
* find_css_set(), and checks to see whether an existing
*
* oldcg: the cgroup group that we're using before the cgroup
* transition
*
* cgrp: the cgroup that we're moving into
*
* template: location in which to build the desired set of subsystem
* state objects for the new cgroup group
*/
static struct css_set *find_existing_css_set(
struct css_set *oldcg,
struct cgroup *cgrp,
struct cgroup_subsys_state *template[])
struct cgroupfs_root *root = cgrp->root;
/*
* Build the set of subsystem state objects that we want to see in the
* new css_set. while subsystems can change globally, the entries here
* won't change, so no need for locking.
*/
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
if (root->subsys_mask & (1UL << i)) {
/* Subsystem is in this hierarchy. So we want
* the subsystem state from the new
* cgroup */
template[i] = cgrp->subsys[i];
} else {
/* Subsystem is not in this hierarchy, so we
* don't want to change the subsystem state */
template[i] = oldcg->subsys[i];
}
}
hash_for_each_possible(css_set_table, cg, hlist, key) {
if (!compare_css_sets(cg, oldcg, cgrp, template))
continue;
/* This css_set matches what we need */
return cg;
/* No existing cgroup group matched */
return NULL;
}
static void free_cg_links(struct list_head *tmp)
{
struct cg_cgroup_link *link;
struct cg_cgroup_link *saved_link;
list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
list_del(&link->cgrp_link_list);
kfree(link);
}
}
/*
* allocate_cg_links() allocates "count" cg_cgroup_link structures
* and chains them on tmp through their cgrp_link_list fields. Returns 0 on
* success or a negative error
*/
static int allocate_cg_links(int count, struct list_head *tmp)
{
struct cg_cgroup_link *link;
int i;
INIT_LIST_HEAD(tmp);
for (i = 0; i < count; i++) {
link = kmalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
return -ENOMEM;
}
list_add(&link->cgrp_link_list, tmp);
}
return 0;
}
/**
* link_css_set - a helper function to link a css_set to a cgroup
* @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
* @cg: the css_set to be linked
* @cgrp: the destination cgroup
*/
static void link_css_set(struct list_head *tmp_cg_links,
struct css_set *cg, struct cgroup *cgrp)
{
struct cg_cgroup_link *link;
BUG_ON(list_empty(tmp_cg_links));
link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
cgrp_link_list);
link->cg = cg;
link->cgrp = cgrp;
atomic_inc(&cgrp->count);
list_move(&link->cgrp_link_list, &cgrp->css_sets);
/*
* Always add links to the tail of the list so that the list
* is sorted by order of hierarchy creation
*/
list_add_tail(&link->cg_link_list, &cg->cg_links);
/*
* find_css_set() takes an existing cgroup group and a
* cgroup object, and returns a css_set object that's
* equivalent to the old group, but with the given cgroup
* substituted into the appropriate hierarchy. Must be called with
* cgroup_mutex held
*/
static struct css_set *find_css_set(
struct css_set *oldcg, struct cgroup *cgrp)
{
struct css_set *res;
struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
struct list_head tmp_cg_links;
struct cg_cgroup_link *link;
/* First see if we already have a cgroup group that matches
* the desired set */
read_lock(&css_set_lock);
res = find_existing_css_set(oldcg, cgrp, template);
if (res)
get_css_set(res);
read_unlock(&css_set_lock);
if (res)
return res;
res = kmalloc(sizeof(*res), GFP_KERNEL);
if (!res)
return NULL;
/* Allocate all the cg_cgroup_link objects that we'll need */
if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
kfree(res);
return NULL;
}
atomic_set(&res->refcount, 1);
INIT_LIST_HEAD(&res->cg_links);
INIT_LIST_HEAD(&res->tasks);
/* Copy the set of subsystem state objects generated in
* find_existing_css_set() */
memcpy(res->subsys, template, sizeof(res->subsys));
write_lock(&css_set_lock);
/* Add reference counts and links from the new css_set. */
list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
struct cgroup *c = link->cgrp;
if (c->root == cgrp->root)
c = cgrp;
link_css_set(&tmp_cg_links, res, c);
}
BUG_ON(!list_empty(&tmp_cg_links));
css_set_count++;
/* Add this cgroup group to the hash table */
key = css_set_hash(res->subsys);
hash_add(css_set_table, &res->hlist, key);
write_unlock(&css_set_lock);
return res;
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
/*
* Return the cgroup for "task" from the given hierarchy. Must be
* called with cgroup_mutex held.
*/
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
struct cgroupfs_root *root)
{
struct css_set *css;
struct cgroup *res = NULL;
BUG_ON(!mutex_is_locked(&cgroup_mutex));
read_lock(&css_set_lock);
/*
* No need to lock the task - since we hold cgroup_mutex the
* task can't change groups, so the only thing that can happen
* is that it exits and its css is set back to init_css_set.
*/
css = task->cgroups;
if (css == &init_css_set) {
res = &root->top_cgroup;
} else {
struct cg_cgroup_link *link;
list_for_each_entry(link, &css->cg_links, cg_link_list) {
struct cgroup *c = link->cgrp;
if (c->root == root) {
res = c;
break;
}
}
}
read_unlock(&css_set_lock);
BUG_ON(!res);
return res;
}
/*
* There is one global cgroup mutex. We also require taking
* task_lock() when dereferencing a task's cgroup subsys pointers.
* See "The task_lock() exception", at the end of this comment.
*
* A task must hold cgroup_mutex to modify cgroups.
*
* Any task can increment and decrement the count field without lock.
* So in general, code holding cgroup_mutex can't rely on the count
* field not changing. However, if the count goes to zero, then only
* cgroup_attach_task() can increment it again. Because a count of zero
* means that no tasks are currently attached, therefore there is no
* way a task attached to that cgroup can fork (the other way to
* increment the count). So code holding cgroup_mutex can safely
* assume that if the count is zero, it will stay zero. Similarly, if
* a task holds cgroup_mutex on a cgroup with zero count, it
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
* The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
* (usually) take cgroup_mutex. These are the two most performance
* critical pieces of code here. The exception occurs on cgroup_exit(),
* when a task in a notify_on_release cgroup exits. Then cgroup_mutex
* is taken, and if the cgroup count is zero, a usermode call made
* to the release agent with the name of the cgroup (path relative to
* the root of cgroup file system) as the argument.
*
* A cgroup can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cgroups is empty. Since all
* tasks in the system use _some_ cgroup, and since there is always at
* least one task in the system (init, pid == 1), therefore, top_cgroup
* always has either children cgroups and/or using tasks. So we don't
* need a special hack to ensure that top_cgroup cannot be deleted.
*
* The task_lock() exception
*
* The need for this exception arises from the action of
* cgroup_attach_task(), which overwrites one task's cgroup pointer with
* another. It does so using cgroup_mutex, however there are
* several performance critical places that need to reference
* task->cgroup without the expense of grabbing a system global
* mutex. Therefore except as noted below, when dereferencing or, as
* in cgroup_attach_task(), modifying a task's cgroup pointer we use
* task_lock(), which acts on a spinlock (task->alloc_lock) already in
* the task_struct routinely used for such matters.
*
* P.S. One more locking exception. RCU is used to guard the
* update of a tasks cgroup pointer by cgroup_attach_task()
*/
/*
* A couple of forward declarations required, due to cyclic reference loop:
* cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
* cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
* -> cgroup_mkdir.
*/
static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
unsigned long subsys_mask);
static const struct inode_operations cgroup_dir_inode_operations;
static const struct file_operations proc_cgroupstats_operations;
static struct backing_dev_info cgroup_backing_dev_info = {
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
static int alloc_css_id(struct cgroup_subsys *ss,
struct cgroup *parent, struct cgroup *child);
static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
{
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
}
return inode;
}
static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
{
struct cgroup_name *name;
name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
if (!name)
return NULL;
strcpy(name->name, dentry->d_name.name);
return name;
}
static void cgroup_free_fn(struct work_struct *work)
{
struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
struct cgroup_subsys *ss;
mutex_lock(&cgroup_mutex);
/*
* Release the subsystem state objects.
*/
for_each_subsys(cgrp->root, ss)
ss->css_free(cgrp);
cgrp->root->number_of_cgroups--;
mutex_unlock(&cgroup_mutex);
/*
* We get a ref to the parent's dentry, and put the ref when
* this cgroup is being freed, so it's guaranteed that the
* parent won't be destroyed before its children.
*/
dput(cgrp->parent->dentry);
ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
/*
* Drop the active superblock reference that we took when we
* created the cgroup. This will free cgrp->root, if we are
* holding the last reference to @sb.
*/
deactivate_super(cgrp->root->sb);
/*
* if we're getting rid of the cgroup, refcount should ensure
* that there are no pidlists left.
*/
BUG_ON(!list_empty(&cgrp->pidlists));
simple_xattrs_free(&cgrp->xattrs);
kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
}
static void cgroup_free_rcu(struct rcu_head *head)
{
struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
schedule_work(&cgrp->free_work);
}
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
/* is dentry a directory ? if so, kfree() associated cgroup */
if (S_ISDIR(inode->i_mode)) {
struct cgroup *cgrp = dentry->d_fsdata;
BUG_ON(!(cgroup_is_removed(cgrp)));
call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
} else {
struct cfent *cfe = __d_cfe(dentry);
struct cgroup *cgrp = dentry->d_parent->d_fsdata;
WARN_ONCE(!list_empty(&cfe->node) &&
cgrp != &cgrp->root->top_cgroup,
"cfe still linked for %s\n", cfe->type->name);
}
iput(inode);
}
static int cgroup_delete(const struct dentry *d)
{
return 1;
}
static void remove_dir(struct dentry *d)
{
struct dentry *parent = dget(d->d_parent);
d_delete(d);
simple_rmdir(parent->d_inode, d);
dput(parent);
}
static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
{
struct cfent *cfe;
lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
lockdep_assert_held(&cgroup_mutex);
/*
* If we're doing cleanup due to failure of cgroup_create(),
* the corresponding @cfe may not exist.
*/
list_for_each_entry(cfe, &cgrp->files, node) {
struct dentry *d = cfe->dentry;
if (cft && cfe->type != cft)
continue;
dget(d);
d_delete(d);
simple_unlink(cgrp->dentry->d_inode, d);
/**
* cgroup_clear_directory - selective removal of base and subsystem files
* @dir: directory containing the files
* @base_files: true if the base files should be removed
* @subsys_mask: mask of the subsystem ids whose files should be removed
*/
static void cgroup_clear_directory(struct dentry *dir, bool base_files,
unsigned long subsys_mask)
struct cgroup_subsys *ss;
for_each_subsys(cgrp->root, ss) {
struct cftype_set *set;
if (!test_bit(ss->subsys_id, &subsys_mask))
continue;
list_for_each_entry(set, &ss->cftsets, node)
cgroup_addrm_files(cgrp, NULL, set->cfts, false);
}
if (base_files) {
while (!list_empty(&cgrp->files))
cgroup_rm_file(cgrp, NULL);
}
}
/*
* NOTE : the dentry must have been dget()'ed
*/
static void cgroup_d_remove_dir(struct dentry *dentry)
{
struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
cgroup_clear_directory(dentry, true, root->subsys_mask);
parent = dentry->d_parent;
spin_lock(&parent->d_lock);
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
list_del_init(&dentry->d_u.d_child);
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
remove_dir(dentry);
}
* Call with cgroup_mutex held. Drops reference counts on modules, including
* any duplicate ones that parse_cgroupfs_options took. If this function
* returns an error, no reference counts are touched.
static int rebind_subsystems(struct cgroupfs_root *root,
unsigned long final_subsys_mask)
unsigned long added_mask, removed_mask;
struct cgroup *cgrp = &root->top_cgroup;