Newer
Older
/*
* Generic process-grouping system.
*
* Based originally on the cpuset system, extracted by Paul Menage
* Copyright (C) 2006 Google, Inc
*
* Notifications support
* Copyright (C) 2009 Nokia Corporation
* Author: Kirill A. Shutemov
*
* Copyright notices from the original cpuset code:
* --------------------------------------------------
* Copyright (C) 2003 BULL SA.
* Copyright (C) 2004-2006 Silicon Graphics, Inc.
*
* Portions derived from Patrick Mochel's sysfs code.
* sysfs is Copyright (c) 2001-3 Patrick Mochel
*
* 2003-10-10 Written by Simon Derr.
* 2003-10-22 Updates by Stephen Hemminger.
* 2004 May-July Rework by Paul Jackson.
* ---------------------------------------------------
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#include <linux/cgroup.h>
#include <linux/cred.h>
#include <linux/init_task.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/kmod.h>
#include <linux/delayacct.h>
#include <linux/cgroupstats.h>
#include <linux/idr.h>
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/flex_array.h> /* used in cgroup_attach_task */
#include <linux/kthread.h>
/*
* pidlists linger the following amount before being destroyed. The goal
* is avoiding frequent destruction in the middle of consecutive read calls
* Expiring in the middle is a performance problem not a correctness one.
* 1 sec should be enough.
*/
#define CGROUP_PIDLIST_DESTROY_DELAY HZ
/*
* cgroup_mutex is the master lock. Any modification to cgroup or its
* hierarchy must be performed while holding it.
*
* cgroup_root_mutex nests inside cgroup_mutex and should be held to modify
* cgroupfs_root of any cgroup hierarchy - subsys list, flags,
* release_agent_path and so on. Modifying requires both cgroup_mutex and
* cgroup_root_mutex. Readers can acquire either of the two. This is to
* break the following locking order cycle.
*
* A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem
* B. namespace_sem -> cgroup_mutex
*
* B happens only through cgroup_show_options() and using cgroup_root_mutex
* breaks it.
*/
#ifdef CONFIG_PROVE_RCU
DEFINE_MUTEX(cgroup_mutex);
EXPORT_SYMBOL_GPL(cgroup_mutex); /* only for lockdep */
static DEFINE_MUTEX(cgroup_mutex);
/*
* cgroup destruction makes heavy use of work items and there can be a lot
* of concurrent destructions. Use a separate workqueue so that cgroup
* destruction work items don't end up filling up max_active of system_wq
* which may lead to deadlock.
*/
static struct workqueue_struct *cgroup_destroy_wq;
/*
* pidlist destructions need to be flushed on cgroup destruction. Use a
* separate workqueue as flush domain.
*/
static struct workqueue_struct *cgroup_pidlist_destroy_wq;
/*
* Generate an array of cgroup subsystem pointers. At boot time, this is
* populated with the built in subsystems, and modular subsystems are
* registered after that. The mutable section of this array is protected by
* cgroup_mutex.
*/
#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
static struct cgroup_subsys *cgroup_subsys[CGROUP_SUBSYS_COUNT] = {
#include <linux/cgroup_subsys.h>
};
/*
* The dummy hierarchy, reserved for the subsystems that are otherwise
* unattached - it never has more than a single cgroup, and all tasks are
* part of that cgroup.
static struct cgroupfs_root cgroup_dummy_root;
/* dummy_top is a shorthand for the dummy hierarchy's top cgroup */
static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup;
/*
* cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
*/
struct cfent {
struct list_head node;
struct dentry *dentry;
struct cftype *type;
Tejun Heo
committed
struct cgroup_subsys_state *css;
/* file xattrs */
struct simple_xattrs xattrs;
/* The list of hierarchy roots */
static LIST_HEAD(cgroup_roots);
static int cgroup_root_count;
/*
* Hierarchy ID allocation and mapping. It follows the same exclusion
* rules as other root ops - both cgroup_mutex and cgroup_root_mutex for
* writes, either for reads.
*/
static DEFINE_IDR(cgroup_hierarchy_idr);
static struct cgroup_name root_cgroup_name = { .name = "/" };
/*
* Assign a monotonically increasing serial number to cgroups. It
* guarantees cgroups with bigger numbers are newer than those with smaller
* numbers. Also, as cgroups are always appended to the parent's
* ->children list, it guarantees that sibling cgroups are always sorted in
* the ascending serial number order on the list. Protected by
* cgroup_mutex.
static u64 cgroup_serial_nr_next = 1;
/* This flag indicates whether tasks in the fork and exit paths should
* check for fork/exit handlers to call. This avoids us having to do
* extra work in the fork/exit path if none of the subsystems need to
* be called.
static int need_forkexit_callback __read_mostly;
Tejun Heo
committed
static struct cftype cgroup_base_files[];
static void cgroup_destroy_css_killed(struct cgroup *cgrp);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add);
static int cgroup_file_release(struct inode *inode, struct file *file);
static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
/**
* cgroup_css - obtain a cgroup's css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns the dummy_css)
* Return @cgrp's css (cgroup_subsys_state) associated with @ss. This
* function must be called either under cgroup_mutex or rcu_read_lock() and
* the caller is responsible for pinning the returned css if it wants to
* keep accessing it outside the said locks. This function may return
* %NULL if @cgrp doesn't have @subsys_id enabled.
*/
static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
struct cgroup_subsys *ss)
Loading
Loading full blame...