Newer
Older
* through ->cset_link. Returns 0 on success or -errno.
static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
struct cgrp_cset_link *link;
INIT_LIST_HEAD(tmp_links);
for (i = 0; i < count; i++) {
link = kzalloc(sizeof(*link), GFP_KERNEL);
free_cgrp_cset_links(tmp_links);
return -ENOMEM;
}
list_add(&link->cset_link, tmp_links);
}
return 0;
}
/**
* link_css_set - a helper function to link a css_set to a cgroup
* @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
* @cset: the css_set to be linked
* @cgrp: the destination cgroup
*/
static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
struct cgroup *cgrp)
struct cgrp_cset_link *link;
BUG_ON(list_empty(tmp_links));
if (cgroup_on_dfl(cgrp))
cset->dfl_cgrp = cgrp;
link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
link->cset = cset;
link->cgrp = cgrp;
/*
* Always add links to the tail of the lists so that the lists are
* in choronological order.
*/
list_move_tail(&link->cset_link, &cgrp->cset_links);
list_add_tail(&link->cgrp_link, &cset->cgrp_links);
if (cgroup_parent(cgrp))
cgroup_get(cgrp);
/**
* find_css_set - return a new css_set with one cgroup updated
* @old_cset: the baseline css_set
* @cgrp: the cgroup to be updated
*
* Return a new css_set that's equivalent to @old_cset, but with @cgrp
* substituted into the appropriate hierarchy.
static struct css_set *find_css_set(struct css_set *old_cset,
struct cgroup *cgrp)
struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
struct css_set *cset;
struct list_head tmp_links;
struct cgrp_cset_link *link;
lockdep_assert_held(&cgroup_mutex);
/* First see if we already have a cgroup group that matches
* the desired set */
spin_lock_irq(&css_set_lock);
cset = find_existing_css_set(old_cset, cgrp, template);
if (cset)
get_css_set(cset);
spin_unlock_irq(&css_set_lock);
if (cset)
return cset;
cset = kzalloc(sizeof(*cset), GFP_KERNEL);
return NULL;
/* Allocate all the cgrp_cset_link objects that we'll need */
if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
return NULL;
}
atomic_set(&cset->refcount, 1);
INIT_LIST_HEAD(&cset->cgrp_links);
INIT_LIST_HEAD(&cset->tasks);
INIT_LIST_HEAD(&cset->mg_preload_node);
INIT_LIST_HEAD(&cset->mg_node);
INIT_LIST_HEAD(&cset->task_iters);
INIT_HLIST_NODE(&cset->hlist);
/* Copy the set of subsystem state objects generated in
* find_existing_css_set() */
memcpy(cset->subsys, template, sizeof(cset->subsys));
spin_lock_irq(&css_set_lock);
/* Add reference counts and links from the new css_set. */
list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
if (c->root == cgrp->root)
c = cgrp;
link_css_set(&tmp_links, cset, c);
}
BUG_ON(!list_empty(&tmp_links));
css_set_count++;
key = css_set_hash(cset->subsys);
hash_add(css_set_table, &cset->hlist, key);
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cset->subsys[ssid];
list_add_tail(&cset->e_cset_node[ssid],
&css->cgroup->e_csets[ssid]);
css_get(css);
}
spin_unlock_irq(&css_set_lock);
static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
{
struct cgroup *root_cgrp = kf_root->kn->priv;
/*
 * Allocate an id for @root from cgroup_hierarchy_idr and store it in
 * @root->hierarchy_id.  Caller must hold cgroup_mutex.
 *
 * Returns 0 on success or the negative value returned by
 * idr_alloc_cyclic().
 */
static int cgroup_init_root_id(struct cgroup_root *root)
{
	int hierarchy_id;

	lockdep_assert_held(&cgroup_mutex);

	hierarchy_id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0,
					GFP_KERNEL);
	if (hierarchy_id >= 0) {
		root->hierarchy_id = hierarchy_id;
		return 0;
	}

	return hierarchy_id;
}
/*
 * Remove @root->hierarchy_id from cgroup_hierarchy_idr.  Caller must hold
 * cgroup_mutex.
 */
static void cgroup_exit_root_id(struct cgroup_root *root)
{
	lockdep_assert_held(&cgroup_mutex);

	idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
}
/* Destroy @root->cgroup_idr and free @root; a NULL @root is ignored. */
static void cgroup_free_root(struct cgroup_root *root)
{
	if (!root)
		return;

	idr_destroy(&root->cgroup_idr);
	kfree(root);
}
static void cgroup_destroy_root(struct cgroup_root *root)
struct cgroup *cgrp = &root->cgrp;
struct cgrp_cset_link *link, *tmp_link;
Tejun Heo
committed
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
BUG_ON(!list_empty(&cgrp->self.children));
/* Rebind all subsystems back to the default hierarchy */
Tejun Heo
committed
WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
/*
* Release all the links from cset_links to this hierarchy's
* root cgroup
*/
spin_lock_irq(&css_set_lock);
list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
list_del(&link->cset_link);
list_del(&link->cgrp_link);
kfree(link);
}
spin_unlock_irq(&css_set_lock);
if (!list_empty(&root->root_list)) {
list_del(&root->root_list);
cgroup_root_count--;
}
cgroup_exit_root_id(root);
mutex_unlock(&cgroup_mutex);
cgroup_free_root(root);
}
Serge E. Hallyn
committed
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
/*
* look up cgroup associated with current task's cgroup namespace on the
* specified hierarchy
*/
static struct cgroup *
current_cgns_cgroup_from_root(struct cgroup_root *root)
{
struct cgroup *res = NULL;
struct css_set *cset;
lockdep_assert_held(&css_set_lock);
rcu_read_lock();
cset = current->nsproxy->cgroup_ns->root_cset;
if (cset == &init_css_set) {
res = &root->cgrp;
} else {
struct cgrp_cset_link *link;
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
if (c->root == root) {
res = c;
break;
}
}
}
rcu_read_unlock();
BUG_ON(!res);
return res;
}
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
{
struct cgroup *res = NULL;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);
if (cset == &init_css_set) {
} else {
struct cgrp_cset_link *link;
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
if (c->root == root) {
res = c;
break;
}
}
}
BUG_ON(!res);
return res;
}
/*
* Return the cgroup for "task" from the given hierarchy. Must be
* called with cgroup_mutex and css_set_lock held.
*/
static struct cgroup *task_cgroup_from_root(struct task_struct *task,
					    struct cgroup_root *root)
{
	struct css_set *task_cset;

	/*
	 * The task itself is not locked here: with cgroup_mutex held it
	 * cannot change groups, so the only thing that can happen is that
	 * it exits and its css is set back to init_css_set.
	 */
	task_cset = task_css_set(task);

	return cset_cgroup_from_root(task_cset, root);
}
/*
* A task must hold cgroup_mutex to modify cgroups.
*
* Any task can increment and decrement the count field without lock.
* So in general, code holding cgroup_mutex can't rely on the count
* field not changing. However, if the count goes to zero, then only
* cgroup_attach_task() can increment it again. Because a count of zero
* means that no tasks are currently attached, therefore there is no
* way a task attached to that cgroup can fork (the other way to
* increment the count). So code holding cgroup_mutex can safely
* assume that if the count is zero, it will stay zero. Similarly, if
* a task holds cgroup_mutex on a cgroup with zero count, it
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
* A cgroup can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cgroups is empty. Since all
* tasks in the system use _some_ cgroup, and since there is always at
* least one task in the system (init, pid == 1), therefore, root cgroup
* always has either children cgroups and/or using tasks. So we don't
* need a special hack to ensure that root cgroup cannot be deleted.
*
* P.S. One more locking exception. RCU is used to guard the
* update of a task's cgroup pointer by cgroup_attach_task().
*/
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static const struct file_operations proc_cgroupstats_operations;
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
struct cgroup_subsys *ss = cft->ss;
if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
!(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
cft->name);
else
strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
return buf;
/**
* cgroup_file_mode - deduce file mode of a control file
* @cft: the control file in question
*
* S_IRUGO for read, S_IWUSR for write.
*/
static umode_t cgroup_file_mode(const struct cftype *cft)
umode_t mode = 0;
if (cft->read_u64 || cft->read_s64 || cft->seq_show)
mode |= S_IRUGO;
if (cft->write_u64 || cft->write_s64 || cft->write) {
if (cft->flags & CFTYPE_WORLD_WRITABLE)
mode |= S_IWUGO;
else
mode |= S_IWUSR;
}
return mode;
* cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
Tejun Heo
committed
* @subtree_control: the new subtree_control mask to consider
* @this_ss_mask: available subsystems
*
* On the default hierarchy, a subsystem may request other subsystems to be
* enabled together through its ->depends_on mask. In such cases, more
* subsystems than specified in "cgroup.subtree_control" may be enabled.
*
Tejun Heo
committed
* This function calculates which subsystems need to be enabled if
* @subtree_control is to be applied while restricted to @this_ss_mask.
static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
struct cgroup_subsys *ss;
int ssid;
lockdep_assert_held(&cgroup_mutex);
cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
do_each_subsys_mask(ss, ssid, cur_ss_mask) {
new_ss_mask |= ss->depends_on;
} while_each_subsys_mask();
/*
* Mask out subsystems which aren't available. This can
* happen only if some depended-upon subsystems were bound
* to non-default hierarchies.
*/
new_ss_mask &= this_ss_mask;
if (new_ss_mask == cur_ss_mask)
break;
cur_ss_mask = new_ss_mask;
}
Tejun Heo
committed
return cur_ss_mask;
}
/**
* cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
* @kn: the kernfs_node being serviced
*
* This helper undoes cgroup_kn_lock_live() and should be invoked before
* the method finishes if locking succeeded. Note that once this function
* returns the cgroup returned by cgroup_kn_lock_live() may become
* inaccessible any time. If the caller intends to continue to access the
* cgroup, it should pin it before invoking this function.
*/
static void cgroup_kn_unlock(struct kernfs_node *kn)
struct cgroup *cgrp;
if (kernfs_type(kn) == KERNFS_DIR)
cgrp = kn->priv;
else
cgrp = kn->parent->priv;
mutex_unlock(&cgroup_mutex);
kernfs_unbreak_active_protection(kn);
cgroup_put(cgrp);
/**
* cgroup_kn_lock_live - locking helper for cgroup kernfs methods
* @kn: the kernfs_node being serviced
* @drain_offline: perform offline draining on the cgroup
*
* This helper is to be used by a cgroup kernfs method currently servicing
* @kn. It breaks the active protection, performs cgroup locking and
* verifies that the associated cgroup is alive. Returns the cgroup if
* alive; otherwise, %NULL. A successful return should be undone by a
* matching cgroup_kn_unlock() invocation. If @drain_offline is %true, the
* cgroup is drained of offlining csses before return.
*
* Any cgroup kernfs method implementation which requires locking the
* associated cgroup should use this helper. It avoids nesting cgroup
* locking under kernfs active protection and allows all kernfs operations
* including self-removal.
*/
static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn,
					  bool drain_offline)
{
	struct cgroup *cgrp;

	/* A directory node carries the cgroup itself; a file uses its parent's. */
	if (kernfs_type(kn) == KERNFS_DIR)
		cgrp = kn->priv;
	else
		cgrp = kn->parent->priv;

	/*
	 * We're gonna grab cgroup_mutex which nests outside kernfs
	 * active_ref.  cgroup liveliness check alone provides enough
	 * protection against removal.  Ensure @cgrp stays accessible and
	 * break the active_ref protection.
	 */
	if (!cgroup_tryget(cgrp))
		return NULL;
	kernfs_break_active_protection(kn);

	if (drain_offline)
		cgroup_lock_and_drain_offline(cgrp);
	else
		mutex_lock(&cgroup_mutex);

	if (!cgroup_is_dead(cgrp))
		return cgrp;

	/* @cgrp died in the meantime: undo everything and report failure. */
	cgroup_kn_unlock(kn);
	return NULL;
}
static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
lockdep_assert_held(&cgroup_mutex);
if (cft->file_offset) {
struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
struct cgroup_file *cfile = (void *)css + cft->file_offset;
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = NULL;
spin_unlock_irq(&cgroup_file_kn_lock);
}
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
/**
* css_clear_dir - remove subsys files in a cgroup directory
* @css: target css
*/
Tejun Heo
committed
static void css_clear_dir(struct cgroup_subsys_state *css)
Tejun Heo
committed
struct cgroup *cgrp = css->cgroup;
if (!(css->flags & CSS_VISIBLE))
return;
css->flags &= ~CSS_VISIBLE;
list_for_each_entry(cfts, &css->ss->cfts, node)
cgroup_addrm_files(css, cgrp, cfts, false);
* css_populate_dir - create subsys files in a cgroup directory
* @css: target css
*
* On failure, no file is added.
*/
Tejun Heo
committed
static int css_populate_dir(struct cgroup_subsys_state *css)
Tejun Heo
committed
struct cgroup *cgrp = css->cgroup;
struct cftype *cfts, *failed_cfts;
int ret;
if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
return 0;
if (!css->ss) {
if (cgroup_on_dfl(cgrp))
cfts = cgroup_dfl_base_files;
else
cfts = cgroup_legacy_base_files;
return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
}
list_for_each_entry(cfts, &css->ss->cfts, node) {
ret = cgroup_addrm_files(css, cgrp, cfts, true);
if (ret < 0) {
failed_cfts = cfts;
goto err;
css->flags |= CSS_VISIBLE;
list_for_each_entry(cfts, &css->ss->cfts, node) {
if (cfts == failed_cfts)
break;
cgroup_addrm_files(css, cgrp, cfts, false);
}
static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
struct cgroup *dcgrp = &dst_root->cgrp;
do_each_subsys_mask(ss, ssid, ss_mask) {
/*
* If @ss has non-root csses attached to it, can't move.
* If @ss is an implicit controller, it is exempt from this
* rule and can be stolen.
*/
if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
!ss->implicit_on_dfl)
/* can't move between two non-dummy roots either */
if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
return -EBUSY;
} while_each_subsys_mask();
do_each_subsys_mask(ss, ssid, ss_mask) {
struct cgroup_root *src_root = ss->root;
struct cgroup *scgrp = &src_root->cgrp;
struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
WARN_ON(!css || cgroup_css(dcgrp, ss));
Tejun Heo
committed
/* disable from the source */
src_root->subsys_mask &= ~(1 << ssid);
WARN_ON(cgroup_apply_control(scgrp));
cgroup_finalize_control(scgrp, 0);
Tejun Heo
committed
/* rebind */
RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
rcu_assign_pointer(dcgrp->subsys[ssid], css);
ss->root = dst_root;
spin_lock_irq(&css_set_lock);
hash_for_each(css_set_table, i, cset, hlist)
list_move_tail(&cset->e_cset_node[ss->id],
spin_unlock_irq(&css_set_lock);
Tejun Heo
committed
/* default hierarchy doesn't enable controllers by default */
Tejun Heo
committed
dst_root->subsys_mask |= 1 << ssid;
Tejun Heo
committed
if (dst_root == &cgrp_dfl_root) {
static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
} else {
dcgrp->subtree_control |= 1 << ssid;
Tejun Heo
committed
static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
Tejun Heo
committed
ret = cgroup_apply_control(dcgrp);
if (ret)
pr_warn("partial failure to rebind %s controller (err=%d)\n",
ss->name, ret);
if (ss->bind)
ss->bind(css);
} while_each_subsys_mask();
kernfs_activate(dcgrp->kn);
Serge E. Hallyn
committed
static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
struct kernfs_root *kf_root)
{
Serge E. Hallyn
committed
char *buf = NULL;
struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
struct cgroup *ns_cgroup;
buf = kmalloc(PATH_MAX, GFP_KERNEL);
if (!buf)
return -ENOMEM;
spin_lock_irq(&css_set_lock);
Serge E. Hallyn
committed
ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
spin_unlock_irq(&css_set_lock);
Serge E. Hallyn
committed
if (len >= PATH_MAX)
len = -ERANGE;
else if (len > 0) {
seq_escape(sf, buf, " \t\n\\");
len = 0;
}
kfree(buf);
return len;
}
static int cgroup_show_options(struct seq_file *seq,
struct kernfs_root *kf_root)
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
struct cgroup_subsys *ss;
if (root != &cgrp_dfl_root)
for_each_subsys(ss, ssid)
if (root->subsys_mask & (1 << ssid))
seq_show_option(seq, ss->legacy_name, NULL);
if (root->flags & CGRP_ROOT_NOPREFIX)
seq_puts(seq, ",noprefix");
if (root->flags & CGRP_ROOT_XATTR)
spin_lock(&release_agent_path_lock);
if (strlen(root->release_agent_path))
seq_show_option(seq, "release_agent",
root->release_agent_path);
spin_unlock(&release_agent_path_lock);
if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
seq_puts(seq, ",clone_children");
seq_show_option(seq, "name", root->name);
return 0;
}
struct cgroup_sb_opts {
char *release_agent;
bool cpuset_clone_children;
/* User explicitly requested empty subsystem */
bool none;
static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
char *token, *o = data;
bool all_ss = false, one_ss = false;
#ifdef CONFIG_CPUSETS
mask = ~((u16)1 << cpuset_cgrp_id);
memset(opts, 0, sizeof(*opts));
while ((token = strsep(&o, ",")) != NULL) {
if (!*token)
return -EINVAL;
if (!strcmp(token, "none")) {
/* Explicitly have no subsystems */
opts->none = true;
continue;
}
if (!strcmp(token, "all")) {
/* Mutually exclusive option 'all' + subsystem name */
if (one_ss)
return -EINVAL;
all_ss = true;
continue;
}
if (!strcmp(token, "noprefix")) {
opts->flags |= CGRP_ROOT_NOPREFIX;
continue;
}
if (!strcmp(token, "clone_children")) {
opts->cpuset_clone_children = true;
opts->flags |= CGRP_ROOT_XATTR;
if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
continue;
}
if (!strncmp(token, "name=", 5)) {
const char *name = token + 5;
/* Can't specify an empty name */
if (!strlen(name))
return -EINVAL;
/* Must match [\w.-]+ */
for (i = 0; i < strlen(name); i++) {
char c = name[i];
if (isalnum(c))
continue;
if ((c == '.') || (c == '-') || (c == '_'))
continue;
return -EINVAL;
}
/* Specifying two names is forbidden */
if (opts->name)
return -EINVAL;
opts->name = kstrndup(name,
GFP_KERNEL);
if (!opts->name)
return -ENOMEM;
continue;
}
if (strcmp(token, ss->legacy_name))
if (!cgroup_ssid_enabled(i))
if (cgroup_ssid_no_v1(i))
continue;
/* Mutually exclusive option 'all' + subsystem name */
if (all_ss)
return -EINVAL;
opts->subsys_mask |= (1 << i);
one_ss = true;
break;
}
if (i == CGROUP_SUBSYS_COUNT)
return -ENOENT;
}
/*
* If the 'all' option was specified select all the subsystems,
* otherwise if 'none', 'name=' and a subsystem name options were
* not specified, let's default to 'all'
*/
if (all_ss || (!one_ss && !opts->none && !opts->name))
for_each_subsys(ss, i)
if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i))
opts->subsys_mask |= (1 << i);
/*
* We either have to specify by name or by subsystems. (So all
* empty hierarchies must have a name).
*/
if (!opts->subsys_mask && !opts->name)
return -EINVAL;
/*
* Option noprefix was introduced just for backward compatibility
* with the old cpuset, so we allow noprefix only if mounting just
* the cpuset subsystem.
*/
if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
return -EINVAL;
/* Can't specify "none" and some subsystems */
if (opts->subsys_mask && opts->none)
return -EINVAL;
static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
struct cgroup_sb_opts opts;
if (root == &cgrp_dfl_root) {
pr_err("remount is not allowed\n");
Tejun Heo
committed
cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
/* See what subsystems are wanted */
ret = parse_cgroupfs_options(data, &opts);
if (ret)
goto out_unlock;
Tejun Heo
committed
if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
task_tgid_nr(current), current->comm);
Tejun Heo
committed
added_mask = opts.subsys_mask & ~root->subsys_mask;
removed_mask = root->subsys_mask & ~opts.subsys_mask;
/* Don't allow flags or name to change at remount */
(opts.name && strcmp(opts.name, root->name))) {
pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
opts.flags, opts.name ?: "", root->flags, root->name);
ret = -EINVAL;
goto out_unlock;
}
Tejun Heo
committed
/* remounting is not allowed for populated hierarchies */
if (!list_empty(&root->cgrp.self.children)) {
Tejun Heo
committed
ret = -EBUSY;
ret = rebind_subsystems(root, added_mask);
Tejun Heo
committed
if (ret)
Tejun Heo
committed
WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
if (opts.release_agent) {
spin_lock(&release_agent_path_lock);
strcpy(root->release_agent_path, opts.release_agent);
spin_unlock(&release_agent_path_lock);
}
mutex_unlock(&cgroup_mutex);
return ret;
}
/*
* To reduce the fork() overhead for systems that are not actually using
* their cgroups capability, we don't maintain the lists running through
* each css_set to its tasks until we see the list actually used - in other
* words after the first mount.
*/
static bool use_task_css_set_links __read_mostly;
/*
 * On the first call, set use_task_css_set_links and add every task that is
 * not exiting to the ->tasks list of its css_set.  Later calls return
 * without doing anything.
 */
static void cgroup_enable_task_cg_lists(void)
{
	struct task_struct *p, *g;

	spin_lock_irq(&css_set_lock);

	if (use_task_css_set_links)
		goto out_unlock;

	use_task_css_set_links = true;

	/*
	 * We need tasklist_lock because RCU is not safe against
	 * while_each_thread(). Besides, a forking task that has passed
	 * cgroup_post_fork() without seeing use_task_css_set_links = 1
	 * is not guaranteed to have its child immediately visible in the
	 * tasklist if we walk through it with RCU.
	 */
	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
			     task_css_set(p) != &init_css_set);

		/*
		 * We should check if the process is exiting, otherwise
		 * it will race with cgroup_exit() in that the list
		 * entry won't be deleted though the process has exited.
		 * Do it while holding siglock so that we don't end up
		 * racing against cgroup_exit().
		 *
		 * Interrupts were already disabled while acquiring
		 * the css_set_lock, so we do not need to disable it
		 * again when acquiring the sighand->siglock here.
		 */
		spin_lock(&p->sighand->siglock);
		if (!(p->flags & PF_EXITING)) {
			struct css_set *cset = task_css_set(p);

			if (!css_set_populated(cset))
				css_set_update_populated(cset, true);
			list_add_tail(&p->cg_list, &cset->tasks);
			get_css_set(cset);
		}
		spin_unlock(&p->sighand->siglock);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
out_unlock:
	spin_unlock_irq(&css_set_lock);
}
/*
 * Initialize the list heads, pidlist mutex, per-subsystem e_csets lists,
 * offline wait queue and release-agent work item of @cgrp, point
 * @cgrp->self back at @cgrp and mark it CSS_ONLINE.
 */
static void init_cgroup_housekeeping(struct cgroup *cgrp)
{
	struct cgroup_subsys *ss;
	int ssid;

	INIT_LIST_HEAD(&cgrp->self.sibling);
	INIT_LIST_HEAD(&cgrp->self.children);
	INIT_LIST_HEAD(&cgrp->cset_links);
	INIT_LIST_HEAD(&cgrp->pidlists);
	mutex_init(&cgrp->pidlist_mutex);
	cgrp->self.cgroup = cgrp;
	cgrp->self.flags |= CSS_ONLINE;

	for_each_subsys(ss, ssid)
		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);

	init_waitqueue_head(&cgrp->offline_waitq);
	INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
}
static void init_cgroup_root(struct cgroup_root *root,
struct cgroup *cgrp = &root->cgrp;
INIT_LIST_HEAD(&root->root_list);
Tejun Heo
committed
atomic_set(&root->nr_cgrps, 1);
cgrp->root = root;
init_cgroup_housekeeping(cgrp);
root->flags = opts->flags;
if (opts->release_agent)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
if (opts->cpuset_clone_children)
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp;
struct css_set *cset;
int i, ret;
lockdep_assert_held(&cgroup_mutex);
ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
root_cgrp->id = ret;
root_cgrp->ancestor_ids[0] = ret;
ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
GFP_KERNEL);
if (ret)
goto out;