static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
struct cftype *cft = seq_cft(m);
struct cgroup_subsys_state *css = seq_css(m);
if (cft->seq_show)
return cft->seq_show(m, arg);
if (cft->read_u64)
seq_printf(m, "%llu\n", cft->read_u64(css, cft));
else if (cft->read_s64)
seq_printf(m, "%lld\n", cft->read_s64(css, cft));
else
return -EINVAL;
return 0;
}

static struct kernfs_ops cgroup_kf_single_ops = {
.atomic_write_len = PAGE_SIZE,
.open = cgroup_file_open,
.release = cgroup_file_release,
.write = cgroup_file_write,
.seq_show = cgroup_seqfile_show,
};

static struct kernfs_ops cgroup_kf_ops = {
.atomic_write_len = PAGE_SIZE,
.open = cgroup_file_open,
.release = cgroup_file_release,
.write = cgroup_file_write,
.seq_start = cgroup_seqfile_start,
.seq_next = cgroup_seqfile_next,
.seq_stop = cgroup_seqfile_stop,
.seq_show = cgroup_seqfile_show,
};
/* set uid and gid of cgroup dirs and files to that of the creator */
static int cgroup_kn_set_ugid(struct kernfs_node *kn)
{
struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
.ia_uid = current_fsuid(),
.ia_gid = current_fsgid(), };
if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
return 0;
return kernfs_setattr(kn, &iattr);
}
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
struct cftype *cft)
{
char name[CGROUP_FILE_NAME_MAX];
struct kernfs_node *kn;
struct lock_class_key *key = NULL;
int ret;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
key = &cft->lockdep_key;
#endif
kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
cgroup_file_mode(cft), 0, cft->kf_ops, cft,
NULL, key);
if (IS_ERR(kn))
return PTR_ERR(kn);
ret = cgroup_kn_set_ugid(kn);
if (ret) {
kernfs_remove(kn);
return ret;
}
if (cft->file_offset) {
struct cgroup_file *cfile = (void *)css + cft->file_offset;
spin_lock_irq(&cgroup_file_kn_lock);
cfile->kn = kn;
spin_unlock_irq(&cgroup_file_kn_lock);
}
return 0;
}

/**
* cgroup_addrm_files - add or remove files to a cgroup directory
* @css: the target css
* @cgrp: the target cgroup (usually css->cgroup)
* @cfts: array of cftypes to be added
* @is_add: whether to add or remove
*
* Depending on @is_add, add or remove files defined by @cfts on @cgrp.
* For removals, this function never fails.
*/
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
struct cgroup *cgrp, struct cftype cfts[],
bool is_add)
{
struct cftype *cft, *cft_end = NULL;
int ret = 0;
lockdep_assert_held(&cgroup_mutex);
restart:
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
continue;
if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
continue;
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
continue;

if (is_add) {
ret = cgroup_add_file(css, cgrp, cft);
if (ret) {
pr_warn("%s: failed to add %s, err=%d\n",
__func__, cft->name, ret);
cft_end = cft;
is_add = false;
goto restart;
}
} else {
cgroup_rm_file(cgrp, cft);
}
}
return ret;
}

static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
{
LIST_HEAD(pending);
struct cgroup_subsys *ss = cfts[0].ss;
struct cgroup *root = &ss->root->cgrp;
struct cgroup_subsys_state *css;
int ret = 0;
lockdep_assert_held(&cgroup_mutex);
/* add/rm files for all cgroups created before */
css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
struct cgroup *cgrp = css->cgroup;
if (!(css->flags & CSS_VISIBLE))
continue;
ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
if (ret)
break;
}

if (is_add && !ret)
kernfs_activate(root->kn);
return ret;
}

static void cgroup_exit_cftypes(struct cftype *cfts)
{
struct cftype *cft;

for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* free copy for custom atomic_write_len, see init_cftypes() */
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
kfree(cft->kf_ops);
cft->kf_ops = NULL;
cft->ss = NULL;

/* revert flags set by cgroup core while adding @cfts */
cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
}
}

static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype *cft;
for (cft = cfts; cft->name[0] != '\0'; cft++) {
struct kernfs_ops *kf_ops;
if (cft->seq_start)
kf_ops = &cgroup_kf_ops;
else
kf_ops = &cgroup_kf_single_ops;
/*
* Ugh... if @cft wants a custom max_write_len, we need to
* make a copy of kf_ops to set its atomic_write_len.
*/
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
if (!kf_ops) {
cgroup_exit_cftypes(cfts);
return -ENOMEM;
}
kf_ops->atomic_write_len = cft->max_write_len;
}

cft->kf_ops = kf_ops;
cft->ss = ss;
}

return 0;
}

static int cgroup_rm_cftypes_locked(struct cftype *cfts)
{
lockdep_assert_held(&cgroup_mutex);
if (!cfts || !cfts[0].ss)
return -ENOENT;
list_del(&cfts->node);
cgroup_apply_cftypes(cfts, false);
cgroup_exit_cftypes(cfts);
return 0;
}

/**
* cgroup_rm_cftypes - remove an array of cftypes from a subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Unregister @cfts. Files described by @cfts are removed from all
* existing cgroups and all future cgroups won't have them either. This
* function can be called anytime whether @cfts' subsys is attached or not.
*
* Returns 0 on successful unregistration, -ENOENT if @cfts is not
* registered.
*/
int cgroup_rm_cftypes(struct cftype *cfts)
{
int ret;

mutex_lock(&cgroup_mutex);
ret = cgroup_rm_cftypes_locked(cfts);
mutex_unlock(&cgroup_mutex);
return ret;
}

/**
* cgroup_add_cftypes - add an array of cftypes to a subsystem
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Register @cfts to @ss. Files described by @cfts are created for all
* existing cgroups to which @ss is attached and all future cgroups will
* have them too. This function can be called anytime whether @ss is
* attached or not.
*
* Returns 0 on successful registration, -errno on failure. Note that this
* function currently returns 0 as long as @cfts registration is successful
* even if some file creation attempts on existing cgroups fail.
*/
static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
int ret;

if (!cgroup_ssid_enabled(ss->id))
return 0;
if (!cfts || cfts[0].name[0] == '\0')
return 0;
ret = cgroup_init_cftypes(ss, cfts);
if (ret)
return ret;
mutex_lock(&cgroup_mutex);

list_add_tail(&cfts->node, &ss->cfts);
ret = cgroup_apply_cftypes(cfts, true);
if (ret)
cgroup_rm_cftypes_locked(cfts);

mutex_unlock(&cgroup_mutex);
return ret;
}

/**
* cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Similar to cgroup_add_cftypes() but the added files are only used for
* the default hierarchy.
*/
int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype *cft;
for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
cft->flags |= __CFTYPE_ONLY_ON_DFL;
return cgroup_add_cftypes(ss, cfts);
}
/**
* cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Similar to cgroup_add_cftypes() but the added files are only used for
* the legacy hierarchies.
*/
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype *cft;
for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
cft->flags |= __CFTYPE_NOT_ON_DFL;
return cgroup_add_cftypes(ss, cfts);
}
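/*
 * Example (illustrative sketch, not part of the original file): a
 * hypothetical controller registering its interface files.  It describes
 * the files in a zero-length-name terminated cftype array and hands them
 * to cgroup_add_dfl_cftypes().  The identifiers foo_cgrp_subsys,
 * foo_current_read and foo_files are assumptions invented for this sketch.
 */
#if 0	/* usage sketch only, not compiled */
static u64 foo_current_read(struct cgroup_subsys_state *css,
			    struct cftype *cft)
{
	return 0;	/* would report the controller's current value */
}

static struct cftype foo_files[] = {
	{
		.name = "foo.current",
		.flags = CFTYPE_NOT_ON_ROOT,
		.read_u64 = foo_current_read,
	},
	{ }	/* terminate */
};

static int __init foo_register_files(void)
{
	/* creates foo.current in all current and future default-hierarchy cgroups */
	return cgroup_add_dfl_cftypes(&foo_cgrp_subsys, foo_files);
}
#endif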
/**
* cgroup_file_notify - generate a file modified event for a cgroup_file
* @cfile: target cgroup_file
*
* @cfile must have been obtained by setting cftype->file_offset.
*/
void cgroup_file_notify(struct cgroup_file *cfile)
{
unsigned long flags;
spin_lock_irqsave(&cgroup_file_kn_lock, flags);
if (cfile->kn)
kernfs_notify(cfile->kn);
spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
}
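/*
 * Example (illustrative sketch, not part of the original file): a
 * controller that set cftype->file_offset, as cgroup_add_file() above
 * handles, can wake up pollers of its interface file through
 * cgroup_file_notify() whenever the reported state changes.  struct
 * foo_group, its events_file member and foo_update_events() are
 * assumptions invented for this sketch; "cgroup.events" below does the
 * real thing with cgrp->events_file.
 */
#if 0	/* usage sketch only, not compiled */
struct foo_group {
	struct cgroup_subsys_state css;
	struct cgroup_file events_file;	/* populated via cft->file_offset */
};

static void foo_update_events(struct foo_group *foo)
{
	/* ... update whatever the "foo.events" file reports ... */
	cgroup_file_notify(&foo->events_file);
}
#endif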
/**
* css_next_child - find the next child of a given css
* @pos: the current position (%NULL to initiate traversal)
* @parent: css whose children to walk
*
* This function returns the next child of @parent and should be called
* under either cgroup_mutex or RCU read lock. The only requirement is
* that @parent and @pos are accessible. The next sibling is guaranteed to
* be returned regardless of their states.
*
* If a subsystem synchronizes ->css_online() and the start of iteration, a
* css which finished ->css_online() is guaranteed to be visible in the
* future iterations and will stay visible until the last reference is put.
* A css which hasn't finished ->css_online() or already finished
* ->css_offline() may show up during traversal. It's each subsystem's
* responsibility to synchronize against on/offlining.
*/
struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *parent)
{
struct cgroup_subsys_state *next;

cgroup_assert_mutex_or_rcu_locked();

/*
* @pos could already have been unlinked from the sibling list.
* Once a cgroup is removed, its ->sibling.next is no longer
* updated when its next sibling changes. CSS_RELEASED is set when
* @pos is taken off list, at which time its next pointer is valid,
* and, as releases are serialized, the one pointed to by the next
* pointer is guaranteed to not have started release yet. This
* implies that if we observe !CSS_RELEASED on @pos in this RCU
* critical section, the one pointed to by its next pointer is
* guaranteed to not have finished its RCU grace period even if we
* have dropped rcu_read_lock() inbetween iterations.
* If @pos has CSS_RELEASED set, its next pointer can't be
* dereferenced; however, as each css is given a monotonically
* increasing unique serial number and always appended to the
* sibling list, the next one can be found by walking the parent's
* children until the first css with higher serial number than
* @pos's. While this path can be slower, it happens iff iteration
* races against release and the race window is very small.
*/
if (!pos) {
next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
} else if (likely(!(pos->flags & CSS_RELEASED))) {
next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
} else {
list_for_each_entry_rcu(next, &parent->children, sibling)
if (next->serial_nr > pos->serial_nr)
break;
}

/*
* @next, if not pointing to the head, can be dereferenced and is
* the next sibling.
*/
if (&next->sibling != &parent->children)
return next;
return NULL;
}
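/*
 * Example (illustrative sketch, not part of the original file): walking
 * the children of a css under the RCU read lock with css_for_each_child(),
 * which is built on css_next_child() above.  css_has_online_children()
 * below follows the same pattern; count_online_children() here is a
 * made-up name used only for illustration.
 */
#if 0	/* usage sketch only, not compiled */
static int count_online_children(struct cgroup_subsys_state *parent)
{
	struct cgroup_subsys_state *child;
	int nr = 0;

	rcu_read_lock();
	css_for_each_child(child, parent)
		if (child->flags & CSS_ONLINE)
			nr++;
	rcu_read_unlock();

	return nr;
}
#endif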
/**
* css_next_descendant_pre - find the next descendant for pre-order walk
* @pos: the current position (%NULL to initiate traversal)
* @root: css whose descendants to walk
* To be used by css_for_each_descendant_pre(). Find the next descendant
* to visit for pre-order traversal of @root's descendants. @root is
* included in the iteration and the first node to be visited.
*
* While this function requires cgroup_mutex or RCU read locking, it
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct next descendant as long
* as both @pos and @root are accessible and @pos is a descendant of @root.
*
* If a subsystem synchronizes ->css_online() and the start of iteration, a
* css which finished ->css_online() is guaranteed to be visible in the
* future iterations and will stay visible until the last reference is put.
* A css which hasn't finished ->css_online() or already finished
* ->css_offline() may show up during traversal. It's each subsystem's
* responsibility to synchronize against on/offlining.
*/
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *root)
{
struct cgroup_subsys_state *next;

cgroup_assert_mutex_or_rcu_locked();

/* if first iteration, visit @root */
if (!pos)
return root;
/* visit the first child if exists */
next = css_next_child(NULL, pos);
if (next)
return next;
/* no child, visit my or the closest ancestor's next sibling */
while (pos != root) {
next = css_next_child(pos, pos->parent);
if (next)
return next;
pos = pos->parent;
}

return NULL;
}
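/*
 * Example (illustrative sketch, not part of the original file): a
 * pre-order walk over a subtree with css_for_each_descendant_pre(), which
 * is built on css_next_descendant_pre() above.  As the comment notes,
 * csses that haven't finished ->css_online() may show up, so a caller that
 * only cares about online csses typically pins each position with
 * css_tryget_online().  do_something() is a placeholder invented for this
 * sketch.
 */
#if 0	/* usage sketch only, not compiled */
static void walk_subtree(struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *pos;

	rcu_read_lock();
	css_for_each_descendant_pre(pos, root) {
		if (!css_tryget_online(pos))
			continue;
		do_something(pos);
		css_put(pos);
	}
	rcu_read_unlock();
}
#endif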
/**
* css_rightmost_descendant - return the rightmost descendant of a css
* @pos: css of interest
* Return the rightmost descendant of @pos. If there's no descendant, @pos
* is returned. This can be used during pre-order traversal to skip
* subtree of @pos.
*
* While this function requires cgroup_mutex or RCU read locking, it
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct rightmost descendant as
* long as @pos is accessible.
*/
struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos)
{
struct cgroup_subsys_state *last, *tmp;

cgroup_assert_mutex_or_rcu_locked();
do {
last = pos;
/* ->prev isn't RCU safe, walk ->next till the end */
pos = NULL;
css_for_each_child(tmp, last)
pos = tmp;
} while (pos);
return last;
}
static struct cgroup_subsys_state *
css_leftmost_descendant(struct cgroup_subsys_state *pos)
{
struct cgroup_subsys_state *last;
do {
last = pos;
pos = css_next_child(NULL, pos);
} while (pos);
return last;
}
/**
* css_next_descendant_post - find the next descendant for post-order walk
* @pos: the current position (%NULL to initiate traversal)
* @root: css whose descendants to walk
* To be used by css_for_each_descendant_post(). Find the next descendant
* to visit for post-order traversal of @root's descendants. @root is
* included in the iteration and the last node to be visited.
*
* While this function requires cgroup_mutex or RCU read locking, it
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct next descendant as long
* as both @pos and @cgroup are accessible and @pos is a descendant of
* @cgroup.
*
* If a subsystem synchronizes ->css_online() and the start of iteration, a
* css which finished ->css_online() is guaranteed to be visible in the
* future iterations and will stay visible until the last reference is put.
* A css which hasn't finished ->css_online() or already finished
* ->css_offline() may show up during traversal. It's each subsystem's
* responsibility to synchronize against on/offlining.
*/
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *root)
{
struct cgroup_subsys_state *next;

cgroup_assert_mutex_or_rcu_locked();
/* if first iteration, visit leftmost descendant which may be @root */
if (!pos)
return css_leftmost_descendant(root);
/* if we visited @root, we're done */
if (pos == root)
return NULL;
/* if there's an unvisited sibling, visit its leftmost descendant */
next = css_next_child(pos, pos->parent);
if (next)
return css_leftmost_descendant(next);

/* no sibling left, visit parent */
return pos->parent;
}

/**
* css_has_online_children - does a css have online children
* @css: the target css
*
* Returns %true if @css has any online children; otherwise, %false. This
* function can be called from any context but the caller is responsible
* for synchronizing against on/offlining as necessary.
*/
bool css_has_online_children(struct cgroup_subsys_state *css)
{
struct cgroup_subsys_state *child;
bool ret = false;
rcu_read_lock();
css_for_each_child(child, css) {
if (child->flags & CSS_ONLINE) {
ret = true;
break;
}
}
rcu_read_unlock();
return ret;
}

/**
* css_task_iter_advance_css_set - advance a task iterator to the next css_set
* @it: the iterator to advance
*
* Advance @it to the next css_set to walk.
*/
static void css_task_iter_advance_css_set(struct css_task_iter *it)
{
struct list_head *l = it->cset_pos;
struct cgrp_cset_link *link;
struct css_set *cset;
lockdep_assert_held(&css_set_lock);
/* Advance to the next non-empty css_set */
do {
l = l->next;
if (l == it->cset_head) {
it->cset_pos = NULL;
it->task_pos = NULL;
return;
}

if (it->ss) {
cset = container_of(l, struct css_set,
e_cset_node[it->ss->id]);
} else {
link = list_entry(l, struct cgrp_cset_link, cset_link);
cset = link->cset;
}
} while (!css_set_populated(cset));

it->cset_pos = l;

if (!list_empty(&cset->tasks))
it->task_pos = cset->tasks.next;
else
it->task_pos = cset->mg_tasks.next;
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
/*
* We don't keep css_sets locked across iteration steps and thus
* need to take steps to ensure that iteration can be resumed after
* the lock is re-acquired. Iteration is performed at two levels -
* css_sets and tasks in them.
*
* Once created, a css_set never leaves its cgroup lists, so a
* pinned css_set is guaranteed to stay put and we can resume
* iteration afterwards.
*
* Tasks may leave @cset across iteration steps. This is resolved
* by registering each iterator with the css_set currently being
* walked and making css_set_move_task() advance iterators whose
* next task is leaving.
*/
if (it->cur_cset) {
list_del(&it->iters_node);
put_css_set_locked(it->cur_cset);
}
get_css_set(cset);
it->cur_cset = cset;
list_add(&it->iters_node, &cset->task_iters);
}

static void css_task_iter_advance(struct css_task_iter *it)
{
struct list_head *l = it->task_pos;
lockdep_assert_held(&css_set_lock);
WARN_ON_ONCE(!l);
/*
* Advance iterator to find next entry. cset->tasks is consumed
* first and then ->mg_tasks. After ->mg_tasks, we move onto the
* next cset.
*/
l = l->next;
if (l == it->tasks_head)
l = it->mg_tasks_head->next;
if (l == it->mg_tasks_head)
css_task_iter_advance_css_set(it);
else
it->task_pos = l;
}
/**
* css_task_iter_start - initiate task iteration
* @css: the css to walk tasks of
* @it: the task iterator to use
*
* Initiate iteration through the tasks of @css. The caller can call
* css_task_iter_next() to walk through the tasks until the function
* returns NULL. On completion of iteration, css_task_iter_end() must be
* called.
*/
void css_task_iter_start(struct cgroup_subsys_state *css,
struct css_task_iter *it)
{
/* no one should try to iterate before mounting cgroups */
WARN_ON_ONCE(!use_task_css_set_links);
memset(it, 0, sizeof(*it));
spin_lock_irq(&css_set_lock);
it->ss = css->ss;
if (it->ss)
it->cset_pos = &css->cgroup->e_csets[css->ss->id];
else
it->cset_pos = &css->cgroup->cset_links;

it->cset_head = it->cset_pos;

css_task_iter_advance_css_set(it);

spin_unlock_irq(&css_set_lock);
}

/**
* css_task_iter_next - return the next task for the iterator
* @it: the task iterator being iterated
*
* The "next" function for task iteration. @it should have been
* initialized via css_task_iter_start(). Returns NULL when the iteration
* reaches the end.
*/
struct task_struct *css_task_iter_next(struct css_task_iter *it)
{
if (it->cur_task) {
put_task_struct(it->cur_task);
it->cur_task = NULL;
}
spin_lock_irq(&css_set_lock);
if (it->task_pos) {
it->cur_task = list_entry(it->task_pos, struct task_struct,
cg_list);
get_task_struct(it->cur_task);
css_task_iter_advance(it);
}
spin_unlock_irq(&css_set_lock);
return it->cur_task;
}

/**
* css_task_iter_end - finish task iteration
* @it: the task iterator to finish
*
* Finish task iteration started by css_task_iter_start().
*/
void css_task_iter_end(struct css_task_iter *it)
{
if (it->cur_cset) {
spin_lock_irq(&css_set_lock);
list_del(&it->iters_node);
put_css_set_locked(it->cur_cset);
spin_unlock_irq(&css_set_lock);
}
if (it->cur_task)
put_task_struct(it->cur_task);
}
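/*
 * Example (illustrative sketch, not part of the original file): counting
 * the tasks attached to a css with the iterator above.  The same
 * start/next/end pattern is used by cgroup_procs_start() and friends
 * below; count_css_tasks() is a made-up name used only for illustration.
 */
#if 0	/* usage sketch only, not compiled */
static int count_css_tasks(struct cgroup_subsys_state *css)
{
	struct css_task_iter it;
	struct task_struct *task;
	int nr = 0;

	css_task_iter_start(css, &it);
	while ((task = css_task_iter_next(&it)))
		nr++;
	css_task_iter_end(&it);

	return nr;
}
#endif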
static void cgroup_procs_release(struct kernfs_open_file *of)
{
if (of->priv) {
css_task_iter_end(of->priv);
kfree(of->priv);
}
}
static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
{
struct kernfs_open_file *of = s->private;
struct css_task_iter *it = of->priv;
struct task_struct *task;
do {
task = css_task_iter_next(it);
} while (task && !thread_group_leader(task));
return task;
}
static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
{
struct kernfs_open_file *of = s->private;
struct cgroup *cgrp = seq_css(s)->cgroup;
struct css_task_iter *it = of->priv;
/*
* When a seq_file is seeked, it's always traversed sequentially
* from position 0, so we can simply keep iterating on !0 *pos.
*/
if (!it) {
if (WARN_ON_ONCE((*pos)++))
return ERR_PTR(-EINVAL);
it = kzalloc(sizeof(*it), GFP_KERNEL);
if (!it)
return ERR_PTR(-ENOMEM);
of->priv = it;
css_task_iter_start(&cgrp->self, it);
} else if (!(*pos)++) {
css_task_iter_end(it);
css_task_iter_start(&cgrp->self, it);
}
return cgroup_procs_next(s, NULL, NULL);
}
static int cgroup_procs_show(struct seq_file *s, void *v)
{
seq_printf(s, "%d\n", task_tgid_vnr(v));
return 0;
}
/* cgroup core interface files for the default hierarchy */
static struct cftype cgroup_base_files[] = {
{
.name = "cgroup.procs",
.file_offset = offsetof(struct cgroup, procs_file),
.release = cgroup_procs_release,
.seq_start = cgroup_procs_start,
.seq_next = cgroup_procs_next,
.seq_show = cgroup_procs_show,
.write = cgroup_procs_write,
},
{
.name = "cgroup.controllers",
.seq_show = cgroup_controllers_show,
},
{
.name = "cgroup.subtree_control",
.seq_show = cgroup_subtree_control_show,
.write = cgroup_subtree_control_write,
},
.name = "cgroup.events",
.flags = CFTYPE_NOT_ON_ROOT,
.file_offset = offsetof(struct cgroup, events_file),
.seq_show = cgroup_events_show,
},
{ } /* terminate */
};
/*
* css destruction is four-stage process.
*
* 1. Destruction starts. Killing of the percpu_ref is initiated.
* Implemented in kill_css().
*
* 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
* and thus css_tryget_online() is guaranteed to fail, the css can be
* offlined by invoking offline_css(). After offlining, the base ref is
* put. Implemented in css_killed_work_fn().
*
* 3. When the percpu_ref reaches zero, the only possible remaining
* accessors are inside RCU read sections. css_release() schedules the
* RCU callback.
*
* 4. After the grace period, the css can be freed. Implemented in
* css_free_work_fn().
*
* It is actually hairier because both step 2 and 4 require process context
* and thus involve punting to css->destroy_work adding two additional
* steps to the already complex sequence.
*/
static void css_free_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
percpu_ref_exit(&css->refcnt);

if (ss) {
/* css free path */
struct cgroup_subsys_state *parent = css->parent;
int id = css->id;
ss->css_free(css);
cgroup_idr_remove(&ss->css_idr, id);
if (parent)
css_put(parent);
} else {
/* cgroup free path */
atomic_dec(&cgrp->root->nr_cgrps);
cgroup1_pidlist_destroy_all(cgrp);
cancel_work_sync(&cgrp->release_agent_work);

if (cgroup_parent(cgrp)) {
/*
* We get a ref to the parent, and put the ref when
* this cgroup is being freed, so it's guaranteed
* that the parent won't be destroyed before its
* children.
*/
cgroup_put(cgroup_parent(cgrp));
kernfs_put(cgrp->kn);
kfree(cgrp);
} else {
/*
* This is root cgroup's refcnt reaching zero,
* which indicates that the root should be
* released.
*/
cgroup_destroy_root(cgrp->root);
}
}
}

static void css_free_rcu_fn(struct rcu_head *rcu_head)
{
struct cgroup_subsys_state *css =
container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
INIT_WORK(&css->destroy_work, css_free_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
}

static void css_release_work_fn(struct work_struct *work)
{
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
mutex_lock(&cgroup_mutex);
css->flags |= CSS_RELEASED;
list_del_rcu(&css->sibling);
if (ss) {
/* css release path */
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
if (ss->css_released)
ss->css_released(css);
} else {
/* cgroup release path */
trace_cgroup_release(cgrp);
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
/*
* There are two control paths which try to determine
* cgroup from dentry without going through kernfs -
* cgroupstats_build() and css_tryget_online_from_dir().
* Those are supported by RCU protecting clearing of
* cgrp->kn->priv backpointer.
*/
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
}

mutex_unlock(&cgroup_mutex);
call_rcu(&css->rcu_head, css_free_rcu_fn);
}
static void css_release(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_release_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
}

static void init_and_link_css(struct cgroup_subsys_state *css,
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
lockdep_assert_held(&cgroup_mutex);
memset(css, 0, sizeof(*css));
css->cgroup = cgrp;
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
css->serial_nr = css_serial_nr_next++;
atomic_set(&css->online_cnt, 0);
if (cgroup_parent(cgrp)) {
css->parent = cgroup_css(cgroup_parent(cgrp), ss);
css_get(css->parent);
}
BUG_ON(cgroup_css(cgrp, ss));
}

/* invoke ->css_online() on a new CSS and mark it online if successful */
static int online_css(struct cgroup_subsys_state *css)
{
struct cgroup_subsys *ss = css->ss;
int ret = 0;
lockdep_assert_held(&cgroup_mutex);
if (ss->css_online)
ret = ss->css_online(css);
if (!ret) {
css->flags |= CSS_ONLINE;
rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
atomic_inc(&css->online_cnt);
if (css->parent)
atomic_inc(&css->parent->online_cnt);
}
return ret;
}

/* if the CSS is online, invoke ->css_offline() on it and mark it offline */
static void offline_css(struct cgroup_subsys_state *css)
{
struct cgroup_subsys *ss = css->ss;
lockdep_assert_held(&cgroup_mutex);
if (!(css->flags & CSS_ONLINE))
return;