Newer
Older
if (!cset->mg_src_cgrp)
return;
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset->src_csets);
if (list_empty(&cset->mg_dst_cset->mg_node))
list_move_tail(&cset->mg_dst_cset->mg_node,
&tset->dst_csets);
}
Tejun Heo
committed
/**
* cgroup_taskset_first - reset taskset and return the first task
* @tset: taskset of interest
Tejun Heo
committed
* @dst_cssp: output variable for the destination css
Tejun Heo
committed
*
* @tset iteration is initialized and the first task is returned.
*/
Tejun Heo
committed
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp)
Tejun Heo
committed
{
tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
tset->cur_task = NULL;
Tejun Heo
committed
return cgroup_taskset_next(tset, dst_cssp);
Tejun Heo
committed
}
/**
* cgroup_taskset_next - iterate to the next task in taskset
* @tset: taskset of interest
Tejun Heo
committed
* @dst_cssp: output variable for the destination css
Tejun Heo
committed
*
* Return the next task in @tset. Iteration must have been initialized
* with cgroup_taskset_first().
*/
Tejun Heo
committed
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
struct cgroup_subsys_state **dst_cssp)
Tejun Heo
committed
{
struct css_set *cset = tset->cur_cset;
struct task_struct *task = tset->cur_task;
Tejun Heo
committed
while (&cset->mg_node != tset->csets) {
if (!task)
task = list_first_entry(&cset->mg_tasks,
struct task_struct, cg_list);
else
task = list_next_entry(task, cg_list);
Tejun Heo
committed
if (&task->cg_list != &cset->mg_tasks) {
tset->cur_cset = cset;
tset->cur_task = task;
Tejun Heo
committed
/*
* This function may be called both before and
* after cgroup_taskset_migrate(). The two cases
* can be distinguished by looking at whether @cset
* has its ->mg_dst_cset set.
*/
if (cset->mg_dst_cset)
*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
else
*dst_cssp = cset->subsys[tset->ssid];
return task;
}
Tejun Heo
committed
cset = list_next_entry(cset, mg_node);
task = NULL;
}
Tejun Heo
committed
return NULL;
Tejun Heo
committed
}
* cgroup_taskset_migrate - migrate a taskset
* @tset: taget taskset
* @root: cgroup root the migration is taking place on
* Migrate tasks in @tset as setup by migration preparation functions.
* This function fails iff one of the ->can_attach callbacks fails and
* guarantees that either all or none of the tasks in @tset are migrated.
* @tset is consumed regardless of success.
*/
static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
struct cgroup_root *root)
struct cgroup_subsys *ss;
struct task_struct *task, *tmp_task;
struct css_set *cset, *tmp_cset;
int ssid, failed_ssid, ret;
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset->src_csets))
return 0;
/* check that we can legitimately attach to the cgroup */
do_each_subsys_mask(ss, ssid, root->subsys_mask) {
if (ss->can_attach) {
tset->ssid = ssid;
ret = ss->can_attach(tset);
failed_ssid = ssid;
goto out_cancel_attach;
}
}
} while_each_subsys_mask();
/*
* Now that we're guaranteed success, proceed to move all tasks to
* the new cgroup. There are no failure cases after here, so this
* is the commit point.
*/
spin_lock_irq(&css_set_lock);
list_for_each_entry(cset, &tset->src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
struct css_set *from_cset = task_css_set(task);
struct css_set *to_cset = cset->mg_dst_cset;
get_css_set(to_cset);
css_set_move_task(task, from_cset, to_cset, true);
put_css_set_locked(from_cset);
}
spin_unlock_irq(&css_set_lock);
/*
* Migration is committed, all target tasks are now on dst_csets.
* Nothing is sensitive to fork() after this point. Notify
* controllers that migration is complete.
*/
tset->csets = &tset->dst_csets;
do_each_subsys_mask(ss, ssid, root->subsys_mask) {
if (ss->attach) {
tset->ssid = ssid;
ss->attach(tset);
Tejun Heo
committed
}
} while_each_subsys_mask();
ret = 0;
goto out_release_tset;
out_cancel_attach:
do_each_subsys_mask(ss, ssid, root->subsys_mask) {
if (ssid == failed_ssid)
if (ss->cancel_attach) {
tset->ssid = ssid;
ss->cancel_attach(tset);
Tejun Heo
committed
}
} while_each_subsys_mask();
out_release_tset:
spin_lock_irq(&css_set_lock);
list_splice_init(&tset->dst_csets, &tset->src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
spin_unlock_irq(&css_set_lock);
return ret;
}
Tejun Heo
committed
/**
* cgroup_may_migrate_to - verify whether a cgroup can be migration destination
* @dst_cgrp: destination cgroup to test
*
* On the default hierarchy, except for the root, subtree_control must be
* zero for migration destination cgroups with tasks so that child cgroups
* don't compete against tasks.
*/
bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
Tejun Heo
committed
{
return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) ||
!dst_cgrp->subtree_control;
}
* cgroup_migrate_finish - cleanup after attach
* @preloaded_csets: list of preloaded css_sets
* Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See
* those functions for details.
void cgroup_migrate_finish(struct list_head *preloaded_csets)
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cgrp = NULL;
cset->mg_dst_cset = NULL;
list_del_init(&cset->mg_preload_node);
spin_unlock_irq(&css_set_lock);
}
/**
* cgroup_migrate_add_src - add a migration source css_set
* @src_cset: the source css_set to add
* @dst_cgrp: the destination cgroup
* @preloaded_csets: list of preloaded css_sets
*
* Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin
* @src_cset and add it to @preloaded_csets, which should later be cleaned
* up by cgroup_migrate_finish().
*
* This function may be called without holding cgroup_threadgroup_rwsem
* even if the target is a process. Threads may be created and destroyed
* but as long as cgroup_mutex is not dropped, no new css_set can be put
* into play and the preloaded css_sets are guaranteed to cover all
* migrations.
void cgroup_migrate_add_src(struct css_set *src_cset,
struct cgroup *dst_cgrp,
struct list_head *preloaded_csets)
{
struct cgroup *src_cgrp;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_lock);
/*
* If ->dead, @src_set is associated with one or more dead cgroups
* and doesn't contain any migratable tasks. Ignore it early so
* that the rest of migration path doesn't get confused by it.
*/
if (src_cset->dead)
return;
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
if (!list_empty(&src_cset->mg_preload_node))
return;
WARN_ON(src_cset->mg_src_cgrp);
WARN_ON(src_cset->mg_dst_cgrp);
WARN_ON(!list_empty(&src_cset->mg_tasks));
WARN_ON(!list_empty(&src_cset->mg_node));
src_cset->mg_src_cgrp = src_cgrp;
src_cset->mg_dst_cgrp = dst_cgrp;
get_css_set(src_cset);
list_add(&src_cset->mg_preload_node, preloaded_csets);
}
/**
* cgroup_migrate_prepare_dst - prepare destination css_sets for migration
* @preloaded_csets: list of preloaded source css_sets
*
* Tasks are about to be moved and all the source css_sets have been
* preloaded to @preloaded_csets. This function looks up and pins all
* destination css_sets, links each to its source, and append them to
* @preloaded_csets.
*
* This function must be called after cgroup_migrate_add_src() has been
* called on each migration source css_set. After migration is performed
* using cgroup_migrate(), cgroup_migrate_finish() must be called on
* @preloaded_csets.
*/
int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
{
LIST_HEAD(csets);
struct css_set *src_cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
struct css_set *dst_cset;
dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
if (!dst_cset)
goto err;
WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
/*
* If src cset equals dst, it's noop. Drop the src.
* cgroup_migrate() will skip the cset too. Note that we
* can't handle src == dst as some nodes are used by both.
*/
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
src_cset->mg_dst_cgrp = NULL;
list_del_init(&src_cset->mg_preload_node);
put_css_set(src_cset);
put_css_set(dst_cset);
src_cset->mg_dst_cset = dst_cset;
if (list_empty(&dst_cset->mg_preload_node))
list_add(&dst_cset->mg_preload_node, &csets);
else
list_splice_tail(&csets, preloaded_csets);
return 0;
err:
cgroup_migrate_finish(&csets);
return -ENOMEM;
}
/**
 * cgroup_migrate - migrate a process or task to a cgroup
 * @leader: the leader of the process or the task to migrate
 * @threadgroup: whether @leader points to the whole process or a single task
 * @root: cgroup root migration is taking place on
 *
 * Migrate a process or task denoted by @leader.  If migrating a process,
 * the caller must be holding cgroup_threadgroup_rwsem.  The caller is also
 * responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
 *
 * As long as a controller's ->can_attach() doesn't fail, this function is
 * guaranteed to succeed.  This means that, excluding ->can_attach()
 * failure, when migrating multiple targets, the success or failure can be
 * decided for all targets by invoking cgroup_migrate_prepare_dst() before
 * actually starting migrating.
 *
 * Returns the result of cgroup_taskset_migrate().
 */
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
		   struct cgroup_root *root)
{
	struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
	struct task_struct *task;

	/*
	 * Prevent freeing of tasks while we take a snapshot.  Tasks that are
	 * already PF_EXITING could be freed from underneath us unless we
	 * take an rcu_read_lock.
	 */
	spin_lock_irq(&css_set_lock);
	rcu_read_lock();
	task = leader;
	do {
		cgroup_taskset_add(task, &tset);
		/* single-task migration stops after @leader itself */
		if (!threadgroup)
			break;
	} while_each_thread(leader, task);
	rcu_read_unlock();
	spin_unlock_irq(&css_set_lock);

	return cgroup_taskset_migrate(&tset, root);
}
/**
* cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
* @dst_cgrp: the cgroup to attach to
* @leader: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
*
* Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
bool threadgroup)
{
LIST_HEAD(preloaded_csets);
struct task_struct *task;
int ret;
Tejun Heo
committed
if (!cgroup_may_migrate_to(dst_cgrp))
return -EBUSY;
/* look up all src csets */
spin_lock_irq(&css_set_lock);
rcu_read_lock();
task = leader;
do {
cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
&preloaded_csets);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(&preloaded_csets);
ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
cgroup_migrate_finish(&preloaded_csets);
if (!ret)
trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
Tejun Heo
committed
static int cgroup_procs_write_permission(struct task_struct *task,
struct cgroup *dst_cgrp,
struct kernfs_open_file *of)
Tejun Heo
committed
{
const struct cred *cred = current_cred();
const struct cred *tcred = get_task_cred(task);
int ret = 0;
/*
* even if we're attaching all tasks in the thread group, we only
* need to check permissions on one of them.
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid))
ret = -EACCES;
Tejun Heo
committed
if (!ret && cgroup_on_dfl(dst_cgrp)) {
struct super_block *sb = of->file->f_path.dentry->d_sb;
struct cgroup *cgrp;
struct inode *inode;
spin_lock_irq(&css_set_lock);
Tejun Heo
committed
cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
spin_unlock_irq(&css_set_lock);
Tejun Heo
committed
while (!cgroup_is_descendant(dst_cgrp, cgrp))
cgrp = cgroup_parent(cgrp);
ret = -ENOMEM;
inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
Tejun Heo
committed
if (inode) {
ret = inode_permission(inode, MAY_WRITE);
iput(inode);
}
}
Tejun Heo
committed
put_cred(tcred);
return ret;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup.
ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
Tejun Heo
committed
struct cgroup_subsys *ss;
struct cgroup *cgrp;
pid_t pid;
Tejun Heo
committed
int ssid, ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
percpu_down_write(&cgroup_threadgroup_rwsem);
rcu_read_lock();
Tejun Heo
committed
} else {
tsk = current;
Tejun Heo
committed
}
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
}
get_task_struct(tsk);
rcu_read_unlock();
Tejun Heo
committed
ret = cgroup_procs_write_permission(tsk, cgrp, of);
Tejun Heo
committed
if (!ret)
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
goto out_unlock_threadgroup;
out_unlock_rcu:
rcu_read_unlock();
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
Tejun Heo
committed
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
/*
 * Write handler which forwards to __cgroup_procs_write() with the
 * threadgroup argument set, returning its result unchanged.
 */
ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
			   loff_t off)
{
	const bool whole_threadgroup = true;

	return __cgroup_procs_write(of, buf, nbytes, off, whole_threadgroup);
}
/*
 * Print the names of the subsystems selected by @ss_mask to @seq, separated
 * by single spaces and terminated by a newline.  Nothing at all is printed
 * when no subsystem is visited.
 */
static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
{
	struct cgroup_subsys *ss;
	bool printed = false;
	int ssid;

	do_each_subsys_mask(ss, ssid, ss_mask) {
		/* separator goes before every name but the first */
		if (printed)
			seq_putc(seq, ' ');
		seq_printf(seq, "%s", ss->name);
		printed = true;
	} while_each_subsys_mask();
	if (printed)
		seq_putc(seq, '\n');
}
/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgroup_control(cgrp));
Tejun Heo
committed
return 0;
Tejun Heo
committed
/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->subtree_control);
Tejun Heo
committed
return 0;
}
/**
* cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
* @cgrp: root of the subtree to update csses for
*
* @cgrp's control masks have changed and its subtree's css associations
* need to be updated accordingly. This function looks up all css_sets
* which are attached to the subtree, creates the matching updated css_sets
* and migrates the tasks to the new ones.
Tejun Heo
committed
*/
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
LIST_HEAD(preloaded_csets);
struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
struct cgroup_subsys_state *d_css;
struct cgroup *dsct;
Tejun Heo
committed
struct css_set *src_cset;
int ret;
lockdep_assert_held(&cgroup_mutex);
percpu_down_write(&cgroup_threadgroup_rwsem);
Tejun Heo
committed
/* look up all csses currently attached to @cgrp's subtree */
spin_lock_irq(&css_set_lock);
cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
Tejun Heo
committed
struct cgrp_cset_link *link;
list_for_each_entry(link, &dsct->cset_links, cset_link)
cgroup_migrate_add_src(link->cset, dsct,
Tejun Heo
committed
&preloaded_csets);
}
spin_unlock_irq(&css_set_lock);
Tejun Heo
committed
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(&preloaded_csets);
Tejun Heo
committed
if (ret)
goto out_finish;
spin_lock_irq(&css_set_lock);
Tejun Heo
committed
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
struct task_struct *task, *ntask;
Tejun Heo
committed
/* src_csets precede dst_csets, break on the first dst_cset */
if (!src_cset->mg_src_cgrp)
break;
/* all tasks in src_csets need to be migrated */
list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
cgroup_taskset_add(task, &tset);
Tejun Heo
committed
}
spin_unlock_irq(&css_set_lock);
Tejun Heo
committed
ret = cgroup_taskset_migrate(&tset, cgrp->root);
Tejun Heo
committed
out_finish:
cgroup_migrate_finish(&preloaded_csets);
percpu_up_write(&cgroup_threadgroup_rwsem);
Tejun Heo
committed
return ret;
}
/**
 * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
 * @cgrp: root of the target subtree
 *
 * Because css offlining is asynchronous, userland may try to re-enable a
 * controller while the previous css is still around.  This function grabs
 * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
 *
 * Returns with cgroup_mutex held.
 */
void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
	__acquires(&cgroup_mutex)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;
	struct cgroup_subsys *ss;
	int ssid;

restart:
	mutex_lock(&cgroup_mutex);

	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
		for_each_subsys(ss, ssid) {
			struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
			DEFINE_WAIT(wait);

			if (!css || !percpu_ref_is_dying(&css->refcnt))
				continue;

			/*
			 * A dying css was found.  Pin @dsct, wait on its
			 * offline_waitq with the mutex dropped and then
			 * rescan the whole subtree from scratch.
			 */
			cgroup_get(dsct);
			prepare_to_wait(&dsct->offline_waitq, &wait,
					TASK_UNINTERRUPTIBLE);

			mutex_unlock(&cgroup_mutex);
			schedule();
			finish_wait(&dsct->offline_waitq, &wait);

			cgroup_put(dsct);
			goto restart;
		}
	}
}
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
/**
 * cgroup_save_control - save control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Snapshot ->subtree_control and ->subtree_ss_mask into the matching
 * old_ prefixed fields of every live cgroup visited by the pre-order walk
 * of @cgrp's subtree.
 */
static void cgroup_save_control(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *pos_css;
	struct cgroup *pos;

	cgroup_for_each_live_descendant_pre(pos, pos_css, cgrp) {
		pos->old_subtree_ss_mask = pos->subtree_ss_mask;
		pos->old_subtree_control = pos->subtree_control;
	}
}
/**
 * cgroup_propagate_control - refresh control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
 * ->subtree_control and propagate controller availability through the
 * subtree so that descendants don't have unavailable controllers enabled.
 */
static void cgroup_propagate_control(struct cgroup *cgrp)
{
	struct cgroup *dsct;
	struct cgroup_subsys_state *d_css;

	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
		/* drop controllers which are no longer available here */
		dsct->subtree_control &= cgroup_control(dsct);
		dsct->subtree_ss_mask =
			cgroup_calc_subtree_ss_mask(dsct->subtree_control,
						    cgroup_ss_mask(dsct));
	}
}
/**
 * cgroup_restore_control - restore control masks of a subtree
 * @cgrp: root of the target subtree
 *
 * Copy the old_ prefixed snapshots back into ->subtree_control and
 * ->subtree_ss_mask for every live cgroup visited by the post-order walk
 * of @cgrp's subtree.
 */
static void cgroup_restore_control(struct cgroup *cgrp)
{
	struct cgroup_subsys_state *pos_css;
	struct cgroup *pos;

	cgroup_for_each_live_descendant_post(pos, pos_css, cgrp) {
		pos->subtree_ss_mask = pos->old_subtree_ss_mask;
		pos->subtree_control = pos->old_subtree_control;
	}
}
/*
 * Decide whether @css should be visible in its cgroup.  It is visible when
 * its subsystem is in cgroup_control() of the cgroup.  Otherwise it is
 * visible only if the subsystem is in cgroup_ss_mask(), the cgroup is on
 * the default hierarchy and the subsystem is ->implicit_on_dfl.
 */
static bool css_visible(struct cgroup_subsys_state *css)
{
	struct cgroup *cgrp = css->cgroup;
	struct cgroup_subsys *ss = css->ss;
	const int ss_bit = 1 << ss->id;

	if (cgroup_control(cgrp) & ss_bit)
		return true;

	if (cgroup_ss_mask(cgrp) & ss_bit)
		return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;

	return false;
}
Tejun Heo
committed
/**
* cgroup_apply_control_enable - enable or show csses according to control
Tejun Heo
committed
* @cgrp: root of the target subtree
Tejun Heo
committed
*
Tejun Heo
committed
* Walk @cgrp's subtree and create new csses or make the existing ones
Tejun Heo
committed
* visible. A css is created invisible if it's being implicitly enabled
* through dependency. An invisible css is made visible when the userland
* explicitly enables it.
*
* Returns 0 on success, -errno on failure. On failure, csses which have
* been processed already aren't cleaned up. The caller is responsible for
* cleaning up with cgroup_apply_control_disble().
*/
static int cgroup_apply_control_enable(struct cgroup *cgrp)
{
struct cgroup *dsct;
Tejun Heo
committed
struct cgroup_subsys_state *d_css;
Tejun Heo
committed
struct cgroup_subsys *ss;
int ssid, ret;
Tejun Heo
committed
cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
Tejun Heo
committed
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
Tejun Heo
committed
if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
continue;
if (!css) {
css = css_create(dsct, ss);
if (IS_ERR(css))
return PTR_ERR(css);
}
Tejun Heo
committed
ret = css_populate_dir(css);
Tejun Heo
committed
if (ret)
return ret;
}
}
}
return 0;
}
Tejun Heo
committed
/**
* cgroup_apply_control_disable - kill or hide csses according to control
Tejun Heo
committed
* @cgrp: root of the target subtree
Tejun Heo
committed
*
Tejun Heo
committed
* Walk @cgrp's subtree and kill and hide csses so that they match
Tejun Heo
committed
* cgroup_ss_mask() and cgroup_visible_mask().
*
* A css is hidden when the userland requests it to be disabled while other
* subsystems are still depending on it. The css must not actively control
* resources and be in the vanilla state if it's made visible again later.
* Controllers which may be depended upon should provide ->css_reset() for
* this purpose.
*/
static void cgroup_apply_control_disable(struct cgroup *cgrp)
{
struct cgroup *dsct;
Tejun Heo
committed
struct cgroup_subsys_state *d_css;
Tejun Heo
committed
struct cgroup_subsys *ss;
int ssid;
Tejun Heo
committed
cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
Tejun Heo
committed
for_each_subsys(ss, ssid) {
struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt));
Tejun Heo
committed
if (!css)
continue;
Tejun Heo
committed
if (css->parent &&
!(cgroup_ss_mask(dsct) & (1 << ss->id))) {
Tejun Heo
committed
kill_css(css);
} else if (!css_visible(css)) {
Tejun Heo
committed
css_clear_dir(css);
Tejun Heo
committed
if (ss->css_reset)
ss->css_reset(css);
}
}
}
}
Tejun Heo
committed
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
/**
 * cgroup_apply_control - apply control mask updates to the subtree
 * @cgrp: root of the target subtree
 *
 * Subsystems are enabled and disabled in a subtree with this sequence:
 *
 * 1. Call cgroup_save_control() to stash the current state.
 * 2. Update ->subtree_control masks in the subtree as desired.
 * 3. Call cgroup_apply_control() to apply the changes.
 * 4. Optionally perform other related operations.
 * 5. Call cgroup_finalize_control() to finish up.
 *
 * This function is step 3.  It propagates the mask changes through
 * @cgrp's subtree, enables csses accordingly and migrates processes.
 * Returns 0 on success or the first -errno encountered.
 */
static int cgroup_apply_control(struct cgroup *cgrp)
{
	int err;

	cgroup_propagate_control(cgrp);

	err = cgroup_apply_control_enable(cgrp);
	if (err)
		return err;

	/*
	 * The new csses are in place by now, so cgroup_e_css() results are
	 * up to date and cgroup_update_dfl_csses() can correctly update
	 * the css associations of all tasks in the subtree.
	 */
	return cgroup_update_dfl_csses(cgrp);
}
/**
* cgroup_finalize_control - finalize control mask update
* @cgrp: root of the target subtree
* @ret: the result of the update
*
* Finalize control mask update. See cgroup_apply_control() for more info.
*/
static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
{
if (ret) {
cgroup_restore_control(cgrp);
cgroup_propagate_control(cgrp);
}
cgroup_apply_control_disable(cgrp);
}
Tejun Heo
committed
/*
 * change the enabled child controllers for a cgroup in the default hierarchy
 *
 * @buf is a space separated list of controller names, each prefixed with
 * '+' to enable or '-' to disable.  Returns @nbytes on success or a
 * negative errno: -EINVAL for a malformed or unknown token, -ENODEV if the
 * cgroup is no longer live, -ENOENT when enabling a controller which isn't
 * available to this cgroup, -EBUSY when a child still has the controller
 * enabled or when a non-root cgroup has populated css_sets, or the error
 * from applying the new masks.
 */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes,
					    loff_t off)
{
	u16 enable = 0, disable = 0;
	struct cgroup *cgrp, *child;
	struct cgroup_subsys *ss;
	char *tok;
	int ssid, ret;

	/*
	 * Parse input - space separated list of subsystem names prefixed
	 * with either + or -.
	 */
	buf = strstrip(buf);
	while ((tok = strsep(&buf, " "))) {
		if (tok[0] == '\0')
			continue;
		do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
			if (!cgroup_ssid_enabled(ssid) ||
			    strcmp(tok + 1, ss->name))
				continue;

			/* the latest mention of a controller wins */
			if (*tok == '+') {
				enable |= 1 << ssid;
				disable &= ~(1 << ssid);
			} else if (*tok == '-') {
				disable |= 1 << ssid;
				enable &= ~(1 << ssid);
			} else {
				return -EINVAL;
			}
			break;
		} while_each_subsys_mask();
		/* no subsystem matched the token */
		if (ssid == CGROUP_SUBSYS_COUNT)
			return -EINVAL;
	}

	cgrp = cgroup_kn_lock_live(of->kn, true);
	if (!cgrp)
		return -ENODEV;

	for_each_subsys(ss, ssid) {
		if (enable & (1 << ssid)) {
			/* already enabled, nothing to do for this one */
			if (cgrp->subtree_control & (1 << ssid)) {
				enable &= ~(1 << ssid);
				continue;
			}

			if (!(cgroup_control(cgrp) & (1 << ssid))) {
				ret = -ENOENT;
				goto out_unlock;
			}
		} else if (disable & (1 << ssid)) {
			/* already disabled, nothing to do for this one */
			if (!(cgrp->subtree_control & (1 << ssid))) {
				disable &= ~(1 << ssid);
				continue;
			}

			/* a child has it enabled? */
			cgroup_for_each_live_child(child, cgrp) {
				if (child->subtree_control & (1 << ssid)) {
					ret = -EBUSY;
					goto out_unlock;
				}
			}
		}
	}

	if (!enable && !disable) {
		ret = 0;
		goto out_unlock;
	}

	/*
	 * Except for the root, subtree_control must be zero for a cgroup
	 * with tasks so that child cgroups don't compete against tasks.
	 */
	if (enable && cgroup_parent(cgrp)) {
		struct cgrp_cset_link *link;

		/*
		 * Because namespaces pin csets too, @cgrp->cset_links
		 * might not be empty even when @cgrp is empty.  Walk and
		 * verify each cset.
		 */
		spin_lock_irq(&css_set_lock);

		ret = 0;
		list_for_each_entry(link, &cgrp->cset_links, cset_link) {
			if (css_set_populated(link->cset)) {
				ret = -EBUSY;
				break;
			}
		}

		spin_unlock_irq(&css_set_lock);

		if (ret)
			goto out_unlock;
	}

	/* save and update control masks and prepare csses */
	cgroup_save_control(cgrp);

	cgrp->subtree_control |= enable;
	cgrp->subtree_control &= ~disable;

	ret = cgroup_apply_control(cgrp);

	/* finalize rolls the masks back itself when @ret is an error */
	cgroup_finalize_control(cgrp, ret);

	/* report the failure to the writer instead of claiming success */
	if (ret)
		goto out_unlock;

	kernfs_activate(cgrp->kn);
out_unlock:
	cgroup_kn_unlock(of->kn);
	return ret ?: nbytes;
}
static int cgroup_events_show(struct seq_file *seq, void *v)
seq_printf(seq, "populated %d\n",
cgroup_is_populated(seq_css(seq)->cgroup));
return 0;
}
/*
 * Open hook: delegate to the cftype's ->open() stored in the kernfs
 * node's priv and return its result, or 0 when the cftype has none.
 */
static int cgroup_file_open(struct kernfs_open_file *of)
{
	struct cftype *cft = of->kn->priv;

	return cft->open ? cft->open(of) : 0;
}
/*
 * Release hook: delegate to the cftype's ->release() stored in the kernfs
 * node's priv when the cftype provides one.
 */
static void cgroup_file_release(struct kernfs_open_file *of)
{
	struct cftype *cft = of->kn->priv;

	if (!cft->release)
		return;

	cft->release(of);
}