Newer
Older
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
down_write(&css_set_rwsem);
rcu_read_lock();
/* @task either already exited or can't exit until the end */
if (task->flags & PF_EXITING)
goto next;
/* leave @task alone if post_fork() hasn't linked it yet */
if (list_empty(&task->cg_list))
goto next;
cset = task_css_set(task);
if (!cset->mg_src_cgrp)
goto next;
* cgroup_taskset_first() must always return the leader.
* Take care to avoid disturbing the ordering.
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset.src_csets);
if (list_empty(&cset->mg_dst_cset->mg_node))
list_move_tail(&cset->mg_dst_cset->mg_node,
&tset.dst_csets);
next:
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset.src_csets))
return 0;
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, cgrp) {
ret = css->ss->can_attach(css, &tset);
if (ret) {
goto out_cancel_attach;
}
}
}
/*
* Now that we're guaranteed success, proceed to move all tasks to
* the new cgroup. There are no failure cases after here, so this
* is the commit point.
down_write(&css_set_rwsem);
list_for_each_entry(cset, &tset.src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
cgroup_task_migrate(cset->mg_src_cgrp, task,
cset->mg_dst_cset);
up_write(&css_set_rwsem);
* Migration is committed, all target tasks are now on dst_csets.
* Nothing is sensitive to fork() after this point. Notify
* controllers that migration is complete.
tset.csets = &tset.dst_csets;
for_each_e_css(css, i, cgrp)
if (css->ss->attach)
css->ss->attach(css, &tset);
goto out_release_tset;
for_each_e_css(css, i, cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, &tset);
out_release_tset:
down_write(&css_set_rwsem);
list_splice_init(&tset.dst_csets, &tset.src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
up_write(&css_set_rwsem);
/**
 * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @dst_cgrp: the cgroup to attach to
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and threadgroup_lock of @leader.
 */
static int cgroup_attach_task(struct cgroup *dst_cgrp,
			      struct task_struct *leader, bool threadgroup)
{
	LIST_HEAD(preloaded_csets);
	struct task_struct *pos = leader;
	int err;

	/*
	 * Gather the source css_set of @leader and, when the whole
	 * threadgroup is migrating, of every other thread in the group.
	 */
	down_read(&css_set_rwsem);
	rcu_read_lock();
	do {
		cgroup_migrate_add_src(task_css_set(pos), dst_cgrp,
				       &preloaded_csets);
		if (!threadgroup)
			break;
	} while_each_thread(leader, pos);
	rcu_read_unlock();
	up_read(&css_set_rwsem);

	/* set up the destination csets; migrate only if that succeeded */
	err = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
	if (err)
		goto out_finish;

	err = cgroup_migrate(dst_cgrp, leader, threadgroup);

out_finish:
	/* runs on both the success and the failure path */
	cgroup_migrate_finish(&preloaded_csets);
	return err;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup.
static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
{
struct task_struct *tsk;
David Howells
committed
const struct cred *cred = current_cred(), *tcred;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
retry_find_task:
rcu_read_lock();
goto out_unlock_cgroup;
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
David Howells
committed
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
David Howells
committed
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
} else
tsk = current;
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
out_unlock_cgroup:
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroup_root *root;
int retval = 0;
for_each_root(root) {
struct cgroup *from_cgrp;
if (root == &cgrp_dfl_root)
continue;
down_read(&css_set_rwsem);
from_cgrp = task_cgroup_from_root(from, root);
up_read(&css_set_rwsem);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
/*
 * u64 write handler: attach only the single task identified by @pid
 * (threadgroup == false) to the cgroup of @css.  @cft is unused.
 */
static int cgroup_tasks_write(struct cgroup_subsys_state *css,
			      struct cftype *cft, u64 pid)
{
	return attach_task_by_pid(css->cgroup, pid, false);
}
/*
 * u64 write handler: attach the whole threadgroup identified by @tgid to the
 * cgroup of @css.  @cft is unused.
 */
static int cgroup_procs_write(struct cgroup_subsys_state *css,
			      struct cftype *cft, u64 tgid)
{
	struct cgroup *dst_cgrp = css->cgroup;

	return attach_task_by_pid(dst_cgrp, tgid, true);
}
static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
struct cftype *cft, char *buffer)
struct cgroup_root *root = css->cgroup->root;
BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX);
if (!cgroup_lock_live_group(css->cgroup))
return -ENODEV;
spin_lock(&release_agent_path_lock);
strlcpy(root->release_agent_path, buffer,
sizeof(root->release_agent_path));
spin_unlock(&release_agent_path_lock);
return 0;
}
static int cgroup_release_agent_show(struct seq_file *seq, void *v)
struct cgroup *cgrp = seq_css(seq)->cgroup;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
seq_puts(seq, cgrp->root->release_agent_path);
seq_putc(seq, '\n');
return 0;
}
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
struct cgroup *cgrp = seq_css(seq)->cgroup;
seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
return 0;
}
/*
 * kernfs write entry point shared by all cgroup files.
 *
 * Dispatches @buf to the first handler the file's cftype provides, in this
 * order: write_string (with surrounding whitespace stripped), write_u64,
 * write_s64 (both after numeric parsing), trigger.  @off is not used.
 *
 * Returns @nbytes when the handler returned 0, otherwise the handler's (or
 * the parser's) non-zero result; -EINVAL if the cftype has no write handler.
 */
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of->kn->priv;
	struct cgroup_subsys_state *css;
	int ret;

	/*
	 * kernfs guarantees that a file isn't deleted with operations in
	 * flight, which means that the matching css is and stays alive and
	 * doesn't need to be pinned.  The RCU locking is not necessary
	 * either.  It's just for the convenience of using cgroup_css().
	 */
	rcu_read_lock();
	css = cgroup_css(cgrp, cft->ss);
	rcu_read_unlock();

	if (cft->write_string) {
		ret = cft->write_string(css, cft, strstrip(buf));
	} else if (cft->write_u64) {
		unsigned long long v;

		ret = kstrtoull(buf, 0, &v);
		if (!ret)
			ret = cft->write_u64(css, cft, v);
	} else if (cft->write_s64) {
		long long v;

		ret = kstrtoll(buf, 0, &v);
		if (!ret)
			ret = cft->write_s64(css, cft, v);
	} else if (cft->trigger) {
		ret = cft->trigger(css, (unsigned int)cft->private);
	} else {
		/* no write method at all: never return an unset value */
		ret = -EINVAL;
	}

	return ret ?: nbytes;
}
static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
struct cftype *cft = seq_cft(m);
struct cgroup_subsys_state *css = seq_css(m);
if (cft->seq_show)
return cft->seq_show(m, arg);
if (cft->read_u64)
seq_printf(m, "%llu\n", cft->read_u64(css, cft));
else if (cft->read_s64)
seq_printf(m, "%lld\n", cft->read_s64(css, cft));
else
return -EINVAL;
return 0;
/*
 * kernfs operations for cgroup files whose cftype has no seq_start method
 * (selected in cgroup_init_cftypes()).  Only write and seq_show are set.
 */
static struct kernfs_ops cgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= cgroup_file_write,
	.seq_show		= cgroup_seqfile_show,
};
/*
 * kernfs operations for cgroup files whose cftype provides a seq_start
 * method (selected in cgroup_init_cftypes()); the full seq_file iterator
 * set is wired up in addition to write.
 *
 * atomic_write_len defaults to PAGE_SIZE.  cgroup_init_cftypes() makes a
 * private copy of this table when a cftype requests a different
 * max_write_len.
 */
static struct kernfs_ops cgroup_kf_ops = {
.atomic_write_len = PAGE_SIZE,
.write = cgroup_file_write,
.seq_start = cgroup_seqfile_start,
.seq_next = cgroup_seqfile_next,
.seq_stop = cgroup_seqfile_stop,
.seq_show = cgroup_seqfile_show,
};
/*
* cgroup_rename - Only allow simple rename of directories in place.
*/
static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name_str)
/*
* This isn't a proper migration and its usefulness is very
* limited. Disallow if sane_behavior.
*/
if (cgroup_sane_behavior(cgrp))
return -EPERM;
/*
* We're gonna grab cgroup_tree_mutex which nests outside kernfs
* active_ref. kernfs_rename() doesn't require active_ref
* protection. Break them before grabbing cgroup_tree_mutex.
*/
kernfs_break_active_protection(new_parent);
kernfs_break_active_protection(kn);
mutex_lock(&cgroup_tree_mutex);
mutex_lock(&cgroup_mutex);
ret = kernfs_rename(kn, new_parent, new_name_str);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgroup_tree_mutex);
kernfs_unbreak_active_protection(kn);
kernfs_unbreak_active_protection(new_parent);
/*
 * Give @kn the filesystem uid and gid of the current task, i.e. of whoever
 * is creating the cgroup directory or file.  Nothing is changed when both
 * ids are the global root ids.
 *
 * Returns 0 when no change is needed, otherwise the kernfs_setattr() result.
 */
static int cgroup_kn_set_ugid(struct kernfs_node *kn)
{
	struct iattr iattr = {
		.ia_valid	= ATTR_UID | ATTR_GID,
		.ia_uid		= current_fsuid(),
		.ia_gid		= current_fsgid(),
	};

	/* anything other than root:root has to be applied explicitly */
	if (!uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) ||
	    !gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
		return kernfs_setattr(kn, &iattr);

	return 0;
}
static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
struct kernfs_node *kn;
struct lock_class_key *key = NULL;
int ret;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
key = &cft->lockdep_key;
#endif
kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
cgroup_file_mode(cft), 0, cft->kf_ops, cft,
NULL, false, key);
if (IS_ERR(kn))
return PTR_ERR(kn);
ret = cgroup_kn_set_ugid(kn);
if (ret)
kernfs_remove(kn);
return ret;
/**
* cgroup_addrm_files - add or remove files to a cgroup directory
* @cgrp: the target cgroup
* @cfts: array of cftypes to be added
* @is_add: whether to add or remove
*
* Depending on @is_add, add or remove files defined by @cfts on @cgrp.
* For removals, this function never fails. If addition fails, this
* function doesn't remove files already added. The caller is responsible
* for cleaning up.
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add)
lockdep_assert_held(&cgroup_tree_mutex);
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
continue;
if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
continue;
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
continue;
ret = cgroup_add_file(cgrp, cft);
pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
cft->name, ret);
return ret;
}
} else {
cgroup_rm_file(cgrp, cft);
static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
{
LIST_HEAD(pending);
struct cgroup_subsys *ss = cfts[0].ss;
struct cgroup *root = &ss->root->cgrp;
Tejun Heo
committed
struct cgroup_subsys_state *css;
lockdep_assert_held(&cgroup_tree_mutex);
/* add/rm files for all cgroups created before */
css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
Tejun Heo
committed
struct cgroup *cgrp = css->cgroup;
if (cgroup_is_dead(cgrp))
continue;
ret = cgroup_addrm_files(cgrp, cfts, is_add);
if (is_add && !ret)
kernfs_activate(root->kn);
static void cgroup_exit_cftypes(struct cftype *cfts)
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* free copy for custom atomic_write_len, see init_cftypes() */
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
kfree(cft->kf_ops);
cft->kf_ops = NULL;
static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype *cft;
for (cft = cfts; cft->name[0] != '\0'; cft++) {
struct kernfs_ops *kf_ops;
if (cft->seq_start)
kf_ops = &cgroup_kf_ops;
else
kf_ops = &cgroup_kf_single_ops;
/*
* Ugh... if @cft wants a custom max_write_len, we need to
* make a copy of kf_ops to set its atomic_write_len.
*/
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
if (!kf_ops) {
cgroup_exit_cftypes(cfts);
return -ENOMEM;
}
kf_ops->atomic_write_len = cft->max_write_len;
}
/*
 * Unregister @cfts with cgroup_tree_mutex already held: unlink the array,
 * remove its files via cgroup_apply_cftypes() and release per-cftype state
 * via cgroup_exit_cftypes().
 *
 * Returns 0 on success, -ENOENT if @cfts is NULL or its first entry has no
 * subsystem set.
 */
static int cgroup_rm_cftypes_locked(struct cftype *cfts)
{
	lockdep_assert_held(&cgroup_tree_mutex);

	if (!cfts || !cfts[0].ss)
		return -ENOENT;

	list_del(&cfts->node);
	cgroup_apply_cftypes(cfts, false);
	cgroup_exit_cftypes(cfts);
	return 0;
}
/**
* cgroup_rm_cftypes - remove an array of cftypes from a subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Unregister @cfts. Files described by @cfts are removed from all
* existing cgroups and all future cgroups won't have them either. This
* function can be called anytime whether @cfts' subsys is attached or not.
*
* Returns 0 on successful unregistration, -ENOENT if @cfts is not
int cgroup_rm_cftypes(struct cftype *cfts)
mutex_lock(&cgroup_tree_mutex);
ret = cgroup_rm_cftypes_locked(cfts);
mutex_unlock(&cgroup_tree_mutex);
return ret;
/**
* cgroup_add_cftypes - add an array of cftypes to a subsystem
* @ss: target cgroup subsystem
* @cfts: zero-length name terminated array of cftypes
*
* Register @cfts to @ss. Files described by @cfts are created for all
* existing cgroups to which @ss is attached and all future cgroups will
* have them too. This function can be called anytime whether @ss is
* attached or not.
*
* Returns 0 on successful registration, -errno on failure. Note that this
* function currently returns 0 as long as @cfts registration is successful
* even if some file creation attempts on existing cgroups fail.
*/
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
if (!cfts || cfts[0].name[0] == '\0')
return 0;
ret = cgroup_init_cftypes(ss, cfts);
if (ret)
return ret;
mutex_lock(&cgroup_tree_mutex);
ret = cgroup_apply_cftypes(cfts, true);
cgroup_rm_cftypes_locked(cfts);
mutex_unlock(&cgroup_tree_mutex);
/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*
* Return the number of tasks in the cgroup.
*/
static int cgroup_task_count(const struct cgroup *cgrp)
struct cgrp_cset_link *link;
down_read(&css_set_rwsem);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += atomic_read(&link->cset->refcount);
up_read(&css_set_rwsem);
Tejun Heo
committed
* css_next_child - find the next child of a given css
* @pos_css: the current position (%NULL to initiate traversal)
* @parent_css: css whose children to walk
Tejun Heo
committed
* This function returns the next child of @parent_css and should be called
* under either cgroup_mutex or RCU read lock. The only requirement is
* that @parent_css and @pos_css are accessible. The next sibling is
* guaranteed to be returned regardless of their states.
Tejun Heo
committed
struct cgroup_subsys_state *
css_next_child(struct cgroup_subsys_state *pos_css,
struct cgroup_subsys_state *parent_css)
Tejun Heo
committed
struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
struct cgroup *cgrp = parent_css->cgroup;
struct cgroup *next;
/*
* @pos could already have been removed. Once a cgroup is removed,
* its ->sibling.next is no longer updated when its next sibling
* changes. As CGRP_DEAD assertion is serialized and happens
* before the cgroup is taken off the ->sibling list, if we see it
* unasserted, it's guaranteed that the next sibling hasn't
* finished its grace period even if it's already removed, and thus
* safe to dereference from this RCU critical section. If
* ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
* to be visible as %true here.
*
* If @pos is dead, its next pointer can't be dereferenced;
* however, as each cgroup is given a monotonically increasing
* unique serial number and always appended to the sibling list,
* the next one can be found by walking the parent's children until
* we see a cgroup with higher serial number than @pos's. While
* this path can be slower, it's taken only when either the current
* cgroup is removed or iteration and removal race.
if (!pos) {
next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
} else if (likely(!cgroup_is_dead(pos))) {
next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
} else {
list_for_each_entry_rcu(next, &cgrp->children, sibling)
if (next->serial_nr > pos->serial_nr)
break;
/*
* @next, if not pointing to the head, can be dereferenced and is
* the next sibling; however, it might have @ss disabled. If so,
* fast-forward to the next enabled one.
*/
while (&next->sibling != &cgrp->children) {
struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss);
Tejun Heo
committed
if (next_css)
return next_css;
next = list_entry_rcu(next->sibling.next, struct cgroup, sibling);
}
return NULL;
Tejun Heo
committed
* css_next_descendant_pre - find the next descendant for pre-order walk
* @pos: the current position (%NULL to initiate traversal)
Tejun Heo
committed
* @root: css whose descendants to walk
Tejun Heo
committed
* To be used by css_for_each_descendant_pre(). Find the next descendant
Tejun Heo
committed
* to visit for pre-order traversal of @root's descendants. @root is
* included in the iteration and the first node to be visited.
* While this function requires cgroup_mutex or RCU read locking, it
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct next descendant as long
* as both @pos and @root are accessible and @pos is a descendant of @root.
Tejun Heo
committed
struct cgroup_subsys_state *
css_next_descendant_pre(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *root)
Tejun Heo
committed
struct cgroup_subsys_state *next;
Tejun Heo
committed
/* if first iteration, visit @root */
Tejun Heo
committed
return root;
/* visit the first child if exists */
Tejun Heo
committed
next = css_next_child(NULL, pos);
if (next)
return next;
/* no child, visit my or the closest ancestor's next sibling */
Tejun Heo
committed
while (pos != root) {
next = css_next_child(pos, css_parent(pos));
Tejun Heo
committed
pos = css_parent(pos);
return NULL;
}
Tejun Heo
committed
* css_rightmost_descendant - return the rightmost descendant of a css
* @pos: css of interest
Tejun Heo
committed
* Return the rightmost descendant of @pos. If there's no descendant, @pos
* is returned. This can be used during pre-order traversal to skip
* While this function requires cgroup_mutex or RCU read locking, it
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct rightmost descendant as
* long as @pos is accessible.
Tejun Heo
committed
struct cgroup_subsys_state *
css_rightmost_descendant(struct cgroup_subsys_state *pos)
Tejun Heo
committed
struct cgroup_subsys_state *last, *tmp;
do {
last = pos;
/* ->prev isn't RCU safe, walk ->next till the end */
pos = NULL;
Tejun Heo
committed
css_for_each_child(tmp, last)
pos = tmp;
} while (pos);
return last;
}
Tejun Heo
committed
static struct cgroup_subsys_state *
css_leftmost_descendant(struct cgroup_subsys_state *pos)
Tejun Heo
committed
struct cgroup_subsys_state *last;
do {
last = pos;
Tejun Heo
committed
pos = css_next_child(NULL, pos);
} while (pos);
return last;
}
/**
Tejun Heo
committed
* css_next_descendant_post - find the next descendant for post-order walk
* @pos: the current position (%NULL to initiate traversal)
Tejun Heo
committed
* @root: css whose descendants to walk
Tejun Heo
committed
* To be used by css_for_each_descendant_post(). Find the next descendant
Tejun Heo
committed
* to visit for post-order traversal of @root's descendants. @root is
* included in the iteration and the last node to be visited.
* While this function requires cgroup_mutex or RCU read locking, it
* doesn't require the whole traversal to be contained in a single critical
* section. This function will return the correct next descendant as long
* as both @pos and @cgroup are accessible and @pos is a descendant of
* @cgroup.
Tejun Heo
committed
struct cgroup_subsys_state *
css_next_descendant_post(struct cgroup_subsys_state *pos,
struct cgroup_subsys_state *root)
Tejun Heo
committed
struct cgroup_subsys_state *next;
/* if first iteration, visit leftmost descendant which may be @root */
if (!pos)
return css_leftmost_descendant(root);
Tejun Heo
committed
/* if we visited @root, we're done */
if (pos == root)
return NULL;
/* if there's an unvisited sibling, visit its leftmost descendant */
Tejun Heo
committed
next = css_next_child(pos, css_parent(pos));
Tejun Heo
committed
return css_leftmost_descendant(next);
/* no sibling left, visit parent */
Tejun Heo
committed
return css_parent(pos);
* css_advance_task_iter - advance a task itererator to the next css_set
* @it: the iterator to advance
*
* Advance @it to the next css_set to walk.
static void css_advance_task_iter(struct css_task_iter *it)
struct cgrp_cset_link *link;
struct css_set *cset;
/* Advance to the next non-empty css_set */
do {
l = l->next;
if (l == it->cset_head) {
it->cset_pos = NULL;
if (it->ss) {
cset = container_of(l, struct css_set,
e_cset_node[it->ss->id]);
} else {
link = list_entry(l, struct cgrp_cset_link, cset_link);
cset = link->cset;
}
} while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks));
it->task_pos = cset->mg_tasks.next;
it->tasks_head = &cset->tasks;
it->mg_tasks_head = &cset->mg_tasks;
* css_task_iter_start - initiate task iteration
* @css: the css to walk tasks of
* @it: the task iterator to use
*
* Initiate iteration through the tasks of @css. The caller can call
* css_task_iter_next() to walk through the tasks until the function
* returns NULL. On completion of iteration, css_task_iter_end() must be
* called.
*
* Note that this function acquires a lock which is released when the
* iteration finishes. The caller can't sleep while iteration is in
* progress.
*/
void css_task_iter_start(struct cgroup_subsys_state *css,
struct css_task_iter *it)
__acquires(css_set_rwsem)
/* no one should try to iterate before mounting cgroups */
WARN_ON_ONCE(!use_task_css_set_links);
down_read(&css_set_rwsem);
it->ss = css->ss;
if (it->ss)
it->cset_pos = &css->cgroup->e_csets[css->ss->id];
else
it->cset_pos = &css->cgroup->cset_links;
css_advance_task_iter(it);
* css_task_iter_next - return the next task for the iterator
* @it: the task iterator being iterated
*
* The "next" function for task iteration. @it should have been
* initialized via css_task_iter_start(). Returns NULL when the iteration
* reaches the end.
struct task_struct *css_task_iter_next(struct css_task_iter *it)
{
struct task_struct *res;
/* If the iterator cg is NULL, we have no tasks */
return NULL;
res = list_entry(l, struct task_struct, cg_list);
/*
* Advance iterator to find next entry. cset->tasks is consumed
* first and then ->mg_tasks. After ->mg_tasks, we move onto the
* next cset.
*/
if (l == it->tasks_head)
l = it->mg_tasks_head->next;
css_advance_task_iter(it);
return res;
}
* css_task_iter_end - finish task iteration
* @it: the task iterator to finish
*
* Finish task iteration started by css_task_iter_start().
void css_task_iter_end(struct css_task_iter *it)
__releases(css_set_rwsem)
up_read(&css_set_rwsem);
Tejun Heo
committed
* cgroup_trasnsfer_tasks - move tasks from one cgroup to another
* @to: cgroup to which the tasks will be moved
* @from: cgroup in which the tasks currently reside
* Locking rules between cgroup_post_fork() and the migration path
* guarantee that, if a task is forking while being migrated, the new child
* is guaranteed to be either visible in the source cgroup after the
* parent's migration is complete or put into the target cgroup. No task
* can slip out of migration through forking.
Tejun Heo
committed
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
LIST_HEAD(preloaded_csets);
struct cgrp_cset_link *link;
struct css_task_iter it;
struct task_struct *task;
mutex_lock(&cgroup_mutex);
/* all tasks in @from are being moved, all csets are source */
down_read(&css_set_rwsem);
list_for_each_entry(link, &from->cset_links, cset_link)