Newer
Older
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_rwsem);
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
if (!list_empty(&src_cset->mg_preload_node))
return;
WARN_ON(src_cset->mg_src_cgrp);
WARN_ON(!list_empty(&src_cset->mg_tasks));
WARN_ON(!list_empty(&src_cset->mg_node));
src_cset->mg_src_cgrp = src_cgrp;
get_css_set(src_cset);
list_add(&src_cset->mg_preload_node, preloaded_csets);
}
/**
* cgroup_migrate_prepare_dst - prepare destination css_sets for migration
* @dst_cgrp: the destination cgroup (may be %NULL)
* @preloaded_csets: list of preloaded source css_sets
*
* Tasks are about to be moved to @dst_cgrp and all the source css_sets
* have been preloaded to @preloaded_csets. This function looks up and
* pins all destination css_sets, links each to its source, and append them
* to @preloaded_csets. If @dst_cgrp is %NULL, the destination of each
* source css_set is assumed to be its cgroup on the default hierarchy.
*
* This function must be called after cgroup_migrate_add_src() has been
* called on each migration source css_set. After migration is performed
* using cgroup_migrate(), cgroup_migrate_finish() must be called on
* @preloaded_csets.
*/
static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
struct list_head *preloaded_csets)
{
LIST_HEAD(csets);
struct css_set *src_cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
Tejun Heo
committed
/*
* Except for the root, child_subsys_mask must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) &&
Tejun Heo
committed
dst_cgrp->child_subsys_mask)
return -EBUSY;
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
struct css_set *dst_cset;
dst_cset = find_css_set(src_cset,
dst_cgrp ?: src_cset->dfl_cgrp);
if (!dst_cset)
goto err;
WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
/*
* If src cset equals dst, it's noop. Drop the src.
* cgroup_migrate() will skip the cset too. Note that we
* can't handle src == dst as some nodes are used by both.
*/
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
list_del_init(&src_cset->mg_preload_node);
put_css_set(src_cset, false);
put_css_set(dst_cset, false);
continue;
}
src_cset->mg_dst_cset = dst_cset;
if (list_empty(&dst_cset->mg_preload_node))
list_add(&dst_cset->mg_preload_node, &csets);
else
put_css_set(dst_cset, false);
}
list_splice_tail(&csets, preloaded_csets);
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
return 0;
err:
cgroup_migrate_finish(&csets);
return -ENOMEM;
}
/**
* cgroup_migrate - migrate a process or task to a cgroup
* @cgrp: the destination cgroup
* @leader: the leader of the process or the task to migrate
* @threadgroup: whether @leader points to the whole process or a single task
*
* Migrate a process or task denoted by @leader to @cgrp. If migrating a
* process, the caller must be holding threadgroup_lock of @leader. The
* caller is also responsible for invoking cgroup_migrate_add_src() and
* cgroup_migrate_prepare_dst() on the targets before invoking this
* function and following up with cgroup_migrate_finish().
*
* As long as a controller's ->can_attach() doesn't fail, this function is
* guaranteed to succeed. This means that, excluding ->can_attach()
* failure, when migrating multiple targets, the success or failure can be
* decided for all targets by invoking group_migrate_prepare_dst() before
* actually starting migrating.
*/
static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
bool threadgroup)
struct cgroup_taskset tset = {
.src_csets = LIST_HEAD_INIT(tset.src_csets),
.dst_csets = LIST_HEAD_INIT(tset.dst_csets),
.csets = &tset.src_csets,
};
struct cgroup_subsys_state *css, *failed_css = NULL;
struct css_set *cset, *tmp_cset;
struct task_struct *task, *tmp_task;
int i, ret;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
down_write(&css_set_rwsem);
rcu_read_lock();
/* @task either already exited or can't exit until the end */
if (task->flags & PF_EXITING)
goto next;
/* leave @task alone if post_fork() hasn't linked it yet */
if (list_empty(&task->cg_list))
goto next;
cset = task_css_set(task);
if (!cset->mg_src_cgrp)
goto next;
* cgroup_taskset_first() must always return the leader.
* Take care to avoid disturbing the ordering.
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset.src_csets);
if (list_empty(&cset->mg_dst_cset->mg_node))
list_move_tail(&cset->mg_dst_cset->mg_node,
&tset.dst_csets);
next:
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset.src_csets))
return 0;
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, cgrp) {
ret = css->ss->can_attach(css, &tset);
if (ret) {
goto out_cancel_attach;
}
}
}
/*
* Now that we're guaranteed success, proceed to move all tasks to
* the new cgroup. There are no failure cases after here, so this
* is the commit point.
down_write(&css_set_rwsem);
list_for_each_entry(cset, &tset.src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
cgroup_task_migrate(cset->mg_src_cgrp, task,
cset->mg_dst_cset);
up_write(&css_set_rwsem);
* Migration is committed, all target tasks are now on dst_csets.
* Nothing is sensitive to fork() after this point. Notify
* controllers that migration is complete.
tset.csets = &tset.dst_csets;
for_each_e_css(css, i, cgrp)
if (css->ss->attach)
css->ss->attach(css, &tset);
goto out_release_tset;
for_each_e_css(css, i, cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, &tset);
out_release_tset:
down_write(&css_set_rwsem);
list_splice_init(&tset.dst_csets, &tset.src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
up_write(&css_set_rwsem);
/**
* cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
* @dst_cgrp: the cgroup to attach to
* @leader: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
*
* Call holding cgroup_mutex and threadgroup_lock of @leader.
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
*/
static int cgroup_attach_task(struct cgroup *dst_cgrp,
struct task_struct *leader, bool threadgroup)
{
LIST_HEAD(preloaded_csets);
struct task_struct *task;
int ret;
/* look up all src csets */
down_read(&css_set_rwsem);
rcu_read_lock();
task = leader;
do {
cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
&preloaded_csets);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_read(&css_set_rwsem);
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
if (!ret)
ret = cgroup_migrate(dst_cgrp, leader, threadgroup);
cgroup_migrate_finish(&preloaded_csets);
return ret;
}
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup.
static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
David Howells
committed
const struct cred *cred = current_cred(), *tcred;
struct cgroup *cgrp;
pid_t pid;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
retry_find_task:
rcu_read_lock();
goto out_unlock_cgroup;
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
David Howells
committed
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
David Howells
committed
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
} else
tsk = current;
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
out_unlock_cgroup:
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroup_root *root;
int retval = 0;
for_each_root(root) {
struct cgroup *from_cgrp;
if (root == &cgrp_dfl_root)
continue;
down_read(&css_set_rwsem);
from_cgrp = task_cgroup_from_root(from, root);
up_read(&css_set_rwsem);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
return __cgroup_procs_write(of, buf, nbytes, off, false);
static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
return __cgroup_procs_write(of, buf, nbytes, off, true);
}
static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
struct cgroup *cgrp;
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
return -ENODEV;
spin_lock(&release_agent_path_lock);
strlcpy(cgrp->root->release_agent_path, strstrip(buf),
sizeof(cgrp->root->release_agent_path));
spin_unlock(&release_agent_path_lock);
cgroup_kn_unlock(of->kn);
static int cgroup_release_agent_show(struct seq_file *seq, void *v)
struct cgroup *cgrp = seq_css(seq)->cgroup;
spin_lock(&release_agent_path_lock);
seq_puts(seq, cgrp->root->release_agent_path);
spin_unlock(&release_agent_path_lock);
seq_putc(seq, '\n');
return 0;
}
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
struct cgroup *cgrp = seq_css(seq)->cgroup;
seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
return 0;
}
Tejun Heo
committed
static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask)
Tejun Heo
committed
struct cgroup_subsys *ss;
bool printed = false;
int ssid;
Tejun Heo
committed
for_each_subsys(ss, ssid) {
if (ss_mask & (1 << ssid)) {
if (printed)
seq_putc(seq, ' ');
seq_printf(seq, "%s", ss->name);
printed = true;
}
Tejun Heo
committed
if (printed)
seq_putc(seq, '\n');
Tejun Heo
committed
/* show controllers which are currently attached to the default hierarchy */
static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->root->subsys_mask &
~cgrp_dfl_root_inhibit_ss_mask);
Tejun Heo
committed
return 0;
Tejun Heo
committed
/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
Tejun Heo
committed
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
Tejun Heo
committed
return 0;
Tejun Heo
committed
/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
Tejun Heo
committed
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
return 0;
}
/**
* cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
* @cgrp: root of the subtree to update csses for
*
* @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
* css associations need to be updated accordingly. This function looks up
* all css_sets which are attached to the subtree, creates the matching
* updated css_sets and migrates the tasks to the new ones.
*/
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
LIST_HEAD(preloaded_csets);
struct cgroup_subsys_state *css;
struct css_set *src_cset;
int ret;
lockdep_assert_held(&cgroup_mutex);
/* look up all csses currently attached to @cgrp's subtree */
down_read(&css_set_rwsem);
css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
struct cgrp_cset_link *link;
/* self is not affected by child_subsys_mask change */
if (css->cgroup == cgrp)
continue;
list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
cgroup_migrate_add_src(link->cset, cgrp,
&preloaded_csets);
}
up_read(&css_set_rwsem);
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
if (ret)
goto out_finish;
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
struct task_struct *last_task = NULL, *task;
/* src_csets precede dst_csets, break on the first dst_cset */
if (!src_cset->mg_src_cgrp)
break;
/*
* All tasks in src_cset need to be migrated to the
* matching dst_cset. Empty it process by process. We
* walk tasks but migrate processes. The leader might even
* belong to a different cset but such src_cset would also
* be among the target src_csets because the default
* hierarchy enforces per-process membership.
*/
while (true) {
down_read(&css_set_rwsem);
task = list_first_entry_or_null(&src_cset->tasks,
struct task_struct, cg_list);
if (task) {
task = task->group_leader;
WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
get_task_struct(task);
}
up_read(&css_set_rwsem);
if (!task)
break;
/* guard against possible infinite loop */
if (WARN(last_task == task,
"cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
goto out_finish;
last_task = task;
threadgroup_lock(task);
/* raced against de_thread() from another thread? */
if (!thread_group_leader(task)) {
threadgroup_unlock(task);
put_task_struct(task);
continue;
}
ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
threadgroup_unlock(task);
put_task_struct(task);
if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
goto out_finish;
}
}
out_finish:
cgroup_migrate_finish(&preloaded_csets);
return ret;
}
/* change the enabled child controllers for a cgroup in the default hierarchy */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
loff_t off)
Tejun Heo
committed
{
Tejun Heo
committed
unsigned int enable = 0, disable = 0;
struct cgroup *cgrp, *child;
Tejun Heo
committed
struct cgroup_subsys *ss;
Tejun Heo
committed
int ssid, ret;
/*
* Parse input - space separated list of subsystem names prefixed
* with either + or -.
Tejun Heo
committed
*/
buf = strstrip(buf);
while ((tok = strsep(&buf, " "))) {
if (tok[0] == '\0')
continue;
Tejun Heo
committed
for_each_subsys(ss, ssid) {
if (ss->disabled || strcmp(tok + 1, ss->name) ||
((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask))
Tejun Heo
committed
continue;
if (*tok == '+') {
Tejun Heo
committed
enable |= 1 << ssid;
disable &= ~(1 << ssid);
Tejun Heo
committed
} else if (*tok == '-') {
Tejun Heo
committed
disable |= 1 << ssid;
enable &= ~(1 << ssid);
Tejun Heo
committed
} else {
return -EINVAL;
}
break;
}
if (ssid == CGROUP_SUBSYS_COUNT)
return -EINVAL;
}
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
return -ENODEV;
Tejun Heo
committed
for_each_subsys(ss, ssid) {
if (enable & (1 << ssid)) {
if (cgrp->child_subsys_mask & (1 << ssid)) {
enable &= ~(1 << ssid);
continue;
}
/*
* Because css offlining is asynchronous, userland
* might try to re-enable the same controller while
* the previous instance is still around. In such
* cases, wait till it's gone using offline_waitq.
*/
cgroup_for_each_live_child(child, cgrp) {
DEFINE_WAIT(wait);
Tejun Heo
committed
if (!cgroup_css(child, ss))
continue;
cgroup_get(child);
Tejun Heo
committed
prepare_to_wait(&child->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
cgroup_kn_unlock(of->kn);
Tejun Heo
committed
schedule();
finish_wait(&child->offline_waitq, &wait);
cgroup_put(child);
Tejun Heo
committed
return restart_syscall();
Tejun Heo
committed
}
/* unavailable or not enabled on the parent? */
if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
(cgroup_parent(cgrp) &&
!(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
Tejun Heo
committed
ret = -ENOENT;
goto out_unlock;
Tejun Heo
committed
}
} else if (disable & (1 << ssid)) {
if (!(cgrp->child_subsys_mask & (1 << ssid))) {
disable &= ~(1 << ssid);
continue;
}
/* a child has it enabled? */
cgroup_for_each_live_child(child, cgrp) {
if (child->child_subsys_mask & (1 << ssid)) {
ret = -EBUSY;
goto out_unlock;
Tejun Heo
committed
}
}
}
}
if (!enable && !disable) {
ret = 0;
goto out_unlock;
Tejun Heo
committed
}
/*
* Except for the root, child_subsys_mask must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
Tejun Heo
committed
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
ret = -EBUSY;
goto out_unlock;
}
/*
* Create csses for enables and update child_subsys_mask. This
* changes cgroup_e_css() results which in turn makes the
* subsequent cgroup_update_dfl_csses() associate all tasks in the
* subtree to the updated csses.
*/
for_each_subsys(ss, ssid) {
if (!(enable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp) {
ret = create_css(child, ss);
if (ret)
goto err_undo_css;
}
}
cgrp->child_subsys_mask |= enable;
cgrp->child_subsys_mask &= ~disable;
ret = cgroup_update_dfl_csses(cgrp);
if (ret)
goto err_undo_css;
/* all tasks are now migrated away from the old csses, kill them */
for_each_subsys(ss, ssid) {
if (!(disable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp)
kill_css(cgroup_css(child, ss));
}
kernfs_activate(cgrp->kn);
ret = 0;
out_unlock:
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
Tejun Heo
committed
err_undo_css:
cgrp->child_subsys_mask &= ~enable;
cgrp->child_subsys_mask |= disable;
for_each_subsys(ss, ssid) {
if (!(enable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp) {
struct cgroup_subsys_state *css = cgroup_css(child, ss);
if (css)
kill_css(css);
}
}
goto out_unlock;
}
static int cgroup_populated_show(struct seq_file *seq, void *v)
{
seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt);
return 0;
}
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
struct cgroup *cgrp = of->kn->parent->priv;
struct cftype *cft = of->kn->priv;
struct cgroup_subsys_state *css;
if (cft->write)
return cft->write(of, buf, nbytes, off);
/*
* kernfs guarantees that a file isn't deleted with operations in
* flight, which means that the matching css is and stays alive and
* doesn't need to be pinned. The RCU locking is not necessary
* either. It's just for the convenience of using cgroup_css().
*/
rcu_read_lock();
css = cgroup_css(cgrp, cft->ss);
rcu_read_unlock();
if (cft->write_u64) {
unsigned long long v;
ret = kstrtoull(buf, 0, &v);
if (!ret)
ret = cft->write_u64(css, cft, v);
} else if (cft->write_s64) {
long long v;
ret = kstrtoll(buf, 0, &v);
if (!ret)
ret = cft->write_s64(css, cft, v);
return ret ?: nbytes;
static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
struct cftype *cft = seq_cft(m);
struct cgroup_subsys_state *css = seq_css(m);
if (cft->seq_show)
return cft->seq_show(m, arg);
if (cft->read_u64)
seq_printf(m, "%llu\n", cft->read_u64(css, cft));
else if (cft->read_s64)
seq_printf(m, "%lld\n", cft->read_s64(css, cft));
else
return -EINVAL;
return 0;
static struct kernfs_ops cgroup_kf_single_ops = {
.atomic_write_len = PAGE_SIZE,
.write = cgroup_file_write,
.seq_show = cgroup_seqfile_show,
static struct kernfs_ops cgroup_kf_ops = {
.atomic_write_len = PAGE_SIZE,
.write = cgroup_file_write,
.seq_start = cgroup_seqfile_start,
.seq_next = cgroup_seqfile_next,
.seq_stop = cgroup_seqfile_stop,
.seq_show = cgroup_seqfile_show,
};
/*
* cgroup_rename - Only allow simple rename of directories in place.
*/
static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name_str)
/*
* This isn't a proper migration and its usefulness is very
* limited. Disallow if sane_behavior.
*/
if (cgroup_sane_behavior(cgrp))
return -EPERM;
/*
* We're gonna grab cgroup_mutex which nests outside kernfs
* active_ref. kernfs_rename() doesn't require active_ref
* protection. Break them before grabbing cgroup_mutex.
*/
kernfs_break_active_protection(new_parent);
kernfs_break_active_protection(kn);
ret = kernfs_rename(kn, new_parent, new_name_str);
kernfs_unbreak_active_protection(kn);
kernfs_unbreak_active_protection(new_parent);
/* set uid and gid of cgroup dirs and files to that of the creator */
static int cgroup_kn_set_ugid(struct kernfs_node *kn)
{
struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
.ia_uid = current_fsuid(),
.ia_gid = current_fsgid(), };
if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
return 0;
return kernfs_setattr(kn, &iattr);
}
static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
struct kernfs_node *kn;
struct lock_class_key *key = NULL;
int ret;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
key = &cft->lockdep_key;
#endif
kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
cgroup_file_mode(cft), 0, cft->kf_ops, cft,
NULL, false, key);
if (IS_ERR(kn))
return PTR_ERR(kn);
ret = cgroup_kn_set_ugid(kn);
Tejun Heo
committed
if (ret) {
kernfs_remove(kn);
Tejun Heo
committed
return ret;
}
if (cft->seq_show == cgroup_populated_show)
cgrp->populated_kn = kn;
Tejun Heo
committed
return 0;
/**
* cgroup_addrm_files - add or remove files to a cgroup directory
* @cgrp: the target cgroup
* @cfts: array of cftypes to be added
* @is_add: whether to add or remove
*
* Depending on @is_add, add or remove files defined by @cfts on @cgrp.
* For removals, this function never fails. If addition fails, this
* function doesn't remove files already added. The caller is responsible
* for cleaning up.
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add)
lockdep_assert_held(&cgroup_mutex);
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
continue;
if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
ret = cgroup_add_file(cgrp, cft);
pr_warn("%s: failed to add %s, err=%d\n",
__func__, cft->name, ret);
} else {
cgroup_rm_file(cgrp, cft);
static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
{
LIST_HEAD(pending);
struct cgroup_subsys *ss = cfts[0].ss;
struct cgroup *root = &ss->root->cgrp;
Tejun Heo
committed
struct cgroup_subsys_state *css;
lockdep_assert_held(&cgroup_mutex);
/* add/rm files for all cgroups created before */
css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
Tejun Heo
committed
struct cgroup *cgrp = css->cgroup;
if (cgroup_is_dead(cgrp))
continue;
ret = cgroup_addrm_files(cgrp, cfts, is_add);
if (is_add && !ret)
kernfs_activate(root->kn);
static void cgroup_exit_cftypes(struct cftype *cfts)
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* free copy for custom atomic_write_len, see init_cftypes() */
if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
kfree(cft->kf_ops);
cft->kf_ops = NULL;
static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
struct cftype *cft;
for (cft = cfts; cft->name[0] != '\0'; cft++) {
struct kernfs_ops *kf_ops;