/**
* cgroup_migrate_finish - cleanup after attach
* @preloaded_csets: list of preloaded css_sets
*
* Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See
* those functions for details.
*/
static void cgroup_migrate_finish(struct list_head *preloaded_csets)
{
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
down_write(&css_set_rwsem);
list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cset = NULL;
list_del_init(&cset->mg_preload_node);
put_css_set_locked(cset, false);
}
up_write(&css_set_rwsem);
}
/**
* cgroup_migrate_add_src - add a migration source css_set
* @src_cset: the source css_set to add
* @dst_cgrp: the destination cgroup
* @preloaded_csets: list of preloaded css_sets
*
* Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin
* @src_cset and add it to @preloaded_csets, which should later be cleaned
* up by cgroup_migrate_finish().
*
* This function may be called without holding threadgroup_lock even if the
* target is a process. Threads may be created and destroyed but as long
* as cgroup_mutex is not dropped, no new css_set can be put into play and
* the preloaded css_sets are guaranteed to cover all migrations.
*/
static void cgroup_migrate_add_src(struct css_set *src_cset,
struct cgroup *dst_cgrp,
struct list_head *preloaded_csets)
{
struct cgroup *src_cgrp;
lockdep_assert_held(&cgroup_mutex);
lockdep_assert_held(&css_set_rwsem);
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
if (!list_empty(&src_cset->mg_preload_node))
return;
WARN_ON(src_cset->mg_src_cgrp);
WARN_ON(!list_empty(&src_cset->mg_tasks));
WARN_ON(!list_empty(&src_cset->mg_node));
src_cset->mg_src_cgrp = src_cgrp;
get_css_set(src_cset);
list_add(&src_cset->mg_preload_node, preloaded_csets);
}
/**
* cgroup_migrate_prepare_dst - prepare destination css_sets for migration
* @dst_cgrp: the destination cgroup (may be %NULL)
* @preloaded_csets: list of preloaded source css_sets
*
* Tasks are about to be moved to @dst_cgrp and all the source css_sets
* have been preloaded to @preloaded_csets. This function looks up and
* pins all destination css_sets, links each to its source, and append them
* to @preloaded_csets. If @dst_cgrp is %NULL, the destination of each
* source css_set is assumed to be its cgroup on the default hierarchy.
*
* This function must be called after cgroup_migrate_add_src() has been
* called on each migration source css_set. After migration is performed
* using cgroup_migrate(), cgroup_migrate_finish() must be called on
* @preloaded_csets.
*/
static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
struct list_head *preloaded_csets)
{
LIST_HEAD(csets);
struct css_set *src_cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
/*
* Except for the root, child_subsys_mask must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent &&
dst_cgrp->child_subsys_mask)
return -EBUSY;
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
struct css_set *dst_cset;
dst_cset = find_css_set(src_cset,
dst_cgrp ?: src_cset->dfl_cgrp);
if (!dst_cset)
goto err;
WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
/*
* If src cset equals dst, it's noop. Drop the src.
* cgroup_migrate() will skip the cset too. Note that we
* can't handle src == dst as some nodes are used by both.
*/
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
list_del_init(&src_cset->mg_preload_node);
put_css_set(src_cset, false);
put_css_set(dst_cset, false);
continue;
}
src_cset->mg_dst_cset = dst_cset;
if (list_empty(&dst_cset->mg_preload_node))
list_add(&dst_cset->mg_preload_node, &csets);
else
put_css_set(dst_cset, false);
}
list_splice_tail(&csets, preloaded_csets);
return 0;
err:
cgroup_migrate_finish(&csets);
return -ENOMEM;
}
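/*
* Editor's sketch (not in the original source): the intended calling
* sequence for the preload/migrate API above, mirroring what
* cgroup_attach_task() below actually does. The caller holds cgroup_mutex
* and covers the source css_set lookup with css_set_rwsem:
*
*	LIST_HEAD(preloaded_csets);
*	int ret;
*
*	down_read(&css_set_rwsem);
*	cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &preloaded_csets);
*	up_read(&css_set_rwsem);
*
*	ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
*	if (!ret)
*		ret = cgroup_migrate(dst_cgrp, task, threadgroup);
*	cgroup_migrate_finish(&preloaded_csets);
*/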
/**
* cgroup_migrate - migrate a process or task to a cgroup
* @cgrp: the destination cgroup
* @leader: the leader of the process or the task to migrate
* @threadgroup: whether @leader points to the whole process or a single task
*
* Migrate a process or task denoted by @leader to @cgrp. If migrating a
* process, the caller must be holding threadgroup_lock of @leader. The
* caller is also responsible for invoking cgroup_migrate_add_src() and
* cgroup_migrate_prepare_dst() on the targets before invoking this
* function and following up with cgroup_migrate_finish().
*
* As long as a controller's ->can_attach() doesn't fail, this function is
* guaranteed to succeed. This means that, excluding ->can_attach()
* failure, when migrating multiple targets, the success or failure can be
* decided for all targets by invoking cgroup_migrate_prepare_dst() before
* actually starting migrating.
*/
static int cgroup_migrate(struct cgroup *cgrp, struct task_struct *leader,
bool threadgroup)
{
struct cgroup_taskset tset = {
.src_csets = LIST_HEAD_INIT(tset.src_csets),
.dst_csets = LIST_HEAD_INIT(tset.dst_csets),
.csets = &tset.src_csets,
};
struct cgroup_subsys_state *css, *failed_css = NULL;
struct css_set *cset, *tmp_cset;
struct task_struct *task, *tmp_task;
int i, ret;
/*
* Prevent freeing of tasks while we take a snapshot. Tasks that are
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
down_write(&css_set_rwsem);
rcu_read_lock();
task = leader;
do {
/* @task either already exited or can't exit until the end */
if (task->flags & PF_EXITING)
goto next;
/* leave @task alone if post_fork() hasn't linked it yet */
if (list_empty(&task->cg_list))
goto next;
cset = task_css_set(task);
if (!cset->mg_src_cgrp)
goto next;
/*
* cgroup_taskset_first() must always return the leader.
* Take care to avoid disturbing the ordering.
*/
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
list_add_tail(&cset->mg_node, &tset.src_csets);
if (list_empty(&cset->mg_dst_cset->mg_node))
list_move_tail(&cset->mg_dst_cset->mg_node,
&tset.dst_csets);
next:
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_write(&css_set_rwsem);
/* methods shouldn't be called if no task is actually migrating */
if (list_empty(&tset.src_csets))
return 0;
/* check that we can legitimately attach to the cgroup */
for_each_e_css(css, i, cgrp) {
if (css->ss->can_attach) {
ret = css->ss->can_attach(css, &tset);
if (ret) {
failed_css = css;
goto out_cancel_attach;
}
}
}
/*
* Now that we're guaranteed success, proceed to move all tasks to
* the new cgroup. There are no failure cases after here, so this
* is the commit point.
*/
down_write(&css_set_rwsem);
list_for_each_entry(cset, &tset.src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list)
cgroup_task_migrate(cset->mg_src_cgrp, task,
cset->mg_dst_cset);
}
up_write(&css_set_rwsem);
/*
* Migration is committed, all target tasks are now on dst_csets.
* Nothing is sensitive to fork() after this point. Notify
* controllers that migration is complete.
*/
tset.csets = &tset.dst_csets;
for_each_e_css(css, i, cgrp)
if (css->ss->attach)
css->ss->attach(css, &tset);
ret = 0;
goto out_release_tset;
out_cancel_attach:
for_each_e_css(css, i, cgrp) {
if (css == failed_css)
break;
if (css->ss->cancel_attach)
css->ss->cancel_attach(css, &tset);
}
out_release_tset:
down_write(&css_set_rwsem);
list_splice_init(&tset.dst_csets, &tset.src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset.src_csets, mg_node) {
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
up_write(&css_set_rwsem);
return ret;
}
/**
* cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
* @dst_cgrp: the cgroup to attach to
* @leader: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
*
* Call holding cgroup_mutex and threadgroup_lock of @leader.
*/
static int cgroup_attach_task(struct cgroup *dst_cgrp,
struct task_struct *leader, bool threadgroup)
{
LIST_HEAD(preloaded_csets);
struct task_struct *task;
int ret;
/* look up all src csets */
down_read(&css_set_rwsem);
rcu_read_lock();
task = leader;
do {
cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
&preloaded_csets);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
up_read(&css_set_rwsem);
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
if (!ret)
ret = cgroup_migrate(dst_cgrp, leader, threadgroup);
cgroup_migrate_finish(&preloaded_csets);
return ret;
}
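/*
* Editor's note: a minimal sketch of the locking a caller is expected to
* provide around cgroup_attach_task(), as done by __cgroup_procs_write()
* below (cgroup_mutex is already held at that point):
*
*	threadgroup_lock(tsk);
*	ret = cgroup_attach_task(cgrp, tsk, threadgroup);
*	threadgroup_unlock(tsk);
*/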
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
* cgroup_mutex and threadgroup.
*/
static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
const struct cred *cred = current_cred(), *tcred;
struct cgroup *cgrp = of_css(of)->cgroup;
pid_t pid;
int ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
retry_find_task:
rcu_read_lock();
if (pid) {
tsk = find_task_by_vpid(pid);
if (!tsk) {
rcu_read_unlock();
ret = -ESRCH;
goto out_unlock_cgroup;
}
/*
* even if we're attaching all tasks in the thread group, we
* only need to check permissions on one of them.
*/
tcred = __task_cred(tsk);
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
!uid_eq(cred->euid, tcred->suid)) {
rcu_read_unlock();
ret = -EACCES;
goto out_unlock_cgroup;
}
} else
tsk = current;
if (threadgroup)
tsk = tsk->group_leader;
/*
* Workqueue threads may acquire PF_NO_SETAFFINITY and become
* trapped in a cpuset, or RT worker may be born in a cgroup
* with no rt_runtime allocated. Just say no.
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
rcu_read_unlock();
goto out_unlock_cgroup;
}
get_task_struct(tsk);
rcu_read_unlock();
threadgroup_lock(tsk);
if (threadgroup) {
if (!thread_group_leader(tsk)) {
/*
* a race with de_thread from another thread's exec()
* may strip us of our leadership, if this happens,
* there is no choice but to throw this task away and
* try again; this is
* "double-double-toil-and-trouble-check locking".
*/
threadgroup_unlock(tsk);
put_task_struct(tsk);
goto retry_find_task;
}
}
ret = cgroup_attach_task(cgrp, tsk, threadgroup);
threadgroup_unlock(tsk);
put_task_struct(tsk);
out_unlock_cgroup:
mutex_unlock(&cgroup_mutex);
return ret ?: nbytes;
}
/**
* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
* @from: attach to all cgroups of a given task
* @tsk: the task to be attached
*/
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
struct cgroup_root *root;
int retval = 0;
mutex_lock(&cgroup_mutex);
for_each_root(root) {
struct cgroup *from_cgrp;
if (root == &cgrp_dfl_root)
continue;
down_read(&css_set_rwsem);
from_cgrp = task_cgroup_from_root(from, root);
up_read(&css_set_rwsem);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
break;
}
mutex_unlock(&cgroup_mutex);
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
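/*
* Editor's sketch (hypothetical caller, not from this file): an in-kernel
* user would mirror one task's cgroup membership onto another across all
* legacy hierarchies (the default hierarchy is skipped above), e.g.:
*
*	if (cgroup_attach_task_all(parent_task, child_task))
*		pr_warn("cgroup inheritance failed\n");
*
* parent_task and child_task are illustrative names only.
*/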
static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
return __cgroup_procs_write(of, buf, nbytes, off, false);
}
static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
return __cgroup_procs_write(of, buf, nbytes, off, true);
}
static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct cgroup *cgrp = of_css(of)->cgroup;
struct cgroup_root *root = cgrp->root;
BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX);
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
spin_lock(&release_agent_path_lock);
strlcpy(root->release_agent_path, strstrip(buf),
sizeof(root->release_agent_path));
spin_unlock(&release_agent_path_lock);
mutex_unlock(&cgroup_mutex);
return nbytes;
}
static int cgroup_release_agent_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
spin_lock(&release_agent_path_lock);
seq_puts(seq, cgrp->root->release_agent_path);
spin_unlock(&release_agent_path_lock);
seq_putc(seq, '\n');
return 0;
}
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
return 0;
}
static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask)
{
struct cgroup_subsys *ss;
bool printed = false;
int ssid;
for_each_subsys(ss, ssid) {
if (ss_mask & (1 << ssid)) {
if (printed)
seq_putc(seq, ' ');
seq_printf(seq, "%s", ss->name);
printed = true;
}
}
if (printed)
seq_putc(seq, '\n');
}
/* show controllers which are currently attached to the default hierarchy */
static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->root->subsys_mask);
return 0;
}
/* show controllers which are enabled from the parent */
static int cgroup_controllers_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask);
return 0;
}
/* show controllers which are enabled for a given cgroup's children */
static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
return 0;
}
/**
* cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
* @cgrp: root of the subtree to update csses for
*
* @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
* css associations need to be updated accordingly. This function looks up
* all css_sets which are attached to the subtree, creates the matching
* updated css_sets and migrates the tasks to the new ones.
*/
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
LIST_HEAD(preloaded_csets);
struct cgroup_subsys_state *css;
struct css_set *src_cset;
int ret;
lockdep_assert_held(&cgroup_tree_mutex);
lockdep_assert_held(&cgroup_mutex);
/* look up all csses currently attached to @cgrp's subtree */
down_read(&css_set_rwsem);
css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
struct cgrp_cset_link *link;
/* self is not affected by child_subsys_mask change */
if (css->cgroup == cgrp)
continue;
list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
cgroup_migrate_add_src(link->cset, cgrp,
&preloaded_csets);
}
up_read(&css_set_rwsem);
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
if (ret)
goto out_finish;
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
struct task_struct *last_task = NULL, *task;
/* src_csets precede dst_csets, break on the first dst_cset */
if (!src_cset->mg_src_cgrp)
break;
/*
* All tasks in src_cset need to be migrated to the
* matching dst_cset. Empty it process by process. We
* walk tasks but migrate processes. The leader might even
* belong to a different cset but such src_cset would also
* be among the target src_csets because the default
* hierarchy enforces per-process membership.
*/
while (true) {
down_read(&css_set_rwsem);
task = list_first_entry_or_null(&src_cset->tasks,
struct task_struct, cg_list);
if (task) {
task = task->group_leader;
WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
get_task_struct(task);
}
up_read(&css_set_rwsem);
if (!task)
break;
/* guard against possible infinite loop */
if (WARN(last_task == task,
"cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
goto out_finish;
last_task = task;
threadgroup_lock(task);
/* raced against de_thread() from another thread? */
if (!thread_group_leader(task)) {
threadgroup_unlock(task);
put_task_struct(task);
continue;
}
ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
threadgroup_unlock(task);
put_task_struct(task);
if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
goto out_finish;
}
}
out_finish:
cgroup_migrate_finish(&preloaded_csets);
return ret;
}
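/*
* Editor's note: in this listing the caller is cgroup_subtree_control_write()
* below, which flips bits in ->child_subsys_mask and then pulls every task
* in the subtree onto the matching css_sets:
*
*	cgrp->child_subsys_mask |= enable;
*	cgrp->child_subsys_mask &= ~disable;
*	ret = cgroup_update_dfl_csses(cgrp);
*/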
/* change the enabled child controllers for a cgroup in the default hierarchy */
static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
loff_t off)
{
unsigned int enable = 0, disable = 0;
struct cgroup *cgrp, *child;
struct cgroup_subsys *ss;
char *tok;
int ssid, ret;
/*
* Parse input - space separated list of subsystem names prefixed
* with either + or -.
*/
buf = strstrip(buf);
while ((tok = strsep(&buf, " "))) {
if (tok[0] == '\0')
continue;
for_each_subsys(ss, ssid) {
if (ss->disabled || strcmp(tok + 1, ss->name))
continue;
if (*tok == '+') {
enable |= 1 << ssid;
disable &= ~(1 << ssid);
} else if (*tok == '-') {
disable |= 1 << ssid;
enable &= ~(1 << ssid);
} else {
return -EINVAL;
}
break;
}
if (ssid == CGROUP_SUBSYS_COUNT)
return -EINVAL;
}
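/*
* Editor's note: an illustrative input for the parse loop above is
* "+memory -cpu", which sets the memory bit in @enable and the cpu bit in
* @disable (assuming both controllers are built in and not disabled).
*/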
cgrp = cgroup_kn_lock_live(of->kn);
if (!cgrp)
return -ENODEV;
for_each_subsys(ss, ssid) {
if (enable & (1 << ssid)) {
if (cgrp->child_subsys_mask & (1 << ssid)) {
enable &= ~(1 << ssid);
continue;
}
/*
* Because css offlining is asynchronous, userland
* might try to re-enable the same controller while
* the previous instance is still around. In such
* cases, wait till it's gone using offline_waitq.
*/
cgroup_for_each_live_child(child, cgrp) {
DEFINE_WAIT(wait);
if (!cgroup_css(child, ss))
continue;
cgroup_get(child);
prepare_to_wait(&child->offline_waitq, &wait,
TASK_UNINTERRUPTIBLE);
cgroup_kn_unlock(of->kn);
schedule();
finish_wait(&child->offline_waitq, &wait);
cgroup_put(child);
return restart_syscall();
}
/* unavailable or not enabled on the parent? */
if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
(cgrp->parent &&
!(cgrp->parent->child_subsys_mask & (1 << ssid)))) {
ret = -ENOENT;
goto out_unlock;
}
} else if (disable & (1 << ssid)) {
if (!(cgrp->child_subsys_mask & (1 << ssid))) {
disable &= ~(1 << ssid);
continue;
}
/* a child has it enabled? */
cgroup_for_each_live_child(child, cgrp) {
if (child->child_subsys_mask & (1 << ssid)) {
ret = -EBUSY;
goto out_unlock;
}
}
}
}
if (!enable && !disable) {
ret = 0;
goto out_unlock;
}
/*
* Except for the root, child_subsys_mask must be zero for a cgroup
* with tasks so that child cgroups don't compete against tasks.
*/
if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) {
ret = -EBUSY;
goto out_unlock;
}
/*
* Create csses for enables and update child_subsys_mask. This
* changes cgroup_e_css() results which in turn makes the
* subsequent cgroup_update_dfl_csses() associate all tasks in the
* subtree to the updated csses.
*/
for_each_subsys(ss, ssid) {
if (!(enable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp) {
ret = create_css(child, ss);
if (ret)
goto err_undo_css;
}
}
cgrp->child_subsys_mask |= enable;
cgrp->child_subsys_mask &= ~disable;
ret = cgroup_update_dfl_csses(cgrp);
if (ret)
goto err_undo_css;
/* all tasks are now migrated away from the old csses, kill them */
for_each_subsys(ss, ssid) {
if (!(disable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp)
kill_css(cgroup_css(child, ss));
}
kernfs_activate(cgrp->kn);
ret = 0;
out_unlock:
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
err_undo_css:
cgrp->child_subsys_mask &= ~enable;
cgrp->child_subsys_mask |= disable;
for_each_subsys(ss, ssid) {
if (!(enable & (1 << ssid)))
continue;
cgroup_for_each_live_child(child, cgrp) {
struct cgroup_subsys_state *css = cgroup_css(child, ss);
if (css)
kill_css(css);
}
}
goto out_unlock;
}
static int cgroup_populated_show(struct seq_file *seq, void *v)
{
seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt);
return 0;
}
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct cgroup *cgrp = of->kn->parent->priv;
struct cftype *cft = of->kn->priv;
struct cgroup_subsys_state *css;
int ret;
if (cft->write)
return cft->write(of, buf, nbytes, off);
/*
* kernfs guarantees that a file isn't deleted with operations in
* flight, which means that the matching css is and stays alive and
* doesn't need to be pinned. The RCU locking is not necessary
* either. It's just for the convenience of using cgroup_css().
*/
rcu_read_lock();
css = cgroup_css(cgrp, cft->ss);
rcu_read_unlock();
if (cft->write_u64) {
unsigned long long v;
ret = kstrtoull(buf, 0, &v);
if (!ret)
ret = cft->write_u64(css, cft, v);
} else if (cft->write_s64) {
long long v;
ret = kstrtoll(buf, 0, &v);
if (!ret)
ret = cft->write_s64(css, cft, v);
} else {
ret = -EINVAL;
}
return ret ?: nbytes;
}
static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
{
return seq_cft(seq)->seq_start(seq, ppos);
}
static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
{
return seq_cft(seq)->seq_next(seq, v, ppos);
}
static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
{
seq_cft(seq)->seq_stop(seq, v);
}
static int cgroup_seqfile_show(struct seq_file *m, void *arg)
{
struct cftype *cft = seq_cft(m);
struct cgroup_subsys_state *css = seq_css(m);
if (cft->seq_show)
return cft->seq_show(m, arg);
if (cft->read_u64)
seq_printf(m, "%llu\n", cft->read_u64(css, cft));
else if (cft->read_s64)
seq_printf(m, "%lld\n", cft->read_s64(css, cft));
else
return -EINVAL;
return 0;
}
static struct kernfs_ops cgroup_kf_single_ops = {
.atomic_write_len = PAGE_SIZE,
.write = cgroup_file_write,
.seq_show = cgroup_seqfile_show,
};
static struct kernfs_ops cgroup_kf_ops = {
.atomic_write_len = PAGE_SIZE,
.write = cgroup_file_write,
.seq_start = cgroup_seqfile_start,
.seq_next = cgroup_seqfile_next,
.seq_stop = cgroup_seqfile_stop,
.seq_show = cgroup_seqfile_show,
};
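/*
* Editor's sketch (assumption, not text from this file): a cftype is
* presumably bound to one of the two ops tables above when it is
* registered, along the lines of
*
*	cft->kf_ops = cft->seq_start ? &cgroup_kf_ops : &cgroup_kf_single_ops;
*
* i.e. the full seq_file iterator table is only needed when the file
* supplies its own seq_start/seq_next/seq_stop.
*/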
/*
* cgroup_rename - Only allow simple rename of directories in place.
*/
static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name_str)
{
struct cgroup *cgrp = kn->priv;
int ret;
/*
* This isn't a proper migration and its usefulness is very
* limited. Disallow if sane_behavior.
*/
if (cgroup_sane_behavior(cgrp))
return -EPERM;
/*
* We're gonna grab cgroup_tree_mutex which nests outside kernfs
* active_ref. kernfs_rename() doesn't require active_ref
* protection. Break them before grabbing cgroup_tree_mutex.
*/
kernfs_break_active_protection(new_parent);
kernfs_break_active_protection(kn);
mutex_lock(&cgroup_tree_mutex);
mutex_lock(&cgroup_mutex);
ret = kernfs_rename(kn, new_parent, new_name_str);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgroup_tree_mutex);
kernfs_unbreak_active_protection(kn);
kernfs_unbreak_active_protection(new_parent);
return ret;
}
/* set uid and gid of cgroup dirs and files to that of the creator */
static int cgroup_kn_set_ugid(struct kernfs_node *kn)
{
struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
.ia_uid = current_fsuid(),
.ia_gid = current_fsgid(), };
if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
return 0;
return kernfs_setattr(kn, &iattr);
}
static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
{
char name[CGROUP_FILE_NAME_MAX];
struct kernfs_node *kn;
struct lock_class_key *key = NULL;
int ret;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
key = &cft->lockdep_key;
#endif
kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
cgroup_file_mode(cft), 0, cft->kf_ops, cft,
NULL, false, key);
if (IS_ERR(kn))
return PTR_ERR(kn);
ret = cgroup_kn_set_ugid(kn);
if (ret) {
kernfs_remove(kn);
return ret;
}
if (cft->seq_show == cgroup_populated_show)
cgrp->populated_kn = kn;
return 0;
}
/**
* cgroup_addrm_files - add or remove files to a cgroup directory
* @cgrp: the target cgroup
* @cfts: array of cftypes to be added
* @is_add: whether to add or remove
*
* Depending on @is_add, add or remove files defined by @cfts on @cgrp.
* For removals, this function never fails. If addition fails, this
* function doesn't remove files already added. The caller is responsible
* for cleaning up.
*/
static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
bool is_add)
{
struct cftype *cft;
int ret;
lockdep_assert_held(&cgroup_tree_mutex);
for (cft = cfts; cft->name[0] != '\0'; cft++) {
/* does cft->flags tell us to skip this file on @cgrp? */
if ((cft->flags & CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
continue;
if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
continue;
if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
continue;
if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
continue;
if (is_add) {
ret = cgroup_add_file(cgrp, cft);
if (ret) {
pr_warn("%s: failed to add %s, err=%d\n",
__func__, cft->name, ret);
return ret;
}
} else {
cgroup_rm_file(cgrp, cft);
}
}
return 0;
}
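/*
* Editor's sketch (hypothetical cftype array, not from this file): the
* loop above walks entries until it hits one with an empty ->name, and
* the CFTYPE_* flags it checks are set per file, e.g.:
*
*	static struct cftype example_files[] = {
*		{
*			.name = "example.current",
*			.flags = CFTYPE_NOT_ON_ROOT,
*			.read_u64 = example_current_read,
*		},
*		{ }	-- empty terminator entry
*	};
*
* "example.current" and example_current_read are illustrative names only.
*/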
static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
{
LIST_HEAD(pending);
struct cgroup_subsys *ss = cfts[0].ss;
struct cgroup *root = &ss->root->cgrp;
struct cgroup_subsys_state *css;
lockdep_assert_held(&cgroup_tree_mutex);
/* add/rm files for all cgroups created before */