Newer
Older
Tejun Heo
committed
*/
if (!cgroup_on_dfl(cgrp)) {
cgrp->subtree_control = parent->subtree_control;
cgroup_refresh_child_subsys_mask(cgrp);
}
cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
percpu_ref_exit(&cgrp->self.refcnt);
kfree(cgrp);
cgroup_kn_unlock(parent_kn);
/*
* This is called when the refcnt of a css is confirmed to be killed.
* css_tryget_online() is now guaranteed to fail. Tell the subsystem to
* initate destruction and put the css ref from kill_css().
*/
static void css_killed_work_fn(struct work_struct *work)
struct cgroup_subsys_state *css =
container_of(work, struct cgroup_subsys_state, destroy_work);
mutex_lock(&cgroup_mutex);
do {
offline_css(css);
css_put(css);
/* @css can't go away while we're holding cgroup_mutex */
css = css->parent;
} while (css && atomic_dec_and_test(&css->online_cnt));
mutex_unlock(&cgroup_mutex);
/* css kill confirmation processing requires process context, bounce */
static void css_killed_ref_fn(struct percpu_ref *ref)
{
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
if (atomic_dec_and_test(&css->online_cnt)) {
INIT_WORK(&css->destroy_work, css_killed_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
}
Tejun Heo
committed
/**
* kill_css - destroy a css
* @css: css to destroy
*
* This function initiates destruction of @css by removing cgroup interface
* files and putting its base reference. ->css_offline() will be invoked
* asynchronously once css_tryget_online() is guaranteed to fail and when
* the reference count reaches zero, @css will be released.
Tejun Heo
committed
*/
static void kill_css(struct cgroup_subsys_state *css)
lockdep_assert_held(&cgroup_mutex);
/*
* This must happen before css is disassociated with its cgroup.
* See seq_css() for details.
*/
css_clear_dir(css, NULL);
/*
* Killing would put the base ref, but we need to keep it alive
* until after ->css_offline().
*/
css_get(css);
/*
* cgroup core guarantees that, by the time ->css_offline() is
* invoked, no new css reference will be given out via
* css_tryget_online(). We can't simply call percpu_ref_kill() and
* proceed to offlining css's because percpu_ref_kill() doesn't
* guarantee that the ref is seen as killed on all CPUs on return.
*
* Use percpu_ref_kill_and_confirm() to get notifications as each
* css is confirmed to be seen as killed on all CPUs.
*/
percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
}
/**
* cgroup_destroy_locked - the first stage of cgroup destruction
* @cgrp: cgroup to be destroyed
*
* css's make use of percpu refcnts whose killing latency shouldn't be
* exposed to userland and are RCU protected. Also, cgroup core needs to
* guarantee that css_tryget_online() won't succeed by the time
* ->css_offline() is invoked. To satisfy all the requirements,
* destruction is implemented in the following two steps.
*
* s1. Verify @cgrp can be destroyed and mark it dying. Remove all
* userland visible parts and start killing the percpu refcnts of
* css's. Set up so that the next stage will be kicked off once all
* the percpu refcnts are confirmed to be killed.
*
* s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
* rest of destruction. Once all cgroup references are gone, the
* cgroup is RCU-freed.
*
* This function implements s1. After this step, @cgrp is gone as far as
* the userland is concerned and a new cgroup with the same name may be
* created. As cgroup doesn't care about the names internally, this
* doesn't cause any problem.
*/
static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
lockdep_assert_held(&cgroup_mutex);
/*
* Only migration can raise populated from zero and we're already
* holding cgroup_mutex.
*/
if (cgroup_is_populated(cgrp))
* Make sure there's no live children. We can't test emptiness of
* ->self.children as dead children linger on it while being
* drained; otherwise, "rmdir parent/child parent" may fail.
if (css_has_online_children(&cgrp->self))
/*
* Mark @cgrp dead. This prevents further task migration and child
* creation by disabling cgroup_lock_live_group().
cgrp->self.flags &= ~CSS_ONLINE;
/* initiate massacre of all css's */
for_each_css(css, ssid, cgrp)
kill_css(css);
* Remove @cgrp directory along with the base files. @cgrp has an
* extra ref on its kn.
kernfs_remove(cgrp->kn);
percpu_ref_kill(&cgrp->self.refcnt);
struct cgroup *cgrp;
cgrp = cgroup_kn_lock_live(kn);
if (!cgrp)
return 0;
ret = cgroup_destroy_locked(cgrp);
cgroup_kn_unlock(kn);
static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
.remount_fs = cgroup_remount,
.show_options = cgroup_show_options,
.mkdir = cgroup_mkdir,
.rmdir = cgroup_rmdir,
.rename = cgroup_rename,
};
static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
{
struct cgroup_subsys_state *css;
pr_debug("Initializing cgroup subsys %s\n", ss->name);
mutex_lock(&cgroup_mutex);
idr_init(&ss->css_idr);
/* Create the root cgroup state for this subsystem */
ss->root = &cgrp_dfl_root;
css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
/*
* Root csses are never destroyed and we can't initialize
* percpu_ref during early init. Disable refcnting.
*/
css->flags |= CSS_NO_REF;
if (early) {
/* allocation can't be done safely during early init */
css->id = 1;
} else {
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
BUG_ON(css->id < 0);
}
/* Update the init_css_set to contain a subsys
* pointer to this state - since the subsystem is
* init_css_set is in the subsystem's root cgroup. */
init_css_set.subsys[ss->id] = css;
have_fork_callback |= (bool)ss->fork << ss->id;
have_exit_callback |= (bool)ss->exit << ss->id;
have_free_callback |= (bool)ss->free << ss->id;
have_canfork_callback |= (bool)ss->can_fork << ss->id;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
BUG_ON(online_css(css));
mutex_unlock(&cgroup_mutex);
}
* cgroup_init_early - cgroup initialization at system boot
*
* Initialize cgroups at system boot, and initialize any
* subsystems that request early init.
*/
int __init cgroup_init_early(void)
{
static struct cgroup_sb_opts __initdata opts;
init_cgroup_root(&cgrp_dfl_root, &opts);
cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
"invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p name:id=%d:%s\n",
i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
"cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
ss->name = cgroup_subsys_name[i];
if (!ss->legacy_name)
ss->legacy_name = cgroup_subsys_name[i];
if (ss->early_init)
cgroup_init_subsys(ss, true);
}
return 0;
}
static unsigned long cgroup_disable_mask __initdata;
* cgroup_init - cgroup initialization
*
* Register cgroup filesystem and /proc file, and initialize
* any subsystems that didn't request early init.
*/
int __init cgroup_init(void)
{
BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
/* Add init_css_set to the hash table */
key = css_set_hash(init_css_set.subsys);
hash_add(css_set_table, &init_css_set.hlist, key);
BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
if (ss->early_init) {
struct cgroup_subsys_state *css =
init_css_set.subsys[ss->id];
css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
GFP_KERNEL);
BUG_ON(css->id < 0);
} else {
cgroup_init_subsys(ss, false);
}
list_add_tail(&init_css_set.e_cset_node[ssid],
&cgrp_dfl_root.cgrp.e_csets[ssid]);
* Setting dfl_root subsys_mask needs to consider the
* disabled flag and cftype registration needs kmalloc,
* both of which aren't available during early_init.
if (cgroup_disable_mask & (1 << ssid)) {
static_branch_disable(cgroup_subsys_enabled_key[ssid]);
printk(KERN_INFO "Disabling %s control group subsystem\n",
ss->name);
continue;
if (cgroup_ssid_no_v1(ssid))
printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
ss->name);
cgrp_dfl_root.subsys_mask |= 1 << ss->id;
if (!ss->dfl_cftypes)
cgrp_dfl_root_inhibit_ss_mask |= 1 << ss->id;
if (ss->dfl_cftypes == ss->legacy_cftypes) {
WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
} else {
WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
if (ss->bind)
ss->bind(init_css_set.subsys[ssid]);
WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
WARN_ON(register_filesystem(&cgroup_fs_type));
WARN_ON(register_filesystem(&cgroup2_fs_type));
WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
static int __init cgroup_wq_init(void)
{
/*
* There isn't much point in executing destruction path in
* parallel. Good chunk is serialized with cgroup_mutex anyway.
* Use 1 for @max_active.
*
* We would prefer to do this in cgroup_init() above, but that
* is called before init_workqueues(): so leave this until after.
*/
cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
BUG_ON(!cgroup_destroy_wq);
/*
* Used to destroy pidlists and separate to serve as flush domain.
* Cap @max_active to 1 too.
*/
cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
0, 1);
BUG_ON(!cgroup_pidlist_destroy_wq);
return 0;
}
core_initcall(cgroup_wq_init);
/*
* proc_cgroup_show()
* - Print task's cgroup paths into seq_file, one line for each hierarchy
* - Used for /proc/<pid>/cgroup.
*/
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk)
struct cgroup_root *root;
if (!buf)
goto out;
mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
for_each_root(root) {
struct cgroup *cgrp;
if (root == &cgrp_dfl_root && !cgrp_dfl_root_visible)
continue;
seq_printf(m, "%d:", root->hierarchy_id);
if (root != &cgrp_dfl_root)
for_each_subsys(ss, ssid)
if (root->subsys_mask & (1 << ssid))
seq_printf(m, "%s%s", count++ ? "," : "",
if (strlen(root->name))
seq_printf(m, "%sname=%s", count ? "," : "",
root->name);
cgrp = task_cgroup_from_root(tsk, root);
/*
* On traditional hierarchies, all zombie tasks show up as
* belonging to the root cgroup. On the default hierarchy,
* while a zombie doesn't show up in "cgroup.procs" and
* thus can't be migrated, its /proc/PID/cgroup keeps
* reporting the cgroup it belonged to before exiting. If
* the cgroup is removed before the zombie is reaped,
* " (deleted)" is appended to the cgroup path.
*/
if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
path = cgroup_path(cgrp, buf, PATH_MAX);
if (!path) {
retval = -ENAMETOOLONG;
goto out_unlock;
}
} else {
path = "/";
if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
seq_puts(m, " (deleted)\n");
else
seq_putc(m, '\n');
spin_unlock_bh(&css_set_lock);
mutex_unlock(&cgroup_mutex);
kfree(buf);
out:
return retval;
}
/* Display information about each subsystem and each hierarchy */
static int proc_cgroupstats_show(struct seq_file *m, void *v)
{
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
/*
* ideally we don't want subsystems moving around while we do this.
* cgroup_mutex is also necessary to guarantee an atomic snapshot of
* subsys/hierarchy state.
*/
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->legacy_name, ss->root->hierarchy_id,
atomic_read(&ss->root->nr_cgrps),
cgroup_ssid_enabled(i));
mutex_unlock(&cgroup_mutex);
return 0;
}
static int cgroupstats_open(struct inode *inode, struct file *file)
{
return single_open(file, proc_cgroupstats_show, NULL);
static const struct file_operations proc_cgroupstats_operations = {
.open = cgroupstats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
* cgroup_fork - initialize cgroup related fields during copy_process()
* @child: pointer to task_struct of forking parent process.
* A task is associated with the init_css_set until cgroup_post_fork()
* attaches it to the parent's css_set. Empty cg_list indicates that
* @child isn't holding reference to its css_set.
*/
void cgroup_fork(struct task_struct *child)
{
RCU_INIT_POINTER(child->cgroups, &init_css_set);
INIT_LIST_HEAD(&child->cg_list);
/**
* cgroup_can_fork - called on a new task before the process is exposed
* @child: the task in question.
*
* This calls the subsystem can_fork() callbacks. If the can_fork() callback
* returns an error, the fork aborts with that error code. This allows for
* a cgroup subsystem to conditionally allow or deny new forks.
*/
int cgroup_can_fork(struct task_struct *child)
{
struct cgroup_subsys *ss;
int i, j, ret;
for_each_subsys_which(ss, i, &have_canfork_callback) {
ret = ss->can_fork(child);
if (ret)
goto out_revert;
}
return 0;
out_revert:
for_each_subsys(ss, j) {
if (j >= i)
break;
if (ss->cancel_fork)
ss->cancel_fork(child);
}
return ret;
}
/**
* cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
* @child: the task in question
*
* This calls the cancel_fork() callbacks if a fork failed *after*
* cgroup_can_fork() succeded.
*/
void cgroup_cancel_fork(struct task_struct *child)
{
struct cgroup_subsys *ss;
int i;
for_each_subsys(ss, i)
if (ss->cancel_fork)
ss->cancel_fork(child);
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
*
Tejun Heo
committed
* Adds the task to the list running through its css_set if necessary and
* call the subsystem fork() callbacks. Has to be after the task is
* visible on the task list in case we race with the first call to
* cgroup_task_iter_start() - to guarantee that the new task ends up on its
Tejun Heo
committed
* list.
void cgroup_post_fork(struct task_struct *child)
Tejun Heo
committed
int i;
Frederic Weisbecker
committed
/*
* This may race against cgroup_enable_task_cg_lists(). As that
* function sets use_task_css_set_links before grabbing
* tasklist_lock and we just went through tasklist_lock to add
* @child, it's guaranteed that either we see the set
* use_task_css_set_links or cgroup_enable_task_cg_lists() sees
* @child during its iteration.
*
* If we won the race, @child is associated with %current's
* css_set. Grabbing css_set_lock guarantees both that the
* association is stable, and, on completion of the parent's
* migration, @child is visible in the source of migration or
* already in the destination cgroup. This guarantee is necessary
* when implementing operations which need to migrate all tasks of
* a cgroup to another.
*
* Note that if we lose to cgroup_enable_task_cg_lists(), @child
* will remain in init_css_set. This is safe because all tasks are
* in the init_css_set before cg_links is enabled and there's no
* operation which transfers all tasks out of init_css_set.
Frederic Weisbecker
committed
*/
if (use_task_css_set_links) {
struct css_set *cset;
spin_lock_bh(&css_set_lock);
cset = task_css_set(current);
if (list_empty(&child->cg_list)) {
get_css_set(cset);
css_set_move_task(child, NULL, cset, false);
spin_unlock_bh(&css_set_lock);
Tejun Heo
committed
/*
* Call ss->fork(). This must happen after @child is linked on
* css_set; otherwise, @child might change state between ->fork()
* and addition to css_set.
*/
for_each_subsys_which(ss, i, &have_fork_callback)
ss->fork(child);
Tejun Heo
committed
/**
* cgroup_exit - detach cgroup from exiting task
* @tsk: pointer to task_struct of exiting process
*
* Description: Detach cgroup from @tsk and release it.
*
* Note that cgroups marked notify_on_release force every task in
* them to take the global cgroup_mutex mutex when exiting.
* This could impact scaling on very large systems. Be reluctant to
* use notify_on_release cgroups where very high task exit scaling
* is required on large systems.
*
* We set the exiting tasks cgroup to the root cgroup (top_cgroup). We
* call cgroup_exit() while the task is still competent to handle
* notify_on_release(), then leave the task attached to the root cgroup in
* each hierarchy for the remainder of its exit. No need to bother with
* init_css_set refcnting. init_css_set never goes away and we can't race
* with migration path - PF_EXITING is visible to migration path.
void cgroup_exit(struct task_struct *tsk)
struct css_set *cset;
* Unlink from @tsk from its css_set. As migration path can't race
* with us, we can check css_set and cg_list without synchronization.
cset = task_css_set(tsk);
if (!list_empty(&tsk->cg_list)) {
spin_lock_bh(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
spin_unlock_bh(&css_set_lock);
} else {
get_css_set(cset);
/* see cgroup_post_fork() for details */
for_each_subsys_which(ss, i, &have_exit_callback)
ss->exit(tsk);
}
void cgroup_free(struct task_struct *task)
{
struct css_set *cset = task_css_set(task);
struct cgroup_subsys *ss;
int ssid;
for_each_subsys_which(ss, ssid, &have_free_callback)
ss->free(task);
put_css_set(cset);
static void check_for_release(struct cgroup *cgrp)
if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
!css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
schedule_work(&cgrp->release_agent_work);
5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
}
/*
* Notify userspace when a cgroup is released, by running the
* configured release agent with the name of the cgroup (path
* relative to the root of cgroup file system) as the argument.
*
* Most likely, this user command will try to rmdir this cgroup.
*
* This races with the possibility that some other task will be
* attached to this cgroup before it is removed, or that some other
* user task will 'mkdir' a child cgroup of this cgroup. That's ok.
* The presumed 'rmdir' will fail quietly if this cgroup is no longer
* unused, and this cgroup will be reprieved from its death sentence,
* to continue to serve a useful existence. Next time it's released,
* we will get notified again, if it still has 'notify_on_release' set.
*
* The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
* means only wait until the task is successfully execve()'d. The
* separate release agent task is forked by call_usermodehelper(),
* then control in this thread returns here, without waiting for the
* release agent task. We don't bother to wait because the caller of
* this routine has no use for the exit status of the release agent
* task, so no sense holding our caller up for that.
*/
static void cgroup_release_agent(struct work_struct *work)
{
struct cgroup *cgrp =
container_of(work, struct cgroup, release_agent_work);
char *pathbuf = NULL, *agentbuf = NULL, *path;
char *argv[3], *envp[3];
mutex_lock(&cgroup_mutex);
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
if (!pathbuf || !agentbuf)
goto out;
path = cgroup_path(cgrp, pathbuf, PATH_MAX);
if (!path)
goto out;
argv[0] = agentbuf;
argv[1] = path;
argv[2] = NULL;
/* minimal command environment */
envp[0] = "HOME=/";
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp[2] = NULL;
mutex_unlock(&cgroup_mutex);
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
mutex_unlock(&cgroup_mutex);
kfree(agentbuf);
kfree(pathbuf);
static int __init cgroup_disable(char *str)
{
char *token;
while ((token = strsep(&str, ",")) != NULL) {
if (!*token)
continue;
if (strcmp(token, ss->name) &&
strcmp(token, ss->legacy_name))
continue;
cgroup_disable_mask |= 1 << i;
}
}
return 1;
}
__setup("cgroup_disable=", cgroup_disable);
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
5793
static int __init cgroup_no_v1(char *str)
{
struct cgroup_subsys *ss;
char *token;
int i;
while ((token = strsep(&str, ",")) != NULL) {
if (!*token)
continue;
if (!strcmp(token, "all")) {
cgroup_no_v1_mask = ~0UL;
break;
}
for_each_subsys(ss, i) {
if (strcmp(token, ss->name) &&
strcmp(token, ss->legacy_name))
continue;
cgroup_no_v1_mask |= 1 << i;
}
}
return 1;
}
__setup("cgroup_no_v1=", cgroup_no_v1);
/**
* css_tryget_online_from_dir - get corresponding css from a cgroup dentry
* @dentry: directory dentry of interest
* @ss: subsystem of interest
* If @dentry is a directory for a cgroup which has @ss enabled on it, try
* to get the corresponding css and return it. If such css doesn't exist
* or can't be pinned, an ERR_PTR value is returned.
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss)
struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
struct cgroup_subsys_state *css = NULL;
/* is @dentry a cgroup dir? */
if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
kernfs_type(kn) != KERNFS_DIR)
rcu_read_lock();
/*
* This path doesn't originate from kernfs and @kn could already
* have been or be removed at any point. @kn->priv is RCU
* protected for this access. See css_release_work_fn() for details.
*/
cgrp = rcu_dereference(kn->priv);
if (cgrp)
css = cgroup_css(cgrp, ss);
if (!css || !css_tryget_online(css))
css = ERR_PTR(-ENOENT);
rcu_read_unlock();
return css;
/**
* css_from_id - lookup css by id
* @id: the cgroup id
* @ss: cgroup subsys to be looked into
*
* Returns the css if there's valid one with @id, otherwise returns NULL.
* Should be called under rcu_read_lock().
*/
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return id > 0 ? idr_find(&ss->css_idr, id) : NULL;
5847
5848
5849
5850
5851
5852
5853
5854
5855
5856
5857
5858
5859
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
/**
* cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
* @path: path on the default hierarchy
*
* Find the cgroup at @path on the default hierarchy, increment its
* reference count and return it. Returns pointer to the found cgroup on
* success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR)
* if @path points to a non-directory.
*/
struct cgroup *cgroup_get_from_path(const char *path)
{
struct kernfs_node *kn;
struct cgroup *cgrp;
mutex_lock(&cgroup_mutex);
kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv;
cgroup_get(cgrp);
} else {
cgrp = ERR_PTR(-ENOTDIR);
}
kernfs_put(kn);
} else {
cgrp = ERR_PTR(-ENOENT);
}
mutex_unlock(&cgroup_mutex);
return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
*/
#ifdef CONFIG_SOCK_CGROUP_DATA
#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
DEFINE_SPINLOCK(cgroup_sk_update_lock);
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907
5908
5909
5910
5911
5912
5913
5914
5915
5916
5917
5918
5919
5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
5931
5932
5933
static bool cgroup_sk_alloc_disabled __read_mostly;
void cgroup_sk_alloc_disable(void)
{
if (cgroup_sk_alloc_disabled)
return;
pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
cgroup_sk_alloc_disabled = true;
}
#else
#define cgroup_sk_alloc_disabled false
#endif
void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
if (cgroup_sk_alloc_disabled)
return;
rcu_read_lock();
while (true) {
struct css_set *cset;
cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp;
break;
}
cpu_relax();
}
rcu_read_unlock();
}
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
cgroup_put(sock_cgroup_ptr(skcd));
}
#endif /* CONFIG_SOCK_CGROUP_DATA */
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
if (!css)
return ERR_PTR(-ENOMEM);
return css;
}
static void debug_css_free(struct cgroup_subsys_state *css)
{
kfree(css);
}
static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return cgroup_task_count(css->cgroup);
}
static u64 current_css_set_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return (u64)(unsigned long)current->cgroups;
}
static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
{
u64 count;
rcu_read_lock();
count = atomic_read(&task_css_set(current)->refcount);
rcu_read_unlock();
return count;
}
static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
struct cgrp_cset_link *link;
struct css_set *cset;
char *name_buf;
name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
if (!name_buf)
return -ENOMEM;
spin_lock_bh(&css_set_lock);
rcu_read_lock();
cset = rcu_dereference(current->cgroups);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
cgroup_name(c, name_buf, NAME_MAX + 1);
seq_printf(seq, "Root %d group %s\n",
}
rcu_read_unlock();
spin_unlock_bh(&css_set_lock);
return 0;
}
#define MAX_TASKS_SHOWN_PER_CSS 25