Newer
Older
} else {
get_css_set(cset);
/* see cgroup_post_fork() for details */
do_each_subsys_mask(ss, i, have_exit_callback) {
} while_each_subsys_mask();
void cgroup_free(struct task_struct *task)
{
struct css_set *cset = task_css_set(task);
struct cgroup_subsys *ss;
int ssid;
do_each_subsys_mask(ss, ssid, have_free_callback) {
ss->free(task);
} while_each_subsys_mask();
put_css_set(cset);
static void check_for_release(struct cgroup *cgrp)
if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
!css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
schedule_work(&cgrp->release_agent_work);
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
}
/*
* Notify userspace when a cgroup is released, by running the
* configured release agent with the name of the cgroup (path
* relative to the root of cgroup file system) as the argument.
*
* Most likely, this user command will try to rmdir this cgroup.
*
* This races with the possibility that some other task will be
* attached to this cgroup before it is removed, or that some other
* user task will 'mkdir' a child cgroup of this cgroup. That's ok.
* The presumed 'rmdir' will fail quietly if this cgroup is no longer
* unused, and this cgroup will be reprieved from its death sentence,
* to continue to serve a useful existence. Next time it's released,
* we will get notified again, if it still has 'notify_on_release' set.
*
* The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
* means only wait until the task is successfully execve()'d. The
* separate release agent task is forked by call_usermodehelper(),
* then control in this thread returns here, without waiting for the
* release agent task. We don't bother to wait because the caller of
* this routine has no use for the exit status of the release agent
* task, so no sense holding our caller up for that.
*/
static void cgroup_release_agent(struct work_struct *work)
{
struct cgroup *cgrp =
container_of(work, struct cgroup, release_agent_work);
char *pathbuf = NULL, *agentbuf = NULL;
int ret;
mutex_lock(&cgroup_mutex);
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
if (!pathbuf || !agentbuf)
goto out;
spin_lock_irq(&css_set_lock);
ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
spin_unlock_irq(&css_set_lock);
if (ret >= PATH_MAX)
goto out;
argv[0] = agentbuf;
argv[1] = pathbuf;
argv[2] = NULL;
/* minimal command environment */
envp[0] = "HOME=/";
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
envp[2] = NULL;
mutex_unlock(&cgroup_mutex);
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
mutex_unlock(&cgroup_mutex);
kfree(agentbuf);
kfree(pathbuf);
static int __init cgroup_disable(char *str)
{
char *token;
while ((token = strsep(&str, ",")) != NULL) {
if (!*token)
continue;
if (strcmp(token, ss->name) &&
strcmp(token, ss->legacy_name))
continue;
cgroup_disable_mask |= 1 << i;
}
}
return 1;
}
__setup("cgroup_disable=", cgroup_disable);
static int __init cgroup_no_v1(char *str)
{
struct cgroup_subsys *ss;
char *token;
int i;
while ((token = strsep(&str, ",")) != NULL) {
if (!*token)
continue;
if (!strcmp(token, "all")) {
break;
}
for_each_subsys(ss, i) {
if (strcmp(token, ss->name) &&
strcmp(token, ss->legacy_name))
continue;
cgroup_no_v1_mask |= 1 << i;
}
}
return 1;
}
__setup("cgroup_no_v1=", cgroup_no_v1);
/**
* css_tryget_online_from_dir - get corresponding css from a cgroup dentry
* @dentry: directory dentry of interest
* @ss: subsystem of interest
* If @dentry is a directory for a cgroup which has @ss enabled on it, try
* to get the corresponding css and return it. If such css doesn't exist
* or can't be pinned, an ERR_PTR value is returned.
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup_subsys *ss)
struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
struct file_system_type *s_type = dentry->d_sb->s_type;
/* is @dentry a cgroup dir? */
if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
!kn || kernfs_type(kn) != KERNFS_DIR)
rcu_read_lock();
/*
* This path doesn't originate from kernfs and @kn could already
* have been or be removed at any point. @kn->priv is RCU
* protected for this access. See css_release_work_fn() for details.
*/
cgrp = rcu_dereference(kn->priv);
if (cgrp)
css = cgroup_css(cgrp, ss);
if (!css || !css_tryget_online(css))
css = ERR_PTR(-ENOENT);
rcu_read_unlock();
return css;
/**
* css_from_id - lookup css by id
* @id: the cgroup id
* @ss: cgroup subsys to be looked into
*
* Returns the css if there's valid one with @id, otherwise returns NULL.
* Should be called under rcu_read_lock().
*/
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
{
WARN_ON_ONCE(!rcu_read_lock_held());
return idr_find(&ss->css_idr, id);
6196
6197
6198
6199
6200
6201
6202
6203
6204
6205
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
/**
* cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
* @path: path on the default hierarchy
*
* Find the cgroup at @path on the default hierarchy, increment its
* reference count and return it. Returns pointer to the found cgroup on
* success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR)
* if @path points to a non-directory.
*/
struct cgroup *cgroup_get_from_path(const char *path)
{
struct kernfs_node *kn;
struct cgroup *cgrp;
mutex_lock(&cgroup_mutex);
kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
if (kn) {
if (kernfs_type(kn) == KERNFS_DIR) {
cgrp = kn->priv;
cgroup_get(cgrp);
} else {
cgrp = ERR_PTR(-ENOTDIR);
}
kernfs_put(kn);
} else {
cgrp = ERR_PTR(-ENOENT);
}
mutex_unlock(&cgroup_mutex);
return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_path);
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
6244
6245
6246
6247
6248
6249
6250
6251
6252
6253
6254
6255
6256
6257
6258
6259
6260
6261
6262
6263
/**
* cgroup_get_from_fd - get a cgroup pointer from a fd
* @fd: fd obtained by open(cgroup2_dir)
*
* Find the cgroup from a fd which should be obtained
* by opening a cgroup directory. Returns a pointer to the
* cgroup on success. ERR_PTR is returned if the cgroup
* cannot be found.
*/
struct cgroup *cgroup_get_from_fd(int fd)
{
struct cgroup_subsys_state *css;
struct cgroup *cgrp;
struct file *f;
f = fget_raw(fd);
if (!f)
return ERR_PTR(-EBADF);
css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
fput(f);
if (IS_ERR(css))
return ERR_CAST(css);
cgrp = css->cgroup;
if (!cgroup_on_dfl(cgrp)) {
cgroup_put(cgrp);
return ERR_PTR(-EBADF);
}
return cgrp;
}
EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
/*
* sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
* definition in cgroup-defs.h.
*/
#ifdef CONFIG_SOCK_CGROUP_DATA
#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
DEFINE_SPINLOCK(cgroup_sk_update_lock);
6273
6274
6275
6276
6277
6278
6279
6280
6281
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291
6292
6293
6294
6295
6296
6297
6298
6299
6300
6301
6302
6303
6304
6305
6306
6307
6308
6309
6310
6311
6312
6313
6314
6315
6316
static bool cgroup_sk_alloc_disabled __read_mostly;
void cgroup_sk_alloc_disable(void)
{
if (cgroup_sk_alloc_disabled)
return;
pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
cgroup_sk_alloc_disabled = true;
}
#else
#define cgroup_sk_alloc_disabled false
#endif
void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
if (cgroup_sk_alloc_disabled)
return;
rcu_read_lock();
while (true) {
struct css_set *cset;
cset = task_css_set(current);
if (likely(cgroup_tryget(cset->dfl_cgrp))) {
skcd->val = (unsigned long)cset->dfl_cgrp;
break;
}
cpu_relax();
}
rcu_read_unlock();
}
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
cgroup_put(sock_cgroup_ptr(skcd));
}
#endif /* CONFIG_SOCK_CGROUP_DATA */
6317
6318
6319
6320
6321
6322
6323
6324
6325
6326
6327
6328
6329
6330
6331
6332
6333
6334
6335
6336
6337
6338
6339
6340
6341
6342
6343
6344
6345
6346
6347
6348
6349
/* cgroup namespaces */
static struct cgroup_namespace *alloc_cgroup_ns(void)
{
struct cgroup_namespace *new_ns;
int ret;
new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
if (!new_ns)
return ERR_PTR(-ENOMEM);
ret = ns_alloc_inum(&new_ns->ns);
if (ret) {
kfree(new_ns);
return ERR_PTR(ret);
}
atomic_set(&new_ns->count, 1);
new_ns->ns.ops = &cgroupns_operations;
return new_ns;
}
void free_cgroup_ns(struct cgroup_namespace *ns)
{
put_css_set(ns->root_cset);
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
kfree(ns);
}
EXPORT_SYMBOL(free_cgroup_ns);
struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
struct user_namespace *user_ns,
struct cgroup_namespace *old_ns)
{
struct cgroup_namespace *new_ns;
struct css_set *cset;
BUG_ON(!old_ns);
if (!(flags & CLONE_NEWCGROUP)) {
get_cgroup_ns(old_ns);
return old_ns;
}
/* Allow only sysadmin to create cgroup namespace. */
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
/* It is not safe to take cgroup_mutex here */
spin_lock_irq(&css_set_lock);
cset = task_css_set(current);
get_css_set(cset);
spin_unlock_irq(&css_set_lock);
if (IS_ERR(new_ns)) {
put_css_set(cset);
return new_ns;
new_ns->user_ns = get_user_ns(user_ns);
new_ns->root_cset = cset;
return new_ns;
}
static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
{
return container_of(ns, struct cgroup_namespace, ns);
}
static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
/* Don't need to do anything if we are attaching to our own cgroupns. */
if (cgroup_ns == nsproxy->cgroup_ns)
return 0;
get_cgroup_ns(cgroup_ns);
put_cgroup_ns(nsproxy->cgroup_ns);
nsproxy->cgroup_ns = cgroup_ns;
return 0;
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
6419
6420
6421
6422
6423
6424
6425
6426
6427
6428
6429
6430
6431
6432
6433
6434
6435
6436
6437
6438
6439
6440
}
static struct ns_common *cgroupns_get(struct task_struct *task)
{
struct cgroup_namespace *ns = NULL;
struct nsproxy *nsproxy;
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy) {
ns = nsproxy->cgroup_ns;
get_cgroup_ns(ns);
}
task_unlock(task);
return ns ? &ns->ns : NULL;
}
static void cgroupns_put(struct ns_common *ns)
{
put_cgroup_ns(to_cg_ns(ns));
}
const struct proc_ns_operations cgroupns_operations = {
.name = "cgroup",
.type = CLONE_NEWCGROUP,
.get = cgroupns_get,
.put = cgroupns_put,
.install = cgroupns_install,
};
static __init int cgroup_namespaces_init(void)
{
return 0;
}
subsys_initcall(cgroup_namespaces_init);
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
if (!css)
return ERR_PTR(-ENOMEM);
return css;
}
static void debug_css_free(struct cgroup_subsys_state *css)
{
kfree(css);
}
static u64 debug_taskcount_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return cgroup_task_count(css->cgroup);
}
static u64 current_css_set_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
return (u64)(unsigned long)current->cgroups;
}
static u64 current_css_set_refcount_read(struct cgroup_subsys_state *css,
{
u64 count;
rcu_read_lock();
count = atomic_read(&task_css_set(current)->refcount);
rcu_read_unlock();
return count;
}
static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
struct cgrp_cset_link *link;
struct css_set *cset;
char *name_buf;
name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
if (!name_buf)
return -ENOMEM;
spin_lock_irq(&css_set_lock);
rcu_read_lock();
cset = rcu_dereference(current->cgroups);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
cgroup_name(c, name_buf, NAME_MAX + 1);
seq_printf(seq, "Root %d group %s\n",
}
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
return 0;
}
#define MAX_TASKS_SHOWN_PER_CSS 25
static int cgroup_css_links_read(struct seq_file *seq, void *v)
{
struct cgroup_subsys_state *css = seq_css(seq);
struct cgrp_cset_link *link;
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
struct css_set *cset = link->cset;
struct task_struct *task;
int count = 0;
seq_printf(seq, "css_set %p\n", cset);
list_for_each_entry(task, &cset->tasks, cg_list) {
if (count++ > MAX_TASKS_SHOWN_PER_CSS)
goto overflow;
seq_printf(seq, " task %d\n", task_pid_vnr(task));
}
list_for_each_entry(task, &cset->mg_tasks, cg_list) {
if (count++ > MAX_TASKS_SHOWN_PER_CSS)
goto overflow;
seq_printf(seq, " task %d\n", task_pid_vnr(task));
}
continue;
overflow:
seq_puts(seq, " ...\n");
}
spin_unlock_irq(&css_set_lock);
return 0;
}
static u64 releasable_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
return (!cgroup_is_populated(css->cgroup) &&
!css_has_online_children(&css->cgroup->self));
}
static struct cftype debug_files[] = {
{
.name = "taskcount",
.read_u64 = debug_taskcount_read,
},
{
.name = "current_css_set",
.read_u64 = current_css_set_read,
},
{
.name = "current_css_set_refcount",
.read_u64 = current_css_set_refcount_read,
},
{
.name = "current_css_set_cg_links",
.seq_show = current_css_set_cg_links_read,
},
{
.name = "cgroup_css_links",
.seq_show = cgroup_css_links_read,
},
{
.name = "releasable",
.read_u64 = releasable_read,
},
{ } /* terminate */
};
struct cgroup_subsys debug_cgrp_subsys = {
Tejun Heo
committed
.css_alloc = debug_css_alloc,
.css_free = debug_css_free,
.legacy_cftypes = debug_files,
};
#endif /* CONFIG_CGROUP_DEBUG */