Merge branch 'for-4.6-ns' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup namespace support from Tejun Heo: "These are changes to implement namespace support for cgroup which has been pending for quite some time now. It is very straight-forward and only affects what part of cgroup hierarchies are visible. After unsharing, mounting a cgroup fs will be scoped to the cgroups the task belonged to at the time of unsharing and the cgroup paths exposed to userland would be adjusted accordingly" * 'for-4.6-ns' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: fix and restructure error handling in copy_cgroup_ns() cgroup: fix alloc_cgroup_ns() error handling in copy_cgroup_ns() Add FS_USERNS_FLAG to cgroup fs cgroup: Add documentation for cgroup namespaces cgroup: mount cgroupns-root when inside non-init cgroupns kernfs: define kernfs_node_dentry cgroup: cgroup namespace setns support cgroup: introduce cgroup namespaces sched: new clone flag CLONE_NEWCGROUP for cgroup namespace kernfs: Add API to generate relative kernfs path
This commit is contained in:
@@ -47,6 +47,11 @@ CONTENTS
|
|||||||
5-3. IO
|
5-3. IO
|
||||||
5-3-1. IO Interface Files
|
5-3-1. IO Interface Files
|
||||||
5-3-2. Writeback
|
5-3-2. Writeback
|
||||||
|
6. Namespace
|
||||||
|
6-1. Basics
|
||||||
|
6-2. The Root and Views
|
||||||
|
6-3. Migration and setns(2)
|
||||||
|
6-4. Interaction with Other Namespaces
|
||||||
P. Information on Kernel Programming
|
P. Information on Kernel Programming
|
||||||
P-1. Filesystem Support for Writeback
|
P-1. Filesystem Support for Writeback
|
||||||
D. Deprecated v1 Core Features
|
D. Deprecated v1 Core Features
|
||||||
@@ -1114,6 +1119,148 @@ writeback as follows.
|
|||||||
vm.dirty[_background]_ratio.
|
vm.dirty[_background]_ratio.
|
||||||
|
|
||||||
|
|
||||||
|
6. Namespace
|
||||||
|
|
||||||
|
6-1. Basics
|
||||||
|
|
||||||
|
cgroup namespace provides a mechanism to virtualize the view of the
|
||||||
|
"/proc/$PID/cgroup" file and cgroup mounts. The CLONE_NEWCGROUP clone
|
||||||
|
flag can be used with clone(2) and unshare(2) to create a new cgroup
|
||||||
|
namespace. The process running inside the cgroup namespace will have
|
||||||
|
its "/proc/$PID/cgroup" output restricted to cgroupns root. The
|
||||||
|
cgroupns root is the cgroup of the process at the time of creation of
|
||||||
|
the cgroup namespace.
|
||||||
|
|
||||||
|
Without cgroup namespace, the "/proc/$PID/cgroup" file shows the
|
||||||
|
complete path of the cgroup of a process. In a container setup where
|
||||||
|
a set of cgroups and namespaces are intended to isolate processes, the
|
||||||
|
"/proc/$PID/cgroup" file may leak potential system level information
|
||||||
|
to the isolated processes. For example:
|
||||||
|
|
||||||
|
# cat /proc/self/cgroup
|
||||||
|
0::/batchjobs/container_id1
|
||||||
|
|
||||||
|
The path '/batchjobs/container_id1' can be considered as system-data
|
||||||
|
and undesirable to expose to the isolated processes. cgroup namespace
|
||||||
|
can be used to restrict visibility of this path. For example, before
|
||||||
|
creating a cgroup namespace, one would see:
|
||||||
|
|
||||||
|
# ls -l /proc/self/ns/cgroup
|
||||||
|
lrwxrwxrwx 1 root root 0 2014-07-15 10:37 /proc/self/ns/cgroup -> cgroup:[4026531835]
|
||||||
|
# cat /proc/self/cgroup
|
||||||
|
0::/batchjobs/container_id1
|
||||||
|
|
||||||
|
After unsharing a new namespace, the view changes.
|
||||||
|
|
||||||
|
# ls -l /proc/self/ns/cgroup
|
||||||
|
lrwxrwxrwx 1 root root 0 2014-07-15 10:35 /proc/self/ns/cgroup -> cgroup:[4026532183]
|
||||||
|
# cat /proc/self/cgroup
|
||||||
|
0::/
|
||||||
|
|
||||||
|
When some thread from a multi-threaded process unshares its cgroup
|
||||||
|
namespace, the new cgroupns gets applied to the entire process (all
|
||||||
|
the threads). This is natural for the v2 hierarchy; however, for the
|
||||||
|
legacy hierarchies, this may be unexpected.
|
||||||
|
|
||||||
|
A cgroup namespace is alive as long as there are processes inside or
|
||||||
|
mounts pinning it. When the last usage goes away, the cgroup
|
||||||
|
namespace is destroyed. The cgroupns root and the actual cgroups
|
||||||
|
remain.
|
||||||
|
|
||||||
|
|
||||||
|
6-2. The Root and Views
|
||||||
|
|
||||||
|
The 'cgroupns root' for a cgroup namespace is the cgroup in which the
|
||||||
|
process calling unshare(2) is running. For example, if a process in
|
||||||
|
/batchjobs/container_id1 cgroup calls unshare, cgroup
|
||||||
|
/batchjobs/container_id1 becomes the cgroupns root. For the
|
||||||
|
init_cgroup_ns, this is the real root ('/') cgroup.
|
||||||
|
|
||||||
|
The cgroupns root cgroup does not change even if the namespace creator
|
||||||
|
process later moves to a different cgroup.
|
||||||
|
|
||||||
|
# ~/unshare -c # unshare cgroupns in some cgroup
|
||||||
|
# cat /proc/self/cgroup
|
||||||
|
0::/
|
||||||
|
# mkdir sub_cgrp_1
|
||||||
|
# echo 0 > sub_cgrp_1/cgroup.procs
|
||||||
|
# cat /proc/self/cgroup
|
||||||
|
0::/sub_cgrp_1
|
||||||
|
|
||||||
|
Each process gets its namespace-specific view of "/proc/$PID/cgroup".
|
||||||
|
|
||||||
|
Processes running inside the cgroup namespace will be able to see
|
||||||
|
cgroup paths (in /proc/self/cgroup) only inside their root cgroup.
|
||||||
|
From within an unshared cgroupns:
|
||||||
|
|
||||||
|
# sleep 100000 &
|
||||||
|
[1] 7353
|
||||||
|
# echo 7353 > sub_cgrp_1/cgroup.procs
|
||||||
|
# cat /proc/7353/cgroup
|
||||||
|
0::/sub_cgrp_1
|
||||||
|
|
||||||
|
From the initial cgroup namespace, the real cgroup path will be
|
||||||
|
visible:
|
||||||
|
|
||||||
|
$ cat /proc/7353/cgroup
|
||||||
|
0::/batchjobs/container_id1/sub_cgrp_1
|
||||||
|
|
||||||
|
From a sibling cgroup namespace (that is, a namespace rooted at a
|
||||||
|
different cgroup), the cgroup path relative to its own cgroup
|
||||||
|
namespace root will be shown. For instance, if PID 7353's cgroup
|
||||||
|
namespace root is at '/batchjobs/container_id2', then it will see:
|
||||||
|
|
||||||
|
# cat /proc/7353/cgroup
|
||||||
|
0::/../container_id2/sub_cgrp_1
|
||||||
|
|
||||||
|
Note that the relative path always starts with '/' to indicate that
|
||||||
|
it's relative to the cgroup namespace root of the caller.
|
||||||
|
|
||||||
|
|
||||||
|
6-3. Migration and setns(2)
|
||||||
|
|
||||||
|
Processes inside a cgroup namespace can move into and out of the
|
||||||
|
namespace root if they have proper access to external cgroups. For
|
||||||
|
example, from inside a namespace with cgroupns root at
|
||||||
|
/batchjobs/container_id1, and assuming that the global hierarchy is
|
||||||
|
still accessible inside cgroupns:
|
||||||
|
|
||||||
|
# cat /proc/7353/cgroup
|
||||||
|
0::/sub_cgrp_1
|
||||||
|
# echo 7353 > batchjobs/container_id2/cgroup.procs
|
||||||
|
# cat /proc/7353/cgroup
|
||||||
|
0::/../container_id2
|
||||||
|
|
||||||
|
Note that this kind of setup is not encouraged. A task inside cgroup
|
||||||
|
namespace should only be exposed to its own cgroupns hierarchy.
|
||||||
|
|
||||||
|
setns(2) to another cgroup namespace is allowed when:
|
||||||
|
|
||||||
|
(a) the process has CAP_SYS_ADMIN against its current user namespace
|
||||||
|
(b) the process has CAP_SYS_ADMIN against the target cgroup
|
||||||
|
namespace's userns
|
||||||
|
|
||||||
|
No implicit cgroup changes happen when attaching to another cgroup
|
||||||
|
namespace. It is expected that someone moves the attaching
|
||||||
|
process under the target cgroup namespace root.
|
||||||
|
|
||||||
|
|
||||||
|
6-4. Interaction with Other Namespaces
|
||||||
|
|
||||||
|
A namespace-specific cgroup hierarchy can be mounted by a process
|
||||||
|
running inside a non-init cgroup namespace.
|
||||||
|
|
||||||
|
# mount -t cgroup2 none $MOUNT_POINT
|
||||||
|
|
||||||
|
This will mount the unified cgroup hierarchy with cgroupns root as the
|
||||||
|
filesystem root. The process needs CAP_SYS_ADMIN against its user and
|
||||||
|
mount namespaces.
|
||||||
|
|
||||||
|
The virtualization of /proc/self/cgroup file combined with restricting
|
||||||
|
the view of cgroup hierarchy by namespace-private cgroupfs mount
|
||||||
|
provides a properly isolated cgroup view inside the container.
|
||||||
|
|
||||||
|
|
||||||
P. Information on Kernel Programming
|
P. Information on Kernel Programming
|
||||||
|
|
||||||
This section contains kernel programming information in the areas
|
This section contains kernel programming information in the areas
|
||||||
|
|||||||
189
fs/kernfs/dir.c
189
fs/kernfs/dir.c
@@ -44,28 +44,122 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
|
|||||||
return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
|
return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
|
||||||
}
|
}
|
||||||
|
|
||||||
static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
|
/* kernfs_depth - compute depth from @from to @to */
|
||||||
size_t buflen)
|
static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
|
||||||
{
|
{
|
||||||
char *p = buf + buflen;
|
size_t depth = 0;
|
||||||
int len;
|
|
||||||
|
|
||||||
*--p = '\0';
|
while (to->parent && to != from) {
|
||||||
|
depth++;
|
||||||
do {
|
to = to->parent;
|
||||||
len = strlen(kn->name);
|
|
||||||
if (p - buf < len + 1) {
|
|
||||||
buf[0] = '\0';
|
|
||||||
p = NULL;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
p -= len;
|
return depth;
|
||||||
memcpy(p, kn->name, len);
|
}
|
||||||
*--p = '/';
|
|
||||||
kn = kn->parent;
|
|
||||||
} while (kn && kn->parent);
|
|
||||||
|
|
||||||
return p;
|
static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
|
||||||
|
struct kernfs_node *b)
|
||||||
|
{
|
||||||
|
size_t da, db;
|
||||||
|
struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
|
||||||
|
|
||||||
|
if (ra != rb)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
da = kernfs_depth(ra->kn, a);
|
||||||
|
db = kernfs_depth(rb->kn, b);
|
||||||
|
|
||||||
|
while (da > db) {
|
||||||
|
a = a->parent;
|
||||||
|
da--;
|
||||||
|
}
|
||||||
|
while (db > da) {
|
||||||
|
b = b->parent;
|
||||||
|
db--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* worst case b and a will be the same at root */
|
||||||
|
while (b != a) {
|
||||||
|
b = b->parent;
|
||||||
|
a = a->parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
|
||||||
|
* where kn_from is treated as root of the path.
|
||||||
|
* @kn_from: kernfs node which should be treated as root for the path
|
||||||
|
* @kn_to: kernfs node to which path is needed
|
||||||
|
* @buf: buffer to copy the path into
|
||||||
|
* @buflen: size of @buf
|
||||||
|
*
|
||||||
|
* We need to handle a couple of scenarios here:
|
||||||
|
* [1] when @kn_from is an ancestor of @kn_to at some level
|
||||||
|
* kn_from: /n1/n2/n3
|
||||||
|
* kn_to: /n1/n2/n3/n4/n5
|
||||||
|
* result: /n4/n5
|
||||||
|
*
|
||||||
|
* [2] when @kn_from is on a different hierarchy and we need to find common
|
||||||
|
* ancestor between @kn_from and @kn_to.
|
||||||
|
* kn_from: /n1/n2/n3/n4
|
||||||
|
* kn_to: /n1/n2/n5
|
||||||
|
* result: /../../n5
|
||||||
|
* OR
|
||||||
|
* kn_from: /n1/n2/n3/n4/n5 [depth=5]
|
||||||
|
* kn_to: /n1/n2/n3 [depth=3]
|
||||||
|
* result: /../..
|
||||||
|
*
|
||||||
|
* return value: length of the string. If greater than or equal to buflen,
|
||||||
|
* then contents of buf are undefined. On error, -1 is returned.
|
||||||
|
*/
|
||||||
|
static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
|
||||||
|
struct kernfs_node *kn_from,
|
||||||
|
char *buf, size_t buflen)
|
||||||
|
{
|
||||||
|
struct kernfs_node *kn, *common;
|
||||||
|
const char parent_str[] = "/..";
|
||||||
|
size_t depth_from, depth_to, len = 0, nlen = 0;
|
||||||
|
char *p;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (!kn_from)
|
||||||
|
kn_from = kernfs_root(kn_to)->kn;
|
||||||
|
|
||||||
|
if (kn_from == kn_to)
|
||||||
|
return strlcpy(buf, "/", buflen);
|
||||||
|
|
||||||
|
common = kernfs_common_ancestor(kn_from, kn_to);
|
||||||
|
if (WARN_ON(!common))
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
depth_to = kernfs_depth(common, kn_to);
|
||||||
|
depth_from = kernfs_depth(common, kn_from);
|
||||||
|
|
||||||
|
if (buf)
|
||||||
|
buf[0] = '\0';
|
||||||
|
|
||||||
|
for (i = 0; i < depth_from; i++)
|
||||||
|
len += strlcpy(buf + len, parent_str,
|
||||||
|
len < buflen ? buflen - len : 0);
|
||||||
|
|
||||||
|
/* Calculate how many bytes we need for the rest */
|
||||||
|
for (kn = kn_to; kn != common; kn = kn->parent)
|
||||||
|
nlen += strlen(kn->name) + 1;
|
||||||
|
|
||||||
|
if (len + nlen >= buflen)
|
||||||
|
return len + nlen;
|
||||||
|
|
||||||
|
p = buf + len + nlen;
|
||||||
|
*p = '\0';
|
||||||
|
for (kn = kn_to; kn != common; kn = kn->parent) {
|
||||||
|
nlen = strlen(kn->name);
|
||||||
|
p -= nlen;
|
||||||
|
memcpy(p, kn->name, nlen);
|
||||||
|
*(--p) = '/';
|
||||||
|
}
|
||||||
|
|
||||||
|
return len + nlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -114,6 +208,34 @@ size_t kernfs_path_len(struct kernfs_node *kn)
|
|||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* kernfs_path_from_node - build path of node @to relative to @from.
|
||||||
|
* @from: parent kernfs_node relative to which we need to build the path
|
||||||
|
* @to: kernfs_node of interest
|
||||||
|
* @buf: buffer to copy @to's path into
|
||||||
|
* @buflen: size of @buf
|
||||||
|
*
|
||||||
|
* Builds @to's path relative to @from in @buf. @from and @to must
|
||||||
|
* be on the same kernfs-root. If @from is not parent of @to, then a relative
|
||||||
|
* path (which includes '..'s) as needed to reach from @from to @to is
|
||||||
|
* returned.
|
||||||
|
*
|
||||||
|
* If @buf isn't long enough, the return value will be at least @buflen
|
||||||
|
* and @buf contents are undefined.
|
||||||
|
*/
|
||||||
|
int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
|
||||||
|
char *buf, size_t buflen)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&kernfs_rename_lock, flags);
|
||||||
|
ret = kernfs_path_from_node_locked(to, from, buf, buflen);
|
||||||
|
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kernfs_path_from_node);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* kernfs_path - build full path of a given node
|
* kernfs_path - build full path of a given node
|
||||||
* @kn: kernfs_node of interest
|
* @kn: kernfs_node of interest
|
||||||
@@ -127,13 +249,12 @@ size_t kernfs_path_len(struct kernfs_node *kn)
|
|||||||
*/
|
*/
|
||||||
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
|
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
int ret;
|
||||||
char *p;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&kernfs_rename_lock, flags);
|
ret = kernfs_path_from_node(kn, NULL, buf, buflen);
|
||||||
p = kernfs_path_locked(kn, buf, buflen);
|
if (ret < 0 || ret >= buflen)
|
||||||
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
|
return NULL;
|
||||||
return p;
|
return buf;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kernfs_path);
|
EXPORT_SYMBOL_GPL(kernfs_path);
|
||||||
|
|
||||||
@@ -164,17 +285,25 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
|
|||||||
void pr_cont_kernfs_path(struct kernfs_node *kn)
|
void pr_cont_kernfs_path(struct kernfs_node *kn)
|
||||||
{
|
{
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
char *p;
|
int sz;
|
||||||
|
|
||||||
spin_lock_irqsave(&kernfs_rename_lock, flags);
|
spin_lock_irqsave(&kernfs_rename_lock, flags);
|
||||||
|
|
||||||
p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
|
sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
|
||||||
sizeof(kernfs_pr_cont_buf));
|
sizeof(kernfs_pr_cont_buf));
|
||||||
if (p)
|
if (sz < 0) {
|
||||||
pr_cont("%s", p);
|
pr_cont("(error)");
|
||||||
else
|
goto out;
|
||||||
pr_cont("<name too long>");
|
}
|
||||||
|
|
||||||
|
if (sz >= sizeof(kernfs_pr_cont_buf)) {
|
||||||
|
pr_cont("(name too long)");
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
pr_cont("%s", kernfs_pr_cont_buf);
|
||||||
|
|
||||||
|
out:
|
||||||
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
|
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@
|
|||||||
#include <linux/magic.h>
|
#include <linux/magic.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/pagemap.h>
|
#include <linux/pagemap.h>
|
||||||
|
#include <linux/namei.h>
|
||||||
|
|
||||||
#include "kernfs-internal.h"
|
#include "kernfs-internal.h"
|
||||||
|
|
||||||
@@ -62,6 +63,74 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* find the next ancestor in the path down to @child, where @parent was the
|
||||||
|
* ancestor whose descendant we want to find.
|
||||||
|
*
|
||||||
|
* Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root
|
||||||
|
* node. If @parent is b, then we return the node for c.
|
||||||
|
* Passing in d as @parent is not ok.
|
||||||
|
*/
|
||||||
|
static struct kernfs_node *find_next_ancestor(struct kernfs_node *child,
|
||||||
|
struct kernfs_node *parent)
|
||||||
|
{
|
||||||
|
if (child == parent) {
|
||||||
|
pr_crit_once("BUG in find_next_ancestor: called with parent == child");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (child->parent != parent) {
|
||||||
|
if (!child->parent)
|
||||||
|
return NULL;
|
||||||
|
child = child->parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* kernfs_node_dentry - get a dentry for the given kernfs_node
|
||||||
|
* @kn: kernfs_node for which a dentry is needed
|
||||||
|
* @sb: the kernfs super_block
|
||||||
|
*/
|
||||||
|
struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
|
||||||
|
struct super_block *sb)
|
||||||
|
{
|
||||||
|
struct dentry *dentry;
|
||||||
|
struct kernfs_node *knparent = NULL;
|
||||||
|
|
||||||
|
BUG_ON(sb->s_op != &kernfs_sops);
|
||||||
|
|
||||||
|
dentry = dget(sb->s_root);
|
||||||
|
|
||||||
|
/* Check if this is the root kernfs_node */
|
||||||
|
if (!kn->parent)
|
||||||
|
return dentry;
|
||||||
|
|
||||||
|
knparent = find_next_ancestor(kn, NULL);
|
||||||
|
if (WARN_ON(!knparent))
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
|
do {
|
||||||
|
struct dentry *dtmp;
|
||||||
|
struct kernfs_node *kntmp;
|
||||||
|
|
||||||
|
if (kn == knparent)
|
||||||
|
return dentry;
|
||||||
|
kntmp = find_next_ancestor(kn, knparent);
|
||||||
|
if (WARN_ON(!kntmp))
|
||||||
|
return ERR_PTR(-EINVAL);
|
||||||
|
mutex_lock(&d_inode(dentry)->i_mutex);
|
||||||
|
dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
|
||||||
|
mutex_unlock(&d_inode(dentry)->i_mutex);
|
||||||
|
dput(dentry);
|
||||||
|
if (IS_ERR(dtmp))
|
||||||
|
return dtmp;
|
||||||
|
knparent = kntmp;
|
||||||
|
dentry = dtmp;
|
||||||
|
} while (true);
|
||||||
|
}
|
||||||
|
|
||||||
static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
|
static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
|
||||||
{
|
{
|
||||||
struct kernfs_super_info *info = kernfs_info(sb);
|
struct kernfs_super_info *info = kernfs_info(sb);
|
||||||
|
|||||||
@@ -28,6 +28,9 @@ static const struct proc_ns_operations *ns_entries[] = {
|
|||||||
&userns_operations,
|
&userns_operations,
|
||||||
#endif
|
#endif
|
||||||
&mntns_operations,
|
&mntns_operations,
|
||||||
|
#ifdef CONFIG_CGROUPS
|
||||||
|
&cgroupns_operations,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *proc_ns_get_link(struct dentry *dentry,
|
static const char *proc_ns_get_link(struct dentry *dentry,
|
||||||
|
|||||||
@@ -17,6 +17,11 @@
|
|||||||
#include <linux/seq_file.h>
|
#include <linux/seq_file.h>
|
||||||
#include <linux/kernfs.h>
|
#include <linux/kernfs.h>
|
||||||
#include <linux/jump_label.h>
|
#include <linux/jump_label.h>
|
||||||
|
#include <linux/nsproxy.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/ns_common.h>
|
||||||
|
#include <linux/nsproxy.h>
|
||||||
|
#include <linux/user_namespace.h>
|
||||||
|
|
||||||
#include <linux/cgroup-defs.h>
|
#include <linux/cgroup-defs.h>
|
||||||
|
|
||||||
@@ -611,4 +616,48 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
|
|||||||
|
|
||||||
#endif /* CONFIG_CGROUP_DATA */
|
#endif /* CONFIG_CGROUP_DATA */
|
||||||
|
|
||||||
|
struct cgroup_namespace {
|
||||||
|
atomic_t count;
|
||||||
|
struct ns_common ns;
|
||||||
|
struct user_namespace *user_ns;
|
||||||
|
struct css_set *root_cset;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern struct cgroup_namespace init_cgroup_ns;
|
||||||
|
|
||||||
|
#ifdef CONFIG_CGROUPS
|
||||||
|
|
||||||
|
void free_cgroup_ns(struct cgroup_namespace *ns);
|
||||||
|
|
||||||
|
struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
|
||||||
|
struct user_namespace *user_ns,
|
||||||
|
struct cgroup_namespace *old_ns);
|
||||||
|
|
||||||
|
char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
|
||||||
|
struct cgroup_namespace *ns);
|
||||||
|
|
||||||
|
#else /* !CONFIG_CGROUPS */
|
||||||
|
|
||||||
|
static inline void free_cgroup_ns(struct cgroup_namespace *ns) { }
|
||||||
|
static inline struct cgroup_namespace *
|
||||||
|
copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
|
||||||
|
struct cgroup_namespace *old_ns)
|
||||||
|
{
|
||||||
|
return old_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* !CONFIG_CGROUPS */
|
||||||
|
|
||||||
|
static inline void get_cgroup_ns(struct cgroup_namespace *ns)
|
||||||
|
{
|
||||||
|
if (ns)
|
||||||
|
atomic_inc(&ns->count);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void put_cgroup_ns(struct cgroup_namespace *ns)
|
||||||
|
{
|
||||||
|
if (ns && atomic_dec_and_test(&ns->count))
|
||||||
|
free_cgroup_ns(ns);
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* _LINUX_CGROUP_H */
|
#endif /* _LINUX_CGROUP_H */
|
||||||
|
|||||||
@@ -267,8 +267,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
|
|||||||
|
|
||||||
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
|
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
|
||||||
size_t kernfs_path_len(struct kernfs_node *kn);
|
size_t kernfs_path_len(struct kernfs_node *kn);
|
||||||
char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
|
int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn,
|
||||||
size_t buflen);
|
char *buf, size_t buflen);
|
||||||
|
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen);
|
||||||
void pr_cont_kernfs_name(struct kernfs_node *kn);
|
void pr_cont_kernfs_name(struct kernfs_node *kn);
|
||||||
void pr_cont_kernfs_path(struct kernfs_node *kn);
|
void pr_cont_kernfs_path(struct kernfs_node *kn);
|
||||||
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
|
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
|
||||||
@@ -283,6 +284,8 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry);
|
|||||||
struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
|
struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
|
||||||
struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
|
struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
|
||||||
|
|
||||||
|
struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
|
||||||
|
struct super_block *sb);
|
||||||
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
|
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
|
||||||
unsigned int flags, void *priv);
|
unsigned int flags, void *priv);
|
||||||
void kernfs_destroy_root(struct kernfs_root *root);
|
void kernfs_destroy_root(struct kernfs_root *root);
|
||||||
@@ -338,7 +341,7 @@ static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
|
|||||||
static inline size_t kernfs_path_len(struct kernfs_node *kn)
|
static inline size_t kernfs_path_len(struct kernfs_node *kn)
|
||||||
{ return 0; }
|
{ return 0; }
|
||||||
|
|
||||||
static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
|
static inline char *kernfs_path(struct kernfs_node *kn, char *buf,
|
||||||
size_t buflen)
|
size_t buflen)
|
||||||
{ return NULL; }
|
{ return NULL; }
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ struct mnt_namespace;
|
|||||||
struct uts_namespace;
|
struct uts_namespace;
|
||||||
struct ipc_namespace;
|
struct ipc_namespace;
|
||||||
struct pid_namespace;
|
struct pid_namespace;
|
||||||
|
struct cgroup_namespace;
|
||||||
struct fs_struct;
|
struct fs_struct;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -33,6 +34,7 @@ struct nsproxy {
|
|||||||
struct mnt_namespace *mnt_ns;
|
struct mnt_namespace *mnt_ns;
|
||||||
struct pid_namespace *pid_ns_for_children;
|
struct pid_namespace *pid_ns_for_children;
|
||||||
struct net *net_ns;
|
struct net *net_ns;
|
||||||
|
struct cgroup_namespace *cgroup_ns;
|
||||||
};
|
};
|
||||||
extern struct nsproxy init_nsproxy;
|
extern struct nsproxy init_nsproxy;
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,8 @@
|
|||||||
struct pid_namespace;
|
struct pid_namespace;
|
||||||
struct nsproxy;
|
struct nsproxy;
|
||||||
struct path;
|
struct path;
|
||||||
|
struct task_struct;
|
||||||
|
struct inode;
|
||||||
|
|
||||||
struct proc_ns_operations {
|
struct proc_ns_operations {
|
||||||
const char *name;
|
const char *name;
|
||||||
@@ -24,6 +26,7 @@ extern const struct proc_ns_operations ipcns_operations;
|
|||||||
extern const struct proc_ns_operations pidns_operations;
|
extern const struct proc_ns_operations pidns_operations;
|
||||||
extern const struct proc_ns_operations userns_operations;
|
extern const struct proc_ns_operations userns_operations;
|
||||||
extern const struct proc_ns_operations mntns_operations;
|
extern const struct proc_ns_operations mntns_operations;
|
||||||
|
extern const struct proc_ns_operations cgroupns_operations;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We always define these enumerators
|
* We always define these enumerators
|
||||||
@@ -34,6 +37,7 @@ enum {
|
|||||||
PROC_UTS_INIT_INO = 0xEFFFFFFEU,
|
PROC_UTS_INIT_INO = 0xEFFFFFFEU,
|
||||||
PROC_USER_INIT_INO = 0xEFFFFFFDU,
|
PROC_USER_INIT_INO = 0xEFFFFFFDU,
|
||||||
PROC_PID_INIT_INO = 0xEFFFFFFCU,
|
PROC_PID_INIT_INO = 0xEFFFFFFCU,
|
||||||
|
PROC_CGROUP_INIT_INO = 0xEFFFFFFBU,
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef CONFIG_PROC_FS
|
#ifdef CONFIG_PROC_FS
|
||||||
|
|||||||
@@ -21,8 +21,7 @@
|
|||||||
#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
|
#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
|
||||||
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
|
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
|
||||||
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
|
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
|
||||||
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
|
#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
|
||||||
and is now available for re-use. */
|
|
||||||
#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
|
#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
|
||||||
#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
|
#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
|
||||||
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
|
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
|
||||||
|
|||||||
229
kernel/cgroup.c
229
kernel/cgroup.c
@@ -59,6 +59,9 @@
|
|||||||
#include <linux/delay.h>
|
#include <linux/delay.h>
|
||||||
#include <linux/atomic.h>
|
#include <linux/atomic.h>
|
||||||
#include <linux/cpuset.h>
|
#include <linux/cpuset.h>
|
||||||
|
#include <linux/proc_ns.h>
|
||||||
|
#include <linux/nsproxy.h>
|
||||||
|
#include <linux/proc_ns.h>
|
||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -215,6 +218,15 @@ static u16 have_fork_callback __read_mostly;
|
|||||||
static u16 have_exit_callback __read_mostly;
|
static u16 have_exit_callback __read_mostly;
|
||||||
static u16 have_free_callback __read_mostly;
|
static u16 have_free_callback __read_mostly;
|
||||||
|
|
||||||
|
/* cgroup namespace for init task */
|
||||||
|
struct cgroup_namespace init_cgroup_ns = {
|
||||||
|
.count = { .counter = 2, },
|
||||||
|
.user_ns = &init_user_ns,
|
||||||
|
.ns.ops = &cgroupns_operations,
|
||||||
|
.ns.inum = PROC_CGROUP_INIT_INO,
|
||||||
|
.root_cset = &init_css_set,
|
||||||
|
};
|
||||||
|
|
||||||
/* Ditto for the can_fork callback. */
|
/* Ditto for the can_fork callback. */
|
||||||
static u16 have_canfork_callback __read_mostly;
|
static u16 have_canfork_callback __read_mostly;
|
||||||
|
|
||||||
@@ -2002,6 +2014,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
|||||||
{
|
{
|
||||||
bool is_v2 = fs_type == &cgroup2_fs_type;
|
bool is_v2 = fs_type == &cgroup2_fs_type;
|
||||||
struct super_block *pinned_sb = NULL;
|
struct super_block *pinned_sb = NULL;
|
||||||
|
struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
|
||||||
struct cgroup_subsys *ss;
|
struct cgroup_subsys *ss;
|
||||||
struct cgroup_root *root;
|
struct cgroup_root *root;
|
||||||
struct cgroup_sb_opts opts;
|
struct cgroup_sb_opts opts;
|
||||||
@@ -2010,6 +2023,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
|||||||
int i;
|
int i;
|
||||||
bool new_sb;
|
bool new_sb;
|
||||||
|
|
||||||
|
get_cgroup_ns(ns);
|
||||||
|
|
||||||
|
/* Check if the caller has permission to mount. */
|
||||||
|
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
|
||||||
|
put_cgroup_ns(ns);
|
||||||
|
return ERR_PTR(-EPERM);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The first time anyone tries to mount a cgroup, enable the list
|
* The first time anyone tries to mount a cgroup, enable the list
|
||||||
* linking each css_set to its tasks and fix up all existing tasks.
|
* linking each css_set to its tasks and fix up all existing tasks.
|
||||||
@@ -2020,6 +2041,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
|||||||
if (is_v2) {
|
if (is_v2) {
|
||||||
if (data) {
|
if (data) {
|
||||||
pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
|
pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
|
||||||
|
put_cgroup_ns(ns);
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
cgrp_dfl_visible = true;
|
cgrp_dfl_visible = true;
|
||||||
@@ -2125,6 +2147,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
|
|||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We know this subsystem has not yet been bound. Users in a non-init
|
||||||
|
* user namespace may only mount hierarchies with no bound subsystems,
|
||||||
|
* i.e. 'none,name=user1'
|
||||||
|
*/
|
||||||
|
if (!opts.none && !capable(CAP_SYS_ADMIN)) {
|
||||||
|
ret = -EPERM;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
root = kzalloc(sizeof(*root), GFP_KERNEL);
|
root = kzalloc(sizeof(*root), GFP_KERNEL);
|
||||||
if (!root) {
|
if (!root) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
@@ -2143,12 +2175,37 @@ out_free:
|
|||||||
kfree(opts.release_agent);
|
kfree(opts.release_agent);
|
||||||
kfree(opts.name);
|
kfree(opts.name);
|
||||||
|
|
||||||
if (ret)
|
if (ret) {
|
||||||
|
put_cgroup_ns(ns);
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
out_mount:
|
out_mount:
|
||||||
dentry = kernfs_mount(fs_type, flags, root->kf_root,
|
dentry = kernfs_mount(fs_type, flags, root->kf_root,
|
||||||
is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
|
is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
|
||||||
&new_sb);
|
&new_sb);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In non-init cgroup namespace, instead of root cgroup's
|
||||||
|
* dentry, we return the dentry corresponding to the
|
||||||
|
* cgroupns->root_cgrp.
|
||||||
|
*/
|
||||||
|
if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
|
||||||
|
struct dentry *nsdentry;
|
||||||
|
struct cgroup *cgrp;
|
||||||
|
|
||||||
|
mutex_lock(&cgroup_mutex);
|
||||||
|
spin_lock_bh(&css_set_lock);
|
||||||
|
|
||||||
|
cgrp = cset_cgroup_from_root(ns->root_cset, root);
|
||||||
|
|
||||||
|
spin_unlock_bh(&css_set_lock);
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
|
nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
|
||||||
|
dput(dentry);
|
||||||
|
dentry = nsdentry;
|
||||||
|
}
|
||||||
|
|
||||||
if (IS_ERR(dentry) || !new_sb)
|
if (IS_ERR(dentry) || !new_sb)
|
||||||
cgroup_put(&root->cgrp);
|
cgroup_put(&root->cgrp);
|
||||||
|
|
||||||
@@ -2161,6 +2218,7 @@ out_mount:
|
|||||||
deactivate_super(pinned_sb);
|
deactivate_super(pinned_sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
put_cgroup_ns(ns);
|
||||||
return dentry;
|
return dentry;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2189,14 +2247,45 @@ static struct file_system_type cgroup_fs_type = {
|
|||||||
.name = "cgroup",
|
.name = "cgroup",
|
||||||
.mount = cgroup_mount,
|
.mount = cgroup_mount,
|
||||||
.kill_sb = cgroup_kill_sb,
|
.kill_sb = cgroup_kill_sb,
|
||||||
|
.fs_flags = FS_USERNS_MOUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct file_system_type cgroup2_fs_type = {
|
static struct file_system_type cgroup2_fs_type = {
|
||||||
.name = "cgroup2",
|
.name = "cgroup2",
|
||||||
.mount = cgroup_mount,
|
.mount = cgroup_mount,
|
||||||
.kill_sb = cgroup_kill_sb,
|
.kill_sb = cgroup_kill_sb,
|
||||||
|
.fs_flags = FS_USERNS_MOUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
|
||||||
|
struct cgroup_namespace *ns)
|
||||||
|
{
|
||||||
|
struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
|
||||||
|
if (ret < 0 || ret >= buflen)
|
||||||
|
return NULL;
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
|
||||||
|
struct cgroup_namespace *ns)
|
||||||
|
{
|
||||||
|
char *ret;
|
||||||
|
|
||||||
|
mutex_lock(&cgroup_mutex);
|
||||||
|
spin_lock_bh(&css_set_lock);
|
||||||
|
|
||||||
|
ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
|
||||||
|
|
||||||
|
spin_unlock_bh(&css_set_lock);
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(cgroup_path_ns);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
|
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
|
||||||
* @task: target task
|
* @task: target task
|
||||||
@@ -2224,7 +2313,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
|
|||||||
|
|
||||||
if (root) {
|
if (root) {
|
||||||
cgrp = task_cgroup_from_root(task, root);
|
cgrp = task_cgroup_from_root(task, root);
|
||||||
path = cgroup_path(cgrp, buf, buflen);
|
path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
|
||||||
} else {
|
} else {
|
||||||
/* if no hierarchy exists, everyone is in "/" */
|
/* if no hierarchy exists, everyone is in "/" */
|
||||||
if (strlcpy(buf, "/", buflen) < buflen)
|
if (strlcpy(buf, "/", buflen) < buflen)
|
||||||
@@ -5450,6 +5539,8 @@ int __init cgroup_init(void)
|
|||||||
BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
|
||||||
BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
|
BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
|
||||||
|
|
||||||
|
get_user_ns(init_cgroup_ns.user_ns);
|
||||||
|
|
||||||
mutex_lock(&cgroup_mutex);
|
mutex_lock(&cgroup_mutex);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -5601,7 +5692,8 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
|
|||||||
* " (deleted)" is appended to the cgroup path.
|
* " (deleted)" is appended to the cgroup path.
|
||||||
*/
|
*/
|
||||||
if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
|
if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
|
||||||
path = cgroup_path(cgrp, buf, PATH_MAX);
|
path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
|
||||||
|
current->nsproxy->cgroup_ns);
|
||||||
if (!path) {
|
if (!path) {
|
||||||
retval = -ENAMETOOLONG;
|
retval = -ENAMETOOLONG;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
@@ -5886,7 +5978,9 @@ static void cgroup_release_agent(struct work_struct *work)
|
|||||||
if (!pathbuf || !agentbuf)
|
if (!pathbuf || !agentbuf)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
path = cgroup_path(cgrp, pathbuf, PATH_MAX);
|
spin_lock_bh(&css_set_lock);
|
||||||
|
path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
|
||||||
|
spin_unlock_bh(&css_set_lock);
|
||||||
if (!path)
|
if (!path)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
@@ -6098,6 +6192,133 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
|
|||||||
|
|
||||||
#endif /* CONFIG_SOCK_CGROUP_DATA */
|
#endif /* CONFIG_SOCK_CGROUP_DATA */
|
||||||
|
|
||||||
|
/* cgroup namespaces */
|
||||||
|
|
||||||
|
static struct cgroup_namespace *alloc_cgroup_ns(void)
|
||||||
|
{
|
||||||
|
struct cgroup_namespace *new_ns;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
|
||||||
|
if (!new_ns)
|
||||||
|
return ERR_PTR(-ENOMEM);
|
||||||
|
ret = ns_alloc_inum(&new_ns->ns);
|
||||||
|
if (ret) {
|
||||||
|
kfree(new_ns);
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
atomic_set(&new_ns->count, 1);
|
||||||
|
new_ns->ns.ops = &cgroupns_operations;
|
||||||
|
return new_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_cgroup_ns(struct cgroup_namespace *ns)
|
||||||
|
{
|
||||||
|
put_css_set(ns->root_cset);
|
||||||
|
put_user_ns(ns->user_ns);
|
||||||
|
ns_free_inum(&ns->ns);
|
||||||
|
kfree(ns);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(free_cgroup_ns);
|
||||||
|
|
||||||
|
struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
|
||||||
|
struct user_namespace *user_ns,
|
||||||
|
struct cgroup_namespace *old_ns)
|
||||||
|
{
|
||||||
|
struct cgroup_namespace *new_ns;
|
||||||
|
struct css_set *cset;
|
||||||
|
|
||||||
|
BUG_ON(!old_ns);
|
||||||
|
|
||||||
|
if (!(flags & CLONE_NEWCGROUP)) {
|
||||||
|
get_cgroup_ns(old_ns);
|
||||||
|
return old_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allow only sysadmin to create cgroup namespace. */
|
||||||
|
if (!ns_capable(user_ns, CAP_SYS_ADMIN))
|
||||||
|
return ERR_PTR(-EPERM);
|
||||||
|
|
||||||
|
mutex_lock(&cgroup_mutex);
|
||||||
|
spin_lock_bh(&css_set_lock);
|
||||||
|
|
||||||
|
cset = task_css_set(current);
|
||||||
|
get_css_set(cset);
|
||||||
|
|
||||||
|
spin_unlock_bh(&css_set_lock);
|
||||||
|
mutex_unlock(&cgroup_mutex);
|
||||||
|
|
||||||
|
new_ns = alloc_cgroup_ns();
|
||||||
|
if (IS_ERR(new_ns)) {
|
||||||
|
put_css_set(cset);
|
||||||
|
return new_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
new_ns->user_ns = get_user_ns(user_ns);
|
||||||
|
new_ns->root_cset = cset;
|
||||||
|
|
||||||
|
return new_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
return container_of(ns, struct cgroup_namespace, ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
|
||||||
|
{
|
||||||
|
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
|
||||||
|
|
||||||
|
if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
|
||||||
|
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
/* Don't need to do anything if we are attaching to our own cgroupns. */
|
||||||
|
if (cgroup_ns == nsproxy->cgroup_ns)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
get_cgroup_ns(cgroup_ns);
|
||||||
|
put_cgroup_ns(nsproxy->cgroup_ns);
|
||||||
|
nsproxy->cgroup_ns = cgroup_ns;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ns_common *cgroupns_get(struct task_struct *task)
|
||||||
|
{
|
||||||
|
struct cgroup_namespace *ns = NULL;
|
||||||
|
struct nsproxy *nsproxy;
|
||||||
|
|
||||||
|
task_lock(task);
|
||||||
|
nsproxy = task->nsproxy;
|
||||||
|
if (nsproxy) {
|
||||||
|
ns = nsproxy->cgroup_ns;
|
||||||
|
get_cgroup_ns(ns);
|
||||||
|
}
|
||||||
|
task_unlock(task);
|
||||||
|
|
||||||
|
return ns ? &ns->ns : NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cgroupns_put(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
put_cgroup_ns(to_cg_ns(ns));
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct proc_ns_operations cgroupns_operations = {
|
||||||
|
.name = "cgroup",
|
||||||
|
.type = CLONE_NEWCGROUP,
|
||||||
|
.get = cgroupns_get,
|
||||||
|
.put = cgroupns_put,
|
||||||
|
.install = cgroupns_install,
|
||||||
|
};
|
||||||
|
|
||||||
|
static __init int cgroup_namespaces_init(void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
subsys_initcall(cgroup_namespaces_init);
|
||||||
|
|
||||||
#ifdef CONFIG_CGROUP_DEBUG
|
#ifdef CONFIG_CGROUP_DEBUG
|
||||||
static struct cgroup_subsys_state *
|
static struct cgroup_subsys_state *
|
||||||
debug_css_alloc(struct cgroup_subsys_state *parent_css)
|
debug_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||||
|
|||||||
@@ -2714,10 +2714,10 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
retval = -ENAMETOOLONG;
|
retval = -ENAMETOOLONG;
|
||||||
rcu_read_lock();
|
css = task_get_css(tsk, cpuset_cgrp_id);
|
||||||
css = task_css(tsk, cpuset_cgrp_id);
|
p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
|
||||||
p = cgroup_path(css->cgroup, buf, PATH_MAX);
|
current->nsproxy->cgroup_ns);
|
||||||
rcu_read_unlock();
|
css_put(css);
|
||||||
if (!p)
|
if (!p)
|
||||||
goto out_free;
|
goto out_free;
|
||||||
seq_puts(m, p);
|
seq_puts(m, p);
|
||||||
|
|||||||
@@ -1892,7 +1892,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
|
|||||||
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
|
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
|
||||||
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
|
||||||
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
|
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
|
||||||
CLONE_NEWUSER|CLONE_NEWPID))
|
CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
/*
|
/*
|
||||||
* Not implemented, but pretend it works if there is nothing
|
* Not implemented, but pretend it works if there is nothing
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
#include <linux/proc_ns.h>
|
#include <linux/proc_ns.h>
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
|
#include <linux/cgroup.h>
|
||||||
|
|
||||||
static struct kmem_cache *nsproxy_cachep;
|
static struct kmem_cache *nsproxy_cachep;
|
||||||
|
|
||||||
@@ -39,6 +40,9 @@ struct nsproxy init_nsproxy = {
|
|||||||
#ifdef CONFIG_NET
|
#ifdef CONFIG_NET
|
||||||
.net_ns = &init_net,
|
.net_ns = &init_net,
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef CONFIG_CGROUPS
|
||||||
|
.cgroup_ns = &init_cgroup_ns,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct nsproxy *create_nsproxy(void)
|
static inline struct nsproxy *create_nsproxy(void)
|
||||||
@@ -92,6 +96,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
|
|||||||
goto out_pid;
|
goto out_pid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
|
||||||
|
tsk->nsproxy->cgroup_ns);
|
||||||
|
if (IS_ERR(new_nsp->cgroup_ns)) {
|
||||||
|
err = PTR_ERR(new_nsp->cgroup_ns);
|
||||||
|
goto out_cgroup;
|
||||||
|
}
|
||||||
|
|
||||||
new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
|
new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
|
||||||
if (IS_ERR(new_nsp->net_ns)) {
|
if (IS_ERR(new_nsp->net_ns)) {
|
||||||
err = PTR_ERR(new_nsp->net_ns);
|
err = PTR_ERR(new_nsp->net_ns);
|
||||||
@@ -101,6 +112,8 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
|
|||||||
return new_nsp;
|
return new_nsp;
|
||||||
|
|
||||||
out_net:
|
out_net:
|
||||||
|
put_cgroup_ns(new_nsp->cgroup_ns);
|
||||||
|
out_cgroup:
|
||||||
if (new_nsp->pid_ns_for_children)
|
if (new_nsp->pid_ns_for_children)
|
||||||
put_pid_ns(new_nsp->pid_ns_for_children);
|
put_pid_ns(new_nsp->pid_ns_for_children);
|
||||||
out_pid:
|
out_pid:
|
||||||
@@ -128,7 +141,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
|
|||||||
struct nsproxy *new_ns;
|
struct nsproxy *new_ns;
|
||||||
|
|
||||||
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
|
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
|
||||||
CLONE_NEWPID | CLONE_NEWNET)))) {
|
CLONE_NEWPID | CLONE_NEWNET |
|
||||||
|
CLONE_NEWCGROUP)))) {
|
||||||
get_nsproxy(old_ns);
|
get_nsproxy(old_ns);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -165,6 +179,7 @@ void free_nsproxy(struct nsproxy *ns)
|
|||||||
put_ipc_ns(ns->ipc_ns);
|
put_ipc_ns(ns->ipc_ns);
|
||||||
if (ns->pid_ns_for_children)
|
if (ns->pid_ns_for_children)
|
||||||
put_pid_ns(ns->pid_ns_for_children);
|
put_pid_ns(ns->pid_ns_for_children);
|
||||||
|
put_cgroup_ns(ns->cgroup_ns);
|
||||||
put_net(ns->net_ns);
|
put_net(ns->net_ns);
|
||||||
kmem_cache_free(nsproxy_cachep, ns);
|
kmem_cache_free(nsproxy_cachep, ns);
|
||||||
}
|
}
|
||||||
@@ -180,7 +195,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
|
|||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
|
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
|
||||||
CLONE_NEWNET | CLONE_NEWPID)))
|
CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
user_ns = new_cred ? new_cred->user_ns : current_user_ns();
|
user_ns = new_cred ? new_cred->user_ns : current_user_ns();
|
||||||
|
|||||||
Reference in New Issue
Block a user