Merge branch 'for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: "Nothing too interesting. Documentation updates and trivial changes; however, this pull request does contain the previously discussed dropping of __must_check from strscpy()" * 'for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: Documentation: Fix 'file_mapped' -> 'mapped_file' string: drop __must_check from strscpy() and restore strscpy() usages in cgroup cgroup, docs: document the root cgroup behavior of cpu and io controllers cgroup-v2.txt: fix typos cgroup: Update documentation reference Documentation/cgroup-v1: fix outdated programming details cgroup, docs: document cgroup v2 device controller
This commit is contained in:
@@ -523,12 +523,7 @@ Accessing a task's cgroup pointer may be done in the following ways:
|
|||||||
Each subsystem should:
|
Each subsystem should:
|
||||||
|
|
||||||
- add an entry in linux/cgroup_subsys.h
|
- add an entry in linux/cgroup_subsys.h
|
||||||
- define a cgroup_subsys object called <name>_subsys
|
- define a cgroup_subsys object called <name>_cgrp_subsys
|
||||||
|
|
||||||
If a subsystem can be compiled as a module, it should also have in its
|
|
||||||
module initcall a call to cgroup_load_subsys(), and in its exitcall a
|
|
||||||
call to cgroup_unload_subsys(). It should also set its_subsys.module =
|
|
||||||
THIS_MODULE in its .c file.
|
|
||||||
|
|
||||||
Each subsystem may export the following methods. The only mandatory
|
Each subsystem may export the following methods. The only mandatory
|
||||||
methods are css_alloc/free. Any others that are null are presumed to
|
methods are css_alloc/free. Any others that are null are presumed to
|
||||||
|
@@ -524,9 +524,9 @@ Note:
|
|||||||
Only anonymous and swap cache memory is listed as part of 'rss' stat.
|
Only anonymous and swap cache memory is listed as part of 'rss' stat.
|
||||||
This should not be confused with the true 'resident set size' or the
|
This should not be confused with the true 'resident set size' or the
|
||||||
amount of physical memory used by the cgroup.
|
amount of physical memory used by the cgroup.
|
||||||
'rss + file_mapped" will give you resident set size of cgroup.
|
'rss + mapped_file" will give you resident set size of cgroup.
|
||||||
(Note: file and shmem may be shared among other cgroups. In that case,
|
(Note: file and shmem may be shared among other cgroups. In that case,
|
||||||
file_mapped is accounted only when the memory cgroup is owner of page
|
mapped_file is accounted only when the memory cgroup is owner of page
|
||||||
cache.)
|
cache.)
|
||||||
|
|
||||||
5.3 swappiness
|
5.3 swappiness
|
||||||
|
@@ -53,10 +53,14 @@ v1 is available under Documentation/cgroup-v1/.
|
|||||||
5-3-2. Writeback
|
5-3-2. Writeback
|
||||||
5-4. PID
|
5-4. PID
|
||||||
5-4-1. PID Interface Files
|
5-4-1. PID Interface Files
|
||||||
5-5. RDMA
|
5-5. Device
|
||||||
5-5-1. RDMA Interface Files
|
5-6. RDMA
|
||||||
5-6. Misc
|
5-6-1. RDMA Interface Files
|
||||||
5-6-1. perf_event
|
5-7. Misc
|
||||||
|
5-7-1. perf_event
|
||||||
|
5-N. Non-normative information
|
||||||
|
5-N-1. CPU controller root cgroup process behaviour
|
||||||
|
5-N-2. IO controller root cgroup process behaviour
|
||||||
6. Namespace
|
6. Namespace
|
||||||
6-1. Basics
|
6-1. Basics
|
||||||
6-2. The Root and Views
|
6-2. The Root and Views
|
||||||
@@ -279,7 +283,7 @@ thread mode, the following conditions must be met.
|
|||||||
exempt from this requirement.
|
exempt from this requirement.
|
||||||
|
|
||||||
Topology-wise, a cgroup can be in an invalid state. Please consider
|
Topology-wise, a cgroup can be in an invalid state. Please consider
|
||||||
the following toplogy::
|
the following topology::
|
||||||
|
|
||||||
A (threaded domain) - B (threaded) - C (domain, just created)
|
A (threaded domain) - B (threaded) - C (domain, just created)
|
||||||
|
|
||||||
@@ -420,7 +424,9 @@ The root cgroup is exempt from this restriction. Root contains
|
|||||||
processes and anonymous resource consumption which can't be associated
|
processes and anonymous resource consumption which can't be associated
|
||||||
with any other cgroups and requires special treatment from most
|
with any other cgroups and requires special treatment from most
|
||||||
controllers. How resource consumption in the root cgroup is governed
|
controllers. How resource consumption in the root cgroup is governed
|
||||||
is up to each controller.
|
is up to each controller (for more information on this topic please
|
||||||
|
refer to the Non-normative information section in the Controllers
|
||||||
|
chapter).
|
||||||
|
|
||||||
Note that the restriction doesn't get in the way if there is no
|
Note that the restriction doesn't get in the way if there is no
|
||||||
enabled controller in the cgroup's "cgroup.subtree_control". This is
|
enabled controller in the cgroup's "cgroup.subtree_control". This is
|
||||||
@@ -1063,10 +1069,10 @@ PAGE_SIZE multiple when read back.
|
|||||||
reached the limit and allocation was about to fail.
|
reached the limit and allocation was about to fail.
|
||||||
|
|
||||||
Depending on context result could be invocation of OOM
|
Depending on context result could be invocation of OOM
|
||||||
killer and retrying allocation or failing alloction.
|
killer and retrying allocation or failing allocation.
|
||||||
|
|
||||||
Failed allocation in its turn could be returned into
|
Failed allocation in its turn could be returned into
|
||||||
userspace as -ENOMEM or siletly ignored in cases like
|
userspace as -ENOMEM or silently ignored in cases like
|
||||||
disk readahead. For now OOM in memory cgroup kills
|
disk readahead. For now OOM in memory cgroup kills
|
||||||
tasks iff shortage has happened inside page fault.
|
tasks iff shortage has happened inside page fault.
|
||||||
|
|
||||||
@@ -1191,7 +1197,7 @@ PAGE_SIZE multiple when read back.
|
|||||||
cgroups. The default is "max".
|
cgroups. The default is "max".
|
||||||
|
|
||||||
Swap usage hard limit. If a cgroup's swap usage reaches this
|
Swap usage hard limit. If a cgroup's swap usage reaches this
|
||||||
limit, anonymous meomry of the cgroup will not be swapped out.
|
limit, anonymous memory of the cgroup will not be swapped out.
|
||||||
|
|
||||||
|
|
||||||
Usage Guidelines
|
Usage Guidelines
|
||||||
@@ -1429,6 +1435,30 @@ through fork() or clone(). These will return -EAGAIN if the creation
|
|||||||
of a new process would cause a cgroup policy to be violated.
|
of a new process would cause a cgroup policy to be violated.
|
||||||
|
|
||||||
|
|
||||||
|
Device controller
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
Device controller manages access to device files. It includes both
|
||||||
|
creation of new device files (using mknod), and access to the
|
||||||
|
existing device files.
|
||||||
|
|
||||||
|
Cgroup v2 device controller has no interface files and is implemented
|
||||||
|
on top of cgroup BPF. To control access to device files, a user may
|
||||||
|
create bpf programs of the BPF_CGROUP_DEVICE type and attach them
|
||||||
|
to cgroups. On an attempt to access a device file, corresponding
|
||||||
|
BPF programs will be executed, and depending on the return value
|
||||||
|
the attempt will succeed or fail with -EPERM.
|
||||||
|
|
||||||
|
A BPF_CGROUP_DEVICE program takes a pointer to the bpf_cgroup_dev_ctx
|
||||||
|
structure, which describes the device access attempt: access type
|
||||||
|
(mknod/read/write) and device (type, major and minor numbers).
|
||||||
|
If the program returns 0, the attempt fails with -EPERM, otherwise
|
||||||
|
it succeeds.
|
||||||
|
|
||||||
|
An example of a BPF_CGROUP_DEVICE program may be found in the kernel
|
||||||
|
source tree in the tools/testing/selftests/bpf/dev_cgroup.c file.
|
||||||
|
|
||||||
|
|
||||||
RDMA
|
RDMA
|
||||||
----
|
----
|
||||||
|
|
||||||
@@ -1481,6 +1511,35 @@ always be filtered by cgroup v2 path. The controller can still be
|
|||||||
moved to a legacy hierarchy after v2 hierarchy is populated.
|
moved to a legacy hierarchy after v2 hierarchy is populated.
|
||||||
|
|
||||||
|
|
||||||
|
Non-normative information
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
This section contains information that isn't considered to be a part of
|
||||||
|
the stable kernel API and so is subject to change.
|
||||||
|
|
||||||
|
|
||||||
|
CPU controller root cgroup process behaviour
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
When distributing CPU cycles in the root cgroup each thread in this
|
||||||
|
cgroup is treated as if it was hosted in a separate child cgroup of the
|
||||||
|
root cgroup. This child cgroup's weight is dependent on its thread's nice
|
||||||
|
level.
|
||||||
|
|
||||||
|
For details of this mapping see sched_prio_to_weight array in
|
||||||
|
kernel/sched/core.c file (values from this array should be scaled
|
||||||
|
appropriately so the neutral - nice 0 - value is 100 instead of 1024).
|
||||||
|
|
||||||
|
|
||||||
|
IO controller root cgroup process behaviour
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Root cgroup processes are hosted in an implicit leaf child node.
|
||||||
|
When distributing IO resources this implicit child node is taken into
|
||||||
|
account as if it was a normal child cgroup of the root cgroup with a
|
||||||
|
weight value of 200.
|
||||||
|
|
||||||
|
|
||||||
Namespace
|
Namespace
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
@@ -561,7 +561,7 @@ struct cftype {
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Control Group subsystem type.
|
* Control Group subsystem type.
|
||||||
* See Documentation/cgroups/cgroups.txt for details
|
* See Documentation/cgroup-v1/cgroups.txt for details
|
||||||
*/
|
*/
|
||||||
struct cgroup_subsys {
|
struct cgroup_subsys {
|
||||||
struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
|
struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
|
||||||
|
@@ -28,7 +28,7 @@ extern char * strncpy(char *,const char *, __kernel_size_t);
|
|||||||
size_t strlcpy(char *, const char *, size_t);
|
size_t strlcpy(char *, const char *, size_t);
|
||||||
#endif
|
#endif
|
||||||
#ifndef __HAVE_ARCH_STRSCPY
|
#ifndef __HAVE_ARCH_STRSCPY
|
||||||
ssize_t __must_check strscpy(char *, const char *, size_t);
|
ssize_t strscpy(char *, const char *, size_t);
|
||||||
#endif
|
#endif
|
||||||
#ifndef __HAVE_ARCH_STRCAT
|
#ifndef __HAVE_ARCH_STRCAT
|
||||||
extern char * strcat(char *, const char *);
|
extern char * strcat(char *, const char *);
|
||||||
|
@@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
|
|||||||
cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
|
cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
|
||||||
cft->name);
|
cft->name);
|
||||||
else
|
else
|
||||||
strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
|
strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
|
|||||||
|
|
||||||
root->flags = opts->flags;
|
root->flags = opts->flags;
|
||||||
if (opts->release_agent)
|
if (opts->release_agent)
|
||||||
strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX);
|
strscpy(root->release_agent_path, opts->release_agent, PATH_MAX);
|
||||||
if (opts->name)
|
if (opts->name)
|
||||||
strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
|
strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
|
||||||
if (opts->cpuset_clone_children)
|
if (opts->cpuset_clone_children)
|
||||||
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
|
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user