Merge branch 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
 "Several notable changes this cycle:

   - Thread mode was merged. This will be used for cgroup2 support for
     CPU and possibly other controllers. Unfortunately, CPU controller
     cgroup2 support didn't make this pull request but most contentions
     have been resolved and the support is likely to be merged before
     the next merge window.

   - cgroup.stat now shows the number of descendant cgroups.

   - cpuset now can enable the easier-to-configure v2 behavior on v1
     hierarchy"

* 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits)
  cpuset: Allow v2 behavior in v1 cgroup
  cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup
  cgroup: remove unneeded checks
  cgroup: misc changes
  cgroup: short-circuit cset_cgroup_from_root() on the default hierarchy
  cgroup: re-use the parent pointer in cgroup_destroy_locked()
  cgroup: add cgroup.stat interface with basic hierarchy stats
  cgroup: implement hierarchy limits
  cgroup: keep track of number of descent cgroups
  cgroup: add comment to cgroup_enable_threaded()
  cgroup: remove unnecessary empty check when enabling threaded mode
  cgroup: update debug controller to print out thread mode information
  cgroup: implement cgroup v2 thread support
  cgroup: implement CSS_TASK_ITER_THREADED
  cgroup: introduce cgroup->dom_cgrp and threaded css_set handling
  cgroup: add @flags to css_task_iter_start() and implement CSS_TASK_ITER_PROCS
  cgroup: reorganize cgroup.procs / task write path
  cgroup: replace css_set walking populated test with testing cgrp->nr_populated_csets
  cgroup: distinguish local and children populated states
  cgroup: remove now unused list_head @pending in cgroup_apply_cftypes()
  ...
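As a quick illustration (not part of the original pull message), the userspace-visible pieces described above can be exercised roughly as below; the mount point /mnt/cpuset, the cgroup names mygrp/worker and the $TID variable are placeholders, only the option and file names come from the changes in this merge:

    # Opt a v1 cpuset hierarchy into the easier v2 behavior (new mount option).
    mount -t cgroup -o cpuset,cpuset_v2_mode none /mnt/cpuset

    # Thread mode on cgroup2: mark a child cgroup threaded, then place an
    # individual thread (TID) instead of a whole process.
    echo threaded > /sys/fs/cgroup/mygrp/worker/cgroup.type
    echo "$TID"   > /sys/fs/cgroup/mygrp/worker/cgroup.threads

    # New hierarchy stats: descendant counts per cgroup.
    cat /sys/fs/cgroup/mygrp/cgroup.stat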
@@ -156,6 +156,8 @@ static inline void get_css_set(struct css_set *cset)

 bool cgroup_ssid_enabled(int ssid);
 bool cgroup_on_dfl(const struct cgroup *cgrp);
+bool cgroup_is_thread_root(struct cgroup *cgrp);
+bool cgroup_is_threaded(struct cgroup *cgrp);

 struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root);
 struct cgroup *task_cgroup_from_root(struct task_struct *task,
@@ -173,7 +175,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
			       struct cgroup_root *root, unsigned long magic,
			       struct cgroup_namespace *ns);

-bool cgroup_may_migrate_to(struct cgroup *dst_cgrp);
+int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp);
 void cgroup_migrate_finish(struct cgroup_mgctx *mgctx);
 void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx);
@@ -183,10 +185,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,

 int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
		       bool threadgroup);
-ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
-			     size_t nbytes, loff_t off, bool threadgroup);
-ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
-			   loff_t off);
+struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
+	__acquires(&cgroup_threadgroup_rwsem);
+void cgroup_procs_write_finish(struct task_struct *task)
+	__releases(&cgroup_threadgroup_rwsem);

 void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
@@ -99,8 +99,9 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
	if (cgroup_on_dfl(to))
		return -EINVAL;

-	if (!cgroup_may_migrate_to(to))
-		return -EBUSY;
+	ret = cgroup_migrate_vet_dst(to);
+	if (ret)
+		return ret;

	mutex_lock(&cgroup_mutex);
@@ -121,7 +122,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
	 * ->can_attach() fails.
	 */
	do {
-		css_task_iter_start(&from->self, &it);
+		css_task_iter_start(&from->self, 0, &it);
		task = css_task_iter_next(&it);
		if (task)
			get_task_struct(task);
@@ -373,7 +374,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
-	css_task_iter_start(&cgrp->self, &it);
+	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		if (unlikely(n == length))
			break;
@@ -510,10 +511,58 @@ static int cgroup_pidlist_show(struct seq_file *s, void *v)
	return 0;
 }

-static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
-				  char *buf, size_t nbytes, loff_t off)
+static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
+				     char *buf, size_t nbytes, loff_t off,
+				     bool threadgroup)
 {
-	return __cgroup_procs_write(of, buf, nbytes, off, false);
+	struct cgroup *cgrp;
+	struct task_struct *task;
+	const struct cred *cred, *tcred;
+	ssize_t ret;
+
+	cgrp = cgroup_kn_lock_live(of->kn, false);
+	if (!cgrp)
+		return -ENODEV;
+
+	task = cgroup_procs_write_start(buf, threadgroup);
+	ret = PTR_ERR_OR_ZERO(task);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * Even if we're attaching all tasks in the thread group, we only
+	 * need to check permissions on one of them.
+	 */
+	cred = current_cred();
+	tcred = get_task_cred(task);
+	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+	    !uid_eq(cred->euid, tcred->uid) &&
+	    !uid_eq(cred->euid, tcred->suid))
+		ret = -EACCES;
+	put_cred(tcred);
+	if (ret)
+		goto out_finish;
+
+	ret = cgroup_attach_task(cgrp, task, threadgroup);
+
+out_finish:
+	cgroup_procs_write_finish(task);
+out_unlock:
+	cgroup_kn_unlock(of->kn);
+
+	return ret ?: nbytes;
+}
+
+static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	return __cgroup1_procs_write(of, buf, nbytes, off, true);
+}
+
+static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	return __cgroup1_procs_write(of, buf, nbytes, off, false);
 }

 static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
@@ -592,7 +641,7 @@ struct cftype cgroup1_base_files[] = {
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_PROCS,
-		.write = cgroup_procs_write,
+		.write = cgroup1_procs_write,
	},
	{
		.name = "cgroup.clone_children",
@@ -611,7 +660,7 @@ struct cftype cgroup1_base_files[] = {
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_TASKS,
-		.write = cgroup_tasks_write,
+		.write = cgroup1_tasks_write,
	},
	{
		.name = "notify_on_release",
@@ -701,7 +750,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
	}
	rcu_read_unlock();

-	css_task_iter_start(&cgrp->self, &it);
+	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
@@ -846,6 +895,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
		seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");
+	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
+		seq_puts(seq, ",cpuset_v2_mode");

	spin_lock(&release_agent_path_lock);
	if (strlen(root->release_agent_path))
@@ -900,6 +951,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
			opts->cpuset_clone_children = true;
			continue;
		}
+		if (!strcmp(token, "cpuset_v2_mode")) {
+			opts->flags |= CGRP_ROOT_CPUSET_V2_MODE;
+			continue;
+		}
		if (!strcmp(token, "xattr")) {
			opts->flags |= CGRP_ROOT_XATTR;
			continue;
[File diff suppressed because it is too large]
@@ -300,6 +300,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

 static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);

+/*
+ * Cgroup v2 behavior is used when on default hierarchy or the
+ * cgroup_v2_mode flag is set.
+ */
+static inline bool is_in_v2_mode(void)
+{
+	return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+	      (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
+}
+
 /*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users. If someone tries to mount the "cpuset" filesystem, we
@@ -490,8 +500,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)

	/* On legacy hiearchy, we must be a subset of our parent cpuset. */
	ret = -EACCES;
-	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-	    !is_cpuset_subset(trial, par))
+	if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
		goto out;

	/*
@@ -870,7 +879,7 @@ static void update_tasks_cpumask(struct cpuset *cs)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&cs->css, &it);
+	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		set_cpus_allowed_ptr(task, cs->effective_cpus);
	css_task_iter_end(&it);
@@ -904,8 +913,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
		 * If it becomes empty, inherit the effective mask of the
		 * parent, which is guaranteed to have some CPUs.
		 */
-		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-		    cpumask_empty(new_cpus))
+		if (is_in_v2_mode() && cpumask_empty(new_cpus))
			cpumask_copy(new_cpus, parent->effective_cpus);

		/* Skip the whole subtree if the cpumask remains the same. */
@@ -922,7 +930,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
		cpumask_copy(cp->effective_cpus, new_cpus);
		spin_unlock_irq(&callback_lock);

-		WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+		WARN_ON(!is_in_v2_mode() &&
			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));

		update_tasks_cpumask(cp);
@@ -1100,7 +1108,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
	 * is idempotent. Also migrate pages in each mm to new nodes.
	 */
-	css_task_iter_start(&cs->css, &it);
+	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		struct mm_struct *mm;
		bool migrate;
@@ -1158,8 +1166,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
		 * If it becomes empty, inherit the effective mask of the
		 * parent, which is guaranteed to have some MEMs.
		 */
-		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-		    nodes_empty(*new_mems))
+		if (is_in_v2_mode() && nodes_empty(*new_mems))
			*new_mems = parent->effective_mems;

		/* Skip the whole subtree if the nodemask remains the same. */
@@ -1176,7 +1183,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
		cp->effective_mems = *new_mems;
		spin_unlock_irq(&callback_lock);

-		WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+		WARN_ON(!is_in_v2_mode() &&
			!nodes_equal(cp->mems_allowed, cp->effective_mems));

		update_tasks_nodemask(cp);
@@ -1293,7 +1300,7 @@ static void update_tasks_flags(struct cpuset *cs)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&cs->css, &it);
+	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		cpuset_update_task_spread_flag(cs, task);
	css_task_iter_end(&it);
@@ -1468,7 +1475,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)

	/* allow moving tasks into an empty cpuset if on default hierarchy */
	ret = -ENOSPC;
-	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+	if (!is_in_v2_mode() &&
	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
		goto out_unlock;

@@ -1987,7 +1994,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
	cpuset_inc();

	spin_lock_irq(&callback_lock);
-	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+	if (is_in_v2_mode()) {
		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
		cs->effective_mems = parent->effective_mems;
	}
@@ -2064,7 +2071,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
	mutex_lock(&cpuset_mutex);
	spin_lock_irq(&callback_lock);

-	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+	if (is_in_v2_mode()) {
		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
		top_cpuset.mems_allowed = node_possible_map;
	} else {
@@ -2258,7 +2265,7 @@ retry:
	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
	mems_updated = !nodes_equal(new_mems, cs->effective_mems);

-	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+	if (is_in_v2_mode())
		hotplug_update_tasks(cs, &new_cpus, &new_mems,
				     cpus_updated, mems_updated);
	else
@@ -2289,7 +2296,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
	static cpumask_t new_cpus;
	static nodemask_t new_mems;
	bool cpus_updated, mems_updated;
-	bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
+	bool on_dfl = is_in_v2_mode();

	mutex_lock(&cpuset_mutex);
@@ -114,27 +114,49 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 {
	struct cgroup_subsys_state *css = seq_css(seq);
	struct cgrp_cset_link *link;
-	int dead_cnt = 0, extra_refs = 0;
+	int dead_cnt = 0, extra_refs = 0, threaded_csets = 0;

	spin_lock_irq(&css_set_lock);

	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
		struct css_set *cset = link->cset;
		struct task_struct *task;
		int count = 0;
		int refcnt = refcount_read(&cset->refcount);

-		seq_printf(seq, " %d", refcnt);
-		if (refcnt - cset->nr_tasks > 0) {
-			int extra = refcnt - cset->nr_tasks;
-
-			seq_printf(seq, " +%d", extra);
-			/*
-			 * Take out the one additional reference in
-			 * init_css_set.
-			 */
-			if (cset == &init_css_set)
-				extra--;
-			extra_refs += extra;
+		/*
+		 * Print out the proc_cset and threaded_cset relationship
+		 * and highlight difference between refcount and task_count.
+		 */
+		seq_printf(seq, "css_set %pK", cset);
+		if (rcu_dereference_protected(cset->dom_cset, 1) != cset) {
+			threaded_csets++;
+			seq_printf(seq, "=>%pK", cset->dom_cset);
+		}
+		if (!list_empty(&cset->threaded_csets)) {
+			struct css_set *tcset;
+			int idx = 0;
+
+			list_for_each_entry(tcset, &cset->threaded_csets,
+					    threaded_csets_node) {
+				seq_puts(seq, idx ? "," : "<=");
+				seq_printf(seq, "%pK", tcset);
+				idx++;
+			}
+		} else {
+			seq_printf(seq, " %d", refcnt);
+			if (refcnt - cset->nr_tasks > 0) {
+				int extra = refcnt - cset->nr_tasks;
+
+				seq_printf(seq, " +%d", extra);
+				/*
+				 * Take out the one additional reference in
+				 * init_css_set.
+				 */
+				if (cset == &init_css_set)
+					extra--;
+				extra_refs += extra;
+			}
		}
		seq_puts(seq, "\n");
@@ -163,10 +185,12 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
	}
	spin_unlock_irq(&css_set_lock);

-	if (!dead_cnt && !extra_refs)
+	if (!dead_cnt && !extra_refs && !threaded_csets)
		return 0;

	seq_puts(seq, "\n");
+	if (threaded_csets)
+		seq_printf(seq, "threaded css_sets = %d\n", threaded_csets);
	if (extra_refs)
		seq_printf(seq, "extra references = %d\n", extra_refs);
	if (dead_cnt)
@@ -352,6 +376,7 @@ static int __init enable_cgroup_debug(char *str)
 {
	debug_cgrp_subsys.dfl_cftypes = debug_files;
	debug_cgrp_subsys.implicit_on_dfl = true;
+	debug_cgrp_subsys.threaded = true;
	return 1;
 }
 __setup("cgroup_debug", enable_cgroup_debug);
@@ -268,7 +268,7 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
	rcu_read_unlock();

	/* are all tasks frozen? */
-	css_task_iter_start(css, &it);
+	css_task_iter_start(css, 0, &it);

	while ((task = css_task_iter_next(&it))) {
		if (freezing(task)) {
@@ -320,7 +320,7 @@ static void freeze_cgroup(struct freezer *freezer)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&freezer->css, &it);
+	css_task_iter_start(&freezer->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		freeze_task(task);
	css_task_iter_end(&it);
@@ -331,7 +331,7 @@ static void unfreeze_cgroup(struct freezer *freezer)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&freezer->css, &it);
+	css_task_iter_start(&freezer->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		__thaw_task(task);
	css_task_iter_end(&it);
@@ -345,4 +345,5 @@ struct cgroup_subsys pids_cgrp_subsys = {
	.free		= pids_free,
	.legacy_cftypes	= pids_files,
	.dfl_cftypes	= pids_files,
+	.threaded	= true,
 };