cgroup_freezer: implement proper hierarchy support

Up until now, cgroup_freezer didn't implement hierarchy properly.
cgroups could be arranged in hierarchy but it didn't make any
difference in how each cgroup_freezer behaved.  They all operated
separately.

This patch implements proper hierarchy support.  If a cgroup is
frozen, all its descendants are frozen.  A cgroup is thawed iff it and
all its ancestors are THAWED.  freezer.self_freezing shows the current
freezing state for the cgroup itself.  freezer.parent_freezing shows
whether the cgroup is freezing because any of its ancestors is
freezing.

freezer_post_create() locks the parent and the new cgroup and inherits
the parent's state, and freezer_change_state() applies the new state
top-down using cgroup_for_each_descendant_pre(), which guarantees that
no child can escape its parent's state.  update_if_frozen() uses
cgroup_for_each_descendant_post() to propagate frozen states
bottom-up.

Synchronization could be coarser and easier by using a single mutex to
protect all hierarchy operations.  A finer-grained approach was used
because it wasn't too difficult for cgroup_freezer, and I think it's
beneficial to have an example implementation; cgroup_freezer is rather
simple and can serve as a good one.

As this makes cgroup_freezer properly hierarchical,
freezer_subsys.broken_hierarchy marking is removed.

Note that this patch changes userland-visible behavior - freezing a
cgroup now freezes all its descendants too.  This behavior change is
intended and was announced in advance via the .broken_hierarchy marking.

v2: Michal spotted a bug in freezer_change_state() - descendants were
    inheriting from the wrong ancestor.  Fixed.

v3: Documentation/cgroups/freezer-subsystem.txt updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
This commit is contained in:
Tejun Heo
2012-11-09 09:12:30 -08:00
parent 5300a9b348
commit ef9fe980c6
2 changed files with 165 additions and 59 deletions

View File

@@ -22,6 +22,13 @@
#include <linux/freezer.h>
#include <linux/seq_file.h>
/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
* set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
* for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
* for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
* its ancestors has FREEZING_SELF set.
*/
enum freezer_state_flags {
CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
@@ -50,6 +57,15 @@ static inline struct freezer *task_freezer(struct task_struct *task)
struct freezer, css);
}
/*
 * Return the freezer attached to the parent of @freezer's cgroup, or NULL
 * when that cgroup has no parent.
 */
static struct freezer *parent_freezer(struct freezer *freezer)
{
	struct cgroup *parent_cgrp = freezer->css.cgroup->parent;

	return parent_cgrp ? cgroup_freezer(parent_cgrp) : NULL;
}
bool cgroup_freezing(struct task_struct *task)
{
bool ret;
@@ -74,17 +90,6 @@ static const char *freezer_state_strs(unsigned int state)
return "THAWED";
};
/*
* State diagram
* Transitions are caused by userspace writes to the freezer.state file.
* The values in parenthesis are state labels. The rest are edge labels.
*
* (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
* ^ ^ | |
* | \_______THAWED_______/ |
* \__________________________THAWED____________/
*/
struct cgroup_subsys freezer_subsys;
static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
@@ -103,15 +108,34 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
* freezer_post_create - commit creation of a freezer cgroup
* @cgroup: cgroup being created
*
* We're committing to creation of @cgroup. Mark it online.
* We're committing to creation of @cgroup. Mark it online and inherit
* parent's freezing state while holding both parent's and our
* freezer->lock.
*/
static void freezer_post_create(struct cgroup *cgroup)
{
struct freezer *freezer = cgroup_freezer(cgroup);
struct freezer *parent = parent_freezer(freezer);
/*
* The following double locking and freezing state inheritance
* guarantee that @cgroup can never escape ancestors' freezing
* states. See cgroup_for_each_descendant_pre() for details.
*/
if (parent)
spin_lock_irq(&parent->lock);
spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
spin_lock_irq(&freezer->lock);
freezer->state |= CGROUP_FREEZER_ONLINE;
spin_unlock_irq(&freezer->lock);
if (parent && (parent->state & CGROUP_FREEZING)) {
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
atomic_inc(&system_freezing_cnt);
}
spin_unlock(&freezer->lock);
if (parent)
spin_unlock_irq(&parent->lock);
}
/**
@@ -153,6 +177,7 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
{
struct freezer *freezer = cgroup_freezer(new_cgrp);
struct task_struct *task;
bool clear_frozen = false;
spin_lock_irq(&freezer->lock);
@@ -172,10 +197,25 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
} else {
freeze_task(task);
freezer->state &= ~CGROUP_FROZEN;
clear_frozen = true;
}
}
spin_unlock_irq(&freezer->lock);
/*
* Propagate FROZEN clearing upwards. We may race with
* update_if_frozen(), but as long as both work bottom-up, either
* update_if_frozen() sees child's FROZEN cleared or we clear the
* parent's FROZEN later. No parent w/ !FROZEN children can be
* left FROZEN.
*/
while (clear_frozen && (freezer = parent_freezer(freezer))) {
spin_lock_irq(&freezer->lock);
freezer->state &= ~CGROUP_FROZEN;
clear_frozen = freezer->state & CGROUP_FREEZING;
spin_unlock_irq(&freezer->lock);
}
}
static void freezer_fork(struct task_struct *task)
@@ -200,24 +240,47 @@ out:
rcu_read_unlock();
}
/*
* We change from FREEZING to FROZEN lazily if the cgroup was only
* partially frozen when we exitted write. Caller must hold freezer->lock.
/**
* update_if_frozen - update whether a cgroup finished freezing
* @cgroup: cgroup of interest
*
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
* calling this function. If the current state is FREEZING but not FROZEN,
* this function checks whether all tasks of this cgroup and the descendant
* cgroups finished freezing and, if so, sets FROZEN.
*
* The caller is responsible for grabbing RCU read lock and calling
* update_if_frozen() on all descendants prior to invoking this function.
*
* Task states and freezer state might disagree while tasks are being
* migrated into or out of @cgroup, so we can't verify task states against
* @freezer state here. See freezer_attach() for details.
*/
static void update_if_frozen(struct freezer *freezer)
static void update_if_frozen(struct cgroup *cgroup)
{
struct cgroup *cgroup = freezer->css.cgroup;
struct freezer *freezer = cgroup_freezer(cgroup);
struct cgroup *pos;
struct cgroup_iter it;
struct task_struct *task;
WARN_ON_ONCE(!rcu_read_lock_held());
spin_lock_irq(&freezer->lock);
if (!(freezer->state & CGROUP_FREEZING) ||
(freezer->state & CGROUP_FROZEN))
return;
goto out_unlock;
/* are all (live) children frozen? */
cgroup_for_each_child(pos, cgroup) {
struct freezer *child = cgroup_freezer(pos);
if ((child->state & CGROUP_FREEZER_ONLINE) &&
!(child->state & CGROUP_FROZEN))
goto out_unlock;
}
/* are all tasks frozen? */
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
@@ -229,27 +292,32 @@ static void update_if_frozen(struct freezer *freezer)
* the usual frozen condition.
*/
if (!frozen(task) && !freezer_should_skip(task))
goto notyet;
goto out_iter_end;
}
}
freezer->state |= CGROUP_FROZEN;
notyet:
out_iter_end:
cgroup_iter_end(cgroup, &it);
out_unlock:
spin_unlock_irq(&freezer->lock);
}
static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
struct seq_file *m)
{
struct freezer *freezer = cgroup_freezer(cgroup);
unsigned int state;
struct cgroup *pos;
spin_lock_irq(&freezer->lock);
update_if_frozen(freezer);
state = freezer->state;
spin_unlock_irq(&freezer->lock);
rcu_read_lock();
seq_puts(m, freezer_state_strs(state));
/* update states bottom-up */
cgroup_for_each_descendant_post(pos, cgroup)
update_if_frozen(pos);
update_if_frozen(cgroup);
rcu_read_unlock();
seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state));
seq_putc(m, '\n');
return 0;
}
@@ -320,14 +388,39 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
* @freezer: freezer of interest
* @freeze: whether to freeze or thaw
*
* Freeze or thaw @cgroup according to @freeze.
* Freeze or thaw @freezer according to @freeze. The operations are
* recursive - all descendants of @freezer will be affected.
*/
static void freezer_change_state(struct freezer *freezer, bool freeze)
{
	struct cgroup *root = freezer->css.cgroup;
	struct cgroup *cgrp;

	/* Start with @freezer itself: set or clear its own FREEZING_SELF. */
	spin_lock_irq(&freezer->lock);
	freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
	spin_unlock_irq(&freezer->lock);

	/*
	 * Then walk every descendant of @freezer in pre-order, so a parent
	 * is always updated before its children.  Each descendant picks up
	 * its direct parent's FREEZING state as CGROUP_FREEZING_PARENT.
	 */
	rcu_read_lock();
	cgroup_for_each_descendant_pre(cgrp, root) {
		struct freezer *child = cgroup_freezer(cgrp);
		struct freezer *above = parent_freezer(child);
		bool inherit;

		/*
		 * Only @child is locked.  The update already made to
		 * @above->state is visible here, which is all that is
		 * needed; freezer_post_create() has more on the
		 * synchronization scheme.
		 */
		spin_lock_irq(&child->lock);
		inherit = above->state & CGROUP_FREEZING;
		freezer_apply_state(child, inherit, CGROUP_FREEZING_PARENT);
		spin_unlock_irq(&child->lock);
	}
	rcu_read_unlock();
}
static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
@@ -390,12 +483,4 @@ struct cgroup_subsys freezer_subsys = {
.attach = freezer_attach,
.fork = freezer_fork,
.base_cftypes = files,
/*
* freezer subsys doesn't handle hierarchy at all. Frozen state
* should be inherited through the hierarchy - if a parent is
* frozen, all its children should be frozen. Fix it and remove
* the following.
*/
.broken_hierarchy = true,
};