net: sched: make bstats per cpu and estimator RCU safe

In order to run qdiscs without locking, statistics and estimators
need to be handled correctly.

To resolve bstats, make the statistics per cpu. And because this is
only needed for qdiscs that run without locks, which will not be the
case for most qdiscs in the near future, only create per-cpu stats
when a qdisc sets the TCQ_F_CPUSTATS flag.
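A minimal sketch of what the flag-gated allocation implies (illustrative only; the helper name and the exact Qdisc fields used here are assumptions, not lines from this patch):

#include <linux/percpu.h>
#include <net/gen_stats.h>
#include <net/sch_generic.h>

/* Sketch: allocate per-cpu basic stats only for qdiscs that opt in
 * to lockless operation via TCQ_F_CPUSTATS; all other qdiscs keep
 * the shared bstats protected by the qdisc root lock.
 */
static int example_alloc_qdisc_bstats(struct Qdisc *sch)
{
	if (sch->flags & TCQ_F_CPUSTATS) {
		sch->cpu_bstats = alloc_percpu(struct gnet_stats_basic_cpu);
		if (!sch->cpu_bstats)
			return -ENOMEM;
	}
	return 0;
}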

Next, because estimators use the bstats to calculate packets per
second and bytes per second, the estimator code paths are updated
to use the per-cpu statistics.
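For context, a rough sketch of the datapath side of this scheme: each CPU bumps only its own counters under a u64_stats_sync sequence, which the copy helpers added below then fold back into a single gnet_stats_basic_packed. The helper name is illustrative; the per-cpu struct layout follows the gen_stats.c hunk further down:

#include <linux/u64_stats_sync.h>
#include <net/gen_stats.h>
#include <net/sch_generic.h>

/* Sketch: lockless per-cpu byte/packet accounting. The u64_stats
 * sequence keeps 64-bit reads consistent on 32-bit hosts; no qdisc
 * lock is taken on this path.
 */
static inline void example_bstats_cpu_update(struct Qdisc *sch,
					     const struct sk_buff *skb)
{
	struct gnet_stats_basic_cpu *bcpu = this_cpu_ptr(sch->cpu_bstats);

	u64_stats_update_begin(&bcpu->syncp);
	bcpu->bstats.bytes += qdisc_pkt_len(skb);
	bcpu->bstats.packets++;
	u64_stats_update_end(&bcpu->syncp);
}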

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Author:    John Fastabend <john.r.fastabend@intel.com>, 2014-09-28 11:52:56 -07:00
Committer: David S. Miller
Commit:    22e0f8b932 (parent 79cf79abce)
19 changed files with 164 additions and 51 deletions

net/core/gen_estimator.c

@@ -91,6 +91,8 @@ struct gen_estimator
 	u32			avpps;
 	struct rcu_head		e_rcu;
 	struct rb_node		node;
+	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	struct rcu_head		head;
 };
 
 struct gen_estimator_head
@@ -115,9 +117,8 @@ static void est_timer(unsigned long arg)
 	rcu_read_lock();
 	list_for_each_entry_rcu(e, &elist[idx].list, list) {
-		u64 nbytes;
+		struct gnet_stats_basic_packed b = {0};
 		u64 brate;
-		u32 npackets;
 		u32 rate;
 
 		spin_lock(e->stats_lock);
@@ -125,15 +126,15 @@ static void est_timer(unsigned long arg)
 		if (e->bstats == NULL)
 			goto skip;
 
-		nbytes = e->bstats->bytes;
-		npackets = e->bstats->packets;
-		brate = (nbytes - e->last_bytes)<<(7 - idx);
-		e->last_bytes = nbytes;
+		__gnet_stats_copy_basic(&b, e->cpu_bstats, e->bstats);
+
+		brate = (b.bytes - e->last_bytes)<<(7 - idx);
+		e->last_bytes = b.bytes;
 		e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
 		e->rate_est->bps = (e->avbps+0xF)>>5;
 
-		rate = (npackets - e->last_packets)<<(12 - idx);
-		e->last_packets = npackets;
+		rate = (b.packets - e->last_packets)<<(12 - idx);
+		e->last_packets = b.packets;
 		e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
 		e->rate_est->pps = (e->avpps+0x1FF)>>10;
 skip:
@@ -203,12 +204,14 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
+		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct gnet_stats_rate_est64 *rate_est,
 		      spinlock_t *stats_lock,
 		      struct nlattr *opt)
 {
 	struct gen_estimator *est;
 	struct gnet_estimator *parm = nla_data(opt);
+	struct gnet_stats_basic_packed b = {0};
 	int idx;
 
 	if (nla_len(opt) < sizeof(*parm))
@@ -221,15 +224,18 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	if (est == NULL)
 		return -ENOBUFS;
 
+	__gnet_stats_copy_basic(&b, cpu_bstats, bstats);
+
 	idx = parm->interval + 2;
 	est->bstats = bstats;
 	est->rate_est = rate_est;
 	est->stats_lock = stats_lock;
 	est->ewma_log = parm->ewma_log;
-	est->last_bytes = bstats->bytes;
+	est->last_bytes = b.bytes;
 	est->avbps = rate_est->bps<<5;
-	est->last_packets = bstats->packets;
+	est->last_packets = b.packets;
 	est->avpps = rate_est->pps<<10;
+	est->cpu_bstats = cpu_bstats;
 
 	spin_lock_bh(&est_tree_lock);
 	if (!elist[idx].timer.function) {
@@ -290,11 +296,12 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * Returns 0 on success or a negative error code.
  */
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
+			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct gnet_stats_rate_est64 *rate_est,
 			  spinlock_t *stats_lock, struct nlattr *opt)
 {
 	gen_kill_estimator(bstats, rate_est);
-	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
+	return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, opt);
 }
 EXPORT_SYMBOL(gen_replace_estimator);
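As a usage sketch of the widened estimator API above: callers that keep only shared stats pass NULL for cpu_bstats, while a lockless qdisc passes its per-cpu area so the estimator folds all CPUs through __gnet_stats_copy_basic(). The helper name and the Qdisc fields referenced here are assumptions for illustration:

/* Sketch: attach a rate estimator to a qdisc, handing over the
 * per-cpu stats pointer only when the qdisc runs lockless.
 */
static int example_attach_estimator(struct Qdisc *sch, spinlock_t *stats_lock,
				    struct nlattr *est_opt)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats =
		(sch->flags & TCQ_F_CPUSTATS) ? sch->cpu_bstats : NULL;

	return gen_new_estimator(&sch->bstats, cpu_bstats, &sch->rate_est,
				 stats_lock, est_opt);
}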

net/core/gen_stats.c

@@ -97,6 +97,43 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
 }
 EXPORT_SYMBOL(gnet_stats_start_copy);
 
+static void
+__gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats,
+			    struct gnet_stats_basic_cpu __percpu *cpu)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct gnet_stats_basic_cpu *bcpu = per_cpu_ptr(cpu, i);
+		unsigned int start;
+		__u64 bytes;
+		__u32 packets;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&bcpu->syncp);
+			bytes = bcpu->bstats.bytes;
+			packets = bcpu->bstats.packets;
+		} while (u64_stats_fetch_retry_irq(&bcpu->syncp, start));
+
+		bstats->bytes += bytes;
+		bstats->packets += packets;
+	}
+}
+
+void
+__gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+			struct gnet_stats_basic_cpu __percpu *cpu,
+			struct gnet_stats_basic_packed *b)
+{
+	if (cpu) {
+		__gnet_stats_copy_basic_cpu(bstats, cpu);
+	} else {
+		bstats->bytes = b->bytes;
+		bstats->packets = b->packets;
+	}
+}
+EXPORT_SYMBOL(__gnet_stats_copy_basic);
+
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
  * @d: dumping handle
@@ -109,19 +146,25 @@ EXPORT_SYMBOL(gnet_stats_start_copy);
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
+gnet_stats_copy_basic(struct gnet_dump *d,
+		      struct gnet_stats_basic_cpu __percpu *cpu,
+		      struct gnet_stats_basic_packed *b)
 {
+	struct gnet_stats_basic_packed bstats = {0};
+
+	__gnet_stats_copy_basic(&bstats, cpu, b);
+
 	if (d->compat_tc_stats) {
-		d->tc_stats.bytes = b->bytes;
-		d->tc_stats.packets = b->packets;
+		d->tc_stats.bytes = bstats.bytes;
+		d->tc_stats.packets = bstats.packets;
 	}
 
 	if (d->tail) {
 		struct gnet_stats_basic sb;
 
 		memset(&sb, 0, sizeof(sb));
-		sb.bytes = b->bytes;
-		sb.packets = b->packets;
+		sb.bytes = bstats.bytes;
+		sb.packets = bstats.packets;
 		return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb));
 	}
 
 	return 0;