bpf: add lookup/update support for per-cpu hash and array maps

The functions bpf_map_lookup_elem(map, key, value) and
bpf_map_update_elem(map, key, value, flags) need to get/set
values from all-cpus for per-cpu hash and array maps,
so that user space can aggregate/update them as necessary.

Example of single counter aggregation in user space:
  unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
  long values[nr_cpus];
  long value = 0;

  bpf_lookup_elem(fd, key, values);
  for (i = 0; i < nr_cpus; i++)
    value += values[i];

The user space must provide round_up(value_size, 8) * nr_cpus
array to get/set values, since kernel will use 'long' copy
of per-cpu values to try to copy good counters atomically.
It's a best-effort, since bpf programs and user space are racing
to access the same memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Alexei Starovoitov
2016-02-01 22:39:55 -08:00
committed by David S. Miller
parent a10423b87a
commit 15a07b3381
4 changed files with 201 additions and 26 deletions

View File

@@ -290,7 +290,7 @@ static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size)
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
bool percpu)
bool percpu, bool onallcpus)
{
u32 size = htab->map.value_size;
struct htab_elem *l_new;
@@ -312,8 +312,18 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
return NULL;
}
/* copy true value_size bytes */
memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
if (!onallcpus) {
/* copy true value_size bytes */
memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
} else {
int off = 0, cpu;
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
value + off, size);
off += size;
}
}
htab_elem_set_ptr(l_new, key_size, pptr);
} else {
memcpy(l_new->key + round_up(key_size, 8), value, size);
@@ -368,7 +378,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
/* allocate new element outside of the lock, since
* we're most likley going to insert it
*/
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false);
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
if (!l_new)
return -ENOMEM;
@@ -402,8 +412,9 @@ err:
return ret;
}
static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags,
bool onallcpus)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct htab_elem *l_new = NULL, *l_old;
@@ -436,12 +447,25 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
goto err;
if (l_old) {
void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
u32 size = htab->map.value_size;
/* per-cpu hash map can update value in-place */
memcpy(this_cpu_ptr(htab_elem_get_ptr(l_old, key_size)),
value, htab->map.value_size);
if (!onallcpus) {
memcpy(this_cpu_ptr(pptr), value, size);
} else {
int off = 0, cpu;
size = round_up(size, 8);
for_each_possible_cpu(cpu) {
bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
value + off, size);
off += size;
}
}
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
hash, true);
hash, true, onallcpus);
if (!l_new) {
ret = -ENOMEM;
goto err;
@@ -455,6 +479,12 @@ err:
return ret;
}
static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
{
return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}
/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
@@ -557,6 +587,41 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
struct htab_elem *l;
void __percpu *pptr;
int ret = -ENOENT;
int cpu, off = 0;
u32 size;
/* per_cpu areas are zero-filled and bpf programs can only
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
l = __htab_map_lookup_elem(map, key);
if (!l)
goto out;
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off,
per_cpu_ptr(pptr, cpu), size);
off += size;
}
ret = 0;
out:
rcu_read_unlock();
return ret;
}
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
return __htab_percpu_map_update_elem(map, key, value, map_flags, true);
}
static const struct bpf_map_ops htab_percpu_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,