Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller:
 "Highlights:

   1) Support AES128-CCM ciphers in kTLS, from Vakul Garg.

   2) Add fib_sync_mem to control the amount of dirty memory we allow to
      queue up between synchronize RCU calls, from David Ahern.

   3) Make flow classifier more lockless, from Vlad Buslov.

   4) Add PHY downshift support to aquantia driver, from Heiner
      Kallweit.

   5) Add SKB cache for TCP rx and tx, from Eric Dumazet. This reduces
      contention on SLAB spinlocks in heavy RPC workloads.

   6) Partial GSO offload support in XFRM, from Boris Pismenny.

   7) Add fast link down support to ethtool, from Heiner Kallweit.

   8) Use siphash for IP ID generator, from Eric Dumazet.

   9) Pull nexthops even further out from ipv4/ipv6 routes and FIB
      entries, from David Ahern.

  10) Move skb->xmit_more into a per-cpu variable, from Florian
      Westphal.

  11) Improve eBPF verifier speed and increase maximum program size,
      from Alexei Starovoitov.

  12) Eliminate per-bucket spinlocks in rhashtable, and instead use bit
      spinlocks. From Neil Brown.

  13) Allow tunneling with GUE encap in ipvs, from Jacky Hu.

  14) Improve link partner cap detection in generic PHY code, from
      Heiner Kallweit.

  15) Add layer 2 encap support to bpf_skb_adjust_room(), from Alan
      Maguire.

  16) Remove SKB list implementation assumptions in SCTP, your's truly.

  17) Various cleanups, optimizations, and simplifications in r8169
      driver. From Heiner Kallweit.

  18) Add memory accounting on TX and RX path of SCTP, from Xin Long.

  19) Switch PHY drivers over to use dynamic featue detection, from
      Heiner Kallweit.

  20) Support flow steering without masking in dpaa2-eth, from Ioana
      Ciocoi.

  21) Implement ndo_get_devlink_port in netdevsim driver, from Jiri
      Pirko.

  22) Increase the strict parsing of current and future netlink
      attributes, also export such policies to userspace. From Johannes
      Berg.

  23) Allow DSA tag drivers to be modular, from Andrew Lunn.

  24) Remove legacy DSA probing support, also from Andrew Lunn.

  25) Allow ll_temac driver to be used on non-x86 platforms, from Esben
      Haabendal.

  26) Add a generic tracepoint for TX queue timeouts to ease debugging,
      from Cong Wang.

  27) More indirect call optimizations, from Paolo Abeni"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1763 commits)
  cxgb4: Fix error path in cxgb4_init_module
  net: phy: improve pause mode reporting in phy_print_status
  dt-bindings: net: Fix a typo in the phy-mode list for ethernet bindings
  net: macb: Change interrupt and napi enable order in open
  net: ll_temac: Improve error message on error IRQ
  net/sched: remove block pointer from common offload structure
  net: ethernet: support of_get_mac_address new ERR_PTR error
  net: usb: smsc: fix warning reported by kbuild test robot
  staging: octeon-ethernet: Fix of_get_mac_address ERR_PTR check
  net: dsa: support of_get_mac_address new ERR_PTR error
  net: dsa: sja1105: Fix status initialization in sja1105_get_ethtool_stats
  vrf: sit mtu should not be updated when vrf netdev is the link
  net: dsa: Fix error cleanup path in dsa_init_module
  l2tp: Fix possible NULL pointer dereference
  taprio: add null check on sched_nest to avoid potential null pointer dereference
  net: mvpp2: cls: fix less than zero check on a u32 variable
  net_sched: sch_fq: handle non connected flows
  net_sched: sch_fq: do not assume EDT packets are ordered
  net: hns3: use devm_kcalloc when allocating desc_cb
  net: hns3: some cleanup for struct hns3_enet_ring
  ...
This commit is contained in:
Linus Torvalds
2019-05-07 22:03:58 -07:00
1636 changed files with 126938 additions and 27105 deletions

View File

@@ -22,7 +22,7 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -63,6 +63,7 @@ int array_map_alloc_check(union bpf_attr *attr)
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
!bpf_map_flags_access_ok(attr->map_flags) ||
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
@@ -160,6 +161,36 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + array->elem_size * (index & array->index_mask);
}
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
u32 off)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
if (map->max_entries != 1)
return -ENOTSUPP;
if (off >= map->value_size)
return -EINVAL;
*imm = (unsigned long)array->value;
return 0;
}
static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
u32 *off)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u64 base = (unsigned long)array->value;
u64 range = array->elem_size;
if (map->max_entries != 1)
return -ENOTSUPP;
if (imm < base || imm >= base + range)
return -ENOENT;
*off = imm - base;
return 0;
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
@@ -360,7 +391,8 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
return;
}
seq_printf(m, "%u: ", *(u32 *)key);
if (map->btf_key_type_id)
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
@@ -397,6 +429,18 @@ static int array_map_check_btf(const struct bpf_map *map,
{
u32 int_data;
/* One exception for keyless BTF: .bss/.data/.rodata map */
if (btf_type_is_void(key_type)) {
if (map->map_type != BPF_MAP_TYPE_ARRAY ||
map->max_entries != 1)
return -EINVAL;
if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
return -EINVAL;
return 0;
}
if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
return -EINVAL;
@@ -419,6 +463,8 @@ const struct bpf_map_ops array_map_ops = {
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
.map_direct_value_addr = array_map_direct_value_addr,
.map_direct_value_meta = array_map_direct_value_meta,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
@@ -440,6 +486,9 @@ static int fd_array_map_alloc_check(union bpf_attr *attr)
/* only file descriptors can be stored in this type of map */
if (attr->value_size != sizeof(u32))
return -EINVAL;
/* Program read-only/write-only not supported for special maps yet. */
if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
return -EINVAL;
return array_map_alloc_check(attr);
}

View File

@@ -185,6 +185,16 @@
i < btf_type_vlen(struct_type); \
i++, member++)
#define for_each_vsi(i, struct_type, member) \
for (i = 0, member = btf_type_var_secinfo(struct_type); \
i < btf_type_vlen(struct_type); \
i++, member++)
#define for_each_vsi_from(i, from, struct_type, member) \
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
i < btf_type_vlen(struct_type); \
i++, member++)
static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
@@ -262,6 +272,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_RESTRICT] = "RESTRICT",
[BTF_KIND_FUNC] = "FUNC",
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
};
struct btf_kind_operations {
@@ -314,7 +326,7 @@ static bool btf_type_is_modifier(const struct btf_type *t)
return false;
}
static bool btf_type_is_void(const struct btf_type *t)
bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void;
}
@@ -375,13 +387,36 @@ static bool btf_type_is_int(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_INT;
}
static bool btf_type_is_var(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
}
static bool btf_type_is_datasec(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
}
/* Types that act only as a source, not sink or intermediate
* type when resolving.
*/
static bool btf_type_is_resolve_source_only(const struct btf_type *t)
{
return btf_type_is_var(t) ||
btf_type_is_datasec(t);
}
/* What types need to be resolved?
*
* btf_type_is_modifier() is an obvious one.
*
* btf_type_is_struct() because its member refers to
* another type (through member->type).
*
* btf_type_is_var() because the variable refers to
* another type. btf_type_is_datasec() holds multiple
* btf_type_is_var() types that need resolving.
*
* btf_type_is_array() because its element (array->type)
* refers to another type. Array can be thought of a
* special case of struct while array just has the same
@@ -390,9 +425,11 @@ static bool btf_type_is_int(const struct btf_type *t)
static bool btf_type_needs_resolve(const struct btf_type *t)
{
return btf_type_is_modifier(t) ||
btf_type_is_ptr(t) ||
btf_type_is_struct(t) ||
btf_type_is_array(t);
btf_type_is_ptr(t) ||
btf_type_is_struct(t) ||
btf_type_is_array(t) ||
btf_type_is_var(t) ||
btf_type_is_datasec(t);
}
/* t->size can be used */
@@ -403,6 +440,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
return true;
}
@@ -467,6 +505,16 @@ static const struct btf_enum *btf_type_enum(const struct btf_type *t)
return (const struct btf_enum *)(t + 1);
}
static const struct btf_var *btf_type_var(const struct btf_type *t)
{
return (const struct btf_var *)(t + 1);
}
static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
{
return (const struct btf_var_secinfo *)(t + 1);
}
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -478,23 +526,31 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
offset < btf->hdr.str_len;
}
/* Only C-style identifier is permitted. This can be relaxed if
* necessary.
*/
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
{
if ((first ? !isalpha(c) :
!isalnum(c)) &&
c != '_' &&
((c == '.' && !dot_ok) ||
c != '.'))
return false;
return true;
}
static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
/* offset must be valid */
const char *src = &btf->strings[offset];
const char *src_limit;
if (!isalpha(*src) && *src != '_')
if (!__btf_name_char_ok(*src, true, dot_ok))
return false;
/* set a limit on identifier length */
src_limit = src + KSYM_NAME_LEN;
src++;
while (*src && src < src_limit) {
if (!isalnum(*src) && *src != '_')
if (!__btf_name_char_ok(*src, false, dot_ok))
return false;
src++;
}
@@ -502,6 +558,19 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
return !*src;
}
/* Only C-style identifier is permitted. This can be relaxed if
* necessary.
*/
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
return __btf_name_valid(btf, offset, false);
}
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
return __btf_name_valid(btf, offset, true);
}
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!offset)
@@ -697,6 +766,32 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
__printf(4, 5)
static void btf_verifier_log_vsi(struct btf_verifier_env *env,
const struct btf_type *datasec_type,
const struct btf_var_secinfo *vsi,
const char *fmt, ...)
{
struct bpf_verifier_log *log = &env->log;
va_list args;
if (!bpf_verifier_log_needed(log))
return;
if (env->phase != CHECK_META)
btf_verifier_log_type(env, datasec_type, NULL);
__btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
vsi->type, vsi->offset, vsi->size);
if (fmt && *fmt) {
__btf_verifier_log(log, " ");
va_start(args, fmt);
bpf_verifier_vlog(log, fmt, args);
va_end(args);
}
__btf_verifier_log(log, "\n");
}
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
u32 btf_data_size)
{
@@ -974,7 +1069,8 @@ const struct btf_type *btf_type_id_size(const struct btf *btf,
} else if (btf_type_is_ptr(size_type)) {
size = sizeof(void *);
} else {
if (WARN_ON_ONCE(!btf_type_is_modifier(size_type)))
if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
!btf_type_is_var(size_type)))
return NULL;
size = btf->resolved_sizes[size_type_id];
@@ -1509,7 +1605,7 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
u32 next_type_size = 0;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type) {
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1542,6 +1638,53 @@ static int btf_modifier_resolve(struct btf_verifier_env *env,
return 0;
}
static int btf_var_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
const struct btf_type *next_type;
const struct btf_type *t = v->t;
u32 next_type_id = t->type;
struct btf *btf = env->btf;
u32 next_type_size;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
if (!env_type_is_resolve_sink(env, next_type) &&
!env_type_is_resolved(env, next_type_id))
return env_stack_push(env, next_type, next_type_id);
if (btf_type_is_modifier(next_type)) {
const struct btf_type *resolved_type;
u32 resolved_type_id;
resolved_type_id = next_type_id;
resolved_type = btf_type_id_resolve(btf, &resolved_type_id);
if (btf_type_is_ptr(resolved_type) &&
!env_type_is_resolve_sink(env, resolved_type) &&
!env_type_is_resolved(env, resolved_type_id))
return env_stack_push(env, resolved_type,
resolved_type_id);
}
/* We must resolve to something concrete at this point, no
* forward types or similar that would resolve to size of
* zero is allowed.
*/
if (!btf_type_id_size(btf, &next_type_id, &next_type_size)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
env_stack_pop_resolved(env, next_type_id, next_type_size);
return 0;
}
static int btf_ptr_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
@@ -1551,7 +1694,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
struct btf *btf = env->btf;
next_type = btf_type_by_id(btf, next_type_id);
if (!next_type) {
if (!next_type || btf_type_is_resolve_source_only(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1609,6 +1752,15 @@ static void btf_modifier_seq_show(const struct btf *btf,
btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
}
static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct seq_file *m)
{
t = btf_type_id_resolve(btf, &type_id);
btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m);
}
static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct seq_file *m)
@@ -1776,7 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
if (btf_type_nosize_or_null(index_type)) {
if (btf_type_is_resolve_source_only(index_type) ||
btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
@@ -1795,7 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_nosize_or_null(elem_type)) {
if (btf_type_is_resolve_source_only(elem_type) ||
btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
"Invalid elem");
return -EINVAL;
@@ -2016,7 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
if (btf_type_nosize_or_null(member_type)) {
if (btf_type_is_resolve_source_only(member_type) ||
btf_type_nosize_or_null(member_type)) {
btf_verifier_log_member(env, v->t, member,
"Invalid member");
return -EINVAL;
@@ -2411,6 +2566,222 @@ static struct btf_kind_operations func_ops = {
.seq_show = btf_df_seq_show,
};
static s32 btf_var_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
{
const struct btf_var *var;
u32 meta_needed = sizeof(*var);
if (meta_left < meta_needed) {
btf_verifier_log_basic(env, t,
"meta_left:%u meta_needed:%u",
meta_left, meta_needed);
return -EINVAL;
}
if (btf_type_vlen(t)) {
btf_verifier_log_type(env, t, "vlen != 0");
return -EINVAL;
}
if (btf_type_kflag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
if (!t->name_off ||
!__btf_name_valid(env->btf, t->name_off, true)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
/* A var cannot be in type void */
if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
btf_verifier_log_type(env, t, "Invalid type_id");
return -EINVAL;
}
var = btf_type_var(t);
if (var->linkage != BTF_VAR_STATIC &&
var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
btf_verifier_log_type(env, t, "Linkage not supported");
return -EINVAL;
}
btf_verifier_log_type(env, t, NULL);
return meta_needed;
}
static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
{
const struct btf_var *var = btf_type_var(t);
btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
}
static const struct btf_kind_operations var_ops = {
.check_meta = btf_var_check_meta,
.resolve = btf_var_resolve,
.check_member = btf_df_check_member,
.check_kflag_member = btf_df_check_kflag_member,
.log_details = btf_var_log,
.seq_show = btf_var_seq_show,
};
static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
{
const struct btf_var_secinfo *vsi;
u64 last_vsi_end_off = 0, sum = 0;
u32 i, meta_needed;
meta_needed = btf_type_vlen(t) * sizeof(*vsi);
if (meta_left < meta_needed) {
btf_verifier_log_basic(env, t,
"meta_left:%u meta_needed:%u",
meta_left, meta_needed);
return -EINVAL;
}
if (!btf_type_vlen(t)) {
btf_verifier_log_type(env, t, "vlen == 0");
return -EINVAL;
}
if (!t->size) {
btf_verifier_log_type(env, t, "size == 0");
return -EINVAL;
}
if (btf_type_kflag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
if (!t->name_off ||
!btf_name_valid_section(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
btf_verifier_log_type(env, t, NULL);
for_each_vsi(i, t, vsi) {
/* A var cannot be in type void */
if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
btf_verifier_log_vsi(env, t, vsi,
"Invalid type_id");
return -EINVAL;
}
if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
btf_verifier_log_vsi(env, t, vsi,
"Invalid offset");
return -EINVAL;
}
if (!vsi->size || vsi->size > t->size) {
btf_verifier_log_vsi(env, t, vsi,
"Invalid size");
return -EINVAL;
}
last_vsi_end_off = vsi->offset + vsi->size;
if (last_vsi_end_off > t->size) {
btf_verifier_log_vsi(env, t, vsi,
"Invalid offset+size");
return -EINVAL;
}
btf_verifier_log_vsi(env, t, vsi, NULL);
sum += vsi->size;
}
if (t->size < sum) {
btf_verifier_log_type(env, t, "Invalid btf_info size");
return -EINVAL;
}
return meta_needed;
}
static int btf_datasec_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
const struct btf_var_secinfo *vsi;
struct btf *btf = env->btf;
u16 i;
for_each_vsi_from(i, v->next_member, v->t, vsi) {
u32 var_type_id = vsi->type, type_id, type_size = 0;
const struct btf_type *var_type = btf_type_by_id(env->btf,
var_type_id);
if (!var_type || !btf_type_is_var(var_type)) {
btf_verifier_log_vsi(env, v->t, vsi,
"Not a VAR kind member");
return -EINVAL;
}
if (!env_type_is_resolve_sink(env, var_type) &&
!env_type_is_resolved(env, var_type_id)) {
env_stack_set_next_member(env, i + 1);
return env_stack_push(env, var_type, var_type_id);
}
type_id = var_type->type;
if (!btf_type_id_size(btf, &type_id, &type_size)) {
btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
return -EINVAL;
}
if (vsi->size < type_size) {
btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
return -EINVAL;
}
}
env_stack_pop_resolved(env, 0, 0);
return 0;
}
static void btf_datasec_log(struct btf_verifier_env *env,
const struct btf_type *t)
{
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
}
static void btf_datasec_seq_show(const struct btf *btf,
const struct btf_type *t, u32 type_id,
void *data, u8 bits_offset,
struct seq_file *m)
{
const struct btf_var_secinfo *vsi;
const struct btf_type *var;
u32 i;
seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off));
for_each_vsi(i, t, vsi) {
var = btf_type_by_id(btf, vsi->type);
if (i)
seq_puts(m, ",");
btf_type_ops(var)->seq_show(btf, var, vsi->type,
data + vsi->offset, bits_offset, m);
}
seq_puts(m, "}");
}
static const struct btf_kind_operations datasec_ops = {
.check_meta = btf_datasec_check_meta,
.resolve = btf_datasec_resolve,
.check_member = btf_df_check_member,
.check_kflag_member = btf_df_check_kflag_member,
.log_details = btf_datasec_log,
.seq_show = btf_datasec_seq_show,
};
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
@@ -2542,6 +2913,8 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_RESTRICT] = &modifier_ops,
[BTF_KIND_FUNC] = &func_ops,
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
[BTF_KIND_VAR] = &var_ops,
[BTF_KIND_DATASEC] = &datasec_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -2622,13 +2995,17 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
if (!env_type_is_resolved(env, type_id))
return false;
if (btf_type_is_struct(t))
if (btf_type_is_struct(t) || btf_type_is_datasec(t))
return !btf->resolved_ids[type_id] &&
!btf->resolved_sizes[type_id];
!btf->resolved_sizes[type_id];
if (btf_type_is_modifier(t) || btf_type_is_ptr(t)) {
if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
btf_type_is_var(t)) {
t = btf_type_id_resolve(btf, &type_id);
return t && !btf_type_is_modifier(t);
return t &&
!btf_type_is_modifier(t) &&
!btf_type_is_var(t) &&
!btf_type_is_datasec(t);
}
if (btf_type_is_array(t)) {

View File

@@ -11,7 +11,10 @@
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
@@ -701,7 +704,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -710,6 +713,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
case BPF_FUNC_map_push_elem:
return &bpf_map_push_elem_proto;
case BPF_FUNC_map_pop_elem:
return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_local_storage:
@@ -725,6 +734,12 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return cgroup_base_func_proto(func_id, prog);
}
static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -762,3 +777,356 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
.get_func_proto = cgroup_dev_func_proto,
.is_valid_access = cgroup_dev_is_valid_access,
};
/**
* __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
*
* @head: sysctl table header
* @table: sysctl table
* @write: sysctl is being read (= 0) or written (= 1)
* @buf: pointer to buffer passed by user space
* @pcount: value-result argument: value is size of buffer pointed to by @buf,
* result is size of @new_buf if program set new value, initial value
* otherwise
* @ppos: value-result argument: value is position at which read from or write
* to sysctl is happening, result is new position if program overrode it,
* initial value otherwise
* @new_buf: pointer to pointer to new buffer that will be allocated if program
* overrides new value provided by user space on sysctl write
* NOTE: it's caller responsibility to free *new_buf if it was set
* @type: type of program to be executed
*
* Program is run when sysctl is being accessed, either read or written, and
* can allow or deny such access.
*
* This function will return %-EPERM if an attached program is found and
* returned value != 1 during execution. In all other cases 0 is returned.
*/
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
void __user *buf, size_t *pcount,
loff_t *ppos, void **new_buf,
enum bpf_attach_type type)
{
struct bpf_sysctl_kern ctx = {
.head = head,
.table = table,
.write = write,
.ppos = ppos,
.cur_val = NULL,
.cur_len = PAGE_SIZE,
.new_val = NULL,
.new_len = 0,
.new_updated = 0,
};
struct cgroup *cgrp;
int ret;
ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
if (ctx.cur_val) {
mm_segment_t old_fs;
loff_t pos = 0;
old_fs = get_fs();
set_fs(KERNEL_DS);
if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
&ctx.cur_len, &pos)) {
/* Let BPF program decide how to proceed. */
ctx.cur_len = 0;
}
set_fs(old_fs);
} else {
/* Let BPF program decide how to proceed. */
ctx.cur_len = 0;
}
if (write && buf && *pcount) {
/* BPF program should be able to override new value with a
* buffer bigger than provided by user.
*/
ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
if (!ctx.new_val ||
copy_from_user(ctx.new_val, buf, ctx.new_len))
/* Let BPF program decide how to proceed. */
ctx.new_len = 0;
}
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
rcu_read_unlock();
kfree(ctx.cur_val);
if (ret == 1 && ctx.new_updated) {
*new_buf = ctx.new_val;
*pcount = ctx.new_len;
} else {
kfree(ctx.new_val);
}
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
size_t *lenp)
{
ssize_t tmp_ret = 0, ret;
if (dir->header.parent) {
tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
if (tmp_ret < 0)
return tmp_ret;
}
ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
if (ret < 0)
return ret;
*bufp += ret;
*lenp -= ret;
ret += tmp_ret;
/* Avoid leading slash. */
if (!ret)
return ret;
tmp_ret = strscpy(*bufp, "/", *lenp);
if (tmp_ret < 0)
return tmp_ret;
*bufp += tmp_ret;
*lenp -= tmp_ret;
return ret + tmp_ret;
}
BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
size_t, buf_len, u64, flags)
{
ssize_t tmp_ret = 0, ret;
if (!buf)
return -EINVAL;
if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
if (!ctx->head)
return -EINVAL;
tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
if (tmp_ret < 0)
return tmp_ret;
}
ret = strscpy(buf, ctx->table->procname, buf_len);
return ret < 0 ? ret : tmp_ret + ret;
}
static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
.func = bpf_sysctl_get_name,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
.arg4_type = ARG_ANYTHING,
};
static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
size_t src_len)
{
if (!dst)
return -EINVAL;
if (!dst_len)
return -E2BIG;
if (!src || !src_len) {
memset(dst, 0, dst_len);
return -EINVAL;
}
memcpy(dst, src, min(dst_len, src_len));
if (dst_len > src_len) {
memset(dst + src_len, '\0', dst_len - src_len);
return src_len;
}
dst[dst_len - 1] = '\0';
return -E2BIG;
}
BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
char *, buf, size_t, buf_len)
{
return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
}
static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
.func = bpf_sysctl_get_current_value,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE,
};
BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
size_t, buf_len)
{
if (!ctx->write) {
if (buf && buf_len)
memset(buf, '\0', buf_len);
return -EINVAL;
}
return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
}
static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
.func = bpf_sysctl_get_new_value,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE,
};
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
const char *, buf, size_t, buf_len)
{
if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
return -EINVAL;
if (buf_len > PAGE_SIZE - 1)
return -E2BIG;
memcpy(ctx->new_val, buf, buf_len);
ctx->new_len = buf_len;
ctx->new_updated = 1;
return 0;
}
static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
.func = bpf_sysctl_set_new_value,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE,
};
static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_strtol:
return &bpf_strtol_proto;
case BPF_FUNC_strtoul:
return &bpf_strtoul_proto;
case BPF_FUNC_sysctl_get_name:
return &bpf_sysctl_get_name_proto;
case BPF_FUNC_sysctl_get_current_value:
return &bpf_sysctl_get_current_value_proto;
case BPF_FUNC_sysctl_get_new_value:
return &bpf_sysctl_get_new_value_proto;
case BPF_FUNC_sysctl_set_new_value:
return &bpf_sysctl_set_new_value_proto;
default:
return cgroup_base_func_proto(func_id, prog);
}
}
static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const int size_default = sizeof(__u32);
if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
return false;
switch (off) {
case offsetof(struct bpf_sysctl, write):
if (type != BPF_READ)
return false;
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
case offsetof(struct bpf_sysctl, file_pos):
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
} else {
return size == size_default;
}
default:
return false;
}
}
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
switch (si->off) {
case offsetof(struct bpf_sysctl, write):
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sysctl_kern, write,
FIELD_SIZEOF(struct bpf_sysctl_kern,
write),
target_size));
break;
case offsetof(struct bpf_sysctl, file_pos):
/* ppos is a pointer so it should be accessed via indirect
* loads and stores. Also for stores additional temporary
* register is used since neither src_reg nor dst_reg can be
* overridden.
*/
if (type == BPF_WRITE) {
int treg = BPF_REG_9;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
if (si->src_reg == treg || si->dst_reg == treg)
--treg;
*insn++ = BPF_STX_MEM(
BPF_DW, si->dst_reg, treg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, ppos));
*insn++ = BPF_STX_MEM(
BPF_SIZEOF(u32), treg, si->src_reg, 0);
*insn++ = BPF_LDX_MEM(
BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
} else {
*insn++ = BPF_LDX_MEM(
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
si->dst_reg, si->src_reg,
offsetof(struct bpf_sysctl_kern, ppos));
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
}
*target_size = sizeof(u32);
break;
}
return insn - insn_buf;
}
const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
.get_func_proto = sysctl_func_proto,
.is_valid_access = sysctl_is_valid_access,
.convert_ctx_access = sysctl_convert_ctx_access,
};
const struct bpf_prog_ops cg_sysctl_prog_ops = {
};

View File

@@ -292,7 +292,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
dst[i] = fp->insnsi[i];
if (!was_ld_map &&
dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
dst[i].src_reg == BPF_PSEUDO_MAP_FD) {
(dst[i].src_reg == BPF_PSEUDO_MAP_FD ||
dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
was_ld_map = true;
dst[i].imm = 0;
} else if (was_ld_map &&
@@ -438,6 +439,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
const u32 cnt_max = S16_MAX;
struct bpf_prog *prog_adj;
int err;
/* Since our patchlet doesn't expand the image, we're done. */
if (insn_delta == 0) {
@@ -453,8 +455,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
* we afterwards may not fail anymore.
*/
if (insn_adj_cnt > cnt_max &&
bpf_adj_branches(prog, off, off + 1, off + len, true))
return NULL;
(err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
return ERR_PTR(err);
/* Several new instructions need to be inserted. Make room
* for them. Likely, there's no need for a new allocation as
@@ -463,7 +465,7 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
GFP_USER);
if (!prog_adj)
return NULL;
return ERR_PTR(-ENOMEM);
prog_adj->len = insn_adj_cnt;
@@ -1095,13 +1097,13 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
continue;
tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
if (!tmp) {
if (IS_ERR(tmp)) {
/* Patching may have repointed aux->prog during
* realloc from the original one, so we need to
* fix it up here on error.
*/
bpf_jit_prog_release_other(prog, clone);
return ERR_PTR(-ENOMEM);
return tmp;
}
clone = tmp;

View File

@@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work)
}
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
struct xdp_frame *xdpf)
struct xdp_frame *xdpf,
struct sk_buff *skb)
{
unsigned int hard_start_headroom;
unsigned int frame_size;
void *pkt_data_start;
struct sk_buff *skb;
/* Part of headroom was reserved to xdpf */
hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
@@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
pkt_data_start = xdpf->data - hard_start_headroom;
skb = build_skb(pkt_data_start, frame_size);
if (!skb)
skb = build_skb_around(skb, pkt_data_start, frame_size);
if (unlikely(!skb))
return NULL;
skb_reserve(skb, hard_start_headroom);
@@ -240,6 +240,8 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
}
}
#define CPUMAP_BATCH 8
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
@@ -252,8 +254,11 @@ static int cpu_map_kthread_run(void *data)
* kthread_stop signal until queue is empty.
*/
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
unsigned int processed = 0, drops = 0, sched = 0;
struct xdp_frame *xdpf;
unsigned int drops = 0, sched = 0;
void *frames[CPUMAP_BATCH];
void *skbs[CPUMAP_BATCH];
gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
int i, n, m;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
@@ -269,18 +274,38 @@ static int cpu_map_kthread_run(void *data)
sched = cond_resched();
}
/* Process packets in rcpu->queue */
local_bh_disable();
/*
* The bpf_cpu_map_entry is single consumer, with this
* kthread CPU pinned. Lockless access to ptr_ring
* consume side valid as no-resize allowed of queue.
*/
while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
struct sk_buff *skb;
n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
for (i = 0; i < n; i++) {
void *f = frames[i];
struct page *page = virt_to_page(f);
/* Bring struct page memory area to curr CPU. Read by
* build_skb_around via page_is_pfmemalloc(), and when
* freed written by page_frag_free call.
*/
prefetchw(page);
}
m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
if (unlikely(m == 0)) {
for (i = 0; i < n; i++)
skbs[i] = NULL; /* effect: xdp_return_frame */
drops = n;
}
local_bh_disable();
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
struct sk_buff *skb = skbs[i];
int ret;
skb = cpu_map_build_skb(rcpu, xdpf);
skb = cpu_map_build_skb(rcpu, xdpf, skb);
if (!skb) {
xdp_return_frame(xdpf);
continue;
@@ -290,13 +315,9 @@ static int cpu_map_kthread_run(void *data)
ret = netif_receive_skb_core(skb);
if (ret == NET_RX_DROP)
drops++;
/* Limit BH-disable period */
if (++processed == 8)
break;
}
/* Feedback loop via tracepoint */
trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
local_bh_enable(); /* resched point, may call do_softirq() */
}

View File

@@ -205,10 +205,11 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
* part of the ldimm64 insn is accessible.
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD ||
insn->src_reg == BPF_PSEUDO_MAP_VALUE;
char tmp[64];
if (map_ptr && !allow_ptr_leaks)
if (is_ptr && !allow_ptr_leaks)
imm = 0;
verbose(cbs->private_data, "(%02x) r%d = %s\n",

View File

@@ -23,7 +23,7 @@
#define HTAB_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
struct bucket {
struct hlist_nulls_head head;
@@ -262,8 +262,8 @@ static int htab_map_alloc_check(union bpf_attr *attr)
/* Guard against local DoS, and discourage production use. */
return -EPERM;
if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
/* reserved bits should not be used */
if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
!bpf_map_flags_access_ok(attr->map_flags))
return -EINVAL;
if (!lru && percpu_lru)

View File

@@ -18,6 +18,9 @@
#include <linux/sched.h>
#include <linux/uidgid.h>
#include <linux/filter.h>
#include <linux/ctype.h>
#include "../../lib/kstrtox.h"
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -363,4 +366,132 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
.arg2_type = ARG_ANYTHING,
};
#endif
#define BPF_STRTOX_BASE_MASK 0x1F
static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags,
unsigned long long *res, bool *is_negative)
{
unsigned int base = flags & BPF_STRTOX_BASE_MASK;
const char *cur_buf = buf;
size_t cur_len = buf_len;
unsigned int consumed;
size_t val_len;
char str[64];
if (!buf || !buf_len || !res || !is_negative)
return -EINVAL;
if (base != 0 && base != 8 && base != 10 && base != 16)
return -EINVAL;
if (flags & ~BPF_STRTOX_BASE_MASK)
return -EINVAL;
while (cur_buf < buf + buf_len && isspace(*cur_buf))
++cur_buf;
*is_negative = (cur_buf < buf + buf_len && *cur_buf == '-');
if (*is_negative)
++cur_buf;
consumed = cur_buf - buf;
cur_len -= consumed;
if (!cur_len)
return -EINVAL;
cur_len = min(cur_len, sizeof(str) - 1);
memcpy(str, cur_buf, cur_len);
str[cur_len] = '\0';
cur_buf = str;
cur_buf = _parse_integer_fixup_radix(cur_buf, &base);
val_len = _parse_integer(cur_buf, base, res);
if (val_len & KSTRTOX_OVERFLOW)
return -ERANGE;
if (val_len == 0)
return -EINVAL;
cur_buf += val_len;
consumed += cur_buf - str;
return consumed;
}
static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
long long *res)
{
unsigned long long _res;
bool is_negative;
int err;
err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
if (err < 0)
return err;
if (is_negative) {
if ((long long)-_res > 0)
return -ERANGE;
*res = -_res;
} else {
if ((long long)_res < 0)
return -ERANGE;
*res = _res;
}
return err;
}
BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
long *, res)
{
long long _res;
int err;
err = __bpf_strtoll(buf, buf_len, flags, &_res);
if (err < 0)
return err;
if (_res != (long)_res)
return -ERANGE;
*res = _res;
return err;
}
const struct bpf_func_proto bpf_strtol_proto = {
.func = bpf_strtol,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
};
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
unsigned long *, res)
{
unsigned long long _res;
bool is_negative;
int err;
err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
if (err < 0)
return err;
if (is_negative)
return -EINVAL;
if (_res != (unsigned long)_res)
return -ERANGE;
*res = _res;
return err;
}
const struct bpf_func_proto bpf_strtoul_proto = {
.func = bpf_strtoul,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_PTR_TO_LONG,
};
#endif

View File

@@ -14,7 +14,7 @@ DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STO
#ifdef CONFIG_CGROUP_BPF
#define LOCAL_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_cgroup_storage_map {
struct bpf_map map;
@@ -282,8 +282,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
if (attr->value_size > PAGE_SIZE)
return ERR_PTR(-E2BIG);
if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
/* reserved bits should not be used */
if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
!bpf_map_flags_access_ok(attr->map_flags))
return ERR_PTR(-EINVAL);
if (attr->max_entries)

View File

@@ -538,7 +538,7 @@ out:
#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \
BPF_F_RDONLY | BPF_F_WRONLY)
BPF_F_ACCESS_MASK)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
@@ -553,6 +553,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
if (attr->max_entries == 0 ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
attr->map_flags & ~LPM_CREATE_FLAG_MASK ||
!bpf_map_flags_access_ok(attr->map_flags) ||
attr->key_size < LPM_KEY_SIZE_MIN ||
attr->key_size > LPM_KEY_SIZE_MAX ||
attr->value_size < LPM_VAL_SIZE_MIN ||

View File

@@ -11,8 +11,7 @@
#include "percpu_freelist.h"
#define QUEUE_STACK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
struct bpf_queue_stack {
struct bpf_map map;
@@ -52,7 +51,8 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 0 ||
attr->value_size == 0 ||
attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
!bpf_map_flags_access_ok(attr->map_flags))
return -EINVAL;
if (attr->value_size > KMALLOC_MAX_SIZE)

View File

@@ -166,13 +166,25 @@ void bpf_map_area_free(void *area)
kvfree(area);
}
static u32 bpf_map_flags_retain_permanent(u32 flags)
{
/* Some map creation flags are not tied to the map object but
* rather to the map fd instead, so they have no meaning upon
* map object inspection since multiple file descriptors with
* different (access) properties can exist here. Thus, given
* this has zero meaning for the map itself, lets clear these
* from here.
*/
return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
}
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
{
map->map_type = attr->map_type;
map->key_size = attr->key_size;
map->value_size = attr->value_size;
map->max_entries = attr->max_entries;
map->map_flags = attr->map_flags;
map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
map->numa_node = bpf_map_attr_numa_node(attr);
}
@@ -343,6 +355,18 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
return 0;
}
static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
fmode_t mode = f.file->f_mode;
/* Our file permissions may have been overridden by global
* map permissions facing syscall side.
*/
if (READ_ONCE(map->frozen))
mode &= ~FMODE_CAN_WRITE;
return mode;
}
#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
@@ -364,14 +388,16 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
"memlock:\t%llu\n"
"map_id:\t%u\n",
"map_id:\t%u\n"
"frozen:\t%u\n",
map->map_type,
map->key_size,
map->value_size,
map->max_entries,
map->map_flags,
map->pages * 1ULL << PAGE_SHIFT,
map->id);
map->id,
READ_ONCE(map->frozen));
if (owner_prog_type) {
seq_printf(m, "owner_prog_type:\t%u\n",
@@ -448,10 +474,10 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
const char *end = src + BPF_OBJ_NAME_LEN;
memset(dst, 0, BPF_OBJ_NAME_LEN);
/* Copy all isalnum() and '_' char */
/* Copy all isalnum(), '_' and '.' chars. */
while (src < end && *src) {
if (!isalnum(*src) && *src != '_')
if (!isalnum(*src) &&
*src != '_' && *src != '.')
return -EINVAL;
*dst++ = *src++;
}
@@ -478,9 +504,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
u32 key_size, value_size;
int ret = 0;
key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
if (!key_type || key_size != map->key_size)
return -EINVAL;
/* Some maps allow key to be unspecified. */
if (btf_key_id) {
key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
if (!key_type || key_size != map->key_size)
return -EINVAL;
} else {
key_type = btf_type_by_id(btf, 0);
if (!map->ops->map_check_btf)
return -EINVAL;
}
value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
if (!value_type || value_size != map->value_size)
@@ -489,9 +522,12 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
map->spin_lock_off = btf_find_spin_lock(btf, value_type);
if (map_value_has_spin_lock(map)) {
if (map->map_flags & BPF_F_RDONLY_PROG)
return -EACCES;
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
map->map_type != BPF_MAP_TYPE_SK_STORAGE)
return -ENOTSUPP;
if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
map->value_size) {
@@ -545,7 +581,7 @@ static int map_create(union bpf_attr *attr)
if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
if (!attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
@@ -713,8 +749,7 @@ static int map_lookup_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if (!(f.file->f_mode & FMODE_CAN_READ)) {
if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
@@ -843,8 +878,7 @@ static int map_update_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -955,8 +989,7 @@ static int map_delete_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -1007,8 +1040,7 @@ static int map_get_next_key(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if (!(f.file->f_mode & FMODE_CAN_READ)) {
if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
@@ -1075,8 +1107,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
@@ -1118,6 +1149,36 @@ err_put:
return err;
}
#define BPF_MAP_FREEZE_LAST_FIELD map_fd
static int map_freeze(const union bpf_attr *attr)
{
int err = 0, ufd = attr->map_fd;
struct bpf_map *map;
struct fd f;
if (CHECK_ATTR(BPF_MAP_FREEZE))
return -EINVAL;
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if (READ_ONCE(map->frozen)) {
err = -EBUSY;
goto err_put;
}
if (!capable(CAP_SYS_ADMIN)) {
err = -EPERM;
goto err_put;
}
WRITE_ONCE(map->frozen, true);
err_put:
fdput(f);
return err;
}
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -1557,7 +1618,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
/* eBPF programs must be GPL compatible to use GPL-ed functions */
is_gpl = license_is_gpl_compatible(license);
if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
if (attr->insn_cnt == 0 ||
attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
return -E2BIG;
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
@@ -1728,12 +1790,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
}
raw_tp->btp = btp;
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
BPF_PROG_TYPE_RAW_TRACEPOINT);
prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
if (IS_ERR(prog)) {
err = PTR_ERR(prog);
goto out_free_tp;
}
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
err = -EINVAL;
goto out_put_prog;
}
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
@@ -1827,6 +1893,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
case BPF_FLOW_DISSECTOR:
ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
break;
case BPF_CGROUP_SYSCTL:
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
break;
default:
return -EINVAL;
}
@@ -1905,6 +1974,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
return lirc_prog_detach(attr);
case BPF_FLOW_DISSECTOR:
return skb_flow_dissector_bpf_prog_detach(attr);
case BPF_CGROUP_SYSCTL:
ptype = BPF_PROG_TYPE_CGROUP_SYSCTL;
break;
default:
return -EINVAL;
}
@@ -1938,9 +2010,12 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_CGROUP_UDP6_SENDMSG:
case BPF_CGROUP_SOCK_OPS:
case BPF_CGROUP_DEVICE:
case BPF_CGROUP_SYSCTL:
break;
case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr);
case BPF_FLOW_DISSECTOR:
return skb_flow_dissector_prog_query(attr, uattr);
default:
return -EINVAL;
}
@@ -1948,7 +2023,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
return cgroup_bpf_prog_query(attr, uattr);
}
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1961,6 +2036,14 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
return -EINVAL;
if ((attr->test.ctx_size_in && !attr->test.ctx_in) ||
(!attr->test.ctx_size_in && attr->test.ctx_in))
return -EINVAL;
if ((attr->test.ctx_size_out && !attr->test.ctx_out) ||
(!attr->test.ctx_size_out && attr->test.ctx_out))
return -EINVAL;
prog = bpf_prog_get(attr->test.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -2071,13 +2154,26 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
}
static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
unsigned long addr)
unsigned long addr, u32 *off,
u32 *type)
{
const struct bpf_map *map;
int i;
for (i = 0; i < prog->aux->used_map_cnt; i++)
if (prog->aux->used_maps[i] == (void *)addr)
return prog->aux->used_maps[i];
for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
map = prog->aux->used_maps[i];
if (map == (void *)addr) {
*type = BPF_PSEUDO_MAP_FD;
return map;
}
if (!map->ops->map_direct_value_meta)
continue;
if (!map->ops->map_direct_value_meta(map, addr, off)) {
*type = BPF_PSEUDO_MAP_VALUE;
return map;
}
}
return NULL;
}
@@ -2085,6 +2181,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
{
const struct bpf_map *map;
struct bpf_insn *insns;
u32 off, type;
u64 imm;
int i;
@@ -2112,11 +2209,11 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
continue;
imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
map = bpf_map_from_imm(prog, imm);
map = bpf_map_from_imm(prog, imm, &off, &type);
if (map) {
insns[i].src_reg = BPF_PSEUDO_MAP_FD;
insns[i].src_reg = type;
insns[i].imm = map->id;
insns[i + 1].imm = 0;
insns[i + 1].imm = off;
continue;
}
}
@@ -2706,6 +2803,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_GET_NEXT_KEY:
err = map_get_next_key(&attr);
break;
case BPF_MAP_FREEZE:
err = map_freeze(&attr);
break;
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr, uattr);
break;

File diff suppressed because it is too large Load Diff