Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Add Maglev hashing scheduler to IPVS, from Inju Song.
 2) Lots of new TC subsystem tests from Roman Mashak.
 3) Add TCP zero copy receive and fix delayed acks and autotuning with SO_RCVLOWAT, from Eric Dumazet.
 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard Brouer.
 5) Add ttl inherit support to vxlan, from Hangbin Liu.
 6) Properly separate ipv6 routes into their logically independent components: fib6_info for the routing table, and fib6_nh for sets of nexthops, which thus can be shared. From David Ahern.
 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP messages from XDP programs. From Nikita V. Shirokov.
 8) Lots of long overdue cleanups to the r8169 driver, from Heiner Kallweit.
 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau.
10) Add traffic condition monitoring to iwlwifi, from Luca Coelho.
11) Plumb extack down into fib_rules, from Roopa Prabhu.
12) Add Flower classifier offload support to igb, from Vinicius Costa Gomes.
13) Add UDP GSO support, from Willem de Bruijn.
14) Add documentation for eBPF helpers, from Quentin Monnet.
15) Add TLS tx offload to mlx5, from Ilya Lesokhin.
16) Allow applications to be given the number of bytes available to read on a socket via a control message returned from recvmsg(), from Soheil Hassas Yeganeh.
17) Add x86_32 eBPF JIT compiler, from Wang YanQing.
18) Add AF_XDP sockets, with zerocopy support infrastructure as well. From Björn Töpel.
19) Remove indirect load support from all of the BPF JITs and handle these operations in the verifier by translating them into native BPF instead. From Daniel Borkmann.
20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha.
21) Allow XDP programs to do lookups in the main kernel routing tables for forwarding. From David Ahern.
22) Allow drivers to store hardware state into an ELF section of kernel dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy.
23) Various RACK and loss detection improvements in TCP, from Yuchung Cheng.
24) Add TCP SACK compression, from Eric Dumazet.
25) Add User Mode Helper support and basic bpfilter infrastructure, from Alexei Starovoitov.
26) Support ports and protocol values in RTM_GETROUTE, from Roopa Prabhu.
27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard Brouer.
28) Add lots of forwarding selftests, from Petr Machata.
29) Add generic network device failover driver, from Sridhar Samudrala.

* ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits)
  strparser: Add __strp_unpause and use it in ktls.
  rxrpc: Fix terminal retransmission connection ID to include the channel
  net: hns3: Optimize PF CMDQ interrupt switching process
  net: hns3: Fix for VF mailbox receiving unknown message
  net: hns3: Fix for VF mailbox cannot receiving PF response
  bnx2x: use the right constant
  Revert "net: sched: cls: Fix offloading when ingress dev is vxlan"
  net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
  enic: fix UDP rss bits
  netdev-FAQ: clarify DaveM's position for stable backports
  rtnetlink: validate attributes in do_setlink()
  mlxsw: Add extack messages for port_{un, }split failures
  netdevsim: Add extack error message for devlink reload
  devlink: Add extack to reload and port_{un, }split operations
  net: metrics: add proper netlink validation
  ipmr: fix error path when ipmr_new_table fails
  ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds
  net: hns3: remove unused hclgevf_cfg_func_mta_filter
  netfilter: provide udp*_lib_lookup for nf_tproxy
  qed*: Utilize FW 8.37.2.0
  ...
@@ -4,9 +4,13 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
ifeq ($(CONFIG_XDP_SOCKETS),y)
obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
endif
obj-$(CONFIG_BPF_SYSCALL) += offload.o
ifeq ($(CONFIG_STREAM_PARSER),y)
ifeq ($(CONFIG_INET),y)
@@ -11,11 +11,13 @@
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>

#include "map_in_map.h"

@@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map)
bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
struct seq_file *m)
{
void *value;

rcu_read_lock();

value = array_map_lookup_elem(map, key);
if (!value) {
rcu_read_unlock();
return;
}

seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");

rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
u32 btf_key_id, u32 btf_value_id)
{
const struct btf_type *key_type, *value_type;
u32 key_size, value_size;
u32 int_data;

key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
return -EINVAL;

int_data = *(u32 *)(key_type + 1);
/* bpf array can only take a u32 key. This check makes
 * sure that the btf matches the attr used during map_create.
 */
if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
BTF_INT_OFFSET(int_data))
return -EINVAL;

value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
if (!value_type || value_size > map->value_size)
return -EINVAL;

return 0;
}

const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
@@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = {
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_gen_lookup = array_map_gen_lookup,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};

const struct bpf_map_ops percpu_array_map_ops = {
kernel/bpf/btf.c: new file, 2348 lines (diff suppressed because it is too large)
@@ -500,6 +500,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @type: The type of program to be exectuted
 * @t_ctx: Pointer to attach type specific context
 *
 * socket is expected to be of type INET or INET6.
 *
@@ -508,12 +509,15 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type)
enum bpf_attach_type type,
void *t_ctx)
{
struct bpf_sock_addr_kern ctx = {
.sk = sk,
.uaddr = uaddr,
.t_ctx = t_ctx,
};
struct sockaddr_storage unspec;
struct cgroup *cgrp;
int ret;

@@ -523,6 +527,11 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
return 0;

if (!ctx.uaddr) {
memset(&unspec, 0, sizeof(unspec));
ctx.uaddr = (struct sockaddr *)&unspec;
}

cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
@@ -31,6 +31,7 @@
|
||||
#include <linux/rbtree_latch.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
@@ -683,23 +684,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
|
||||
*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
|
||||
break;
|
||||
|
||||
case BPF_LD | BPF_ABS | BPF_W:
|
||||
case BPF_LD | BPF_ABS | BPF_H:
|
||||
case BPF_LD | BPF_ABS | BPF_B:
|
||||
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
|
||||
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
|
||||
*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
|
||||
break;
|
||||
|
||||
case BPF_LD | BPF_IND | BPF_W:
|
||||
case BPF_LD | BPF_IND | BPF_H:
|
||||
case BPF_LD | BPF_IND | BPF_B:
|
||||
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
|
||||
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
|
||||
*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
|
||||
*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
|
||||
break;
|
||||
|
||||
case BPF_LD | BPF_IMM | BPF_DW:
|
||||
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
|
||||
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
|
||||
@@ -940,14 +924,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
|
||||
INSN_3(LDX, MEM, W), \
|
||||
INSN_3(LDX, MEM, DW), \
|
||||
/* Immediate based. */ \
|
||||
INSN_3(LD, IMM, DW), \
|
||||
/* Misc (old cBPF carry-over). */ \
|
||||
INSN_3(LD, ABS, B), \
|
||||
INSN_3(LD, ABS, H), \
|
||||
INSN_3(LD, ABS, W), \
|
||||
INSN_3(LD, IND, B), \
|
||||
INSN_3(LD, IND, H), \
|
||||
INSN_3(LD, IND, W)
|
||||
INSN_3(LD, IMM, DW)
|
||||
|
||||
bool bpf_opcode_in_insntable(u8 code)
|
||||
{
|
||||
@@ -957,6 +934,13 @@ bool bpf_opcode_in_insntable(u8 code)
|
||||
[0 ... 255] = false,
|
||||
/* Now overwrite non-defaults ... */
|
||||
BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
|
||||
/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
|
||||
[BPF_LD | BPF_ABS | BPF_B] = true,
|
||||
[BPF_LD | BPF_ABS | BPF_H] = true,
|
||||
[BPF_LD | BPF_ABS | BPF_W] = true,
|
||||
[BPF_LD | BPF_IND | BPF_B] = true,
|
||||
[BPF_LD | BPF_IND | BPF_H] = true,
|
||||
[BPF_LD | BPF_IND | BPF_W] = true,
|
||||
};
|
||||
#undef BPF_INSN_3_TBL
|
||||
#undef BPF_INSN_2_TBL
|
||||
@@ -987,8 +971,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
|
||||
#undef BPF_INSN_3_LBL
|
||||
#undef BPF_INSN_2_LBL
|
||||
u32 tail_call_cnt = 0;
|
||||
void *ptr;
|
||||
int off;
|
||||
|
||||
#define CONT ({ insn++; goto select_insn; })
|
||||
#define CONT_JMP ({ insn++; goto select_insn; })
|
||||
@@ -1315,67 +1297,6 @@ out:
|
||||
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
|
||||
(DST + insn->off));
|
||||
CONT;
|
||||
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
|
||||
off = IMM;
|
||||
load_word:
|
||||
/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
|
||||
* appearing in the programs where ctx == skb
|
||||
* (see may_access_skb() in the verifier). All programs
|
||||
* keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
|
||||
* bpf_convert_filter() saves it in BPF_R6, internal BPF
|
||||
* verifier will check that BPF_R6 == ctx.
|
||||
*
|
||||
* BPF_ABS and BPF_IND are wrappers of function calls,
|
||||
* so they scratch BPF_R1-BPF_R5 registers, preserve
|
||||
* BPF_R6-BPF_R9, and store return value into BPF_R0.
|
||||
*
|
||||
* Implicit input:
|
||||
* ctx == skb == BPF_R6 == CTX
|
||||
*
|
||||
* Explicit input:
|
||||
* SRC == any register
|
||||
* IMM == 32-bit immediate
|
||||
*
|
||||
* Output:
|
||||
* BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
|
||||
*/
|
||||
|
||||
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
|
||||
if (likely(ptr != NULL)) {
|
||||
BPF_R0 = get_unaligned_be32(ptr);
|
||||
CONT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
|
||||
off = IMM;
|
||||
load_half:
|
||||
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
|
||||
if (likely(ptr != NULL)) {
|
||||
BPF_R0 = get_unaligned_be16(ptr);
|
||||
CONT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
|
||||
off = IMM;
|
||||
load_byte:
|
||||
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
|
||||
if (likely(ptr != NULL)) {
|
||||
BPF_R0 = *(u8 *)ptr;
|
||||
CONT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
|
||||
off = IMM + SRC;
|
||||
goto load_word;
|
||||
LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
|
||||
off = IMM + SRC;
|
||||
goto load_half;
|
||||
LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
|
||||
off = IMM + SRC;
|
||||
goto load_byte;
|
||||
|
||||
default_label:
|
||||
/* If we ever reach this, we have a bug somewhere. Die hard here
|
||||
@@ -1695,6 +1616,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
|
||||
int new_prog_cnt, carry_prog_cnt = 0;
|
||||
struct bpf_prog **existing_prog;
|
||||
struct bpf_prog_array *array;
|
||||
bool found_exclude = false;
|
||||
int new_prog_idx = 0;
|
||||
|
||||
/* Figure out how many existing progs we need to carry over to
|
||||
@@ -1703,14 +1625,20 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
|
||||
if (old_array) {
|
||||
existing_prog = old_array->progs;
|
||||
for (; *existing_prog; existing_prog++) {
|
||||
if (*existing_prog != exclude_prog &&
|
||||
*existing_prog != &dummy_bpf_prog.prog)
|
||||
if (*existing_prog == exclude_prog) {
|
||||
found_exclude = true;
|
||||
continue;
|
||||
}
|
||||
if (*existing_prog != &dummy_bpf_prog.prog)
|
||||
carry_prog_cnt++;
|
||||
if (*existing_prog == include_prog)
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
|
||||
if (exclude_prog && !found_exclude)
|
||||
return -ENOENT;
|
||||
|
||||
/* How many progs (not NULL) will be in the new array? */
|
||||
new_prog_cnt = carry_prog_cnt;
|
||||
if (include_prog)
|
||||
@@ -1772,6 +1700,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
|
||||
aux = container_of(work, struct bpf_prog_aux, work);
|
||||
if (bpf_prog_is_dev_bound(aux))
|
||||
bpf_prog_offload_destroy(aux->prog);
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if (aux->prog->has_callchain_buf)
|
||||
put_callchain_buffers();
|
||||
#endif
|
||||
for (i = 0; i < aux->func_cnt; i++)
|
||||
bpf_jit_free(aux->func[i]);
|
||||
if (aux->func_cnt) {
|
||||
@@ -1832,6 +1764,8 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
|
||||
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
|
||||
const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
|
||||
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
|
||||
|
||||
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
|
||||
{
|
||||
@@ -1844,6 +1778,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_event_output);
|
||||
|
||||
/* Always built-in helper functions. */
|
||||
const struct bpf_func_proto bpf_tail_call_proto = {
|
||||
@@ -1890,9 +1825,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
|
||||
#include <linux/bpf_trace.h>
|
||||
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
|
||||
|
||||
/* These are only used within the BPF_SYSCALL code */
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
|
||||
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
|
||||
#endif
|
||||
|
@@ -19,6 +19,7 @@
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/ptr_ring.h>
|
||||
#include <net/xdp.h>
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/workqueue.h>
|
||||
@@ -137,27 +138,6 @@ free_cmap:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void __cpu_map_queue_destructor(void *ptr)
|
||||
{
|
||||
/* The tear-down procedure should have made sure that queue is
|
||||
* empty. See __cpu_map_entry_replace() and work-queue
|
||||
* invoked cpu_map_kthread_stop(). Catch any broken behaviour
|
||||
* gracefully and warn once.
|
||||
*/
|
||||
if (WARN_ON_ONCE(ptr))
|
||||
page_frag_free(ptr);
|
||||
}
|
||||
|
||||
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcpu->refcnt)) {
|
||||
/* The queue should be empty at this point */
|
||||
ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
|
||||
kfree(rcpu->queue);
|
||||
kfree(rcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
{
|
||||
atomic_inc(&rcpu->refcnt);
|
||||
@@ -179,45 +159,8 @@ static void cpu_map_kthread_stop(struct work_struct *work)
|
||||
kthread_stop(rcpu->kthread);
|
||||
}
|
||||
|
||||
/* For now, xdp_pkt is a cpumap internal data structure, with info
|
||||
* carried between enqueue to dequeue. It is mapped into the top
|
||||
* headroom of the packet, to avoid allocating separate mem.
|
||||
*/
|
||||
struct xdp_pkt {
|
||||
void *data;
|
||||
u16 len;
|
||||
u16 headroom;
|
||||
u16 metasize;
|
||||
struct net_device *dev_rx;
|
||||
};
|
||||
|
||||
/* Convert xdp_buff to xdp_pkt */
|
||||
static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
|
||||
{
|
||||
struct xdp_pkt *xdp_pkt;
|
||||
int metasize;
|
||||
int headroom;
|
||||
|
||||
/* Assure headroom is available for storing info */
|
||||
headroom = xdp->data - xdp->data_hard_start;
|
||||
metasize = xdp->data - xdp->data_meta;
|
||||
metasize = metasize > 0 ? metasize : 0;
|
||||
if (unlikely((headroom - metasize) < sizeof(*xdp_pkt)))
|
||||
return NULL;
|
||||
|
||||
/* Store info in top of packet */
|
||||
xdp_pkt = xdp->data_hard_start;
|
||||
|
||||
xdp_pkt->data = xdp->data;
|
||||
xdp_pkt->len = xdp->data_end - xdp->data;
|
||||
xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
|
||||
xdp_pkt->metasize = metasize;
|
||||
|
||||
return xdp_pkt;
|
||||
}
|
||||
|
||||
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
||||
struct xdp_pkt *xdp_pkt)
|
||||
struct xdp_frame *xdpf)
|
||||
{
|
||||
unsigned int frame_size;
|
||||
void *pkt_data_start;
|
||||
@@ -232,7 +175,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
||||
* would be preferred to set frame_size to 2048 or 4096
|
||||
* depending on the driver.
|
||||
* frame_size = 2048;
|
||||
* frame_len = frame_size - sizeof(*xdp_pkt);
|
||||
* frame_len = frame_size - sizeof(*xdp_frame);
|
||||
*
|
||||
* Instead, with info avail, skb_shared_info in placed after
|
||||
* packet len. This, unfortunately fakes the truesize.
|
||||
@@ -240,21 +183,21 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
||||
* is not at a fixed memory location, with mixed length
|
||||
* packets, which is bad for cache-line hotness.
|
||||
*/
|
||||
frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
|
||||
frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom +
|
||||
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
|
||||
|
||||
pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
|
||||
pkt_data_start = xdpf->data - xdpf->headroom;
|
||||
skb = build_skb(pkt_data_start, frame_size);
|
||||
if (!skb)
|
||||
return NULL;
|
||||
|
||||
skb_reserve(skb, xdp_pkt->headroom);
|
||||
__skb_put(skb, xdp_pkt->len);
|
||||
if (xdp_pkt->metasize)
|
||||
skb_metadata_set(skb, xdp_pkt->metasize);
|
||||
skb_reserve(skb, xdpf->headroom);
|
||||
__skb_put(skb, xdpf->len);
|
||||
if (xdpf->metasize)
|
||||
skb_metadata_set(skb, xdpf->metasize);
|
||||
|
||||
/* Essential SKB info: protocol and skb->dev */
|
||||
skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
|
||||
skb->protocol = eth_type_trans(skb, xdpf->dev_rx);
|
||||
|
||||
/* Optional SKB info, currently missing:
|
||||
* - HW checksum info (skb->ip_summed)
|
||||
@@ -265,6 +208,31 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
|
||||
return skb;
|
||||
}
|
||||
|
||||
static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
|
||||
{
|
||||
/* The tear-down procedure should have made sure that queue is
|
||||
* empty. See __cpu_map_entry_replace() and work-queue
|
||||
* invoked cpu_map_kthread_stop(). Catch any broken behaviour
|
||||
* gracefully and warn once.
|
||||
*/
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
while ((xdpf = ptr_ring_consume(ring)))
|
||||
if (WARN_ON_ONCE(xdpf))
|
||||
xdp_return_frame(xdpf);
|
||||
}
|
||||
|
||||
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
|
||||
{
|
||||
if (atomic_dec_and_test(&rcpu->refcnt)) {
|
||||
/* The queue should be empty at this point */
|
||||
__cpu_map_ring_cleanup(rcpu->queue);
|
||||
ptr_ring_cleanup(rcpu->queue, NULL);
|
||||
kfree(rcpu->queue);
|
||||
kfree(rcpu);
|
||||
}
|
||||
}
|
||||
|
||||
static int cpu_map_kthread_run(void *data)
|
||||
{
|
||||
struct bpf_cpu_map_entry *rcpu = data;
|
||||
@@ -278,7 +246,7 @@ static int cpu_map_kthread_run(void *data)
|
||||
*/
|
||||
while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
|
||||
unsigned int processed = 0, drops = 0, sched = 0;
|
||||
struct xdp_pkt *xdp_pkt;
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
/* Release CPU reschedule checks */
|
||||
if (__ptr_ring_empty(rcpu->queue)) {
|
||||
@@ -301,13 +269,13 @@ static int cpu_map_kthread_run(void *data)
|
||||
* kthread CPU pinned. Lockless access to ptr_ring
|
||||
* consume side valid as no-resize allowed of queue.
|
||||
*/
|
||||
while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
|
||||
while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
|
||||
struct sk_buff *skb;
|
||||
int ret;
|
||||
|
||||
skb = cpu_map_build_skb(rcpu, xdp_pkt);
|
||||
skb = cpu_map_build_skb(rcpu, xdpf);
|
||||
if (!skb) {
|
||||
page_frag_free(xdp_pkt);
|
||||
xdp_return_frame(xdpf);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -604,13 +572,13 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
|
||||
spin_lock(&q->producer_lock);
|
||||
|
||||
for (i = 0; i < bq->count; i++) {
|
||||
void *xdp_pkt = bq->q[i];
|
||||
struct xdp_frame *xdpf = bq->q[i];
|
||||
int err;
|
||||
|
||||
err = __ptr_ring_produce(q, xdp_pkt);
|
||||
err = __ptr_ring_produce(q, xdpf);
|
||||
if (err) {
|
||||
drops++;
|
||||
page_frag_free(xdp_pkt); /* Free xdp_pkt */
|
||||
xdp_return_frame_rx_napi(xdpf);
|
||||
}
|
||||
processed++;
|
||||
}
|
||||
@@ -625,7 +593,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
|
||||
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
|
||||
* Thus, safe percpu variable access.
|
||||
*/
|
||||
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
|
||||
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
|
||||
{
|
||||
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
|
||||
|
||||
@@ -636,28 +604,28 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
|
||||
* driver to code invoking us to finished, due to driver
|
||||
* (e.g. ixgbe) recycle tricks based on page-refcnt.
|
||||
*
|
||||
* Thus, incoming xdp_pkt is always queued here (else we race
|
||||
* Thus, incoming xdp_frame is always queued here (else we race
|
||||
* with another CPU on page-refcnt and remaining driver code).
|
||||
* Queue time is very short, as driver will invoke flush
|
||||
* operation, when completing napi->poll call.
|
||||
*/
|
||||
bq->q[bq->count++] = xdp_pkt;
|
||||
bq->q[bq->count++] = xdpf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
struct xdp_pkt *xdp_pkt;
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
xdp_pkt = convert_to_xdp_pkt(xdp);
|
||||
if (unlikely(!xdp_pkt))
|
||||
xdpf = convert_to_xdp_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* Info needed when constructing SKB on remote CPU */
|
||||
xdp_pkt->dev_rx = dev_rx;
|
||||
xdpf->dev_rx = dev_rx;
|
||||
|
||||
bq_enqueue(rcpu, xdp_pkt);
|
||||
bq_enqueue(rcpu, xdpf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@@ -48,15 +48,25 @@
|
||||
* calls will fail at this point.
|
||||
*/
|
||||
#include <linux/bpf.h>
|
||||
#include <net/xdp.h>
|
||||
#include <linux/filter.h>
|
||||
#include <trace/events/xdp.h>
|
||||
|
||||
#define DEV_CREATE_FLAG_MASK \
|
||||
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
|
||||
|
||||
#define DEV_MAP_BULK_SIZE 16
|
||||
struct xdp_bulk_queue {
|
||||
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
|
||||
struct net_device *dev_rx;
|
||||
unsigned int count;
|
||||
};
|
||||
|
||||
struct bpf_dtab_netdev {
|
||||
struct net_device *dev;
|
||||
struct net_device *dev; /* must be first member, due to tracepoint */
|
||||
struct bpf_dtab *dtab;
|
||||
unsigned int bit;
|
||||
struct xdp_bulk_queue __percpu *bulkq;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
|
||||
__set_bit(bit, bitmap);
|
||||
}
|
||||
|
||||
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
|
||||
struct xdp_bulk_queue *bq, u32 flags)
|
||||
{
|
||||
struct net_device *dev = obj->dev;
|
||||
int sent = 0, drops = 0, err = 0;
|
||||
int i;
|
||||
|
||||
if (unlikely(!bq->count))
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < bq->count; i++) {
|
||||
struct xdp_frame *xdpf = bq->q[i];
|
||||
|
||||
prefetch(xdpf);
|
||||
}
|
||||
|
||||
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
|
||||
if (sent < 0) {
|
||||
err = sent;
|
||||
sent = 0;
|
||||
goto error;
|
||||
}
|
||||
drops = bq->count - sent;
|
||||
out:
|
||||
bq->count = 0;
|
||||
|
||||
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
|
||||
sent, drops, bq->dev_rx, dev, err);
|
||||
bq->dev_rx = NULL;
|
||||
return 0;
|
||||
error:
|
||||
/* If ndo_xdp_xmit fails with an errno, no frames have been
|
||||
* xmit'ed and it's our responsibility to them free all.
|
||||
*/
|
||||
for (i = 0; i < bq->count; i++) {
|
||||
struct xdp_frame *xdpf = bq->q[i];
|
||||
|
||||
/* RX path under NAPI protection, can return frames faster */
|
||||
xdp_return_frame_rx_napi(xdpf);
|
||||
drops++;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
|
||||
* from the driver before returning from its napi->poll() routine. The poll()
|
||||
* routine is called either from busy_poll context or net_rx_action signaled
|
||||
@@ -221,7 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
|
||||
|
||||
for_each_set_bit(bit, bitmap, map->max_entries) {
|
||||
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
|
||||
struct net_device *netdev;
|
||||
struct xdp_bulk_queue *bq;
|
||||
|
||||
/* This is possible if the dev entry is removed by user space
|
||||
* between xdp redirect and flush op.
|
||||
@@ -230,9 +284,9 @@ void __dev_map_flush(struct bpf_map *map)
|
||||
continue;
|
||||
|
||||
__clear_bit(bit, bitmap);
|
||||
netdev = dev->dev;
|
||||
if (likely(netdev->netdev_ops->ndo_xdp_flush))
|
||||
netdev->netdev_ops->ndo_xdp_flush(netdev);
|
||||
|
||||
bq = this_cpu_ptr(dev->bulkq);
|
||||
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,37 +294,79 @@ void __dev_map_flush(struct bpf_map *map)
|
||||
* update happens in parallel here a dev_put wont happen until after reading the
|
||||
* ifindex.
|
||||
*/
|
||||
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct bpf_dtab_netdev *dev;
|
||||
struct bpf_dtab_netdev *obj;
|
||||
|
||||
if (key >= map->max_entries)
|
||||
return NULL;
|
||||
|
||||
dev = READ_ONCE(dtab->netdev_map[key]);
|
||||
return dev ? dev->dev : NULL;
|
||||
obj = READ_ONCE(dtab->netdev_map[key]);
|
||||
return obj;
|
||||
}
|
||||
|
||||
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
|
||||
* Thus, safe percpu variable access.
|
||||
*/
|
||||
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
|
||||
{
|
||||
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
|
||||
|
||||
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
|
||||
bq_xmit_all(obj, bq, 0);
|
||||
|
||||
/* Ingress dev_rx will be the same for all xdp_frame's in
|
||||
* bulk_queue, because bq stored per-CPU and must be flushed
|
||||
* from net_device drivers NAPI func end.
|
||||
*/
|
||||
if (!bq->dev_rx)
|
||||
bq->dev_rx = dev_rx;
|
||||
|
||||
bq->q[bq->count++] = xdpf;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
struct net_device *dev = dst->dev;
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
if (!dev->netdev_ops->ndo_xdp_xmit)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
xdpf = convert_to_xdp_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
return bq_enqueue(dst, xdpf, dev_rx);
|
||||
}
|
||||
|
||||
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
|
||||
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
|
||||
struct net_device *dev = obj ? obj->dev : NULL;
|
||||
|
||||
return dev ? &dev->ifindex : NULL;
|
||||
}
|
||||
|
||||
static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
|
||||
{
|
||||
if (dev->dev->netdev_ops->ndo_xdp_flush) {
|
||||
struct net_device *fl = dev->dev;
|
||||
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
|
||||
struct xdp_bulk_queue *bq;
|
||||
unsigned long *bitmap;
|
||||
|
||||
int cpu;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
|
||||
__clear_bit(dev->bit, bitmap);
|
||||
|
||||
fl->netdev_ops->ndo_xdp_flush(dev->dev);
|
||||
bq = per_cpu_ptr(dev->bulkq, cpu);
|
||||
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -281,6 +377,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
|
||||
|
||||
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
|
||||
dev_map_flush_old(dev);
|
||||
free_percpu(dev->bulkq);
|
||||
dev_put(dev->dev);
|
||||
kfree(dev);
|
||||
}
|
||||
@@ -313,6 +410,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
|
||||
struct bpf_dtab_netdev *dev, *old_dev;
|
||||
u32 i = *(u32 *)key;
|
||||
u32 ifindex = *(u32 *)value;
|
||||
@@ -327,13 +425,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
if (!ifindex) {
|
||||
dev = NULL;
|
||||
} else {
|
||||
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
|
||||
map->numa_node);
|
||||
dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
|
||||
if (!dev)
|
||||
return -ENOMEM;
|
||||
|
||||
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
|
||||
sizeof(void *), gfp);
|
||||
if (!dev->bulkq) {
|
||||
kfree(dev);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dev->dev = dev_get_by_index(net, ifindex);
|
||||
if (!dev->dev) {
|
||||
free_percpu(dev->bulkq);
|
||||
kfree(dev);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -405,6 +510,9 @@ static struct notifier_block dev_map_notifier = {
|
||||
|
||||
static int __init dev_map_init(void)
|
||||
{
|
||||
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
|
||||
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
|
||||
offsetof(struct _bpf_dtab_netdev, dev));
|
||||
register_netdevice_notifier(&dev_map_notifier);
|
||||
return 0;
|
||||
}
|
||||
|
@@ -503,7 +503,9 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;

*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
@@ -530,7 +532,9 @@ static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
const int ret = BPF_REG_0;
const int ref_reg = BPF_REG_1;

*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
offsetof(struct htab_elem, lru_node) +
@@ -1369,7 +1373,9 @@ static u32 htab_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;

*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
@@ -179,3 +179,18 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE,
};

#ifdef CONFIG_CGROUPS
BPF_CALL_0(bpf_get_current_cgroup_id)
{
struct cgroup *cgrp = task_dfl_cgroup(current);

return cgrp->kn->id.id;
}

const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
.func = bpf_get_current_cgroup_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
};
#endif
@@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct map_iter {
|
||||
void *key;
|
||||
bool done;
|
||||
};
|
||||
|
||||
static struct map_iter *map_iter(struct seq_file *m)
|
||||
{
|
||||
return m->private;
|
||||
}
|
||||
|
||||
static struct bpf_map *seq_file_to_map(struct seq_file *m)
|
||||
{
|
||||
return file_inode(m->file)->i_private;
|
||||
}
|
||||
|
||||
static void map_iter_free(struct map_iter *iter)
|
||||
{
|
||||
if (iter) {
|
||||
kfree(iter->key);
|
||||
kfree(iter);
|
||||
}
|
||||
}
|
||||
|
||||
static struct map_iter *map_iter_alloc(struct bpf_map *map)
|
||||
{
|
||||
struct map_iter *iter;
|
||||
|
||||
iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!iter)
|
||||
goto error;
|
||||
|
||||
iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN);
|
||||
if (!iter->key)
|
||||
goto error;
|
||||
|
||||
return iter;
|
||||
|
||||
error:
|
||||
map_iter_free(iter);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct bpf_map *map = seq_file_to_map(m);
|
||||
void *key = map_iter(m)->key;
|
||||
|
||||
if (map_iter(m)->done)
|
||||
return NULL;
|
||||
|
||||
if (unlikely(v == SEQ_START_TOKEN))
|
||||
goto done;
|
||||
|
||||
if (map->ops->map_get_next_key(map, key, key)) {
|
||||
map_iter(m)->done = true;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
done:
|
||||
++(*pos);
|
||||
return key;
|
||||
}
|
||||
|
||||
static void *map_seq_start(struct seq_file *m, loff_t *pos)
|
||||
{
|
||||
if (map_iter(m)->done)
|
||||
return NULL;
|
||||
|
||||
return *pos ? map_iter(m)->key : SEQ_START_TOKEN;
|
||||
}
|
||||
|
||||
static void map_seq_stop(struct seq_file *m, void *v)
|
||||
{
|
||||
}
|
||||
|
||||
static int map_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct bpf_map *map = seq_file_to_map(m);
|
||||
void *key = map_iter(m)->key;
|
||||
|
||||
if (unlikely(v == SEQ_START_TOKEN)) {
|
||||
seq_puts(m, "# WARNING!! The output is for debug purpose only\n");
|
||||
seq_puts(m, "# WARNING!! The output format will change\n");
|
||||
} else {
|
||||
map->ops->map_seq_show_elem(map, key, m);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations bpffs_map_seq_ops = {
|
||||
.start = map_seq_start,
|
||||
.next = map_seq_next,
|
||||
.show = map_seq_show,
|
||||
.stop = map_seq_stop,
|
||||
};
|
||||
|
||||
static int bpffs_map_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct bpf_map *map = inode->i_private;
|
||||
struct map_iter *iter;
|
||||
struct seq_file *m;
|
||||
int err;
|
||||
|
||||
iter = map_iter_alloc(map);
|
||||
if (!iter)
|
||||
return -ENOMEM;
|
||||
|
||||
err = seq_open(file, &bpffs_map_seq_ops);
|
||||
if (err) {
|
||||
map_iter_free(iter);
|
||||
return err;
|
||||
}
|
||||
|
||||
m = file->private_data;
|
||||
m->private = iter;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpffs_map_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *m = file->private_data;
|
||||
|
||||
map_iter_free(map_iter(m));
|
||||
|
||||
return seq_release(inode, file);
|
||||
}
|
||||
|
||||
/* bpffs_map_fops should only implement the basic
|
||||
* read operation for a BPF map. The purpose is to
|
||||
* provide a simple user intuitive way to do
|
||||
* "cat bpffs/pathto/a-pinned-map".
|
||||
*
|
||||
* Other operations (e.g. write, lookup...) should be realized by
|
||||
* the userspace tools (e.g. bpftool) through the
|
||||
* BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update
|
||||
* interface.
|
||||
*/
|
||||
static const struct file_operations bpffs_map_fops = {
|
||||
.open = bpffs_map_open,
|
||||
.read = seq_read,
|
||||
.release = bpffs_map_release,
|
||||
};
|
||||
|
||||
static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
|
||||
const struct inode_operations *iops)
|
||||
const struct inode_operations *iops,
|
||||
const struct file_operations *fops)
|
||||
{
|
||||
struct inode *dir = dentry->d_parent->d_inode;
|
||||
struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
|
||||
@@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
|
||||
return PTR_ERR(inode);
|
||||
|
||||
inode->i_op = iops;
|
||||
inode->i_fop = fops;
|
||||
inode->i_private = raw;
|
||||
|
||||
bpf_dentry_finalize(dentry, inode, dir);
|
||||
@@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
|
||||
|
||||
static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
|
||||
{
|
||||
return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
|
||||
return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL);
|
||||
}
|
||||
|
||||
static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
|
||||
{
|
||||
return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops);
|
||||
struct bpf_map *map = arg;
|
||||
|
||||
return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
|
||||
map->btf ? &bpffs_map_fops : NULL);
|
||||
}
|
||||
|
||||
static struct dentry *
|
||||
@@ -279,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
|
||||
ret = bpf_obj_do_pin(pname, raw, type);
|
||||
if (ret != 0)
|
||||
bpf_any_put(raw, type);
|
||||
if ((trace_bpf_obj_pin_prog_enabled() ||
|
||||
trace_bpf_obj_pin_map_enabled()) && !ret) {
|
||||
if (type == BPF_TYPE_PROG)
|
||||
trace_bpf_obj_pin_prog(raw, ufd, pname);
|
||||
if (type == BPF_TYPE_MAP)
|
||||
trace_bpf_obj_pin_map(raw, ufd, pname);
|
||||
}
|
||||
out:
|
||||
putname(pname);
|
||||
return ret;
|
||||
@@ -352,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
|
||||
else
|
||||
goto out;
|
||||
|
||||
if (ret < 0) {
|
||||
if (ret < 0)
|
||||
bpf_any_put(raw, type);
|
||||
} else if (trace_bpf_obj_get_prog_enabled() ||
|
||||
trace_bpf_obj_get_map_enabled()) {
|
||||
if (type == BPF_TYPE_PROG)
|
||||
trace_bpf_obj_get_prog(raw, ret, pname);
|
||||
if (type == BPF_TYPE_MAP)
|
||||
trace_bpf_obj_get_map(raw, ret, pname);
|
||||
}
|
||||
out:
|
||||
putname(pname);
|
||||
return ret;
|
||||
|
@@ -1,5 +1,5 @@
/*
 * Copyright (C) 2017 Netronome Systems, Inc.
 * Copyright (C) 2017-2018 Netronome Systems, Inc.
 *
 * This software is licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
struct bpf_prog_offload *offload;
bool ret;

if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
if (!bpf_prog_is_dev_bound(prog->aux))
return false;
if (!bpf_map_is_dev_bound(map))
return bpf_map_offload_neutral(map);

down_read(&bpf_devs_lock);
offload = prog->aux->offload;
||||
|
@@ -48,14 +48,40 @@
|
||||
#define SOCK_CREATE_FLAG_MASK \
|
||||
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
|
||||
|
||||
struct bpf_stab {
|
||||
struct bpf_map map;
|
||||
struct sock **sock_map;
|
||||
struct bpf_sock_progs {
|
||||
struct bpf_prog *bpf_tx_msg;
|
||||
struct bpf_prog *bpf_parse;
|
||||
struct bpf_prog *bpf_verdict;
|
||||
};
|
||||
|
||||
struct bpf_stab {
|
||||
struct bpf_map map;
|
||||
struct sock **sock_map;
|
||||
struct bpf_sock_progs progs;
|
||||
};
|
||||
|
||||
struct bucket {
|
||||
struct hlist_head head;
|
||||
raw_spinlock_t lock;
|
||||
};
|
||||
|
||||
struct bpf_htab {
|
||||
struct bpf_map map;
|
||||
struct bucket *buckets;
|
||||
atomic_t count;
|
||||
u32 n_buckets;
|
||||
u32 elem_size;
|
||||
struct bpf_sock_progs progs;
|
||||
};
|
||||
|
||||
struct htab_elem {
|
||||
struct rcu_head rcu;
|
||||
struct hlist_node hash_node;
|
||||
u32 hash;
|
||||
struct sock *sk;
|
||||
char key[0];
|
||||
};
|
||||
|
||||
enum smap_psock_state {
|
||||
SMAP_TX_RUNNING,
|
||||
};
|
||||
@@ -63,6 +89,8 @@ enum smap_psock_state {
|
||||
struct smap_psock_map_entry {
|
||||
struct list_head list;
|
||||
struct sock **entry;
|
||||
struct htab_elem *hash_link;
|
||||
struct bpf_htab *htab;
|
||||
};
|
||||
|
||||
struct smap_psock {
|
||||
@@ -191,6 +219,12 @@ out:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
|
||||
{
|
||||
atomic_dec(&htab->count);
|
||||
kfree_rcu(l, rcu);
|
||||
}
|
||||
|
||||
static void bpf_tcp_close(struct sock *sk, long timeout)
|
||||
{
|
||||
void (*close_fun)(struct sock *sk, long timeout);
|
||||
@@ -227,10 +261,16 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
|
||||
osk = cmpxchg(e->entry, sk, NULL);
|
||||
if (osk == sk) {
|
||||
list_del(&e->list);
|
||||
smap_release_sock(psock, sk);
|
||||
if (e->entry) {
|
||||
osk = cmpxchg(e->entry, sk, NULL);
|
||||
if (osk == sk) {
|
||||
list_del(&e->list);
|
||||
smap_release_sock(psock, sk);
|
||||
}
|
||||
} else {
|
||||
hlist_del_rcu(&e->hash_link->hash_node);
|
||||
smap_release_sock(psock, e->hash_link->sk);
|
||||
free_htab_elem(e->htab, e->hash_link);
|
||||
}
|
||||
}
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
@@ -461,7 +501,7 @@ static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
|
||||
static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
|
||||
{
|
||||
return ((_rc == SK_PASS) ?
|
||||
(md->map ? __SK_REDIRECT : __SK_PASS) :
|
||||
(md->sk_redir ? __SK_REDIRECT : __SK_PASS) :
|
||||
__SK_DROP);
|
||||
}
|
||||
|
||||
@@ -483,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
|
||||
}
|
||||
|
||||
bpf_compute_data_pointers_sg(md);
|
||||
md->sk = sk;
|
||||
rc = (*prog->bpf_func)(md, prog->insnsi);
|
||||
psock->apply_bytes = md->apply_bytes;
|
||||
|
||||
@@ -1092,7 +1133,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
|
||||
* when we orphan the skb so that we don't have the possibility
|
||||
* to reference a stale map.
|
||||
*/
|
||||
TCP_SKB_CB(skb)->bpf.map = NULL;
|
||||
TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
|
||||
skb->sk = psock->sock;
|
||||
bpf_compute_data_pointers(skb);
|
||||
preempt_disable();
|
||||
@@ -1102,7 +1143,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
|
||||
|
||||
/* Moving return codes from UAPI namespace into internal namespace */
|
||||
return rc == SK_PASS ?
|
||||
(TCP_SKB_CB(skb)->bpf.map ? __SK_REDIRECT : __SK_PASS) :
|
||||
(TCP_SKB_CB(skb)->bpf.sk_redir ? __SK_REDIRECT : __SK_PASS) :
|
||||
__SK_DROP;
|
||||
}
|
||||
|
||||
@@ -1372,7 +1413,6 @@ static int smap_init_sock(struct smap_psock *psock,
|
||||
}
|
||||
|
||||
static void smap_init_progs(struct smap_psock *psock,
|
||||
struct bpf_stab *stab,
|
||||
struct bpf_prog *verdict,
|
||||
struct bpf_prog *parse)
|
||||
{
|
||||
@@ -1450,14 +1490,13 @@ static void smap_gc_work(struct work_struct *w)
|
||||
kfree(psock);
|
||||
}
|
||||
|
||||
static struct smap_psock *smap_init_psock(struct sock *sock,
|
||||
struct bpf_stab *stab)
|
||||
static struct smap_psock *smap_init_psock(struct sock *sock, int node)
|
||||
{
|
||||
struct smap_psock *psock;
|
||||
|
||||
psock = kzalloc_node(sizeof(struct smap_psock),
|
||||
GFP_ATOMIC | __GFP_NOWARN,
|
||||
stab->map.numa_node);
|
||||
node);
|
||||
if (!psock)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
@@ -1525,12 +1564,14 @@ free_stab:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void smap_list_remove(struct smap_psock *psock, struct sock **entry)
|
||||
static void smap_list_remove(struct smap_psock *psock,
|
||||
struct sock **entry,
|
||||
struct htab_elem *hash_link)
|
||||
{
|
||||
struct smap_psock_map_entry *e, *tmp;
|
||||
|
||||
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
|
||||
if (e->entry == entry) {
|
||||
if (e->entry == entry || e->hash_link == hash_link) {
|
||||
list_del(&e->list);
|
||||
break;
|
||||
}
|
||||
@@ -1568,7 +1609,7 @@ static void sock_map_free(struct bpf_map *map)
|
||||
* to be null and queued for garbage collection.
|
||||
*/
|
||||
if (likely(psock)) {
|
||||
smap_list_remove(psock, &stab->sock_map[i]);
|
||||
smap_list_remove(psock, &stab->sock_map[i], NULL);
|
||||
smap_release_sock(psock, sock);
|
||||
}
|
||||
write_unlock_bh(&sock->sk_callback_lock);
|
||||
@@ -1627,7 +1668,7 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
|
||||
|
||||
if (psock->bpf_parse)
|
||||
smap_stop_sock(psock, sock);
|
||||
smap_list_remove(psock, &stab->sock_map[k]);
|
||||
smap_list_remove(psock, &stab->sock_map[k], NULL);
|
||||
smap_release_sock(psock, sock);
|
||||
out:
|
||||
write_unlock_bh(&sock->sk_callback_lock);
|
||||
@@ -1662,40 +1703,26 @@ out:
|
||||
* - sock_map must use READ_ONCE and (cmp)xchg operations
|
||||
* - BPF verdict/parse programs must use READ_ONCE and xchg operations
|
||||
*/
|
||||
static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
struct bpf_map *map,
|
||||
void *key, u64 flags)
|
||||
|
||||
static int __sock_map_ctx_update_elem(struct bpf_map *map,
|
||||
struct bpf_sock_progs *progs,
|
||||
struct sock *sock,
|
||||
struct sock **map_link,
|
||||
void *key)
|
||||
{
|
||||
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
||||
struct smap_psock_map_entry *e = NULL;
|
||||
struct bpf_prog *verdict, *parse, *tx_msg;
|
||||
struct sock *osock, *sock;
|
||||
struct smap_psock_map_entry *e = NULL;
|
||||
struct smap_psock *psock;
|
||||
u32 i = *(u32 *)key;
|
||||
bool new = false;
|
||||
int err;
|
||||
|
||||
if (unlikely(flags > BPF_EXIST))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(i >= stab->map.max_entries))
|
||||
return -E2BIG;
|
||||
|
||||
sock = READ_ONCE(stab->sock_map[i]);
|
||||
if (flags == BPF_EXIST && !sock)
|
||||
return -ENOENT;
|
||||
else if (flags == BPF_NOEXIST && sock)
|
||||
return -EEXIST;
|
||||
|
||||
sock = skops->sk;
|
||||
int err = 0;
|
||||
|
||||
/* 1. If sock map has BPF programs those will be inherited by the
|
||||
* sock being added. If the sock is already attached to BPF programs
|
||||
* this results in an error.
|
||||
*/
|
||||
verdict = READ_ONCE(stab->bpf_verdict);
|
||||
parse = READ_ONCE(stab->bpf_parse);
|
||||
tx_msg = READ_ONCE(stab->bpf_tx_msg);
|
||||
verdict = READ_ONCE(progs->bpf_verdict);
|
||||
parse = READ_ONCE(progs->bpf_parse);
|
||||
tx_msg = READ_ONCE(progs->bpf_tx_msg);
|
||||
|
||||
if (parse && verdict) {
|
||||
/* bpf prog refcnt may be zero if a concurrent attach operation
|
||||
@@ -1748,7 +1775,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
goto out_progs;
|
||||
}
|
||||
} else {
|
||||
psock = smap_init_psock(sock, stab);
|
||||
psock = smap_init_psock(sock, map->numa_node);
|
||||
if (IS_ERR(psock)) {
|
||||
err = PTR_ERR(psock);
|
||||
goto out_progs;
|
||||
@@ -1758,12 +1785,13 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
new = true;
|
||||
}
|
||||
|
||||
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!e) {
|
||||
err = -ENOMEM;
|
||||
goto out_progs;
|
||||
if (map_link) {
|
||||
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!e) {
|
||||
err = -ENOMEM;
|
||||
goto out_progs;
|
||||
}
|
||||
}
|
||||
e->entry = &stab->sock_map[i];
|
||||
|
||||
/* 3. At this point we have a reference to a valid psock that is
|
||||
* running. Attach any BPF programs needed.
|
||||
@@ -1780,7 +1808,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
err = smap_init_sock(psock, sock);
|
||||
if (err)
|
||||
goto out_free;
|
||||
smap_init_progs(psock, stab, verdict, parse);
|
||||
smap_init_progs(psock, verdict, parse);
|
||||
smap_start_sock(psock, sock);
|
||||
}
|
||||
|
||||
@@ -1789,19 +1817,12 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
* it with. Because we can only have a single set of programs if
|
||||
* old_sock has a strp we can stop it.
|
||||
*/
|
||||
list_add_tail(&e->list, &psock->maps);
|
||||
write_unlock_bh(&sock->sk_callback_lock);
|
||||
|
||||
osock = xchg(&stab->sock_map[i], sock);
|
||||
if (osock) {
|
||||
struct smap_psock *opsock = smap_psock_sk(osock);
|
||||
|
||||
write_lock_bh(&osock->sk_callback_lock);
|
||||
smap_list_remove(opsock, &stab->sock_map[i]);
|
||||
smap_release_sock(opsock, osock);
|
||||
write_unlock_bh(&osock->sk_callback_lock);
|
||||
if (map_link) {
|
||||
e->entry = map_link;
|
||||
list_add_tail(&e->list, &psock->maps);
|
||||
}
|
||||
return 0;
|
||||
write_unlock_bh(&sock->sk_callback_lock);
|
||||
return err;
|
||||
out_free:
|
||||
smap_release_sock(psock, sock);
|
||||
out_progs:
|
||||
@@ -1816,23 +1837,73 @@ out_progs:
|
||||
return err;
|
||||
}
|
||||
|
||||
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
|
||||
static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
struct bpf_map *map,
|
||||
void *key, u64 flags)
|
||||
{
|
||||
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
||||
struct bpf_sock_progs *progs = &stab->progs;
|
||||
struct sock *osock, *sock;
|
||||
u32 i = *(u32 *)key;
|
||||
int err;
|
||||
|
||||
if (unlikely(flags > BPF_EXIST))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(i >= stab->map.max_entries))
|
||||
return -E2BIG;
|
||||
|
||||
sock = READ_ONCE(stab->sock_map[i]);
|
||||
if (flags == BPF_EXIST && !sock)
|
||||
return -ENOENT;
|
||||
else if (flags == BPF_NOEXIST && sock)
|
||||
return -EEXIST;
|
||||
|
||||
sock = skops->sk;
|
||||
err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
|
||||
key);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
osock = xchg(&stab->sock_map[i], sock);
|
||||
if (osock) {
|
||||
struct smap_psock *opsock = smap_psock_sk(osock);
|
||||
|
||||
write_lock_bh(&osock->sk_callback_lock);
|
||||
smap_list_remove(opsock, &stab->sock_map[i], NULL);
|
||||
smap_release_sock(opsock, osock);
|
||||
write_unlock_bh(&osock->sk_callback_lock);
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
|
||||
{
|
||||
struct bpf_sock_progs *progs;
|
||||
struct bpf_prog *orig;
|
||||
|
||||
if (unlikely(map->map_type != BPF_MAP_TYPE_SOCKMAP))
|
||||
if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
|
||||
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
||||
|
||||
progs = &stab->progs;
|
||||
} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH) {
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
|
||||
progs = &htab->progs;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case BPF_SK_MSG_VERDICT:
|
||||
orig = xchg(&stab->bpf_tx_msg, prog);
|
||||
orig = xchg(&progs->bpf_tx_msg, prog);
|
||||
break;
|
||||
case BPF_SK_SKB_STREAM_PARSER:
|
||||
orig = xchg(&stab->bpf_parse, prog);
|
||||
orig = xchg(&progs->bpf_parse, prog);
|
||||
break;
|
||||
case BPF_SK_SKB_STREAM_VERDICT:
|
||||
orig = xchg(&stab->bpf_verdict, prog);
|
||||
orig = xchg(&progs->bpf_verdict, prog);
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
@@ -1880,21 +1951,421 @@ static int sock_map_update_elem(struct bpf_map *map,
|
||||
|
||||
static void sock_map_release(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
||||
struct bpf_sock_progs *progs;
|
||||
struct bpf_prog *orig;
|
||||
|
||||
orig = xchg(&stab->bpf_parse, NULL);
|
||||
if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
|
||||
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
||||
|
||||
progs = &stab->progs;
|
||||
} else {
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
|
||||
progs = &htab->progs;
|
||||
}
|
||||
|
||||
orig = xchg(&progs->bpf_parse, NULL);
|
||||
if (orig)
|
||||
bpf_prog_put(orig);
|
||||
orig = xchg(&stab->bpf_verdict, NULL);
|
||||
orig = xchg(&progs->bpf_verdict, NULL);
|
||||
if (orig)
|
||||
bpf_prog_put(orig);
|
||||
|
||||
orig = xchg(&stab->bpf_tx_msg, NULL);
|
||||
orig = xchg(&progs->bpf_tx_msg, NULL);
|
||||
if (orig)
|
||||
bpf_prog_put(orig);
|
||||
}
|
||||
|
||||
static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
{
	struct bpf_htab *htab;
	int i, err;
	u64 cost;

	if (!capable(CAP_NET_ADMIN))
		return ERR_PTR(-EPERM);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->value_size != 4 ||
	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
		return ERR_PTR(-EINVAL);

	if (attr->key_size > MAX_BPF_STACK)
		/* eBPF programs initialize keys on stack, so they cannot be
		 * larger than max stack size
		 */
		return ERR_PTR(-E2BIG);

	err = bpf_tcp_ulp_register();
	if (err && err != -EEXIST)
		return ERR_PTR(err);

	htab = kzalloc(sizeof(*htab), GFP_USER);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&htab->map, attr);

	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	err = -EINVAL;
	if (htab->n_buckets == 0 ||
	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
	       (u64) htab->elem_size * htab->map.max_entries;

	if (cost >= U32_MAX - PAGE_SIZE)
		goto free_htab;

	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
	err = bpf_map_precharge_memlock(htab->map.pages);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(
				htab->n_buckets * sizeof(struct bucket),
				htab->map.numa_node);
	if (!htab->buckets)
		goto free_htab;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_HEAD(&htab->buckets[i].head);
		raw_spin_lock_init(&htab->buckets[i].lock);
	}

	return &htab->map;
free_htab:
	kfree(htab);
	return ERR_PTR(err);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}
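
Because max_entries is rounded up to a power of two, __select_bucket() can pick a bucket with a single mask instead of a modulo. A small standalone illustration, ordinary userspace C rather than kernel code, of why the two are equivalent for power-of-two bucket counts:

/* Standalone illustration (not kernel code): with a power-of-two bucket
 * count, masking with (n - 1) selects the same bucket as hash % n, which
 * is why sock_hash_alloc() uses roundup_pow_of_two() on max_entries.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t n_buckets = 16;	/* must be a power of two */
	uint32_t hash = 0xdeadbeef;	/* arbitrary sample hash */

	assert((hash & (n_buckets - 1)) == (hash % n_buckets));
	printf("bucket = %u\n", hash & (n_buckets - 1));
	return 0;
}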
|
||||
|
||||
static void sock_hash_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
int i;
|
||||
|
||||
synchronize_rcu();
|
||||
|
||||
	/* At this point no update, lookup or delete operations can happen.
	 * However, we can still get socket state event updates and data
	 * ready callbacks that reference the psock from sk_user_data. Also
	 * psock worker threads are still in-flight, so smap_release_sock()
	 * will only free the psock after cancel_sync on the worker threads
	 * and a grace period expires, to ensure the psock is really safe
	 * to remove.
	 */
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < htab->n_buckets; i++) {
|
||||
struct hlist_head *head = select_bucket(htab, i);
|
||||
struct hlist_node *n;
|
||||
struct htab_elem *l;
|
||||
|
||||
hlist_for_each_entry_safe(l, n, head, hash_node) {
|
||||
struct sock *sock = l->sk;
|
||||
struct smap_psock *psock;
|
||||
|
||||
hlist_del_rcu(&l->hash_node);
|
||||
write_lock_bh(&sock->sk_callback_lock);
|
||||
psock = smap_psock_sk(sock);
|
||||
/* This check handles a racing sock event that can get
|
||||
* the sk_callback_lock before this case but after xchg
|
||||
* causing the refcnt to hit zero and sock user data
|
||||
* (psock) to be null and queued for garbage collection.
|
||||
*/
|
||||
if (likely(psock)) {
|
||||
smap_list_remove(psock, NULL, l);
|
||||
smap_release_sock(psock, sock);
|
||||
}
|
||||
write_unlock_bh(&sock->sk_callback_lock);
|
||||
kfree(l);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
bpf_map_area_free(htab->buckets);
|
||||
kfree(htab);
|
||||
}
|
||||
|
||||
static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
|
||||
void *key, u32 key_size, u32 hash,
|
||||
struct sock *sk,
|
||||
struct htab_elem *old_elem)
|
||||
{
|
||||
struct htab_elem *l_new;
|
||||
|
||||
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
|
||||
if (!old_elem) {
|
||||
atomic_dec(&htab->count);
|
||||
return ERR_PTR(-E2BIG);
|
||||
}
|
||||
}
|
||||
l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
|
||||
htab->map.numa_node);
|
||||
if (!l_new)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
memcpy(l_new->key, key, key_size);
|
||||
l_new->sk = sk;
|
||||
l_new->hash = hash;
|
||||
return l_new;
|
||||
}
|
||||
|
||||
static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
					 u32 hash, void *key, u32 key_size)
{
	struct htab_elem *l;

	hlist_for_each_entry_rcu(l, head, hash_node) {
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;
	}

	return NULL;
}

static inline u32 htab_map_hash(const void *key, u32 key_len)
{
	return jhash(key, key_len, 0);
}
|
||||
|
||||
static int sock_hash_get_next_key(struct bpf_map *map,
|
||||
void *key, void *next_key)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
struct htab_elem *l, *next_l;
|
||||
struct hlist_head *h;
|
||||
u32 hash, key_size;
|
||||
int i = 0;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
key_size = map->key_size;
|
||||
if (!key)
|
||||
goto find_first_elem;
|
||||
hash = htab_map_hash(key, key_size);
|
||||
h = select_bucket(htab, hash);
|
||||
|
||||
l = lookup_elem_raw(h, hash, key, key_size);
|
||||
if (!l)
|
||||
goto find_first_elem;
|
||||
next_l = hlist_entry_safe(
|
||||
rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
|
||||
struct htab_elem, hash_node);
|
||||
if (next_l) {
|
||||
memcpy(next_key, next_l->key, key_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* no more elements in this hash list, go to the next bucket */
|
||||
i = hash & (htab->n_buckets - 1);
|
||||
i++;
|
||||
|
||||
find_first_elem:
|
||||
/* iterate over buckets */
|
||||
for (; i < htab->n_buckets; i++) {
|
||||
h = select_bucket(htab, i);
|
||||
|
||||
/* pick first element in the bucket */
|
||||
next_l = hlist_entry_safe(
|
||||
rcu_dereference_raw(hlist_first_rcu(h)),
|
||||
struct htab_elem, hash_node);
|
||||
if (next_l) {
|
||||
/* if it's not empty, just return it */
|
||||
memcpy(next_key, next_l->key, key_size);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* iterated over all buckets and all elements */
|
||||
return -ENOENT;
|
||||
}
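
sock_hash_get_next_key() backs the BPF_MAP_GET_NEXT_KEY command, so a sockhash can be walked from user space like any other hash map. A hedged sketch of such a walk over the raw bpf(2) syscall, assuming a map created with a 4-byte key; passing a NULL key is expected to return the first key:

#include <errno.h>
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Assumed: map_fd refers to a BPF_MAP_TYPE_SOCKHASH created with key_size 4. */
static int map_next_key(int map_fd, const void *key, void *next_key)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (unsigned long) key;		/* NULL key asks for the first key */
	attr.next_key = (unsigned long) next_key;
	return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}

static void dump_keys(int map_fd)
{
	unsigned int key, next;
	int err;

	err = map_next_key(map_fd, NULL, &next);
	while (!err) {
		printf("key: %u\n", next);
		key = next;
		err = map_next_key(map_fd, &key, &next);
	}
	if (errno != ENOENT)	/* ENOENT means the walk simply finished */
		perror("BPF_MAP_GET_NEXT_KEY");
}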
|
||||
|
||||
static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
|
||||
struct bpf_map *map,
|
||||
void *key, u64 map_flags)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
struct bpf_sock_progs *progs = &htab->progs;
|
||||
struct htab_elem *l_new = NULL, *l_old;
|
||||
struct smap_psock_map_entry *e = NULL;
|
||||
struct hlist_head *head;
|
||||
struct smap_psock *psock;
|
||||
u32 key_size, hash;
|
||||
struct sock *sock;
|
||||
struct bucket *b;
|
||||
int err;
|
||||
|
||||
sock = skops->sk;
|
||||
|
||||
if (sock->sk_type != SOCK_STREAM ||
|
||||
sock->sk_protocol != IPPROTO_TCP)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (unlikely(map_flags > BPF_EXIST))
|
||||
return -EINVAL;
|
||||
|
||||
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
|
||||
if (!e)
|
||||
return -ENOMEM;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
key_size = map->key_size;
|
||||
hash = htab_map_hash(key, key_size);
|
||||
b = __select_bucket(htab, hash);
|
||||
head = &b->head;
|
||||
|
||||
err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
/* bpf_map_update_elem() can be called in_irq() */
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
l_old = lookup_elem_raw(head, hash, key, key_size);
|
||||
if (l_old && map_flags == BPF_NOEXIST) {
|
||||
err = -EEXIST;
|
||||
goto bucket_err;
|
||||
}
|
||||
if (!l_old && map_flags == BPF_EXIST) {
|
||||
err = -ENOENT;
|
||||
goto bucket_err;
|
||||
}
|
||||
|
||||
l_new = alloc_sock_hash_elem(htab, key, key_size, hash, sock, l_old);
|
||||
if (IS_ERR(l_new)) {
|
||||
err = PTR_ERR(l_new);
|
||||
goto bucket_err;
|
||||
}
|
||||
|
||||
psock = smap_psock_sk(sock);
|
||||
if (unlikely(!psock)) {
|
||||
err = -EINVAL;
|
||||
goto bucket_err;
|
||||
}
|
||||
|
||||
e->hash_link = l_new;
|
||||
e->htab = container_of(map, struct bpf_htab, map);
|
||||
list_add_tail(&e->list, &psock->maps);
|
||||
|
||||
/* add new element to the head of the list, so that
|
||||
* concurrent search will find it before old elem
|
||||
*/
|
||||
hlist_add_head_rcu(&l_new->hash_node, head);
|
||||
if (l_old) {
|
||||
psock = smap_psock_sk(l_old->sk);
|
||||
|
||||
hlist_del_rcu(&l_old->hash_node);
|
||||
smap_list_remove(psock, NULL, l_old);
|
||||
smap_release_sock(psock, l_old->sk);
|
||||
free_htab_elem(htab, l_old);
|
||||
}
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
return 0;
|
||||
bucket_err:
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
err:
|
||||
kfree(e);
|
||||
psock = smap_psock_sk(sock);
|
||||
if (psock)
|
||||
smap_release_sock(psock, sock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int sock_hash_update_elem(struct bpf_map *map,
				 void *key, void *value, u64 flags)
{
	struct bpf_sock_ops_kern skops;
	u32 fd = *(u32 *)value;
	struct socket *socket;
	int err;

	socket = sockfd_lookup(fd, &err);
	if (!socket)
		return err;

	skops.sk = socket->sk;
	if (!skops.sk) {
		fput(socket->file);
		return -EINVAL;
	}

	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
	fput(socket->file);
	return err;
}
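
From user space the value written into a sockhash is the socket's file descriptor, which sock_hash_update_elem() resolves with sockfd_lookup(). A rough sketch using the raw bpf(2) syscall, not taken from this patch, that creates a SOCKHASH and inserts a connected TCP socket under an arbitrary key; BPF_MAP_TYPE_SOCKHASH only exists on kernels carrying this series:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long sys_bpf(int cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

/* Assumed: sock_fd is a connected TCP socket; the key value 1 is arbitrary.
 * Returns the new map fd on success, -1 on error.
 */
int add_sock_to_hash(int sock_fd)
{
	union bpf_attr attr;
	unsigned int key = 1, value = sock_fd;
	int map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_SOCKHASH;
	attr.key_size = sizeof(key);
	attr.value_size = sizeof(value);	/* must be 4, see sock_hash_alloc() */
	attr.max_entries = 64;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (map_fd < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (unsigned long) &key;
	attr.value = (unsigned long) &value;	/* socket fd, resolved via sockfd_lookup() */
	attr.flags = BPF_ANY;
	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr) ? -1 : map_fd;
}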
|
||||
|
||||
static int sock_hash_delete_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
struct hlist_head *head;
|
||||
struct bucket *b;
|
||||
struct htab_elem *l;
|
||||
u32 hash, key_size;
|
||||
int ret = -ENOENT;
|
||||
|
||||
key_size = map->key_size;
|
||||
hash = htab_map_hash(key, key_size);
|
||||
b = __select_bucket(htab, hash);
|
||||
head = &b->head;
|
||||
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
l = lookup_elem_raw(head, hash, key, key_size);
|
||||
if (l) {
|
||||
struct sock *sock = l->sk;
|
||||
struct smap_psock *psock;
|
||||
|
||||
hlist_del_rcu(&l->hash_node);
|
||||
write_lock_bh(&sock->sk_callback_lock);
|
||||
psock = smap_psock_sk(sock);
|
||||
/* This check handles a racing sock event that can get the
|
||||
* sk_callback_lock before this case but after xchg happens
|
||||
* causing the refcnt to hit zero and sock user data (psock)
|
||||
* to be null and queued for garbage collection.
|
||||
*/
|
||||
if (likely(psock)) {
|
||||
smap_list_remove(psock, NULL, l);
|
||||
smap_release_sock(psock, sock);
|
||||
}
|
||||
write_unlock_bh(&sock->sk_callback_lock);
|
||||
free_htab_elem(htab, l);
|
||||
ret = 0;
|
||||
}
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
|
||||
struct hlist_head *head;
|
||||
struct htab_elem *l;
|
||||
u32 key_size, hash;
|
||||
struct bucket *b;
|
||||
struct sock *sk;
|
||||
|
||||
key_size = map->key_size;
|
||||
hash = htab_map_hash(key, key_size);
|
||||
b = __select_bucket(htab, hash);
|
||||
head = &b->head;
|
||||
|
||||
raw_spin_lock_bh(&b->lock);
|
||||
l = lookup_elem_raw(head, hash, key, key_size);
|
||||
sk = l ? l->sk : NULL;
|
||||
raw_spin_unlock_bh(&b->lock);
|
||||
return sk;
|
||||
}
|
||||
|
||||
const struct bpf_map_ops sock_map_ops = {
	.map_alloc = sock_map_alloc,
	.map_free = sock_map_free,
@@ -1905,6 +2376,15 @@ const struct bpf_map_ops sock_map_ops = {
	.map_release_uref = sock_map_release,
};

const struct bpf_map_ops sock_hash_ops = {
	.map_alloc = sock_hash_alloc,
	.map_free = sock_hash_free,
	.map_lookup_elem = sock_map_lookup,
	.map_get_next_key = sock_hash_get_next_key,
	.map_update_elem = sock_hash_update_elem,
	.map_delete_elem = sock_hash_delete_elem,
};

BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
	   struct bpf_map *, map, void *, key, u64, flags)
{
@@ -1922,3 +2402,21 @@ const struct bpf_func_proto bpf_sock_map_update_proto = {
	.arg3_type = ARG_PTR_TO_MAP_KEY,
	.arg4_type = ARG_ANYTHING,
};

BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
	   struct bpf_map *, map, void *, key, u64, flags)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
}

const struct bpf_func_proto bpf_sock_hash_update_proto = {
	.func = bpf_sock_hash_update,
	.gpl_only = false,
	.pkt_access = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_CONST_MAP_PTR,
	.arg3_type = ARG_PTR_TO_MAP_KEY,
	.arg4_type = ARG_ANYTHING,
};
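
The new helper is meant to be called from BPF_PROG_TYPE_SOCK_OPS programs. A minimal hedged sketch, modeled on the era's samples rather than taken from this patch, that adds established TCP sockets to a SOCKHASH via bpf_sock_hash_update(); the struct bpf_map_def layout and the local_port key choice are assumptions, and the helper pointer mirrors bpf_sock_hash_update_proto above:

#include <linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

/* Helper id from the patched uapi; signature matches bpf_sock_hash_update_proto. */
static int (*bpf_sock_hash_update)(void *ctx, void *map, void *key, __u64 flags) =
	(void *) BPF_FUNC_sock_hash_update;

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

struct bpf_map_def SEC("maps") sock_hash = {
	.type = BPF_MAP_TYPE_SOCKHASH,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u32),	/* sockhash requires value_size == 4 */
	.max_entries = 64,
};

SEC("sockops")
int add_established(struct bpf_sock_ops *skops)
{
	__u32 key = skops->local_port;	/* arbitrary key choice for the sketch */

	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* insert this socket into the hash so sk/msg redirects can find it */
		bpf_sock_hash_update(skops, &sock_hash, &key, BPF_ANY);
		break;
	}
	return 0;
}

char _license[] SEC("license") = "GPL";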
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include "percpu_freelist.h"
|
||||
|
||||
#define STACK_CREATE_FLAG_MASK \
|
||||
@@ -32,6 +33,23 @@ struct bpf_stack_map {
|
||||
struct stack_map_bucket *buckets[];
|
||||
};
|
||||
|
||||
/* irq_work to run up_read() for build_id lookup in nmi context */
|
||||
struct stack_map_irq_work {
|
||||
struct irq_work irq_work;
|
||||
struct rw_semaphore *sem;
|
||||
};
|
||||
|
||||
static void do_up_read(struct irq_work *entry)
|
||||
{
|
||||
struct stack_map_irq_work *work;
|
||||
|
||||
work = container_of(entry, struct stack_map_irq_work, irq_work);
|
||||
up_read(work->sem);
|
||||
work->sem = NULL;
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
|
||||
|
||||
static inline bool stack_map_use_build_id(struct bpf_map *map)
|
||||
{
|
||||
return (map->map_flags & BPF_F_STACK_BUILD_ID);
|
||||
@@ -262,27 +280,31 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void stack_map_get_build_id_offset(struct bpf_map *map,
|
||||
struct stack_map_bucket *bucket,
|
||||
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
|
||||
u64 *ips, u32 trace_nr, bool user)
|
||||
{
|
||||
int i;
|
||||
struct vm_area_struct *vma;
|
||||
struct bpf_stack_build_id *id_offs;
|
||||
bool irq_work_busy = false;
|
||||
struct stack_map_irq_work *work = NULL;
|
||||
|
||||
bucket->nr = trace_nr;
|
||||
id_offs = (struct bpf_stack_build_id *)bucket->data;
|
||||
if (in_nmi()) {
|
||||
work = this_cpu_ptr(&up_read_work);
|
||||
if (work->irq_work.flags & IRQ_WORK_BUSY)
|
||||
/* cannot queue more up_read, fallback */
|
||||
irq_work_busy = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot do up_read() in nmi context, so build_id lookup is
|
||||
* only supported for non-nmi events. If at some point, it is
|
||||
* possible to run find_vma() without taking the semaphore, we
|
||||
* would like to allow build_id lookup in nmi context.
|
||||
* We cannot do up_read() in nmi context. To do build_id lookup
|
||||
* in nmi context, we need to run up_read() in irq_work. We use
|
||||
* a percpu variable to do the irq_work. If the irq_work is
|
||||
* already used by another lookup, we fall back to report ips.
|
||||
*
|
||||
* Same fallback is used for kernel stack (!user) on a stackmap
|
||||
* with build_id.
|
||||
*/
|
||||
if (!user || !current || !current->mm || in_nmi() ||
|
||||
if (!user || !current || !current->mm || irq_work_busy ||
|
||||
down_read_trylock(¤t->mm->mmap_sem) == 0) {
|
||||
/* cannot access current->mm, fall back to ips */
|
||||
for (i = 0; i < trace_nr; i++) {
|
||||
@@ -304,7 +326,13 @@ static void stack_map_get_build_id_offset(struct bpf_map *map,
|
||||
- vma->vm_start;
|
||||
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
if (!work) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
} else {
|
||||
work->sem = ¤t->mm->mmap_sem;
|
||||
irq_work_queue(&work->irq_work);
|
||||
}
|
||||
}
|
||||
|
||||
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
@@ -361,8 +389,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
pcpu_freelist_pop(&smap->freelist);
|
||||
if (unlikely(!new_bucket))
|
||||
return -ENOMEM;
|
||||
stack_map_get_build_id_offset(map, new_bucket, ips,
|
||||
trace_nr, user);
|
||||
new_bucket->nr = trace_nr;
|
||||
stack_map_get_build_id_offset(
|
||||
(struct bpf_stack_build_id *)new_bucket->data,
|
||||
ips, trace_nr, user);
|
||||
trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
|
||||
if (hash_matches && bucket->nr == trace_nr &&
|
||||
memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
|
||||
@@ -405,6 +435,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;
	int err = -EINVAL;
	u64 *ips;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;
	if (kernel && user_build_id)
		goto clear;

	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
					    : sizeof(u64);
	if (unlikely(size % elem_size))
		goto clear;

	num_elem = size / elem_size;
	if (sysctl_perf_event_max_stack < num_elem)
		init_nr = 0;
	else
		init_nr = sysctl_perf_event_max_stack - num_elem;
	trace = get_perf_callchain(regs, init_nr, kernel, user,
				   sysctl_perf_event_max_stack, false, false);
	if (unlikely(!trace))
		goto err_fault;

	trace_nr = trace->nr - init_nr;
	if (trace_nr < skip)
		goto err_fault;

	trace_nr -= skip;
	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
	copy_len = trace_nr * elem_size;
	ips = trace->ip + skip + init_nr;
	if (user && user_build_id)
		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
	else
		memcpy(buf, ips, copy_len);

	if (size > copy_len)
		memset(buf + copy_len, 0, size - copy_len);
	return copy_len;

err_fault:
	err = -EFAULT;
clear:
	memset(buf, 0, size);
	return err;
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func = bpf_get_stack,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type = ARG_ANYTHING,
};
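
bpf_get_stack() returns the number of bytes written (copy_len above) or a negative error, and with BPF_F_USER_BUILD_ID the buffer holds struct bpf_stack_build_id entries instead of raw instruction pointers. A hedged sketch of a perf_event program using the helper with a per-CPU scratch buffer so the verifier can bound the destination size; the helper declarations and map-def layout follow the contemporary samples style and are assumptions, not part of this patch:

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>

#define SEC(name) __attribute__((section(name), used))
#define MAX_STACK_DEPTH 32

static int (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) =
	(void *) BPF_FUNC_get_stack;
static void *(*bpf_map_lookup_elem)(void *map, void *key) =
	(void *) BPF_FUNC_map_lookup_elem;

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

/* One scratch buffer per CPU; the verifier sizes the access from value_size. */
struct bpf_map_def SEC("maps") stack_scratch = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = MAX_STACK_DEPTH * sizeof(struct bpf_stack_build_id),
	.max_entries = 1,
};

SEC("perf_event")
int sample_stack(struct bpf_perf_event_data *ctx)
{
	__u32 zero = 0;
	void *buf = bpf_map_lookup_elem(&stack_scratch, &zero);
	long len;

	if (!buf)
		return 0;
	/* user stack as build-id + offset pairs; len is bytes copied or < 0 */
	len = bpf_get_stack(ctx, buf,
			    MAX_STACK_DEPTH * sizeof(struct bpf_stack_build_id),
			    BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
	if (len < 0)
		return 0;
	return 0;
}

char _license[] SEC("license") = "GPL";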
|
||||
|
||||
/* Called from eBPF program */
|
||||
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
@@ -511,3 +608,16 @@ const struct bpf_map_ops stack_map_ops = {
|
||||
.map_update_elem = stack_map_update_elem,
|
||||
.map_delete_elem = stack_map_delete_elem,
|
||||
};
|
||||
|
||||
static int __init stack_map_init(void)
|
||||
{
|
||||
int cpu;
|
||||
struct stack_map_irq_work *work;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
work = per_cpu_ptr(&up_read_work, cpu);
|
||||
init_irq_work(&work->irq_work, do_up_read);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
subsys_initcall(stack_map_init);
|
||||
|
@@ -11,13 +11,17 @@
|
||||
*/
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_trace.h>
|
||||
#include <linux/bpf_lirc.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/license.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/version.h>
|
||||
@@ -26,6 +30,7 @@
|
||||
#include <linux/cred.h>
|
||||
#include <linux/timekeeping.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
|
||||
@@ -63,9 +68,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
|
||||
* copy_from_user() call. However, this is not a concern since this function is
|
||||
* meant to be a future-proofing of bits.
|
||||
*/
|
||||
static int check_uarg_tail_zero(void __user *uaddr,
|
||||
size_t expected_size,
|
||||
size_t actual_size)
|
||||
int bpf_check_uarg_tail_zero(void __user *uaddr,
|
||||
size_t expected_size,
|
||||
size_t actual_size)
|
||||
{
|
||||
unsigned char __user *addr;
|
||||
unsigned char __user *end;
|
||||
@@ -273,6 +278,7 @@ static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
|
||||
if (atomic_dec_and_test(&map->refcnt)) {
|
||||
/* bpf_map_free_id() must be called first */
|
||||
bpf_map_free_id(map, do_idr_lock);
|
||||
btf_put(map->btf);
|
||||
INIT_WORK(&map->work, bpf_map_free_deferred);
|
||||
schedule_work(&map->work);
|
||||
}
|
||||
@@ -282,6 +288,7 @@ void bpf_map_put(struct bpf_map *map)
|
||||
{
|
||||
__bpf_map_put(map, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_map_put);
|
||||
|
||||
void bpf_map_put_with_uref(struct bpf_map *map)
|
||||
{
|
||||
@@ -320,13 +327,15 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
|
||||
"value_size:\t%u\n"
|
||||
"max_entries:\t%u\n"
|
||||
"map_flags:\t%#x\n"
|
||||
"memlock:\t%llu\n",
|
||||
"memlock:\t%llu\n"
|
||||
"map_id:\t%u\n",
|
||||
map->map_type,
|
||||
map->key_size,
|
||||
map->value_size,
|
||||
map->max_entries,
|
||||
map->map_flags,
|
||||
map->pages * 1ULL << PAGE_SHIFT);
|
||||
map->pages * 1ULL << PAGE_SHIFT,
|
||||
map->id);
|
||||
|
||||
if (owner_prog_type) {
|
||||
seq_printf(m, "owner_prog_type:\t%u\n",
|
||||
@@ -418,7 +427,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define BPF_MAP_CREATE_LAST_FIELD map_ifindex
|
||||
#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
|
||||
/* called via syscall */
|
||||
static int map_create(union bpf_attr *attr)
|
||||
{
|
||||
@@ -452,6 +461,33 @@ static int map_create(union bpf_attr *attr)
|
||||
atomic_set(&map->refcnt, 1);
|
||||
atomic_set(&map->usercnt, 1);
|
||||
|
||||
if (bpf_map_support_seq_show(map) &&
|
||||
(attr->btf_key_type_id || attr->btf_value_type_id)) {
|
||||
struct btf *btf;
|
||||
|
||||
if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
|
||||
err = -EINVAL;
|
||||
goto free_map_nouncharge;
|
||||
}
|
||||
|
||||
btf = btf_get_by_fd(attr->btf_fd);
|
||||
if (IS_ERR(btf)) {
|
||||
err = PTR_ERR(btf);
|
||||
goto free_map_nouncharge;
|
||||
}
|
||||
|
||||
err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
|
||||
attr->btf_value_type_id);
|
||||
if (err) {
|
||||
btf_put(btf);
|
||||
goto free_map_nouncharge;
|
||||
}
|
||||
|
||||
map->btf = btf;
|
||||
map->btf_key_type_id = attr->btf_key_type_id;
|
||||
map->btf_value_type_id = attr->btf_value_type_id;
|
||||
}
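
On the user-space side, the three new attributes consumed by this block (btf_fd, btf_key_type_id, btf_value_type_id) are supplied at BPF_MAP_CREATE time. A hedged sketch, assuming the BTF blob was already loaded with BPF_BTF_LOAD, that the type ids were resolved elsewhere, and that the map type's ops implement map_check_btf (array maps do at this point); both ids must be given together, as the check above enforces:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Assumed inputs: btf_fd from a prior BPF_BTF_LOAD, plus the BTF type ids
 * describing this map's key and value types.
 */
int create_map_with_btf(int btf_fd, __u32 key_type_id, __u32 value_type_id)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u64);
	attr.max_entries = 128;
	attr.btf_fd = btf_fd;
	attr.btf_key_type_id = key_type_id;	/* both ids must be set, or -EINVAL */
	attr.btf_value_type_id = value_type_id;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}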
|
||||
|
||||
err = security_bpf_map_alloc(map);
|
||||
if (err)
|
||||
goto free_map_nouncharge;
|
||||
@@ -476,7 +512,6 @@ static int map_create(union bpf_attr *attr)
|
||||
return err;
|
||||
}
|
||||
|
||||
trace_bpf_map_create(map, err);
|
||||
return err;
|
||||
|
||||
free_map:
|
||||
@@ -484,6 +519,7 @@ free_map:
|
||||
free_map_sec:
|
||||
security_bpf_map_free(map);
|
||||
free_map_nouncharge:
|
||||
btf_put(map->btf);
|
||||
map->ops->map_free(map);
|
||||
return err;
|
||||
}
|
||||
@@ -516,6 +552,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
|
||||
atomic_inc(&map->usercnt);
|
||||
return map;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_map_inc);
|
||||
|
||||
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
|
||||
{
|
||||
@@ -635,7 +672,6 @@ static int map_lookup_elem(union bpf_attr *attr)
|
||||
if (copy_to_user(uvalue, value, value_size) != 0)
|
||||
goto free_value;
|
||||
|
||||
trace_bpf_map_lookup_elem(map, ufd, key, value);
|
||||
err = 0;
|
||||
|
||||
free_value:
|
||||
@@ -732,8 +768,6 @@ static int map_update_elem(union bpf_attr *attr)
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
preempt_enable();
|
||||
out:
|
||||
if (!err)
|
||||
trace_bpf_map_update_elem(map, ufd, key, value);
|
||||
free_value:
|
||||
kfree(value);
|
||||
free_key:
|
||||
@@ -786,8 +820,6 @@ static int map_delete_elem(union bpf_attr *attr)
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
preempt_enable();
|
||||
out:
|
||||
if (!err)
|
||||
trace_bpf_map_delete_elem(map, ufd, key);
|
||||
kfree(key);
|
||||
err_put:
|
||||
fdput(f);
|
||||
@@ -851,7 +883,6 @@ out:
|
||||
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
|
||||
goto free_next_key;
|
||||
|
||||
trace_bpf_map_next_key(map, ufd, key, next_key);
|
||||
err = 0;
|
||||
|
||||
free_next_key:
|
||||
@@ -1005,7 +1036,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
|
||||
if (atomic_dec_and_test(&prog->aux->refcnt)) {
|
||||
int i;
|
||||
|
||||
trace_bpf_prog_put_rcu(prog);
|
||||
/* bpf_prog_free_id() must be called first */
|
||||
bpf_prog_free_id(prog, do_idr_lock);
|
||||
|
||||
@@ -1042,11 +1072,13 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
|
||||
"prog_type:\t%u\n"
|
||||
"prog_jited:\t%u\n"
|
||||
"prog_tag:\t%s\n"
|
||||
"memlock:\t%llu\n",
|
||||
"memlock:\t%llu\n"
|
||||
"prog_id:\t%u\n",
|
||||
prog->type,
|
||||
prog->jited,
|
||||
prog_tag,
|
||||
prog->pages * 1ULL << PAGE_SHIFT);
|
||||
prog->pages * 1ULL << PAGE_SHIFT,
|
||||
prog->aux->id);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1172,11 +1204,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
|
||||
struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
|
||||
bool attach_drv)
|
||||
{
|
||||
struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);
|
||||
|
||||
if (!IS_ERR(prog))
|
||||
trace_bpf_prog_get_type(prog);
|
||||
return prog;
|
||||
return __bpf_prog_get(ufd, &type, attach_drv);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
|
||||
|
||||
@@ -1226,6 +1254,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
|
||||
case BPF_CGROUP_INET6_BIND:
|
||||
case BPF_CGROUP_INET4_CONNECT:
|
||||
case BPF_CGROUP_INET6_CONNECT:
|
||||
case BPF_CGROUP_UDP4_SENDMSG:
|
||||
case BPF_CGROUP_UDP6_SENDMSG:
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@@ -1351,7 +1381,6 @@ static int bpf_prog_load(union bpf_attr *attr)
|
||||
}
|
||||
|
||||
bpf_prog_kallsyms_add(prog);
|
||||
trace_bpf_prog_load(prog, err);
|
||||
return err;
|
||||
|
||||
free_used_maps:
|
||||
@@ -1543,6 +1572,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
||||
case BPF_CGROUP_INET6_BIND:
|
||||
case BPF_CGROUP_INET4_CONNECT:
|
||||
case BPF_CGROUP_INET6_CONNECT:
|
||||
case BPF_CGROUP_UDP4_SENDMSG:
|
||||
case BPF_CGROUP_UDP6_SENDMSG:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
|
||||
break;
|
||||
case BPF_CGROUP_SOCK_OPS:
|
||||
@@ -1556,6 +1587,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
||||
case BPF_SK_SKB_STREAM_PARSER:
|
||||
case BPF_SK_SKB_STREAM_VERDICT:
|
||||
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
|
||||
case BPF_LIRC_MODE2:
|
||||
return lirc_prog_attach(attr);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1613,6 +1646,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
|
||||
case BPF_CGROUP_INET6_BIND:
|
||||
case BPF_CGROUP_INET4_CONNECT:
|
||||
case BPF_CGROUP_INET6_CONNECT:
|
||||
case BPF_CGROUP_UDP4_SENDMSG:
|
||||
case BPF_CGROUP_UDP6_SENDMSG:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
|
||||
break;
|
||||
case BPF_CGROUP_SOCK_OPS:
|
||||
@@ -1626,6 +1661,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
|
||||
case BPF_SK_SKB_STREAM_PARSER:
|
||||
case BPF_SK_SKB_STREAM_VERDICT:
|
||||
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
|
||||
case BPF_LIRC_MODE2:
|
||||
return lirc_prog_detach(attr);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1670,9 +1707,13 @@ static int bpf_prog_query(const union bpf_attr *attr,
|
||||
case BPF_CGROUP_INET6_POST_BIND:
|
||||
case BPF_CGROUP_INET4_CONNECT:
|
||||
case BPF_CGROUP_INET6_CONNECT:
|
||||
case BPF_CGROUP_UDP4_SENDMSG:
|
||||
case BPF_CGROUP_UDP6_SENDMSG:
|
||||
case BPF_CGROUP_SOCK_OPS:
|
||||
case BPF_CGROUP_DEVICE:
|
||||
break;
|
||||
case BPF_LIRC_MODE2:
|
||||
return lirc_prog_query(attr, uattr);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -1879,7 +1920,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
|
||||
u32 ulen;
|
||||
int err;
|
||||
|
||||
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
|
||||
err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
|
||||
if (err)
|
||||
return err;
|
||||
info_len = min_t(u32, sizeof(info), info_len);
|
||||
@@ -1892,6 +1933,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
|
||||
info.load_time = prog->aux->load_time;
|
||||
info.created_by_uid = from_kuid_munged(current_user_ns(),
|
||||
prog->aux->user->uid);
|
||||
info.gpl_compatible = prog->gpl_compatible;
|
||||
|
||||
memcpy(info.tag, prog->tag, sizeof(prog->tag));
|
||||
memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
|
||||
@@ -1912,6 +1954,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
info.jited_prog_len = 0;
|
||||
info.xlated_prog_len = 0;
|
||||
info.nr_jited_ksyms = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
@@ -1948,18 +1991,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
|
||||
* for offload.
|
||||
*/
|
||||
ulen = info.jited_prog_len;
|
||||
info.jited_prog_len = prog->jited_len;
|
||||
if (prog->aux->func_cnt) {
|
||||
u32 i;
|
||||
|
||||
info.jited_prog_len = 0;
|
||||
for (i = 0; i < prog->aux->func_cnt; i++)
|
||||
info.jited_prog_len += prog->aux->func[i]->jited_len;
|
||||
} else {
|
||||
info.jited_prog_len = prog->jited_len;
|
||||
}
|
||||
|
||||
if (info.jited_prog_len && ulen) {
|
||||
if (bpf_dump_raw_ok()) {
|
||||
uinsns = u64_to_user_ptr(info.jited_prog_insns);
|
||||
ulen = min_t(u32, info.jited_prog_len, ulen);
|
||||
if (copy_to_user(uinsns, prog->bpf_func, ulen))
|
||||
return -EFAULT;
|
||||
|
||||
/* for multi-function programs, copy the JITed
|
||||
* instructions for all the functions
|
||||
*/
|
||||
if (prog->aux->func_cnt) {
|
||||
u32 len, free, i;
|
||||
u8 *img;
|
||||
|
||||
free = ulen;
|
||||
for (i = 0; i < prog->aux->func_cnt; i++) {
|
||||
len = prog->aux->func[i]->jited_len;
|
||||
len = min_t(u32, len, free);
|
||||
img = (u8 *) prog->aux->func[i]->bpf_func;
|
||||
if (copy_to_user(uinsns, img, len))
|
||||
return -EFAULT;
|
||||
uinsns += len;
|
||||
free -= len;
|
||||
if (!free)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (copy_to_user(uinsns, prog->bpf_func, ulen))
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
info.jited_prog_insns = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ulen = info.nr_jited_ksyms;
|
||||
info.nr_jited_ksyms = prog->aux->func_cnt;
|
||||
if (info.nr_jited_ksyms && ulen) {
|
||||
if (bpf_dump_raw_ok()) {
|
||||
u64 __user *user_ksyms;
|
||||
ulong ksym_addr;
|
||||
u32 i;
|
||||
|
||||
/* copy the address of the kernel symbol
|
||||
* corresponding to each function
|
||||
*/
|
||||
ulen = min_t(u32, info.nr_jited_ksyms, ulen);
|
||||
user_ksyms = u64_to_user_ptr(info.jited_ksyms);
|
||||
for (i = 0; i < ulen; i++) {
|
||||
ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
|
||||
ksym_addr &= PAGE_MASK;
|
||||
if (put_user((u64) ksym_addr, &user_ksyms[i]))
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
info.jited_ksyms = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ulen = info.nr_jited_func_lens;
|
||||
info.nr_jited_func_lens = prog->aux->func_cnt;
|
||||
if (info.nr_jited_func_lens && ulen) {
|
||||
if (bpf_dump_raw_ok()) {
|
||||
u32 __user *user_lens;
|
||||
u32 func_len, i;
|
||||
|
||||
/* copy the JITed image lengths for each function */
|
||||
ulen = min_t(u32, info.nr_jited_func_lens, ulen);
|
||||
user_lens = u64_to_user_ptr(info.jited_func_lens);
|
||||
for (i = 0; i < ulen; i++) {
|
||||
func_len = prog->aux->func[i]->jited_len;
|
||||
if (put_user(func_len, &user_lens[i]))
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
info.jited_func_lens = 0;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (copy_to_user(uinfo, &info, info_len) ||
|
||||
put_user(info_len, &uattr->info.info_len))
|
||||
@@ -1977,7 +2095,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
|
||||
u32 info_len = attr->info.info_len;
|
||||
int err;
|
||||
|
||||
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
|
||||
err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
|
||||
if (err)
|
||||
return err;
|
||||
info_len = min_t(u32, sizeof(info), info_len);
|
||||
@@ -1990,6 +2108,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
|
||||
info.map_flags = map->map_flags;
|
||||
memcpy(info.name, map->name, sizeof(map->name));
|
||||
|
||||
if (map->btf) {
|
||||
info.btf_id = btf_id(map->btf);
|
||||
info.btf_key_type_id = map->btf_key_type_id;
|
||||
info.btf_value_type_id = map->btf_value_type_id;
|
||||
}
|
||||
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
err = bpf_map_offload_info_fill(&info, map);
|
||||
if (err)
|
||||
@@ -2003,6 +2127,21 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_btf_get_info_by_fd(struct btf *btf,
|
||||
const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
|
||||
u32 info_len = attr->info.info_len;
|
||||
int err;
|
||||
|
||||
err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return btf_get_info_by_fd(btf, attr, uattr);
|
||||
}
|
||||
|
||||
#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
|
||||
|
||||
static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
|
||||
@@ -2025,6 +2164,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
|
||||
else if (f.file->f_op == &bpf_map_fops)
|
||||
err = bpf_map_get_info_by_fd(f.file->private_data, attr,
|
||||
uattr);
|
||||
else if (f.file->f_op == &btf_fops)
|
||||
err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
|
||||
else
|
||||
err = -EINVAL;
|
||||
|
||||
@@ -2032,6 +2173,158 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
|
||||
return err;
|
||||
}
|
||||
|
||||
#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
|
||||
|
||||
static int bpf_btf_load(const union bpf_attr *attr)
|
||||
{
|
||||
if (CHECK_ATTR(BPF_BTF_LOAD))
|
||||
return -EINVAL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
return btf_new_fd(attr);
|
||||
}
|
||||
|
||||
#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
|
||||
|
||||
static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
|
||||
{
|
||||
if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
|
||||
return -EINVAL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
return btf_get_fd_by_id(attr->btf_id);
|
||||
}
|
||||
|
||||
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr,
|
||||
u32 prog_id, u32 fd_type,
|
||||
const char *buf, u64 probe_offset,
|
||||
u64 probe_addr)
|
||||
{
|
||||
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
|
||||
u32 len = buf ? strlen(buf) : 0, input_len;
|
||||
int err = 0;
|
||||
|
||||
if (put_user(len, &uattr->task_fd_query.buf_len))
|
||||
return -EFAULT;
|
||||
input_len = attr->task_fd_query.buf_len;
|
||||
if (input_len && ubuf) {
|
||||
if (!len) {
|
||||
/* nothing to copy, just make ubuf NULL terminated */
|
||||
char zero = '\0';
|
||||
|
||||
if (put_user(zero, ubuf))
|
||||
return -EFAULT;
|
||||
} else if (input_len >= len + 1) {
|
||||
/* ubuf can hold the string with NULL terminator */
|
||||
if (copy_to_user(ubuf, buf, len + 1))
|
||||
return -EFAULT;
|
||||
} else {
|
||||
/* ubuf cannot hold the string with NULL terminator,
|
||||
* do a partial copy with NULL terminator.
|
||||
*/
|
||||
char zero = '\0';
|
||||
|
||||
err = -ENOSPC;
|
||||
if (copy_to_user(ubuf, buf, input_len - 1))
|
||||
return -EFAULT;
|
||||
if (put_user(zero, ubuf + input_len - 1))
|
||||
return -EFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
|
||||
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
|
||||
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
|
||||
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
|
||||
return -EFAULT;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
|
||||
|
||||
static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||
union bpf_attr __user *uattr)
|
||||
{
|
||||
pid_t pid = attr->task_fd_query.pid;
|
||||
u32 fd = attr->task_fd_query.fd;
|
||||
const struct perf_event *event;
|
||||
struct files_struct *files;
|
||||
struct task_struct *task;
|
||||
struct file *file;
|
||||
int err;
|
||||
|
||||
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
|
||||
return -EINVAL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (attr->task_fd_query.flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
|
||||
if (!task)
|
||||
return -ENOENT;
|
||||
|
||||
files = get_files_struct(task);
|
||||
put_task_struct(task);
|
||||
if (!files)
|
||||
return -ENOENT;
|
||||
|
||||
err = 0;
|
||||
spin_lock(&files->file_lock);
|
||||
file = fcheck_files(files, fd);
|
||||
if (!file)
|
||||
err = -EBADF;
|
||||
else
|
||||
get_file(file);
|
||||
spin_unlock(&files->file_lock);
|
||||
put_files_struct(files);
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_op == &bpf_raw_tp_fops) {
|
||||
struct bpf_raw_tracepoint *raw_tp = file->private_data;
|
||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||
|
||||
err = bpf_task_fd_query_copy(attr, uattr,
|
||||
raw_tp->prog->aux->id,
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT,
|
||||
btp->tp->name, 0, 0);
|
||||
goto put_file;
|
||||
}
|
||||
|
||||
event = perf_get_event(file);
|
||||
if (!IS_ERR(event)) {
|
||||
u64 probe_offset, probe_addr;
|
||||
u32 prog_id, fd_type;
|
||||
const char *buf;
|
||||
|
||||
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
|
||||
&buf, &probe_offset,
|
||||
&probe_addr);
|
||||
if (!err)
|
||||
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
|
||||
fd_type, buf,
|
||||
probe_offset,
|
||||
probe_addr);
|
||||
goto put_file;
|
||||
}
|
||||
|
||||
err = -ENOTSUPP;
|
||||
put_file:
|
||||
fput(file);
|
||||
out:
|
||||
return err;
|
||||
}
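
The attribute names used by bpf_task_fd_query_copy() map directly onto what user space fills in and reads back. A hedged sketch of querying which program sits behind a perf event or raw tracepoint fd owned by a given pid (CAP_SYS_ADMIN is required, as checked above):

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Assumed: pid owns fd, and fd is a perf event or raw tracepoint fd. */
int query_task_fd(int pid, int fd)
{
	union bpf_attr attr;
	char buf[256];
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.buf = (unsigned long) buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
	if (err)
		return err;

	printf("prog_id=%u fd_type=%u name=%s offset=%llu addr=%llu\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf,
	       (unsigned long long) attr.task_fd_query.probe_offset,
	       (unsigned long long) attr.task_fd_query.probe_addr);
	return 0;
}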
|
||||
|
||||
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
|
||||
{
|
||||
union bpf_attr attr = {};
|
||||
@@ -2040,7 +2333,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
|
||||
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
|
||||
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
|
||||
if (err)
|
||||
return err;
|
||||
size = min_t(u32, size, sizeof(attr));
|
||||
@@ -2112,6 +2405,15 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
|
||||
case BPF_RAW_TRACEPOINT_OPEN:
|
||||
err = bpf_raw_tracepoint_open(&attr);
|
||||
break;
|
||||
case BPF_BTF_LOAD:
|
||||
err = bpf_btf_load(&attr);
|
||||
break;
|
||||
case BPF_BTF_GET_FD_BY_ID:
|
||||
err = bpf_btf_get_fd_by_id(&attr);
|
||||
break;
|
||||
case BPF_TASK_FD_QUERY:
|
||||
err = bpf_task_fd_query(&attr, uattr);
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
break;
|
||||
|
@@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
	return TNUM(a.value >> shift, a.mask >> shift);
}

struct tnum tnum_arshift(struct tnum a, u8 min_shift)
{
	/* if a.value is negative, arithmetic shifting by minimum shift
	 * will have larger negative offset compared to more shifting.
	 * If a.value is nonnegative, arithmetic shifting by minimum shift
	 * will have larger positive offset compared to more shifting.
	 */
	return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
}
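
A tnum tracks a register as a (value, mask) pair where mask bits are unknown; shifting both halves arithmetically lets an unknown sign bit replicate into the result. A standalone userspace sketch, not kernel code, that reuses the same formula on a sample tracked value:

#include <stdint.h>
#include <stdio.h>

struct tnum { uint64_t value; uint64_t mask; };

/* Same formula as the kernel's tnum_arshift(): shift value and mask
 * arithmetically so a (possibly unknown) sign bit is replicated.
 */
static struct tnum arshift(struct tnum a, uint8_t min_shift)
{
	return (struct tnum){ (uint64_t)((int64_t)a.value >> min_shift),
			      (uint64_t)((int64_t)a.mask >> min_shift) };
}

int main(void)
{
	/* Sign bit unknown: value 0, mask has only the top bit set. */
	struct tnum a = { 0x0ULL, 0x8000000000000000ULL };
	struct tnum r = arshift(a, 4);

	/* Expect value 0 and mask 0xf800000000000000: the unknown sign bit
	 * now covers the top five bits of the result.
	 */
	printf("value=%#llx mask=%#llx\n",
	       (unsigned long long) r.value, (unsigned long long) r.mask);
	return 0;
}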

struct tnum tnum_add(struct tnum a, struct tnum b)
{
	u64 sm, sv, sigma, chi, mu;

@@ -22,6 +22,7 @@
|
||||
#include <linux/stringify.h>
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include "disasm.h"
|
||||
|
||||
@@ -186,6 +187,8 @@ struct bpf_call_arg_meta {
|
||||
bool pkt_access;
|
||||
int regno;
|
||||
int access_size;
|
||||
s64 msize_smax_value;
|
||||
u64 msize_umax_value;
|
||||
};
|
||||
|
||||
static DEFINE_MUTEX(bpf_verifier_lock);
|
||||
@@ -760,18 +763,19 @@ enum reg_arg_type {
|
||||
|
||||
static int cmp_subprogs(const void *a, const void *b)
|
||||
{
|
||||
return *(int *)a - *(int *)b;
|
||||
return ((struct bpf_subprog_info *)a)->start -
|
||||
((struct bpf_subprog_info *)b)->start;
|
||||
}
|
||||
|
||||
static int find_subprog(struct bpf_verifier_env *env, int off)
|
||||
{
|
||||
u32 *p;
|
||||
struct bpf_subprog_info *p;
|
||||
|
||||
p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
|
||||
sizeof(env->subprog_starts[0]), cmp_subprogs);
|
||||
p = bsearch(&off, env->subprog_info, env->subprog_cnt,
|
||||
sizeof(env->subprog_info[0]), cmp_subprogs);
|
||||
if (!p)
|
||||
return -ENOENT;
|
||||
return p - env->subprog_starts;
|
||||
return p - env->subprog_info;
|
||||
|
||||
}
|
||||
|
||||
@@ -791,18 +795,24 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
|
||||
verbose(env, "too many subprograms\n");
|
||||
return -E2BIG;
|
||||
}
|
||||
env->subprog_starts[env->subprog_cnt++] = off;
|
||||
sort(env->subprog_starts, env->subprog_cnt,
|
||||
sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
|
||||
env->subprog_info[env->subprog_cnt++].start = off;
|
||||
sort(env->subprog_info, env->subprog_cnt,
|
||||
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_subprogs(struct bpf_verifier_env *env)
|
||||
{
|
||||
int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
|
||||
struct bpf_subprog_info *subprog = env->subprog_info;
|
||||
struct bpf_insn *insn = env->prog->insnsi;
|
||||
int insn_cnt = env->prog->len;
|
||||
|
||||
/* Add entry function. */
|
||||
ret = add_subprog(env, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* determine subprog starts. The end is one before the next starts */
|
||||
for (i = 0; i < insn_cnt; i++) {
|
||||
if (insn[i].code != (BPF_JMP | BPF_CALL))
|
||||
@@ -822,16 +832,18 @@ static int check_subprogs(struct bpf_verifier_env *env)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Add a fake 'exit' subprog which could simplify subprog iteration
|
||||
* logic. 'subprog_cnt' should not be increased.
|
||||
*/
|
||||
subprog[env->subprog_cnt].start = insn_cnt;
|
||||
|
||||
if (env->log.level > 1)
|
||||
for (i = 0; i < env->subprog_cnt; i++)
|
||||
verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
|
||||
verbose(env, "func#%d @%d\n", i, subprog[i].start);
|
||||
|
||||
/* now check that all jumps are within the same subprog */
|
||||
subprog_start = 0;
|
||||
if (env->subprog_cnt == cur_subprog)
|
||||
subprog_end = insn_cnt;
|
||||
else
|
||||
subprog_end = env->subprog_starts[cur_subprog++];
|
||||
subprog_start = subprog[cur_subprog].start;
|
||||
subprog_end = subprog[cur_subprog + 1].start;
|
||||
for (i = 0; i < insn_cnt; i++) {
|
||||
u8 code = insn[i].code;
|
||||
|
||||
@@ -856,10 +868,9 @@ next:
|
||||
return -EINVAL;
|
||||
}
|
||||
subprog_start = subprog_end;
|
||||
if (env->subprog_cnt == cur_subprog)
|
||||
subprog_end = insn_cnt;
|
||||
else
|
||||
subprog_end = env->subprog_starts[cur_subprog++];
|
||||
cur_subprog++;
|
||||
if (cur_subprog < env->subprog_cnt)
|
||||
subprog_end = subprog[cur_subprog + 1].start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@@ -1298,6 +1309,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
|
||||
switch (env->prog->type) {
|
||||
case BPF_PROG_TYPE_LWT_IN:
|
||||
case BPF_PROG_TYPE_LWT_OUT:
|
||||
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
|
||||
/* dst_input() and dst_output() can't write for now */
|
||||
if (t == BPF_WRITE)
|
||||
return false;
|
||||
@@ -1517,13 +1529,13 @@ static int update_stack_depth(struct bpf_verifier_env *env,
|
||||
const struct bpf_func_state *func,
|
||||
int off)
|
||||
{
|
||||
u16 stack = env->subprog_stack_depth[func->subprogno];
|
||||
u16 stack = env->subprog_info[func->subprogno].stack_depth;
|
||||
|
||||
if (stack >= -off)
|
||||
return 0;
|
||||
|
||||
/* update known max for given subprogram */
|
||||
env->subprog_stack_depth[func->subprogno] = -off;
|
||||
env->subprog_info[func->subprogno].stack_depth = -off;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1535,9 +1547,9 @@ static int update_stack_depth(struct bpf_verifier_env *env,
|
||||
*/
|
||||
static int check_max_stack_depth(struct bpf_verifier_env *env)
|
||||
{
|
||||
int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
|
||||
int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
|
||||
struct bpf_subprog_info *subprog = env->subprog_info;
|
||||
struct bpf_insn *insn = env->prog->insnsi;
|
||||
int insn_cnt = env->prog->len;
|
||||
int ret_insn[MAX_CALL_FRAMES];
|
||||
int ret_prog[MAX_CALL_FRAMES];
|
||||
|
||||
@@ -1545,17 +1557,14 @@ process_func:
|
||||
/* round up to 32-bytes, since this is granularity
|
||||
* of interpreter stack size
|
||||
*/
|
||||
depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
|
||||
depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
|
||||
if (depth > MAX_BPF_STACK) {
|
||||
verbose(env, "combined stack size of %d calls is %d. Too large\n",
|
||||
frame + 1, depth);
|
||||
return -EACCES;
|
||||
}
|
||||
continue_func:
|
||||
if (env->subprog_cnt == subprog)
|
||||
subprog_end = insn_cnt;
|
||||
else
|
||||
subprog_end = env->subprog_starts[subprog];
|
||||
subprog_end = subprog[idx + 1].start;
|
||||
for (; i < subprog_end; i++) {
|
||||
if (insn[i].code != (BPF_JMP | BPF_CALL))
|
||||
continue;
|
||||
@@ -1563,17 +1572,16 @@ continue_func:
|
||||
continue;
|
||||
/* remember insn and function to return to */
|
||||
ret_insn[frame] = i + 1;
|
||||
ret_prog[frame] = subprog;
|
||||
ret_prog[frame] = idx;
|
||||
|
||||
/* find the callee */
|
||||
i = i + insn[i].imm + 1;
|
||||
subprog = find_subprog(env, i);
|
||||
if (subprog < 0) {
|
||||
idx = find_subprog(env, i);
|
||||
if (idx < 0) {
|
||||
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
|
||||
i);
|
||||
return -EFAULT;
|
||||
}
|
||||
subprog++;
|
||||
frame++;
|
||||
if (frame >= MAX_CALL_FRAMES) {
|
||||
WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
|
||||
@@ -1586,10 +1594,10 @@ continue_func:
|
||||
*/
|
||||
if (frame == 0)
|
||||
return 0;
|
||||
depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
|
||||
depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
|
||||
frame--;
|
||||
i = ret_insn[frame];
|
||||
subprog = ret_prog[frame];
|
||||
idx = ret_prog[frame];
|
||||
goto continue_func;
|
||||
}
|
||||
|
||||
@@ -1605,8 +1613,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
|
||||
start);
|
||||
return -EFAULT;
|
||||
}
|
||||
subprog++;
|
||||
return env->subprog_stack_depth[subprog];
|
||||
return env->subprog_info[subprog].stack_depth;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1961,7 +1968,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
|
||||
if (arg_type == ARG_PTR_TO_MAP_KEY ||
|
||||
arg_type == ARG_PTR_TO_MAP_VALUE) {
|
||||
expected_type = PTR_TO_STACK;
|
||||
if (!type_is_pkt_pointer(type) &&
|
||||
if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
|
||||
type != expected_type)
|
||||
goto err_type;
|
||||
} else if (arg_type == ARG_CONST_SIZE ||
|
||||
@@ -2013,14 +2020,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
|
||||
verbose(env, "invalid map_ptr to access map->key\n");
|
||||
return -EACCES;
|
||||
}
|
||||
if (type_is_pkt_pointer(type))
|
||||
err = check_packet_access(env, regno, reg->off,
|
||||
meta->map_ptr->key_size,
|
||||
false);
|
||||
else
|
||||
err = check_stack_boundary(env, regno,
|
||||
meta->map_ptr->key_size,
|
||||
false, NULL);
|
||||
err = check_helper_mem_access(env, regno,
|
||||
meta->map_ptr->key_size, false,
|
||||
NULL);
|
||||
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
|
||||
/* bpf_map_xxx(..., map_ptr, ..., value) call:
|
||||
* check [value, value + map->value_size) validity
|
||||
@@ -2030,17 +2032,18 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
|
||||
verbose(env, "invalid map_ptr to access map->value\n");
|
||||
return -EACCES;
|
||||
}
|
||||
if (type_is_pkt_pointer(type))
|
||||
err = check_packet_access(env, regno, reg->off,
|
||||
meta->map_ptr->value_size,
|
||||
false);
|
||||
else
|
||||
err = check_stack_boundary(env, regno,
|
||||
meta->map_ptr->value_size,
|
||||
false, NULL);
|
||||
err = check_helper_mem_access(env, regno,
|
||||
meta->map_ptr->value_size, false,
|
||||
NULL);
|
||||
} else if (arg_type_is_mem_size(arg_type)) {
|
||||
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
|
||||
|
||||
/* remember the mem_size which may be used later
|
||||
* to refine return values.
|
||||
*/
|
||||
meta->msize_smax_value = reg->smax_value;
|
||||
meta->msize_umax_value = reg->umax_value;
|
||||
|
||||
/* The register is SCALAR_VALUE; the access check
|
||||
* happens using its boundaries.
|
||||
*/
|
||||
@@ -2118,8 +2121,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||
if (func_id != BPF_FUNC_redirect_map)
|
||||
goto error;
|
||||
break;
|
||||
/* Restrict bpf side of cpumap, open when use-cases appear */
|
||||
/* Restrict bpf side of cpumap and xskmap, open when use-cases
|
||||
* appear.
|
||||
*/
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
case BPF_MAP_TYPE_XSKMAP:
|
||||
if (func_id != BPF_FUNC_redirect_map)
|
||||
goto error;
|
||||
break;
|
||||
@@ -2135,6 +2141,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||
func_id != BPF_FUNC_msg_redirect_map)
|
||||
goto error;
|
||||
break;
|
||||
case BPF_MAP_TYPE_SOCKHASH:
|
||||
if (func_id != BPF_FUNC_sk_redirect_hash &&
|
||||
func_id != BPF_FUNC_sock_hash_update &&
|
||||
func_id != BPF_FUNC_map_delete_elem &&
|
||||
func_id != BPF_FUNC_msg_redirect_hash)
|
||||
goto error;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -2144,7 +2157,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||
case BPF_FUNC_tail_call:
|
||||
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
|
||||
goto error;
|
||||
if (env->subprog_cnt) {
|
||||
if (env->subprog_cnt > 1) {
|
||||
verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -2166,16 +2179,20 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
|
||||
break;
|
||||
case BPF_FUNC_redirect_map:
|
||||
if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
|
||||
map->map_type != BPF_MAP_TYPE_CPUMAP)
|
||||
map->map_type != BPF_MAP_TYPE_CPUMAP &&
|
||||
map->map_type != BPF_MAP_TYPE_XSKMAP)
|
||||
goto error;
|
||||
break;
|
||||
case BPF_FUNC_sk_redirect_map:
|
||||
case BPF_FUNC_msg_redirect_map:
|
||||
case BPF_FUNC_sock_map_update:
|
||||
if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
|
||||
goto error;
|
||||
break;
|
||||
case BPF_FUNC_sock_map_update:
|
||||
if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
|
||||
case BPF_FUNC_sk_redirect_hash:
|
||||
case BPF_FUNC_msg_redirect_hash:
|
||||
case BPF_FUNC_sock_hash_update:
|
||||
if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
|
||||
goto error;
|
||||
break;
|
||||
default:
|
||||
@@ -2316,7 +2333,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
/* remember the callsite, it will be used by bpf_exit */
|
||||
*insn_idx /* callsite */,
|
||||
state->curframe + 1 /* frameno within this callchain */,
|
||||
subprog + 1 /* subprog number within this prog */);
|
||||
subprog /* subprog number within this prog */);
|
||||
|
||||
/* copy r1 - r5 args that callee can access */
|
||||
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
|
||||
@@ -2380,6 +2397,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
|
||||
int func_id,
|
||||
struct bpf_call_arg_meta *meta)
|
||||
{
|
||||
struct bpf_reg_state *ret_reg = ®s[BPF_REG_0];
|
||||
|
||||
if (ret_type != RET_INTEGER ||
|
||||
(func_id != BPF_FUNC_get_stack &&
|
||||
func_id != BPF_FUNC_probe_read_str))
|
||||
return;
|
||||
|
||||
ret_reg->smax_value = meta->msize_smax_value;
|
||||
ret_reg->umax_value = meta->msize_umax_value;
|
||||
__reg_deduce_bounds(ret_reg);
|
||||
__reg_bound_offset(ret_reg);
|
||||
}
|
||||
|
||||
static int
|
||||
record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
|
||||
int func_id, int insn_idx)
|
||||
@@ -2387,8 +2421,11 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
|
||||
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
|
||||
|
||||
if (func_id != BPF_FUNC_tail_call &&
|
||||
func_id != BPF_FUNC_map_lookup_elem)
|
||||
func_id != BPF_FUNC_map_lookup_elem &&
|
||||
func_id != BPF_FUNC_map_update_elem &&
|
||||
func_id != BPF_FUNC_map_delete_elem)
|
||||
return 0;
|
||||
|
||||
if (meta->map_ptr == NULL) {
|
||||
verbose(env, "kernel subsystem misconfigured verifier\n");
|
||||
return -EINVAL;
|
||||
@@ -2428,7 +2465,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
||||
|
||||
/* eBPF programs must be GPL compatible to use GPL-ed functions */
|
||||
if (!env->prog->gpl_compatible && fn->gpl_only) {
|
||||
verbose(env, "cannot call GPL only function from proprietary program\n");
|
||||
verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -2516,10 +2553,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
|
||||
|
||||
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
|
||||
const char *err_str;
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
err = get_callchain_buffers(sysctl_perf_event_max_stack);
|
||||
err_str = "cannot get callchain buffer for func %s#%d\n";
|
||||
#else
|
||||
err = -ENOTSUPP;
|
||||
err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
|
||||
#endif
|
||||
if (err) {
|
||||
verbose(env, err_str, func_id_name(func_id), func_id);
|
||||
return err;
|
||||
}
|
||||
|
||||
env->prog->has_callchain_buf = true;
|
||||
}
|
||||
|
||||
if (changes_data)
|
||||
clear_all_pkt_pointers(env);
|
||||
return 0;
|
||||
@@ -2964,10 +3021,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
|
||||
dst_reg->umin_value <<= umin_val;
|
||||
dst_reg->umax_value <<= umax_val;
|
||||
}
|
||||
if (src_known)
|
||||
dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
|
||||
else
|
||||
dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
|
||||
dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
|
||||
/* We may learn something more from the var_off */
|
||||
__update_reg_bounds(dst_reg);
|
||||
break;
|
||||
@@ -2995,16 +3049,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
|
||||
*/
|
||||
dst_reg->smin_value = S64_MIN;
|
||||
dst_reg->smax_value = S64_MAX;
|
||||
if (src_known)
|
||||
dst_reg->var_off = tnum_rshift(dst_reg->var_off,
|
||||
umin_val);
|
||||
else
|
||||
dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
|
||||
dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
|
||||
dst_reg->umin_value >>= umax_val;
|
||||
dst_reg->umax_value >>= umin_val;
|
||||
/* We may learn something more from the var_off */
|
||||
__update_reg_bounds(dst_reg);
|
||||
break;
|
||||
case BPF_ARSH:
|
||||
if (umax_val >= insn_bitness) {
|
||||
/* Shifts greater than 31 or 63 are undefined.
|
||||
* This includes shifts by a negative number.
|
||||
*/
|
||||
mark_reg_unknown(env, regs, insn->dst_reg);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Upon reaching here, src_known is true and
|
||||
* umax_val is equal to umin_val.
|
||||
*/
|
||||
dst_reg->smin_value >>= umin_val;
|
||||
dst_reg->smax_value >>= umin_val;
|
||||
dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
|
||||
|
||||
/* blow away the dst_reg umin_value/umax_value and rely on
|
||||
* dst_reg var_off to refine the result.
|
||||
*/
|
||||
dst_reg->umin_value = 0;
|
||||
dst_reg->umax_value = U64_MAX;
|
||||
__update_reg_bounds(dst_reg);
|
||||
break;
|
||||
default:
|
||||
mark_reg_unknown(env, regs, insn->dst_reg);
|
||||
break;
|
||||
@@ -3888,7 +3961,12 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (env->subprog_cnt) {
|
||||
if (!env->ops->gen_ld_abs) {
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (env->subprog_cnt > 1) {
|
||||
/* when program has LD_ABS insn JITs and interpreter assume
|
||||
* that r1 == ctx == skb which is not the case for callees
|
||||
* that can have arbitrary arguments. It's problematic
|
||||
@@ -4919,15 +4997,15 @@ process_bpf_exit:
|
||||
|
||||
verbose(env, "processed %d insns (limit %d), stack depth ",
|
||||
insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
|
||||
for (i = 0; i < env->subprog_cnt + 1; i++) {
|
||||
u32 depth = env->subprog_stack_depth[i];
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
u32 depth = env->subprog_info[i].stack_depth;
|
||||
|
||||
verbose(env, "%d", depth);
|
||||
if (i + 1 < env->subprog_cnt + 1)
|
||||
if (i + 1 < env->subprog_cnt)
|
||||
verbose(env, "+");
|
||||
}
|
||||
verbose(env, "\n");
|
||||
env->prog->aux->stack_depth = env->subprog_stack_depth[0];
|
||||
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -5051,7 +5129,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
|
||||
/* hold the map. If the program is rejected by verifier,
|
||||
* the map will be released by release_maps() or it
|
||||
* will be used by the valid program until it's unloaded
|
||||
* and all maps are released in free_bpf_prog_info()
|
||||
* and all maps are released in free_used_maps()
|
||||
*/
|
||||
map = bpf_map_inc(map, false);
|
||||
if (IS_ERR(map)) {
|
||||
@@ -5133,10 +5211,11 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
|
||||
|
||||
if (len == 1)
|
||||
return;
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
if (env->subprog_starts[i] < off)
|
||||
/* NOTE: fake 'exit' subprog should be updated as well. */
|
||||
for (i = 0; i <= env->subprog_cnt; i++) {
|
||||
if (env->subprog_info[i].start < off)
|
||||
continue;
|
||||
env->subprog_starts[i] += len - 1;
|
||||
env->subprog_info[i].start += len - 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5210,7 +5289,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
||||
}
|
||||
}
|
||||
|
||||
if (!ops->convert_ctx_access)
|
||||
if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
|
||||
return 0;
|
||||
|
||||
insn = env->prog->insnsi + delta;
|
||||
@@ -5270,6 +5349,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
||||
*/
|
||||
is_narrower_load = size < ctx_field_size;
|
||||
if (is_narrower_load) {
|
||||
u32 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
|
||||
u32 off = insn->off;
|
||||
u8 size_code;
|
||||
|
||||
@@ -5284,7 +5364,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
||||
else if (ctx_field_size == 8)
|
||||
size_code = BPF_DW;
|
||||
|
||||
insn->off = off & ~(ctx_field_size - 1);
|
||||
insn->off = off & ~(size_default - 1);
|
||||
insn->code = BPF_LDX | BPF_MEM | size_code;
|
||||
}
|
||||
|
||||
@@ -5328,7 +5408,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
void *old_bpf_func;
|
||||
int err = -ENOMEM;
|
||||
|
||||
if (env->subprog_cnt == 0)
|
||||
if (env->subprog_cnt <= 1)
|
||||
return 0;
|
||||
|
||||
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
|
||||
@@ -5344,7 +5424,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
/* temporarily remember subprog id inside insn instead of
|
||||
* aux_data, since next loop will split up all insns into funcs
|
||||
*/
|
||||
insn->off = subprog + 1;
|
||||
insn->off = subprog;
|
||||
/* remember original imm in case JIT fails and fallback
|
||||
* to interpreter will be needed
|
||||
*/
|
||||
@@ -5353,16 +5433,13 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
insn->imm = 1;
|
||||
}
|
||||
|
||||
func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
|
||||
func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL);
|
||||
if (!func)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i <= env->subprog_cnt; i++) {
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
subprog_start = subprog_end;
|
||||
if (env->subprog_cnt == i)
|
||||
subprog_end = prog->len;
|
||||
else
|
||||
subprog_end = env->subprog_starts[i];
|
||||
subprog_end = env->subprog_info[i + 1].start;
|
||||
|
||||
len = subprog_end - subprog_start;
|
||||
func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
|
||||
@@ -5379,7 +5456,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
* Long term would need debug info to populate names
|
||||
*/
|
||||
func[i]->aux->name[0] = 'F';
|
||||
func[i]->aux->stack_depth = env->subprog_stack_depth[i];
|
||||
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
|
||||
func[i]->jit_requested = 1;
|
||||
func[i] = bpf_int_jit_compile(func[i]);
|
||||
if (!func[i]->jited) {
|
||||
@@ -5392,20 +5469,33 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
* now populate all bpf_calls with correct addresses and
|
||||
* run last pass of JIT
|
||||
*/
|
||||
for (i = 0; i <= env->subprog_cnt; i++) {
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
insn = func[i]->insnsi;
|
||||
for (j = 0; j < func[i]->len; j++, insn++) {
|
||||
if (insn->code != (BPF_JMP | BPF_CALL) ||
|
||||
insn->src_reg != BPF_PSEUDO_CALL)
|
||||
continue;
|
||||
subprog = insn->off;
|
||||
insn->off = 0;
|
||||
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
|
||||
func[subprog]->bpf_func -
|
||||
__bpf_call_base;
|
||||
}
|
||||
|
||||
/* we use the aux data to keep a list of the start addresses
|
||||
* of the JITed images for each function in the program
|
||||
*
|
||||
* for some architectures, such as powerpc64, the imm field
|
||||
* might not be large enough to hold the offset of the start
|
||||
* address of the callee's JITed image from __bpf_call_base
|
||||
*
|
||||
* in such cases, we can lookup the start address of a callee
|
||||
* by using its subprog id, available from the off field of
|
||||
* the call instruction, as an index for this list
|
||||
*/
|
||||
func[i]->aux->func = func;
|
||||
func[i]->aux->func_cnt = env->subprog_cnt;
|
||||
}
|
||||
for (i = 0; i <= env->subprog_cnt; i++) {
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
old_bpf_func = func[i]->bpf_func;
|
||||
tmp = bpf_int_jit_compile(func[i]);
|
||||
if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
|
||||
@@ -5419,7 +5509,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
/* finally lock prog and jit images for all functions and
|
||||
* populate kallsysm
|
||||
*/
|
||||
for (i = 0; i <= env->subprog_cnt; i++) {
|
||||
for (i = 0; i < env->subprog_cnt; i++) {
|
||||
bpf_prog_lock_ro(func[i]);
|
||||
bpf_prog_kallsyms_add(func[i]);
|
||||
}
|
||||
@@ -5429,26 +5519,21 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
* later look the same as if they were interpreted only.
|
||||
*/
|
||||
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
|
||||
unsigned long addr;
|
||||
|
||||
if (insn->code != (BPF_JMP | BPF_CALL) ||
|
||||
insn->src_reg != BPF_PSEUDO_CALL)
|
||||
continue;
|
||||
insn->off = env->insn_aux_data[i].call_imm;
|
||||
subprog = find_subprog(env, i + insn->off + 1);
|
||||
addr = (unsigned long)func[subprog + 1]->bpf_func;
|
||||
addr &= PAGE_MASK;
|
||||
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
|
||||
addr - __bpf_call_base;
|
||||
insn->imm = subprog;
|
||||
}
|
||||
|
||||
prog->jited = 1;
|
||||
prog->bpf_func = func[0]->bpf_func;
|
||||
prog->aux->func = func;
|
||||
prog->aux->func_cnt = env->subprog_cnt + 1;
|
||||
prog->aux->func_cnt = env->subprog_cnt;
|
||||
return 0;
|
||||
out_free:
|
||||
for (i = 0; i <= env->subprog_cnt; i++)
|
||||
for (i = 0; i < env->subprog_cnt; i++)
|
||||
if (func[i])
|
||||
bpf_jit_free(func[i]);
|
||||
kfree(func);
|
||||
@@ -5505,6 +5590,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
struct bpf_insn *insn = prog->insnsi;
|
||||
const struct bpf_func_proto *fn;
|
||||
const int insn_cnt = prog->len;
|
||||
const struct bpf_map_ops *ops;
|
||||
struct bpf_insn_aux_data *aux;
|
||||
struct bpf_insn insn_buf[16];
|
||||
struct bpf_prog *new_prog;
|
||||
@@ -5552,6 +5638,25 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (BPF_CLASS(insn->code) == BPF_LD &&
|
||||
(BPF_MODE(insn->code) == BPF_ABS ||
|
||||
BPF_MODE(insn->code) == BPF_IND)) {
|
||||
cnt = env->ops->gen_ld_abs(insn, insn_buf);
|
||||
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
|
||||
if (!new_prog)
|
||||
return -ENOMEM;
|
||||
|
||||
delta += cnt - 1;
|
||||
env->prog = prog = new_prog;
|
||||
insn = new_prog->insnsi + i + delta;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (insn->code != (BPF_JMP | BPF_CALL))
|
||||
continue;
|
||||
if (insn->src_reg == BPF_PSEUDO_CALL)
|
||||
@@ -5615,35 +5720,61 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
|
||||
}
|
||||
|
||||
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
|
||||
* handlers are currently limited to 64 bit only.
|
||||
* and other inlining handlers are currently limited to 64 bit
|
||||
* only.
|
||||
*/
|
||||
if (prog->jit_requested && BITS_PER_LONG == 64 &&
|
||||
insn->imm == BPF_FUNC_map_lookup_elem) {
|
||||
(insn->imm == BPF_FUNC_map_lookup_elem ||
|
||||
insn->imm == BPF_FUNC_map_update_elem ||
|
||||
insn->imm == BPF_FUNC_map_delete_elem)) {
|
||||
aux = &env->insn_aux_data[i + delta];
|
||||
if (bpf_map_ptr_poisoned(aux))
|
||||
goto patch_call_imm;
|
||||
|
||||
map_ptr = BPF_MAP_PTR(aux->map_state);
|
||||
if (!map_ptr->ops->map_gen_lookup)
|
||||
goto patch_call_imm;
|
||||
ops = map_ptr->ops;
|
||||
if (insn->imm == BPF_FUNC_map_lookup_elem &&
|
||||
ops->map_gen_lookup) {
|
||||
cnt = ops->map_gen_lookup(map_ptr, insn_buf);
|
||||
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
|
||||
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
|
||||
verbose(env, "bpf verifier is misconfigured\n");
|
||||
return -EINVAL;
|
||||
new_prog = bpf_patch_insn_data(env, i + delta,
|
||||
insn_buf, cnt);
|
||||
if (!new_prog)
|
||||
return -ENOMEM;
|
||||
|
||||
delta += cnt - 1;
|
||||
env->prog = prog = new_prog;
|
||||
insn = new_prog->insnsi + i + delta;
|
||||
continue;
|
||||
}
|
||||
|
||||
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
|
||||
cnt);
|
||||
if (!new_prog)
|
||||
return -ENOMEM;
|
||||
BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
|
||||
(void *(*)(struct bpf_map *map, void *key))NULL));
|
||||
BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
|
||||
(int (*)(struct bpf_map *map, void *key))NULL));
|
||||
BUILD_BUG_ON(!__same_type(ops->map_update_elem,
|
||||
(int (*)(struct bpf_map *map, void *key, void *value,
|
||||
u64 flags))NULL));
|
||||
switch (insn->imm) {
|
||||
case BPF_FUNC_map_lookup_elem:
|
||||
insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
|
||||
__bpf_call_base;
|
||||
continue;
|
||||
case BPF_FUNC_map_update_elem:
|
||||
insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
|
||||
__bpf_call_base;
|
||||
continue;
|
||||
case BPF_FUNC_map_delete_elem:
|
||||
insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
|
||||
__bpf_call_base;
|
||||
continue;
|
||||
}
|
||||
|
||||
delta += cnt - 1;
|
||||
|
||||
/* keep walking new program and skip insns we just inserted */
|
||||
env->prog = prog = new_prog;
|
||||
insn = new_prog->insnsi + i + delta;
|
||||
continue;
|
||||
goto patch_call_imm;
|
||||
}
|
||||
|
||||
if (insn->imm == BPF_FUNC_redirect_map) {
|
||||
@@ -5755,16 +5886,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
|
||||
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
|
||||
env->strict_alignment = true;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
ret = bpf_prog_offload_verifier_prep(env);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
ret = replace_map_fd_with_map_ptr(env);
|
||||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
ret = bpf_prog_offload_verifier_prep(env);
|
||||
if (ret)
|
||||
goto skip_full_check;
|
||||
}
|
||||
|
||||
env->explored_states = kcalloc(env->prog->len,
|
||||
sizeof(struct bpf_verifier_state_list *),
|
||||
GFP_USER);
|
||||
@@ -5835,7 +5966,7 @@ skip_full_check:
|
||||
err_release_maps:
|
||||
if (!env->prog->aux->used_maps)
|
||||
/* if we didn't copy map pointers into bpf_prog_info, release
|
||||
* them now. Otherwise free_bpf_prog_info() will release them.
|
||||
* them now. Otherwise free_used_maps() will release them.
|
||||
*/
|
||||
release_maps(env);
|
||||
*prog = env->prog;
|
||||
|
kernel/bpf/xskmap.c | 232 (new file)
@@ -0,0 +1,232 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* XSKMAP used for AF_XDP sockets
|
||||
* Copyright(c) 2018 Intel Corporation.
|
||||
*/
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/capability.h>
|
||||
#include <net/xdp_sock.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
struct xsk_map {
|
||||
struct bpf_map map;
|
||||
struct xdp_sock **xsk_map;
|
||||
struct list_head __percpu *flush_list;
|
||||
};
|
||||
|
||||
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
int cpu, err = -EINVAL;
|
||||
struct xsk_map *m;
|
||||
u64 cost;
|
||||
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
if (attr->max_entries == 0 || attr->key_size != 4 ||
|
||||
attr->value_size != 4 ||
|
||||
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
m = kzalloc(sizeof(*m), GFP_USER);
|
||||
if (!m)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
bpf_map_init_from_attr(&m->map, attr);
|
||||
|
||||
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
|
||||
cost += sizeof(struct list_head) * num_possible_cpus();
|
||||
if (cost >= U32_MAX - PAGE_SIZE)
|
||||
goto free_m;
|
||||
|
||||
m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
|
||||
|
||||
/* Notice returns -EPERM on if map size is larger than memlock limit */
|
||||
err = bpf_map_precharge_memlock(m->map.pages);
|
||||
if (err)
|
||||
goto free_m;
|
||||
|
||||
err = -ENOMEM;
|
||||
|
||||
m->flush_list = alloc_percpu(struct list_head);
|
||||
if (!m->flush_list)
|
||||
goto free_m;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
|
||||
|
||||
m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
|
||||
sizeof(struct xdp_sock *),
|
||||
m->map.numa_node);
|
||||
if (!m->xsk_map)
|
||||
goto free_percpu;
|
||||
return &m->map;
|
||||
|
||||
free_percpu:
|
||||
free_percpu(m->flush_list);
|
||||
free_m:
|
||||
kfree(m);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void xsk_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
int i;
|
||||
|
||||
synchronize_net();
|
||||
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
struct xdp_sock *xs;
|
||||
|
||||
xs = m->xsk_map[i];
|
||||
if (!xs)
|
||||
continue;
|
||||
|
||||
sock_put((struct sock *)xs);
|
||||
}
|
||||
|
||||
free_percpu(m->flush_list);
|
||||
bpf_map_area_free(m->xsk_map);
|
||||
kfree(m);
|
||||
}
|
||||
|
||||
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
u32 index = key ? *(u32 *)key : U32_MAX;
|
||||
u32 *next = next_key;
|
||||
|
||||
if (index >= m->map.max_entries) {
|
||||
*next = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (index == m->map.max_entries - 1)
|
||||
return -ENOENT;
|
||||
*next = index + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct xdp_sock *xs;
|
||||
|
||||
if (key >= map->max_entries)
|
||||
return NULL;
|
||||
|
||||
xs = READ_ONCE(m->xsk_map[key]);
|
||||
return xs;
|
||||
}
|
||||
|
||||
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
|
||||
struct xdp_sock *xs)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||
int err;
|
||||
|
||||
err = xsk_rcv(xs, xdp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!xs->flush_node.prev)
|
||||
list_add(&xs->flush_node, flush_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __xsk_map_flush(struct bpf_map *map)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
|
||||
struct xdp_sock *xs, *tmp;
|
||||
|
||||
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
|
||||
xsk_flush(xs);
|
||||
__list_del(xs->flush_node.prev, xs->flush_node.next);
|
||||
xs->flush_node.prev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
|
||||
u64 map_flags)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
u32 i = *(u32 *)key, fd = *(u32 *)value;
|
||||
struct xdp_sock *xs, *old_xs;
|
||||
struct socket *sock;
|
||||
int err;
|
||||
|
||||
if (unlikely(map_flags > BPF_EXIST))
|
||||
return -EINVAL;
|
||||
if (unlikely(i >= m->map.max_entries))
|
||||
return -E2BIG;
|
||||
if (unlikely(map_flags == BPF_NOEXIST))
|
||||
return -EEXIST;
|
||||
|
||||
sock = sockfd_lookup(fd, &err);
|
||||
if (!sock)
|
||||
return err;
|
||||
|
||||
if (sock->sk->sk_family != PF_XDP) {
|
||||
sockfd_put(sock);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
xs = (struct xdp_sock *)sock->sk;
|
||||
|
||||
if (!xsk_is_setup_for_bpf_map(xs)) {
|
||||
sockfd_put(sock);
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
sock_hold(sock->sk);
|
||||
|
||||
old_xs = xchg(&m->xsk_map[i], xs);
|
||||
if (old_xs) {
|
||||
/* Make sure we've flushed everything. */
|
||||
synchronize_net();
|
||||
sock_put((struct sock *)old_xs);
|
||||
}
|
||||
|
||||
sockfd_put(sock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
|
||||
{
|
||||
struct xsk_map *m = container_of(map, struct xsk_map, map);
|
||||
struct xdp_sock *old_xs;
|
||||
int k = *(u32 *)key;
|
||||
|
||||
if (k >= map->max_entries)
|
||||
return -EINVAL;
|
||||
|
||||
old_xs = xchg(&m->xsk_map[k], NULL);
|
||||
if (old_xs) {
|
||||
/* Make sure we've flushed everything. */
|
||||
synchronize_net();
|
||||
sock_put((struct sock *)old_xs);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct bpf_map_ops xsk_map_ops = {
|
||||
.map_alloc = xsk_map_alloc,
|
||||
.map_free = xsk_map_free,
|
||||
.map_get_next_key = xsk_map_get_next_key,
|
||||
.map_lookup_elem = xsk_map_lookup_elem,
|
||||
.map_update_elem = xsk_map_update_elem,
|
||||
.map_delete_elem = xsk_map_delete_elem,
|
||||
};
|
||||
|
||||
|
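For orientation, a minimal sketch of how an XDP program typically drives this new map type (not part of this diff; the map name, section names and the "bpf_helpers.h" include path are assumptions, loosely modeled on the xdpsock samples). User space stores an AF_XDP socket fd at the RX queue index, and the program steers frames into it with bpf_redirect_map(), which ends up in __xsk_map_redirect()/__xsk_map_flush() above:

/* SPDX-License-Identifier: GPL-2.0 */
/* Hypothetical XDP program; all names here are illustrative only. */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") xsks_map = {
	.type		= BPF_MAP_TYPE_XSKMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 4,
};

SEC("xdp_sock")
int xdp_sock_prog(struct xdp_md *ctx)
{
	/* Redirect every frame received on this RX queue to the AF_XDP
	 * socket that user space placed at the same index in xsks_map.
	 */
	return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, 0);
}

char _license[] SEC("license") = "GPL";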
@@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
 	return file;
 }
 
+const struct perf_event *perf_get_event(struct file *file)
+{
+	if (file->f_op != &perf_fops)
+		return ERR_PTR(-EINVAL);
+
+	return file->private_data;
+}
+
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	if (!event)
@@ -14,12 +14,14 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/error-injection.h>
|
||||
|
||||
#include "trace_probe.h"
|
||||
#include "trace.h"
|
||||
|
||||
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
|
||||
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
|
||||
|
||||
/**
|
||||
* trace_call_bpf - invoke BPF program
|
||||
@@ -474,8 +476,6 @@ BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
struct cgroup *cgrp;
|
||||
|
||||
if (unlikely(in_interrupt()))
|
||||
return -EINVAL;
|
||||
if (unlikely(idx >= array->map.max_entries))
|
||||
return -E2BIG;
|
||||
|
||||
@@ -564,6 +564,10 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_get_prandom_u32_proto;
|
||||
case BPF_FUNC_probe_read_str:
|
||||
return &bpf_probe_read_str_proto;
|
||||
#ifdef CONFIG_CGROUPS
|
||||
case BPF_FUNC_get_current_cgroup_id:
|
||||
return &bpf_get_current_cgroup_id_proto;
|
||||
#endif
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@@ -577,6 +581,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto;
|
||||
case BPF_FUNC_perf_event_read_value:
|
||||
return &bpf_perf_event_read_value_proto;
|
||||
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
|
||||
@@ -664,6 +670,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
|
||||
u64, flags)
|
||||
{
|
||||
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
|
||||
|
||||
return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
|
||||
(unsigned long) size, flags, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_stack_proto_tp = {
|
||||
.func = bpf_get_stack_tp,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
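As a rough illustration of what the new bpf_get_stack() wiring enables (not from this patch set; the map layout, section name and "bpf_helpers.h" path are assumptions), a kprobe program can now copy the raw user stack into a map buffer instead of going through bpf_get_stackid():

/* Hypothetical program; sizes and names are illustrative only. */
#include <linux/bpf.h>
#include <linux/ptrace.h>
#include "bpf_helpers.h"

struct stack_dump {
	long len;		/* bytes copied by bpf_get_stack(), or <0 */
	__u64 ips[64];		/* raw instruction pointers */
};

struct bpf_map_def SEC("maps") stack_buf = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(struct stack_dump),
	.max_entries	= 1,
};

SEC("kprobe/sys_write")
int dump_user_stack(struct pt_regs *ctx)
{
	__u32 key = 0;
	struct stack_dump *dump = bpf_map_lookup_elem(&stack_buf, &key);

	if (!dump)
		return 0;

	/* Copy up to sizeof(dump->ips) bytes of the current user stack
	 * and record how many bytes were actually written.
	 */
	dump->len = bpf_get_stack(ctx, dump->ips, sizeof(dump->ips),
				  BPF_F_USER_STACK);
	return 0;
}

char _license[] SEC("license") = "GPL";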
static const struct bpf_func_proto *
|
||||
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
@@ -672,6 +697,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto_tp;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto_tp;
|
||||
default:
|
||||
return tracing_func_proto(func_id, prog);
|
||||
}
|
||||
@@ -734,6 +761,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto_tp;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto_tp;
|
||||
case BPF_FUNC_perf_prog_read_value:
|
||||
return &bpf_perf_prog_read_value_proto;
|
||||
default:
|
||||
@@ -744,7 +773,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
/*
|
||||
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
|
||||
* to avoid potential recursive reuse issue when/if tracepoints are added
|
||||
* inside bpf_*_event_output and/or bpf_get_stack_id
|
||||
* inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
|
||||
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
||||
@@ -787,6 +816,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
|
||||
void *, buf, u32, size, u64, flags)
|
||||
{
|
||||
struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
|
||||
|
||||
perf_fetch_caller_regs(regs);
|
||||
return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
|
||||
(unsigned long) size, flags, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
|
||||
.func = bpf_get_stack_raw_tp,
|
||||
.gpl_only = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_CTX,
|
||||
.arg2_type = ARG_PTR_TO_MEM,
|
||||
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
{
|
||||
@@ -795,6 +844,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
return &bpf_perf_event_output_proto_raw_tp;
|
||||
case BPF_FUNC_get_stackid:
|
||||
return &bpf_get_stackid_proto_raw_tp;
|
||||
case BPF_FUNC_get_stack:
|
||||
return &bpf_get_stack_proto_raw_tp;
|
||||
default:
|
||||
return tracing_func_proto(func_id, prog);
|
||||
}
|
||||
@@ -833,8 +884,14 @@ static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type
|
||||
return false;
|
||||
if (type != BPF_READ)
|
||||
return false;
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
if (off % size != 0) {
|
||||
if (sizeof(unsigned long) != 4)
|
||||
return false;
|
||||
if (size != 8)
|
||||
return false;
|
||||
if (off % size != 4)
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (off) {
|
||||
case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
|
||||
@@ -959,6 +1016,8 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
|
||||
|
||||
old_array = event->tp_event->prog_array;
|
||||
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
|
||||
if (ret == -ENOENT)
|
||||
goto unlock;
|
||||
if (ret < 0) {
|
||||
bpf_prog_array_delete_safe(old_array, event->prog);
|
||||
} else {
|
||||
@@ -1117,3 +1176,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
|
||||
mutex_unlock(&bpf_event_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
|
||||
u32 *fd_type, const char **buf,
|
||||
u64 *probe_offset, u64 *probe_addr)
|
||||
{
|
||||
bool is_tracepoint, is_syscall_tp;
|
||||
struct bpf_prog *prog;
|
||||
int flags, err = 0;
|
||||
|
||||
prog = event->prog;
|
||||
if (!prog)
|
||||
return -ENOENT;
|
||||
|
||||
/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
|
||||
if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*prog_id = prog->aux->id;
|
||||
flags = event->tp_event->flags;
|
||||
is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
|
||||
is_syscall_tp = is_syscall_trace_event(event->tp_event);
|
||||
|
||||
if (is_tracepoint || is_syscall_tp) {
|
||||
*buf = is_tracepoint ? event->tp_event->tp->name
|
||||
: event->tp_event->name;
|
||||
*fd_type = BPF_FD_TYPE_TRACEPOINT;
|
||||
*probe_offset = 0x0;
|
||||
*probe_addr = 0x0;
|
||||
} else {
|
||||
/* kprobe/uprobe */
|
||||
err = -EOPNOTSUPP;
|
||||
#ifdef CONFIG_KPROBE_EVENTS
|
||||
if (flags & TRACE_EVENT_FL_KPROBE)
|
||||
err = bpf_get_kprobe_info(event, fd_type, buf,
|
||||
probe_offset, probe_addr,
|
||||
event->attr.type == PERF_TYPE_TRACEPOINT);
|
||||
#endif
|
||||
#ifdef CONFIG_UPROBE_EVENTS
|
||||
if (flags & TRACE_EVENT_FL_UPROBE)
|
||||
err = bpf_get_uprobe_info(event, fd_type, buf,
|
||||
probe_offset,
|
||||
event->attr.type == PERF_TYPE_TRACEPOINT);
|
||||
#endif
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 			      head, NULL);
 }
 NOKPROBE_SYMBOL(kretprobe_perf_func);
+
+int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
+			const char **symbol, u64 *probe_offset,
+			u64 *probe_addr, bool perf_type_tracepoint)
+{
+	const char *pevent = trace_event_name(event->tp_event);
+	const char *group = event->tp_event->class->system;
+	struct trace_kprobe *tk;
+
+	if (perf_type_tracepoint)
+		tk = find_trace_kprobe(pevent, group);
+	else
+		tk = event->tp_event->data;
+	if (!tk)
+		return -EINVAL;
+
+	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
+					      : BPF_FD_TYPE_KPROBE;
+	if (tk->symbol) {
+		*symbol = tk->symbol;
+		*probe_offset = tk->rp.kp.offset;
+		*probe_addr = 0;
+	} else {
+		*symbol = NULL;
+		*probe_offset = 0;
+		*probe_addr = (unsigned long)tk->rp.kp.addr;
+	}
+	return 0;
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 /*
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
 {
 	__uprobe_perf_func(tu, func, regs, ucb, dsize);
 }
+
+int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
+			const char **filename, u64 *probe_offset,
+			bool perf_type_tracepoint)
+{
+	const char *pevent = trace_event_name(event->tp_event);
+	const char *group = event->tp_event->class->system;
+	struct trace_uprobe *tu;
+
+	if (perf_type_tracepoint)
+		tu = find_probe_event(pevent, group);
+	else
+		tu = event->tp_event->data;
+	if (!tu)
+		return -EINVAL;
+
+	*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
+				    : BPF_FD_TYPE_UPROBE;
+	*filename = tu->filename;
+	*probe_offset = tu->offset;
+	return 0;
+}
 #endif /* CONFIG_PERF_EVENTS */
 
 static int
kernel/umh.c | 125
@@ -25,6 +25,8 @@
 #include <linux/ptrace.h>
 #include <linux/async.h>
 #include <linux/uaccess.h>
+#include <linux/shmem_fs.h>
+#include <linux/pipe_fs_i.h>
 
 #include <trace/events/module.h>
 
@@ -97,9 +99,13 @@ static int call_usermodehelper_exec_async(void *data)
 
 	commit_creds(new);
 
-	retval = do_execve(getname_kernel(sub_info->path),
-			   (const char __user *const __user *)sub_info->argv,
-			   (const char __user *const __user *)sub_info->envp);
+	if (sub_info->file)
+		retval = do_execve_file(sub_info->file,
+					sub_info->argv, sub_info->envp);
+	else
+		retval = do_execve(getname_kernel(sub_info->path),
+				   (const char __user *const __user *)sub_info->argv,
+				   (const char __user *const __user *)sub_info->envp);
 out:
 	sub_info->retval = retval;
 	/*
@@ -185,6 +191,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
 		if (pid < 0) {
 			sub_info->retval = pid;
 			umh_complete(sub_info);
+		} else {
+			sub_info->pid = pid;
 		}
 	}
 }
@@ -393,6 +401,117 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
|
||||
}
|
||||
EXPORT_SYMBOL(call_usermodehelper_setup);
|
||||
|
||||
struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
|
||||
int (*init)(struct subprocess_info *info, struct cred *new),
|
||||
void (*cleanup)(struct subprocess_info *info), void *data)
|
||||
{
|
||||
struct subprocess_info *sub_info;
|
||||
|
||||
sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
|
||||
if (!sub_info)
|
||||
return NULL;
|
||||
|
||||
INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
|
||||
sub_info->path = "none";
|
||||
sub_info->file = file;
|
||||
sub_info->init = init;
|
||||
sub_info->cleanup = cleanup;
|
||||
sub_info->data = data;
|
||||
return sub_info;
|
||||
}
|
||||
|
||||
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
|
||||
{
|
||||
struct umh_info *umh_info = info->data;
|
||||
struct file *from_umh[2];
|
||||
struct file *to_umh[2];
|
||||
int err;
|
||||
|
||||
/* create pipe to send data to umh */
|
||||
err = create_pipe_files(to_umh, 0);
|
||||
if (err)
|
||||
return err;
|
||||
err = replace_fd(0, to_umh[0], 0);
|
||||
fput(to_umh[0]);
|
||||
if (err < 0) {
|
||||
fput(to_umh[1]);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* create pipe to receive data from umh */
|
||||
err = create_pipe_files(from_umh, 0);
|
||||
if (err) {
|
||||
fput(to_umh[1]);
|
||||
replace_fd(0, NULL, 0);
|
||||
return err;
|
||||
}
|
||||
err = replace_fd(1, from_umh[1], 0);
|
||||
fput(from_umh[1]);
|
||||
if (err < 0) {
|
||||
fput(to_umh[1]);
|
||||
replace_fd(0, NULL, 0);
|
||||
fput(from_umh[0]);
|
||||
return err;
|
||||
}
|
||||
|
||||
umh_info->pipe_to_umh = to_umh[1];
|
||||
umh_info->pipe_from_umh = from_umh[0];
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void umh_save_pid(struct subprocess_info *info)
|
||||
{
|
||||
struct umh_info *umh_info = info->data;
|
||||
|
||||
umh_info->pid = info->pid;
|
||||
}
|
||||
|
||||
/**
|
||||
* fork_usermode_blob - fork a blob of bytes as a usermode process
|
||||
* @data: a blob of bytes that can be do_execv-ed as a file
|
||||
* @len: length of the blob
|
||||
* @info: information about usermode process (shouldn't be NULL)
|
||||
*
|
||||
* Returns either negative error or zero which indicates success
|
||||
* in executing a blob of bytes as a usermode process. In such
|
||||
* case 'struct umh_info *info' is populated with two pipes
|
||||
* and a pid of the process. The caller is responsible for health
|
||||
* check of the user process, killing it via pid, and closing the
|
||||
* pipes when user process is no longer needed.
|
||||
*/
|
||||
int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
|
||||
{
|
||||
struct subprocess_info *sub_info;
|
||||
struct file *file;
|
||||
ssize_t written;
|
||||
loff_t pos = 0;
|
||||
int err;
|
||||
|
||||
file = shmem_kernel_file_setup("", len, 0);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
written = kernel_write(file, data, len, &pos);
|
||||
if (written != len) {
|
||||
err = written;
|
||||
if (err >= 0)
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
|
||||
umh_save_pid, info);
|
||||
if (!sub_info)
|
||||
goto out;
|
||||
|
||||
err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(fork_usermode_blob);
|
||||
|
||||
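A condensed sketch of the intended caller side of fork_usermode_blob() (modeled on the new bpfilter user-mode helper; the blob symbols and module boilerplate are assumptions): the ELF payload is linked into the kernel image, handed to fork_usermode_blob(), and the two pipes in struct umh_info become the request/response channel to the spawned process.

/* Hypothetical in-kernel user of fork_usermode_blob(); illustrative only. */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/umh.h>

extern char my_umh_start[];	/* assumed linker-provided blob bounds */
extern char my_umh_end[];

static struct umh_info my_umh_info;

static int __init my_umh_init(void)
{
	int err;

	err = fork_usermode_blob(my_umh_start, my_umh_end - my_umh_start,
				 &my_umh_info);
	if (err)
		return err;

	/* On success, my_umh_info.pid identifies the helper process and
	 * my_umh_info.pipe_to_umh / my_umh_info.pipe_from_umh are its
	 * stdin/stdout; the caller must monitor the process and close
	 * both pipes itself when it is done with them.
	 */
	pr_info("user-mode helper started, pid %d\n", my_umh_info.pid);
	return 0;
}
module_init(my_umh_init);
MODULE_LICENSE("GPL");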
/**
|
||||
* call_usermodehelper_exec - start a usermode application
|
||||
* @sub_info: information about the subprocess