Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) BBR TCP congestion control, from Neal Cardwell, Yuchung Cheng and co. at Google. https://lwn.net/Articles/701165/ 2) Do TCP Small Queues for retransmits, from Eric Dumazet. 3) Support collect_md mode for all IPV4 and IPV6 tunnels, from Alexei Starovoitov. 4) Allow cls_flower to classify packets in ip tunnels, from Amir Vadai. 5) Support DSA tagging in older mv88e6xxx switches, from Andrew Lunn. 6) Support GMAC protocol in iwlwifi mvm, from Ayala Beker. 7) Support ndo_poll_controller in mlx5, from Calvin Owens. 8) Move VRF processing to an output hook and allow l3mdev to be loopback, from David Ahern. 9) Support SOCK_DESTROY for UDP sockets. Also from David Ahern. 10) Congestion control in RXRPC, from David Howells. 11) Support geneve RX offload in ixgbe, from Emil Tantilov. 12) When hitting pressure for new incoming TCP data SKBs, perform a partial rather than a full purge of the OFO queue (which could be huge). From Eric Dumazet. 13) Convert XFRM state and policy lookups to RCU, from Florian Westphal. 14) Support RX network flow classification to igb, from Gangfeng Huang. 15) Hardware offloading of eBPF in nfp driver, from Jakub Kicinski. 16) New skbmod packet action, from Jamal Hadi Salim. 17) Remove some inefficiencies in snmp proc output, from Jia He. 18) Add FIB notifications to properly propagate route changes to hardware which is doing forwarding offloading. From Jiri Pirko. 19) New dsa driver for qca8xxx chips, from John Crispin. 20) Implement RFC7559 ipv6 router solicitation backoff, from Maciej Żenczykowski. 21) Add L3 mode to ipvlan, from Mahesh Bandewar. 22) Support 802.1ad in mlx4, from Moshe Shemesh. 23) Support hardware LRO in mediatek driver, from Nelson Chang. 24) Add TC offloading to mlx5, from Or Gerlitz. 25) Convert various drivers to ethtool ksettings interfaces, from Philippe Reynes. 26) TX max rate limiting for cxgb4, from Rahul Lakkireddy. 27) NAPI support for ath10k, from Rajkumar Manoharan. 
28) Support XDP in mlx5, from Rana Shahout and Saeed Mahameed. 29) UDP replicast support in TIPC, from Richard Alpe. 30) Per-queue statistics for qed driver, from Sudarsana Reddy Kalluru. 31) Support BQL in thunderx driver, from Sunil Goutham. 32) TSO support in alx driver, from Tobias Regnery. 33) Add stream parser engine and use it in kcm. 34) Support async DHCP replies in ipconfig module, from Uwe Kleine-König. 35) DSA port fast aging for mv88e6xxx driver, from Vivien Didelot. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1715 commits) mlxsw: switchx2: Fix misuse of hard_header_len mlxsw: spectrum: Fix misuse of hard_header_len net/faraday: Stop NCSI device on shutdown net/ncsi: Introduce ncsi_stop_dev() net/ncsi: Rework the channel monitoring net/ncsi: Allow to extend NCSI request properties net/ncsi: Rework request index allocation net/ncsi: Don't probe on the reserved channel ID (0x1f) net/ncsi: Introduce NCSI_RESERVED_CHANNEL net/ncsi: Avoid unused-value build warning from ia64-linux-gcc net: Add netdev all_adj_list refcnt propagation to fix panic net: phy: Add Edge-rate driver for Microsemi PHYs. vmxnet3: Wake queue from reset work i40e: avoid NULL pointer dereference and recursive errors on early PCI error qed: Add RoCE ll2 & GSI support qed: Add support for memory registeration verbs qed: Add support for QP verbs qed: PD,PKEY and CQ verb support qed: Add support for RoCE hw init qede: Add qedr framework ...
This commit is contained in:
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
|
||||
}
|
||||
late_initcall(register_perf_event_array_map);
|
||||
|
||||
#ifdef CONFIG_SOCK_CGROUP_DATA
|
||||
#ifdef CONFIG_CGROUPS
|
||||
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
|
||||
struct file *map_file /* not used */,
|
||||
int fd)
|
||||
|
@@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void)
|
||||
prandom_init_once(&bpf_user_rnd_state);
|
||||
}
|
||||
|
||||
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_0(bpf_user_rnd_u32)
|
||||
{
|
||||
/* Should someone ever have the rather unwise idea to use some
|
||||
* of the registers passed into this function, then note that
|
||||
@@ -1031,7 +1031,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
|
||||
state = &get_cpu_var(bpf_user_rnd_state);
|
||||
res = prandom_u32_state(state);
|
||||
put_cpu_var(state);
|
||||
put_cpu_var(bpf_user_rnd_state);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@@ -16,6 +16,7 @@
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/uidgid.h>
|
||||
#include <linux/filter.h>
|
||||
|
||||
/* If kernel subsystem is allowing eBPF programs to call this function,
|
||||
* inside its own verifier_ops->get_func_proto() callback it should return
|
||||
@@ -26,48 +27,32 @@
|
||||
* if program is allowed to access maps, so check rcu_read_lock_held in
|
||||
* all three functions.
|
||||
*/
|
||||
static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
|
||||
{
|
||||
/* verifier checked that R1 contains a valid pointer to bpf_map
|
||||
* and R2 points to a program stack and map->key_size bytes were
|
||||
* initialized
|
||||
*/
|
||||
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
|
||||
void *key = (void *) (unsigned long) r2;
|
||||
void *value;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
value = map->ops->map_lookup_elem(map, key);
|
||||
|
||||
/* lookup() returns either pointer to element value or NULL
|
||||
* which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
|
||||
*/
|
||||
return (unsigned long) value;
|
||||
return (unsigned long) map->ops->map_lookup_elem(map, key);
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
|
||||
.func = bpf_map_lookup_elem,
|
||||
.gpl_only = false,
|
||||
.pkt_access = true,
|
||||
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
||||
};
|
||||
|
||||
static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
|
||||
void *, value, u64, flags)
|
||||
{
|
||||
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
|
||||
void *key = (void *) (unsigned long) r2;
|
||||
void *value = (void *) (unsigned long) r3;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
return map->ops->map_update_elem(map, key, value, r4);
|
||||
return map->ops->map_update_elem(map, key, value, flags);
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_map_update_elem_proto = {
|
||||
.func = bpf_map_update_elem,
|
||||
.gpl_only = false,
|
||||
.pkt_access = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
||||
@@ -75,19 +60,16 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
|
||||
{
|
||||
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
|
||||
void *key = (void *) (unsigned long) r2;
|
||||
|
||||
WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
|
||||
return map->ops->map_delete_elem(map, key);
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_map_delete_elem_proto = {
|
||||
.func = bpf_map_delete_elem,
|
||||
.gpl_only = false,
|
||||
.pkt_access = true,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_PTR_TO_MAP_KEY,
|
||||
@@ -99,7 +81,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_0(bpf_get_smp_processor_id)
|
||||
{
|
||||
return smp_processor_id();
|
||||
}
|
||||
@@ -110,7 +92,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_0(bpf_ktime_get_ns)
|
||||
{
|
||||
/* NMI safe access to clock monotonic */
|
||||
return ktime_get_mono_fast_ns();
|
||||
@@ -122,11 +104,11 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_0(bpf_get_current_pid_tgid)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
|
||||
if (!task)
|
||||
if (unlikely(!task))
|
||||
return -EINVAL;
|
||||
|
||||
return (u64) task->tgid << 32 | task->pid;
|
||||
@@ -138,18 +120,18 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_0(bpf_get_current_uid_gid)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
kuid_t uid;
|
||||
kgid_t gid;
|
||||
|
||||
if (!task)
|
||||
if (unlikely(!task))
|
||||
return -EINVAL;
|
||||
|
||||
current_uid_gid(&uid, &gid);
|
||||
return (u64) from_kgid(&init_user_ns, gid) << 32 |
|
||||
from_kuid(&init_user_ns, uid);
|
||||
from_kuid(&init_user_ns, uid);
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
|
||||
@@ -158,10 +140,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
|
||||
{
|
||||
struct task_struct *task = current;
|
||||
char *buf = (char *) (long) r1;
|
||||
|
||||
if (unlikely(!task))
|
||||
goto err_clear;
|
||||
|
@@ -116,10 +116,9 @@ free_smap:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
|
||||
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
u64, flags)
|
||||
{
|
||||
struct pt_regs *regs = (struct pt_regs *) (long) r1;
|
||||
struct bpf_map *map = (struct bpf_map *) (long) r2;
|
||||
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
|
||||
struct perf_callchain_entry *trace;
|
||||
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -7079,7 +7079,7 @@ static int __perf_event_overflow(struct perf_event *event,
|
||||
irq_work_queue(&event->pending);
|
||||
}
|
||||
|
||||
event->overflow_handler(event, data, regs);
|
||||
READ_ONCE(event->overflow_handler)(event, data, regs);
|
||||
|
||||
if (*perf_event_fasync(event) && event->pending_kill) {
|
||||
event->pending_wakeup = 1;
|
||||
@@ -7694,11 +7694,83 @@ static void perf_event_free_filter(struct perf_event *event)
|
||||
ftrace_profile_free_filter(event);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
static void bpf_overflow_handler(struct perf_event *event,
|
||||
struct perf_sample_data *data,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct bpf_perf_event_data_kern ctx = {
|
||||
.data = data,
|
||||
.regs = regs,
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
preempt_disable();
|
||||
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
|
||||
goto out;
|
||||
rcu_read_lock();
|
||||
ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
|
||||
rcu_read_unlock();
|
||||
out:
|
||||
__this_cpu_dec(bpf_prog_active);
|
||||
preempt_enable();
|
||||
if (!ret)
|
||||
return;
|
||||
|
||||
event->orig_overflow_handler(event, data, regs);
|
||||
}
|
||||
|
||||
static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
|
||||
if (event->overflow_handler_context)
|
||||
/* hw breakpoint or kernel counter */
|
||||
return -EINVAL;
|
||||
|
||||
if (event->prog)
|
||||
return -EEXIST;
|
||||
|
||||
prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
event->prog = prog;
|
||||
event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
|
||||
WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void perf_event_free_bpf_handler(struct perf_event *event)
|
||||
{
|
||||
struct bpf_prog *prog = event->prog;
|
||||
|
||||
if (!prog)
|
||||
return;
|
||||
|
||||
WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
|
||||
event->prog = NULL;
|
||||
bpf_prog_put(prog);
|
||||
}
|
||||
#else
|
||||
static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static void perf_event_free_bpf_handler(struct perf_event *event)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
|
||||
{
|
||||
bool is_kprobe, is_tracepoint;
|
||||
struct bpf_prog *prog;
|
||||
|
||||
if (event->attr.type == PERF_TYPE_HARDWARE ||
|
||||
event->attr.type == PERF_TYPE_SOFTWARE)
|
||||
return perf_event_set_bpf_handler(event, prog_fd);
|
||||
|
||||
if (event->attr.type != PERF_TYPE_TRACEPOINT)
|
||||
return -EINVAL;
|
||||
|
||||
@@ -7739,6 +7811,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
|
||||
perf_event_free_bpf_handler(event);
|
||||
|
||||
if (!event->tp_event)
|
||||
return;
|
||||
|
||||
@@ -9055,6 +9129,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
|
||||
if (!overflow_handler && parent_event) {
|
||||
overflow_handler = parent_event->overflow_handler;
|
||||
context = parent_event->overflow_handler_context;
|
||||
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
|
||||
if (overflow_handler == bpf_overflow_handler) {
|
||||
struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
|
||||
|
||||
if (IS_ERR(prog)) {
|
||||
err = PTR_ERR(prog);
|
||||
goto err_ns;
|
||||
}
|
||||
event->prog = prog;
|
||||
event->orig_overflow_handler =
|
||||
parent_event->orig_overflow_handler;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (overflow_handler) {
|
||||
|
@@ -1,4 +1,5 @@
|
||||
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
|
||||
* Copyright (c) 2016 Facebook
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
@@ -8,6 +9,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_perf_event.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/ctype.h>
|
||||
@@ -59,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(trace_call_bpf);
|
||||
|
||||
static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
|
||||
{
|
||||
void *dst = (void *) (long) r1;
|
||||
int ret, size = (int) r2;
|
||||
void *unsafe_ptr = (void *) (long) r3;
|
||||
int ret;
|
||||
|
||||
ret = probe_kernel_read(dst, unsafe_ptr, size);
|
||||
if (unlikely(ret < 0))
|
||||
@@ -81,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
|
||||
.arg3_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
|
||||
u32, size)
|
||||
{
|
||||
void *unsafe_ptr = (void *) (long) r1;
|
||||
void *src = (void *) (long) r2;
|
||||
int size = (int) r3;
|
||||
|
||||
/*
|
||||
* Ensure we're in user context which is safe for the helper to
|
||||
* run. This helper has no business in a kthread.
|
||||
@@ -128,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
|
||||
* limited trace_printk()
|
||||
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
|
||||
*/
|
||||
static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
|
||||
u64, arg2, u64, arg3)
|
||||
{
|
||||
char *fmt = (char *) (long) r1;
|
||||
bool str_seen = false;
|
||||
int mod[3] = {};
|
||||
int fmt_cnt = 0;
|
||||
@@ -176,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
|
||||
|
||||
switch (fmt_cnt) {
|
||||
case 1:
|
||||
unsafe_addr = r3;
|
||||
r3 = (long) buf;
|
||||
unsafe_addr = arg1;
|
||||
arg1 = (long) buf;
|
||||
break;
|
||||
case 2:
|
||||
unsafe_addr = r4;
|
||||
r4 = (long) buf;
|
||||
unsafe_addr = arg2;
|
||||
arg2 = (long) buf;
|
||||
break;
|
||||
case 3:
|
||||
unsafe_addr = r5;
|
||||
r5 = (long) buf;
|
||||
unsafe_addr = arg3;
|
||||
arg3 = (long) buf;
|
||||
break;
|
||||
}
|
||||
buf[0] = 0;
|
||||
@@ -207,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
|
||||
}
|
||||
|
||||
return __trace_printk(1/* fake ip will not be printed */, fmt,
|
||||
mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
|
||||
mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
|
||||
mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
|
||||
mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
|
||||
mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
|
||||
mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_trace_printk_proto = {
|
||||
@@ -231,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
|
||||
return &bpf_trace_printk_proto;
|
||||
}
|
||||
|
||||
static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
|
||||
{
|
||||
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
unsigned int cpu = smp_processor_id();
|
||||
u64 index = flags & BPF_F_INDEX_MASK;
|
||||
@@ -310,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
|
||||
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
u64, flags, void *, data, u64, size)
|
||||
{
|
||||
struct pt_regs *regs = (struct pt_regs *)(long) r1;
|
||||
struct bpf_map *map = (struct bpf_map *)(long) r2;
|
||||
void *data = (void *)(long) r4;
|
||||
struct perf_raw_record raw = {
|
||||
.frag = {
|
||||
.size = size,
|
||||
@@ -365,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
|
||||
return __bpf_perf_event_output(regs, map, flags, &raw);
|
||||
}
|
||||
|
||||
static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_0(bpf_get_current_task)
|
||||
{
|
||||
return (long) current;
|
||||
}
|
||||
@@ -376,6 +370,31 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
};
|
||||
|
||||
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
|
||||
{
|
||||
struct bpf_array *array = container_of(map, struct bpf_array, map);
|
||||
struct cgroup *cgrp;
|
||||
|
||||
if (unlikely(in_interrupt()))
|
||||
return -EINVAL;
|
||||
if (unlikely(idx >= array->map.max_entries))
|
||||
return -E2BIG;
|
||||
|
||||
cgrp = READ_ONCE(array->ptrs[idx]);
|
||||
if (unlikely(!cgrp))
|
||||
return -EAGAIN;
|
||||
|
||||
return task_under_cgroup_hierarchy(current, cgrp);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
|
||||
.func = bpf_current_task_under_cgroup,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_CONST_MAP_PTR,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
|
||||
{
|
||||
switch (func_id) {
|
||||
@@ -407,6 +426,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
|
||||
return &bpf_perf_event_read_proto;
|
||||
case BPF_FUNC_probe_write_user:
|
||||
return bpf_get_probe_write_proto();
|
||||
case BPF_FUNC_current_task_under_cgroup:
|
||||
return &bpf_current_task_under_cgroup_proto;
|
||||
case BPF_FUNC_get_prandom_u32:
|
||||
return &bpf_get_prandom_u32_proto;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@@ -447,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = {
|
||||
.type = BPF_PROG_TYPE_KPROBE,
|
||||
};
|
||||
|
||||
static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
|
||||
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
|
||||
u64, flags, void *, data, u64, size)
|
||||
{
|
||||
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
|
||||
|
||||
/*
|
||||
* r1 points to perf tracepoint buffer where first 8 bytes are hidden
|
||||
* from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
|
||||
* from there and call the same bpf_perf_event_output() helper
|
||||
* from there and call the same bpf_perf_event_output() helper inline.
|
||||
*/
|
||||
u64 ctx = *(long *)(uintptr_t)r1;
|
||||
|
||||
return bpf_perf_event_output(ctx, r2, index, r4, size);
|
||||
return ____bpf_perf_event_output(regs, map, flags, data, size);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
|
||||
@@ -470,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
|
||||
.arg5_type = ARG_CONST_STACK_SIZE,
|
||||
};
|
||||
|
||||
static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
|
||||
BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
|
||||
u64, flags)
|
||||
{
|
||||
u64 ctx = *(long *)(uintptr_t)r1;
|
||||
struct pt_regs *regs = *(struct pt_regs **)tp_buff;
|
||||
|
||||
return bpf_get_stackid(ctx, r2, r3, r4, r5);
|
||||
/*
|
||||
* Same comment as in bpf_perf_event_output_tp(), only that this time
|
||||
* the other helper's function body cannot be inlined due to being
|
||||
* external, thus we need to call raw helper function.
|
||||
*/
|
||||
return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
|
||||
flags, 0, 0);
|
||||
}
|
||||
|
||||
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
|
||||
@@ -520,10 +551,69 @@ static struct bpf_prog_type_list tracepoint_tl = {
|
||||
.type = BPF_PROG_TYPE_TRACEPOINT,
|
||||
};
|
||||
|
||||
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
|
||||
enum bpf_reg_type *reg_type)
|
||||
{
|
||||
if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
|
||||
return false;
|
||||
if (type != BPF_READ)
|
||||
return false;
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
|
||||
if (size != sizeof(u64))
|
||||
return false;
|
||||
} else {
|
||||
if (size != sizeof(long))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
|
||||
int src_reg, int ctx_off,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
switch (ctx_off) {
|
||||
case offsetof(struct bpf_perf_event_data, sample_period):
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
|
||||
data), dst_reg, src_reg,
|
||||
offsetof(struct bpf_perf_event_data_kern, data));
|
||||
*insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
|
||||
offsetof(struct perf_sample_data, period));
|
||||
break;
|
||||
default:
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
|
||||
regs), dst_reg, src_reg,
|
||||
offsetof(struct bpf_perf_event_data_kern, regs));
|
||||
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
|
||||
break;
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static const struct bpf_verifier_ops perf_event_prog_ops = {
|
||||
.get_func_proto = tp_prog_func_proto,
|
||||
.is_valid_access = pe_prog_is_valid_access,
|
||||
.convert_ctx_access = pe_prog_convert_ctx_access,
|
||||
};
|
||||
|
||||
static struct bpf_prog_type_list perf_event_tl = {
|
||||
.ops = &perf_event_prog_ops,
|
||||
.type = BPF_PROG_TYPE_PERF_EVENT,
|
||||
};
|
||||
|
||||
static int __init register_kprobe_prog_ops(void)
|
||||
{
|
||||
bpf_register_prog_type(&kprobe_tl);
|
||||
bpf_register_prog_type(&tracepoint_tl);
|
||||
bpf_register_prog_type(&perf_event_tl);
|
||||
return 0;
|
||||
}
|
||||
late_initcall(register_kprobe_prog_ops);
|
||||
|
Reference in New Issue
Block a user