Merge tag 'v4.18-rc5' into locking/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
        return ret;
}

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
                           enum bpf_prog_type ptype, struct bpf_prog *prog)
{
        struct cgroup *cgrp;
        int ret;

        cgrp = cgroup_get_from_fd(attr->target_fd);
        if (IS_ERR(cgrp))
                return PTR_ERR(cgrp);

        ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
                                attr->attach_flags);
        cgroup_put(cgrp);
        return ret;
}

int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
        struct bpf_prog *prog;
        struct cgroup *cgrp;
        int ret;

        cgrp = cgroup_get_from_fd(attr->target_fd);
        if (IS_ERR(cgrp))
                return PTR_ERR(cgrp);

        prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
        if (IS_ERR(prog))
                prog = NULL;

        ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
        if (prog)
                bpf_prog_put(prog);

        cgroup_put(cgrp);
        return ret;
}

int cgroup_bpf_prog_query(const union bpf_attr *attr,
                          union bpf_attr __user *uattr)
{
        struct cgroup *cgrp;
        int ret;

        cgrp = cgroup_get_from_fd(attr->query.target_fd);
        if (IS_ERR(cgrp))
                return PTR_ERR(cgrp);

        ret = cgroup_bpf_query(cgrp, attr, uattr);

        cgroup_put(cgrp);
        return ret;
}

/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
        bpf_fill_ill_insns(hdr, size);

        hdr->pages = size / PAGE_SIZE;
        hdr->locked = 0;

        hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
                     PAGE_SIZE - sizeof(*hdr));
        start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
        return 0;
}

static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
        int i, err;

        for (i = 0; i < fp->aux->func_cnt; i++) {
                err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
                if (err)
                        return err;
        }

        return bpf_prog_check_pages_ro_single(fp);
#endif
        return 0;
}

static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ finalize:
         * all eBPF JITs might immediately support all features.
         */
        *err = bpf_check_tail_call(fp);
        if (*err)
                return fp;

        /* Checkpoint: at this point onwards any cBPF -> eBPF or
         * native eBPF program is read-only. If we failed to change
         * the page attributes (e.g. allocation failure from
         * splitting large pages), then reject the whole program
         * in order to guarantee not ending up with any W+X pages
         * from BPF side in kernel.
         */
        *err = bpf_prog_check_pages_ro_locked(fp);
        return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
@@ -72,6 +72,7 @@ struct bpf_htab {
        u32 n_buckets;
        u32 elem_size;
        struct bpf_sock_progs progs;
        struct rcu_head rcu;
};

struct htab_elem {
@@ -89,8 +90,8 @@ enum smap_psock_state {
struct smap_psock_map_entry {
        struct list_head list;
        struct sock **entry;
        struct htab_elem *hash_link;
        struct bpf_htab *htab;
        struct htab_elem __rcu *hash_link;
        struct bpf_htab __rcu *htab;
};

struct smap_psock {
@@ -120,6 +121,7 @@ struct smap_psock {
        struct bpf_prog *bpf_parse;
        struct bpf_prog *bpf_verdict;
        struct list_head maps;
        spinlock_t maps_lock;

        /* Back reference used when sock callback trigger sockmap operations */
        struct sock *sock;
@@ -140,6 +142,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
                            int offset, size_t size, int flags);
static void bpf_tcp_close(struct sock *sk, long timeout);

static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
@@ -161,7 +164,42 @@ out:
        return !empty;
}

static struct proto tcp_bpf_proto;
enum {
        SOCKMAP_IPV4,
        SOCKMAP_IPV6,
        SOCKMAP_NUM_PROTS,
};

enum {
        SOCKMAP_BASE,
        SOCKMAP_TX,
        SOCKMAP_NUM_CONFIGS,
};

static struct proto *saved_tcpv6_prot __read_mostly;
static DEFINE_SPINLOCK(tcpv6_prot_lock);
static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
                         struct proto *base)
{
        prot[SOCKMAP_BASE] = *base;
        prot[SOCKMAP_BASE].close = bpf_tcp_close;
        prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
        prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;

        prot[SOCKMAP_TX] = prot[SOCKMAP_BASE];
        prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg;
        prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage;
}

static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
{
        int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
        int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;

        sk->sk_prot = &bpf_tcp_prots[family][conf];
}

static int bpf_tcp_init(struct sock *sk)
{
        struct smap_psock *psock;
@@ -181,14 +219,17 @@ static int bpf_tcp_init(struct sock *sk)
        psock->save_close = sk->sk_prot->close;
        psock->sk_proto = sk->sk_prot;

        if (psock->bpf_tx_msg) {
                tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
                tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
                tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
                tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
        /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
        if (sk->sk_family == AF_INET6 &&
            unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
                spin_lock_bh(&tcpv6_prot_lock);
                if (likely(sk->sk_prot != saved_tcpv6_prot)) {
                        build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
                        smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
                }
                spin_unlock_bh(&tcpv6_prot_lock);
        }

        sk->sk_prot = &tcp_bpf_proto;
        update_sk_prot(sk, psock);
        rcu_read_unlock();
        return 0;
}
@@ -219,16 +260,54 @@ out:
        rcu_read_unlock();
}

static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
                                         u32 hash, void *key, u32 key_size)
{
        struct htab_elem *l;

        hlist_for_each_entry_rcu(l, head, hash_node) {
                if (l->hash == hash && !memcmp(&l->key, key, key_size))
                        return l;
        }

        return NULL;
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
        return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
        return &__select_bucket(htab, hash)->head;
}

static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
        atomic_dec(&htab->count);
        kfree_rcu(l, rcu);
}

static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
                                                  struct smap_psock *psock)
{
        struct smap_psock_map_entry *e;

        spin_lock_bh(&psock->maps_lock);
        e = list_first_entry_or_null(&psock->maps,
                                     struct smap_psock_map_entry,
                                     list);
        if (e)
                list_del(&e->list);
        spin_unlock_bh(&psock->maps_lock);
        return e;
}

static void bpf_tcp_close(struct sock *sk, long timeout)
{
        void (*close_fun)(struct sock *sk, long timeout);
        struct smap_psock_map_entry *e, *tmp;
        struct smap_psock_map_entry *e;
        struct sk_msg_buff *md, *mtmp;
        struct smap_psock *psock;
        struct sock *osk;
@@ -247,7 +326,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
         */
        close_fun = psock->save_close;

        write_lock_bh(&sk->sk_callback_lock);
        if (psock->cork) {
                free_start_sg(psock->sock, psock->cork);
                kfree(psock->cork);
@@ -260,20 +338,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
                kfree(md);
        }

        list_for_each_entry_safe(e, tmp, &psock->maps, list) {
        e = psock_map_pop(sk, psock);
        while (e) {
                if (e->entry) {
                        osk = cmpxchg(e->entry, sk, NULL);
                        if (osk == sk) {
                                list_del(&e->list);
                                smap_release_sock(psock, sk);
                        }
                } else {
                        hlist_del_rcu(&e->hash_link->hash_node);
                        smap_release_sock(psock, e->hash_link->sk);
                        free_htab_elem(e->htab, e->hash_link);
                        struct htab_elem *link = rcu_dereference(e->hash_link);
                        struct bpf_htab *htab = rcu_dereference(e->htab);
                        struct hlist_head *head;
                        struct htab_elem *l;
                        struct bucket *b;

                        b = __select_bucket(htab, link->hash);
                        head = &b->head;
                        raw_spin_lock_bh(&b->lock);
                        l = lookup_elem_raw(head,
                                            link->hash, link->key,
                                            htab->map.key_size);
                        /* If another thread deleted this object skip deletion.
                         * The refcnt on psock may or may not be zero.
                         */
                        if (l) {
                                hlist_del_rcu(&link->hash_node);
                                smap_release_sock(psock, link->sk);
                                free_htab_elem(htab, link);
                        }
                        raw_spin_unlock_bh(&b->lock);
                }
                e = psock_map_pop(sk, psock);
        }
        write_unlock_bh(&sk->sk_callback_lock);
        rcu_read_unlock();
        close_fun(sk, timeout);
}
@@ -1111,8 +1207,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,

static int bpf_tcp_ulp_register(void)
{
        tcp_bpf_proto = tcp_prot;
        tcp_bpf_proto.close = bpf_tcp_close;
        build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
        /* Once BPF TX ULP is registered it is never unregistered. It
         * will be in the ULP list for the lifetime of the system. Doing
         * duplicate registers is not a problem.
@@ -1357,7 +1452,9 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{
        if (refcount_dec_and_test(&psock->refcnt)) {
                tcp_cleanup_ulp(sock);
                write_lock_bh(&sock->sk_callback_lock);
                smap_stop_sock(psock, sock);
                write_unlock_bh(&sock->sk_callback_lock);
                clear_bit(SMAP_TX_RUNNING, &psock->state);
                rcu_assign_sk_user_data(sock, NULL);
                call_rcu_sched(&psock->rcu, smap_destroy_psock);
@@ -1508,6 +1605,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, int node)
        INIT_LIST_HEAD(&psock->maps);
        INIT_LIST_HEAD(&psock->ingress);
        refcount_set(&psock->refcnt, 1);
        spin_lock_init(&psock->maps_lock);

        rcu_assign_sk_user_data(sock, psock);
        sock_hold(sock);
@@ -1564,18 +1662,32 @@ free_stab:
        return ERR_PTR(err);
}

static void smap_list_remove(struct smap_psock *psock,
                             struct sock **entry,
                             struct htab_elem *hash_link)
static void smap_list_map_remove(struct smap_psock *psock,
                                 struct sock **entry)
{
        struct smap_psock_map_entry *e, *tmp;

        spin_lock_bh(&psock->maps_lock);
        list_for_each_entry_safe(e, tmp, &psock->maps, list) {
                if (e->entry == entry || e->hash_link == hash_link) {
                if (e->entry == entry)
                        list_del(&e->list);
                        break;
                }
        }
        spin_unlock_bh(&psock->maps_lock);
}

static void smap_list_hash_remove(struct smap_psock *psock,
                                  struct htab_elem *hash_link)
{
        struct smap_psock_map_entry *e, *tmp;

        spin_lock_bh(&psock->maps_lock);
        list_for_each_entry_safe(e, tmp, &psock->maps, list) {
                struct htab_elem *c = rcu_dereference(e->hash_link);

                if (c == hash_link)
                        list_del(&e->list);
        }
        spin_unlock_bh(&psock->maps_lock);
}

static void sock_map_free(struct bpf_map *map)
@@ -1601,7 +1713,6 @@ static void sock_map_free(struct bpf_map *map)
                if (!sock)
                        continue;

                write_lock_bh(&sock->sk_callback_lock);
                psock = smap_psock_sk(sock);
                /* This check handles a racing sock event that can get the
                 * sk_callback_lock before this case but after xchg happens
@@ -1609,10 +1720,9 @@ static void sock_map_free(struct bpf_map *map)
                 * to be null and queued for garbage collection.
                 */
                if (likely(psock)) {
                        smap_list_remove(psock, &stab->sock_map[i], NULL);
                        smap_list_map_remove(psock, &stab->sock_map[i]);
                        smap_release_sock(psock, sock);
                }
                write_unlock_bh(&sock->sk_callback_lock);
        }
        rcu_read_unlock();

@@ -1661,17 +1771,15 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
        if (!sock)
                return -EINVAL;

        write_lock_bh(&sock->sk_callback_lock);
        psock = smap_psock_sk(sock);
        if (!psock)
                goto out;

        if (psock->bpf_parse)
                smap_stop_sock(psock, sock);
        smap_list_remove(psock, &stab->sock_map[k], NULL);
        smap_list_map_remove(psock, &stab->sock_map[k]);
        smap_release_sock(psock, sock);
out:
        write_unlock_bh(&sock->sk_callback_lock);
        return 0;
}

@@ -1752,7 +1860,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
                }
        }

        write_lock_bh(&sock->sk_callback_lock);
        psock = smap_psock_sk(sock);

        /* 2. Do not allow inheriting programs if psock exists and has
@@ -1809,7 +1916,9 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
                if (err)
                        goto out_free;
                smap_init_progs(psock, verdict, parse);
                write_lock_bh(&sock->sk_callback_lock);
                smap_start_sock(psock, sock);
                write_unlock_bh(&sock->sk_callback_lock);
        }

        /* 4. Place psock in sockmap for use and stop any programs on
@@ -1819,9 +1928,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
         */
        if (map_link) {
                e->entry = map_link;
                spin_lock_bh(&psock->maps_lock);
                list_add_tail(&e->list, &psock->maps);
                spin_unlock_bh(&psock->maps_lock);
        }
        write_unlock_bh(&sock->sk_callback_lock);
        return err;
out_free:
        smap_release_sock(psock, sock);
@@ -1832,7 +1942,6 @@ out_progs:
        }
        if (tx_msg)
                bpf_prog_put(tx_msg);
        write_unlock_bh(&sock->sk_callback_lock);
        kfree(e);
        return err;
}
@@ -1869,10 +1978,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
        if (osock) {
                struct smap_psock *opsock = smap_psock_sk(osock);

                write_lock_bh(&osock->sk_callback_lock);
                smap_list_remove(opsock, &stab->sock_map[i], NULL);
                smap_list_map_remove(opsock, &stab->sock_map[i]);
                smap_release_sock(opsock, osock);
                write_unlock_bh(&osock->sk_callback_lock);
        }
out:
        return err;
@@ -1915,6 +2022,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
        return 0;
}

int sockmap_get_from_fd(const union bpf_attr *attr, int type,
                        struct bpf_prog *prog)
{
        int ufd = attr->target_fd;
        struct bpf_map *map;
        struct fd f;
        int err;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        err = sock_map_prog(map, prog, attr->attach_type);
        fdput(f);
        return err;
}

static void *sock_map_lookup(struct bpf_map *map, void *key)
{
        return NULL;
@@ -2043,14 +2168,13 @@ free_htab:
        return ERR_PTR(err);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
static void __bpf_htab_free(struct rcu_head *rcu)
{
        return &htab->buckets[hash & (htab->n_buckets - 1)];
}
        struct bpf_htab *htab;

static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
        return &__select_bucket(htab, hash)->head;
        htab = container_of(rcu, struct bpf_htab, rcu);
        bpf_map_area_free(htab->buckets);
        kfree(htab);
}

static void sock_hash_free(struct bpf_map *map)
@@ -2069,16 +2193,18 @@ static void sock_hash_free(struct bpf_map *map)
         */
        rcu_read_lock();
        for (i = 0; i < htab->n_buckets; i++) {
                struct hlist_head *head = select_bucket(htab, i);
                struct bucket *b = __select_bucket(htab, i);
                struct hlist_head *head;
                struct hlist_node *n;
                struct htab_elem *l;

                raw_spin_lock_bh(&b->lock);
                head = &b->head;
                hlist_for_each_entry_safe(l, n, head, hash_node) {
                        struct sock *sock = l->sk;
                        struct smap_psock *psock;

                        hlist_del_rcu(&l->hash_node);
                        write_lock_bh(&sock->sk_callback_lock);
                        psock = smap_psock_sk(sock);
                        /* This check handles a racing sock event that can get
                         * the sk_callback_lock before this case but after xchg
@@ -2086,16 +2212,15 @@ static void sock_hash_free(struct bpf_map *map)
                         * (psock) to be null and queued for garbage collection.
                         */
                        if (likely(psock)) {
                                smap_list_remove(psock, NULL, l);
                                smap_list_hash_remove(psock, l);
                                smap_release_sock(psock, sock);
                        }
                        write_unlock_bh(&sock->sk_callback_lock);
                        kfree(l);
                        free_htab_elem(htab, l);
                }
                raw_spin_unlock_bh(&b->lock);
        }
        rcu_read_unlock();
        bpf_map_area_free(htab->buckets);
        kfree(htab);
        call_rcu(&htab->rcu, __bpf_htab_free);
}

static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
@@ -2122,19 +2247,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
        return l_new;
}

static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
                                         u32 hash, void *key, u32 key_size)
{
        struct htab_elem *l;

        hlist_for_each_entry_rcu(l, head, hash_node) {
                if (l->hash == hash && !memcmp(&l->key, key, key_size))
                        return l;
        }

        return NULL;
}

static inline u32 htab_map_hash(const void *key, u32 key_len)
{
        return jhash(key, key_len, 0);
@@ -2254,9 +2366,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                goto bucket_err;
        }

        e->hash_link = l_new;
        e->htab = container_of(map, struct bpf_htab, map);
        rcu_assign_pointer(e->hash_link, l_new);
        rcu_assign_pointer(e->htab,
                           container_of(map, struct bpf_htab, map));
        spin_lock_bh(&psock->maps_lock);
        list_add_tail(&e->list, &psock->maps);
        spin_unlock_bh(&psock->maps_lock);

        /* add new element to the head of the list, so that
         * concurrent search will find it before old elem
@@ -2266,7 +2381,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
                psock = smap_psock_sk(l_old->sk);

                hlist_del_rcu(&l_old->hash_node);
                smap_list_remove(psock, NULL, l_old);
                smap_list_hash_remove(psock, l_old);
                smap_release_sock(psock, l_old->sk);
                free_htab_elem(htab, l_old);
        }
@@ -2326,7 +2441,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
                struct smap_psock *psock;

                hlist_del_rcu(&l->hash_node);
                write_lock_bh(&sock->sk_callback_lock);
                psock = smap_psock_sk(sock);
                /* This check handles a racing sock event that can get the
                 * sk_callback_lock before this case but after xchg happens
@@ -2334,10 +2448,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
                 * to be null and queued for garbage collection.
                 */
                if (likely(psock)) {
                        smap_list_remove(psock, NULL, l);
                        smap_list_hash_remove(psock, l);
                        smap_release_sock(psock, sock);
                }
                write_unlock_bh(&sock->sk_callback_lock);
                free_htab_elem(htab, l);
                ret = 0;
        }
@@ -2383,6 +2496,7 @@ const struct bpf_map_ops sock_hash_ops = {
        .map_get_next_key = sock_hash_get_next_key,
        .map_update_elem = sock_hash_update_elem,
        .map_delete_elem = sock_hash_delete_elem,
        .map_release_uref = sock_map_release,
};

BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
@@ -1483,8 +1483,6 @@ out_free_tp:
        return err;
}

#ifdef CONFIG_CGROUP_BPF

static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
                                             enum bpf_attach_type attach_type)
{
@@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,

#define BPF_PROG_ATTACH_LAST_FIELD attach_flags

static int sockmap_get_from_fd(const union bpf_attr *attr,
                               int type, bool attach)
{
        struct bpf_prog *prog = NULL;
        int ufd = attr->target_fd;
        struct bpf_map *map;
        struct fd f;
        int err;

        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);

        if (attach) {
                prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
                if (IS_ERR(prog)) {
                        fdput(f);
                        return PTR_ERR(prog);
                }
        }

        err = sock_map_prog(map, prog, attr->attach_type);
        if (err) {
                fdput(f);
                if (prog)
                        bpf_prog_put(prog);
                return err;
        }

        fdput(f);
        return 0;
}

#define BPF_F_ATTACH_MASK \
        (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)

@@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
{
        enum bpf_prog_type ptype;
        struct bpf_prog *prog;
        struct cgroup *cgrp;
        int ret;

        if (!capable(CAP_NET_ADMIN))
@@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
                ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
                break;
        case BPF_SK_MSG_VERDICT:
                return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
                ptype = BPF_PROG_TYPE_SK_MSG;
                break;
        case BPF_SK_SKB_STREAM_PARSER:
        case BPF_SK_SKB_STREAM_VERDICT:
                return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
                ptype = BPF_PROG_TYPE_SK_SKB;
                break;
        case BPF_LIRC_MODE2:
                return lirc_prog_attach(attr);
                ptype = BPF_PROG_TYPE_LIRC_MODE2;
                break;
        default:
                return -EINVAL;
        }
@@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
                return -EINVAL;
        }

        cgrp = cgroup_get_from_fd(attr->target_fd);
        if (IS_ERR(cgrp)) {
                bpf_prog_put(prog);
                return PTR_ERR(cgrp);
        switch (ptype) {
        case BPF_PROG_TYPE_SK_SKB:
        case BPF_PROG_TYPE_SK_MSG:
                ret = sockmap_get_from_fd(attr, ptype, prog);
                break;
        case BPF_PROG_TYPE_LIRC_MODE2:
                ret = lirc_prog_attach(attr, prog);
                break;
        default:
                ret = cgroup_bpf_prog_attach(attr, ptype, prog);
        }

        ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
                                attr->attach_flags);
        if (ret)
                bpf_prog_put(prog);
        cgroup_put(cgrp);

        return ret;
}
@@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr)
{
        enum bpf_prog_type ptype;
        struct bpf_prog *prog;
        struct cgroup *cgrp;
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
@@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
                ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
                break;
        case BPF_SK_MSG_VERDICT:
                return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
                return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
        case BPF_SK_SKB_STREAM_PARSER:
        case BPF_SK_SKB_STREAM_VERDICT:
                return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
                return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
        case BPF_LIRC_MODE2:
                return lirc_prog_detach(attr);
        default:
                return -EINVAL;
        }

        cgrp = cgroup_get_from_fd(attr->target_fd);
        if (IS_ERR(cgrp))
                return PTR_ERR(cgrp);

        prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
        if (IS_ERR(prog))
                prog = NULL;

        ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
        if (prog)
                bpf_prog_put(prog);
        cgroup_put(cgrp);
        return ret;
        return cgroup_bpf_prog_detach(attr, ptype);
}

#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
static int bpf_prog_query(const union bpf_attr *attr,
                          union bpf_attr __user *uattr)
{
        struct cgroup *cgrp;
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
        if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
        default:
                return -EINVAL;
        }
        cgrp = cgroup_get_from_fd(attr->query.target_fd);
        if (IS_ERR(cgrp))
                return PTR_ERR(cgrp);
        ret = cgroup_bpf_query(cgrp, attr, uattr);
        cgroup_put(cgrp);
        return ret;

        return cgroup_bpf_prog_query(attr, uattr);
}
#endif /* CONFIG_CGROUP_BPF */

#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration

@@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
#ifdef CONFIG_CGROUP_BPF
        case BPF_PROG_ATTACH:
                err = bpf_prog_attach(&attr);
                break;
@@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_PROG_QUERY:
                err = bpf_prog_query(&attr, uattr);
                break;
#endif
        case BPF_PROG_TEST_RUN:
                err = bpf_prog_test_run(&attr, uattr);
                break;