Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:

 1) Include fixes for netrom and dsa (Fabian Frederick and Florian
    Fainelli)

 2) Fix FIXED_PHY support in stmmac, from Giuseppe CAVALLARO.

 3) Several SKB use after free fixes (vxlan, openvswitch, vxlan,
    ip_tunnel, fou), from Li RongQing.

 4) fec driver PTP support fixes from Luwei Zhou and Nimrod Andy.

 5) Use after free in virtio_net, from Michael S. Tsirkin.

 6) Fix flow mask handling for megaflows in openvswitch, from Pravin B
    Shelar.

 7) ISDN gigaset and capi bug fixes from Tilman Schmidt.

 8) Fix route leak in ip_send_unicast_reply(), from Vasily Averin.

 9) Fix two eBPF JIT bugs on x86, from Alexei Starovoitov.

10) TCP_SKB_CB() reorganization caused a few regressions, fixed by Cong
    Wang and Eric Dumazet.

11) Don't overwrite end of SKB when parsing malformed sctp ASCONF
    chunks, from Daniel Borkmann.

12) Don't call sock_kfree_s() with NULL pointers; this function also has
    the side effect of adjusting the socket memory usage. From Cong Wang.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (90 commits)
  bna: fix skb->truesize underestimation
  net: dsa: add includes for ethtool and phy_fixed definitions
  openvswitch: Set flow-key members.
  netrom: use linux/uaccess.h
  dsa: Fix conversion from host device to mii bus
  tipc: fix bug in bundled buffer reception
  ipv6: introduce tcp_v6_iif()
  sfc: add support for skb->xmit_more
  r8152: return -EBUSY for runtime suspend
  ipv4: fix a potential use after free in fou.c
  ipv4: fix a potential use after free in ip_tunnel_core.c
  hyperv: Add handling of IP header with option field in netvsc_set_hash()
  openvswitch: Create right mask with disabled megaflows
  vxlan: fix a free after use
  openvswitch: fix a use after free
  ipv4: dst_entry leak in ip_send_unicast_reply()
  ipv4: clean up cookie_v4_check()
  ipv4: share tcp_v4_save_options() with cookie_v4_check()
  ipv4: call __ip_options_echo() in cookie_v4_check()
  atm: simplify lanai.c by using module_pci_driver
  ...
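Several of the fixes above (fou, ip_tunnel, vxlan) share one bug pattern: pskb_may_pull() may reallocate the skb's data area, so any header pointer computed before the call can end up dangling. Below is a minimal userspace sketch of that pattern, with realloc() standing in for the kernel helper; all names here are illustrative, not taken from the patches. The fix is the same in every case: re-derive the pointer from the (possibly new) buffer base after the length check, exactly as the fou.c and ip_tunnel_core.c hunks further down do.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for pskb_may_pull(): may move the buffer, the way
 * pskb_expand_head() can for a cloned or fragmented skb. */
static char *ensure_len(char **buf, size_t *cap, size_t need)
{
	if (need > *cap) {
		char *n = realloc(*buf, need);	/* may return a new address */
		if (!n)
			return NULL;
		*buf = n;
		*cap = need;
	}
	return *buf;
}

int main(void)
{
	size_t cap = 8;
	char *buf = malloc(cap);
	if (!buf)
		return 1;
	memcpy(buf, "header", 7);

	char *hdr = buf;		/* pointer cached BEFORE the pull */

	if (!ensure_len(&buf, &cap, 4096))
		return 1;

	/* BUG: 'hdr' may now point into freed memory.  Do not use it. */

	hdr = buf;			/* FIX: reload after the call */
	printf("%s\n", hdr);
	free(buf);
	return 0;
}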
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -537,7 +537,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
 				return 1;
 
 			attrlen = rtnh_attrlen(rtnh);
-			if (attrlen < 0) {
+			if (attrlen > 0) {
 				struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
 
 				nla = nla_find(attrs, attrlen, RTA_GATEWAY);
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -87,6 +87,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, len))
 		goto drop;
 
+	uh = udp_hdr(skb);
+	guehdr = (struct guehdr *)&uh[1];
+
 	if (guehdr->version != 0)
 		goto drop;
 
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1535,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
 	struct sk_buff *nskb;
 	struct sock *sk;
 	struct inet_sock *inet;
+	int err;
 
 	if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
 		return;
@@ -1574,8 +1575,13 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
 	sock_net_set(sk, net);
 	__skb_queue_head_init(&sk->sk_write_queue);
 	sk->sk_sndbuf = sysctl_wmem_default;
-	ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
-		       &ipc, &rt, MSG_DONTWAIT);
+	err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
+			     len, 0, &ipc, &rt, MSG_DONTWAIT);
+	if (unlikely(err)) {
+		ip_flush_pending_frames(sk);
+		goto out;
+	}
+
 	nskb = skb_peek(&sk->sk_write_queue);
 	if (nskb) {
 		if (arg->csumoffset >= 0)
@@ -1587,7 +1593,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb,
 		skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
 		ip_push_pending_frames(sk, &fl4);
 	}
-
+out:
 	put_cpu_var(unicast_sock);
 
 	ip_rt_put(rt);
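The ip_send_unicast_reply() change above fixes the dst leak by checking ip_append_data()'s return value and funnelling both outcomes through a single out: label, so ip_rt_put(rt) now runs on the error path too. A small userspace sketch of that goto-unwind convention follows, under invented acquire/release helpers; nothing here is kernel API.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins for route lookup and frame queueing. */
static void *acquire(const char *what) { printf("acquire %s\n", what); return malloc(1); }
static void release(const char *what, void *p) { printf("release %s\n", what); free(p); }
static int append_data(int fail) { return fail ? -12 /* -ENOMEM */ : 0; }

static void send_reply(int fail)
{
	void *rt = acquire("route");
	if (!rt)
		return;

	int err = append_data(fail);
	if (err) {
		/* undo partial work, then fall through to shared cleanup */
		printf("flush pending frames\n");
		goto out;
	}
	printf("push pending frames\n");
out:
	release("route", rt);	/* runs on success AND failure: no dst leak */
}

int main(void)
{
	send_reply(0);
	send_reply(1);
	return 0;
}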
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -91,11 +91,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
 	skb_pull_rcsum(skb, hdr_len);
 
 	if (inner_proto == htons(ETH_P_TEB)) {
-		struct ethhdr *eh = (struct ethhdr *)skb->data;
+		struct ethhdr *eh;
 
 		if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
 			return -ENOMEM;
 
+		eh = (struct ethhdr *)skb->data;
 		if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
 			skb->protocol = eh->h_proto;
 		else
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -255,9 +255,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
 }
 EXPORT_SYMBOL(cookie_check_timestamp);
 
-struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
-			     struct ip_options *opt)
+struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 {
+	struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
 	struct tcp_options_received tcp_opt;
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
@@ -317,15 +317,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
 	 */
-	if (opt && opt->optlen) {
-		int opt_size = sizeof(struct ip_options_rcu) + opt->optlen;
-
-		ireq->opt = kmalloc(opt_size, GFP_ATOMIC);
-		if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) {
-			kfree(ireq->opt);
-			ireq->opt = NULL;
-		}
-	}
+	ireq->opt = tcp_v4_save_options(skb);
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
@@ -344,7 +336,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
 			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
 			   inet_sk_flowi_flags(sk),
-			   (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
+			   opt->srr ? opt->faddr : ireq->ir_rmt_addr,
 			   ireq->ir_loc_addr, th->source, th->dest);
 	security_req_classify_flow(req, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_key(sock_net(sk), &fl4);
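cookie_v4_check() now obtains the echoed options from tcp_v4_save_options(), shared with TCP's normal request path (the helper's old copy is removed in the tcp_ipv4.c hunks further down). That removed block also shows the classic header-plus-payload idiom: a single kmalloc() of sizeof(*dopt) + opt->optlen, with the variable part reached through a trailing member. A hedged userspace sketch of that layout, with the struct and names invented for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Header plus variable-length payload in one allocation, mirroring
 * "sizeof(*dopt) + opt->optlen" in the diff above. */
struct opt_blob {
	size_t optlen;
	unsigned char data[];	/* C99 flexible array member */
};

static struct opt_blob *save_options(const unsigned char *opt, size_t optlen)
{
	if (!optlen)
		return NULL;	/* nothing to save, like optlen == 0 */

	struct opt_blob *dopt = malloc(sizeof(*dopt) + optlen);
	if (!dopt)
		return NULL;

	dopt->optlen = optlen;
	memcpy(dopt->data, opt, optlen);	/* stands in for __ip_options_echo() */
	return dopt;
}

int main(void)
{
	const unsigned char raw[] = { 0x01, 0x01, 0x00 };
	struct opt_blob *o = save_options(raw, sizeof(raw));

	if (o)	/* both call sites just share the helper and test for NULL */
		printf("saved %zu option bytes\n", o->optlen);
	free(o);
	return 0;
}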
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -68,6 +68,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/kernel.h>
+#include <linux/prefetch.h>
 #include <net/dst.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
@@ -3029,6 +3030,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 	return packets_acked;
 }
 
+static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
+			   u32 prior_snd_una)
+{
+	const struct skb_shared_info *shinfo;
+
+	/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
+	if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
+		return;
+
+	shinfo = skb_shinfo(skb);
+	if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+	    between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
+		__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+}
+
 /* Remove acknowledged frames from the retransmission queue. If our packet
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
@@ -3052,14 +3068,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	first_ackt.v64 = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
-		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
 
-		if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-		    between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
-			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+		tcp_ack_tstamp(sk, skb, prior_snd_una);
 
 		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
@@ -3073,10 +3086,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 			fully_acked = false;
 		} else {
+			/* Speedup tcp_unlink_write_queue() and next loop */
+			prefetchw(skb->next);
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
-		if (sacked & TCPCB_RETRANS) {
+		if (unlikely(sacked & TCPCB_RETRANS)) {
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
@@ -3107,7 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			 * connection startup slow start one packet too
 			 * quickly. This is severely frowned upon behavior.
 			 */
-			if (!(scb->tcp_flags & TCPHDR_SYN)) {
+			if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
 				flag |= FLAG_DATA_ACKED;
 			} else {
 				flag |= FLAG_SYN_ACKED;
@@ -3119,9 +3134,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tcp_unlink_write_queue(skb, sk);
 		sk_wmem_free_skb(sk, skb);
-		if (skb == tp->retransmit_skb_hint)
+		if (unlikely(skb == tp->retransmit_skb_hint))
 			tp->retransmit_skb_hint = NULL;
-		if (skb == tp->lost_skb_hint)
+		if (unlikely(skb == tp->lost_skb_hint))
 			tp->lost_skb_hint = NULL;
 	}
 
@@ -3132,7 +3147,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		flag |= FLAG_SACK_RENEGING;
 
 	skb_mstamp_get(&now);
-	if (first_ackt.v64) {
+	if (likely(first_ackt.v64)) {
 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
 		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
 	}
@@ -3394,6 +3409,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int acked = 0; /* Number of packets newly acked */
 	long sack_rtt_us = -1L;
 
+	/* We very likely will need to access write queue head. */
+	prefetchw(sk->sk_write_queue.next);
+
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
 	 */
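Aside from the likely()/unlikely() annotations, the tcp_input.c hunks above add two prefetchw() calls, which issue a write-intent prefetch for the next skb before the current one is processed. Userspace code can do the same with the GCC/Clang builtin; here is a sketch over a linked list, with the list and node types purely illustrative:

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	long payload;
};

static long walk(struct node *head)
{
	long sum = 0;

	for (struct node *n = head; n; n = n->next) {
		/* Write-intent prefetch of the next node, analogous to
		 * prefetchw(skb->next) in tcp_clean_rtx_queue().  Args:
		 * (addr, rw = 1 for write, locality hint 3 = keep cached). */
		if (n->next)
			__builtin_prefetch(n->next, 1, 3);
		sum += n->payload++;	/* touch the node for write */
	}
	return sum;
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 0; i < 1000; i++) {	/* build a 1000-node list */
		struct node *n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->payload = i;
		n->next = head;
		head = n;
	}
	printf("sum=%ld\n", walk(head));
	while (head) {				/* free the list */
		struct node *n = head->next;
		free(head);
		head = n;
	}
	return 0;
}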
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -880,26 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk,
 }
 EXPORT_SYMBOL(tcp_syn_flood_action);
 
-/*
- * Save and compile IPv4 options into the request_sock if needed.
- */
-static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
-{
-	const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
-	struct ip_options_rcu *dopt = NULL;
-
-	if (opt && opt->optlen) {
-		int opt_size = sizeof(*dopt) + opt->optlen;
-
-		dopt = kmalloc(opt_size, GFP_ATOMIC);
-		if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
-			kfree(dopt);
-			dopt = NULL;
-		}
-	}
-	return dopt;
-}
-
 #ifdef CONFIG_TCP_MD5SIG
 /*
  * RFC2385 MD5 checksumming requires a mapping of
@@ -1428,7 +1408,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 #ifdef CONFIG_SYN_COOKIES
 	if (!th->syn)
-		sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt);
+		sk = cookie_v4_check(sk, skb);
 #endif
 	return sk;
 }
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
+	int wmem;
+
+	/* Keep one reference on sk_wmem_alloc.
+	 * Will be released by sk_free() from here or tcp_tasklet_func()
+	 */
+	wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc);
+
+	/* If this softirq is serviced by ksoftirqd, we are likely under stress.
+	 * Wait until our queues (qdisc + devices) are drained.
+	 * This gives :
+	 * - less callbacks to tcp_write_xmit(), reducing stress (batches)
+	 * - chance for incoming ACK (processed by another cpu maybe)
+	 *   to migrate this flow (skb->ooo_okay will be eventually set)
+	 */
+	if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+		goto out;
 
 	if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
 	    !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
 		unsigned long flags;
 		struct tsq_tasklet *tsq;
 
-		/* Keep a ref on socket.
-		 * This last ref will be released in tcp_tasklet_func()
-		 */
-		atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
-
 		/* queue this socket to tasklet queue */
 		local_irq_save(flags);
 		tsq = this_cpu_ptr(&tsq_tasklet);
 		list_add(&tp->tsq_node, &tsq->head);
 		tasklet_schedule(&tsq->tasklet);
 		local_irq_restore(flags);
-	} else {
-		sock_wfree(skb);
-	}
+		return;
 	}
+out:
+	sk_free(sk);
 }
 
 /* This routine actually transmits TCP packets queued in by
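The tcp_wfree() rewrite above subtracts truesize - 1 from sk_wmem_alloc up front, deliberately keeping one reference that is then dropped exactly once: by sk_free() at out:, or later by tcp_tasklet_func() when the socket was queued to the tasklet instead. A minimal C11 sketch of that keep-one-reference hand-off follows; the types and names are invented for illustration, not the kernel refcount API.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refs;
};

static void obj_put(struct obj *o)	/* drop one ref; free on the last */
{
	if (atomic_fetch_sub(&o->refs, 1) == 1) {
		printf("last reference: freeing\n");
		free(o);
	}
}

/* Analogue of tcp_wfree(): the completion held 'held' references;
 * release all but one now, then either hand the survivor to deferred
 * work or drop it immediately. */
static void complete(struct obj *o, int held, int defer)
{
	atomic_fetch_sub(&o->refs, held - 1);	/* like truesize - 1 */
	if (defer) {
		/* ownership of the last ref moves to the deferred worker */
		printf("queued: worker will put the last ref\n");
		obj_put(o);	/* worker side, modelled inline here */
	} else {
		obj_put(o);	/* like sk_free(sk) at out: */
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));
	if (!o)
		return 1;
	atomic_init(&o->refs, 5);	/* e.g. one ref per truesize unit */
	complete(o, 5, 0);
	return 0;
}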
@@ -914,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
 	/* if no packet is in qdisc/device queue, then allow XPS to select
-	 * another queue.
+	 * another queue. We can be called from tcp_tsq_handler()
+	 * which holds one reference to sk_wmem_alloc.
+	 *
+	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
+	 * One way to get this would be to set skb->truesize = 2 on them.
 	 */
-	skb->ooo_okay = sk_wmem_alloc_get(sk) == 0;
+	skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);