Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next
tree. This includes better integration with the routing subsystem for
nf_tables, explicit notrack support and smaller updates. More
specifically, they are:

1) Add fib lookup expression for nf_tables, from Florian Westphal. This
   new expression provides a native replacement for iptables addrtype
   and rp_filter matches. This is more flexible though, since we can
   populate the kernel flowi representation to query the fib and
   accommodate new use cases, such as RTBH through skb mark.

2) Introduce rt expression for nf_tables, from Anders K. Pedersen. This
   new expression allows you to access skbuff route metadata, more
   specifically nexthop and classid fields.

3) Add notrack support for nf_tables, to skip conntracking, requested by
   many users already.

4) Add boilerplate code to allow using the nf_log infrastructure from
   nf_tables ingress.

5) Allow mangling pkttype from the nf_tables prerouting chain, to emulate
   the xtables cluster match, from Liping Zhang.

6) Move socket lookup code into generic nf_socket_* infrastructure so
   we can provide a native replacement for the xtables socket match.

7) Make sure nfnetlink_queue data that is updated on every packet is
   placed in a different cache line from read-only data, from Florian Westphal.

8) Handle NF_STOLEN from nf_tables core, also from Florian Westphal.

9) Start round robin number generation in nft_numgen from zero,
   instead of n-1, for consistency with xtables statistics match,
   patch from Liping Zhang.

10) Set GFP_NOWARN flag in skbuff netlink allocations in nfnetlink_log,
    given we retry with a smaller allocation on failure, from Calvin Owens.

11) Clean up xt_multiport to use switch(), from Gao feng.

12) Remove superfluous check in nft_immediate and nft_cmp, from
    Liping Zhang.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2016-11-02 14:57:47 -04:00
31 changed files with 1610 additions and 340 deletions

View File

@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4
To compile it as a module, choose M here. If unsure, say N.
config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
This option enables the IPv4 socket lookup infrastructure. This is
required by the iptables socket match.
if NF_TABLES
config NF_TABLES_IPV4
@@ -54,6 +60,14 @@ config NFT_DUP_IPV4
help
This module enables IPv4 packet duplication support for nf_tables.
config NFT_FIB_IPV4
select NFT_FIB
tristate "nf_tables fib / ip route lookup support"
help
This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
It also allows query of the FIB for the route type, e.g. local, unicast,
multicast or blackhole.
endif # NF_TABLES_IPV4
config NF_TABLES_ARP

View File

@@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
# logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
@@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o

View File

@@ -0,0 +1,163 @@
/*
* Copyright (C) 2007-2008 BalaBit IT Ltd.
* Author: Krisztian Kovacs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/icmp.h>
#include <net/sock.h>
#include <net/inet_sock.h>
#include <net/netfilter/nf_socket.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
#endif
/*
 * extract_icmp4_fields - read the transport tuple quoted inside an ICMP
 * message.
 *
 * Walks outer IP header -> ICMP header -> embedded IP header -> first four
 * bytes of the embedded transport header, using skb_header_pointer() at
 * every step.
 *
 * On success the embedded protocol, addresses and ports are stored through
 * the output pointers. The embedded packet is the one quoted from our
 * side, so its source is reported as the local (laddr/lport) end and its
 * destination as the remote (raddr/rport) end.
 *
 * Returns 0 on success, 1 if a header cannot be read, the ICMP type is not
 * one of the handled types, or the embedded protocol is neither TCP nor
 * UDP. The output parameters are left untouched on failure.
 */
static int
extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
		     __be32 *raddr, __be32 *laddr,
		     __be16 *rport, __be16 *lport)
{
	unsigned int offset = ip_hdrlen(skb);
	struct iphdr *quoted_iph, _quoted_iph;
	struct icmphdr *icmp, _icmp;
	__be16 *l4_ports, _l4_ports[2];

	icmp = skb_header_pointer(skb, offset, sizeof(_icmp), &_icmp);
	if (!icmp)
		return 1;

	/* Only these ICMP types are handled; reject everything else. */
	if (icmp->type != ICMP_DEST_UNREACH &&
	    icmp->type != ICMP_SOURCE_QUENCH &&
	    icmp->type != ICMP_REDIRECT &&
	    icmp->type != ICMP_TIME_EXCEEDED &&
	    icmp->type != ICMP_PARAMETERPROB)
		return 1;

	/* Step over the ICMP header to the embedded IP header. */
	offset += sizeof(struct icmphdr);
	quoted_iph = skb_header_pointer(skb, offset,
					sizeof(_quoted_iph), &_quoted_iph);
	if (!quoted_iph)
		return 1;

	if (!(quoted_iph->protocol == IPPROTO_TCP ||
	      quoted_iph->protocol == IPPROTO_UDP))
		return 1;

	/* Step over the embedded IP header (ihl is in 32-bit words). */
	offset += quoted_iph->ihl << 2;
	l4_ports = skb_header_pointer(skb, offset,
				      sizeof(_l4_ports), &_l4_ports);
	if (!l4_ports)
		return 1;

	/*
	 * The embedded packet was sent by us, so its source side is the
	 * local end and its destination side is the remote end.
	 */
	*protocol = quoted_iph->protocol;
	*laddr = quoted_iph->saddr;
	*lport = l4_ports[0];
	*raddr = quoted_iph->daddr;
	*rport = l4_ports[1];

	return 0;
}
/*
 * nf_socket_get_sock_v4 - dispatch the socket lookup by transport protocol.
 *
 * TCP is looked up with inet_lookup() on tcp_hashinfo, UDP with
 * udp4_lib_lookup(); both are restricted to the ifindex of @in.
 * Any other protocol yields NULL.
 */
static struct sock *
nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
		      const u8 protocol,
		      const __be32 saddr, const __be32 daddr,
		      const __be16 sport, const __be16 dport,
		      const struct net_device *in)
{
	if (protocol == IPPROTO_TCP)
		return inet_lookup(net, &tcp_hashinfo, skb, doff,
				   saddr, sport, daddr, dport,
				   in->ifindex);

	if (protocol == IPPROTO_UDP)
		return udp4_lib_lookup(net, saddr, sport, daddr, dport,
				       in->ifindex);

	return NULL;
}
/*
 * nf_sk_lookup_slow_v4 - find the socket an IPv4 packet belongs to.
 * @net:   network namespace to search
 * @skb:   packet being inspected
 * @indev: device whose ifindex restricts the lookup
 *
 * For TCP/UDP packets the tuple is taken from the packet headers. For ICMP
 * packets the tuple is taken from the packet quoted in the ICMP payload
 * (see extract_icmp4_fields()). Every other protocol yields NULL.
 *
 * When conntrack is enabled and the packet is a reply of a source-NATed
 * connection, the destination address/port are replaced with the original
 * source of the connection before the lookup.
 *
 * Returns whatever nf_socket_get_sock_v4() returns, or NULL when the
 * headers cannot be read or the protocol is not handled.
 */
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
				  const struct net_device *indev)
{
	__be32 uninitialized_var(daddr), uninitialized_var(saddr);
	__be16 uninitialized_var(dport), uninitialized_var(sport);
	const struct iphdr *iph = ip_hdr(skb);
	struct sk_buff *data_skb = NULL;
	u8 uninitialized_var(protocol);
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	enum ip_conntrack_info ctinfo;
	struct nf_conn const *ct;
#endif
	int doff = 0;

	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
		/*
		 * The scratch buffer must be large enough for a TCP header:
		 * __tcp_hdrlen() below reads the data-offset field, which
		 * lies beyond the size of a UDP header. The port fields are
		 * at the same position in both headers, so the result is
		 * accessed through a udphdr pointer for the common part.
		 */
		struct tcphdr _hdr;
		struct udphdr *hp;

		hp = skb_header_pointer(skb, ip_hdrlen(skb),
					iph->protocol == IPPROTO_UDP ?
					sizeof(*hp) : sizeof(_hdr), &_hdr);
		if (hp == NULL)
			return NULL;

		protocol = iph->protocol;
		saddr = iph->saddr;
		sport = hp->source;
		daddr = iph->daddr;
		dport = hp->dest;
		data_skb = (struct sk_buff *)skb;
		/* Offset passed to the lookup: IP header + transport header. */
		doff = iph->protocol == IPPROTO_TCP ?
			ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
			ip_hdrlen(skb) + sizeof(*hp);

	} else if (iph->protocol == IPPROTO_ICMP) {
		if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
					 &sport, &dport))
			return NULL;
	} else {
		return NULL;
	}

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	/* Do the lookup with the original socket address in
	 * case this is a reply packet of an established
	 * SNAT-ted connection.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct && !nf_ct_is_untracked(ct) &&
	    ((iph->protocol != IPPROTO_ICMP &&
	      ctinfo == IP_CT_ESTABLISHED_REPLY) ||
	     (iph->protocol == IPPROTO_ICMP &&
	      ctinfo == IP_CT_RELATED_REPLY)) &&
	    (ct->status & IPS_SRC_NAT_DONE)) {

		daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
		dport = (iph->protocol == IPPROTO_TCP) ?
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
	}
#endif

	return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
				     daddr, sport, dport, indev);
}
EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");

View File

@@ -0,0 +1,238 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
#include <net/ip_fib.h>
#include <net/route.h>
/*
 * Pick the source address handed to the route lookup: multicast,
 * limited-broadcast and zeronet addresses are replaced by 0 so that no
 * route is searched from them; any other address is returned unchanged.
 */
static __be32 get_saddr(__be32 addr)
{
	bool skip = ipv4_is_multicast(addr) ||
		    ipv4_is_lbcast(addr) ||
		    ipv4_is_zeronet(addr);

	return skip ? 0 : addr;
}
static bool fib4_is_local(const struct sk_buff *skb)
{
const struct rtable *rt = skb_rtable(skb);
return rt && (rt->rt_flags & RTCF_LOCAL);
}
/*
 * Mask selecting the upper six bits of the IPv4 TOS byte; applied to
 * iph->tos when nft_fib4_eval() fills in the flow key for the FIB lookup.
 */
#define DSCP_BITS 0xfc
/*
 * nft_fib4_eval_type - evaluate a "fib ... type" expression for IPv4.
 *
 * Stores the result of inet_dev_addr_type() for the packet's destination
 * address (NFTA_FIB_F_DADDR set) or source address (otherwise) into the
 * destination register. The lookup is bound to the input device when
 * NFTA_FIB_F_IIF is set, else to the output device when NFTA_FIB_F_OIF is
 * set, else no device (NULL) is passed.
 */
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
			const struct nft_pktinfo *pkt)
{
	const struct nft_fib *priv = nft_expr_priv(expr);
	const struct iphdr *iph = ip_hdr(pkt->skb);
	const struct net_device *dev = NULL;
	__be32 addr;

	/* IIF takes precedence over OIF when both flags are present. */
	if (priv->flags & NFTA_FIB_F_IIF)
		dev = pkt->in;
	else if (priv->flags & NFTA_FIB_F_OIF)
		dev = pkt->out;

	addr = (priv->flags & NFTA_FIB_F_DADDR) ? iph->daddr : iph->saddr;

	regs->data[priv->dreg] = inet_dev_addr_type(pkt->net, dev, addr);
}
EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
/* NULL-tolerant accessor: the device's ifindex, or 0 when there is no device. */
static int get_ifindex(const struct net_device *dev)
{
	if (!dev)
		return 0;

	return dev->ifindex;
}
/*
 * nft_fib4_eval - evaluate an IPv4 "fib" expression that yields an output
 * interface (NFT_FIB_RESULT_OIF or NFT_FIB_RESULT_OIFNAME).
 * @expr: expression being evaluated (private data is a struct nft_fib)
 * @regs: register file; the result goes to regs->data[priv->dreg]
 * @pkt:  packet and hook information
 *
 * A FIB lookup is performed on the packet's destination (NFTA_FIB_F_DADDR)
 * or, otherwise, on its source address. When NFTA_FIB_F_OIF/IIF is set,
 * the result only counts if the route uses that particular device.
 *
 * The destination register is cleared before the lookup, so that every
 * "no result" exit below (lookup failure, RTN_LOCAL, no matching nexthop)
 * leaves a zero in the first register word rather than whatever an earlier
 * expression stored there.
 */
void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
		   const struct nft_pktinfo *pkt)
{
	const struct nft_fib *priv = nft_expr_priv(expr);
	u32 *dest = &regs->data[priv->dreg];
	const struct iphdr *iph;
	struct fib_result res;
	struct flowi4 fl4 = {
		.flowi4_scope = RT_SCOPE_UNIVERSE,
		.flowi4_iif = LOOPBACK_IFINDEX,
	};
	const struct net_device *oif;
	struct net_device *found;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int i;
#endif

	/*
	 * Do not set flowi4_oif, it restricts results (for example, asking
	 * for oif 3 will get RTN_UNICAST result even if the daddr exists
	 * on another interface).
	 *
	 * Search results for the desired outinterface instead.
	 */
	if (priv->flags & NFTA_FIB_F_OIF)
		oif = pkt->out;
	else if (priv->flags & NFTA_FIB_F_IIF)
		oif = pkt->in;
	else
		oif = NULL;

	/* Already routed to a local destination: report the loopback index. */
	if (pkt->hook == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
		nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
		return;
	}

	iph = ip_hdr(pkt->skb);
	/*
	 * Zeronet source sending to a local multicast destination: report
	 * the skb's own device without consulting the FIB.
	 */
	if (ipv4_is_multicast(iph->daddr) &&
	    ipv4_is_zeronet(iph->saddr) &&
	    ipv4_is_local_multicast(iph->daddr)) {
		nft_fib_store_result(dest, priv->result, pkt,
				     get_ifindex(pkt->skb->dev));
		return;
	}

	if (priv->flags & NFTA_FIB_F_MARK)
		fl4.flowi4_mark = pkt->skb->mark;

	fl4.flowi4_tos = iph->tos & DSCP_BITS;

	if (priv->flags & NFTA_FIB_F_DADDR) {
		fl4.daddr = iph->daddr;
		fl4.saddr = get_saddr(iph->saddr);
	} else {
		/* Reverse lookup: route back towards the packet's source. */
		fl4.daddr = iph->saddr;
		fl4.saddr = get_saddr(iph->daddr);
	}

	/* Default result for all early exits below: "nothing found". */
	*dest = 0;

	if (fib_lookup(pkt->net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
		return;

	switch (res.type) {
	case RTN_UNICAST:
		break;
	case RTN_LOCAL:	/* should not appear here, see fib4_is_local() above */
		return;
	default:
		break;
	}

	if (!oif) {
		found = FIB_RES_DEV(res);
		goto ok;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Accept the route if any of its nexthops uses the wanted device. */
	for (i = 0; i < res.fi->fib_nhs; i++) {
		struct fib_nh *nh = &res.fi->fib_nh[i];

		if (nh->nh_dev == oif) {
			found = nh->nh_dev;
			goto ok;
		}
	}
	return;
#else
	found = FIB_RES_DEV(res);
	if (found != oif)
		return;
#endif
ok:
	switch (priv->result) {
	case NFT_FIB_RESULT_OIF:
		*dest = found->ifindex;
		break;
	case NFT_FIB_RESULT_OIFNAME:
		/* strncpy() also zero-pads the remainder up to IFNAMSIZ. */
		strncpy((char *)dest, found->name, IFNAMSIZ);
		break;
	default:
		WARN_ON_ONCE(1);
		break;
	}
}
EXPORT_SYMBOL_GPL(nft_fib4_eval);
/* Forward declaration: the ops tables reference the type via .type. */
static struct nft_expr_type nft_fib4_type;
/*
 * Operations used when the requested result is NFT_FIB_RESULT_ADDRTYPE
 * (chosen in nft_fib4_select_ops()); evaluation goes through
 * nft_fib4_eval_type(). init/dump/validate are the shared nft_fib_*
 * callbacks.
 */
static const struct nft_expr_ops nft_fib4_type_ops = {
.type = &nft_fib4_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
.eval = nft_fib4_eval_type,
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
};
/*
 * Operations used when the requested result is NFT_FIB_RESULT_OIF or
 * NFT_FIB_RESULT_OIFNAME (chosen in nft_fib4_select_ops()); evaluation
 * goes through nft_fib4_eval(). init/dump/validate are the shared
 * nft_fib_* callbacks.
 */
static const struct nft_expr_ops nft_fib4_ops = {
.type = &nft_fib4_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
.eval = nft_fib4_eval,
.init = nft_fib_init,
.dump = nft_fib_dump,
.validate = nft_fib_validate,
};
/*
 * nft_fib4_select_ops - pick the ops table matching the requested result.
 * @ctx: rule context (unused here)
 * @tb:  parsed netlink attributes of the expression
 *
 * Returns &nft_fib4_ops for interface results (OIF, OIFNAME),
 * &nft_fib4_type_ops for the address-type result, ERR_PTR(-EINVAL) when
 * NFTA_FIB_RESULT is missing and ERR_PTR(-EOPNOTSUPP) for any other
 * result value.
 */
static const struct nft_expr_ops *
nft_fib4_select_ops(const struct nft_ctx *ctx,
		    const struct nlattr * const tb[])
{
	enum nft_fib_result result;

	if (!tb[NFTA_FIB_RESULT])
		return ERR_PTR(-EINVAL);

	/* The attribute is a big-endian value: convert network -> host. */
	result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));

	switch (result) {
	case NFT_FIB_RESULT_OIF:
	case NFT_FIB_RESULT_OIFNAME:
		return &nft_fib4_ops;
	case NFT_FIB_RESULT_ADDRTYPE:
		return &nft_fib4_type_ops;
	default:
		return ERR_PTR(-EOPNOTSUPP);
	}
}
/*
 * Expression type descriptor for "fib" in the NFPROTO_IPV4 family.
 * No fixed ops table is set; nft_fib4_select_ops() chooses one based on
 * the NFTA_FIB_RESULT attribute. Attributes are validated against
 * nft_fib_policy, up to NFTA_FIB_MAX.
 */
static struct nft_expr_type nft_fib4_type __read_mostly = {
.name = "fib",
.select_ops = &nft_fib4_select_ops,
.policy = nft_fib_policy,
.maxattr = NFTA_FIB_MAX,
.family = NFPROTO_IPV4,
.owner = THIS_MODULE,
};
/*
 * Module entry point: registers the IPv4 "fib" expression type and
 * returns the result of nft_register_expr() unchanged.
 */
static int __init nft_fib4_module_init(void)
{
return nft_register_expr(&nft_fib4_type);
}
/* Module exit point: unregisters the IPv4 "fib" expression type. */
static void __exit nft_fib4_module_exit(void)
{
nft_unregister_expr(&nft_fib4_type);
}
module_init(nft_fib4_module_init);
module_exit(nft_fib4_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(2, "fib");