Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains a large Netfilter update for net-next,
to summarise:

1) Add support for stateful objects. This series provides a nf_tables
   native alternative to the extended accounting infrastructure for
   nf_tables. Two initial stateful objects are supported: counters and
   quotas. Objects are identified by a user-defined name, you can fetch
   and reset them anytime. You can also use a maps to allow fast lookups
   using any arbitrary key combination. More info at:

   http://marc.info/?l=netfilter-devel&m=148029128323837&w=2

2) On-demand registration of nf_conntrack and defrag hooks per netns.
   Register nf_conntrack hooks if we have a stateful ruleset, ie.
   state-based filtering or NAT. The new nf_conntrack_default_on sysctl
   enables this from newly created netnamespaces. Default behaviour is not
   modified. Patches from Florian Westphal.

3) Allocate 4k chunks and then use these for x_tables counter allocation
   requests, this improves ruleset load time and also datapath ruleset
   evaluation, patches from Florian Westphal.

4) Add support for ebpf to the existing x_tables bpf extension.
   From Willem de Bruijn.

5) Update layer 4 checksum if any of the pseudoheader fields is updated.
   This provides a limited form of 1:1 stateless NAT that make sense in
   specific scenario, eg. load balancing.

6) Add support to flush sets in nf_tables. This series comes with a new
   set->ops->deactivate_one() indirection given that we have to walk
   over the list of set elements, then deactivate them one by one.
   The existing set->ops->deactivate() performs an element lookup that
   we don't need.

7) Two patches to avoid cloning packets, thus speed up packet forwarding
   via nft_fwd from ingress. From Florian Westphal.

8) Two IPVS patches via Simon Horman: Decrement ttl in all modes to
   prevent infinite loops, patch from Dwip Banerjee. And one minor
   refactoring from Gao feng.

9) Revisit recent log support for nf_tables netdev families: One patch
   to ensure that we correctly handle non-ethernet packets. Another
   patch to add missing logger definition for netdev. Patches from
   Liping Zhang.

10) Three patches for nft_fib, one to address insufficient register
    initialization and another to solve incorrect (although harmless)
    byteswap operation. Moreover update xt_rpfilter and nft_fib to match
    lbcast packets with zeronet as source, eg. DHCP Discover packets
    (0.0.0.0 -> 255.255.255.255). Also from Liping Zhang.

11) Built-in DCCP, SCTP and UDPlite conntrack and NAT support, from
    Davide Caratti. While DCCP is rather hopeless lately, and UDPlite has
    been broken in many-cast mode for some little time, let's give them a
    chance by placing them at the same level as other existing protocols.
    Thus, users don't explicitly have to modprobe support for this and
    NAT rules work for them. Some people point to the lack of support in
    SOHO Linux-based routers that make deployment of new protocols harder.
    I guess other middleboxes outthere on the Internet are also to blame.
    Anyway, let's see if this has any impact in the midrun.

12) Skip software SCTP software checksum calculation if the NIC comes
    with SCTP checksum offload support. From Davide Caratti.

13) Initial core factoring to prepare conversion to hook array. Three
    patches from Aaron Conole.

14) Gao Feng made a wrong conversion to switch in the xt_multiport
    extension in a patch coming in the previous batch. Fix it in this
    batch.

15) Get vmalloc call in sync with kmalloc flags to avoid a warning
    and likely OOM killer intervention from x_tables. From Marcelo
    Ricardo Leitner.

16) Update Arturo Borrero's email address in all source code headers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2016-12-07 19:16:46 -05:00
90 changed files with 2534 additions and 682 deletions

View File

@@ -2,7 +2,10 @@
#define _NF_CONNTRACK_TUPLE_COMMON_H
#include <linux/types.h>
#ifndef __KERNEL__
#include <linux/netfilter.h>
#endif
#include <linux/netfilter/nf_conntrack_common.h> /* IP_CT_IS_REPLY */
enum ip_conntrack_dir {
IP_CT_DIR_ORIGINAL,

View File

@@ -4,6 +4,7 @@
#define NFT_TABLE_MAXNAMELEN 32
#define NFT_CHAIN_MAXNAMELEN 32
#define NFT_SET_MAXNAMELEN 32
#define NFT_OBJ_MAXNAMELEN 32
#define NFT_USERDATA_MAXLEN 256
/**
@@ -85,6 +86,10 @@ enum nft_verdicts {
* @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
* @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
* @NFT_MSG_TRACE: trace event (enum nft_trace_attributes)
* @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes)
* @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes)
* @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes)
* @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes)
*/
enum nf_tables_msg_types {
NFT_MSG_NEWTABLE,
@@ -105,6 +110,10 @@ enum nf_tables_msg_types {
NFT_MSG_NEWGEN,
NFT_MSG_GETGEN,
NFT_MSG_TRACE,
NFT_MSG_NEWOBJ,
NFT_MSG_GETOBJ,
NFT_MSG_DELOBJ,
NFT_MSG_GETOBJ_RESET,
NFT_MSG_MAX,
};
@@ -246,6 +255,7 @@ enum nft_rule_compat_attributes {
* @NFT_SET_MAP: set is used as a dictionary
* @NFT_SET_TIMEOUT: set uses timeouts
* @NFT_SET_EVAL: set contains expressions for evaluation
* @NFT_SET_OBJECT: set contains stateful objects
*/
enum nft_set_flags {
NFT_SET_ANONYMOUS = 0x1,
@@ -254,6 +264,7 @@ enum nft_set_flags {
NFT_SET_MAP = 0x8,
NFT_SET_TIMEOUT = 0x10,
NFT_SET_EVAL = 0x20,
NFT_SET_OBJECT = 0x40,
};
/**
@@ -295,6 +306,7 @@ enum nft_set_desc_attributes {
* @NFTA_SET_TIMEOUT: default timeout value (NLA_U64)
* @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
* @NFTA_SET_USERDATA: user data (NLA_BINARY)
* @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -312,6 +324,7 @@ enum nft_set_attributes {
NFTA_SET_GC_INTERVAL,
NFTA_SET_USERDATA,
NFTA_SET_PAD,
NFTA_SET_OBJ_TYPE,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
@@ -335,6 +348,7 @@ enum nft_set_elem_flags {
* @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
* @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
* @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes)
* @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING)
*/
enum nft_set_elem_attributes {
NFTA_SET_ELEM_UNSPEC,
@@ -346,6 +360,7 @@ enum nft_set_elem_attributes {
NFTA_SET_ELEM_USERDATA,
NFTA_SET_ELEM_EXPR,
NFTA_SET_ELEM_PAD,
NFTA_SET_ELEM_OBJREF,
__NFTA_SET_ELEM_MAX
};
#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1)
@@ -659,6 +674,10 @@ enum nft_payload_csum_types {
NFT_PAYLOAD_CSUM_INET,
};
enum nft_payload_csum_flags {
NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0),
};
/**
* enum nft_payload_attributes - nf_tables payload expression netlink attributes
*
@@ -669,6 +688,7 @@ enum nft_payload_csum_types {
* @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers)
* @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32)
* @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32)
* @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32)
*/
enum nft_payload_attributes {
NFTA_PAYLOAD_UNSPEC,
@@ -679,6 +699,7 @@ enum nft_payload_attributes {
NFTA_PAYLOAD_SREG,
NFTA_PAYLOAD_CSUM_TYPE,
NFTA_PAYLOAD_CSUM_OFFSET,
NFTA_PAYLOAD_CSUM_FLAGS,
__NFTA_PAYLOAD_MAX
};
#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1)
@@ -968,6 +989,7 @@ enum nft_queue_attributes {
enum nft_quota_flags {
NFT_QUOTA_F_INV = (1 << 0),
NFT_QUOTA_F_DEPLETED = (1 << 1),
};
/**
@@ -975,12 +997,14 @@ enum nft_quota_flags {
*
* @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16)
* @NFTA_QUOTA_FLAGS: flags (NLA_U32)
* @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64)
*/
enum nft_quota_attributes {
NFTA_QUOTA_UNSPEC,
NFTA_QUOTA_BYTES,
NFTA_QUOTA_FLAGS,
NFTA_QUOTA_PAD,
NFTA_QUOTA_CONSUMED,
__NFTA_QUOTA_MAX
};
#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1)
@@ -1124,6 +1148,26 @@ enum nft_fwd_attributes {
};
#define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1)
/**
* enum nft_objref_attributes - nf_tables stateful object expression netlink attributes
*
* @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register)
* @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING)
* @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: nft_registers)
* @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING)
* @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction (NLA_U32)
*/
enum nft_objref_attributes {
NFTA_OBJREF_UNSPEC,
NFTA_OBJREF_IMM_TYPE,
NFTA_OBJREF_IMM_NAME,
NFTA_OBJREF_SET_SREG,
NFTA_OBJREF_SET_NAME,
NFTA_OBJREF_SET_ID,
__NFTA_OBJREF_MAX
};
#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1)
/**
* enum nft_gen_attributes - nf_tables ruleset generation attributes
*
@@ -1172,6 +1216,32 @@ enum nft_fib_flags {
NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */
};
#define NFT_OBJECT_UNSPEC 0
#define NFT_OBJECT_COUNTER 1
#define NFT_OBJECT_QUOTA 2
#define __NFT_OBJECT_MAX 3
#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1)
/**
* enum nft_object_attributes - nf_tables stateful object netlink attributes
*
* @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING)
* @NFTA_OBJ_NAME: name of this expression type (NLA_STRING)
* @NFTA_OBJ_TYPE: stateful object type (NLA_U32)
* @NFTA_OBJ_DATA: stateful object data (NLA_NESTED)
* @NFTA_OBJ_USE: number of references to this expression (NLA_U32)
*/
enum nft_object_attributes {
NFTA_OBJ_UNSPEC,
NFTA_OBJ_TABLE,
NFTA_OBJ_NAME,
NFTA_OBJ_TYPE,
NFTA_OBJ_DATA,
NFTA_OBJ_USE,
__NFTA_OBJ_MAX
};
#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)
/**
* enum nft_trace_attributes - nf_tables trace netlink attributes
*

View File

@@ -2,9 +2,11 @@
#define _XT_BPF_H
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/types.h>
#define XT_BPF_MAX_NUM_INSTR 64
#define XT_BPF_PATH_MAX (XT_BPF_MAX_NUM_INSTR * sizeof(struct sock_filter))
struct bpf_prog;
@@ -16,4 +18,23 @@ struct xt_bpf_info {
struct bpf_prog *filter __attribute__((aligned(8)));
};
enum xt_bpf_modes {
XT_BPF_MODE_BYTECODE,
XT_BPF_MODE_FD_PINNED,
XT_BPF_MODE_FD_ELF,
};
struct xt_bpf_info_v1 {
__u16 mode;
__u16 bpf_program_num_elem;
__s32 fd;
union {
struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
char path[XT_BPF_PATH_MAX];
};
/* only used in the kernel */
struct bpf_prog *filter __attribute__((aligned(8)));
};
#endif /*_XT_BPF_H */