Files
android_kernel_xiaomi_sm8450/include/uapi/linux/pkt_cls.h
John Fastabend 9e8ce79cd7 net: sched: cls_u32 add bit to specify software only rules
In the initial implementation the only way to stop a rule from being
inserted into the hardware table was via the device feature flag.
However this doesn't work well when working on an end host system
where packets are expect to hit both the hardware and software
datapaths.

For example we can imagine a rule that will match an IP address and
increment a field. If we install this rule in both hardware and
software we may increment the field twice. To date we have only
added support for the drop action so we have been able to ignore
these cases. But as we extend the action support we will hit this
example plus more such cases. Arguably these are not even corner
cases in many working systems these cases will be common.

To avoid forcing the driver to always abort (i.e. the above example)
this patch adds a flag to add a rule in software only. A careful
user can use this flag to build software and hardware datapaths
that work together. One example we have found particularly useful
is to use hardware resources to set the skb->mark on the skb when
the match may be expensive to run in software but a mark lookup
in a hash table is cheap. The idea here is hardware can do in one
lookup what the u32 classifier may need to traverse multiple lists
and hash tables to compute. The flag is only passed down on inserts.
On deletion to avoid stale references in hardware we always try
to remove a rule if it exists.

The flags field is part of the classifier specific options. Although
it is tempting to lift this into the generic structure doing this
proves difficult do to how the tc netlink attributes are implemented
along with how the dump/change routines are called. There is also
precedence for putting seemingly generic pieces in the specific
classifier options such as TCA_U32_POLICE, TCA_U32_ACT, etc. So
although not ideal I've left FLAGS in the u32 options as well as it
simplifies the code greatly and user space has already learned how
to manage these bits ala 'tc' tool.

Another thing if trying to update a rule we require the flags to
be unchanged. This is to force user space, software u32 and
the hardware u32 to keep in sync. Thanks to Simon Horman for
catching this case.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-03-01 16:05:39 -05:00

504 lines
9.5 KiB
C

#ifndef __LINUX_PKT_CLS_H
#define __LINUX_PKT_CLS_H
#include <linux/types.h>
#include <linux/pkt_sched.h>
#ifdef __KERNEL__
/* I think i could have done better macros ; for now this is stolen from
* some arch/mips code - jhs
*/
#define _TC_MAKE32(x) ((x))
#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n))
#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n))
#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n))
#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n))
/* verdict bit breakdown
*
bit 0: when set -> this packet has been munged already
bit 1: when set -> It is ok to munge this packet
bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded
assume loop
bit 6,7: Where this packet was last seen
0: Above the transmit example at the socket level
1: on the Ingress
2: on the Egress
bit 8: when set --> Request not to classify on ingress.
bits 9,10,11: redirect counter - redirect TTL. Loop avoidance
*
* */
#define S_TC_FROM _TC_MAKE32(6)
#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM)
#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM)
#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM)
#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM))
#define AT_STACK 0x0
#define AT_INGRESS 0x1
#define AT_EGRESS 0x2
#define TC_NCLS _TC_MAKEMASK1(8)
#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS))
#define CLR_TC_NCLS(v) ( v & ~TC_NCLS)
#define S_TC_AT _TC_MAKE32(12)
#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT)
#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT)
#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT)
#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT))
#define MAX_REC_LOOP 4
#define MAX_RED_LOOP 4
#endif
/* Action attributes */
enum {
TCA_ACT_UNSPEC,
TCA_ACT_KIND,
TCA_ACT_OPTIONS,
TCA_ACT_INDEX,
TCA_ACT_STATS,
__TCA_ACT_MAX
};
#define TCA_ACT_MAX __TCA_ACT_MAX
#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
#define TCA_ACT_MAX_PRIO 32
#define TCA_ACT_BIND 1
#define TCA_ACT_NOBIND 0
#define TCA_ACT_UNBIND 1
#define TCA_ACT_NOUNBIND 0
#define TCA_ACT_REPLACE 1
#define TCA_ACT_NOREPLACE 0
#define TC_ACT_UNSPEC (-1)
#define TC_ACT_OK 0
#define TC_ACT_RECLASSIFY 1
#define TC_ACT_SHOT 2
#define TC_ACT_PIPE 3
#define TC_ACT_STOLEN 4
#define TC_ACT_QUEUED 5
#define TC_ACT_REPEAT 6
#define TC_ACT_REDIRECT 7
#define TC_ACT_JUMP 0x10000000
/* Action type identifiers*/
enum {
TCA_ID_UNSPEC=0,
TCA_ID_POLICE=1,
/* other actions go here */
__TCA_ID_MAX=255
};
#define TCA_ID_MAX __TCA_ID_MAX
struct tc_police {
__u32 index;
int action;
#define TC_POLICE_UNSPEC TC_ACT_UNSPEC
#define TC_POLICE_OK TC_ACT_OK
#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY
#define TC_POLICE_SHOT TC_ACT_SHOT
#define TC_POLICE_PIPE TC_ACT_PIPE
__u32 limit;
__u32 burst;
__u32 mtu;
struct tc_ratespec rate;
struct tc_ratespec peakrate;
int refcnt;
int bindcnt;
__u32 capab;
};
struct tcf_t {
__u64 install;
__u64 lastuse;
__u64 expires;
};
struct tc_cnt {
int refcnt;
int bindcnt;
};
#define tc_gen \
__u32 index; \
__u32 capab; \
int action; \
int refcnt; \
int bindcnt
enum {
TCA_POLICE_UNSPEC,
TCA_POLICE_TBF,
TCA_POLICE_RATE,
TCA_POLICE_PEAKRATE,
TCA_POLICE_AVRATE,
TCA_POLICE_RESULT,
__TCA_POLICE_MAX
#define TCA_POLICE_RESULT TCA_POLICE_RESULT
};
#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
/* U32 filters */
#define TC_U32_HTID(h) ((h)&0xFFF00000)
#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20)
#define TC_U32_HASH(h) (((h)>>12)&0xFF)
#define TC_U32_NODE(h) ((h)&0xFFF)
#define TC_U32_KEY(h) ((h)&0xFFFFF)
#define TC_U32_UNSPEC 0
#define TC_U32_ROOT (0xFFF00000)
enum {
TCA_U32_UNSPEC,
TCA_U32_CLASSID,
TCA_U32_HASH,
TCA_U32_LINK,
TCA_U32_DIVISOR,
TCA_U32_SEL,
TCA_U32_POLICE,
TCA_U32_ACT,
TCA_U32_INDEV,
TCA_U32_PCNT,
TCA_U32_MARK,
TCA_U32_FLAGS,
__TCA_U32_MAX
};
#define TCA_U32_MAX (__TCA_U32_MAX - 1)
struct tc_u32_key {
__be32 mask;
__be32 val;
int off;
int offmask;
};
struct tc_u32_sel {
unsigned char flags;
unsigned char offshift;
unsigned char nkeys;
__be16 offmask;
__u16 off;
short offoff;
short hoff;
__be32 hmask;
struct tc_u32_key keys[0];
};
struct tc_u32_mark {
__u32 val;
__u32 mask;
__u32 success;
};
struct tc_u32_pcnt {
__u64 rcnt;
__u64 rhit;
__u64 kcnts[0];
};
/* Flags */
#define TC_U32_TERMINAL 1
#define TC_U32_OFFSET 2
#define TC_U32_VAROFFSET 4
#define TC_U32_EAT 8
#define TC_U32_MAXDEPTH 8
/* RSVP filter */
enum {
TCA_RSVP_UNSPEC,
TCA_RSVP_CLASSID,
TCA_RSVP_DST,
TCA_RSVP_SRC,
TCA_RSVP_PINFO,
TCA_RSVP_POLICE,
TCA_RSVP_ACT,
__TCA_RSVP_MAX
};
#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
struct tc_rsvp_gpi {
__u32 key;
__u32 mask;
int offset;
};
struct tc_rsvp_pinfo {
struct tc_rsvp_gpi dpi;
struct tc_rsvp_gpi spi;
__u8 protocol;
__u8 tunnelid;
__u8 tunnelhdr;
__u8 pad;
};
/* ROUTE filter */
enum {
TCA_ROUTE4_UNSPEC,
TCA_ROUTE4_CLASSID,
TCA_ROUTE4_TO,
TCA_ROUTE4_FROM,
TCA_ROUTE4_IIF,
TCA_ROUTE4_POLICE,
TCA_ROUTE4_ACT,
__TCA_ROUTE4_MAX
};
#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1)
/* FW filter */
enum {
TCA_FW_UNSPEC,
TCA_FW_CLASSID,
TCA_FW_POLICE,
TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */
TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
TCA_FW_MASK,
__TCA_FW_MAX
};
#define TCA_FW_MAX (__TCA_FW_MAX - 1)
/* TC index filter */
enum {
TCA_TCINDEX_UNSPEC,
TCA_TCINDEX_HASH,
TCA_TCINDEX_MASK,
TCA_TCINDEX_SHIFT,
TCA_TCINDEX_FALL_THROUGH,
TCA_TCINDEX_CLASSID,
TCA_TCINDEX_POLICE,
TCA_TCINDEX_ACT,
__TCA_TCINDEX_MAX
};
#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1)
/* Flow filter */
enum {
FLOW_KEY_SRC,
FLOW_KEY_DST,
FLOW_KEY_PROTO,
FLOW_KEY_PROTO_SRC,
FLOW_KEY_PROTO_DST,
FLOW_KEY_IIF,
FLOW_KEY_PRIORITY,
FLOW_KEY_MARK,
FLOW_KEY_NFCT,
FLOW_KEY_NFCT_SRC,
FLOW_KEY_NFCT_DST,
FLOW_KEY_NFCT_PROTO_SRC,
FLOW_KEY_NFCT_PROTO_DST,
FLOW_KEY_RTCLASSID,
FLOW_KEY_SKUID,
FLOW_KEY_SKGID,
FLOW_KEY_VLAN_TAG,
FLOW_KEY_RXHASH,
__FLOW_KEY_MAX,
};
#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1)
enum {
FLOW_MODE_MAP,
FLOW_MODE_HASH,
};
enum {
TCA_FLOW_UNSPEC,
TCA_FLOW_KEYS,
TCA_FLOW_MODE,
TCA_FLOW_BASECLASS,
TCA_FLOW_RSHIFT,
TCA_FLOW_ADDEND,
TCA_FLOW_MASK,
TCA_FLOW_XOR,
TCA_FLOW_DIVISOR,
TCA_FLOW_ACT,
TCA_FLOW_POLICE,
TCA_FLOW_EMATCHES,
TCA_FLOW_PERTURB,
__TCA_FLOW_MAX
};
#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1)
/* Basic filter */
enum {
TCA_BASIC_UNSPEC,
TCA_BASIC_CLASSID,
TCA_BASIC_EMATCHES,
TCA_BASIC_ACT,
TCA_BASIC_POLICE,
__TCA_BASIC_MAX
};
#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
/* Cgroup classifier */
enum {
TCA_CGROUP_UNSPEC,
TCA_CGROUP_ACT,
TCA_CGROUP_POLICE,
TCA_CGROUP_EMATCHES,
__TCA_CGROUP_MAX,
};
#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
/* BPF classifier */
#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0)
enum {
TCA_BPF_UNSPEC,
TCA_BPF_ACT,
TCA_BPF_POLICE,
TCA_BPF_CLASSID,
TCA_BPF_OPS_LEN,
TCA_BPF_OPS,
TCA_BPF_FD,
TCA_BPF_NAME,
TCA_BPF_FLAGS,
__TCA_BPF_MAX,
};
#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
/* Flower classifier */
enum {
TCA_FLOWER_UNSPEC,
TCA_FLOWER_CLASSID,
TCA_FLOWER_INDEV,
TCA_FLOWER_ACT,
TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */
TCA_FLOWER_KEY_ETH_TYPE, /* be16 */
TCA_FLOWER_KEY_IP_PROTO, /* u8 */
TCA_FLOWER_KEY_IPV4_SRC, /* be32 */
TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */
TCA_FLOWER_KEY_IPV4_DST, /* be32 */
TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */
TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */
TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */
TCA_FLOWER_KEY_TCP_SRC, /* be16 */
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
__TCA_FLOWER_MAX,
};
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
/* Extended Matches */
struct tcf_ematch_tree_hdr {
__u16 nmatches;
__u16 progid;
};
enum {
TCA_EMATCH_TREE_UNSPEC,
TCA_EMATCH_TREE_HDR,
TCA_EMATCH_TREE_LIST,
__TCA_EMATCH_TREE_MAX
};
#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
struct tcf_ematch_hdr {
__u16 matchid;
__u16 kind;
__u16 flags;
__u16 pad; /* currently unused */
};
/* 0 1
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
* +-----------------------+-+-+---+
* | Unused |S|I| R |
* +-----------------------+-+-+---+
*
* R(2) ::= relation to next ematch
* where: 0 0 END (last ematch)
* 0 1 AND
* 1 0 OR
* 1 1 Unused (invalid)
* I(1) ::= invert result
* S(1) ::= simple payload
*/
#define TCF_EM_REL_END 0
#define TCF_EM_REL_AND (1<<0)
#define TCF_EM_REL_OR (1<<1)
#define TCF_EM_INVERT (1<<2)
#define TCF_EM_SIMPLE (1<<3)
#define TCF_EM_REL_MASK 3
#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
enum {
TCF_LAYER_LINK,
TCF_LAYER_NETWORK,
TCF_LAYER_TRANSPORT,
__TCF_LAYER_MAX
};
#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
/* Ematch type assignments
* 1..32767 Reserved for ematches inside kernel tree
* 32768..65535 Free to use, not reliable
*/
#define TCF_EM_CONTAINER 0
#define TCF_EM_CMP 1
#define TCF_EM_NBYTE 2
#define TCF_EM_U32 3
#define TCF_EM_META 4
#define TCF_EM_TEXT 5
#define TCF_EM_VLAN 6
#define TCF_EM_CANID 7
#define TCF_EM_IPSET 8
#define TCF_EM_MAX 8
enum {
TCF_EM_PROG_TC
};
enum {
TCF_EM_OPND_EQ,
TCF_EM_OPND_GT,
TCF_EM_OPND_LT
};
#endif