fib_rules.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * INET An implementation of the TCP/IP protocol suite for the LINUX
  4. * operating system. INET is implemented using the BSD Socket
  5. * interface as the means of communication with the user level.
  6. *
  7. * IPv4 Forwarding Information Base: policy rules.
  8. *
  9. * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10. * Thomas Graf <tgraf@suug.ch>
  11. *
  12. * Fixes:
  13. * Rani Assaf : local_rule cannot be deleted
  14. * Marc Boucher : routing by fwmark
  15. */
  16. #include <linux/types.h>
  17. #include <linux/kernel.h>
  18. #include <linux/netdevice.h>
  19. #include <linux/netlink.h>
  20. #include <linux/inetdevice.h>
  21. #include <linux/init.h>
  22. #include <linux/list.h>
  23. #include <linux/rcupdate.h>
  24. #include <linux/export.h>
  25. #include <net/inet_dscp.h>
  26. #include <net/ip.h>
  27. #include <net/route.h>
  28. #include <net/tcp.h>
  29. #include <net/ip_fib.h>
  30. #include <net/nexthop.h>
  31. #include <net/fib_rules.h>
  32. #include <linux/indirect_call_wrapper.h>
  33. struct fib4_rule {
  34. struct fib_rule common;
  35. u8 dst_len;
  36. u8 src_len;
  37. dscp_t dscp;
  38. __be32 src;
  39. __be32 srcmask;
  40. __be32 dst;
  41. __be32 dstmask;
  42. #ifdef CONFIG_IP_ROUTE_CLASSID
  43. u32 tclassid;
  44. #endif
  45. };
  46. static bool fib4_rule_matchall(const struct fib_rule *rule)
  47. {
  48. struct fib4_rule *r = container_of(rule, struct fib4_rule, common);
  49. if (r->dst_len || r->src_len || r->dscp)
  50. return false;
  51. return fib_rule_matchall(rule);
  52. }
  53. bool fib4_rule_default(const struct fib_rule *rule)
  54. {
  55. if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL ||
  56. rule->l3mdev)
  57. return false;
  58. if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN &&
  59. rule->table != RT_TABLE_DEFAULT)
  60. return false;
  61. return true;
  62. }
  63. EXPORT_SYMBOL_GPL(fib4_rule_default);
  64. int fib4_rules_dump(struct net *net, struct notifier_block *nb,
  65. struct netlink_ext_ack *extack)
  66. {
  67. return fib_rules_dump(net, nb, AF_INET, extack);
  68. }
  69. unsigned int fib4_rules_seq_read(struct net *net)
  70. {
  71. return fib_rules_seq_read(net, AF_INET);
  72. }
  73. int __fib_lookup(struct net *net, struct flowi4 *flp,
  74. struct fib_result *res, unsigned int flags)
  75. {
  76. struct fib_lookup_arg arg = {
  77. .result = res,
  78. .flags = flags,
  79. };
  80. int err;
  81. /* update flow if oif or iif point to device enslaved to l3mdev */
  82. l3mdev_update_flow(net, flowi4_to_flowi(flp));
  83. err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
  84. #ifdef CONFIG_IP_ROUTE_CLASSID
  85. if (arg.rule)
  86. res->tclassid = ((struct fib4_rule *)arg.rule)->tclassid;
  87. else
  88. res->tclassid = 0;
  89. #endif
  90. if (err == -ESRCH)
  91. err = -ENETUNREACH;
  92. return err;
  93. }
  94. EXPORT_SYMBOL_GPL(__fib_lookup);
  95. INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule,
  96. struct flowi *flp, int flags,
  97. struct fib_lookup_arg *arg)
  98. {
  99. int err = -EAGAIN;
  100. struct fib_table *tbl;
  101. u32 tb_id;
  102. switch (rule->action) {
  103. case FR_ACT_TO_TBL:
  104. break;
  105. case FR_ACT_UNREACHABLE:
  106. return -ENETUNREACH;
  107. case FR_ACT_PROHIBIT:
  108. return -EACCES;
  109. case FR_ACT_BLACKHOLE:
  110. default:
  111. return -EINVAL;
  112. }
  113. rcu_read_lock();
  114. tb_id = fib_rule_get_table(rule, arg);
  115. tbl = fib_get_table(rule->fr_net, tb_id);
  116. if (tbl)
  117. err = fib_table_lookup(tbl, &flp->u.ip4,
  118. (struct fib_result *)arg->result,
  119. arg->flags);
  120. rcu_read_unlock();
  121. return err;
  122. }
  123. INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
  124. int flags,
  125. struct fib_lookup_arg *arg)
  126. {
  127. struct fib_result *result = arg->result;
  128. struct net_device *dev = NULL;
  129. if (result->fi) {
  130. struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0);
  131. dev = nhc->nhc_dev;
  132. }
  133. /* do not accept result if the route does
  134. * not meet the required prefix length
  135. */
  136. if (result->prefixlen <= rule->suppress_prefixlen)
  137. goto suppress_route;
  138. /* do not accept result if the route uses a device
  139. * belonging to a forbidden interface group
  140. */
  141. if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
  142. goto suppress_route;
  143. return false;
  144. suppress_route:
  145. if (!(arg->flags & FIB_LOOKUP_NOREF))
  146. fib_info_put(result->fi);
  147. return true;
  148. }
  149. INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
  150. struct flowi *fl, int flags)
  151. {
  152. struct fib4_rule *r = (struct fib4_rule *) rule;
  153. struct flowi4 *fl4 = &fl->u.ip4;
  154. __be32 daddr = fl4->daddr;
  155. __be32 saddr = fl4->saddr;
  156. if (((saddr ^ r->src) & r->srcmask) ||
  157. ((daddr ^ r->dst) & r->dstmask))
  158. return 0;
  159. if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
  160. return 0;
  161. if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
  162. return 0;
  163. if (fib_rule_port_range_set(&rule->sport_range) &&
  164. !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport))
  165. return 0;
  166. if (fib_rule_port_range_set(&rule->dport_range) &&
  167. !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport))
  168. return 0;
  169. return 1;
  170. }
  171. static struct fib_table *fib_empty_table(struct net *net)
  172. {
  173. u32 id = 1;
  174. while (1) {
  175. if (!fib_get_table(net, id))
  176. return fib_new_table(net, id);
  177. if (id++ == RT_TABLE_MAX)
  178. break;
  179. }
  180. return NULL;
  181. }
  182. static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
  183. struct fib_rule_hdr *frh,
  184. struct nlattr **tb,
  185. struct netlink_ext_ack *extack)
  186. {
  187. struct net *net = sock_net(skb->sk);
  188. int err = -EINVAL;
  189. struct fib4_rule *rule4 = (struct fib4_rule *) rule;
  190. if (!inet_validate_dscp(frh->tos)) {
  191. NL_SET_ERR_MSG(extack,
  192. "Invalid dsfield (tos): ECN bits must be 0");
  193. goto errout;
  194. }
  195. /* IPv4 currently doesn't handle high order DSCP bits correctly */
  196. if (frh->tos & ~IPTOS_TOS_MASK) {
  197. NL_SET_ERR_MSG(extack, "Invalid tos");
  198. goto errout;
  199. }
  200. rule4->dscp = inet_dsfield_to_dscp(frh->tos);
  201. /* split local/main if they are not already split */
  202. err = fib_unmerge(net);
  203. if (err)
  204. goto errout;
  205. if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) {
  206. if (rule->action == FR_ACT_TO_TBL) {
  207. struct fib_table *table;
  208. table = fib_empty_table(net);
  209. if (!table) {
  210. err = -ENOBUFS;
  211. goto errout;
  212. }
  213. rule->table = table->tb_id;
  214. }
  215. }
  216. if (frh->src_len)
  217. rule4->src = nla_get_in_addr(tb[FRA_SRC]);
  218. if (frh->dst_len)
  219. rule4->dst = nla_get_in_addr(tb[FRA_DST]);
  220. #ifdef CONFIG_IP_ROUTE_CLASSID
  221. if (tb[FRA_FLOW]) {
  222. rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
  223. if (rule4->tclassid)
  224. atomic_inc(&net->ipv4.fib_num_tclassid_users);
  225. }
  226. #endif
  227. if (fib_rule_requires_fldissect(rule))
  228. net->ipv4.fib_rules_require_fldissect++;
  229. rule4->src_len = frh->src_len;
  230. rule4->srcmask = inet_make_mask(rule4->src_len);
  231. rule4->dst_len = frh->dst_len;
  232. rule4->dstmask = inet_make_mask(rule4->dst_len);
  233. net->ipv4.fib_has_custom_rules = true;
  234. err = 0;
  235. errout:
  236. return err;
  237. }
  238. static int fib4_rule_delete(struct fib_rule *rule)
  239. {
  240. struct net *net = rule->fr_net;
  241. int err;
  242. /* split local/main if they are not already split */
  243. err = fib_unmerge(net);
  244. if (err)
  245. goto errout;
  246. #ifdef CONFIG_IP_ROUTE_CLASSID
  247. if (((struct fib4_rule *)rule)->tclassid)
  248. atomic_dec(&net->ipv4.fib_num_tclassid_users);
  249. #endif
  250. net->ipv4.fib_has_custom_rules = true;
  251. if (net->ipv4.fib_rules_require_fldissect &&
  252. fib_rule_requires_fldissect(rule))
  253. net->ipv4.fib_rules_require_fldissect--;
  254. errout:
  255. return err;
  256. }
  257. static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
  258. struct nlattr **tb)
  259. {
  260. struct fib4_rule *rule4 = (struct fib4_rule *) rule;
  261. if (frh->src_len && (rule4->src_len != frh->src_len))
  262. return 0;
  263. if (frh->dst_len && (rule4->dst_len != frh->dst_len))
  264. return 0;
  265. if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
  266. return 0;
  267. #ifdef CONFIG_IP_ROUTE_CLASSID
  268. if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
  269. return 0;
  270. #endif
  271. if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC])))
  272. return 0;
  273. if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST])))
  274. return 0;
  275. return 1;
  276. }
  277. static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
  278. struct fib_rule_hdr *frh)
  279. {
  280. struct fib4_rule *rule4 = (struct fib4_rule *) rule;
  281. frh->dst_len = rule4->dst_len;
  282. frh->src_len = rule4->src_len;
  283. frh->tos = inet_dscp_to_dsfield(rule4->dscp);
  284. if ((rule4->dst_len &&
  285. nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
  286. (rule4->src_len &&
  287. nla_put_in_addr(skb, FRA_SRC, rule4->src)))
  288. goto nla_put_failure;
  289. #ifdef CONFIG_IP_ROUTE_CLASSID
  290. if (rule4->tclassid &&
  291. nla_put_u32(skb, FRA_FLOW, rule4->tclassid))
  292. goto nla_put_failure;
  293. #endif
  294. return 0;
  295. nla_put_failure:
  296. return -ENOBUFS;
  297. }
  298. static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
  299. {
  300. return nla_total_size(4) /* dst */
  301. + nla_total_size(4) /* src */
  302. + nla_total_size(4); /* flow */
  303. }
  304. static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
  305. {
  306. rt_cache_flush(ops->fro_net);
  307. }
  308. static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
  309. .family = AF_INET,
  310. .rule_size = sizeof(struct fib4_rule),
  311. .addr_size = sizeof(u32),
  312. .action = fib4_rule_action,
  313. .suppress = fib4_rule_suppress,
  314. .match = fib4_rule_match,
  315. .configure = fib4_rule_configure,
  316. .delete = fib4_rule_delete,
  317. .compare = fib4_rule_compare,
  318. .fill = fib4_rule_fill,
  319. .nlmsg_payload = fib4_rule_nlmsg_payload,
  320. .flush_cache = fib4_rule_flush_cache,
  321. .nlgroup = RTNLGRP_IPV4_RULE,
  322. .owner = THIS_MODULE,
  323. };
  324. static int fib_default_rules_init(struct fib_rules_ops *ops)
  325. {
  326. int err;
  327. err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0);
  328. if (err < 0)
  329. return err;
  330. err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
  331. if (err < 0)
  332. return err;
  333. err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
  334. if (err < 0)
  335. return err;
  336. return 0;
  337. }
  338. int __net_init fib4_rules_init(struct net *net)
  339. {
  340. int err;
  341. struct fib_rules_ops *ops;
  342. ops = fib_rules_register(&fib4_rules_ops_template, net);
  343. if (IS_ERR(ops))
  344. return PTR_ERR(ops);
  345. err = fib_default_rules_init(ops);
  346. if (err < 0)
  347. goto fail;
  348. net->ipv4.rules_ops = ops;
  349. net->ipv4.fib_has_custom_rules = false;
  350. net->ipv4.fib_rules_require_fldissect = 0;
  351. return 0;
  352. fail:
  353. /* also cleans all rules already added */
  354. fib_rules_unregister(ops);
  355. return err;
  356. }
  357. void __net_exit fib4_rules_exit(struct net *net)
  358. {
  359. fib_rules_unregister(net->ipv4.rules_ops);
  360. }