pmtu.sh 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282
  1. #!/bin/sh
  2. # SPDX-License-Identifier: GPL-2.0
  3. #
  4. # Check that route PMTU values match expectations, and that initial device MTU
  5. # values are assigned correctly
  6. #
  7. # Tests currently implemented:
  8. #
  9. # - pmtu_ipv4
  10. # Set up two namespaces, A and B, with two paths between them over routers
  11. # R1 and R2 (also implemented with namespaces), with different MTUs:
  12. #
  13. # segment a_r1 segment b_r1 a_r1: 2000
  14. # .--------------R1--------------. b_r1: 1400
  15. # A B a_r2: 2000
  16. # '--------------R2--------------' b_r2: 1500
  17. # segment a_r2 segment b_r2
  18. #
  19. # Check that PMTU exceptions with the correct PMTU are created. Then
  20. # decrease and increase the MTU of the local link for one of the paths,
  21. # A to R1, checking that route exception PMTU changes accordingly over
  22. # this path. Also check that locked exceptions are created when an ICMP
  23. # message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
  24. # received
  25. #
  26. # - pmtu_ipv6
  27. # Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
  28. #
  29. # - pmtu_ipv4_dscp_icmp_exception
  30. # Set up the same network topology as pmtu_ipv4, but use non-default
  31. # routing table in A. A fib-rule is used to jump to this routing table
  32. # based on DSCP. Send ICMPv4 packets with the expected DSCP value and
  33. # verify that ECN doesn't interfere with the creation of PMTU exceptions.
  34. #
  35. # - pmtu_ipv4_dscp_udp_exception
  36. # Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
  37. #
  38. # - pmtu_ipv4_vxlan4_exception
  39. # Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
  40. # over IPv4 between A and B, routed via R1. On the link between R1 and B,
  41. # set a MTU lower than the VXLAN MTU and the MTU on the link between A and
  42. # R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
  43. # from A to B and check that the PMTU exception is created with the right
  44. # value on A
  45. #
  46. # - pmtu_ipv6_vxlan4_exception
  47. # Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
  48. #
  49. # - pmtu_ipv4_vxlan6_exception
  50. # Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
  51. #
  52. # - pmtu_ipv6_vxlan6_exception
  53. # Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
  54. #
  55. # - pmtu_ipv4_geneve4_exception
  56. # Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
  57. # VXLAN
  58. #
  59. # - pmtu_ipv6_geneve4_exception
  60. # Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
  61. # VXLAN
  62. #
  63. # - pmtu_ipv4_geneve6_exception
  64. # Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
  65. # VXLAN
  66. #
  67. # - pmtu_ipv6_geneve6_exception
  68. # Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
  69. # VXLAN
  70. #
  71. # - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
  72. # Set up three namespaces, A, B, and C, with routing between A and B over
  73. # R1. R2 is unused in these tests. A has a veth connection to C, and is
  74. # connected to B via a VXLAN endpoint, which is directly bridged to C.
  75. # MTU on the B-R1 link is lower than other MTUs.
  76. #
  77. # Check that both C and A are able to communicate with B over the VXLAN
  78. # tunnel, and that PMTU exceptions with the correct values are created.
  79. #
  80. # segment a_r1 segment b_r1 b_r1: 4000
  81. # .--------------R1--------------. everything
  82. # C---veth A B else: 5000
  83. # ' bridge |
  84. # '---- - - - - - VXLAN - - - - - - - '
  85. #
  86. # - pmtu_ipv{4,6}_br_geneve{4,6}_exception
  87. # Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
  88. # instead.
  89. #
  90. # - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
  91. # Set up two namespaces, B, and C, with routing between the init namespace
  92. # and B over R1. A and R2 are unused in these tests. The init namespace
  93. # has a veth connection to C, and is connected to B via a VXLAN endpoint,
  94. # which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
  95. # is lower than other MTUs.
  96. #
  97. # Check that C is able to communicate with B over the VXLAN tunnel, and
  98. # that PMTU exceptions with the correct values are created.
  99. #
  100. # segment a_r1 segment b_r1 b_r1: 4000
  101. # .--------------R1--------------. everything
  102. # C---veth init B else: 5000
  103. # '- ovs |
  104. # '---- - - - - - VXLAN - - - - - - - '
  105. #
  106. # - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
  107. # Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
  108. # instead.
  109. #
  110. # - pmtu_ipv{4,6}_fou{4,6}_exception
  111. # Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
  112. # (FoU) over IPv4/IPv6, instead of VXLAN
  113. #
  114. # - pmtu_ipv{4,6}_gue{4,6}_exception
  115. # Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
  116. # encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
  117. #
  118. # - pmtu_ipv{4,6}_ipv{4,6}_exception
  119. # Same as pmtu_ipv4_vxlan4, but using an IPv4/IPv6 tunnel over IPv4/IPv6,
  120. # instead of VXLAN
  121. #
  122. # - pmtu_vti4_exception
  123. # Set up vti tunnel on top of veth, with xfrm states and policies, in two
  124. # namespaces with matching endpoints. Check that route exception is not
  125. # created if link layer MTU is not exceeded, then exceed it and check that
  126. # exception is created with the expected PMTU. The approach described
  127. # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
  128. # changes alone won't affect PMTU
  129. #
  130. # - pmtu_vti4_udp_exception
  131. # Same as pmtu_vti4_exception, but using ESP-in-UDP
  132. #
  133. # - pmtu_vti4_udp_routed_exception
  134. # Set up vti tunnel on top of veth connected through routing namespace and
  135. # add xfrm states and policies with ESP-in-UDP encapsulation. Check that
  136. # route exception is not created if link layer MTU is not exceeded, then
  137. # lower MTU on second part of routed environment and check that exception
  138. # is created with the expected PMTU.
  139. #
  140. # - pmtu_vti6_exception
  141. # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
  142. # namespaces with matching endpoints. Check that route exception is
  143. # created by exceeding link layer MTU with ping to other endpoint. Then
  144. # decrease and increase MTU of tunnel, checking that route exception PMTU
  145. # changes accordingly
  146. #
  147. # - pmtu_vti6_udp_exception
  148. # Same as pmtu_vti6_exception, but using ESP-in-UDP
  149. #
  150. # - pmtu_vti6_udp_routed_exception
  151. # Same as pmtu_vti6_udp_exception, but with routing between vti
  152. # endpoints
  153. #
  154. # - pmtu_vti4_default_mtu
  155. # Set up vti4 tunnel on top of veth, in two namespaces with matching
  156. # endpoints. Check that MTU assigned to vti interface is the MTU of the
  157. # lower layer (veth) minus additional lower layer headers (zero, for veth)
  158. # minus IPv4 header length
  159. #
  160. # - pmtu_vti6_default_mtu
  161. # Same as above, for IPv6
  162. #
  163. # - pmtu_vti4_link_add_mtu
  164. # Set up vti4 interface passing MTU value at link creation, check MTU is
  165. # configured, and that link is not created with invalid MTU values
  166. #
  167. # - pmtu_vti6_link_add_mtu
  168. # Same as above, for IPv6
  169. #
  170. # - pmtu_vti6_link_change_mtu
  171. # Set up two dummy interfaces with different MTUs, create a vti6 tunnel
  172. # and check that configured MTU is used on link creation and changes, and
  173. # that MTU is properly calculated instead when MTU is not configured from
  174. # userspace
  175. #
  176. # - cleanup_ipv4_exception
  177. # Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
  178. # exceptions on multiple CPUs and check that the veth device tear-down
  179. # happens in a timely manner
  180. #
  181. # - cleanup_ipv6_exception
  182. # Same as above, but use IPv6 transport from A to B
  183. #
  184. # - list_flush_ipv4_exception
  185. # Using the same topology as in pmtu_ipv4, create exceptions, and check
  186. # they are shown when listing exception caches, gone after flushing them
  187. #
  188. # - list_flush_ipv6_exception
  189. # Using the same topology as in pmtu_ipv6, create exceptions, and check
  190. # they are shown when listing exception caches, gone after flushing them
  191. #
  192. # - pmtu_ipv4_route_change
  193. # Use the same topology as in pmtu_ipv4, but issue a route replacement
  194. # command and delete the corresponding device afterward. This tests for
  195. # proper cleanup of the PMTU exceptions by the route replacement path.
  196. # Device unregistration should complete successfully
  197. #
  198. # - pmtu_ipv6_route_change
  199. # Same as above but with IPv6
  # Kselftest framework requirement - SKIP code is 4.
  ksft_skip=4

  # Default values of the run-time switches.
  PAUSE_ON_FAIL=no
  VERBOSE=0
  TRACING=0

  # Some systems don't have a ping6 binary anymore: fall back to plain ping.
  # 'command -v' is the POSIX lookup; 'which' is an optional external tool that
  # may be missing. If neither binary is found, ping6 is left empty.
  ping6=$(command -v ping6 2>/dev/null) || ping6=$(command -v ping 2>/dev/null)
  # Test table, one entry per line. Columns, in order:
  #   name          - test identifier
  #   description   - human-readable summary
  #   re-run with nh - 1 or 0 (presumably: also run the test with nexthop
  #                    objects, see USE_NH -- confirm against the test runner)
  tests="
pmtu_ipv4_exception ipv4: PMTU exceptions 1
pmtu_ipv6_exception ipv6: PMTU exceptions 1
pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1
pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1
pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions 1
pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions 1
pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1
pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1
pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1
pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1
pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1
pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1
pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1
pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1
pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1
pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1
pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1
pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1
pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1
pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1
pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1
pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1
pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions 1
pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions 1
pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1
pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1
pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1
pmtu_ipv4_ipv4_exception IPv4 over IPv4: PMTU exceptions 1
pmtu_ipv6_ipv4_exception IPv6 over IPv4: PMTU exceptions 1
pmtu_ipv4_ipv6_exception IPv4 over IPv6: PMTU exceptions 1
pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1
pmtu_vti6_exception vti6: PMTU exceptions 0
pmtu_vti4_exception vti4: PMTU exceptions 0
pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0
pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0
pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0
pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0
pmtu_vti4_default_mtu vti4: default MTU assignment 0
pmtu_vti6_default_mtu vti6: default MTU assignment 0
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
pmtu_vti6_link_add_mtu vti6: MTU setting on link creation 0
pmtu_vti6_link_change_mtu vti6: MTU changes on link changes 0
cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1
list_flush_ipv4_exception ipv4: list and flush cached exceptions 1
list_flush_ipv6_exception ipv6: list and flush cached exceptions 1
pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1
pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1"
  # Names of the network namespaces: hosts A, B, C and routers R1, R2.
  NS_A=ns-A
  NS_B=ns-B
  NS_C=ns-C
  NS_R1=ns-R1
  NS_R2=ns-R2

  # Command prefixes running whatever follows inside the matching namespace.
  # They are meant to be expanded unquoted, so that they split into words.
  ns_a="ip netns exec ${NS_A}"
  ns_b="ip netns exec ${NS_B}"
  ns_c="ip netns exec ${NS_C}"
  ns_r1="ip netns exec ${NS_R1}"
  ns_r2="ip netns exec ${NS_R2}"
  # Addressing and routing for tests with routers: four network segments, with
  # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
  # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
  # Addresses are:
  # - IPv4: PREFIX4.SEGMENT.ID (/24)
  # - IPv6: PREFIX6:SEGMENT::ID (/64)
  prefix4=10.0
  prefix6=fc00

  # Segment indexes, named after the host/router pair each segment connects.
  a_r1=1
  a_r2=2
  b_r1=3
  b_r2=4

  # Host/router links, one per line:
  # ns peer segment
  routing_addrs="
A R1 ${a_r1}
A R2 ${a_r2}
B R1 ${b_r1}
B R2 ${b_r2}
"
  # Traffic from A to B goes through R1 by default, and through R2, if destined to
  # B's address on the b_r2 segment.
  # Traffic from B to A goes through R1.
  #
  # One route per line, IPv4 entries first, then their IPv6 counterparts:
  # ns destination gateway
  routes="
A default ${prefix4}.${a_r1}.2
A ${prefix4}.${b_r2}.1 ${prefix4}.${a_r2}.2
B default ${prefix4}.${b_r1}.2
A default ${prefix6}:${a_r1}::2
A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2
B default ${prefix6}:${b_r1}::2
"
  # Initial value of the nexthop-objects switch.
  USE_NH=no

  # Nexthop objects, one per line:
  # ns family nh id destination gateway
  nexthops="
A 4 41 ${prefix4}.${a_r1}.2 veth_A-R1
A 4 42 ${prefix4}.${a_r2}.2 veth_A-R2
B 4 41 ${prefix4}.${b_r1}.2 veth_B-R1
A 6 61 ${prefix6}:${a_r1}::2 veth_A-R1
A 6 62 ${prefix6}:${a_r2}::2 veth_A-R2
B 6 61 ${prefix6}:${b_r1}::2 veth_B-R1
"

  # Routes using the nexthop objects, one per line.
  # nexthop id correlates to id in nexthops config above
  # ns family prefix nh id
  routes_nh="
A 4 default 41
A 4 ${prefix4}.${b_r2}.1 42
B 4 default 41
A 6 default 61
A 6 ${prefix6}:${b_r2}::1 62
B 6 default 61
"
  policy_mark=0x04
  rt_table=main

  # Addresses on the veth devices (A, B and C side).
  veth4_a_addr=192.168.1.1
  veth4_b_addr=192.168.1.2
  veth4_c_addr=192.168.2.10
  veth4_mask=24
  veth6_a_addr=fd00:1::a
  veth6_b_addr=fd00:1::b
  veth6_c_addr=fd00:2::c
  veth6_mask=64

  # Addresses assigned to the tunnel endpoints in A and B.
  tunnel4_a_addr=192.168.2.1
  tunnel4_b_addr=192.168.2.2
  tunnel4_mask=24
  tunnel6_a_addr=fd00:2::a
  tunnel6_b_addr=fd00:2::b
  tunnel6_mask=64

  # Prefixes for the two dummy interfaces.
  dummy6_0_prefix=fc00:1000::
  dummy6_1_prefix=fc00:1001::
  dummy6_mask=64

  # Run-time state, all empty at start: queued error text and helper PIDs.
  err_buf=
  tcpdump_pids=
  nettest_pids=
  socat_pids=
  # Queue an error message: append ${1}, followed by a newline, to err_buf.
  # Nothing is printed here; err_flush() emits the buffer.
  err() {
    err_buf="${err_buf}${1}"'
'
  }
  # Print everything queued in err_buf to stdout, verbatim, then empty the
  # buffer. printf '%s' is used instead of 'echo -n': under /bin/sh the handling
  # of -n and of backslashes in echo's operands is implementation-defined, which
  # could mangle queued messages.
  err_flush() {
    printf '%s' "${err_buf}"
    err_buf=
  }
  # Run the command given as arguments, capturing stdout and stderr.
  #
  # The arguments are joined into one string and word-split again on execution,
  # so callers can pass multi-word prefixes such as ${ns_a} unquoted.
  #
  # Globals written: cmd (command text), out (captured output), rc (status).
  # With VERBOSE=1, the command and any output it produced are echoed.
  # Returns the exit status of the command.
  run_cmd() {
    cmd="$*"

    if [ "$VERBOSE" = "1" ]; then
      # The command text is data, not a format: pass it through %s so that
      # '%' or backslashes in it are printed literally.
      printf ' COMMAND: %s\n' "$cmd"
    fi

    out="$($cmd 2>&1)"
    rc=$?
    if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
      printf ' %s\n\n' "$out"
    fi

    return $rc
  }
  # Start the command given as arguments in the background, with its stderr
  # merged into stdout. The output is not captured. With VERBOSE=1, the command
  # line is printed first. cmd is a global holding the joined command text.
  run_cmd_bg() {
    cmd="$*"

    [ "$VERBOSE" = "1" ] && printf " COMMAND: %s &\n" "${cmd}"

    $cmd 2>&1 &
  }
  # Print the name of the namespace identified by ${1} (e.g. A, B, R1), that is,
  # the value of the variable NS_${1}. eval is needed for the indirect lookup,
  # as plain sh has no indirect expansion.
  nsname() {
    eval "echo \$NS_${1}"
  }
  # Set up a FoU or GUE tunnel between namespaces A and B.
  #
  # Arguments:
  #   ${1} outer IP version: 4 uses the fou module and the IPv4 addresses of
  #        A and B on the a_r1/b_r1 segments; anything else uses fou6 and the
  #        IPv6 ones
  #   ${2} inner IP version: 4 or 6
  #   ${3} encapsulation: fou or gue, also used as device name prefix
  #
  # A gets a receive port 5555 and sends to 5556, B the other way around.
  # The devices are named ${3}_a and ${3}_b, get the tunnel addresses of the
  # inner family, and are brought up.
  #
  # Returns $ksft_skip if the module can't be loaded, or if the receive port or
  # the tunnel device can't be created in A; otherwise, the status of the last
  # command.
  setup_fou_or_gue() {
    outer="${1}"
    inner="${2}"
    encap="${3}"

    # Variables are global: only the IPv6-outer branch assigns a mode, so
    # clear any value left behind by a previous call before building the
    # 'ip link add' command lines for ipip/sit.
    mode=""

    if [ "${outer}" = "4" ]; then
      modprobe fou || return $ksft_skip
      a_addr="${prefix4}.${a_r1}.1"
      b_addr="${prefix4}.${b_r1}.1"
      if [ "${inner}" = "4" ]; then
        type="ipip"
        ipproto="4"
      else
        type="sit"
        ipproto="41"
      fi
    else
      modprobe fou6 || return $ksft_skip
      a_addr="${prefix6}:${a_r1}::1"
      b_addr="${prefix6}:${b_r1}::1"
      if [ "${inner}" = "4" ]; then
        type="ip6tnl"
        mode="mode ipip6"
        ipproto="4 -6"
      else
        type="ip6tnl"
        mode="mode ip6ip6"
        ipproto="41 -6"
      fi
    fi

    # ${mode} and ${ipproto} are deliberately unquoted: they hold zero or
    # more words
    run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
    run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip

    run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
    run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

    if [ "${inner}" = "4" ]; then
      run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
      run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
    else
      run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
      run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
    fi

    run_cmd ${ns_a} ip link set ${encap}_a up
    run_cmd ${ns_b} ip link set ${encap}_b up
  }
  # FoU tunnel: IPv4 outer, IPv4 inner.
  setup_fou44() {
    setup_fou_or_gue "4" "4" "fou"
  }
  # FoU tunnel: IPv4 outer, IPv6 inner.
  setup_fou46() {
    setup_fou_or_gue "4" "6" "fou"
  }
  # FoU tunnel: IPv6 outer, IPv4 inner.
  setup_fou64() {
    setup_fou_or_gue "6" "4" "fou"
  }
  # FoU tunnel: IPv6 outer, IPv6 inner.
  setup_fou66() {
    setup_fou_or_gue "6" "6" "fou"
  }
  # GUE tunnel: IPv4 outer, IPv4 inner.
  setup_gue44() {
    setup_fou_or_gue "4" "4" "gue"
  }
  # GUE tunnel: IPv4 outer, IPv6 inner.
  setup_gue46() {
    setup_fou_or_gue "4" "6" "gue"
  }
  # GUE tunnel: IPv6 outer, IPv4 inner.
  setup_gue64() {
    setup_fou_or_gue "6" "4" "gue"
  }
  # GUE tunnel: IPv6 outer, IPv6 inner.
  setup_gue66() {
    setup_fou_or_gue "6" "6" "gue"
  }
  # Set up a plain IP-in-IP tunnel (devices ip_a and ip_b) between A and B.
  #
  # Arguments:
  #   ${1} inner IP version (4 or 6)
  #   ${2} outer IP version (4 or 6)
  #
  # Device type and mode follow from the two versions: ipip or sit over IPv4,
  # ip6tnl over IPv6. Both devices are brought up and get the tunnel addresses
  # of the inner family.
  #
  # Returns $ksft_skip if the device can't be created in A; otherwise, the
  # status of the last command.
  setup_ipvX_over_ipvY() {
    inner=${1}
    outer=${2}

    if [ "${outer}" -eq 4 ]; then
      a_addr="${prefix4}.${a_r1}.1"
      b_addr="${prefix4}.${b_r1}.1"
      # IPv6 in IPv4, unless the inner protocol is IPv4 too
      type="sit"
      mode="ip6ip"
      if [ "${inner}" -eq 4 ]; then
        type="ipip"
        mode="ipip"
      fi
    else
      a_addr="${prefix6}:${a_r1}::1"
      b_addr="${prefix6}:${b_r1}::1"
      type="ip6tnl"
      # IPv6 in IPv6, unless the inner protocol is IPv4
      mode="ip6ip6"
      if [ "${inner}" -eq 4 ]; then
        mode="ipip6"
      fi
    fi

    run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip
    run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}

    run_cmd ${ns_a} ip link set ip_a up
    run_cmd ${ns_b} ip link set ip_b up

    case "${inner}" in
    4)
      run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
      run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
      ;;
    *)
      run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
      run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
      ;;
    esac
  }
  # IP tunnel: IPv4 inner, IPv4 outer.
  setup_ip4ip4() {
    setup_ipvX_over_ipvY "4" "4"
  }
  # IP tunnel: IPv6 inner, IPv4 outer.
  setup_ip6ip4() {
    setup_ipvX_over_ipvY "6" "4"
  }
  # IP tunnel: IPv4 inner, IPv6 outer.
  setup_ip4ip6() {
    setup_ipvX_over_ipvY "4" "6"
  }
  # IP tunnel: IPv6 inner, IPv6 outer.
  setup_ip6ip6() {
    setup_ipvX_over_ipvY "6" "6"
  }
  # Create the namespaces A, B, C, R1 and R2, in this order.
  # Returns 1 as soon as one of them can't be created; otherwise, the status
  # of the last sysctl call.
  setup_namespaces() {
    for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
      if ! ip netns add "${n}"; then
        return 1
      fi

      # Disable DAD, so that we don't have to wait to use the
      # configured IPv6 addresses
      ip netns exec "${n}" sysctl -q net/ipv6/conf/default/accept_dad=0
    done
  }
  # Connect A and B directly: create the veth pair veth_a/veth_b in A, move
  # veth_b to B, assign the IPv4 and IPv6 veth addresses, bring both ends up.
  # Returns 1 if the pair can't be created; otherwise, the status of the last
  # command.
  setup_veth() {
    if ! run_cmd ${ns_a} ip link add veth_a type veth peer name veth_b; then
      return 1
    fi
    run_cmd ${ns_a} ip link set veth_b netns ${NS_B}

    # IPv4 first, then IPv6
    run_cmd ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
    run_cmd ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
    run_cmd ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
    run_cmd ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b

    run_cmd ${ns_a} ip link set veth_a up
    run_cmd ${ns_b} ip link set veth_b up
  }
  # Set up a vti tunnel between A and B, devices vti${1}_a and vti${1}_b.
  #
  # Arguments:
  #   ${1} IP version: 6 selects device type vti6, anything else vti
  #   ${2} local endpoint address in A (remote endpoint for B)
  #   ${3} local endpoint address in B (remote endpoint for A)
  #   ${4} address assigned to the vti device in A
  #   ${5} address assigned to the vti device in B
  #   ${6} prefix length for both vti addresses
  #
  # Both devices use key 10 and are brought up.
  # Returns 1 if the device can't be created in A; otherwise, the status of
  # the last command.
  setup_vti() {
    proto=${1}
    veth_a_addr="${2}"
    veth_b_addr="${3}"
    vti_a_addr="${4}"
    vti_b_addr="${5}"
    vti_mask=${6}

    if [ "${proto}" -eq 6 ]; then
      vti_type="vti6"
    else
      vti_type="vti"
    fi

    if ! run_cmd ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10; then
      return 1
    fi
    run_cmd ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10

    run_cmd ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
    run_cmd ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b

    run_cmd ${ns_a} ip link set vti${proto}_a up
    run_cmd ${ns_b} ip link set vti${proto}_b up
  }
  # vti tunnel over IPv4, between the veth addresses of A and B.
  setup_vti4() {
    setup_vti 4 "${veth4_a_addr}" "${veth4_b_addr}" \
      "${tunnel4_a_addr}" "${tunnel4_b_addr}" "${tunnel4_mask}"
  }
  # vti6 tunnel over IPv6, between the veth addresses of A and B.
  setup_vti6() {
    setup_vti 6 "${veth6_a_addr}" "${veth6_b_addr}" \
      "${tunnel6_a_addr}" "${tunnel6_b_addr}" "${tunnel6_mask}"
  }
  # vti tunnel over IPv4, between the addresses of A and B on the routed
  # segments a_r1 and b_r1.
  setup_vti4routed() {
    setup_vti 4 "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" \
      "${tunnel4_a_addr}" "${tunnel4_b_addr}" "${tunnel4_mask}"
  }
  # vti6 tunnel over IPv6, between the addresses of A and B on the routed
  # segments a_r1 and b_r1.
  setup_vti6routed() {
    setup_vti 6 "${prefix6}:${a_r1}::1" "${prefix6}:${b_r1}::1" \
      "${tunnel6_a_addr}" "${tunnel6_b_addr}" "${tunnel6_mask}"
  }
  # Set up a VXLAN or GENEVE tunnel (id 1) between A and B, devices ${1}_a and
  # ${1}_b.
  #
  # Arguments:
  #   ${1} tunnel type: vxlan or geneve
  #   ${2} endpoint address in A
  #   ${3} endpoint address in B
  #   ${4} extra 'ip link add' options, may be empty
  #   ${5} optional: bridge in A. If given, the tunnel addresses of A go on
  #        the bridge and ${1}_a is enslaved to it, instead of carrying the
  #        addresses itself
  #
  # For vxlan, 'ttl 64 dstport 4789' and an explicit local address are added.
  # Returns 1 if the device can't be created in A; otherwise, the status of
  # the last command.
  setup_vxlan_or_geneve() {
    type="${1}"
    a_addr="${2}"
    b_addr="${3}"
    opts="${4}"
    br_if_a="${5}"

    case "${type}" in
    vxlan)
      opts="${opts} ttl 64 dstport 4789"
      opts_a="local ${a_addr}"
      opts_b="local ${b_addr}"
      ;;
    *)
      opts_a=""
      opts_b=""
      ;;
    esac

    # ${opts}, ${opts_a} and ${opts_b} are left unquoted on purpose: each
    # holds zero or more words
    if ! run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts}; then
      return 1
    fi
    run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

    if [ -z "${br_if_a}" ]; then
      run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
      run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
    else
      run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
      run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
      run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
    fi

    run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
    run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

    run_cmd ${ns_a} ip link set ${type}_a up
    run_cmd ${ns_b} ip link set ${type}_b up
  }
  # GENEVE tunnel over IPv4 between A and B, with 'df set'.
  setup_geneve4() {
    setup_vxlan_or_geneve geneve \
      "${prefix4}.${a_r1}.1" "${prefix4}.${b_r1}.1" "df set"
  }
  573. setup_vxlan4() {
  574. setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set"
  575. }
  576. setup_geneve6() {
  577. setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
  578. }
  579. setup_vxlan6() {
  580. setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
  581. }
  582. setup_bridged_geneve4() {
  583. setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
  584. }
  585. setup_bridged_vxlan4() {
  586. setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
  587. }
  588. setup_bridged_geneve6() {
  589. setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
  590. }
  591. setup_bridged_vxlan6() {
  592. setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
  593. }
  594. setup_xfrm() {
  595. proto=${1}
  596. veth_a_addr="${2}"
  597. veth_b_addr="${3}"
  598. encap=${4}
  599. run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1
  600. run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
  601. run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
  602. run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
  603. run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
  604. run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
  605. run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
  606. run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
  607. }
  608. setup_nettest_xfrm() {
  609. if ! which nettest >/dev/null; then
  610. PATH=$PWD:$PATH
  611. if ! which nettest >/dev/null; then
  612. echo "'nettest' command not found; skipping tests"
  613. return 1
  614. fi
  615. fi
  616. [ ${1} -eq 6 ] && proto="-6" || proto=""
  617. port=${2}
  618. run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
  619. nettest_pids="${nettest_pids} $!"
  620. run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
  621. nettest_pids="${nettest_pids} $!"
  622. }
  623. setup_xfrm4() {
  624. setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
  625. }
  626. setup_xfrm6() {
  627. setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
  628. }
  629. setup_xfrm4udp() {
  630. setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
  631. setup_nettest_xfrm 4 4500
  632. }
  633. setup_xfrm6udp() {
  634. setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
  635. setup_nettest_xfrm 6 4500
  636. }
  637. setup_xfrm4udprouted() {
  638. setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
  639. setup_nettest_xfrm 4 4500
  640. }
  641. setup_xfrm6udprouted() {
  642. setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
  643. setup_nettest_xfrm 6 4500
  644. }
  645. setup_routing_old() {
  646. for i in ${routes}; do
  647. [ "${ns}" = "" ] && ns="${i}" && continue
  648. [ "${addr}" = "" ] && addr="${i}" && continue
  649. [ "${gw}" = "" ] && gw="${i}"
  650. ns_name="$(nsname ${ns})"
  651. ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
  652. ns=""; addr=""; gw=""
  653. done
  654. }
  655. setup_routing_new() {
  656. for i in ${nexthops}; do
  657. [ "${ns}" = "" ] && ns="${i}" && continue
  658. [ "${fam}" = "" ] && fam="${i}" && continue
  659. [ "${nhid}" = "" ] && nhid="${i}" && continue
  660. [ "${gw}" = "" ] && gw="${i}" && continue
  661. [ "${dev}" = "" ] && dev="${i}"
  662. ns_name="$(nsname ${ns})"
  663. ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}
  664. ns=""; fam=""; nhid=""; gw=""; dev=""
  665. done
  666. for i in ${routes_nh}; do
  667. [ "${ns}" = "" ] && ns="${i}" && continue
  668. [ "${fam}" = "" ] && fam="${i}" && continue
  669. [ "${addr}" = "" ] && addr="${i}" && continue
  670. [ "${nhid}" = "" ] && nhid="${i}"
  671. ns_name="$(nsname ${ns})"
  672. ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
  673. ns=""; fam=""; addr=""; nhid=""
  674. done
  675. }
  676. setup_routing() {
  677. for i in ${NS_R1} ${NS_R2}; do
  678. ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
  679. ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
  680. done
  681. for i in ${routing_addrs}; do
  682. [ "${ns}" = "" ] && ns="${i}" && continue
  683. [ "${peer}" = "" ] && peer="${i}" && continue
  684. [ "${segment}" = "" ] && segment="${i}"
  685. ns_name="$(nsname ${ns})"
  686. peer_name="$(nsname ${peer})"
  687. if="veth_${ns}-${peer}"
  688. ifpeer="veth_${peer}-${ns}"
  689. # Create veth links
  690. ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
  691. ip -n ${peer_name} link set dev ${ifpeer} up
  692. # Add addresses
  693. ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if}
  694. ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if}
  695. ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer}
  696. ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
  697. ns=""; peer=""; segment=""
  698. done
  699. if [ "$USE_NH" = "yes" ]; then
  700. setup_routing_new
  701. else
  702. setup_routing_old
  703. fi
  704. return 0
  705. }
  706. setup_policy_routing() {
  707. setup_routing
  708. ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
  709. table "${rt_table}"
  710. # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
  711. # have an option do to it.
  712. tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
  713. tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
  714. tc -netns "${NS_A}" filter add dev veth_A-R1 \
  715. protocol ipv4 flower ip_proto udp \
  716. action pedit ex munge ip df set 0x40 pipe csum ip and udp
  717. tc -netns "${NS_A}" filter add dev veth_A-R2 \
  718. protocol ipv4 flower ip_proto udp \
  719. action pedit ex munge ip df set 0x40 pipe csum ip and udp
  720. }
  721. setup_bridge() {
  722. run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
  723. run_cmd ${ns_a} ip link set br0 up
  724. run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
  725. run_cmd ${ns_c} ip link set veth_A-C netns ns-A
  726. run_cmd ${ns_a} ip link set veth_A-C up
  727. run_cmd ${ns_c} ip link set veth_C-A up
  728. run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
  729. run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
  730. run_cmd ${ns_a} ip link set veth_A-C master br0
  731. }
  732. setup_ovs_vxlan_or_geneve() {
  733. type="${1}"
  734. a_addr="${2}"
  735. b_addr="${3}"
  736. if [ "${type}" = "vxlan" ]; then
  737. opts="${opts} ttl 64 dstport 4789"
  738. opts_b="local ${b_addr}"
  739. fi
  740. run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
  741. set interface ${type}_a type=${type} \
  742. options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
  743. run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
  744. run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
  745. run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
  746. run_cmd ${ns_b} ip link set ${type}_b up
  747. }
  748. setup_ovs_geneve4() {
  749. setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
  750. }
  751. setup_ovs_vxlan4() {
  752. setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
  753. }
  754. setup_ovs_geneve6() {
  755. setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
  756. }
  757. setup_ovs_vxlan6() {
  758. setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
  759. }
  760. setup_ovs_bridge() {
  761. run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
  762. run_cmd ip link set ovs_br0 up
  763. run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
  764. run_cmd ${ns_c} ip link set veth_A-C netns 1
  765. run_cmd ip link set veth_A-C up
  766. run_cmd ${ns_c} ip link set veth_C-A up
  767. run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
  768. run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
  769. run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
  770. # Move veth_A-R1 to init
  771. run_cmd ${ns_a} ip link set veth_A-R1 netns 1
  772. run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
  773. run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
  774. run_cmd ip link set veth_A-R1 up
  775. run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
  776. run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
  777. }
  778. setup() {
  779. [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
  780. for arg do
  781. eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
  782. done
  783. }
  784. trace() {
  785. [ $TRACING -eq 0 ] && return
  786. for arg do
  787. [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
  788. ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
  789. tcpdump_pids="${tcpdump_pids} $!"
  790. ns_cmd=
  791. done
  792. sleep 1
  793. }
  794. cleanup() {
  795. for pid in ${tcpdump_pids}; do
  796. kill ${pid}
  797. done
  798. tcpdump_pids=
  799. for pid in ${nettest_pids}; do
  800. kill ${pid}
  801. done
  802. nettest_pids=
  803. for pid in ${socat_pids}; do
  804. kill "${pid}"
  805. done
  806. socat_pids=
  807. for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
  808. ip netns del ${n} 2> /dev/null
  809. done
  810. ip link del veth_A-C 2>/dev/null
  811. ip link del veth_A-R1 2>/dev/null
  812. ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
  813. ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
  814. }
  815. mtu() {
  816. ns_cmd="${1}"
  817. dev="${2}"
  818. mtu="${3}"
  819. ${ns_cmd} ip link set dev ${dev} mtu ${mtu}
  820. }
  821. mtu_parse() {
  822. input="${1}"
  823. next=0
  824. for i in ${input}; do
  825. [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
  826. [ ${next} -eq 1 ] && echo "${i}" && return
  827. [ ${next} -eq 2 ] && echo "lock ${i}" && return
  828. [ "${i}" = "mtu" ] && next=1
  829. done
  830. }
  831. link_get() {
  832. ns_cmd="${1}"
  833. name="${2}"
  834. ${ns_cmd} ip link show dev "${name}"
  835. }
  836. link_get_mtu() {
  837. ns_cmd="${1}"
  838. name="${2}"
  839. mtu_parse "$(link_get "${ns_cmd}" ${name})"
  840. }
  841. route_get_dst_exception() {
  842. ns_cmd="${1}"
  843. dst="${2}"
  844. dsfield="${3}"
  845. if [ -z "${dsfield}" ]; then
  846. dsfield=0
  847. fi
  848. ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
  849. }
  850. route_get_dst_pmtu_from_exception() {
  851. ns_cmd="${1}"
  852. dst="${2}"
  853. dsfield="${3}"
  854. mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
  855. }
  856. check_pmtu_value() {
  857. expected="${1}"
  858. value="${2}"
  859. event="${3}"
  860. [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
  861. [ "${value}" = "${expected}" ] && return 0
  862. [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1
  863. [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1
  864. err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
  865. return 1
  866. }
  867. test_pmtu_ipvX() {
  868. family=${1}
  869. setup namespaces routing || return $ksft_skip
  870. trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  871. "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
  872. "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
  873. "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
  874. if [ ${family} -eq 4 ]; then
  875. ping=ping
  876. dst1="${prefix4}.${b_r1}.1"
  877. dst2="${prefix4}.${b_r2}.1"
  878. else
  879. ping=${ping6}
  880. dst1="${prefix6}:${b_r1}::1"
  881. dst2="${prefix6}:${b_r2}::1"
  882. fi
  883. # Set up initial MTU values
  884. mtu "${ns_a}" veth_A-R1 2000
  885. mtu "${ns_r1}" veth_R1-A 2000
  886. mtu "${ns_r1}" veth_R1-B 1400
  887. mtu "${ns_b}" veth_B-R1 1400
  888. mtu "${ns_a}" veth_A-R2 2000
  889. mtu "${ns_r2}" veth_R2-A 2000
  890. mtu "${ns_r2}" veth_R2-B 1500
  891. mtu "${ns_b}" veth_B-R2 1500
  892. # Create route exceptions
  893. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
  894. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
  895. # Check that exceptions have been created with the correct PMTU
  896. pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
  897. check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
  898. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  899. check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
  900. # Decrease local MTU below PMTU, check for PMTU decrease in route exception
  901. mtu "${ns_a}" veth_A-R1 1300
  902. mtu "${ns_r1}" veth_R1-A 1300
  903. pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
  904. check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
  905. # Second exception shouldn't be modified
  906. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  907. check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
  908. # Increase MTU, check for PMTU increase in route exception
  909. mtu "${ns_a}" veth_A-R1 1700
  910. mtu "${ns_r1}" veth_R1-A 1700
  911. pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
  912. check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
  913. # Second exception shouldn't be modified
  914. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  915. check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
  916. # Skip PMTU locking tests for IPv6
  917. [ $family -eq 6 ] && return 0
  918. # Decrease remote MTU on path via R2, get new exception
  919. mtu "${ns_r2}" veth_R2-B 400
  920. mtu "${ns_b}" veth_B-R2 400
  921. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
  922. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  923. check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
  924. # Decrease local MTU below PMTU
  925. mtu "${ns_a}" veth_A-R2 500
  926. mtu "${ns_r2}" veth_R2-A 500
  927. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  928. check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1
  929. # Increase local MTU
  930. mtu "${ns_a}" veth_A-R2 1500
  931. mtu "${ns_r2}" veth_R2-A 1500
  932. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  933. check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
  934. # Get new exception
  935. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
  936. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  937. check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
  938. }
  939. test_pmtu_ipv4_exception() {
  940. test_pmtu_ipvX 4
  941. }
  942. test_pmtu_ipv6_exception() {
  943. test_pmtu_ipvX 6
  944. }
  945. test_pmtu_ipv4_dscp_icmp_exception() {
  946. rt_table=100
  947. setup namespaces policy_routing || return $ksft_skip
  948. trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  949. "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
  950. "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
  951. "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
  952. # Set up initial MTU values
  953. mtu "${ns_a}" veth_A-R1 2000
  954. mtu "${ns_r1}" veth_R1-A 2000
  955. mtu "${ns_r1}" veth_R1-B 1400
  956. mtu "${ns_b}" veth_B-R1 1400
  957. mtu "${ns_a}" veth_A-R2 2000
  958. mtu "${ns_r2}" veth_R2-A 2000
  959. mtu "${ns_r2}" veth_R2-B 1500
  960. mtu "${ns_b}" veth_B-R2 1500
  961. len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
  962. dst1="${prefix4}.${b_r1}.1"
  963. dst2="${prefix4}.${b_r2}.1"
  964. # Create route exceptions
  965. dsfield=${policy_mark} # No ECN bit set (Not-ECT)
  966. run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
  967. dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
  968. run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
  969. # Check that exceptions have been created with the correct PMTU
  970. pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
  971. check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
  972. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
  973. check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
  974. }
  975. test_pmtu_ipv4_dscp_udp_exception() {
  976. rt_table=100
  977. if ! which socat > /dev/null 2>&1; then
  978. echo "'socat' command not found; skipping tests"
  979. return $ksft_skip
  980. fi
  981. setup namespaces policy_routing || return $ksft_skip
  982. trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  983. "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
  984. "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
  985. "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
  986. # Set up initial MTU values
  987. mtu "${ns_a}" veth_A-R1 2000
  988. mtu "${ns_r1}" veth_R1-A 2000
  989. mtu "${ns_r1}" veth_R1-B 1400
  990. mtu "${ns_b}" veth_B-R1 1400
  991. mtu "${ns_a}" veth_A-R2 2000
  992. mtu "${ns_r2}" veth_R2-A 2000
  993. mtu "${ns_r2}" veth_R2-B 1500
  994. mtu "${ns_b}" veth_B-R2 1500
  995. len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
  996. dst1="${prefix4}.${b_r1}.1"
  997. dst2="${prefix4}.${b_r2}.1"
  998. # Create route exceptions
  999. run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
  1000. socat_pids="${socat_pids} $!"
  1001. dsfield=${policy_mark} # No ECN bit set (Not-ECT)
  1002. run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
  1003. UDP:"${dst1}":50000,tos="${dsfield}"
  1004. dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
  1005. run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
  1006. UDP:"${dst2}":50000,tos="${dsfield}"
  1007. # Check that exceptions have been created with the correct PMTU
  1008. pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
  1009. check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
  1010. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
  1011. check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
  1012. }
  1013. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
  1014. type=${1}
  1015. family=${2}
  1016. outer_family=${3}
  1017. ll_mtu=4000
  1018. if [ ${outer_family} -eq 4 ]; then
  1019. setup namespaces routing ${type}4 || return $ksft_skip
  1020. # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
  1021. exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
  1022. else
  1023. setup namespaces routing ${type}6 || return $ksft_skip
  1024. # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
  1025. exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
  1026. fi
  1027. trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
  1028. "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1029. "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
  1030. if [ ${family} -eq 4 ]; then
  1031. ping=ping
  1032. dst=${tunnel4_b_addr}
  1033. else
  1034. ping=${ping6}
  1035. dst=${tunnel6_b_addr}
  1036. fi
  1037. # Create route exception by exceeding link layer MTU
  1038. mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
  1039. mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
  1040. mtu "${ns_b}" veth_B-R1 ${ll_mtu}
  1041. mtu "${ns_r1}" veth_R1-B ${ll_mtu}
  1042. mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
  1043. mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
  1044. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
  1045. # Check that exception was created
  1046. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
  1047. check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
  1048. }
  1049. test_pmtu_ipv4_vxlan4_exception() {
  1050. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 4
  1051. }
  1052. test_pmtu_ipv6_vxlan4_exception() {
  1053. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 4
  1054. }
  1055. test_pmtu_ipv4_geneve4_exception() {
  1056. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4
  1057. }
  1058. test_pmtu_ipv6_geneve4_exception() {
  1059. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4
  1060. }
  1061. test_pmtu_ipv4_vxlan6_exception() {
  1062. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 6
  1063. }
  1064. test_pmtu_ipv6_vxlan6_exception() {
  1065. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 6
  1066. }
  1067. test_pmtu_ipv4_geneve6_exception() {
  1068. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6
  1069. }
  1070. test_pmtu_ipv6_geneve6_exception() {
  1071. test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
  1072. }
  1073. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
  1074. type=${1}
  1075. family=${2}
  1076. outer_family=${3}
  1077. ll_mtu=4000
  1078. if [ ${outer_family} -eq 4 ]; then
  1079. setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
  1080. # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
  1081. exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
  1082. else
  1083. setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
  1084. # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
  1085. exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
  1086. fi
  1087. trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
  1088. "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1089. "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
  1090. "${ns_a}" br0 "${ns_a}" veth-A-C \
  1091. "${ns_c}" veth_C-A
  1092. if [ ${family} -eq 4 ]; then
  1093. ping=ping
  1094. dst=${tunnel4_b_addr}
  1095. else
  1096. ping=${ping6}
  1097. dst=${tunnel6_b_addr}
  1098. fi
  1099. # Create route exception by exceeding link layer MTU
  1100. mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
  1101. mtu "${ns_a}" br0 $((${ll_mtu} + 1000))
  1102. mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000))
  1103. mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
  1104. mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
  1105. mtu "${ns_b}" veth_B-R1 ${ll_mtu}
  1106. mtu "${ns_r1}" veth_R1-B ${ll_mtu}
  1107. mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
  1108. mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
  1109. run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
  1110. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1
  1111. # Check that exceptions were created
  1112. pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
  1113. check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
  1114. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
  1115. check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
  1116. }
  1117. test_pmtu_ipv4_br_vxlan4_exception() {
  1118. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4
  1119. }
  1120. test_pmtu_ipv6_br_vxlan4_exception() {
  1121. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4
  1122. }
  1123. test_pmtu_ipv4_br_geneve4_exception() {
  1124. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
  1125. }
  1126. test_pmtu_ipv6_br_geneve4_exception() {
  1127. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
  1128. }
  1129. test_pmtu_ipv4_br_vxlan6_exception() {
  1130. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6
  1131. }
  1132. test_pmtu_ipv6_br_vxlan6_exception() {
  1133. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6
  1134. }
  1135. test_pmtu_ipv4_br_geneve6_exception() {
  1136. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
  1137. }
  1138. test_pmtu_ipv6_br_geneve6_exception() {
  1139. test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
  1140. }
  1141. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
  1142. type=${1}
  1143. family=${2}
  1144. outer_family=${3}
  1145. ll_mtu=4000
  1146. if [ ${outer_family} -eq 4 ]; then
  1147. setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
  1148. # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
  1149. exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
  1150. else
  1151. setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
  1152. # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
  1153. exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
  1154. fi
  1155. if [ "${type}" = "vxlan" ]; then
  1156. tun_a="vxlan_sys_4789"
  1157. elif [ "${type}" = "geneve" ]; then
  1158. tun_a="genev_sys_6081"
  1159. fi
  1160. trace "" "${tun_a}" "${ns_b}" ${type}_b \
  1161. "" veth_A-R1 "${ns_r1}" veth_R1-A \
  1162. "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
  1163. "" ovs_br0 "" veth-A-C \
  1164. "${ns_c}" veth_C-A
  1165. if [ ${family} -eq 4 ]; then
  1166. ping=ping
  1167. dst=${tunnel4_b_addr}
  1168. else
  1169. ping=${ping6}
  1170. dst=${tunnel6_b_addr}
  1171. fi
  1172. # Create route exception by exceeding link layer MTU
  1173. mtu "" veth_A-R1 $((${ll_mtu} + 1000))
  1174. mtu "" ovs_br0 $((${ll_mtu} + 1000))
  1175. mtu "" veth_A-C $((${ll_mtu} + 1000))
  1176. mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
  1177. mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
  1178. mtu "${ns_b}" veth_B-R1 ${ll_mtu}
  1179. mtu "${ns_r1}" veth_R1-B ${ll_mtu}
  1180. mtu "" ${tun_a} $((${ll_mtu} + 1000))
  1181. mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
  1182. run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
  1183. # Check that exceptions were created
  1184. pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
  1185. check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
  1186. }
  1187. test_pmtu_ipv4_ovs_vxlan4_exception() {
  1188. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4
  1189. }
  1190. test_pmtu_ipv6_ovs_vxlan4_exception() {
  1191. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4
  1192. }
  1193. test_pmtu_ipv4_ovs_geneve4_exception() {
  1194. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
  1195. }
  1196. test_pmtu_ipv6_ovs_geneve4_exception() {
  1197. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
  1198. }
  1199. test_pmtu_ipv4_ovs_vxlan6_exception() {
  1200. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6
  1201. }
  1202. test_pmtu_ipv6_ovs_vxlan6_exception() {
  1203. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6
  1204. }
  1205. test_pmtu_ipv4_ovs_geneve6_exception() {
  1206. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
  1207. }
  1208. test_pmtu_ipv6_ovs_geneve6_exception() {
  1209. test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
  1210. }
  1211. test_pmtu_ipvX_over_fouY_or_gueY() {
  1212. inner_family=${1}
  1213. outer_family=${2}
  1214. encap=${3}
  1215. ll_mtu=4000
  1216. setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
  1217. trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
  1218. "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1219. "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
  1220. if [ ${inner_family} -eq 4 ]; then
  1221. ping=ping
  1222. dst=${tunnel4_b_addr}
  1223. else
  1224. ping=${ping6}
  1225. dst=${tunnel6_b_addr}
  1226. fi
  1227. if [ "${encap}" = "gue" ]; then
  1228. encap_overhead=4
  1229. else
  1230. encap_overhead=0
  1231. fi
  1232. if [ ${outer_family} -eq 4 ]; then
  1233. # IPv4 header UDP header
  1234. exp_mtu=$((${ll_mtu} - 20 - 8 - ${encap_overhead}))
  1235. else
  1236. # IPv6 header Option 4 UDP header
  1237. exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - ${encap_overhead}))
  1238. fi
  1239. # Create route exception by exceeding link layer MTU
  1240. mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
  1241. mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
  1242. mtu "${ns_b}" veth_B-R1 ${ll_mtu}
  1243. mtu "${ns_r1}" veth_R1-B ${ll_mtu}
  1244. mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
  1245. mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
  1246. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
  1247. # Check that exception was created
  1248. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
  1249. check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
  1250. }
  1251. test_pmtu_ipv4_fou4_exception() {
  1252. test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou
  1253. }
  1254. test_pmtu_ipv6_fou4_exception() {
  1255. test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou
  1256. }
  1257. test_pmtu_ipv4_fou6_exception() {
  1258. test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou
  1259. }
  1260. test_pmtu_ipv6_fou6_exception() {
  1261. test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou
  1262. }
  1263. test_pmtu_ipv4_gue4_exception() {
  1264. test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue
  1265. }
  1266. test_pmtu_ipv6_gue4_exception() {
  1267. test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue
  1268. }
  1269. test_pmtu_ipv4_gue6_exception() {
  1270. test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue
  1271. }
  1272. test_pmtu_ipv6_gue6_exception() {
  1273. test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
  1274. }
  1275. test_pmtu_ipvX_over_ipvY_exception() {
  1276. inner=${1}
  1277. outer=${2}
  1278. ll_mtu=4000
  1279. setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
  1280. trace "${ns_a}" ip_a "${ns_b}" ip_b \
  1281. "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1282. "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
  1283. if [ ${inner} -eq 4 ]; then
  1284. ping=ping
  1285. dst=${tunnel4_b_addr}
  1286. else
  1287. ping=${ping6}
  1288. dst=${tunnel6_b_addr}
  1289. fi
  1290. if [ ${outer} -eq 4 ]; then
  1291. # IPv4 header
  1292. exp_mtu=$((${ll_mtu} - 20))
  1293. else
  1294. # IPv6 header Option 4
  1295. exp_mtu=$((${ll_mtu} - 40 - 8))
  1296. fi
  1297. # Create route exception by exceeding link layer MTU
  1298. mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
  1299. mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
  1300. mtu "${ns_b}" veth_B-R1 ${ll_mtu}
  1301. mtu "${ns_r1}" veth_R1-B ${ll_mtu}
  1302. mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return
  1303. mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return
  1304. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
  1305. # Check that exception was created
  1306. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
  1307. check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
  1308. }
  1309. test_pmtu_ipv4_ipv4_exception() {
  1310. test_pmtu_ipvX_over_ipvY_exception 4 4
  1311. }
  1312. test_pmtu_ipv6_ipv4_exception() {
  1313. test_pmtu_ipvX_over_ipvY_exception 6 4
  1314. }
  1315. test_pmtu_ipv4_ipv6_exception() {
  1316. test_pmtu_ipvX_over_ipvY_exception 4 6
  1317. }
  1318. test_pmtu_ipv6_ipv6_exception() {
  1319. test_pmtu_ipvX_over_ipvY_exception 6 6
  1320. }
  1321. test_pmtu_vti4_exception() {
  1322. setup namespaces veth vti4 xfrm4 || return $ksft_skip
  1323. trace "${ns_a}" veth_a "${ns_b}" veth_b \
  1324. "${ns_a}" vti4_a "${ns_b}" vti4_b
  1325. veth_mtu=1500
  1326. vti_mtu=$((veth_mtu - 20))
  1327. # SPI SN IV ICV pad length next header
  1328. esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
  1329. ping_payload=$((esp_payload_rfc4106 - 28))
  1330. mtu "${ns_a}" veth_a ${veth_mtu}
  1331. mtu "${ns_b}" veth_b ${veth_mtu}
  1332. mtu "${ns_a}" vti4_a ${vti_mtu}
  1333. mtu "${ns_b}" vti4_b ${vti_mtu}
  1334. # Send DF packet without exceeding link layer MTU, check that no
  1335. # exception is created
  1336. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
  1337. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
  1338. check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
  1339. # Now exceed link layer MTU by one byte, check that exception is created
  1340. # with the right PMTU value
  1341. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
  1342. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
  1343. check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
  1344. }
  1345. test_pmtu_vti6_exception() {
  1346. setup namespaces veth vti6 xfrm6 || return $ksft_skip
  1347. trace "${ns_a}" veth_a "${ns_b}" veth_b \
  1348. "${ns_a}" vti6_a "${ns_b}" vti6_b
  1349. fail=0
  1350. # Create route exception by exceeding link layer MTU
  1351. mtu "${ns_a}" veth_a 4000
  1352. mtu "${ns_b}" veth_b 4000
  1353. mtu "${ns_a}" vti6_a 5000
  1354. mtu "${ns_b}" vti6_b 5000
  1355. run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
  1356. # Check that exception was created
  1357. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1358. check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
  1359. # Decrease tunnel MTU, check for PMTU decrease in route exception
  1360. mtu "${ns_a}" vti6_a 3000
  1361. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1362. check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
  1363. # Increase tunnel MTU, check for PMTU increase in route exception
  1364. mtu "${ns_a}" vti6_a 9000
  1365. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1366. check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
  1367. return ${fail}
  1368. }
  1369. test_pmtu_vti4_udp_exception() {
  1370. setup namespaces veth vti4 xfrm4udp || return $ksft_skip
  1371. trace "${ns_a}" veth_a "${ns_b}" veth_b \
  1372. "${ns_a}" vti4_a "${ns_b}" vti4_b
  1373. veth_mtu=1500
  1374. vti_mtu=$((veth_mtu - 20))
  1375. # UDP SPI SN IV ICV pad length next header
  1376. esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
  1377. ping_payload=$((esp_payload_rfc4106 - 28))
  1378. mtu "${ns_a}" veth_a ${veth_mtu}
  1379. mtu "${ns_b}" veth_b ${veth_mtu}
  1380. mtu "${ns_a}" vti4_a ${vti_mtu}
  1381. mtu "${ns_b}" vti4_b ${vti_mtu}
  1382. # Send DF packet without exceeding link layer MTU, check that no
  1383. # exception is created
  1384. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
  1385. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
  1386. check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
  1387. # Now exceed link layer MTU by one byte, check that exception is created
  1388. # with the right PMTU value
  1389. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
  1390. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
  1391. check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
  1392. }
  1393. test_pmtu_vti6_udp_exception() {
  1394. setup namespaces veth vti6 xfrm6udp || return $ksft_skip
  1395. trace "${ns_a}" veth_a "${ns_b}" veth_b \
  1396. "${ns_a}" vti6_a "${ns_b}" vti6_b
  1397. fail=0
  1398. # Create route exception by exceeding link layer MTU
  1399. mtu "${ns_a}" veth_a 4000
  1400. mtu "${ns_b}" veth_b 4000
  1401. mtu "${ns_a}" vti6_a 5000
  1402. mtu "${ns_b}" vti6_b 5000
  1403. run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
  1404. # Check that exception was created
  1405. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1406. check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
  1407. # Decrease tunnel MTU, check for PMTU decrease in route exception
  1408. mtu "${ns_a}" vti6_a 3000
  1409. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1410. check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
  1411. # Increase tunnel MTU, check for PMTU increase in route exception
  1412. mtu "${ns_a}" vti6_a 9000
  1413. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1414. check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
  1415. return ${fail}
  1416. }
  1417. test_pmtu_vti4_udp_routed_exception() {
  1418. setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
  1419. trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
  1420. "${ns_a}" vti4_a "${ns_b}" vti4_b
  1421. veth_mtu=1500
  1422. vti_mtu=$((veth_mtu - 20))
  1423. # UDP SPI SN IV ICV pad length next header
  1424. esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
  1425. ping_payload=$((esp_payload_rfc4106 - 28))
  1426. mtu "${ns_a}" veth_A-R1 ${veth_mtu}
  1427. mtu "${ns_r1}" veth_R1-A ${veth_mtu}
  1428. mtu "${ns_b}" veth_B-R1 ${veth_mtu}
  1429. mtu "${ns_r1}" veth_R1-B ${veth_mtu}
  1430. mtu "${ns_a}" vti4_a ${vti_mtu}
  1431. mtu "${ns_b}" vti4_b ${vti_mtu}
  1432. # Send DF packet without exceeding link layer MTU, check that no
  1433. # exception is created
  1434. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
  1435. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
  1436. check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
  1437. # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
  1438. # with the right PMTU value
  1439. mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
  1440. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
  1441. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
  1442. check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
  1443. }
  1444. test_pmtu_vti6_udp_routed_exception() {
  1445. setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
  1446. trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
  1447. "${ns_a}" vti6_a "${ns_b}" vti6_b
  1448. veth_mtu=1500
  1449. vti_mtu=$((veth_mtu - 40))
  1450. # UDP SPI SN IV ICV pad length next header
  1451. esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
  1452. ping_payload=$((esp_payload_rfc4106 - 48))
  1453. mtu "${ns_a}" veth_A-R1 ${veth_mtu}
  1454. mtu "${ns_r1}" veth_R1-A ${veth_mtu}
  1455. mtu "${ns_b}" veth_B-R1 ${veth_mtu}
  1456. mtu "${ns_r1}" veth_R1-B ${veth_mtu}
  1457. # mtu "${ns_a}" vti6_a ${vti_mtu}
  1458. # mtu "${ns_b}" vti6_b ${vti_mtu}
  1459. run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
  1460. # Check that exception was not created
  1461. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1462. check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
  1463. # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
  1464. # with the right PMTU value
  1465. mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
  1466. run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
  1467. pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
  1468. check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
  1469. }
  1470. test_pmtu_vti4_default_mtu() {
  1471. setup namespaces veth vti4 || return $ksft_skip
  1472. # Check that MTU of vti device is MTU of veth minus IPv4 header length
  1473. veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
  1474. vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
  1475. if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
  1476. err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
  1477. return 1
  1478. fi
  1479. }
  1480. test_pmtu_vti6_default_mtu() {
  1481. setup namespaces veth vti6 || return $ksft_skip
  1482. # Check that MTU of vti device is MTU of veth minus IPv6 header length
  1483. veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
  1484. vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
  1485. if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
  1486. err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
  1487. return 1
  1488. fi
  1489. }
  1490. test_pmtu_vti4_link_add_mtu() {
  1491. setup namespaces || return $ksft_skip
  1492. run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
  1493. [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip
  1494. run_cmd ${ns_a} ip link del vti4_a
  1495. fail=0
  1496. min=68
  1497. max=$((65535 - 20))
  1498. # Check invalid values first
  1499. for v in $((min - 1)) $((max + 1)); do
  1500. run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
  1501. # This can fail, or MTU can be adjusted to a proper value
  1502. [ $? -ne 0 ] && continue
  1503. mtu="$(link_get_mtu "${ns_a}" vti4_a)"
  1504. if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
  1505. err " vti tunnel created with invalid MTU ${mtu}"
  1506. fail=1
  1507. fi
  1508. run_cmd ${ns_a} ip link del vti4_a
  1509. done
  1510. # Now check valid values
  1511. for v in ${min} 1300 ${max}; do
  1512. run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
  1513. mtu="$(link_get_mtu "${ns_a}" vti4_a)"
  1514. run_cmd ${ns_a} ip link del vti4_a
  1515. if [ "${mtu}" != "${v}" ]; then
  1516. err " vti MTU ${mtu} doesn't match configured value ${v}"
  1517. fail=1
  1518. fi
  1519. done
  1520. return ${fail}
  1521. }
  1522. test_pmtu_vti6_link_add_mtu() {
  1523. setup namespaces || return $ksft_skip
  1524. run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
  1525. [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip
  1526. run_cmd ${ns_a} ip link del vti6_a
  1527. fail=0
  1528. min=68 # vti6 can carry IPv4 packets too
  1529. max=$((65535 - 40))
  1530. # Check invalid values first
  1531. for v in $((min - 1)) $((max + 1)); do
  1532. run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
  1533. # This can fail, or MTU can be adjusted to a proper value
  1534. [ $? -ne 0 ] && continue
  1535. mtu="$(link_get_mtu "${ns_a}" vti6_a)"
  1536. if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
  1537. err " vti6 tunnel created with invalid MTU ${v}"
  1538. fail=1
  1539. fi
  1540. run_cmd ${ns_a} ip link del vti6_a
  1541. done
  1542. # Now check valid values
  1543. for v in 68 1280 1300 $((65535 - 40)); do
  1544. run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
  1545. mtu="$(link_get_mtu "${ns_a}" vti6_a)"
  1546. run_cmd ${ns_a} ip link del vti6_a
  1547. if [ "${mtu}" != "${v}" ]; then
  1548. err " vti6 MTU ${mtu} doesn't match configured value ${v}"
  1549. fail=1
  1550. fi
  1551. done
  1552. return ${fail}
  1553. }
  1554. test_pmtu_vti6_link_change_mtu() {
  1555. setup namespaces || return $ksft_skip
  1556. run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
  1557. [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip
  1558. run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
  1559. run_cmd ${ns_a} ip link set dummy0 up
  1560. run_cmd ${ns_a} ip link set dummy1 up
  1561. run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
  1562. run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1
  1563. fail=0
  1564. # Create vti6 interface bound to device, passing MTU, check it
  1565. run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
  1566. mtu="$(link_get_mtu "${ns_a}" vti6_a)"
  1567. if [ ${mtu} -ne 1300 ]; then
  1568. err " vti6 MTU ${mtu} doesn't match configured value 1300"
  1569. fail=1
  1570. fi
  1571. # Move to another device with different MTU, without passing MTU, check
  1572. # MTU is adjusted
  1573. run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
  1574. mtu="$(link_get_mtu "${ns_a}" vti6_a)"
  1575. if [ ${mtu} -ne $((3000 - 40)) ]; then
  1576. err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
  1577. fail=1
  1578. fi
  1579. # Move it back, passing MTU, check MTU is not overridden
  1580. run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
  1581. mtu="$(link_get_mtu "${ns_a}" vti6_a)"
  1582. if [ ${mtu} -ne 1280 ]; then
  1583. err " vti6 MTU ${mtu} doesn't match configured value 1280"
  1584. fail=1
  1585. fi
  1586. return ${fail}
  1587. }
  1588. check_command() {
  1589. cmd=${1}
  1590. if ! which ${cmd} > /dev/null 2>&1; then
  1591. err " missing required command: '${cmd}'"
  1592. return 1
  1593. fi
  1594. return 0
  1595. }
  1596. test_cleanup_vxlanX_exception() {
  1597. outer="${1}"
  1598. encap="vxlan"
  1599. ll_mtu=4000
  1600. check_command taskset || return $ksft_skip
  1601. cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
  1602. setup namespaces routing ${encap}${outer} || return $ksft_skip
  1603. trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
  1604. "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1605. "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
  1606. # Create route exception by exceeding link layer MTU
  1607. mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
  1608. mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
  1609. mtu "${ns_b}" veth_B-R1 ${ll_mtu}
  1610. mtu "${ns_r1}" veth_R1-B ${ll_mtu}
  1611. mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
  1612. mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
  1613. # Fill exception cache for multiple CPUs (2)
  1614. # we can always use inner IPv4 for that
  1615. for cpu in ${cpu_list}; do
  1616. run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
  1617. done
  1618. ${ns_a} ip link del dev veth_A-R1 &
  1619. iplink_pid=$!
  1620. sleep 1
  1621. if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
  1622. err " can't delete veth device in a timely manner, PMTU dst likely leaked"
  1623. return 1
  1624. fi
  1625. }
  1626. test_cleanup_ipv6_exception() {
  1627. test_cleanup_vxlanX_exception 6
  1628. }
  1629. test_cleanup_ipv4_exception() {
  1630. test_cleanup_vxlanX_exception 4
  1631. }
  1632. run_test() {
  1633. (
  1634. tname="$1"
  1635. tdesc="$2"
  1636. unset IFS
  1637. # Since cleanup() relies on variables modified by this subshell, it
  1638. # has to run in this context.
  1639. trap cleanup EXIT
  1640. if [ "$VERBOSE" = "1" ]; then
  1641. printf "\n##########################################################################\n\n"
  1642. fi
  1643. eval test_${tname}
  1644. ret=$?
  1645. if [ $ret -eq 0 ]; then
  1646. printf "TEST: %-60s [ OK ]\n" "${tdesc}"
  1647. elif [ $ret -eq 1 ]; then
  1648. printf "TEST: %-60s [FAIL]\n" "${tdesc}"
  1649. if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
  1650. echo
  1651. echo "Pausing. Hit enter to continue"
  1652. read a
  1653. fi
  1654. err_flush
  1655. exit 1
  1656. elif [ $ret -eq $ksft_skip ]; then
  1657. printf "TEST: %-60s [SKIP]\n" "${tdesc}"
  1658. err_flush
  1659. fi
  1660. return $ret
  1661. )
  1662. ret=$?
  1663. case $ret in
  1664. 0)
  1665. all_skipped=false
  1666. [ $exitcode -eq $ksft_skip ] && exitcode=0
  1667. ;;
  1668. $ksft_skip)
  1669. [ $all_skipped = true ] && exitcode=$ksft_skip
  1670. ;;
  1671. *)
  1672. all_skipped=false
  1673. exitcode=1
  1674. ;;
  1675. esac
  1676. return $ret
  1677. }
  1678. run_test_nh() {
  1679. tname="$1"
  1680. tdesc="$2"
  1681. USE_NH=yes
  1682. run_test "${tname}" "${tdesc} - nexthop objects"
  1683. USE_NH=no
  1684. }
  1685. test_list_flush_ipv4_exception() {
  1686. setup namespaces routing || return $ksft_skip
  1687. trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1688. "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
  1689. "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
  1690. "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
  1691. dst_prefix1="${prefix4}.${b_r1}."
  1692. dst2="${prefix4}.${b_r2}.1"
  1693. # Set up initial MTU values
  1694. mtu "${ns_a}" veth_A-R1 2000
  1695. mtu "${ns_r1}" veth_R1-A 2000
  1696. mtu "${ns_r1}" veth_R1-B 1500
  1697. mtu "${ns_b}" veth_B-R1 1500
  1698. mtu "${ns_a}" veth_A-R2 2000
  1699. mtu "${ns_r2}" veth_R2-A 2000
  1700. mtu "${ns_r2}" veth_R2-B 1500
  1701. mtu "${ns_b}" veth_B-R2 1500
  1702. fail=0
  1703. # Add 100 addresses for veth endpoint on B reached by default A route
  1704. for i in $(seq 100 199); do
  1705. run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
  1706. done
  1707. # Create 100 cached route exceptions for path via R1, one via R2. Note
  1708. # that with IPv4 we need to actually cause a route lookup that matches
  1709. # the exception caused by ICMP, in order to actually have a cached
  1710. # route, so we need to ping each destination twice
  1711. for i in $(seq 100 199); do
  1712. run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
  1713. done
  1714. run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
  1715. if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
  1716. err " can't list cached exceptions"
  1717. fail=1
  1718. fi
  1719. run_cmd ${ns_a} ip route flush cache
  1720. pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}1)"
  1721. pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}2)"
  1722. if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
  1723. [ -n "$(${ns_a} ip route list cache)" ]; then
  1724. err " can't flush cached exceptions"
  1725. fail=1
  1726. fi
  1727. return ${fail}
  1728. }
  1729. test_list_flush_ipv6_exception() {
  1730. setup namespaces routing || return $ksft_skip
  1731. trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1732. "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
  1733. "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
  1734. "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
  1735. dst_prefix1="${prefix6}:${b_r1}::"
  1736. dst2="${prefix6}:${b_r2}::1"
  1737. # Set up initial MTU values
  1738. mtu "${ns_a}" veth_A-R1 2000
  1739. mtu "${ns_r1}" veth_R1-A 2000
  1740. mtu "${ns_r1}" veth_R1-B 1500
  1741. mtu "${ns_b}" veth_B-R1 1500
  1742. mtu "${ns_a}" veth_A-R2 2000
  1743. mtu "${ns_r2}" veth_R2-A 2000
  1744. mtu "${ns_r2}" veth_R2-B 1500
  1745. mtu "${ns_b}" veth_B-R2 1500
  1746. fail=0
  1747. # Add 100 addresses for veth endpoint on B reached by default A route
  1748. for i in $(seq 100 199); do
  1749. run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
  1750. done
  1751. # Create 100 cached route exceptions for path via R1, one via R2
  1752. for i in $(seq 100 199); do
  1753. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
  1754. done
  1755. run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
  1756. if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then
  1757. err " can't list cached exceptions"
  1758. fail=1
  1759. fi
  1760. run_cmd ${ns_a} ip -6 route flush cache
  1761. pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
  1762. pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  1763. if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
  1764. [ -n "$(${ns_a} ip -6 route list cache)" ]; then
  1765. err " can't flush cached exceptions"
  1766. fail=1
  1767. fi
  1768. return ${fail}
  1769. }
  1770. test_pmtu_ipvX_route_change() {
  1771. family=${1}
  1772. setup namespaces routing || return 2
  1773. trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
  1774. "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
  1775. "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
  1776. "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
  1777. if [ ${family} -eq 4 ]; then
  1778. ping=ping
  1779. dst1="${prefix4}.${b_r1}.1"
  1780. dst2="${prefix4}.${b_r2}.1"
  1781. gw="${prefix4}.${a_r1}.2"
  1782. else
  1783. ping=${ping6}
  1784. dst1="${prefix6}:${b_r1}::1"
  1785. dst2="${prefix6}:${b_r2}::1"
  1786. gw="${prefix6}:${a_r1}::2"
  1787. fi
  1788. # Set up initial MTU values
  1789. mtu "${ns_a}" veth_A-R1 2000
  1790. mtu "${ns_r1}" veth_R1-A 2000
  1791. mtu "${ns_r1}" veth_R1-B 1400
  1792. mtu "${ns_b}" veth_B-R1 1400
  1793. mtu "${ns_a}" veth_A-R2 2000
  1794. mtu "${ns_r2}" veth_R2-A 2000
  1795. mtu "${ns_r2}" veth_R2-B 1500
  1796. mtu "${ns_b}" veth_B-R2 1500
  1797. # Create route exceptions
  1798. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
  1799. run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
  1800. # Check that exceptions have been created with the correct PMTU
  1801. pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
  1802. check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
  1803. pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
  1804. check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
  1805. # Replace the route from A to R1
  1806. run_cmd ${ns_a} ip route change default via ${gw}
  1807. # Delete the device in A
  1808. run_cmd ${ns_a} ip link del "veth_A-R1"
  1809. }
  1810. test_pmtu_ipv4_route_change() {
  1811. test_pmtu_ipvX_route_change 4
  1812. }
  1813. test_pmtu_ipv6_route_change() {
  1814. test_pmtu_ipvX_route_change 6
  1815. }
  1816. usage() {
  1817. echo
  1818. echo "$0 [OPTIONS] [TEST]..."
  1819. echo "If no TEST argument is given, all tests will be run."
  1820. echo
  1821. echo "Options"
  1822. echo " --trace: capture traffic to TEST_INTERFACE.pcap"
  1823. echo
  1824. echo "Available tests${tests}"
  1825. exit 1
  1826. }
  1827. ################################################################################
  1828. #
  1829. exitcode=0
  1830. desc=0
  1831. all_skipped=true
  1832. while getopts :ptv o
  1833. do
  1834. case $o in
  1835. p) PAUSE_ON_FAIL=yes;;
  1836. v) VERBOSE=1;;
  1837. t) if which tcpdump > /dev/null 2>&1; then
  1838. TRACING=1
  1839. else
  1840. echo "=== tcpdump not available, tracing disabled"
  1841. fi
  1842. ;;
  1843. *) usage;;
  1844. esac
  1845. done
  1846. shift $(($OPTIND-1))
  1847. IFS="
  1848. "
  1849. for arg do
  1850. # Check first that all requested tests are available before running any
  1851. command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
  1852. done
  1853. trap cleanup EXIT
  1854. # start clean
  1855. cleanup
  1856. HAVE_NH=no
  1857. ip nexthop ls >/dev/null 2>&1
  1858. [ $? -eq 0 ] && HAVE_NH=yes
  1859. name=""
  1860. desc=""
  1861. rerun_nh=0
  1862. for t in ${tests}; do
  1863. [ "${name}" = "" ] && name="${t}" && continue
  1864. [ "${desc}" = "" ] && desc="${t}" && continue
  1865. if [ "${HAVE_NH}" = "yes" ]; then
  1866. rerun_nh="${t}"
  1867. fi
  1868. run_this=1
  1869. for arg do
  1870. [ "${arg}" != "${arg#--*}" ] && continue
  1871. [ "${arg}" = "${name}" ] && run_this=1 && break
  1872. run_this=0
  1873. done
  1874. if [ $run_this -eq 1 ]; then
  1875. run_test "${name}" "${desc}"
  1876. # if test was skipped no need to retry with nexthop objects
  1877. [ $? -eq $ksft_skip ] && rerun_nh=0
  1878. if [ "${rerun_nh}" = "1" ]; then
  1879. run_test_nh "${name}" "${desc}"
  1880. fi
  1881. fi
  1882. name=""
  1883. desc=""
  1884. rerun_nh=0
  1885. done
  1886. exit ${exitcode}