#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.
  9. Usage() {
  10. echo "Script for testing HBM (Host Bandwidth Manager) framework."
  11. echo "It creates a cgroup to use for testing and load a BPF program to limit"
  12. echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  13. echo "loads. The output is the goodput in Mbps (unless -D was used)."
  14. echo ""
  15. echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  16. echo " [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
  17. echo " [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id >]"
  18. echo " [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  19. echo " [-q=<qdisc>] [-R] [-s=<server>|--server=<server]"
  20. echo " [-S|--stats] -t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  21. echo " Where:"
  22. echo " out egress (default)"
  23. echo " -b or --bpf BPF program filename to load and attach."
  24. echo " Default is hbm_out_kern.o for egress,"
  25. echo " -c or -cc TCP congestion control (cubic or dctcp)"
  26. echo " --debug print BPF trace buffer"
  27. echo " -d or --delay add a delay in ms using netem"
  28. echo " -D In addition to the goodput in Mbps, it also outputs"
  29. echo " other detailed information. This information is"
  30. echo " test dependent (i.e. iperf3 or netperf)."
  31. echo " -E enable ECN (not required for dctcp)"
  32. echo " --edt use fq's Earliest Departure Time (requires fq)"
  33. echo " -f or --flows number of concurrent flows (default=1)"
  34. echo " -i or --id cgroup id (an integer, default is 1)"
  35. echo " -N use netperf instead of iperf3"
  36. echo " --no_cn Do not return CN notifications"
  37. echo " -l do not limit flows using loopback"
  38. echo " -h Help"
  39. echo " -p or --port iperf3 port (default is 5201)"
  40. echo " -P use an iperf3 instance for each flow"
  41. echo " -q use the specified qdisc"
  42. echo " -r or --rate rate in Mbps (default 1s 1Gbps)"
  43. echo " -R Use TCP_RR for netperf. 1st flow has req"
  44. echo " size of 10KB, rest of 1MB. Reply in all"
  45. echo " cases is 1 byte."
  46. echo " More detailed output for each flow can be found"
  47. echo " in the files netperf.<cg>.<flow>, where <cg> is the"
  48. echo " cgroup id as specified with the -i flag, and <flow>"
  49. echo " is the flow id starting at 1 and increasing by 1 for"
  50. echo " flow (as specified by -f)."
  51. echo " -s or --server hostname of netperf server. Used to create netperf"
  52. echo " test traffic between to hosts (default is within host)"
  53. echo " netserver must be running on the host."
  54. echo " -S or --stats whether to update hbm stats (default is yes)."
  55. echo " -t or --time duration of iperf3 in seconds (default=5)"
  56. echo " -w Work conserving flag. cgroup can increase its"
  57. echo " bandwidth beyond the rate limit specified"
  58. echo " while there is available bandwidth. Current"
  59. echo " implementation assumes there is only one NIC"
  60. echo " (eth0), but can be extended to support multiple"
  61. echo " NICs."
  62. echo " cubic or dctcp specify which TCP CC to use"
  63. echo " "
  64. exit
  65. }
  66. #set -x
  67. debug_flag=0
  68. args="$@"
  69. name="$0"
  70. netem=0
  71. cc=x
  72. dir="-o"
  73. dir_name="out"
  74. dur=5
  75. flows=1
  76. id=1
  77. prog=""
  78. port=5201
  79. rate=1000
  80. multi_iperf=0
  81. flow_cnt=1
  82. use_netperf=0
  83. rr=0
  84. ecn=0
  85. details=0
  86. server=""
  87. qdisc=""
  88. flags=""
  89. do_stats=0
  90. BPFFS=/sys/fs/bpf
  91. function config_bpffs () {
  92. if mount | grep $BPFFS > /dev/null; then
  93. echo "bpffs already mounted"
  94. else
  95. echo "bpffs not mounted. Mounting..."
  96. mount -t bpf none $BPFFS
  97. fi
  98. }
  99. function start_hbm () {
  100. rm -f hbm.out
  101. echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  102. echo " " >> hbm.out
  103. ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  104. echo $!
  105. }
  106. processArgs () {
  107. for i in $args ; do
  108. case $i in
  109. # Support for upcomming ingress rate limiting
  110. #in) # support for upcoming ingress rate limiting
  111. # dir="-i"
  112. # dir_name="in"
  113. # ;;
  114. out)
  115. dir="-o"
  116. dir_name="out"
  117. ;;
  118. -b=*|--bpf=*)
  119. prog="${i#*=}"
  120. ;;
  121. -c=*|--cc=*)
  122. cc="${i#*=}"
  123. ;;
  124. --no_cn)
  125. flags="$flags --no_cn"
  126. ;;
  127. --debug)
  128. flags="$flags -d"
  129. debug_flag=1
  130. ;;
  131. -d=*|--delay=*)
  132. netem="${i#*=}"
  133. ;;
  134. -D)
  135. details=1
  136. ;;
  137. -E)
  138. ecn=1
  139. ;;
  140. --edt)
  141. flags="$flags --edt"
  142. qdisc="fq"
  143. ;;
  144. -f=*|--flows=*)
  145. flows="${i#*=}"
  146. ;;
  147. -i=*|--id=*)
  148. id="${i#*=}"
  149. ;;
  150. -l)
  151. flags="$flags -l"
  152. ;;
  153. -N)
  154. use_netperf=1
  155. ;;
  156. -p=*|--port=*)
  157. port="${i#*=}"
  158. ;;
  159. -P)
  160. multi_iperf=1
  161. ;;
  162. -q=*)
  163. qdisc="${i#*=}"
  164. ;;
  165. -r=*|--rate=*)
  166. rate="${i#*=}"
  167. ;;
  168. -R)
  169. rr=1
  170. ;;
  171. -s=*|--server=*)
  172. server="${i#*=}"
  173. ;;
  174. -S|--stats)
  175. flags="$flags -s"
  176. do_stats=1
  177. ;;
  178. -t=*|--time=*)
  179. dur="${i#*=}"
  180. ;;
  181. -w)
  182. flags="$flags -w"
  183. ;;
  184. cubic)
  185. cc=cubic
  186. ;;
  187. dctcp)
  188. cc=dctcp
  189. ;;
  190. *)
  191. echo "Unknown arg:$i"
  192. Usage
  193. ;;
  194. esac
  195. done
  196. }
  197. processArgs
  198. config_bpffs
  199. if [ $debug_flag -eq 1 ] ; then
  200. rm -f hbm_out.log
  201. fi
  202. hbm_pid=$(start_hbm)
  203. usleep 100000
  204. host=`hostname`
  205. cg_base_dir=/sys/fs/cgroup/unified
  206. cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"
  207. echo $$ >> $cg_dir/cgroup.procs
  208. ulimit -l unlimited
  209. rm -f ss.out
  210. rm -f hbm.[0-9]*.$dir_name
  211. if [ $ecn -ne 0 ] ; then
  212. sysctl -w -q -n net.ipv4.tcp_ecn=1
  213. fi
  214. if [ $use_netperf -eq 0 ] ; then
  215. cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  216. if [ "$cc" != "x" ] ; then
  217. sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  218. fi
  219. fi
  220. if [ "$netem" -ne "0" ] ; then
  221. if [ "$qdisc" != "" ] ; then
  222. echo "WARNING: Ignoring -q options because -d option used"
  223. fi
  224. tc qdisc del dev lo root > /dev/null 2>&1
  225. tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
  226. elif [ "$qdisc" != "" ] ; then
  227. tc qdisc del dev eth0 root > /dev/null 2>&1
  228. tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
  229. fi
  230. n=0
  231. m=$[$dur * 5]
  232. hn="::1"
  233. if [ $use_netperf -ne 0 ] ; then
  234. if [ "$server" != "" ] ; then
  235. hn=$server
  236. fi
  237. fi
  238. ( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
  239. if [ $use_netperf -ne 0 ] ; then
  240. begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
  241. awk '{ print $1 }'`
  242. if [ "$begNetserverPid" == "" ] ; then
  243. if [ "$server" == "" ] ; then
  244. ( ./netserver > /dev/null 2>&1) &
  245. usleep 100000
  246. fi
  247. fi
  248. flow_cnt=1
  249. if [ "$server" == "" ] ; then
  250. np_server=$host
  251. else
  252. np_server=$server
  253. fi
  254. if [ "$cc" == "x" ] ; then
  255. np_cc=""
  256. else
  257. np_cc="-K $cc,$cc"
  258. fi
  259. replySize=1
  260. while [ $flow_cnt -le $flows ] ; do
  261. if [ $rr -ne 0 ] ; then
  262. reqSize=1M
  263. if [ $flow_cnt -eq 1 ] ; then
  264. reqSize=10K
  265. fi
  266. if [ "$dir" == "-i" ] ; then
  267. replySize=$reqSize
  268. reqSize=1
  269. fi
  270. ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
  271. else
  272. if [ "$dir" == "-i" ] ; then
  273. ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
  274. else
  275. ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
  276. fi
  277. fi
  278. flow_cnt=$[flow_cnt+1]
  279. done
  280. # sleep for duration of test (plus some buffer)
  281. n=$[dur+2]
  282. sleep $n
  283. # force graceful termination of netperf
  284. pids=`pgrep netperf`
  285. for p in $pids ; do
  286. kill -SIGALRM $p
  287. done
  288. flow_cnt=1
  289. rate=0
  290. if [ $details -ne 0 ] ; then
  291. echo ""
  292. echo "Details for HBM in cgroup $id"
  293. if [ $do_stats -eq 1 ] ; then
  294. if [ -e hbm.$id.$dir_name ] ; then
  295. cat hbm.$id.$dir_name
  296. fi
  297. fi
  298. fi
  299. while [ $flow_cnt -le $flows ] ; do
  300. if [ "$dir" == "-i" ] ; then
  301. r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
  302. else
  303. r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
  304. fi
  305. echo "rate for flow $flow_cnt: $r"
  306. rate=$[rate+r]
  307. if [ $details -ne 0 ] ; then
  308. echo "-----"
  309. echo "Details for cgroup $id, flow $flow_cnt"
  310. cat netperf.$id.$flow_cnt
  311. fi
  312. flow_cnt=$[flow_cnt+1]
  313. done
  314. if [ $details -ne 0 ] ; then
  315. echo ""
  316. delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
  317. echo "PING AVG DELAY:$delay"
  318. echo "AGGREGATE_GOODPUT:$rate"
  319. else
  320. echo $rate
  321. fi
  322. elif [ $multi_iperf -eq 0 ] ; then
  323. (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  324. usleep 100000
  325. iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  326. rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  327. rate=`echo $rates | grep -o "[0-9]*$"`
  328. if [ $details -ne 0 ] ; then
  329. echo ""
  330. echo "Details for HBM in cgroup $id"
  331. if [ $do_stats -eq 1 ] ; then
  332. if [ -e hbm.$id.$dir_name ] ; then
  333. cat hbm.$id.$dir_name
  334. fi
  335. fi
  336. delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
  337. echo "PING AVG DELAY:$delay"
  338. echo "AGGREGATE_GOODPUT:$rate"
  339. else
  340. echo $rate
  341. fi
  342. else
  343. flow_cnt=1
  344. while [ $flow_cnt -le $flows ] ; do
  345. (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  346. ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
  347. port=$[port+1]
  348. flow_cnt=$[flow_cnt+1]
  349. done
  350. n=$[dur+1]
  351. sleep $n
  352. flow_cnt=1
  353. rate=0
  354. if [ $details -ne 0 ] ; then
  355. echo ""
  356. echo "Details for HBM in cgroup $id"
  357. if [ $do_stats -eq 1 ] ; then
  358. if [ -e hbm.$id.$dir_name ] ; then
  359. cat hbm.$id.$dir_name
  360. fi
  361. fi
  362. fi
  363. while [ $flow_cnt -le $flows ] ; do
  364. r=`cat iperf3.$id.$flow_cnt`
  365. # echo "rate for flow $flow_cnt: $r"
  366. if [ $details -ne 0 ] ; then
  367. echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
  368. fi
  369. rate=$[rate+r]
  370. flow_cnt=$[flow_cnt+1]
  371. done
  372. if [ $details -ne 0 ] ; then
  373. delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
  374. echo "PING AVG DELAY:$delay"
  375. echo "AGGREGATE_GOODPUT:$rate"
  376. else
  377. echo $rate
  378. fi
  379. fi
  380. if [ $use_netperf -eq 0 ] ; then
  381. sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
  382. fi
  383. if [ $ecn -ne 0 ] ; then
  384. sysctl -w -q -n net.ipv4.tcp_ecn=0
  385. fi
  386. if [ "$netem" -ne "0" ] ; then
  387. tc qdisc del dev lo root > /dev/null 2>&1
  388. fi
  389. if [ "$qdisc" != "" ] ; then
  390. tc qdisc del dev eth0 root > /dev/null 2>&1
  391. fi
  392. sleep 2
  393. hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
  394. if [ "$hbmPid" == "$hbm_pid" ] ; then
  395. kill $hbm_pid
  396. fi
  397. sleep 1
  398. # Detach any pinned BPF programs that may have lingered
  399. rm -rf $BPFFS/hbm*
  400. if [ $use_netperf -ne 0 ] ; then
  401. if [ "$server" == "" ] ; then
  402. if [ "$begNetserverPid" == "" ] ; then
  403. netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
  404. if [ "$netserverPid" != "" ] ; then
  405. kill $netserverPid
  406. fi
  407. fi
  408. fi
  409. fi
  410. exit