msg_zerocopy.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811
  1. /* Evaluate MSG_ZEROCOPY
  2. *
  3. * Send traffic between two processes over one of the supported
  4. * protocols and modes:
  5. *
  6. * PF_INET/PF_INET6
  7. * - SOCK_STREAM
  8. * - SOCK_DGRAM
  9. * - SOCK_DGRAM with UDP_CORK
  10. * - SOCK_RAW
  11. * - SOCK_RAW with IP_HDRINCL
  12. *
  13. * PF_PACKET
  14. * - SOCK_DGRAM
  15. * - SOCK_RAW
  16. *
  17. * PF_RDS
  18. * - SOCK_SEQPACKET
  19. *
  20. * Start this program on two connected hosts, one in send mode and
  21. * the other with option '-r' to put it in receiver mode.
  22. *
  23. * If zerocopy mode ('-z') is enabled, the sender will verify that
  24. * the kernel queues completions on the error queue for all zerocopy
  25. * transfers.
  26. */
  27. #define _GNU_SOURCE
  28. #include <arpa/inet.h>
  29. #include <error.h>
  30. #include <errno.h>
  31. #include <limits.h>
  32. #include <linux/errqueue.h>
  33. #include <linux/if_packet.h>
  34. #include <linux/ipv6.h>
  35. #include <linux/socket.h>
  36. #include <linux/sockios.h>
  37. #include <net/ethernet.h>
  38. #include <net/if.h>
  39. #include <netinet/ip.h>
  40. #include <netinet/ip6.h>
  41. #include <netinet/tcp.h>
  42. #include <netinet/udp.h>
  43. #include <poll.h>
  44. #include <sched.h>
  45. #include <stdbool.h>
  46. #include <stdio.h>
  47. #include <stdint.h>
  48. #include <stdlib.h>
  49. #include <string.h>
  50. #include <sys/ioctl.h>
  51. #include <sys/socket.h>
  52. #include <sys/stat.h>
  53. #include <sys/time.h>
  54. #include <sys/types.h>
  55. #include <sys/wait.h>
  56. #include <unistd.h>
  57. #include <linux/rds.h>
  58. #ifndef SO_EE_ORIGIN_ZEROCOPY
  59. #define SO_EE_ORIGIN_ZEROCOPY 5
  60. #endif
  61. #ifndef SO_ZEROCOPY
  62. #define SO_ZEROCOPY 60
  63. #endif
  64. #ifndef SO_EE_CODE_ZEROCOPY_COPIED
  65. #define SO_EE_CODE_ZEROCOPY_COPIED 1
  66. #endif
  67. #ifndef MSG_ZEROCOPY
  68. #define MSG_ZEROCOPY 0x4000000
  69. #endif
  /* Run-time configuration, filled in from the command line by parse_opts(). */
  static bool cfg_rx;                     /* -r: run as receiver */
  static bool cfg_zerocopy;               /* -z: pass MSG_ZEROCOPY on send */
  static bool cfg_cork_mixed;             /* -m: alternate copy and zerocopy frags */
  static int cfg_cork;                    /* -c: sends per corked datagram, 0 = off */
  static int cfg_cpu = -1;                /* -C: default: pin to last cpu */
  static int cfg_family = PF_UNSPEC;      /* -4 / -6 */
  static int cfg_ifindex = 1;             /* -i: interface used for PF_PACKET */
  static int cfg_payload_len;             /* -s: payload bytes per send */
  static int cfg_port = 8000;             /* -p */
  static int cfg_runtime_ms = 4200;       /* -t: test duration */
  static int cfg_verbose;                 /* -v: may be repeated */
  static int cfg_waittime_ms = 500;       /* poll() timeout and completion grace period */

  /* Addresses built by setup_sockaddr(); cfg_alen is their length. */
  static socklen_t cfg_alen;
  static struct sockaddr_storage cfg_dst_addr;
  static struct sockaddr_storage cfg_src_addr;

  /* Payload pattern buffer, filled by do_test(). */
  static char payload[IP_MAXPACKET];

  /* Traffic and completion counters. */
  static long packets;
  static long bytes;
  static long completions;
  static long expected_completions;

  /* -1 until the first completion is seen, then 1 (zerocopy) or 0 (copied). */
  static int zerocopied = -1;

  /* Lowest notification id expected in the next completion. */
  static uint32_t next_completion;
  /* Return the current wall-clock time in milliseconds. */
  static unsigned long gettimeofday_ms(void)
  {
      struct timeval now;
      unsigned long msec;

      gettimeofday(&now, NULL);

      msec = now.tv_sec * 1000;
      msec += now.tv_usec / 1000;
      return msec;
  }
  /*
   * Compute the one's-complement checksum over num_words 16-bit words
   * starting at start. Words are summed as stored (no byte swapping).
   * A non-positive num_words yields 0xFFFF.
   */
  static uint16_t get_ip_csum(const uint16_t *start, int num_words)
  {
      const uint16_t *word = start;
      unsigned long acc = 0;
      int left;

      for (left = num_words; left > 0; left--)
          acc += *word++;

      /* fold the carries back in until the sum fits in 16 bits */
      while (acc > 0xFFFF)
          acc = (acc >> 16) + (acc & 0xFFFF);

      return (uint16_t)~acc;
  }
  105. static int do_setcpu(int cpu)
  106. {
  107. cpu_set_t mask;
  108. CPU_ZERO(&mask);
  109. CPU_SET(cpu, &mask);
  110. if (sched_setaffinity(0, sizeof(mask), &mask))
  111. fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
  112. else if (cfg_verbose)
  113. fprintf(stderr, "cpu: %u\n", cpu);
  114. return 0;
  115. }
  /* Set an integer socket option; exit with an error message on failure. */
  static void do_setsockopt(int fd, int level, int optname, int val)
  {
      int rc = setsockopt(fd, level, optname, &val, sizeof(val));

      if (rc == 0)
          return;

      error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
  }
  121. static int do_poll(int fd, int events)
  122. {
  123. struct pollfd pfd;
  124. int ret;
  125. pfd.events = events;
  126. pfd.revents = 0;
  127. pfd.fd = fd;
  128. ret = poll(&pfd, 1, cfg_waittime_ms);
  129. if (ret == -1)
  130. error(1, errno, "poll");
  131. return ret && (pfd.revents & events);
  132. }
  /*
   * Accept one connection on the listening socket fd, then close the
   * listening socket. Returns the connected socket; exits on any failure.
   */
  static int do_accept(int fd)
  {
      int conn = accept(fd, NULL, NULL);

      if (conn == -1)
          error(1, errno, "accept");

      /* only a single peer is served, so the listener is no longer needed */
      if (close(fd))
          error(1, errno, "close listen sock");

      return conn;
  }
  /*
   * Write an RDS zerocopy cookie control message into msg->msg_control.
   *
   * The caller must have pointed msg->msg_control at a buffer of at least
   * CMSG_SPACE(sizeof(cookie)) bytes. Exits if msg->msg_control is NULL.
   */
  static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
  {
      struct cmsghdr *cm;

      /* caller error, not a failed syscall: errno carries no information here */
      if (!msg->msg_control)
          error(1, 0, "NULL cookie");

      cm = msg->msg_control;
      cm->cmsg_len = CMSG_LEN(sizeof(cookie));
      cm->cmsg_level = SOL_RDS;
      cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
      memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
  }
  154. static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
  155. {
  156. int ret, len, i, flags;
  157. static uint32_t cookie;
  158. char ckbuf[CMSG_SPACE(sizeof(cookie))];
  159. len = 0;
  160. for (i = 0; i < msg->msg_iovlen; i++)
  161. len += msg->msg_iov[i].iov_len;
  162. flags = MSG_DONTWAIT;
  163. if (do_zerocopy) {
  164. flags |= MSG_ZEROCOPY;
  165. if (domain == PF_RDS) {
  166. memset(&msg->msg_control, 0, sizeof(msg->msg_control));
  167. msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
  168. msg->msg_control = (struct cmsghdr *)ckbuf;
  169. add_zcopy_cookie(msg, ++cookie);
  170. }
  171. }
  172. ret = sendmsg(fd, msg, flags);
  173. if (ret == -1 && errno == EAGAIN)
  174. return false;
  175. if (ret == -1)
  176. error(1, errno, "send");
  177. if (cfg_verbose && ret != len)
  178. fprintf(stderr, "send: ret=%u != %u\n", ret, len);
  179. if (len) {
  180. packets++;
  181. bytes += ret;
  182. if (do_zerocopy && ret)
  183. expected_completions++;
  184. }
  185. if (do_zerocopy && domain == PF_RDS) {
  186. msg->msg_control = NULL;
  187. msg->msg_controllen = 0;
  188. }
  189. return true;
  190. }
  191. static void do_sendmsg_corked(int fd, struct msghdr *msg)
  192. {
  193. bool do_zerocopy = cfg_zerocopy;
  194. int i, payload_len, extra_len;
  195. /* split up the packet. for non-multiple, make first buffer longer */
  196. payload_len = cfg_payload_len / cfg_cork;
  197. extra_len = cfg_payload_len - (cfg_cork * payload_len);
  198. do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
  199. for (i = 0; i < cfg_cork; i++) {
  200. /* in mixed-frags mode, alternate zerocopy and copy frags
  201. * start with non-zerocopy, to ensure attach later works
  202. */
  203. if (cfg_cork_mixed)
  204. do_zerocopy = (i & 1);
  205. msg->msg_iov[0].iov_len = payload_len + extra_len;
  206. extra_len = 0;
  207. do_sendmsg(fd, msg, do_zerocopy,
  208. (cfg_dst_addr.ss_family == AF_INET ?
  209. PF_INET : PF_INET6));
  210. }
  211. do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
  212. }
  213. static int setup_iph(struct iphdr *iph, uint16_t payload_len)
  214. {
  215. struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
  216. struct sockaddr_in *saddr = (void *) &cfg_src_addr;
  217. memset(iph, 0, sizeof(*iph));
  218. iph->version = 4;
  219. iph->tos = 0;
  220. iph->ihl = 5;
  221. iph->ttl = 2;
  222. iph->saddr = saddr->sin_addr.s_addr;
  223. iph->daddr = daddr->sin_addr.s_addr;
  224. iph->protocol = IPPROTO_EGP;
  225. iph->tot_len = htons(sizeof(*iph) + payload_len);
  226. iph->check = get_ip_csum((void *) iph, iph->ihl << 1);
  227. return sizeof(*iph);
  228. }
  229. static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
  230. {
  231. struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
  232. struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
  233. memset(ip6h, 0, sizeof(*ip6h));
  234. ip6h->version = 6;
  235. ip6h->payload_len = htons(payload_len);
  236. ip6h->nexthdr = IPPROTO_EGP;
  237. ip6h->hop_limit = 2;
  238. ip6h->saddr = saddr->sin6_addr;
  239. ip6h->daddr = daddr->sin6_addr;
  240. return sizeof(*ip6h);
  241. }
  242. static void setup_sockaddr(int domain, const char *str_addr,
  243. struct sockaddr_storage *sockaddr)
  244. {
  245. struct sockaddr_in6 *addr6 = (void *) sockaddr;
  246. struct sockaddr_in *addr4 = (void *) sockaddr;
  247. switch (domain) {
  248. case PF_INET:
  249. memset(addr4, 0, sizeof(*addr4));
  250. addr4->sin_family = AF_INET;
  251. addr4->sin_port = htons(cfg_port);
  252. if (str_addr &&
  253. inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
  254. error(1, 0, "ipv4 parse error: %s", str_addr);
  255. break;
  256. case PF_INET6:
  257. memset(addr6, 0, sizeof(*addr6));
  258. addr6->sin6_family = AF_INET6;
  259. addr6->sin6_port = htons(cfg_port);
  260. if (str_addr &&
  261. inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
  262. error(1, 0, "ipv6 parse error: %s", str_addr);
  263. break;
  264. default:
  265. error(1, 0, "illegal domain");
  266. }
  267. }
  268. static int do_setup_tx(int domain, int type, int protocol)
  269. {
  270. int fd;
  271. fd = socket(domain, type, protocol);
  272. if (fd == -1)
  273. error(1, errno, "socket t");
  274. do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
  275. if (cfg_zerocopy)
  276. do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
  277. if (domain != PF_PACKET && domain != PF_RDS)
  278. if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
  279. error(1, errno, "connect");
  280. if (domain == PF_RDS) {
  281. if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
  282. error(1, errno, "bind");
  283. }
  284. return fd;
  285. }
  286. static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
  287. {
  288. int i;
  289. if (ck->num > RDS_MAX_ZCOOKIES)
  290. error(1, 0, "Returned %d cookies, max expected %d\n",
  291. ck->num, RDS_MAX_ZCOOKIES);
  292. for (i = 0; i < ck->num; i++)
  293. if (cfg_verbose >= 2)
  294. fprintf(stderr, "%d\n", ck->cookies[i]);
  295. return ck->num;
  296. }
  297. static bool do_recvmsg_completion(int fd)
  298. {
  299. char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
  300. struct rds_zcopy_cookies *ck;
  301. struct cmsghdr *cmsg;
  302. struct msghdr msg;
  303. bool ret = false;
  304. memset(&msg, 0, sizeof(msg));
  305. msg.msg_control = cmsgbuf;
  306. msg.msg_controllen = sizeof(cmsgbuf);
  307. if (recvmsg(fd, &msg, MSG_DONTWAIT))
  308. return ret;
  309. if (msg.msg_flags & MSG_CTRUNC)
  310. error(1, errno, "recvmsg notification: truncated");
  311. for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
  312. if (cmsg->cmsg_level == SOL_RDS &&
  313. cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
  314. ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
  315. completions += do_process_zerocopy_cookies(ck);
  316. ret = true;
  317. break;
  318. }
  319. error(0, 0, "ignoring cmsg at level %d type %d\n",
  320. cmsg->cmsg_level, cmsg->cmsg_type);
  321. }
  322. return ret;
  323. }
  324. static bool do_recv_completion(int fd, int domain)
  325. {
  326. struct sock_extended_err *serr;
  327. struct msghdr msg = {};
  328. struct cmsghdr *cm;
  329. uint32_t hi, lo, range;
  330. int ret, zerocopy;
  331. char control[100];
  332. if (domain == PF_RDS)
  333. return do_recvmsg_completion(fd);
  334. msg.msg_control = control;
  335. msg.msg_controllen = sizeof(control);
  336. ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
  337. if (ret == -1 && errno == EAGAIN)
  338. return false;
  339. if (ret == -1)
  340. error(1, errno, "recvmsg notification");
  341. if (msg.msg_flags & MSG_CTRUNC)
  342. error(1, errno, "recvmsg notification: truncated");
  343. cm = CMSG_FIRSTHDR(&msg);
  344. if (!cm)
  345. error(1, 0, "cmsg: no cmsg");
  346. if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
  347. (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
  348. (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
  349. error(1, 0, "serr: wrong type: %d.%d",
  350. cm->cmsg_level, cm->cmsg_type);
  351. serr = (void *) CMSG_DATA(cm);
  352. if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
  353. error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
  354. if (serr->ee_errno != 0)
  355. error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
  356. hi = serr->ee_data;
  357. lo = serr->ee_info;
  358. range = hi - lo + 1;
  359. /* Detect notification gaps. These should not happen often, if at all.
  360. * Gaps can occur due to drops, reordering and retransmissions.
  361. */
  362. if (lo != next_completion)
  363. fprintf(stderr, "gap: %u..%u does not append to %u\n",
  364. lo, hi, next_completion);
  365. next_completion = hi + 1;
  366. zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
  367. if (zerocopied == -1)
  368. zerocopied = zerocopy;
  369. else if (zerocopied != zerocopy) {
  370. fprintf(stderr, "serr: inconsistent\n");
  371. zerocopied = zerocopy;
  372. }
  373. if (cfg_verbose >= 2)
  374. fprintf(stderr, "completed: %u (h=%u l=%u)\n",
  375. range, hi, lo);
  376. completions += range;
  377. return true;
  378. }
  /* Drain the completion queue: keep reading until no notification is left. */
  static void do_recv_completions(int fd, int domain)
  {
      for (;;) {
          if (!do_recv_completion(fd, domain))
              break;
      }
  }
  384. /* Wait for all remaining completions on the errqueue */
  385. static void do_recv_remaining_completions(int fd, int domain)
  386. {
  387. int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
  388. while (completions < expected_completions &&
  389. gettimeofday_ms() < tstop) {
  390. if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
  391. do_recv_completions(fd, domain);
  392. }
  393. if (completions < expected_completions)
  394. fprintf(stderr, "missing notifications: %lu < %lu\n",
  395. completions, expected_completions);
  396. }
  397. static void do_tx(int domain, int type, int protocol)
  398. {
  399. struct iovec iov[3] = { {0} };
  400. struct sockaddr_ll laddr;
  401. struct msghdr msg = {0};
  402. struct ethhdr eth;
  403. union {
  404. struct ipv6hdr ip6h;
  405. struct iphdr iph;
  406. } nh;
  407. uint64_t tstop;
  408. int fd;
  409. fd = do_setup_tx(domain, type, protocol);
  410. if (domain == PF_PACKET) {
  411. uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
  412. /* sock_raw passes ll header as data */
  413. if (type == SOCK_RAW) {
  414. memset(eth.h_dest, 0x06, ETH_ALEN);
  415. memset(eth.h_source, 0x02, ETH_ALEN);
  416. eth.h_proto = htons(proto);
  417. iov[0].iov_base = &eth;
  418. iov[0].iov_len = sizeof(eth);
  419. msg.msg_iovlen++;
  420. }
  421. /* both sock_raw and sock_dgram expect name */
  422. memset(&laddr, 0, sizeof(laddr));
  423. laddr.sll_family = AF_PACKET;
  424. laddr.sll_ifindex = cfg_ifindex;
  425. laddr.sll_protocol = htons(proto);
  426. laddr.sll_halen = ETH_ALEN;
  427. memset(laddr.sll_addr, 0x06, ETH_ALEN);
  428. msg.msg_name = &laddr;
  429. msg.msg_namelen = sizeof(laddr);
  430. }
  431. /* packet and raw sockets with hdrincl must pass network header */
  432. if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
  433. if (cfg_family == PF_INET)
  434. iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
  435. else
  436. iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
  437. iov[1].iov_base = (void *) &nh;
  438. msg.msg_iovlen++;
  439. }
  440. if (domain == PF_RDS) {
  441. msg.msg_name = &cfg_dst_addr;
  442. msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
  443. sizeof(struct sockaddr_in) :
  444. sizeof(struct sockaddr_in6));
  445. }
  446. iov[2].iov_base = payload;
  447. iov[2].iov_len = cfg_payload_len;
  448. msg.msg_iovlen++;
  449. msg.msg_iov = &iov[3 - msg.msg_iovlen];
  450. tstop = gettimeofday_ms() + cfg_runtime_ms;
  451. do {
  452. if (cfg_cork)
  453. do_sendmsg_corked(fd, &msg);
  454. else
  455. do_sendmsg(fd, &msg, cfg_zerocopy, domain);
  456. while (!do_poll(fd, POLLOUT)) {
  457. if (cfg_zerocopy)
  458. do_recv_completions(fd, domain);
  459. }
  460. } while (gettimeofday_ms() < tstop);
  461. if (cfg_zerocopy)
  462. do_recv_remaining_completions(fd, domain);
  463. if (close(fd))
  464. error(1, errno, "close");
  465. fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
  466. packets, bytes >> 20, completions,
  467. zerocopied == 1 ? 'y' : 'n');
  468. }
  469. static int do_setup_rx(int domain, int type, int protocol)
  470. {
  471. int fd;
  472. /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
  473. * to recv the only copy of the packet, not a clone
  474. */
  475. if (domain == PF_PACKET)
  476. error(1, 0, "Use PF_INET/SOCK_RAW to read");
  477. if (type == SOCK_RAW && protocol == IPPROTO_RAW)
  478. error(1, 0, "IPPROTO_RAW: not supported on Rx");
  479. fd = socket(domain, type, protocol);
  480. if (fd == -1)
  481. error(1, errno, "socket r");
  482. do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
  483. do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
  484. do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
  485. if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
  486. error(1, errno, "bind");
  487. if (type == SOCK_STREAM) {
  488. if (listen(fd, 1))
  489. error(1, errno, "listen");
  490. fd = do_accept(fd);
  491. }
  492. return fd;
  493. }
  494. /* Flush all outstanding bytes for the tcp receive queue */
  495. static void do_flush_tcp(int fd)
  496. {
  497. int ret;
  498. /* MSG_TRUNC flushes up to len bytes */
  499. ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
  500. if (ret == -1 && errno == EAGAIN)
  501. return;
  502. if (ret == -1)
  503. error(1, errno, "flush");
  504. if (!ret)
  505. return;
  506. packets++;
  507. bytes += ret;
  508. }
  509. /* Flush all outstanding datagrams. Verify first few bytes of each. */
  510. static void do_flush_datagram(int fd, int type)
  511. {
  512. int ret, off = 0;
  513. char buf[64];
  514. /* MSG_TRUNC will return full datagram length */
  515. ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
  516. if (ret == -1 && errno == EAGAIN)
  517. return;
  518. /* raw ipv4 return with header, raw ipv6 without */
  519. if (cfg_family == PF_INET && type == SOCK_RAW) {
  520. off += sizeof(struct iphdr);
  521. ret -= sizeof(struct iphdr);
  522. }
  523. if (ret == -1)
  524. error(1, errno, "recv");
  525. if (ret != cfg_payload_len)
  526. error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
  527. if (ret > sizeof(buf) - off)
  528. ret = sizeof(buf) - off;
  529. if (memcmp(buf + off, payload, ret))
  530. error(1, 0, "recv: data mismatch");
  531. packets++;
  532. bytes += cfg_payload_len;
  533. }
  534. static void do_rx(int domain, int type, int protocol)
  535. {
  536. const int cfg_receiver_wait_ms = 400;
  537. uint64_t tstop;
  538. int fd;
  539. fd = do_setup_rx(domain, type, protocol);
  540. tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms;
  541. do {
  542. if (type == SOCK_STREAM)
  543. do_flush_tcp(fd);
  544. else
  545. do_flush_datagram(fd, type);
  546. do_poll(fd, POLLIN);
  547. } while (gettimeofday_ms() < tstop);
  548. if (close(fd))
  549. error(1, errno, "close");
  550. fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
  551. }
  552. static void do_test(int domain, int type, int protocol)
  553. {
  554. int i;
  555. if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
  556. error(1, 0, "can only cork udp sockets");
  557. do_setcpu(cfg_cpu);
  558. for (i = 0; i < IP_MAXPACKET; i++)
  559. payload[i] = 'a' + (i % 26);
  560. if (cfg_rx)
  561. do_rx(domain, type, protocol);
  562. else
  563. do_tx(domain, type, protocol);
  564. }
  565. static void usage(const char *filepath)
  566. {
  567. error(1, 0, "Usage: %s [options] <test>", filepath);
  568. }
  569. static void parse_opts(int argc, char **argv)
  570. {
  571. const int max_payload_len = sizeof(payload) -
  572. sizeof(struct ipv6hdr) -
  573. sizeof(struct tcphdr) -
  574. 40 /* max tcp options */;
  575. int c;
  576. char *daddr = NULL, *saddr = NULL;
  577. char *cfg_test;
  578. cfg_payload_len = max_payload_len;
  579. while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
  580. switch (c) {
  581. case '4':
  582. if (cfg_family != PF_UNSPEC)
  583. error(1, 0, "Pass one of -4 or -6");
  584. cfg_family = PF_INET;
  585. cfg_alen = sizeof(struct sockaddr_in);
  586. break;
  587. case '6':
  588. if (cfg_family != PF_UNSPEC)
  589. error(1, 0, "Pass one of -4 or -6");
  590. cfg_family = PF_INET6;
  591. cfg_alen = sizeof(struct sockaddr_in6);
  592. break;
  593. case 'c':
  594. cfg_cork = strtol(optarg, NULL, 0);
  595. break;
  596. case 'C':
  597. cfg_cpu = strtol(optarg, NULL, 0);
  598. break;
  599. case 'D':
  600. daddr = optarg;
  601. break;
  602. case 'i':
  603. cfg_ifindex = if_nametoindex(optarg);
  604. if (cfg_ifindex == 0)
  605. error(1, errno, "invalid iface: %s", optarg);
  606. break;
  607. case 'm':
  608. cfg_cork_mixed = true;
  609. break;
  610. case 'p':
  611. cfg_port = strtoul(optarg, NULL, 0);
  612. break;
  613. case 'r':
  614. cfg_rx = true;
  615. break;
  616. case 's':
  617. cfg_payload_len = strtoul(optarg, NULL, 0);
  618. break;
  619. case 'S':
  620. saddr = optarg;
  621. break;
  622. case 't':
  623. cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
  624. break;
  625. case 'v':
  626. cfg_verbose++;
  627. break;
  628. case 'z':
  629. cfg_zerocopy = true;
  630. break;
  631. }
  632. }
  633. cfg_test = argv[argc - 1];
  634. if (strcmp(cfg_test, "rds") == 0) {
  635. if (!daddr)
  636. error(1, 0, "-D <server addr> required for PF_RDS\n");
  637. if (!cfg_rx && !saddr)
  638. error(1, 0, "-S <client addr> required for PF_RDS\n");
  639. }
  640. setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
  641. setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
  642. if (cfg_payload_len > max_payload_len)
  643. error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
  644. if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
  645. error(1, 0, "-m: cork_mixed requires corking and zerocopy");
  646. if (optind != argc - 1)
  647. usage(argv[0]);
  648. }
  649. int main(int argc, char **argv)
  650. {
  651. const char *cfg_test;
  652. parse_opts(argc, argv);
  653. cfg_test = argv[argc - 1];
  654. if (!strcmp(cfg_test, "packet"))
  655. do_test(PF_PACKET, SOCK_RAW, 0);
  656. else if (!strcmp(cfg_test, "packet_dgram"))
  657. do_test(PF_PACKET, SOCK_DGRAM, 0);
  658. else if (!strcmp(cfg_test, "raw"))
  659. do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
  660. else if (!strcmp(cfg_test, "raw_hdrincl"))
  661. do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
  662. else if (!strcmp(cfg_test, "tcp"))
  663. do_test(cfg_family, SOCK_STREAM, 0);
  664. else if (!strcmp(cfg_test, "udp"))
  665. do_test(cfg_family, SOCK_DGRAM, 0);
  666. else if (!strcmp(cfg_test, "rds"))
  667. do_test(PF_RDS, SOCK_SEQPACKET, 0);
  668. else
  669. error(1, 0, "unknown cfg_test %s", cfg_test);
  670. return 0;
  671. }