procacct.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. // SPDX-License-Identifier: GPL-2.0
  2. /* procacct.c
  3. *
  4. * Demonstrator of fetching resource data on task exit, as a way
  5. * to accumulate accurate program resource usage statistics, without
  6. * prior identification of the programs. For that, the fields for
  7. * device and inode of the program executable binary file are also
  8. * extracted in addition to the command string.
  9. *
  10. * The TGID together with the PID and the AGROUP flag allow
  11. * identification of threads in a process and single-threaded processes.
  12. * The ac_tgetime field gives proper whole-process walltime.
  13. *
  14. * Written (changed) by Thomas Orgis, University of Hamburg in 2022
  15. *
  16. * This is a cheap derivation (inheriting the style) of getdelays.c:
  17. *
  18. * Utility to get per-pid and per-tgid delay accounting statistics
  19. * Also illustrates usage of the taskstats interface
  20. *
  21. * Copyright (C) Shailabh Nagar, IBM Corp. 2005
  22. * Copyright (C) Balbir Singh, IBM Corp. 2006
  23. * Copyright (c) Jay Lan, SGI. 2006
  24. */
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <errno.h>
  28. #include <unistd.h>
  29. #include <poll.h>
  30. #include <string.h>
  31. #include <fcntl.h>
  32. #include <sys/types.h>
  33. #include <sys/stat.h>
  34. #include <sys/socket.h>
  35. #include <sys/wait.h>
  36. #include <signal.h>
  37. #include <linux/genetlink.h>
  38. #include <linux/acct.h>
  39. #include <linux/taskstats.h>
  40. #include <linux/kdev_t.h>
  41. /*
  42. * Generic macros for dealing with netlink sockets. Might be duplicated
  43. * elsewhere. It is recommended that commercial grade applications use
  44. * libnl or libnetlink and use the interfaces provided by the library
  45. */
  46. #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
  47. #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
  48. #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
  49. #define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
  50. #define err(code, fmt, arg...) \
  51. do { \
  52. fprintf(stderr, fmt, ##arg); \
  53. exit(code); \
  54. } while (0)
  55. int rcvbufsz;
  56. char name[100];
  57. int dbg;
  58. int print_delays;
  59. int print_io_accounting;
  60. int print_task_context_switch_counts;
  61. #define PRINTF(fmt, arg...) { \
  62. if (dbg) { \
  63. printf(fmt, ##arg); \
  64. } \
  65. }
  66. /* Maximum size of response requested or message sent */
  67. #define MAX_MSG_SIZE 1024
  68. /* Maximum number of cpus expected to be specified in a cpumask */
  69. #define MAX_CPUS 32
  70. struct msgtemplate {
  71. struct nlmsghdr n;
  72. struct genlmsghdr g;
  73. char buf[MAX_MSG_SIZE];
  74. };
  75. char cpumask[100+6*MAX_CPUS];
  76. static void usage(void)
  77. {
  78. fprintf(stderr, "procacct [-v] [-w logfile] [-r bufsize] [-m cpumask]\n");
  79. fprintf(stderr, " -v: debug on\n");
  80. }
  81. /*
  82. * Create a raw netlink socket and bind
  83. */
  84. static int create_nl_socket(int protocol)
  85. {
  86. int fd;
  87. struct sockaddr_nl local;
  88. fd = socket(AF_NETLINK, SOCK_RAW, protocol);
  89. if (fd < 0)
  90. return -1;
  91. if (rcvbufsz)
  92. if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
  93. &rcvbufsz, sizeof(rcvbufsz)) < 0) {
  94. fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
  95. rcvbufsz);
  96. goto error;
  97. }
  98. memset(&local, 0, sizeof(local));
  99. local.nl_family = AF_NETLINK;
  100. if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
  101. goto error;
  102. return fd;
  103. error:
  104. close(fd);
  105. return -1;
  106. }
  107. static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
  108. __u8 genl_cmd, __u16 nla_type,
  109. void *nla_data, int nla_len)
  110. {
  111. struct nlattr *na;
  112. struct sockaddr_nl nladdr;
  113. int r, buflen;
  114. char *buf;
  115. struct msgtemplate msg;
  116. msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
  117. msg.n.nlmsg_type = nlmsg_type;
  118. msg.n.nlmsg_flags = NLM_F_REQUEST;
  119. msg.n.nlmsg_seq = 0;
  120. msg.n.nlmsg_pid = nlmsg_pid;
  121. msg.g.cmd = genl_cmd;
  122. msg.g.version = 0x1;
  123. na = (struct nlattr *) GENLMSG_DATA(&msg);
  124. na->nla_type = nla_type;
  125. na->nla_len = nla_len + 1 + NLA_HDRLEN;
  126. memcpy(NLA_DATA(na), nla_data, nla_len);
  127. msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
  128. buf = (char *) &msg;
  129. buflen = msg.n.nlmsg_len;
  130. memset(&nladdr, 0, sizeof(nladdr));
  131. nladdr.nl_family = AF_NETLINK;
  132. while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
  133. sizeof(nladdr))) < buflen) {
  134. if (r > 0) {
  135. buf += r;
  136. buflen -= r;
  137. } else if (errno != EAGAIN)
  138. return -1;
  139. }
  140. return 0;
  141. }
  142. /*
  143. * Probe the controller in genetlink to find the family id
  144. * for the TASKSTATS family
  145. */
  146. static int get_family_id(int sd)
  147. {
  148. struct {
  149. struct nlmsghdr n;
  150. struct genlmsghdr g;
  151. char buf[256];
  152. } ans;
  153. int id = 0, rc;
  154. struct nlattr *na;
  155. int rep_len;
  156. strcpy(name, TASKSTATS_GENL_NAME);
  157. rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
  158. CTRL_ATTR_FAMILY_NAME, (void *)name,
  159. strlen(TASKSTATS_GENL_NAME)+1);
  160. if (rc < 0)
  161. return 0; /* sendto() failure? */
  162. rep_len = recv(sd, &ans, sizeof(ans), 0);
  163. if (ans.n.nlmsg_type == NLMSG_ERROR ||
  164. (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
  165. return 0;
  166. na = (struct nlattr *) GENLMSG_DATA(&ans);
  167. na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
  168. if (na->nla_type == CTRL_ATTR_FAMILY_ID)
  169. id = *(__u16 *) NLA_DATA(na);
  170. return id;
  171. }
  172. #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
  173. static void print_procacct(struct taskstats *t)
  174. {
  175. /* First letter: T is a mere thread, G the last in a group, U unknown. */
  176. printf(
  177. "%c pid=%lu tgid=%lu uid=%lu wall=%llu gwall=%llu cpu=%llu vmpeak=%llu rsspeak=%llu dev=%lu:%lu inode=%llu comm=%s\n"
  178. , t->version >= 12 ? (t->ac_flag & AGROUP ? 'P' : 'T') : '?'
  179. , (unsigned long)t->ac_pid
  180. , (unsigned long)(t->version >= 12 ? t->ac_tgid : 0)
  181. , (unsigned long)t->ac_uid
  182. , (unsigned long long)t->ac_etime
  183. , (unsigned long long)(t->version >= 12 ? t->ac_tgetime : 0)
  184. , (unsigned long long)(t->ac_utime+t->ac_stime)
  185. , (unsigned long long)t->hiwater_vm
  186. , (unsigned long long)t->hiwater_rss
  187. , (unsigned long)(t->version >= 12 ? MAJOR(t->ac_exe_dev) : 0)
  188. , (unsigned long)(t->version >= 12 ? MINOR(t->ac_exe_dev) : 0)
  189. , (unsigned long long)(t->version >= 12 ? t->ac_exe_inode : 0)
  190. , t->ac_comm
  191. );
  192. }
  193. void handle_aggr(int mother, struct nlattr *na, int fd)
  194. {
  195. int aggr_len = NLA_PAYLOAD(na->nla_len);
  196. int len2 = 0;
  197. pid_t rtid = 0;
  198. na = (struct nlattr *) NLA_DATA(na);
  199. while (len2 < aggr_len) {
  200. switch (na->nla_type) {
  201. case TASKSTATS_TYPE_PID:
  202. rtid = *(int *) NLA_DATA(na);
  203. PRINTF("PID\t%d\n", rtid);
  204. break;
  205. case TASKSTATS_TYPE_TGID:
  206. rtid = *(int *) NLA_DATA(na);
  207. PRINTF("TGID\t%d\n", rtid);
  208. break;
  209. case TASKSTATS_TYPE_STATS:
  210. if (mother == TASKSTATS_TYPE_AGGR_PID)
  211. print_procacct((struct taskstats *) NLA_DATA(na));
  212. if (fd) {
  213. if (write(fd, NLA_DATA(na), na->nla_len) < 0)
  214. err(1, "write error\n");
  215. }
  216. break;
  217. case TASKSTATS_TYPE_NULL:
  218. break;
  219. default:
  220. fprintf(stderr, "Unknown nested nla_type %d\n",
  221. na->nla_type);
  222. break;
  223. }
  224. len2 += NLA_ALIGN(na->nla_len);
  225. na = (struct nlattr *)((char *)na +
  226. NLA_ALIGN(na->nla_len));
  227. }
  228. }
  229. int main(int argc, char *argv[])
  230. {
  231. int c, rc, rep_len, aggr_len, len2;
  232. int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
  233. __u16 id;
  234. __u32 mypid;
  235. struct nlattr *na;
  236. int nl_sd = -1;
  237. int len = 0;
  238. pid_t tid = 0;
  239. int fd = 0;
  240. int write_file = 0;
  241. int maskset = 0;
  242. char *logfile = NULL;
  243. int containerset = 0;
  244. char *containerpath = NULL;
  245. int cfd = 0;
  246. int forking = 0;
  247. sigset_t sigset;
  248. struct msgtemplate msg;
  249. while (!forking) {
  250. c = getopt(argc, argv, "m:vr:");
  251. if (c < 0)
  252. break;
  253. switch (c) {
  254. case 'w':
  255. logfile = strdup(optarg);
  256. printf("write to file %s\n", logfile);
  257. write_file = 1;
  258. break;
  259. case 'r':
  260. rcvbufsz = atoi(optarg);
  261. printf("receive buf size %d\n", rcvbufsz);
  262. if (rcvbufsz < 0)
  263. err(1, "Invalid rcv buf size\n");
  264. break;
  265. case 'm':
  266. strncpy(cpumask, optarg, sizeof(cpumask));
  267. cpumask[sizeof(cpumask) - 1] = '\0';
  268. maskset = 1;
  269. break;
  270. case 'v':
  271. printf("debug on\n");
  272. dbg = 1;
  273. break;
  274. default:
  275. usage();
  276. exit(-1);
  277. }
  278. }
  279. if (!maskset) {
  280. maskset = 1;
  281. strncpy(cpumask, "1", sizeof(cpumask));
  282. cpumask[sizeof(cpumask) - 1] = '\0';
  283. }
  284. printf("cpumask %s maskset %d\n", cpumask, maskset);
  285. if (write_file) {
  286. fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
  287. if (fd == -1) {
  288. perror("Cannot open output file\n");
  289. exit(1);
  290. }
  291. }
  292. nl_sd = create_nl_socket(NETLINK_GENERIC);
  293. if (nl_sd < 0)
  294. err(1, "error creating Netlink socket\n");
  295. mypid = getpid();
  296. id = get_family_id(nl_sd);
  297. if (!id) {
  298. fprintf(stderr, "Error getting family id, errno %d\n", errno);
  299. goto err;
  300. }
  301. PRINTF("family id %d\n", id);
  302. if (maskset) {
  303. rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
  304. TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
  305. &cpumask, strlen(cpumask) + 1);
  306. PRINTF("Sent register cpumask, retval %d\n", rc);
  307. if (rc < 0) {
  308. fprintf(stderr, "error sending register cpumask\n");
  309. goto err;
  310. }
  311. }
  312. do {
  313. rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
  314. PRINTF("received %d bytes\n", rep_len);
  315. if (rep_len < 0) {
  316. fprintf(stderr, "nonfatal reply error: errno %d\n",
  317. errno);
  318. continue;
  319. }
  320. if (msg.n.nlmsg_type == NLMSG_ERROR ||
  321. !NLMSG_OK((&msg.n), rep_len)) {
  322. struct nlmsgerr *err = NLMSG_DATA(&msg);
  323. fprintf(stderr, "fatal reply error, errno %d\n",
  324. err->error);
  325. goto done;
  326. }
  327. PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
  328. sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
  329. rep_len = GENLMSG_PAYLOAD(&msg.n);
  330. na = (struct nlattr *) GENLMSG_DATA(&msg);
  331. len = 0;
  332. while (len < rep_len) {
  333. len += NLA_ALIGN(na->nla_len);
  334. int mother = na->nla_type;
  335. PRINTF("mother=%i\n", mother);
  336. switch (na->nla_type) {
  337. case TASKSTATS_TYPE_AGGR_PID:
  338. case TASKSTATS_TYPE_AGGR_TGID:
  339. /* For nested attributes, na follows */
  340. handle_aggr(mother, na, fd);
  341. break;
  342. default:
  343. fprintf(stderr, "Unexpected nla_type %d\n",
  344. na->nla_type);
  345. case TASKSTATS_TYPE_NULL:
  346. break;
  347. }
  348. na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
  349. }
  350. } while (1);
  351. done:
  352. if (maskset) {
  353. rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
  354. TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
  355. &cpumask, strlen(cpumask) + 1);
  356. printf("Sent deregister mask, retval %d\n", rc);
  357. if (rc < 0)
  358. err(rc, "error sending deregister cpumask\n");
  359. }
  360. err:
  361. close(nl_sd);
  362. if (fd)
  363. close(fd);
  364. if (cfd)
  365. close(cfd);
  366. return 0;
  367. }