trace_events_user.c 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2021, Microsoft Corporation.
  4. *
  5. * Authors:
  6. * Beau Belgrave <[email protected]>
  7. */
  8. #include <linux/bitmap.h>
  9. #include <linux/cdev.h>
  10. #include <linux/hashtable.h>
  11. #include <linux/list.h>
  12. #include <linux/io.h>
  13. #include <linux/uio.h>
  14. #include <linux/ioctl.h>
  15. #include <linux/jhash.h>
  16. #include <linux/refcount.h>
  17. #include <linux/trace_events.h>
  18. #include <linux/tracefs.h>
  19. #include <linux/types.h>
  20. #include <linux/uaccess.h>
  21. /* Reminder to move to uapi when everything works */
  22. #ifdef CONFIG_COMPILE_TEST
  23. #include <linux/user_events.h>
  24. #else
  25. #include <uapi/linux/user_events.h>
  26. #endif
  27. #include "trace.h"
  28. #include "trace_dynevent.h"
/* Length of the registration prefix, excluding the NUL terminator */
#define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1)

/* Parser depth states for "type name [size]" field descriptions */
#define FIELD_DEPTH_TYPE 0
#define FIELD_DEPTH_NAME 1
#define FIELD_DEPTH_SIZE 2

/*
 * Limits how many trace_event calls user processes can create:
 * Must be a power of two of PAGE_SIZE.
 */
#define MAX_PAGE_ORDER 0
#define MAX_PAGES (1 << MAX_PAGE_ORDER)
#define MAX_BYTES (MAX_PAGES * PAGE_SIZE)
#define MAX_EVENTS (MAX_BYTES * 8)

/* Limit how long of an event name plus args within the subsystem. */
#define MAX_EVENT_DESC 512
#define EVENT_NAME(user_event) ((user_event)->tracepoint.name)
#define MAX_FIELD_ARRAY_SIZE 1024

/*
 * The MAP_STATUS_* macros are used for taking a index and determining the
 * appropriate byte and the bit in the byte to set/reset for an event.
 *
 * The lower 3 bits of the index decide which bit to set.
 * The remaining upper bits of the index decide which byte to use for the bit.
 *
 * This is used when an event has a probe attached/removed to reflect live
 * status of the event wanting tracing or not to user-programs via shared
 * memory maps.
 */
#define MAP_STATUS_BYTE(index) ((index) >> 3)
#define MAP_STATUS_MASK(index) BIT((index) & 7)

/*
 * Internal bits (kernel side only) to keep track of connected probes:
 * These are used when status is requested in text form about an event. These
 * bits are compared against an internal byte on the event to determine which
 * probes to print out to the user.
 *
 * These do not reflect the mapped bytes between the user and kernel space.
 */
#define EVENT_STATUS_FTRACE BIT(0)
#define EVENT_STATUS_PERF BIT(1)
#define EVENT_STATUS_OTHER BIT(7)
/*
 * Stores the pages, tables, and locks for a group of events.
 * Each logical grouping of events has its own group, with a
 * matching page for status checks within user programs. This
 * allows for isolation of events to user programs by various
 * means.
 */
struct user_event_group {
	struct page *pages;		/* backing pages for the status map */
	char *register_page_data;	/* kernel VA of pages; bits set per MAP_STATUS_* */
	char *system_name;		/* tracefs system name for events in this group */
	struct hlist_node node;
	struct mutex reg_mutex;		/* serializes register/delete in this group */
	DECLARE_HASHTABLE(register_table, 8);	/* name-keyed lookup of user_events */
	DECLARE_BITMAP(page_bitmap, MAX_EVENTS);	/* allocated status-bit indexes */
};

/* Group for init_user_ns mapping, top-most group */
static struct user_event_group *init_group;
/*
 * Stores per-event properties, as users register events
 * within a file a user_event might be created if it does not
 * already exist. These are globally used and their lifetime
 * is tied to the refcnt member. These cannot go away until the
 * refcnt reaches one.
 */
struct user_event {
	struct user_event_group *group;	/* owning group (locks, status page) */
	struct tracepoint tracepoint;
	struct trace_event_call call;
	struct trace_event_class class;
	struct dyn_event devent;
	struct hlist_node node;		/* linkage in group->register_table */
	struct list_head fields;	/* ftrace_event_field list, newest first */
	struct list_head validators;	/* dyn-field validators, in offset order */
	refcount_t refcnt;
	int index;			/* bit index into the shared status page */
	int flags;
	int min_size;			/* minimum user write size, sans trace_entry */
	char status;			/* internal EVENT_STATUS_* probe bits */
};
/*
 * Stores per-file events references, as users register events
 * within a file this structure is modified and freed via RCU.
 * The lifetime of this struct is tied to the lifetime of the file.
 * These are not shared and only accessible by the file that created it.
 */
struct user_event_refs {
	struct rcu_head rcu;
	int count;
	struct user_event *events[];	/* flexible array, one slot per ref */
};

/* Per-open-file state: the group it maps plus its event references */
struct user_event_file_info {
	struct user_event_group *group;
	struct user_event_refs *refs;
};

/* Validator flags for dynamic (__data_loc/__rel_loc) payload checks */
#define VALIDATOR_ENSURE_NULL (1 << 0)	/* char data must be NUL terminated */
#define VALIDATOR_REL (1 << 1)		/* offset is relative to the loc word */

struct user_event_validator {
	struct list_head link;
	int offset;	/* offset of the 32-bit loc word within the payload */
	int flags;	/* VALIDATOR_* bits */
};

/* Probe signature shared by the ftrace and perf writers */
typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
				   void *tpdata, bool *faulted);

static int user_event_parse(struct user_event_group *group, char *name,
			    char *args, char *flags,
			    struct user_event **newuser);
  136. static u32 user_event_key(char *name)
  137. {
  138. return jhash(name, strlen(name), 0);
  139. }
  140. static void set_page_reservations(char *pages, bool set)
  141. {
  142. int page;
  143. for (page = 0; page < MAX_PAGES; ++page) {
  144. void *addr = pages + (PAGE_SIZE * page);
  145. if (set)
  146. SetPageReserved(virt_to_page(addr));
  147. else
  148. ClearPageReserved(virt_to_page(addr));
  149. }
  150. }
/*
 * Frees a group and its resources. Safe on a partially-initialized
 * group: each member is checked (or NULL-tolerant) before release.
 */
static void user_event_group_destroy(struct user_event_group *group)
{
	if (group->register_page_data)
		set_page_reservations(group->register_page_data, false);

	if (group->pages)
		__free_pages(group->pages, MAX_PAGE_ORDER);

	kfree(group->system_name);
	kfree(group);
}
  160. static char *user_event_group_system_name(struct user_namespace *user_ns)
  161. {
  162. char *system_name;
  163. int len = sizeof(USER_EVENTS_SYSTEM) + 1;
  164. if (user_ns != &init_user_ns) {
  165. /*
  166. * Unexpected at this point:
  167. * We only currently support init_user_ns.
  168. * When we enable more, this will trigger a failure so log.
  169. */
  170. pr_warn("user_events: Namespace other than init_user_ns!\n");
  171. return NULL;
  172. }
  173. system_name = kmalloc(len, GFP_KERNEL);
  174. if (!system_name)
  175. return NULL;
  176. snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM);
  177. return system_name;
  178. }
  179. static inline struct user_event_group
  180. *user_event_group_from_user_ns(struct user_namespace *user_ns)
  181. {
  182. if (user_ns == &init_user_ns)
  183. return init_group;
  184. return NULL;
  185. }
  186. static struct user_event_group *current_user_event_group(void)
  187. {
  188. struct user_namespace *user_ns = current_user_ns();
  189. struct user_event_group *group = NULL;
  190. while (user_ns) {
  191. group = user_event_group_from_user_ns(user_ns);
  192. if (group)
  193. break;
  194. user_ns = user_ns->parent;
  195. }
  196. return group;
  197. }
  198. static struct user_event_group
  199. *user_event_group_create(struct user_namespace *user_ns)
  200. {
  201. struct user_event_group *group;
  202. group = kzalloc(sizeof(*group), GFP_KERNEL);
  203. if (!group)
  204. return NULL;
  205. group->system_name = user_event_group_system_name(user_ns);
  206. if (!group->system_name)
  207. goto error;
  208. group->pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER);
  209. if (!group->pages)
  210. goto error;
  211. group->register_page_data = page_address(group->pages);
  212. set_page_reservations(group->register_page_data, true);
  213. /* Zero all bits beside 0 (which is reserved for failures) */
  214. bitmap_zero(group->page_bitmap, MAX_EVENTS);
  215. set_bit(0, group->page_bitmap);
  216. mutex_init(&group->reg_mutex);
  217. hash_init(group->register_table);
  218. return group;
  219. error:
  220. if (group)
  221. user_event_group_destroy(group);
  222. return NULL;
  223. };
  224. static __always_inline
  225. void user_event_register_set(struct user_event *user)
  226. {
  227. int i = user->index;
  228. user->group->register_page_data[MAP_STATUS_BYTE(i)] |= MAP_STATUS_MASK(i);
  229. }
  230. static __always_inline
  231. void user_event_register_clear(struct user_event *user)
  232. {
  233. int i = user->index;
  234. user->group->register_page_data[MAP_STATUS_BYTE(i)] &= ~MAP_STATUS_MASK(i);
  235. }
/*
 * True when only the creation ("self") reference remains, meaning
 * the caller is allowed to delete the event.
 */
static __always_inline __must_check
bool user_event_last_ref(struct user_event *user)
{
	return refcount_read(&user->refcnt) == 1;
}
/*
 * Copies user payload from the iterator with page faults disabled, so
 * it is safe to call from contexts that must not sleep on a fault.
 * Returns the number of bytes copied; callers treat 0 as a fault.
 */
static __always_inline __must_check
size_t copy_nofault(void *addr, size_t bytes, struct iov_iter *i)
{
	size_t ret;

	/* Disable faults first; the copy must not sleep mid-trace */
	pagefault_disable();

	ret = copy_from_iter_nocache(addr, bytes, i);

	pagefault_enable();

	return ret;
}
  250. static struct list_head *user_event_get_fields(struct trace_event_call *call)
  251. {
  252. struct user_event *user = (struct user_event *)call->data;
  253. return &user->fields;
  254. }
  255. /*
  256. * Parses a register command for user_events
  257. * Format: event_name[:FLAG1[,FLAG2...]] [field1[;field2...]]
  258. *
  259. * Example event named 'test' with a 20 char 'msg' field with an unsigned int
  260. * 'id' field after:
  261. * test char[20] msg;unsigned int id
  262. *
  263. * NOTE: Offsets are from the user data perspective, they are not from the
  264. * trace_entry/buffer perspective. We automatically add the common properties
  265. * sizes to the offset for the user.
  266. *
  267. * Upon success user_event has its ref count increased by 1.
  268. */
  269. static int user_event_parse_cmd(struct user_event_group *group,
  270. char *raw_command, struct user_event **newuser)
  271. {
  272. char *name = raw_command;
  273. char *args = strpbrk(name, " ");
  274. char *flags;
  275. if (args)
  276. *args++ = '\0';
  277. flags = strpbrk(name, ":");
  278. if (flags)
  279. *flags++ = '\0';
  280. return user_event_parse(group, name, args, flags, newuser);
  281. }
  282. static int user_field_array_size(const char *type)
  283. {
  284. const char *start = strchr(type, '[');
  285. char val[8];
  286. char *bracket;
  287. int size = 0;
  288. if (start == NULL)
  289. return -EINVAL;
  290. if (strscpy(val, start + 1, sizeof(val)) <= 0)
  291. return -EINVAL;
  292. bracket = strchr(val, ']');
  293. if (!bracket)
  294. return -EINVAL;
  295. *bracket = '\0';
  296. if (kstrtouint(val, 0, &size))
  297. return -EINVAL;
  298. if (size > MAX_FIELD_ARRAY_SIZE)
  299. return -EINVAL;
  300. return size;
  301. }
/*
 * Returns the byte size of a user-visible field type, the array size
 * for char arrays, or -EINVAL for anything unrecognized.
 */
static int user_field_size(const char *type)
{
	/* long is not allowed from a user, since it's ambiguous in size */
	if (strcmp(type, "s64") == 0)
		return sizeof(s64);
	if (strcmp(type, "u64") == 0)
		return sizeof(u64);
	if (strcmp(type, "s32") == 0)
		return sizeof(s32);
	if (strcmp(type, "u32") == 0)
		return sizeof(u32);
	if (strcmp(type, "int") == 0)
		return sizeof(int);
	if (strcmp(type, "unsigned int") == 0)
		return sizeof(unsigned int);
	if (strcmp(type, "s16") == 0)
		return sizeof(s16);
	if (strcmp(type, "u16") == 0)
		return sizeof(u16);
	if (strcmp(type, "short") == 0)
		return sizeof(short);
	if (strcmp(type, "unsigned short") == 0)
		return sizeof(unsigned short);
	if (strcmp(type, "s8") == 0)
		return sizeof(s8);
	if (strcmp(type, "u8") == 0)
		return sizeof(u8);
	if (strcmp(type, "char") == 0)
		return sizeof(char);
	if (strcmp(type, "unsigned char") == 0)
		return sizeof(unsigned char);
	if (str_has_prefix(type, "char["))
		return user_field_array_size(type);
	if (str_has_prefix(type, "unsigned char["))
		return user_field_array_size(type);
	/* Dynamic fields store a 32-bit loc word (offset/size) inline */
	if (str_has_prefix(type, "__data_loc "))
		return sizeof(u32);
	if (str_has_prefix(type, "__rel_loc "))
		return sizeof(u32);

	/* Unknown basic type, error */
	return -EINVAL;
}
  344. static void user_event_destroy_validators(struct user_event *user)
  345. {
  346. struct user_event_validator *validator, *next;
  347. struct list_head *head = &user->validators;
  348. list_for_each_entry_safe(validator, next, head, link) {
  349. list_del(&validator->link);
  350. kfree(validator);
  351. }
  352. }
  353. static void user_event_destroy_fields(struct user_event *user)
  354. {
  355. struct ftrace_event_field *field, *next;
  356. struct list_head *head = &user->fields;
  357. list_for_each_entry_safe(field, next, head, link) {
  358. list_del(&field->link);
  359. kfree(field);
  360. }
  361. }
  362. static int user_event_add_field(struct user_event *user, const char *type,
  363. const char *name, int offset, int size,
  364. int is_signed, int filter_type)
  365. {
  366. struct user_event_validator *validator;
  367. struct ftrace_event_field *field;
  368. int validator_flags = 0;
  369. field = kmalloc(sizeof(*field), GFP_KERNEL);
  370. if (!field)
  371. return -ENOMEM;
  372. if (str_has_prefix(type, "__data_loc "))
  373. goto add_validator;
  374. if (str_has_prefix(type, "__rel_loc ")) {
  375. validator_flags |= VALIDATOR_REL;
  376. goto add_validator;
  377. }
  378. goto add_field;
  379. add_validator:
  380. if (strstr(type, "char") != NULL)
  381. validator_flags |= VALIDATOR_ENSURE_NULL;
  382. validator = kmalloc(sizeof(*validator), GFP_KERNEL);
  383. if (!validator) {
  384. kfree(field);
  385. return -ENOMEM;
  386. }
  387. validator->flags = validator_flags;
  388. validator->offset = offset;
  389. /* Want sequential access when validating */
  390. list_add_tail(&validator->link, &user->validators);
  391. add_field:
  392. field->type = type;
  393. field->name = name;
  394. field->offset = offset;
  395. field->size = size;
  396. field->is_signed = is_signed;
  397. field->filter_type = filter_type;
  398. list_add(&field->link, &user->fields);
  399. /*
  400. * Min size from user writes that are required, this does not include
  401. * the size of trace_entry (common fields).
  402. */
  403. user->min_size = (offset + size) - sizeof(struct trace_entry);
  404. return 0;
  405. }
/*
 * Parses the values of a field within the description
 * Format: type name [size]
 *
 * Walks the field text as a small state machine (FIELD_DEPTH_TYPE ->
 * NAME -> SIZE); multi-word type prefixes are consumed up front so the
 * remaining words split cleanly on spaces. On success the running
 * *offset is advanced past this field and the field is added to user.
 */
static int user_event_parse_field(char *field, struct user_event *user,
				  u32 *offset)
{
	char *part, *type, *name;
	u32 depth = 0, saved_offset = *offset;
	int len, size = -EINVAL;
	bool is_struct = false;

	field = skip_spaces(field);

	/* Empty descriptions (e.g. from a trailing ';') are accepted */
	if (*field == '\0')
		return 0;

	/* Handle types that have a space within */
	len = str_has_prefix(field, "unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "struct ");
	if (len) {
		/* struct fields require an explicit trailing size word */
		is_struct = true;
		goto skip_next;
	}

	len = str_has_prefix(field, "__data_loc unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__data_loc ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__rel_loc unsigned ");
	if (len)
		goto skip_next;

	len = str_has_prefix(field, "__rel_loc ");
	if (len)
		goto skip_next;

	goto parse;
skip_next:
	/* Multi-word type: cut after the word following the prefix */
	type = field;
	field = strpbrk(field + len, " ");

	if (field == NULL)
		return -EINVAL;

	*field++ = '\0';
	depth++;
parse:
	name = NULL;

	while ((part = strsep(&field, " ")) != NULL) {
		switch (depth++) {
		case FIELD_DEPTH_TYPE:
			type = part;
			break;
		case FIELD_DEPTH_NAME:
			name = part;
			break;
		case FIELD_DEPTH_SIZE:
			/* Only struct types may carry an explicit size */
			if (!is_struct)
				return -EINVAL;

			if (kstrtou32(part, 10, &size))
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
	}

	if (depth < FIELD_DEPTH_SIZE || !name)
		return -EINVAL;

	/* No explicit size word: derive it from the type */
	if (depth == FIELD_DEPTH_SIZE)
		size = user_field_size(type);

	if (size == 0)
		return -EINVAL;

	if (size < 0)
		return size;

	*offset = saved_offset + size;

	/* Heuristic: types starting with 'u' are unsigned */
	return user_event_add_field(user, type, name, saved_offset, size,
				    type[0] != 'u', FILTER_OTHER);
}
  481. static int user_event_parse_fields(struct user_event *user, char *args)
  482. {
  483. char *field;
  484. u32 offset = sizeof(struct trace_entry);
  485. int ret = -EINVAL;
  486. if (args == NULL)
  487. return 0;
  488. while ((field = strsep(&args, ";")) != NULL) {
  489. ret = user_event_parse_field(field, user, &offset);
  490. if (ret)
  491. break;
  492. }
  493. return ret;
  494. }
/* NOTE(review): zero-filled sentinel; fields appear to be built at runtime via user_event_add_field rather than declared statically — confirm against trace_event_class usage */
static struct trace_event_fields user_event_fields_array[1];
  496. static const char *user_field_format(const char *type)
  497. {
  498. if (strcmp(type, "s64") == 0)
  499. return "%lld";
  500. if (strcmp(type, "u64") == 0)
  501. return "%llu";
  502. if (strcmp(type, "s32") == 0)
  503. return "%d";
  504. if (strcmp(type, "u32") == 0)
  505. return "%u";
  506. if (strcmp(type, "int") == 0)
  507. return "%d";
  508. if (strcmp(type, "unsigned int") == 0)
  509. return "%u";
  510. if (strcmp(type, "s16") == 0)
  511. return "%d";
  512. if (strcmp(type, "u16") == 0)
  513. return "%u";
  514. if (strcmp(type, "short") == 0)
  515. return "%d";
  516. if (strcmp(type, "unsigned short") == 0)
  517. return "%u";
  518. if (strcmp(type, "s8") == 0)
  519. return "%d";
  520. if (strcmp(type, "u8") == 0)
  521. return "%u";
  522. if (strcmp(type, "char") == 0)
  523. return "%d";
  524. if (strcmp(type, "unsigned char") == 0)
  525. return "%u";
  526. if (strstr(type, "char[") != NULL)
  527. return "%s";
  528. /* Unknown, likely struct, allowed treat as 64-bit */
  529. return "%llu";
  530. }
  531. static bool user_field_is_dyn_string(const char *type, const char **str_func)
  532. {
  533. if (str_has_prefix(type, "__data_loc ")) {
  534. *str_func = "__get_str";
  535. goto check;
  536. }
  537. if (str_has_prefix(type, "__rel_loc ")) {
  538. *str_func = "__get_rel_str";
  539. goto check;
  540. }
  541. return false;
  542. check:
  543. return strstr(type, "char") != NULL;
  544. }
/* Remaining room in buf; 0 length turns snprintf into pure sizing */
#define LEN_OR_ZERO (len ? len - pos : 0)

/*
 * Appends argv words starting at *iout to buf until a word containing
 * ';' ends the field. Returns the required length including the NUL.
 * Called twice: first with len == 0 to size, then to fill — *iout is
 * only advanced on the fill pass.
 */
static int user_dyn_field_set_string(int argc, const char **argv, int *iout,
				     char *buf, int len, bool *colon)
{
	int pos = 0, i = *iout;

	*colon = false;

	for (; i < argc; ++i) {
		if (i != *iout)
			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");

		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", argv[i]);

		/* NOTE(review): despite the param name, this matches ';' */
		if (strchr(argv[i], ';')) {
			++i;
			*colon = true;
			break;
		}
	}

	/* Actual set, advance i */
	if (len != 0)
		*iout = i;

	return pos + 1;
}
/*
 * Emits "type name[ size][;]" for one field into buf. Returns the
 * required length including the NUL; len == 0 performs sizing only.
 */
static int user_field_set_string(struct ftrace_event_field *field,
				 char *buf, int len, bool colon)
{
	int pos = 0;

	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->type);
	pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
	pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->name);

	/* struct fields carry their explicit size in the description */
	if (str_has_prefix(field->type, "struct "))
		pos += snprintf(buf + pos, LEN_OR_ZERO, " %d", field->size);

	if (colon)
		pos += snprintf(buf + pos, LEN_OR_ZERO, ";");

	return pos + 1;
}
/*
 * Builds the event's print_fmt: a quoted "name=%fmt ..." format string
 * followed by the argument list (REC->field or __get_[rel_]str(field)).
 * Returns the required length including the NUL; len == 0 sizes only.
 *
 * Fields are walked in reverse because list_add() prepends them, so
 * reverse order restores registration order.
 * NOTE(review): the _safe variants look unnecessary for this read-only
 * walk — confirm nothing deletes concurrently.
 */
static int user_event_set_print_fmt(struct user_event *user, char *buf, int len)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head = &user->fields;
	int pos = 0, depth = 0;
	const char *str_func;

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	list_for_each_entry_safe_reverse(field, next, head, link) {
		if (depth != 0)
			pos += snprintf(buf + pos, LEN_OR_ZERO, " ");

		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s",
				field->name, user_field_format(field->type));

		depth++;
	}

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	list_for_each_entry_safe_reverse(field, next, head, link) {
		if (user_field_is_dyn_string(field->type, &str_func))
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", %s(%s)", str_func, field->name);
		else
			pos += snprintf(buf + pos, LEN_OR_ZERO,
					", REC->%s", field->name);
	}

	return pos + 1;
}
#undef LEN_OR_ZERO
  605. static int user_event_create_print_fmt(struct user_event *user)
  606. {
  607. char *print_fmt;
  608. int len;
  609. len = user_event_set_print_fmt(user, NULL, 0);
  610. print_fmt = kmalloc(len, GFP_KERNEL);
  611. if (!print_fmt)
  612. return -ENOMEM;
  613. user_event_set_print_fmt(user, print_fmt, len);
  614. user->call.print_fmt = print_fmt;
  615. return 0;
  616. }
/*
 * Output handler for trace readers: dumps the raw entry bytes.
 */
static enum print_line_t user_event_print_trace(struct trace_iterator *iter,
						int flags,
						struct trace_event *event)
{
	/* Unsafe to try to decode user provided print_fmt, use hex */
	trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16,
				 1, iter->ent, iter->ent_size, true);

	return trace_handle_return(&iter->seq);
}

static struct trace_event_functions user_event_funcs = {
	.trace = user_event_print_trace,
};
/*
 * Adds or removes the event's call in tracefs while temporarily running
 * with root fsuid, since the registering process may lack tracefs
 * permissions. Returns the add/remove result.
 */
static int user_event_set_call_visible(struct user_event *user, bool visible)
{
	int ret;
	const struct cred *old_cred;
	struct cred *cred;

	cred = prepare_creds();

	if (!cred)
		return -ENOMEM;

	/*
	 * While by default tracefs is locked down, systems can be configured
	 * to allow user_event files to be less locked down. The extreme case
	 * being "other" has read/write access to user_events_data/status.
	 *
	 * When not locked down, processes may not have permissions to
	 * add/remove calls themselves to tracefs. We need to temporarily
	 * switch to root file permission to allow for this scenario.
	 */
	cred->fsuid = GLOBAL_ROOT_UID;

	old_cred = override_creds(cred);

	if (visible)
		ret = trace_add_event_call(&user->call);
	else
		ret = trace_remove_event_call(&user->call);

	/* Restore the caller's credentials before dropping ours */
	revert_creds(old_cred);
	put_cred(cred);

	return ret;
}
/*
 * Fully tears down an event once no references remain: hides the call,
 * removes it from dyn_event and the registration table, clears its
 * status bit, then frees all memory.
 * NOTE(review): appears to require group->reg_mutex held (hash_del on
 * register_table) — confirm at call sites. An error from making the
 * call invisible returns early with fields already destroyed.
 */
static int destroy_user_event(struct user_event *user)
{
	int ret = 0;

	/* Must destroy fields before call removal */
	user_event_destroy_fields(user);

	ret = user_event_set_call_visible(user, false);

	if (ret)
		return ret;

	dyn_event_remove(&user->devent);

	user_event_register_clear(user);
	clear_bit(user->index, user->group->page_bitmap);
	hash_del(&user->node);

	user_event_destroy_validators(user);
	kfree(user->call.print_fmt);
	kfree(EVENT_NAME(user));
	kfree(user);

	return ret;
}
  674. static struct user_event *find_user_event(struct user_event_group *group,
  675. char *name, u32 *outkey)
  676. {
  677. struct user_event *user;
  678. u32 key = user_event_key(name);
  679. *outkey = key;
  680. hash_for_each_possible(group->register_table, user, node, key)
  681. if (!strcmp(EVENT_NAME(user), name)) {
  682. refcount_inc(&user->refcnt);
  683. return user;
  684. }
  685. return NULL;
  686. }
/*
 * Validates dynamic (__data_loc/__rel_loc) references in a user payload.
 * Each validator points at a 32-bit loc word: low 16 bits are the data
 * offset, high 16 bits its size. Ensures the referenced range ends
 * within the payload and, for char data, that it is NUL terminated.
 * Returns 0 or -EFAULT.
 */
static int user_event_validate(struct user_event *user, void *data, int len)
{
	struct list_head *head = &user->validators;
	struct user_event_validator *validator;
	void *pos, *end = data + len;
	u32 loc, offset, size;

	list_for_each_entry(validator, head, link) {
		pos = data + validator->offset;

		/* Already done min_size check, no bounds check here */
		loc = *(u32 *)pos;
		offset = loc & 0xffff;
		size = loc >> 16;

		if (likely(validator->flags & VALIDATOR_REL))
			/* Relative: offset counts from just after the loc word */
			pos += offset + sizeof(loc);
		else
			pos = data + offset;

		pos += size;

		if (unlikely(pos > end))
			return -EFAULT;

		if (likely(validator->flags & VALIDATOR_ENSURE_NULL))
			/* Last byte of char data must be the terminator */
			if (unlikely(*(char *)(pos - 1) != '\0'))
				return -EFAULT;
	}

	return 0;
}
/*
 * Writes the user supplied payload out to a trace file.
 * Probe attached to the event's tracepoint; tpdata is the target
 * trace_event_file. On copy fault or validation failure the reserved
 * buffer entry is discarded and *faulted is set.
 */
static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
			      void *tpdata, bool *faulted)
{
	struct trace_event_file *file;
	struct trace_entry *entry;
	struct trace_event_buffer event_buffer;
	size_t size = sizeof(*entry) + i->count;

	file = (struct trace_event_file *)tpdata;

	if (!file ||
	    !(file->flags & EVENT_FILE_FL_ENABLED) ||
	    trace_trigger_soft_disabled(file))
		return;

	/* Allocates and fills trace_entry, + 1 of this is data payload */
	entry = trace_event_buffer_reserve(&event_buffer, file, size);

	if (unlikely(!entry))
		return;

	if (unlikely(!copy_nofault(entry + 1, i->count, i)))
		goto discard;

	/* Only pay the validation cost when dynamic fields exist */
	if (!list_empty(&user->validators) &&
	    unlikely(user_event_validate(user, entry, size)))
		goto discard;

	trace_event_buffer_commit(&event_buffer);

	return;
discard:
	*faulted = true;
	__trace_event_discard_commit(event_buffer.buffer,
				     event_buffer.event);
}
#ifdef CONFIG_PERF_EVENTS
/*
 * Writes the user supplied payload out to perf ring buffer.
 *
 * Probe registered on the event's tracepoint for the perf subsystem.
 * Skips all work when no perf event is attached on this CPU. On copy or
 * validation failure the recursion context is released and *faulted set.
 */
static void user_event_perf(struct user_event *user, struct iov_iter *i,
			    void *tpdata, bool *faulted)
{
	struct hlist_head *perf_head;

	perf_head = this_cpu_ptr(user->call.perf_events);

	if (perf_head && !hlist_empty(perf_head)) {
		struct trace_entry *perf_entry;
		struct pt_regs *regs;
		size_t size = sizeof(*perf_entry) + i->count;
		int context;

		/* perf requires 8-byte aligned sample sizes */
		perf_entry = perf_trace_buf_alloc(ALIGN(size, 8),
						  &regs, &context);

		if (unlikely(!perf_entry))
			return;

		perf_fetch_caller_regs(regs);

		/* Payload copied directly after the trace_entry header */
		if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
			goto discard;

		if (!list_empty(&user->validators) &&
		    unlikely(user_event_validate(user, perf_entry, size)))
			goto discard;

		perf_trace_buf_submit(perf_entry, size, context,
				      user->call.event.type, 1, regs,
				      perf_head, NULL);

		return;
discard:
		/* Must unwind the recursion context taken by buf_alloc */
		*faulted = true;
		perf_swevent_put_recursion_context(context);
	}
}
#endif
/*
 * Update the register page that is shared between user processes.
 *
 * Recomputes the event's status byte by walking the tracepoint's probe
 * list under rcu_read_lock_sched() and classifying each attached probe
 * (ftrace, perf, or other), then publishes the result so user mode can
 * cheaply test whether anyone is listening.
 */
static void update_reg_page_for(struct user_event *user)
{
	struct tracepoint *tp = &user->tracepoint;
	char status = 0;

	if (atomic_read(&tp->key.enabled) > 0) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			/* funcs is a NULL-func terminated array */
			do {
				probe_func = probe_func_ptr->func;

				if (probe_func == user_event_ftrace)
					status |= EVENT_STATUS_FTRACE;
#ifdef CONFIG_PERF_EVENTS
				else if (probe_func == user_event_perf)
					status |= EVENT_STATUS_PERF;
#endif
				else
					status |= EVENT_STATUS_OTHER;
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();
	}

	/* Flip the event's bit in the shared register page */
	if (status)
		user_event_register_set(user);
	else
		user_event_register_clear(user);

	user->status = status;
}
/*
 * Register callback for our events from tracing sub-systems.
 *
 * Called by ftrace/perf to attach or detach their probe on the event's
 * tracepoint. A successful attach takes a reference on the user_event so
 * it cannot be destroyed while a subsystem still uses it; detach drops
 * that reference. The shared status page is refreshed on both paths.
 */
static int user_event_reg(struct trace_event_call *call,
			  enum trace_reg type,
			  void *data)
{
	struct user_event *user = (struct user_event *)call->data;
	int ret = 0;

	if (!user)
		return -ENOENT;

	switch (type) {
	case TRACE_REG_REGISTER:
		ret = tracepoint_probe_register(call->tp,
						call->class->probe,
						data);
		if (!ret)
			goto inc;
		break;

	case TRACE_REG_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->probe,
					    data);
		goto dec;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		ret = tracepoint_probe_register(call->tp,
						call->class->perf_probe,
						data);
		if (!ret)
			goto inc;
		break;

	case TRACE_REG_PERF_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->perf_probe,
					    data);
		goto dec;

	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		/* Per-perf-event lifecycle hooks: nothing to do here */
		break;
#endif
	}

	return ret;

inc:
	/* Probe attached: take the ref, then expose the new status */
	refcount_inc(&user->refcnt);
	update_reg_page_for(user);
	return 0;
dec:
	/* Probe detached: refresh status while the ref is still held */
	update_reg_page_for(user);
	refcount_dec(&user->refcnt);
	return 0;
}
  864. static int user_event_create(const char *raw_command)
  865. {
  866. struct user_event_group *group;
  867. struct user_event *user;
  868. char *name;
  869. int ret;
  870. if (!str_has_prefix(raw_command, USER_EVENTS_PREFIX))
  871. return -ECANCELED;
  872. raw_command += USER_EVENTS_PREFIX_LEN;
  873. raw_command = skip_spaces(raw_command);
  874. name = kstrdup(raw_command, GFP_KERNEL);
  875. if (!name)
  876. return -ENOMEM;
  877. group = current_user_event_group();
  878. if (!group) {
  879. kfree(name);
  880. return -ENOENT;
  881. }
  882. mutex_lock(&group->reg_mutex);
  883. ret = user_event_parse_cmd(group, name, &user);
  884. if (!ret)
  885. refcount_dec(&user->refcnt);
  886. mutex_unlock(&group->reg_mutex);
  887. if (ret)
  888. kfree(name);
  889. return ret;
  890. }
  891. static int user_event_show(struct seq_file *m, struct dyn_event *ev)
  892. {
  893. struct user_event *user = container_of(ev, struct user_event, devent);
  894. struct ftrace_event_field *field, *next;
  895. struct list_head *head;
  896. int depth = 0;
  897. seq_printf(m, "%s%s", USER_EVENTS_PREFIX, EVENT_NAME(user));
  898. head = trace_get_fields(&user->call);
  899. list_for_each_entry_safe_reverse(field, next, head, link) {
  900. if (depth == 0)
  901. seq_puts(m, " ");
  902. else
  903. seq_puts(m, "; ");
  904. seq_printf(m, "%s %s", field->type, field->name);
  905. if (str_has_prefix(field->type, "struct "))
  906. seq_printf(m, " %d", field->size);
  907. depth++;
  908. }
  909. seq_puts(m, "\n");
  910. return 0;
  911. }
  912. static bool user_event_is_busy(struct dyn_event *ev)
  913. {
  914. struct user_event *user = container_of(ev, struct user_event, devent);
  915. return !user_event_last_ref(user);
  916. }
  917. static int user_event_free(struct dyn_event *ev)
  918. {
  919. struct user_event *user = container_of(ev, struct user_event, devent);
  920. if (!user_event_last_ref(user))
  921. return -EBUSY;
  922. return destroy_user_event(user);
  923. }
/*
 * Compares one registered field against the next portion of a dyn_event
 * argument list. Both sides are rendered to canonical strings and then
 * compared, so formatting differences in the input do not matter.
 *
 * @iout: in/out cursor into argv; advanced by the dyn-side renderer.
 * Returns true only when the rendered strings match exactly.
 */
static bool user_field_match(struct ftrace_event_field *field, int argc,
			     const char **argv, int *iout)
{
	char *field_name = NULL, *dyn_field_name = NULL;
	bool colon = false, match = false;
	int dyn_len, len;

	if (*iout >= argc)
		return false;

	/*
	 * First pass with a zero-sized buffer computes the required length.
	 * NOTE(review): presumably the returned lengths include the NUL
	 * terminator, since they are used directly as kmalloc sizes below —
	 * confirm against the set_string helpers.
	 */
	dyn_len = user_dyn_field_set_string(argc, argv, iout, dyn_field_name,
					    0, &colon);

	len = user_field_set_string(field, field_name, 0, colon);

	/* Cheap reject: differing lengths can never compare equal */
	if (dyn_len != len)
		return false;

	dyn_field_name = kmalloc(dyn_len, GFP_KERNEL);
	field_name = kmalloc(len, GFP_KERNEL);

	if (!dyn_field_name || !field_name)
		goto out;

	/* Second pass renders both sides for the actual comparison */
	user_dyn_field_set_string(argc, argv, iout, dyn_field_name,
				  dyn_len, &colon);

	user_field_set_string(field, field_name, len, colon);

	match = strcmp(dyn_field_name, field_name) == 0;

out:
	kfree(dyn_field_name);
	kfree(field_name);

	return match;
}
  950. static bool user_fields_match(struct user_event *user, int argc,
  951. const char **argv)
  952. {
  953. struct ftrace_event_field *field, *next;
  954. struct list_head *head = &user->fields;
  955. int i = 0;
  956. list_for_each_entry_safe_reverse(field, next, head, link)
  957. if (!user_field_match(field, argc, argv, &i))
  958. return false;
  959. if (i != argc)
  960. return false;
  961. return true;
  962. }
  963. static bool user_event_match(const char *system, const char *event,
  964. int argc, const char **argv, struct dyn_event *ev)
  965. {
  966. struct user_event *user = container_of(ev, struct user_event, devent);
  967. bool match;
  968. match = strcmp(EVENT_NAME(user), event) == 0 &&
  969. (!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
  970. if (match && argc > 0)
  971. match = user_fields_match(user, argc, argv);
  972. return match;
  973. }
/* Hooks user_events into the dynamic event (dyn_event) framework */
static struct dyn_event_operations user_event_dops = {
	.create = user_event_create,
	.show = user_event_show,
	.is_busy = user_event_is_busy,
	.free = user_event_free,
	.match = user_event_match,
};
  981. static int user_event_trace_register(struct user_event *user)
  982. {
  983. int ret;
  984. ret = register_trace_event(&user->call.event);
  985. if (!ret)
  986. return -ENODEV;
  987. ret = user_event_set_call_visible(user, true);
  988. if (ret)
  989. unregister_trace_event(&user->call.event);
  990. return ret;
  991. }
/*
 * Parses the event name, arguments and flags then registers if successful.
 * The name buffer lifetime is owned by this method for success cases only.
 * Upon success the returned user_event has its ref count increased by 1.
 *
 * Ownership of @name: on success it becomes the tracepoint/call name and
 * is freed when the event is destroyed (or immediately when the event
 * already exists). On failure the caller frees it, per the contract above.
 */
static int user_event_parse(struct user_event_group *group, char *name,
			    char *args, char *flags,
			    struct user_event **newuser)
{
	int ret;
	int index;
	u32 key;
	struct user_event *user;

	/* Prevent dyn_event from racing */
	mutex_lock(&event_mutex);
	user = find_user_event(group, name, &key);
	mutex_unlock(&event_mutex);

	if (user) {
		*newuser = user;
		/*
		 * Name is allocated by caller, free it since it already exists.
		 * Caller only worries about failure cases for freeing.
		 */
		kfree(name);
		return 0;
	}

	/* Claim a status-bit slot in the shared register page */
	index = find_first_zero_bit(group->page_bitmap, MAX_EVENTS);

	if (index == MAX_EVENTS)
		return -EMFILE;

	user = kzalloc(sizeof(*user), GFP_KERNEL);

	if (!user)
		return -ENOMEM;

	INIT_LIST_HEAD(&user->class.fields);
	INIT_LIST_HEAD(&user->fields);
	INIT_LIST_HEAD(&user->validators);

	user->group = group;
	user->tracepoint.name = name;

	/* Builds fields and validators from the argument string */
	ret = user_event_parse_fields(user, args);

	if (ret)
		goto put_user;

	ret = user_event_create_print_fmt(user);

	if (ret)
		goto put_user;

	/* Wire the trace_event_call up to this event and its class ops */
	user->call.data = user;
	user->call.class = &user->class;
	user->call.name = name;
	user->call.flags = TRACE_EVENT_FL_TRACEPOINT;
	user->call.tp = &user->tracepoint;
	user->call.event.funcs = &user_event_funcs;
	user->class.system = group->system_name;

	user->class.fields_array = user_event_fields_array;
	user->class.get_fields = user_event_get_fields;
	user->class.reg = user_event_reg;
	user->class.probe = user_event_ftrace;
#ifdef CONFIG_PERF_EVENTS
	user->class.perf_probe = user_event_perf;
#endif

	mutex_lock(&event_mutex);

	ret = user_event_trace_register(user);

	if (ret)
		goto put_user_lock;

	user->index = index;

	/* Ensure we track self ref and caller ref (2) */
	refcount_set(&user->refcnt, 2);

	/* Publish: dyn_event list, status bitmap and lookup hash table */
	dyn_event_init(&user->devent, &user_event_dops);
	dyn_event_add(&user->devent, &user->call);
	set_bit(user->index, group->page_bitmap);
	hash_add(group->register_table, &user->node, key);

	mutex_unlock(&event_mutex);

	*newuser = user;
	return 0;
put_user_lock:
	mutex_unlock(&event_mutex);
put_user:
	/* Unwind partial construction; @name is freed by the caller */
	user_event_destroy_fields(user);
	user_event_destroy_validators(user);
	kfree(user->call.print_fmt);
	kfree(user);
	return ret;
}
/*
 * Deletes a previously created event if it is no longer being used.
 *
 * Called with the group's reg_mutex held. The refcount_dec() pairs with
 * the reference taken by find_user_event(); only when that leaves just
 * the self reference is the event actually destroyed.
 */
static int delete_user_event(struct user_event_group *group, char *name)
{
	u32 key;
	struct user_event *user = find_user_event(group, name, &key);

	if (!user)
		return -ENOENT;

	/* Drop the lookup reference taken by find_user_event() */
	refcount_dec(&user->refcnt);

	if (!user_event_last_ref(user))
		return -EBUSY;

	return destroy_user_event(user);
}
/*
 * Validates the user payload and writes via iterator.
 *
 * The first sizeof(int) bytes of the write select the event by the index
 * previously returned from the register ioctl; the remainder is the
 * payload delivered to every probe attached to the event's tracepoint.
 */
static ssize_t user_events_write_core(struct file *file, struct iov_iter *i)
{
	struct user_event_file_info *info = file->private_data;
	struct user_event_refs *refs;
	struct user_event *user = NULL;
	struct tracepoint *tp;
	ssize_t ret = i->count;
	int idx;

	if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx)))
		return -EFAULT;

	if (idx < 0)
		return -EINVAL;

	rcu_read_lock_sched();

	refs = rcu_dereference_sched(info->refs);

	/*
	 * The refs->events array is protected by RCU, and new items may be
	 * added. But the user retrieved from indexing into the events array
	 * shall be immutable while the file is opened.
	 */
	if (likely(refs && idx < refs->count))
		user = refs->events[idx];

	rcu_read_unlock_sched();

	if (unlikely(user == NULL))
		return -ENOENT;

	/* Reject payloads too small to hold the event's fixed fields */
	if (unlikely(i->count < user->min_size))
		return -EINVAL;

	tp = &user->tracepoint;

	/*
	 * It's possible key.enabled disables after this check, however
	 * we don't mind if a few events are included in this condition.
	 */
	if (likely(atomic_read(&tp->key.enabled) > 0)) {
		struct tracepoint_func *probe_func_ptr;
		user_event_func_t probe_func;
		struct iov_iter copy;
		void *tpdata;
		bool faulted;

		/* Pre-fault now; probes copy with page faults disabled */
		if (unlikely(fault_in_iov_iter_readable(i, i->count)))
			return -EFAULT;

		faulted = false;

		rcu_read_lock_sched();

		probe_func_ptr = rcu_dereference_sched(tp->funcs);

		if (probe_func_ptr) {
			/* Each probe consumes its own copy of the iterator */
			do {
				copy = *i;
				probe_func = probe_func_ptr->func;
				tpdata = probe_func_ptr->data;
				probe_func(user, &copy, tpdata, &faulted);
			} while ((++probe_func_ptr)->func);
		}

		rcu_read_unlock_sched();

		if (unlikely(faulted))
			return -EFAULT;
	} else
		return -EBADF;

	return ret;
}
  1146. static int user_events_open(struct inode *node, struct file *file)
  1147. {
  1148. struct user_event_group *group;
  1149. struct user_event_file_info *info;
  1150. group = current_user_event_group();
  1151. if (!group)
  1152. return -ENOENT;
  1153. info = kzalloc(sizeof(*info), GFP_KERNEL);
  1154. if (!info)
  1155. return -ENOMEM;
  1156. info->group = group;
  1157. file->private_data = info;
  1158. return 0;
  1159. }
  1160. static ssize_t user_events_write(struct file *file, const char __user *ubuf,
  1161. size_t count, loff_t *ppos)
  1162. {
  1163. struct iovec iov;
  1164. struct iov_iter i;
  1165. if (unlikely(*ppos != 0))
  1166. return -EFAULT;
  1167. if (unlikely(import_single_range(ITER_SOURCE, (char __user *)ubuf,
  1168. count, &iov, &i)))
  1169. return -EFAULT;
  1170. return user_events_write_core(file, &i);
  1171. }
/* write_iter() entry point: delegates straight to the common write path */
static ssize_t user_events_write_iter(struct kiocb *kp, struct iov_iter *i)
{
	return user_events_write_core(kp->ki_filp, i);
}
/*
 * Adds a user_event to this file's reference array, returning its index
 * (the write_index userspace uses) or a negative errno.
 *
 * Must be called with the group's reg_mutex held. Readers access
 * info->refs under RCU, so growth is done copy-on-write: a larger array
 * is built, published with rcu_assign_pointer(), and the old one is
 * freed after a grace period.
 */
static int user_events_ref_add(struct user_event_file_info *info,
			       struct user_event *user)
{
	struct user_event_group *group = info->group;
	struct user_event_refs *refs, *new_refs;
	int i, size, count = 0;

	refs = rcu_dereference_protected(info->refs,
					 lockdep_is_held(&group->reg_mutex));

	if (refs) {
		count = refs->count;

		/* Already tracked: reuse the existing index */
		for (i = 0; i < count; ++i)
			if (refs->events[i] == user)
				return i;
	}

	size = struct_size(refs, events, count + 1);

	new_refs = kzalloc(size, GFP_KERNEL);

	if (!new_refs)
		return -ENOMEM;

	new_refs->count = count + 1;

	for (i = 0; i < count; ++i)
		new_refs->events[i] = refs->events[i];

	/* i == count after the loop: the new slot */
	new_refs->events[i] = user;

	/* Hold a reference for as long as this file tracks the event */
	refcount_inc(&user->refcnt);

	rcu_assign_pointer(info->refs, new_refs);

	if (refs)
		kfree_rcu(refs, rcu);

	return i;
}
  1204. static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
  1205. {
  1206. u32 size;
  1207. long ret;
  1208. ret = get_user(size, &ureg->size);
  1209. if (ret)
  1210. return ret;
  1211. if (size > PAGE_SIZE)
  1212. return -E2BIG;
  1213. if (size < offsetofend(struct user_reg, write_index))
  1214. return -EINVAL;
  1215. ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size);
  1216. if (ret)
  1217. return ret;
  1218. kreg->size = size;
  1219. return 0;
  1220. }
  1221. /*
  1222. * Registers a user_event on behalf of a user process.
  1223. */
  1224. static long user_events_ioctl_reg(struct user_event_file_info *info,
  1225. unsigned long uarg)
  1226. {
  1227. struct user_reg __user *ureg = (struct user_reg __user *)uarg;
  1228. struct user_reg reg;
  1229. struct user_event *user;
  1230. char *name;
  1231. long ret;
  1232. ret = user_reg_get(ureg, &reg);
  1233. if (ret)
  1234. return ret;
  1235. name = strndup_user((const char __user *)(uintptr_t)reg.name_args,
  1236. MAX_EVENT_DESC);
  1237. if (IS_ERR(name)) {
  1238. ret = PTR_ERR(name);
  1239. return ret;
  1240. }
  1241. ret = user_event_parse_cmd(info->group, name, &user);
  1242. if (ret) {
  1243. kfree(name);
  1244. return ret;
  1245. }
  1246. ret = user_events_ref_add(info, user);
  1247. /* No longer need parse ref, ref_add either worked or not */
  1248. refcount_dec(&user->refcnt);
  1249. /* Positive number is index and valid */
  1250. if (ret < 0)
  1251. return ret;
  1252. put_user((u32)ret, &ureg->write_index);
  1253. put_user(user->index, &ureg->status_bit);
  1254. return 0;
  1255. }
  1256. /*
  1257. * Deletes a user_event on behalf of a user process.
  1258. */
  1259. static long user_events_ioctl_del(struct user_event_file_info *info,
  1260. unsigned long uarg)
  1261. {
  1262. void __user *ubuf = (void __user *)uarg;
  1263. char *name;
  1264. long ret;
  1265. name = strndup_user(ubuf, MAX_EVENT_DESC);
  1266. if (IS_ERR(name))
  1267. return PTR_ERR(name);
  1268. /* event_mutex prevents dyn_event from racing */
  1269. mutex_lock(&event_mutex);
  1270. ret = delete_user_event(info->group, name);
  1271. mutex_unlock(&event_mutex);
  1272. kfree(name);
  1273. return ret;
  1274. }
  1275. /*
  1276. * Handles the ioctl from user mode to register or alter operations.
  1277. */
  1278. static long user_events_ioctl(struct file *file, unsigned int cmd,
  1279. unsigned long uarg)
  1280. {
  1281. struct user_event_file_info *info = file->private_data;
  1282. struct user_event_group *group = info->group;
  1283. long ret = -ENOTTY;
  1284. switch (cmd) {
  1285. case DIAG_IOCSREG:
  1286. mutex_lock(&group->reg_mutex);
  1287. ret = user_events_ioctl_reg(info, uarg);
  1288. mutex_unlock(&group->reg_mutex);
  1289. break;
  1290. case DIAG_IOCSDEL:
  1291. mutex_lock(&group->reg_mutex);
  1292. ret = user_events_ioctl_del(info, uarg);
  1293. mutex_unlock(&group->reg_mutex);
  1294. break;
  1295. }
  1296. return ret;
  1297. }
/*
 * Handles the final close of the file from user mode.
 *
 * Drops the per-file reference held on each event in info->refs and
 * frees the per-file state. After the last reference drop the events may
 * be destroyed elsewhere (e.g. by a delete ioctl on another file).
 */
static int user_events_release(struct inode *node, struct file *file)
{
	struct user_event_file_info *info = file->private_data;
	struct user_event_group *group;
	struct user_event_refs *refs;
	struct user_event *user;
	int i;

	if (!info)
		return -EINVAL;

	group = info->group;

	/*
	 * Ensure refs cannot change under any situation by taking the
	 * register mutex during the final freeing of the references.
	 */
	mutex_lock(&group->reg_mutex);

	refs = info->refs;

	if (!refs)
		goto out;

	/*
	 * The lifetime of refs has reached an end, it's tied to this file.
	 * The underlying user_events are ref counted, and cannot be freed.
	 * After this decrement, the user_events may be freed elsewhere.
	 */
	for (i = 0; i < refs->count; ++i) {
		user = refs->events[i];

		if (user)
			refcount_dec(&user->refcnt);
	}
out:
	file->private_data = NULL;

	mutex_unlock(&group->reg_mutex);

	/* No further RCU readers: clearing private_data made refs unreachable */
	kfree(refs);
	kfree(info);

	return 0;
}
/* File operations for the user_events_data tracefs file */
static const struct file_operations user_data_fops = {
	.open = user_events_open,
	.write = user_events_write,
	.write_iter = user_events_write_iter,
	.unlocked_ioctl	= user_events_ioctl,
	.release = user_events_release,
};
  1343. static struct user_event_group *user_status_group(struct file *file)
  1344. {
  1345. struct seq_file *m = file->private_data;
  1346. if (!m)
  1347. return NULL;
  1348. return m->private;
  1349. }
/*
 * Maps the shared page into the user process for checking if event is enabled.
 *
 * The mapping must cover exactly MAX_BYTES and is established read-only
 * (VM_READ page protection) over the group's register page data.
 */
static int user_status_mmap(struct file *file, struct vm_area_struct *vma)
{
	char *pages;
	struct user_event_group *group = user_status_group(file);
	unsigned long size = vma->vm_end - vma->vm_start;

	if (size != MAX_BYTES)
		return -EINVAL;

	if (!group)
		return -EINVAL;

	pages = group->register_page_data;

	/* Direct-mapped kernel memory, hence virt_to_phys() for the PFN */
	return remap_pfn_range(vma, vma->vm_start,
			       virt_to_phys(pages) >> PAGE_SHIFT,
			       size, vm_get_page_prot(VM_READ));
}
  1367. static void *user_seq_start(struct seq_file *m, loff_t *pos)
  1368. {
  1369. if (*pos)
  1370. return NULL;
  1371. return (void *)1;
  1372. }
/* seq_file next: advance past the single element, ending iteration */
static void *user_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
	++*pos;
	return NULL;
}
/* seq_file stop: nothing to release, all locking is done in show */
static void user_seq_stop(struct seq_file *m, void *p)
{
}
  1381. static int user_seq_show(struct seq_file *m, void *p)
  1382. {
  1383. struct user_event_group *group = m->private;
  1384. struct user_event *user;
  1385. char status;
  1386. int i, active = 0, busy = 0, flags;
  1387. if (!group)
  1388. return -EINVAL;
  1389. mutex_lock(&group->reg_mutex);
  1390. hash_for_each(group->register_table, i, user, node) {
  1391. status = user->status;
  1392. flags = user->flags;
  1393. seq_printf(m, "%d:%s", user->index, EVENT_NAME(user));
  1394. if (flags != 0 || status != 0)
  1395. seq_puts(m, " #");
  1396. if (status != 0) {
  1397. seq_puts(m, " Used by");
  1398. if (status & EVENT_STATUS_FTRACE)
  1399. seq_puts(m, " ftrace");
  1400. if (status & EVENT_STATUS_PERF)
  1401. seq_puts(m, " perf");
  1402. if (status & EVENT_STATUS_OTHER)
  1403. seq_puts(m, " other");
  1404. busy++;
  1405. }
  1406. seq_puts(m, "\n");
  1407. active++;
  1408. }
  1409. mutex_unlock(&group->reg_mutex);
  1410. seq_puts(m, "\n");
  1411. seq_printf(m, "Active: %d\n", active);
  1412. seq_printf(m, "Busy: %d\n", busy);
  1413. seq_printf(m, "Max: %ld\n", MAX_EVENTS);
  1414. return 0;
  1415. }
/* Single-shot seq_file iterator for the status file */
static const struct seq_operations user_seq_ops = {
	.start = user_seq_start,
	.next = user_seq_next,
	.stop = user_seq_stop,
	.show = user_seq_show,
};
  1422. static int user_status_open(struct inode *node, struct file *file)
  1423. {
  1424. struct user_event_group *group;
  1425. int ret;
  1426. group = current_user_event_group();
  1427. if (!group)
  1428. return -ENOENT;
  1429. ret = seq_open(file, &user_seq_ops);
  1430. if (!ret) {
  1431. /* Chain group to seq_file */
  1432. struct seq_file *m = file->private_data;
  1433. m->private = group;
  1434. }
  1435. return ret;
  1436. }
/* File operations for the user_events_status tracefs file */
static const struct file_operations user_status_fops = {
	.open = user_status_open,
	.mmap = user_status_mmap,
	.read = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
  1444. /*
  1445. * Creates a set of tracefs files to allow user mode interactions.
  1446. */
  1447. static int create_user_tracefs(void)
  1448. {
  1449. struct dentry *edata, *emmap;
  1450. edata = tracefs_create_file("user_events_data", TRACE_MODE_WRITE,
  1451. NULL, NULL, &user_data_fops);
  1452. if (!edata) {
  1453. pr_warn("Could not create tracefs 'user_events_data' entry\n");
  1454. goto err;
  1455. }
  1456. /* mmap with MAP_SHARED requires writable fd */
  1457. emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE,
  1458. NULL, NULL, &user_status_fops);
  1459. if (!emmap) {
  1460. tracefs_remove(edata);
  1461. pr_warn("Could not create tracefs 'user_events_mmap' entry\n");
  1462. goto err;
  1463. }
  1464. return 0;
  1465. err:
  1466. return -ENODEV;
  1467. }
  1468. static int __init trace_events_user_init(void)
  1469. {
  1470. int ret;
  1471. init_group = user_event_group_create(&init_user_ns);
  1472. if (!init_group)
  1473. return -ENOMEM;
  1474. ret = create_user_tracefs();
  1475. if (ret) {
  1476. pr_warn("user_events could not register with tracefs\n");
  1477. user_event_group_destroy(init_group);
  1478. init_group = NULL;
  1479. return ret;
  1480. }
  1481. if (dyn_event_register(&user_event_dops))
  1482. pr_warn("user_events could not register with dyn_events\n");
  1483. return 0;
  1484. }
  1485. fs_initcall(trace_events_user_init);