syscalls.c

// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/time_namespace.h>

#include "futex.h"
#include <trace/hooks/futex.h>

/*
 * Support for robust futexes: the kernel cleans up held futexes at
 * thread exit time.
 *
 * Implementation: user-space maintains a per-thread list of locks it
 * is holding. Upon do_exit(), the kernel carefully walks this list,
 * and marks all locks that are owned by this thread with the
 * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
 * always manipulated with the lock held, so the list is private and
 * per-thread. Userspace also maintains a per-thread 'list_op_pending'
 * field, to allow the kernel to clean up if the thread dies after
 * acquiring the lock, but just before it could have added itself to
 * the list. There can only be one such pending lock.
 */

/**
 * sys_set_robust_list() - Set the robust-futex list head of a task
 * @head:	pointer to the list-head
 * @len:	length of the list-head, as userspace expects
 */
SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
		size_t, len)
{
	/*
	 * The kernel knows only one size for now:
	 */
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->robust_list = head;

	return 0;
}
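
/*
 * Illustrative sketch (not part of this file): how userspace would
 * typically register a robust list with the syscall above.  glibc does
 * this internally for every thread; the raw call is shown only to make
 * the shape of the interface concrete.  struct robust_list_head and
 * SYS_set_robust_list come from <linux/futex.h> and <sys/syscall.h>;
 * error handling is omitted.
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static struct robust_list_head head = {
 *		.list		 = { &head.list },	// empty circular list
 *		.futex_offset	 = 0,			// offset from a list entry to its futex word
 *		.list_op_pending = NULL,
 *	};
 *
 *	// Register the list for the calling thread; the kernel walks it
 *	// at thread exit and marks held locks with FUTEX_OWNER_DIED.
 *	syscall(SYS_set_robust_list, &head, sizeof(head));
 */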

/**
 * sys_get_robust_list() - Get the robust-futex list head of a task
 * @pid:	pid of the process [zero for current task]
 * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
 * @len_ptr:	pointer to a length field, the kernel fills in the header size
 */
SYSCALL_DEFINE3(get_robust_list, int, pid,
		struct robust_list_head __user * __user *, head_ptr,
		size_t __user *, len_ptr)
{
	struct robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(head, head_ptr);

err_unlock:
	rcu_read_unlock();
	return ret;
}
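
/*
 * Illustrative sketch (not part of this file): reading another task's
 * registered list head, as a debugger or checkpoint tool might.  The
 * caller needs ptrace-level access to the target, per the check above.
 * Error handling omitted.
 *
 *	struct robust_list_head *rhead;
 *	size_t rlen;
 *
 *	syscall(SYS_get_robust_list, target_pid, &rhead, &rlen);
 *	// rhead now holds the target's registered list-head pointer,
 *	// rlen the header size the kernel expects.
 */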

long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
		    cmd != FUTEX_LOCK_PI2)
			return -ENOSYS;
	}

	trace_android_vh_do_futex(cmd, &flags, uaddr2);
	switch (cmd) {
	case FUTEX_WAIT:
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAIT_BITSET:
		return futex_wait(uaddr, flags, val, timeout, val3);
	case FUTEX_WAKE:
		val3 = FUTEX_BITSET_MATCH_ANY;
		fallthrough;
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:
		flags |= FLAGS_CLOCKRT;
		fallthrough;
	case FUTEX_LOCK_PI2:
		return futex_lock_pi(uaddr, flags, timeout, 0);
	case FUTEX_UNLOCK_PI:
		return futex_unlock_pi(uaddr, flags);
	case FUTEX_TRYLOCK_PI:
		return futex_lock_pi(uaddr, flags, NULL, 1);
	case FUTEX_WAIT_REQUEUE_PI:
		val3 = FUTEX_BITSET_MATCH_ANY;
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
	}
	return -ENOSYS;
}
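
/*
 * Illustrative sketch (not part of this file): the command dispatch above
 * is where a raw futex(2) call from userspace ends up.  A minimal
 * wait/wake pair on a 32-bit word could look like this; SYS_futex and the
 * FUTEX_* constants come from <sys/syscall.h> and <linux/futex.h>, error
 * handling is omitted.
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static unsigned int word;	// the futex word, shared between threads
 *
 *	// Waiter: sleep only while the word still holds the expected value 0
 *	// (reaches futex_wait() via the FUTEX_WAIT case above).
 *	syscall(SYS_futex, &word, FUTEX_WAIT_PRIVATE, 0, NULL, NULL, 0);
 *
 *	// Waker: wake at most one thread blocked on the word
 *	// (reaches futex_wake() via the FUTEX_WAKE case above).
 *	syscall(SYS_futex, &word, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);
 */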

static __always_inline bool futex_cmd_has_timeout(u32 cmd)
{
	switch (cmd) {
	case FUTEX_WAIT:
	case FUTEX_LOCK_PI:
	case FUTEX_LOCK_PI2:
	case FUTEX_WAIT_BITSET:
	case FUTEX_WAIT_REQUEUE_PI:
		return true;
	}
	return false;
}

static __always_inline int
futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
{
	if (!timespec64_valid(ts))
		return -EINVAL;

	*t = timespec64_to_ktime(*ts);
	if (cmd == FUTEX_WAIT)
		*t = ktime_add_safe(ktime_get(), *t);
	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
	return 0;
}
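
/*
 * Illustrative sketch (not part of this file): the asymmetry handled by
 * futex_init_timeout() as seen from userspace.  FUTEX_WAIT takes a
 * *relative* timeout, which the helper turns into an absolute deadline
 * with ktime_add_safe(); FUTEX_WAIT_BITSET takes an *absolute* timeout.
 * Reuses the 'word' futex from the sketch after do_futex(); <time.h> is
 * assumed for clock_gettime(), error handling omitted.
 *
 *	// Relative: give up roughly 500ms from now.
 *	struct timespec rel = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000 };
 *	syscall(SYS_futex, &word, FUTEX_WAIT_PRIVATE, 0, &rel, NULL, 0);
 *
 *	// Absolute: give up at a CLOCK_MONOTONIC deadline 500ms from now.
 *	struct timespec abs;
 *	clock_gettime(CLOCK_MONOTONIC, &abs);
 *	abs.tv_nsec += 500 * 1000 * 1000;
 *	if (abs.tv_nsec >= 1000000000) {
 *		abs.tv_sec++;
 *		abs.tv_nsec -= 1000000000;
 *	}
 *	syscall(SYS_futex, &word, FUTEX_WAIT_BITSET_PRIVATE, 0, &abs, NULL,
 *		FUTEX_BITSET_MATCH_ANY);
 */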

SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
		const struct __kernel_timespec __user *, utime,
		u32 __user *, uaddr2, u32, val3)
{
	int ret, cmd = op & FUTEX_CMD_MASK;
	ktime_t t, *tp = NULL;
	struct timespec64 ts;

	if (utime && futex_cmd_has_timeout(cmd)) {
		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
			return -EFAULT;
		if (get_timespec64(&ts, utime))
			return -EFAULT;
		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;
		tp = &t;
	}

	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}

/* Mask of available flags for each futex in futex_waitv list */
#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)

/**
 * futex_parse_waitv - Parse a waitv array from userspace
 * @futexv:	Kernel side list of waiters to be filled
 * @uwaitv:	Userspace list to be parsed
 * @nr_futexes:	Length of futexv
 *
 * Return: Error code on failure, 0 on success
 */
static int futex_parse_waitv(struct futex_vector *futexv,
			     struct futex_waitv __user *uwaitv,
			     unsigned int nr_futexes)
{
	struct futex_waitv aux;
	unsigned int i;

	for (i = 0; i < nr_futexes; i++) {
		if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
			return -EFAULT;

		if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
			return -EINVAL;

		if (!(aux.flags & FUTEX_32))
			return -EINVAL;

		futexv[i].w.flags = aux.flags;
		futexv[i].w.val = aux.val;
		futexv[i].w.uaddr = aux.uaddr;
		futexv[i].q = futex_q_init;
	}

	return 0;
}

/**
 * sys_futex_waitv - Wait on a list of futexes
 * @waiters:	List of futexes to wait on
 * @nr_futexes:	Length of futexv
 * @flags:	Flag for timeout (monotonic/realtime)
 * @timeout:	Optional absolute timeout.
 * @clockid:	Clock to be used for the timeout, realtime or monotonic.
 *
 * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
 * if a futex_wake() is performed at any uaddr. The syscall returns immediately
 * if any waiter has *uaddr != val. *timeout is an optional timeout value for
 * the operation. Each waiter has individual flags. The `flags` argument for
 * the syscall should be used solely for specifying the timeout as realtime, if
 * needed. Flags for private futexes, sizes, etc. should be used on the
 * individual flags of each waiter.
 *
 * Returns the array index of one of the woken futexes. No further information
 * is provided: any number of other futexes may also have been woken by the
 * same event, and if more than one futex was woken, the returned index may
 * refer to any one of them. (It is not necessarily the futex with the
 * smallest index, nor the one most recently woken, nor...)
 */
SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
		unsigned int, nr_futexes, unsigned int, flags,
		struct __kernel_timespec __user *, timeout, clockid_t, clockid)
{
	struct hrtimer_sleeper to;
	struct futex_vector *futexv;
	struct timespec64 ts;
	ktime_t time;
	int ret;

	/* This syscall supports no flags for now */
	if (flags)
		return -EINVAL;

	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
		return -EINVAL;

	if (timeout) {
		int flag_clkid = 0, flag_init = 0;

		if (clockid == CLOCK_REALTIME) {
			flag_clkid = FLAGS_CLOCKRT;
			flag_init = FUTEX_CLOCK_REALTIME;
		}

		if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
			return -EINVAL;

		if (get_timespec64(&ts, timeout))
			return -EFAULT;

		/*
		 * Since there's no opcode for futex_waitv, use
		 * FUTEX_WAIT_BITSET that uses absolute timeout as well
		 */
		ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
		if (ret)
			return ret;

		futex_setup_timer(&time, &to, flag_clkid, 0);
	}

	futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
	if (!futexv) {
		ret = -ENOMEM;
		goto destroy_timer;
	}

	ret = futex_parse_waitv(futexv, waiters, nr_futexes);
	if (!ret)
		ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);

	kfree(futexv);

destroy_timer:
	if (timeout) {
		hrtimer_cancel(&to.timer);
		destroy_hrtimer_on_stack(&to.timer);
	}
	return ret;
}
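
/*
 * Illustrative sketch (not part of this file): waiting on two futex words
 * at once from userspace.  Assumes headers new enough to provide
 * struct futex_waitv and SYS_futex_waitv (the syscall was added in Linux
 * 5.16); error handling is omitted.
 *
 *	#include <linux/futex.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <stdint.h>
 *
 *	static uint32_t word_a, word_b;
 *
 *	struct futex_waitv waitv[2] = {
 *		{ .val = 0, .uaddr = (uintptr_t)&word_a,
 *		  .flags = FUTEX_32 | FUTEX_PRIVATE_FLAG },
 *		{ .val = 0, .uaddr = (uintptr_t)&word_b,
 *		  .flags = FUTEX_32 | FUTEX_PRIVATE_FLAG },
 *	};
 *
 *	// Blocks until one of the words is woken; the return value is the
 *	// index of a woken entry, as described in the comment above.  No
 *	// timeout is passed here, so the clockid goes unused.
 *	long idx = syscall(SYS_futex_waitv, waitv, 2, 0, NULL, CLOCK_MONOTONIC);
 */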

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(set_robust_list,
		struct compat_robust_list_head __user *, head,
		compat_size_t, len)
{
	if (unlikely(len != sizeof(*head)))
		return -EINVAL;

	current->compat_robust_list = head;

	return 0;
}

COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
			compat_uptr_t __user *, head_ptr,
			compat_size_t __user *, len_ptr)
{
	struct compat_robust_list_head __user *head;
	unsigned long ret;
	struct task_struct *p;

	rcu_read_lock();

	ret = -ESRCH;
	if (!pid)
		p = current;
	else {
		p = find_task_by_vpid(pid);
		if (!p)
			goto err_unlock;
	}

	ret = -EPERM;
	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
		goto err_unlock;

	head = p->compat_robust_list;
	rcu_read_unlock();

	if (put_user(sizeof(*head), len_ptr))
		return -EFAULT;
	return put_user(ptr_to_compat(head), head_ptr);

err_unlock:
	rcu_read_unlock();
	return ret;
}
#endif /* CONFIG_COMPAT */

#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
		u32, val3)
{
	int ret, cmd = op & FUTEX_CMD_MASK;
	ktime_t t, *tp = NULL;
	struct timespec64 ts;

	if (utime && futex_cmd_has_timeout(cmd)) {
		if (get_old_timespec32(&ts, utime))
			return -EFAULT;
		ret = futex_init_timeout(cmd, op, &ts, &t);
		if (ret)
			return ret;
		tp = &t;
	}

	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */