posix-cpu-timers.c 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Implement CPU time clocks for the POSIX clock interface.
  4. */
  5. #include <linux/sched/signal.h>
  6. #include <linux/sched/cputime.h>
  7. #include <linux/posix-timers.h>
  8. #include <linux/errno.h>
  9. #include <linux/math64.h>
  10. #include <linux/uaccess.h>
  11. #include <linux/kernel_stat.h>
  12. #include <trace/events/timer.h>
  13. #include <linux/tick.h>
  14. #include <linux/workqueue.h>
  15. #include <linux/compat.h>
  16. #include <linux/sched/deadline.h>
  17. #include <linux/task_work.h>
  18. #include "posix-timers.h"
  19. static void posix_cpu_timer_rearm(struct k_itimer *timer);
  20. void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
  21. {
  22. posix_cputimers_init(pct);
  23. if (cpu_limit != RLIM_INFINITY) {
  24. pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
  25. pct->timers_active = true;
  26. }
  27. }
  28. /*
  29. * Called after updating RLIMIT_CPU to run cpu timer and update
  30. * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
  31. * necessary. Needs siglock protection since other code may update the
  32. * expiration cache as well.
  33. *
  34. * Returns 0 on success, -ESRCH on failure. Can fail if the task is exiting and
  35. * we cannot lock_task_sighand. Cannot fail if task is current.
  36. */
  37. int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
  38. {
  39. u64 nsecs = rlim_new * NSEC_PER_SEC;
  40. unsigned long irq_fl;
  41. if (!lock_task_sighand(task, &irq_fl))
  42. return -ESRCH;
  43. set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
  44. unlock_task_sighand(task, &irq_fl);
  45. return 0;
  46. }
  47. /*
  48. * Functions for validating access to tasks.
  49. */
  50. static struct pid *pid_for_clock(const clockid_t clock, bool gettime)
  51. {
  52. const bool thread = !!CPUCLOCK_PERTHREAD(clock);
  53. const pid_t upid = CPUCLOCK_PID(clock);
  54. struct pid *pid;
  55. if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
  56. return NULL;
  57. /*
  58. * If the encoded PID is 0, then the timer is targeted at current
  59. * or the process to which current belongs.
  60. */
  61. if (upid == 0)
  62. return thread ? task_pid(current) : task_tgid(current);
  63. pid = find_vpid(upid);
  64. if (!pid)
  65. return NULL;
  66. if (thread) {
  67. struct task_struct *tsk = pid_task(pid, PIDTYPE_PID);
  68. return (tsk && same_thread_group(tsk, current)) ? pid : NULL;
  69. }
  70. /*
  71. * For clock_gettime(PROCESS) allow finding the process by
  72. * with the pid of the current task. The code needs the tgid
  73. * of the process so that pid_task(pid, PIDTYPE_TGID) can be
  74. * used to find the process.
  75. */
  76. if (gettime && (pid == task_pid(current)))
  77. return task_tgid(current);
  78. /*
  79. * For processes require that pid identifies a process.
  80. */
  81. return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL;
  82. }
  83. static inline int validate_clock_permissions(const clockid_t clock)
  84. {
  85. int ret;
  86. rcu_read_lock();
  87. ret = pid_for_clock(clock, false) ? 0 : -EINVAL;
  88. rcu_read_unlock();
  89. return ret;
  90. }
  91. static inline enum pid_type clock_pid_type(const clockid_t clock)
  92. {
  93. return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID;
  94. }
  95. static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer)
  96. {
  97. return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock));
  98. }
  99. /*
  100. * Update expiry time from increment, and increase overrun count,
  101. * given the current clock sample.
  102. */
  103. static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
  104. {
  105. u64 delta, incr, expires = timer->it.cpu.node.expires;
  106. int i;
  107. if (!timer->it_interval)
  108. return expires;
  109. if (now < expires)
  110. return expires;
  111. incr = timer->it_interval;
  112. delta = now + incr - expires;
  113. /* Don't use (incr*2 < delta), incr*2 might overflow. */
  114. for (i = 0; incr < delta - incr; i++)
  115. incr = incr << 1;
  116. for (; i >= 0; incr >>= 1, i--) {
  117. if (delta < incr)
  118. continue;
  119. timer->it.cpu.node.expires += incr;
  120. timer->it_overrun += 1LL << i;
  121. delta -= incr;
  122. }
  123. return timer->it.cpu.node.expires;
  124. }
  125. /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
  126. static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
  127. {
  128. return !(~pct->bases[CPUCLOCK_PROF].nextevt |
  129. ~pct->bases[CPUCLOCK_VIRT].nextevt |
  130. ~pct->bases[CPUCLOCK_SCHED].nextevt);
  131. }
  132. static int
  133. posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
  134. {
  135. int error = validate_clock_permissions(which_clock);
  136. if (!error) {
  137. tp->tv_sec = 0;
  138. tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
  139. if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
  140. /*
  141. * If sched_clock is using a cycle counter, we
  142. * don't have any idea of its true resolution
  143. * exported, but it is much more than 1s/HZ.
  144. */
  145. tp->tv_nsec = 1;
  146. }
  147. }
  148. return error;
  149. }
  150. static int
  151. posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
  152. {
  153. int error = validate_clock_permissions(clock);
  154. /*
  155. * You can never reset a CPU clock, but we check for other errors
  156. * in the call before failing with EPERM.
  157. */
  158. return error ? : -EPERM;
  159. }
  160. /*
  161. * Sample a per-thread clock for the given task. clkid is validated.
  162. */
  163. static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
  164. {
  165. u64 utime, stime;
  166. if (clkid == CPUCLOCK_SCHED)
  167. return task_sched_runtime(p);
  168. task_cputime(p, &utime, &stime);
  169. switch (clkid) {
  170. case CPUCLOCK_PROF:
  171. return utime + stime;
  172. case CPUCLOCK_VIRT:
  173. return utime;
  174. default:
  175. WARN_ON_ONCE(1);
  176. }
  177. return 0;
  178. }
  179. static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
  180. {
  181. samples[CPUCLOCK_PROF] = stime + utime;
  182. samples[CPUCLOCK_VIRT] = utime;
  183. samples[CPUCLOCK_SCHED] = rtime;
  184. }
  185. static void task_sample_cputime(struct task_struct *p, u64 *samples)
  186. {
  187. u64 stime, utime;
  188. task_cputime(p, &utime, &stime);
  189. store_samples(samples, stime, utime, p->se.sum_exec_runtime);
  190. }
  191. static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
  192. u64 *samples)
  193. {
  194. u64 stime, utime, rtime;
  195. utime = atomic64_read(&at->utime);
  196. stime = atomic64_read(&at->stime);
  197. rtime = atomic64_read(&at->sum_exec_runtime);
  198. store_samples(samples, stime, utime, rtime);
  199. }
  200. /*
  201. * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
  202. * to avoid race conditions with concurrent updates to cputime.
  203. */
  204. static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
  205. {
  206. u64 curr_cputime;
  207. retry:
  208. curr_cputime = atomic64_read(cputime);
  209. if (sum_cputime > curr_cputime) {
  210. if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
  211. goto retry;
  212. }
  213. }
  214. static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
  215. struct task_cputime *sum)
  216. {
  217. __update_gt_cputime(&cputime_atomic->utime, sum->utime);
  218. __update_gt_cputime(&cputime_atomic->stime, sum->stime);
  219. __update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
  220. }
  221. /**
  222. * thread_group_sample_cputime - Sample cputime for a given task
  223. * @tsk: Task for which cputime needs to be started
  224. * @samples: Storage for time samples
  225. *
  226. * Called from sys_getitimer() to calculate the expiry time of an active
  227. * timer. That means group cputime accounting is already active. Called
  228. * with task sighand lock held.
  229. *
  230. * Updates @times with an uptodate sample of the thread group cputimes.
  231. */
  232. void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
  233. {
  234. struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
  235. struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
  236. WARN_ON_ONCE(!pct->timers_active);
  237. proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
  238. }
  239. /**
  240. * thread_group_start_cputime - Start cputime and return a sample
  241. * @tsk: Task for which cputime needs to be started
  242. * @samples: Storage for time samples
  243. *
  244. * The thread group cputime accounting is avoided when there are no posix
  245. * CPU timers armed. Before starting a timer it's required to check whether
  246. * the time accounting is active. If not, a full update of the atomic
  247. * accounting store needs to be done and the accounting enabled.
  248. *
  249. * Updates @times with an uptodate sample of the thread group cputimes.
  250. */
  251. static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
  252. {
  253. struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
  254. struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
  255. lockdep_assert_task_sighand_held(tsk);
  256. /* Check if cputimer isn't running. This is accessed without locking. */
  257. if (!READ_ONCE(pct->timers_active)) {
  258. struct task_cputime sum;
  259. /*
  260. * The POSIX timer interface allows for absolute time expiry
  261. * values through the TIMER_ABSTIME flag, therefore we have
  262. * to synchronize the timer to the clock every time we start it.
  263. */
  264. thread_group_cputime(tsk, &sum);
  265. update_gt_cputime(&cputimer->cputime_atomic, &sum);
  266. /*
  267. * We're setting timers_active without a lock. Ensure this
  268. * only gets written to in one operation. We set it after
  269. * update_gt_cputime() as a small optimization, but
  270. * barriers are not required because update_gt_cputime()
  271. * can handle concurrent updates.
  272. */
  273. WRITE_ONCE(pct->timers_active, true);
  274. }
  275. proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
  276. }
  277. static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
  278. {
  279. struct task_cputime ct;
  280. thread_group_cputime(tsk, &ct);
  281. store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
  282. }
  283. /*
  284. * Sample a process (thread group) clock for the given task clkid. If the
  285. * group's cputime accounting is already enabled, read the atomic
  286. * store. Otherwise a full update is required. clkid is already validated.
  287. */
  288. static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
  289. bool start)
  290. {
  291. struct thread_group_cputimer *cputimer = &p->signal->cputimer;
  292. struct posix_cputimers *pct = &p->signal->posix_cputimers;
  293. u64 samples[CPUCLOCK_MAX];
  294. if (!READ_ONCE(pct->timers_active)) {
  295. if (start)
  296. thread_group_start_cputime(p, samples);
  297. else
  298. __thread_group_cputime(p, samples);
  299. } else {
  300. proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
  301. }
  302. return samples[clkid];
  303. }
  304. static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
  305. {
  306. const clockid_t clkid = CPUCLOCK_WHICH(clock);
  307. struct task_struct *tsk;
  308. u64 t;
  309. rcu_read_lock();
  310. tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock));
  311. if (!tsk) {
  312. rcu_read_unlock();
  313. return -EINVAL;
  314. }
  315. if (CPUCLOCK_PERTHREAD(clock))
  316. t = cpu_clock_sample(clkid, tsk);
  317. else
  318. t = cpu_clock_sample_group(clkid, tsk, false);
  319. rcu_read_unlock();
  320. *tp = ns_to_timespec64(t);
  321. return 0;
  322. }
  323. /*
  324. * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
  325. * This is called from sys_timer_create() and do_cpu_nanosleep() with the
  326. * new timer already all-zeros initialized.
  327. */
  328. static int posix_cpu_timer_create(struct k_itimer *new_timer)
  329. {
  330. static struct lock_class_key posix_cpu_timers_key;
  331. struct pid *pid;
  332. rcu_read_lock();
  333. pid = pid_for_clock(new_timer->it_clock, false);
  334. if (!pid) {
  335. rcu_read_unlock();
  336. return -EINVAL;
  337. }
  338. /*
  339. * If posix timer expiry is handled in task work context then
  340. * timer::it_lock can be taken without disabling interrupts as all
  341. * other locking happens in task context. This requires a separate
  342. * lock class key otherwise regular posix timer expiry would record
  343. * the lock class being taken in interrupt context and generate a
  344. * false positive warning.
  345. */
  346. if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
  347. lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);
  348. new_timer->kclock = &clock_posix_cpu;
  349. timerqueue_init(&new_timer->it.cpu.node);
  350. new_timer->it.cpu.pid = get_pid(pid);
  351. rcu_read_unlock();
  352. return 0;
  353. }
  354. static struct posix_cputimer_base *timer_base(struct k_itimer *timer,
  355. struct task_struct *tsk)
  356. {
  357. int clkidx = CPUCLOCK_WHICH(timer->it_clock);
  358. if (CPUCLOCK_PERTHREAD(timer->it_clock))
  359. return tsk->posix_cputimers.bases + clkidx;
  360. else
  361. return tsk->signal->posix_cputimers.bases + clkidx;
  362. }
  363. /*
  364. * Force recalculating the base earliest expiration on the next tick.
  365. * This will also re-evaluate the need to keep around the process wide
  366. * cputime counter and tick dependency and eventually shut these down
  367. * if necessary.
  368. */
  369. static void trigger_base_recalc_expires(struct k_itimer *timer,
  370. struct task_struct *tsk)
  371. {
  372. struct posix_cputimer_base *base = timer_base(timer, tsk);
  373. base->nextevt = 0;
  374. }
  375. /*
  376. * Dequeue the timer and reset the base if it was its earliest expiration.
  377. * It makes sure the next tick recalculates the base next expiration so we
  378. * don't keep the costly process wide cputime counter around for a random
  379. * amount of time, along with the tick dependency.
  380. *
  381. * If another timer gets queued between this and the next tick, its
  382. * expiration will update the base next event if necessary on the next
  383. * tick.
  384. */
  385. static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
  386. {
  387. struct cpu_timer *ctmr = &timer->it.cpu;
  388. struct posix_cputimer_base *base;
  389. if (!cpu_timer_dequeue(ctmr))
  390. return;
  391. base = timer_base(timer, p);
  392. if (cpu_timer_getexpires(ctmr) == base->nextevt)
  393. trigger_base_recalc_expires(timer, p);
  394. }
  395. /*
  396. * Clean up a CPU-clock timer that is about to be destroyed.
  397. * This is called from timer deletion with the timer already locked.
  398. * If we return TIMER_RETRY, it's necessary to release the timer's lock
  399. * and try again. (This happens when the timer is in the middle of firing.)
  400. */
  401. static int posix_cpu_timer_del(struct k_itimer *timer)
  402. {
  403. struct cpu_timer *ctmr = &timer->it.cpu;
  404. struct sighand_struct *sighand;
  405. struct task_struct *p;
  406. unsigned long flags;
  407. int ret = 0;
  408. rcu_read_lock();
  409. p = cpu_timer_task_rcu(timer);
  410. if (!p)
  411. goto out;
  412. /*
  413. * Protect against sighand release/switch in exit/exec and process/
  414. * thread timer list entry concurrent read/writes.
  415. */
  416. sighand = lock_task_sighand(p, &flags);
  417. if (unlikely(sighand == NULL)) {
  418. /*
  419. * This raced with the reaping of the task. The exit cleanup
  420. * should have removed this timer from the timer queue.
  421. */
  422. WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
  423. } else {
  424. if (timer->it.cpu.firing)
  425. ret = TIMER_RETRY;
  426. else
  427. disarm_timer(timer, p);
  428. unlock_task_sighand(p, &flags);
  429. }
  430. out:
  431. rcu_read_unlock();
  432. if (!ret)
  433. put_pid(ctmr->pid);
  434. return ret;
  435. }
  436. static void cleanup_timerqueue(struct timerqueue_head *head)
  437. {
  438. struct timerqueue_node *node;
  439. struct cpu_timer *ctmr;
  440. while ((node = timerqueue_getnext(head))) {
  441. timerqueue_del(head, node);
  442. ctmr = container_of(node, struct cpu_timer, node);
  443. ctmr->head = NULL;
  444. }
  445. }
  446. /*
  447. * Clean out CPU timers which are still armed when a thread exits. The
  448. * timers are only removed from the list. No other updates are done. The
  449. * corresponding posix timers are still accessible, but cannot be rearmed.
  450. *
  451. * This must be called with the siglock held.
  452. */
  453. static void cleanup_timers(struct posix_cputimers *pct)
  454. {
  455. cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
  456. cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
  457. cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
  458. }
  459. /*
  460. * These are both called with the siglock held, when the current thread
  461. * is being reaped. When the final (leader) thread in the group is reaped,
  462. * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
  463. */
  464. void posix_cpu_timers_exit(struct task_struct *tsk)
  465. {
  466. cleanup_timers(&tsk->posix_cputimers);
  467. }
  468. void posix_cpu_timers_exit_group(struct task_struct *tsk)
  469. {
  470. cleanup_timers(&tsk->signal->posix_cputimers);
  471. }
  472. /*
  473. * Insert the timer on the appropriate list before any timers that
  474. * expire later. This must be called with the sighand lock held.
  475. */
  476. static void arm_timer(struct k_itimer *timer, struct task_struct *p)
  477. {
  478. struct posix_cputimer_base *base = timer_base(timer, p);
  479. struct cpu_timer *ctmr = &timer->it.cpu;
  480. u64 newexp = cpu_timer_getexpires(ctmr);
  481. if (!cpu_timer_enqueue(&base->tqhead, ctmr))
  482. return;
  483. /*
  484. * We are the new earliest-expiring POSIX 1.b timer, hence
  485. * need to update expiration cache. Take into account that
  486. * for process timers we share expiration cache with itimers
  487. * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
  488. */
  489. if (newexp < base->nextevt)
  490. base->nextevt = newexp;
  491. if (CPUCLOCK_PERTHREAD(timer->it_clock))
  492. tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
  493. else
  494. tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
  495. }
  496. /*
  497. * The timer is locked, fire it and arrange for its reload.
  498. */
  499. static void cpu_timer_fire(struct k_itimer *timer)
  500. {
  501. struct cpu_timer *ctmr = &timer->it.cpu;
  502. if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
  503. /*
  504. * User don't want any signal.
  505. */
  506. cpu_timer_setexpires(ctmr, 0);
  507. } else if (unlikely(timer->sigq == NULL)) {
  508. /*
  509. * This a special case for clock_nanosleep,
  510. * not a normal timer from sys_timer_create.
  511. */
  512. wake_up_process(timer->it_process);
  513. cpu_timer_setexpires(ctmr, 0);
  514. } else if (!timer->it_interval) {
  515. /*
  516. * One-shot timer. Clear it as soon as it's fired.
  517. */
  518. posix_timer_event(timer, 0);
  519. cpu_timer_setexpires(ctmr, 0);
  520. } else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
  521. /*
  522. * The signal did not get queued because the signal
  523. * was ignored, so we won't get any callback to
  524. * reload the timer. But we need to keep it
  525. * ticking in case the signal is deliverable next time.
  526. */
  527. posix_cpu_timer_rearm(timer);
  528. ++timer->it_requeue_pending;
  529. }
  530. }
  531. /*
  532. * Guts of sys_timer_settime for CPU timers.
  533. * This is called with the timer locked and interrupts disabled.
  534. * If we return TIMER_RETRY, it's necessary to release the timer's lock
  535. * and try again. (This happens when the timer is in the middle of firing.)
  536. */
  537. static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
  538. struct itimerspec64 *new, struct itimerspec64 *old)
  539. {
  540. clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
  541. u64 old_expires, new_expires, old_incr, val;
  542. struct cpu_timer *ctmr = &timer->it.cpu;
  543. struct sighand_struct *sighand;
  544. struct task_struct *p;
  545. unsigned long flags;
  546. int ret = 0;
  547. rcu_read_lock();
  548. p = cpu_timer_task_rcu(timer);
  549. if (!p) {
  550. /*
  551. * If p has just been reaped, we can no
  552. * longer get any information about it at all.
  553. */
  554. rcu_read_unlock();
  555. return -ESRCH;
  556. }
  557. /*
  558. * Use the to_ktime conversion because that clamps the maximum
  559. * value to KTIME_MAX and avoid multiplication overflows.
  560. */
  561. new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));
  562. /*
  563. * Protect against sighand release/switch in exit/exec and p->cpu_timers
  564. * and p->signal->cpu_timers read/write in arm_timer()
  565. */
  566. sighand = lock_task_sighand(p, &flags);
  567. /*
  568. * If p has just been reaped, we can no
  569. * longer get any information about it at all.
  570. */
  571. if (unlikely(sighand == NULL)) {
  572. rcu_read_unlock();
  573. return -ESRCH;
  574. }
  575. /*
  576. * Disarm any old timer after extracting its expiry time.
  577. */
  578. old_incr = timer->it_interval;
  579. old_expires = cpu_timer_getexpires(ctmr);
  580. if (unlikely(timer->it.cpu.firing)) {
  581. timer->it.cpu.firing = -1;
  582. ret = TIMER_RETRY;
  583. } else {
  584. cpu_timer_dequeue(ctmr);
  585. }
  586. /*
  587. * We need to sample the current value to convert the new
  588. * value from to relative and absolute, and to convert the
  589. * old value from absolute to relative. To set a process
  590. * timer, we need a sample to balance the thread expiry
  591. * times (in arm_timer). With an absolute time, we must
  592. * check if it's already passed. In short, we need a sample.
  593. */
  594. if (CPUCLOCK_PERTHREAD(timer->it_clock))
  595. val = cpu_clock_sample(clkid, p);
  596. else
  597. val = cpu_clock_sample_group(clkid, p, true);
  598. if (old) {
  599. if (old_expires == 0) {
  600. old->it_value.tv_sec = 0;
  601. old->it_value.tv_nsec = 0;
  602. } else {
  603. /*
  604. * Update the timer in case it has overrun already.
  605. * If it has, we'll report it as having overrun and
  606. * with the next reloaded timer already ticking,
  607. * though we are swallowing that pending
  608. * notification here to install the new setting.
  609. */
  610. u64 exp = bump_cpu_timer(timer, val);
  611. if (val < exp) {
  612. old_expires = exp - val;
  613. old->it_value = ns_to_timespec64(old_expires);
  614. } else {
  615. old->it_value.tv_nsec = 1;
  616. old->it_value.tv_sec = 0;
  617. }
  618. }
  619. }
  620. if (unlikely(ret)) {
  621. /*
  622. * We are colliding with the timer actually firing.
  623. * Punt after filling in the timer's old value, and
  624. * disable this firing since we are already reporting
  625. * it as an overrun (thanks to bump_cpu_timer above).
  626. */
  627. unlock_task_sighand(p, &flags);
  628. goto out;
  629. }
  630. if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
  631. new_expires += val;
  632. }
  633. /*
  634. * Install the new expiry time (or zero).
  635. * For a timer with no notification action, we don't actually
  636. * arm the timer (we'll just fake it for timer_gettime).
  637. */
  638. cpu_timer_setexpires(ctmr, new_expires);
  639. if (new_expires != 0 && val < new_expires) {
  640. arm_timer(timer, p);
  641. }
  642. unlock_task_sighand(p, &flags);
  643. /*
  644. * Install the new reload setting, and
  645. * set up the signal and overrun bookkeeping.
  646. */
  647. timer->it_interval = timespec64_to_ktime(new->it_interval);
  648. /*
  649. * This acts as a modification timestamp for the timer,
  650. * so any automatic reload attempt will punt on seeing
  651. * that we have reset the timer manually.
  652. */
  653. timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
  654. ~REQUEUE_PENDING;
  655. timer->it_overrun_last = 0;
  656. timer->it_overrun = -1;
  657. if (val >= new_expires) {
  658. if (new_expires != 0) {
  659. /*
  660. * The designated time already passed, so we notify
  661. * immediately, even if the thread never runs to
  662. * accumulate more time on this clock.
  663. */
  664. cpu_timer_fire(timer);
  665. }
  666. /*
  667. * Make sure we don't keep around the process wide cputime
  668. * counter or the tick dependency if they are not necessary.
  669. */
  670. sighand = lock_task_sighand(p, &flags);
  671. if (!sighand)
  672. goto out;
  673. if (!cpu_timer_queued(ctmr))
  674. trigger_base_recalc_expires(timer, p);
  675. unlock_task_sighand(p, &flags);
  676. }
  677. out:
  678. rcu_read_unlock();
  679. if (old)
  680. old->it_interval = ns_to_timespec64(old_incr);
  681. return ret;
  682. }
  683. static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
  684. {
  685. clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
  686. struct cpu_timer *ctmr = &timer->it.cpu;
  687. u64 now, expires = cpu_timer_getexpires(ctmr);
  688. struct task_struct *p;
  689. rcu_read_lock();
  690. p = cpu_timer_task_rcu(timer);
  691. if (!p)
  692. goto out;
  693. /*
  694. * Easy part: convert the reload time.
  695. */
  696. itp->it_interval = ktime_to_timespec64(timer->it_interval);
  697. if (!expires)
  698. goto out;
  699. /*
  700. * Sample the clock to take the difference with the expiry time.
  701. */
  702. if (CPUCLOCK_PERTHREAD(timer->it_clock))
  703. now = cpu_clock_sample(clkid, p);
  704. else
  705. now = cpu_clock_sample_group(clkid, p, false);
  706. if (now < expires) {
  707. itp->it_value = ns_to_timespec64(expires - now);
  708. } else {
  709. /*
  710. * The timer should have expired already, but the firing
  711. * hasn't taken place yet. Say it's just about to expire.
  712. */
  713. itp->it_value.tv_nsec = 1;
  714. itp->it_value.tv_sec = 0;
  715. }
  716. out:
  717. rcu_read_unlock();
  718. }
  719. #define MAX_COLLECTED 20
  720. static u64 collect_timerqueue(struct timerqueue_head *head,
  721. struct list_head *firing, u64 now)
  722. {
  723. struct timerqueue_node *next;
  724. int i = 0;
  725. while ((next = timerqueue_getnext(head))) {
  726. struct cpu_timer *ctmr;
  727. u64 expires;
  728. ctmr = container_of(next, struct cpu_timer, node);
  729. expires = cpu_timer_getexpires(ctmr);
  730. /* Limit the number of timers to expire at once */
  731. if (++i == MAX_COLLECTED || now < expires)
  732. return expires;
  733. ctmr->firing = 1;
  734. /* See posix_cpu_timer_wait_running() */
  735. rcu_assign_pointer(ctmr->handling, current);
  736. cpu_timer_dequeue(ctmr);
  737. list_add_tail(&ctmr->elist, firing);
  738. }
  739. return U64_MAX;
  740. }
  741. static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
  742. struct list_head *firing)
  743. {
  744. struct posix_cputimer_base *base = pct->bases;
  745. int i;
  746. for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
  747. base->nextevt = collect_timerqueue(&base->tqhead, firing,
  748. samples[i]);
  749. }
  750. }
  751. static inline void check_dl_overrun(struct task_struct *tsk)
  752. {
  753. if (tsk->dl.dl_overrun) {
  754. tsk->dl.dl_overrun = 0;
  755. send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID);
  756. }
  757. }
  758. static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
  759. {
  760. if (time < limit)
  761. return false;
  762. if (print_fatal_signals) {
  763. pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
  764. rt ? "RT" : "CPU", hard ? "hard" : "soft",
  765. current->comm, task_pid_nr(current));
  766. }
  767. send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID);
  768. return true;
  769. }
/*
 * Collect the expired per-thread CPU timers of @tsk onto the @firing list
 * and enforce RLIMIT_RTTIME. collect_posix_cputimers() also stores the
 * next expiry time of each clock base in tsk->posix_cputimers.
 */
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/* The deadline overrun check runs even when no timer is cached. */
	if (dl_task(tsk))
		check_dl_overrun(tsk);

	/* Everything below, including RLIMIT_RTTIME, needs an active cache. */
	if (expiry_cache_is_inactive(pct))
		return;

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case thread timers.
	 */
	soft = task_rlimit(tsk, RLIMIT_RTTIME);
	if (soft != RLIM_INFINITY) {
		/* Task RT timeout is accounted in jiffies. RTTIME is usec */
		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);

		/*
		 * At the hard limit, send SIGKILL. No further action: this
		 * return also skips the tick dependency cleanup below.
		 */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(rttime, hard, SIGKILL, true, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
			/* Move the soft limit one second further out. */
			soft += USEC_PER_SEC;
			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
		}
	}

	/* Collection may have emptied the cache; drop the tick dependency. */
	if (expiry_cache_is_inactive(pct))
		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
  808. static inline void stop_process_timers(struct signal_struct *sig)
  809. {
  810. struct posix_cputimers *pct = &sig->posix_cputimers;
  811. /* Turn off the active flag. This is done without locking. */
  812. WRITE_ONCE(pct->timers_active, false);
  813. tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
  814. }
  815. static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
  816. u64 *expires, u64 cur_time, int signo)
  817. {
  818. if (!it->expires)
  819. return;
  820. if (cur_time >= it->expires) {
  821. if (it->incr)
  822. it->expires += it->incr;
  823. else
  824. it->expires = 0;
  825. trace_itimer_expire(signo == SIGPROF ?
  826. ITIMER_PROF : ITIMER_VIRTUAL,
  827. task_tgid(tsk), cur_time);
  828. send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID);
  829. }
  830. if (it->expires && it->expires < *expires)
  831. *expires = it->expires;
  832. }
/*
 * Check for any process wide CPU timers that have fired and move them
 * onto the @firing list. Also handles the process wide interval timers
 * (SIGPROF/SIGVTALRM) and RLIMIT_CPU. Per-thread timers are handled by
 * check_thread_timers().
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	struct signal_struct *const sig = tsk->signal;
	struct posix_cputimers *pct = &sig->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/*
	 * If there are no active process wide timers (POSIX 1.b, itimers,
	 * RLIMIT_CPU) nothing to check. Also skip the process wide timer
	 * processing when there is already another task handling them.
	 */
	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
		return;

	/*
	 * Signify that a thread is checking for process timers.
	 * Write access to this field is protected by the sighand lock.
	 */
	pct->expiry_active = true;

	/*
	 * Collect the current process totals. Group accounting is active
	 * so the sample can be taken directly.
	 */
	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case process timers. Each call may lower
	 * the cached next expiry time of the matching clock base.
	 */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
			 &pct->bases[CPUCLOCK_PROF].nextevt,
			 samples[CPUCLOCK_PROF], SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
			 &pct->bases[CPUCLOCK_VIRT].nextevt,
			 samples[CPUCLOCK_VIRT], SIGVTALRM);

	soft = task_rlimit(tsk, RLIMIT_CPU);
	if (soft != RLIM_INFINITY) {
		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
		u64 ptime = samples[CPUCLOCK_PROF];
		u64 softns = (u64)soft * NSEC_PER_SEC;
		u64 hardns = (u64)hard * NSEC_PER_SEC;

		/*
		 * At the hard limit, send SIGKILL. No further action.
		 *
		 * NOTE(review): this return leaves pct->expiry_active set.
		 * Presumably harmless because the process is being killed,
		 * confirm.
		 */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(ptime, hardns, SIGKILL, false, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
			softns += NSEC_PER_SEC;
		}

		/* Update the expiry cache */
		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
			pct->bases[CPUCLOCK_PROF].nextevt = softns;
	}

	/* Nothing left in the expiry cache: stop process wide accounting. */
	if (expiry_cache_is_inactive(pct))
		stop_process_timers(sig);

	pct->expiry_active = false;
}
  896. /*
  897. * This is called from the signal code (via posixtimer_rearm)
  898. * when the last timer signal was delivered and we have to reload the timer.
  899. */
  900. static void posix_cpu_timer_rearm(struct k_itimer *timer)
  901. {
  902. clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
  903. struct task_struct *p;
  904. struct sighand_struct *sighand;
  905. unsigned long flags;
  906. u64 now;
  907. rcu_read_lock();
  908. p = cpu_timer_task_rcu(timer);
  909. if (!p)
  910. goto out;
  911. /* Protect timer list r/w in arm_timer() */
  912. sighand = lock_task_sighand(p, &flags);
  913. if (unlikely(sighand == NULL))
  914. goto out;
  915. /*
  916. * Fetch the current sample and update the timer's expiry time.
  917. */
  918. if (CPUCLOCK_PERTHREAD(timer->it_clock))
  919. now = cpu_clock_sample(clkid, p);
  920. else
  921. now = cpu_clock_sample_group(clkid, p, true);
  922. bump_cpu_timer(timer, now);
  923. /*
  924. * Now re-arm for the new expiry time.
  925. */
  926. arm_timer(timer, p);
  927. unlock_task_sighand(p, &flags);
  928. out:
  929. rcu_read_unlock();
  930. }
  931. /**
  932. * task_cputimers_expired - Check whether posix CPU timers are expired
  933. *
  934. * @samples: Array of current samples for the CPUCLOCK clocks
  935. * @pct: Pointer to a posix_cputimers container
  936. *
  937. * Returns true if any member of @samples is greater than the corresponding
  938. * member of @pct->bases[CLK].nextevt. False otherwise
  939. */
  940. static inline bool
  941. task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
  942. {
  943. int i;
  944. for (i = 0; i < CPUCLOCK_MAX; i++) {
  945. if (samples[i] >= pct->bases[i].nextevt)
  946. return true;
  947. }
  948. return false;
  949. }
  950. /**
  951. * fastpath_timer_check - POSIX CPU timers fast path.
  952. *
  953. * @tsk: The task (thread) being checked.
  954. *
  955. * Check the task and thread group timers. If both are zero (there are no
  956. * timers set) return false. Otherwise snapshot the task and thread group
  957. * timers and compare them with the corresponding expiration times. Return
  958. * true if a timer has expired, else return false.
  959. */
  960. static inline bool fastpath_timer_check(struct task_struct *tsk)
  961. {
  962. struct posix_cputimers *pct = &tsk->posix_cputimers;
  963. struct signal_struct *sig;
  964. if (!expiry_cache_is_inactive(pct)) {
  965. u64 samples[CPUCLOCK_MAX];
  966. task_sample_cputime(tsk, samples);
  967. if (task_cputimers_expired(samples, pct))
  968. return true;
  969. }
  970. sig = tsk->signal;
  971. pct = &sig->posix_cputimers;
  972. /*
  973. * Check if thread group timers expired when timers are active and
  974. * no other thread in the group is already handling expiry for
  975. * thread group cputimers. These fields are read without the
  976. * sighand lock. However, this is fine because this is meant to be
  977. * a fastpath heuristic to determine whether we should try to
  978. * acquire the sighand lock to handle timer expiry.
  979. *
  980. * In the worst case scenario, if concurrently timers_active is set
  981. * or expiry_active is cleared, but the current thread doesn't see
  982. * the change yet, the timer checks are delayed until the next
  983. * thread in the group gets a scheduler interrupt to handle the
  984. * timer. This isn't an issue in practice because these types of
  985. * delays with signals actually getting sent are expected.
  986. */
  987. if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
  988. u64 samples[CPUCLOCK_MAX];
  989. proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
  990. samples);
  991. if (task_cputimers_expired(samples, pct))
  992. return true;
  993. }
  994. if (dl_task(tsk) && tsk->dl.dl_overrun)
  995. return true;
  996. return false;
  997. }
  998. static void handle_posix_cpu_timers(struct task_struct *tsk);
  999. #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
  1000. static void posix_cpu_timers_work(struct callback_head *work)
  1001. {
  1002. struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work);
  1003. mutex_lock(&cw->mutex);
  1004. handle_posix_cpu_timers(current);
  1005. mutex_unlock(&cw->mutex);
  1006. }
/*
 * Invoked from the posix-timer core when a cancel operation failed because
 * the timer is marked firing. The caller holds rcu_read_lock(), which
 * protects the timer and the task which is expiring it from being freed.
 *
 * Waits for the task which is expiring @timr by acquiring and releasing
 * that task's expiry mutex. RCU protection is dropped while sleeping on
 * the mutex and re-taken before returning.
 */
static void posix_cpu_timer_wait_running(struct k_itimer *timr)
{
	struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling);

	/* Has the handling task completed expiry already? */
	if (!tsk)
		return;

	/* Ensure that the task cannot go away */
	get_task_struct(tsk);
	/* Now drop the RCU protection so the mutex can be locked */
	rcu_read_unlock();
	/* Wait on the expiry mutex */
	mutex_lock(&tsk->posix_cputimers_work.mutex);
	/* Release it immediately again. */
	mutex_unlock(&tsk->posix_cputimers_work.mutex);
	/* Drop the task reference. */
	put_task_struct(tsk);
	/* Relock RCU so the callsite is balanced */
	rcu_read_lock();
}
/*
 * Variant of posix_cpu_timer_wait_running() for do_cpu_nanosleep(), which
 * calls it with timr->it_lock held and interrupts disabled. The lock is
 * dropped around the wait and re-taken before returning.
 */
static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
{
	/* Ensure that timr->it.cpu.handling task cannot go away */
	rcu_read_lock();
	spin_unlock_irq(&timr->it_lock);
	posix_cpu_timer_wait_running(timr);
	rcu_read_unlock();
	/* @timr is on stack and is valid */
	spin_lock_irq(&timr->it_lock);
}
  1041. /*
  1042. * Clear existing posix CPU timers task work.
  1043. */
  1044. void clear_posix_cputimers_work(struct task_struct *p)
  1045. {
  1046. /*
  1047. * A copied work entry from the old task is not meaningful, clear it.
  1048. * N.B. init_task_work will not do this.
  1049. */
  1050. memset(&p->posix_cputimers_work.work, 0,
  1051. sizeof(p->posix_cputimers_work.work));
  1052. init_task_work(&p->posix_cputimers_work.work,
  1053. posix_cpu_timers_work);
  1054. mutex_init(&p->posix_cputimers_work.mutex);
  1055. p->posix_cputimers_work.scheduled = false;
  1056. }
/*
 * Initialize posix CPU timers task work in init task. Out of line to
 * keep the callback static and to avoid header recursion hell.
 *
 * Operates on the current task, which is expected to be the init task
 * when this __init function runs.
 */
void __init posix_cputimers_init_work(void)
{
	clear_posix_cputimers_work(current);
}
/*
 * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
 * in hard interrupt context or in task context with interrupts
 * disabled. Aside of that the writer/reader interaction is always in the
 * context of the current task, which means they are strictly per CPU.
 *
 * Returns true if the expiry task work of @tsk is already scheduled.
 */
static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
	return tsk->posix_cputimers_work.scheduled;
}
  1075. static inline void __run_posix_cpu_timers(struct task_struct *tsk)
  1076. {
  1077. if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
  1078. return;
  1079. /* Schedule task work to actually expire the timers */
  1080. tsk->posix_cputimers_work.scheduled = true;
  1081. task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
  1082. }
/*
 * Re-enable scheduling of the expiry task work for @tsk.
 *
 * @start is the jiffies value sampled before the expired timers were
 * collected. Returns true when the scheduled flag was cleared, false when
 * the caller has to repeat the collection.
 */
static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
						unsigned long start)
{
	bool ret = true;

	/*
	 * On !RT kernels interrupts are disabled while collecting expired
	 * timers, so no tick can happen and the fast path check can be
	 * reenabled without further checks.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		tsk->posix_cputimers_work.scheduled = false;
		return true;
	}

	/*
	 * On RT enabled kernels ticks can happen while the expired timers
	 * are collected under sighand lock. But any tick which observes
	 * the posix_cputimers_work.scheduled flag set, does not run the
	 * fastpath checks. So reenabling the tick work has to be done
	 * carefully:
	 *
	 * Disable interrupts and run the fast path check if jiffies have
	 * advanced since the collecting of expired timers started. If
	 * jiffies have not advanced or the fast path check did not find
	 * newly expired timers, reenable the fast path check in the timer
	 * interrupt. If there are newly expired timers, return false and
	 * let the collection loop repeat.
	 */
	local_irq_disable();
	if (start != jiffies && fastpath_timer_check(tsk))
		ret = false;
	else
		tsk->posix_cputimers_work.scheduled = false;
	local_irq_enable();

	return ret;
}
  1117. #else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
/*
 * Without CONFIG_POSIX_CPU_TIMERS_TASK_WORK the timers of @tsk are expired
 * directly by the caller, bracketed by the lockdep posix timer annotations.
 */
static inline void __run_posix_cpu_timers(struct task_struct *tsk)
{
	lockdep_posixtimer_enter();
	handle_posix_cpu_timers(tsk);
	lockdep_posixtimer_exit();
}
/*
 * Without task work based expiry there is no expiry mutex to wait on;
 * just relax the CPU and let the caller retry. @timr is unused.
 */
static void posix_cpu_timer_wait_running(struct k_itimer *timr)
{
	cpu_relax();
}
/*
 * Nanosleep variant for the configuration without task work: drop
 * timr->it_lock, relax the CPU once and take the lock again.
 */
static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
{
	spin_unlock_irq(&timr->it_lock);
	cpu_relax();
	spin_lock_irq(&timr->it_lock);
}
/* Stub: without task work based expiry nothing is ever scheduled. */
static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
	return false;
}
/*
 * Stub: always returns true, so the collection loop in
 * handle_posix_cpu_timers() runs exactly once.
 */
static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
						unsigned long start)
{
	return true;
}
  1143. #endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
/*
 * Expire the CPU timers of @tsk.
 *
 * Phase one collects all expired thread and process timers onto a private
 * list while holding the sighand lock. Phase two walks that list without
 * the sighand lock, takes each timer's own lock and fires the timer.
 * Returns without doing anything if the sighand lock cannot be taken.
 */
static void handle_posix_cpu_timers(struct task_struct *tsk)
{
	struct k_itimer *timer, *next;
	unsigned long flags, start;
	LIST_HEAD(firing);

	if (!lock_task_sighand(tsk, &flags))
		return;

	do {
		/*
		 * On RT locking sighand lock does not disable interrupts,
		 * so this needs to be careful vs. ticks. Store the current
		 * jiffies value.
		 */
		start = READ_ONCE(jiffies);
		barrier();

		/*
		 * Here we take off the thread and the process wide timer
		 * queues all the timers that are firing, and put them on
		 * the firing list.
		 */
		check_thread_timers(tsk, &firing);

		check_process_timers(tsk, &firing);

		/*
		 * The above timer checks have updated the expiry cache and
		 * because nothing can have queued or modified timers after
		 * sighand lock was taken above it is guaranteed to be
		 * consistent. So the next timer interrupt fastpath check
		 * will find valid data.
		 *
		 * If timer expiry runs in the timer interrupt context then
		 * the loop is not relevant as timers will be directly
		 * expired in interrupt context. The stub function below
		 * returns always true which allows the compiler to
		 * optimize the loop out.
		 *
		 * If timer expiry is deferred to task work context then
		 * the following rules apply:
		 *
		 * - On !RT kernels no tick can have happened on this CPU
		 *   after sighand lock was acquired because interrupts are
		 *   disabled. So reenabling task work before dropping
		 *   sighand lock and reenabling interrupts is race free.
		 *
		 * - On RT kernels ticks might have happened but the tick
		 *   work ignored posix CPU timer handling because the
		 *   posix_cputimers_work.scheduled flag is set. Reenabling
		 *   work must be done very carefully including a check
		 *   whether ticks have happened since the start of the
		 *   timer expiry checks. posix_cpu_timers_enable_work()
		 *   takes care of that and eventually lets the expiry
		 *   checks run again.
		 */
	} while (!posix_cpu_timers_enable_work(tsk, start));

	/*
	 * We must release sighand lock before taking any timer's lock.
	 * There is a potential race with timer deletion here, as the
	 * siglock now protects our private firing list. We have set
	 * the firing flag in each timer, so that a deletion attempt
	 * that gets the timer lock before we do will give it up and
	 * spin until we've taken care of that timer below.
	 */
	unlock_task_sighand(tsk, &flags);

	/*
	 * Now that all the timers on our list have the firing flag,
	 * no one will touch their list entries but us. We'll take
	 * each timer's lock before clearing its firing flag, so no
	 * timer call will interfere.
	 */
	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
		int cpu_firing;

		/*
		 * spin_lock() is sufficient here even independent of the
		 * expiry context. If expiry happens in hard interrupt
		 * context it's obvious. For task work context it's safe
		 * because all other operations on timer::it_lock happen in
		 * task context (syscall or exit).
		 */
		spin_lock(&timer->it_lock);
		list_del_init(&timer->it.cpu.elist);
		/* Snapshot the flag before clearing it; it decides below. */
		cpu_firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;
		/*
		 * The firing flag is -1 if we collided with a reset
		 * of the timer, which already reported this
		 * almost-firing as an overrun. So don't generate an event.
		 */
		if (likely(cpu_firing >= 0))
			cpu_timer_fire(timer);
		/* See posix_cpu_timer_wait_running() */
		rcu_assign_pointer(timer->it.cpu.handling, NULL);
		spin_unlock(&timer->it_lock);
	}
}
  1237. /*
  1238. * This is called from the timer interrupt handler. The irq handler has
  1239. * already updated our counts. We need to check if any timers fire now.
  1240. * Interrupts are disabled.
  1241. */
  1242. void run_posix_cpu_timers(void)
  1243. {
  1244. struct task_struct *tsk = current;
  1245. lockdep_assert_irqs_disabled();
  1246. /*
  1247. * If the actual expiry is deferred to task work context and the
  1248. * work is already scheduled there is no point to do anything here.
  1249. */
  1250. if (posix_cpu_timers_work_scheduled(tsk))
  1251. return;
  1252. /*
  1253. * The fast path checks that there are no expired thread or thread
  1254. * group timers. If that's so, just return.
  1255. */
  1256. if (!fastpath_timer_check(tsk))
  1257. return;
  1258. __run_posix_cpu_timers(tsk);
  1259. }
  1260. /*
  1261. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
  1262. * The tsk->sighand->siglock must be held by the caller.
  1263. */
  1264. void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
  1265. u64 *newval, u64 *oldval)
  1266. {
  1267. u64 now, *nextevt;
  1268. if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
  1269. return;
  1270. nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
  1271. now = cpu_clock_sample_group(clkid, tsk, true);
  1272. if (oldval) {
  1273. /*
  1274. * We are setting itimer. The *oldval is absolute and we update
  1275. * it to be relative, *newval argument is relative and we update
  1276. * it to be absolute.
  1277. */
  1278. if (*oldval) {
  1279. if (*oldval <= now) {
  1280. /* Just about to fire. */
  1281. *oldval = TICK_NSEC;
  1282. } else {
  1283. *oldval -= now;
  1284. }
  1285. }
  1286. if (*newval)
  1287. *newval += now;
  1288. }
  1289. /*
  1290. * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF
  1291. * expiry cache is also used by RLIMIT_CPU!.
  1292. */
  1293. if (*newval < *nextevt)
  1294. *nextevt = *newval;
  1295. tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
  1296. }
/*
 * Sleep on CPU clock @which_clock until the time given by @rqtp has been
 * reached or a signal is pending.
 *
 * @flags is handed to posix_cpu_timer_set() unchanged. Returns 0 when the
 * timer expired, -ERESTART_RESTARTBLOCK (or the result of
 * nanosleep_copyout()) when interrupted by a signal, or the error from
 * creating or arming the temporary timer.
 */
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
			    const struct timespec64 *rqtp)
{
	struct itimerspec64 it;
	struct k_itimer timer;
	u64 expires;
	int error;

	/*
	 * Set up a temporary timer and then wait for it to go off.
	 */
	memset(&timer, 0, sizeof timer);
	spin_lock_init(&timer.it_lock);
	timer.it_clock = which_clock;
	timer.it_overrun = -1;
	error = posix_cpu_timer_create(&timer);
	/* Assigned regardless of whether the create call succeeded. */
	timer.it_process = current;

	if (!error) {
		/* All-zero setting, used below to disarm the timer. */
		static struct itimerspec64 zero_it;
		struct restart_block *restart;

		memset(&it, 0, sizeof(it));
		it.it_value = *rqtp;

		spin_lock_irq(&timer.it_lock);
		error = posix_cpu_timer_set(&timer, flags, &it, NULL);
		if (error) {
			spin_unlock_irq(&timer.it_lock);
			return error;
		}

		while (!signal_pending(current)) {
			if (!cpu_timer_getexpires(&timer.it.cpu)) {
				/*
				 * Our timer fired and was reset, below
				 * deletion can not fail.
				 */
				posix_cpu_timer_del(&timer);
				spin_unlock_irq(&timer.it_lock);
				return 0;
			}

			/*
			 * Block until cpu_timer_fire (or a signal) wakes us.
			 */
			__set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&timer.it_lock);
			schedule();
			spin_lock_irq(&timer.it_lock);
		}

		/*
		 * We were interrupted by a signal. Remember the expiry time
		 * for a restart, then disarm the timer; the remaining time
		 * is returned in @it.
		 */
		expires = cpu_timer_getexpires(&timer.it.cpu);
		error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
		if (!error) {
			/* Timer is now unarmed, deletion can not fail. */
			posix_cpu_timer_del(&timer);
		} else {
			/* Wait for the expiry code and retry the deletion. */
			while (error == TIMER_RETRY) {
				posix_cpu_timer_wait_running_nsleep(&timer);
				error = posix_cpu_timer_del(&timer);
			}
		}

		spin_unlock_irq(&timer.it_lock);

		if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
			/*
			 * It actually did fire already.
			 */
			return 0;
		}

		error = -ERESTART_RESTARTBLOCK;
		/*
		 * Report back to the user the time still remaining.
		 */
		restart = &current->restart_block;
		restart->nanosleep.expires = expires;
		if (restart->nanosleep.type != TT_NONE)
			error = nanosleep_copyout(restart, &it.it_value);
	}

	return error;
}
  1374. static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
  1375. static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
  1376. const struct timespec64 *rqtp)
  1377. {
  1378. struct restart_block *restart_block = &current->restart_block;
  1379. int error;
  1380. /*
  1381. * Diagnose required errors first.
  1382. */
  1383. if (CPUCLOCK_PERTHREAD(which_clock) &&
  1384. (CPUCLOCK_PID(which_clock) == 0 ||
  1385. CPUCLOCK_PID(which_clock) == task_pid_vnr(current)))
  1386. return -EINVAL;
  1387. error = do_cpu_nanosleep(which_clock, flags, rqtp);
  1388. if (error == -ERESTART_RESTARTBLOCK) {
  1389. if (flags & TIMER_ABSTIME)
  1390. return -ERESTARTNOHAND;
  1391. restart_block->nanosleep.clockid = which_clock;
  1392. set_restart_fn(restart_block, posix_cpu_nsleep_restart);
  1393. }
  1394. return error;
  1395. }
  1396. static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
  1397. {
  1398. clockid_t which_clock = restart_block->nanosleep.clockid;
  1399. struct timespec64 t;
  1400. t = ns_to_timespec64(restart_block->nanosleep.expires);
  1401. return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t);
  1402. }
/* Clock ids built with PID 0 and CPUCLOCK_SCHED, used by the wrappers below. */
#define PROCESS_CLOCK	make_process_cpuclock(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK	make_thread_cpuclock(0, CPUCLOCK_SCHED)

/* Resolution of the process CPU clock; @which_clock is ignored. */
static int process_cpu_clock_getres(const clockid_t which_clock,
				    struct timespec64 *tp)
{
	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
}
/* Read the process CPU clock into @tp; @which_clock is ignored. */
static int process_cpu_clock_get(const clockid_t which_clock,
				 struct timespec64 *tp)
{
	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
}
/* Create @timer on the process CPU clock, overriding its it_clock. */
static int process_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = PROCESS_CLOCK;
	return posix_cpu_timer_create(timer);
}
/* nanosleep on the process CPU clock; @which_clock is ignored. */
static int process_cpu_nsleep(const clockid_t which_clock, int flags,
			      const struct timespec64 *rqtp)
{
	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
}
/* Resolution of the thread CPU clock; @which_clock is ignored. */
static int thread_cpu_clock_getres(const clockid_t which_clock,
				   struct timespec64 *tp)
{
	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
}
/* Read the thread CPU clock into @tp; @which_clock is ignored. */
static int thread_cpu_clock_get(const clockid_t which_clock,
				struct timespec64 *tp)
{
	return posix_cpu_clock_get(THREAD_CLOCK, tp);
}
/* Create @timer on the thread CPU clock, overriding its it_clock. */
static int thread_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = THREAD_CLOCK;
	return posix_cpu_timer_create(timer);
}
/*
 * Operations table for CPU clocks addressed by an explicit clock id. It
 * is the only one of the three tables here that provides timer_set,
 * timer_del, timer_get, timer_rearm and timer_wait_running.
 */
const struct k_clock clock_posix_cpu = {
	.clock_getres		= posix_cpu_clock_getres,
	.clock_set		= posix_cpu_clock_set,
	.clock_get_timespec	= posix_cpu_clock_get,
	.timer_create		= posix_cpu_timer_create,
	.nsleep			= posix_cpu_nsleep,
	.timer_set		= posix_cpu_timer_set,
	.timer_del		= posix_cpu_timer_del,
	.timer_get		= posix_cpu_timer_get,
	.timer_rearm		= posix_cpu_timer_rearm,
	.timer_wait_running	= posix_cpu_timer_wait_running,
};
/*
 * Operations table for the process CPU clock. All callbacks forward to
 * the posix_cpu_* implementation with PROCESS_CLOCK.
 */
const struct k_clock clock_process = {
	.clock_getres		= process_cpu_clock_getres,
	.clock_get_timespec	= process_cpu_clock_get,
	.timer_create		= process_cpu_timer_create,
	.nsleep			= process_cpu_nsleep,
};
/*
 * Operations table for the thread CPU clock. All callbacks forward to
 * the posix_cpu_* implementation with THREAD_CLOCK. No nsleep callback
 * is provided.
 */
const struct k_clock clock_thread = {
	.clock_getres		= thread_cpu_clock_getres,
	.clock_get_timespec	= thread_cpu_clock_get,
	.timer_create		= thread_cpu_timer_create,
};