syscall.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. #include <linux/compat.h>
  3. #include <linux/context_tracking.h>
  4. #include <linux/randomize_kstack.h>
  5. #include <asm/interrupt.h>
  6. #include <asm/kup.h>
  7. #include <asm/syscall.h>
  8. #include <asm/time.h>
  9. #include <asm/tm.h>
  10. #include <asm/unistd.h>
  11. /* Has to run notrace because it is entered not completely "reconciled" */
  12. notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
  13. {
  14. long ret;
  15. syscall_fn f;
  16. kuap_lock();
  17. add_random_kstack_offset();
  18. if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
  19. BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
  20. trace_hardirqs_off(); /* finish reconciling */
  21. CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
  22. user_exit_irqoff();
  23. BUG_ON(regs_is_unrecoverable(regs));
  24. BUG_ON(!(regs->msr & MSR_PR));
  25. BUG_ON(arch_irq_disabled_regs(regs));
  26. #ifdef CONFIG_PPC_PKEY
  27. if (mmu_has_feature(MMU_FTR_PKEY)) {
  28. unsigned long amr, iamr;
  29. bool flush_needed = false;
  30. /*
  31. * When entering from userspace we mostly have the AMR/IAMR
  32. * different from kernel default values. Hence don't compare.
  33. */
  34. amr = mfspr(SPRN_AMR);
  35. iamr = mfspr(SPRN_IAMR);
  36. regs->amr = amr;
  37. regs->iamr = iamr;
  38. if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
  39. mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
  40. flush_needed = true;
  41. }
  42. if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
  43. mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
  44. flush_needed = true;
  45. }
  46. if (flush_needed)
  47. isync();
  48. } else
  49. #endif
  50. kuap_assert_locked();
  51. booke_restore_dbcr0();
  52. account_cpu_user_entry();
  53. account_stolen_time();
  54. /*
  55. * This is not required for the syscall exit path, but makes the
  56. * stack frame look nicer. If this was initialised in the first stack
  57. * frame, or if the unwinder was taught the first stack frame always
  58. * returns to user with IRQS_ENABLED, this store could be avoided!
  59. */
  60. irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
  61. /*
  62. * If system call is called with TM active, set _TIF_RESTOREALL to
  63. * prevent RFSCV being used to return to userspace, because POWER9
  64. * TM implementation has problems with this instruction returning to
  65. * transactional state. Final register values are not relevant because
  66. * the transaction will be aborted upon return anyway. Or in the case
  67. * of unsupported_scv SIGILL fault, the return state does not much
  68. * matter because it's an edge case.
  69. */
  70. if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
  71. unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
  72. set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
  73. /*
  74. * If the system call was made with a transaction active, doom it and
  75. * return without performing the system call. Unless it was an
  76. * unsupported scv vector, in which case it's treated like an illegal
  77. * instruction.
  78. */
  79. #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  80. if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
  81. !trap_is_unsupported_scv(regs)) {
  82. /* Enable TM in the kernel, and disable EE (for scv) */
  83. hard_irq_disable();
  84. mtmsr(mfmsr() | MSR_TM);
  85. /* tabort, this dooms the transaction, nothing else */
  86. asm volatile(".long 0x7c00071d | ((%0) << 16)"
  87. :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
  88. /*
  89. * Userspace will never see the return value. Execution will
  90. * resume after the tbegin. of the aborted transaction with the
  91. * checkpointed register state. A context switch could occur
  92. * or signal delivered to the process before resuming the
  93. * doomed transaction context, but that should all be handled
  94. * as expected.
  95. */
  96. return -ENOSYS;
  97. }
  98. #endif // CONFIG_PPC_TRANSACTIONAL_MEM
  99. local_irq_enable();
  100. if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
  101. if (unlikely(trap_is_unsupported_scv(regs))) {
  102. /* Unsupported scv vector */
  103. _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
  104. return regs->gpr[3];
  105. }
  106. /*
  107. * We use the return value of do_syscall_trace_enter() as the
  108. * syscall number. If the syscall was rejected for any reason
  109. * do_syscall_trace_enter() returns an invalid syscall number
  110. * and the test against NR_syscalls will fail and the return
  111. * value to be used is in regs->gpr[3].
  112. */
  113. r0 = do_syscall_trace_enter(regs);
  114. if (unlikely(r0 >= NR_syscalls))
  115. return regs->gpr[3];
  116. } else if (unlikely(r0 >= NR_syscalls)) {
  117. if (unlikely(trap_is_unsupported_scv(regs))) {
  118. /* Unsupported scv vector */
  119. _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
  120. return regs->gpr[3];
  121. }
  122. return -ENOSYS;
  123. }
  124. /* May be faster to do array_index_nospec? */
  125. barrier_nospec();
  126. #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
  127. // No COMPAT if we have SYSCALL_WRAPPER, see Kconfig
  128. f = (void *)sys_call_table[r0];
  129. ret = f(regs);
  130. #else
  131. if (unlikely(is_compat_task())) {
  132. unsigned long r3, r4, r5, r6, r7, r8;
  133. f = (void *)compat_sys_call_table[r0];
  134. r3 = regs->gpr[3] & 0x00000000ffffffffULL;
  135. r4 = regs->gpr[4] & 0x00000000ffffffffULL;
  136. r5 = regs->gpr[5] & 0x00000000ffffffffULL;
  137. r6 = regs->gpr[6] & 0x00000000ffffffffULL;
  138. r7 = regs->gpr[7] & 0x00000000ffffffffULL;
  139. r8 = regs->gpr[8] & 0x00000000ffffffffULL;
  140. ret = f(r3, r4, r5, r6, r7, r8);
  141. } else {
  142. f = (void *)sys_call_table[r0];
  143. ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
  144. regs->gpr[6], regs->gpr[7], regs->gpr[8]);
  145. }
  146. #endif
  147. /*
  148. * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
  149. * so the maximum stack offset is 1k bytes (10 bits).
  150. *
  151. * The actual entropy will be further reduced by the compiler when
  152. * applying stack alignment constraints: the powerpc architecture
  153. * may have two kinds of stack alignment (16-bytes and 8-bytes).
  154. *
  155. * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
  156. */
  157. choose_random_kstack_offset(mftb());
  158. return ret;
  159. }