umip.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. /*
  2. * umip.c Emulation for instruction protected by the User-Mode Instruction
  3. * Prevention feature
  4. *
  5. * Copyright (c) 2017, Intel Corporation.
  6. * Ricardo Neri <[email protected]>
  7. */
  8. #include <linux/uaccess.h>
  9. #include <asm/umip.h>
  10. #include <asm/traps.h>
  11. #include <asm/insn.h>
  12. #include <asm/insn-eval.h>
  13. #include <linux/ratelimit.h>
  14. #undef pr_fmt
  15. #define pr_fmt(fmt) "umip: " fmt
  16. /** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
  17. *
  18. * User-Mode Instruction Prevention is a security feature present in recent
  19. * x86 processors that, when enabled, prevents a group of instructions (SGDT,
  20. * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general
  21. * protection fault if the instruction is executed with CPL > 0.
  22. *
  23. * Rather than relaying to the user space the general protection fault caused by
  24. * the UMIP-protected instructions (in the form of a SIGSEGV signal), it can be
  25. * trapped and emulate the result of such instructions to provide dummy values.
  26. * This allows to both conserve the current kernel behavior and not reveal the
  27. * system resources that UMIP intends to protect (i.e., the locations of the
  28. * global descriptor and interrupt descriptor tables, the segment selectors of
  29. * the local descriptor table, the value of the task state register and the
  30. * contents of the CR0 register).
  31. *
  32. * This emulation is needed because certain applications (e.g., WineHQ and
  33. * DOSEMU2) rely on this subset of instructions to function.
  34. *
  35. * The instructions protected by UMIP can be split in two groups. Those which
  36. * return a kernel memory address (SGDT and SIDT) and those which return a
  37. * value (SLDT, STR and SMSW).
  38. *
  39. * For the instructions that return a kernel memory address, applications
  40. * such as WineHQ rely on the result being located in the kernel memory space,
  41. * not the actual location of the table. The result is emulated as a hard-coded
  42. * value that, lies close to the top of the kernel memory. The limit for the GDT
  43. * and the IDT are set to zero.
  44. *
  45. * The instruction SMSW is emulated to return the value that the register CR0
  46. * has at boot time as set in the head_32.
  47. * SLDT and STR are emulated to return the values that the kernel programmatically
  48. * assigns:
  49. * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
  50. * - STR returns (GDT_ENTRY_TSS * 8).
  51. *
  52. * Emulation is provided for both 32-bit and 64-bit processes.
  53. *
  54. * Care is taken to appropriately emulate the results when segmentation is
  55. * used. That is, rather than relying on USER_DS and USER_CS, the function
  56. * insn_get_addr_ref() inspects the segment descriptor pointed by the
  57. * registers in pt_regs. This ensures that we correctly obtain the segment
  58. * base address and the address and operand sizes even if the user space
  59. * application uses a local descriptor table.
  60. */
  61. #define UMIP_DUMMY_GDT_BASE 0xfffffffffffe0000ULL
  62. #define UMIP_DUMMY_IDT_BASE 0xffffffffffff0000ULL
  63. /*
  64. * The SGDT and SIDT instructions store the contents of the global descriptor
  65. * table and interrupt table registers, respectively. The destination is a
  66. * memory operand of X+2 bytes. X bytes are used to store the base address of
  67. * the table and 2 bytes are used to store the limit. In 32-bit processes X
  68. * has a value of 4, in 64-bit processes X has a value of 8.
  69. */
  70. #define UMIP_GDT_IDT_BASE_SIZE_64BIT 8
  71. #define UMIP_GDT_IDT_BASE_SIZE_32BIT 4
  72. #define UMIP_GDT_IDT_LIMIT_SIZE 2
  73. #define UMIP_INST_SGDT 0 /* 0F 01 /0 */
  74. #define UMIP_INST_SIDT 1 /* 0F 01 /1 */
  75. #define UMIP_INST_SMSW 2 /* 0F 01 /4 */
  76. #define UMIP_INST_SLDT 3 /* 0F 00 /0 */
  77. #define UMIP_INST_STR 4 /* 0F 00 /1 */
  78. static const char * const umip_insns[5] = {
  79. [UMIP_INST_SGDT] = "SGDT",
  80. [UMIP_INST_SIDT] = "SIDT",
  81. [UMIP_INST_SMSW] = "SMSW",
  82. [UMIP_INST_SLDT] = "SLDT",
  83. [UMIP_INST_STR] = "STR",
  84. };
  85. #define umip_pr_err(regs, fmt, ...) \
  86. umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
  87. #define umip_pr_debug(regs, fmt, ...) \
  88. umip_printk(regs, KERN_DEBUG, fmt, ##__VA_ARGS__)
  89. /**
  90. * umip_printk() - Print a rate-limited message
  91. * @regs: Register set with the context in which the warning is printed
  92. * @log_level: Kernel log level to print the message
  93. * @fmt: The text string to print
  94. *
  95. * Print the text contained in @fmt. The print rate is limited to bursts of 5
  96. * messages every two minutes. The purpose of this customized version of
  97. * printk() is to print messages when user space processes use any of the
  98. * UMIP-protected instructions. Thus, the printed text is prepended with the
  99. * task name and process ID number of the current task as well as the
  100. * instruction and stack pointers in @regs as seen when entering kernel mode.
  101. *
  102. * Returns:
  103. *
  104. * None.
  105. */
  106. static __printf(3, 4)
  107. void umip_printk(const struct pt_regs *regs, const char *log_level,
  108. const char *fmt, ...)
  109. {
  110. /* Bursts of 5 messages every two minutes */
  111. static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5);
  112. struct task_struct *tsk = current;
  113. struct va_format vaf;
  114. va_list args;
  115. if (!__ratelimit(&ratelimit))
  116. return;
  117. va_start(args, fmt);
  118. vaf.fmt = fmt;
  119. vaf.va = &args;
  120. printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm,
  121. task_pid_nr(tsk), regs->ip, regs->sp, &vaf);
  122. va_end(args);
  123. }
  124. /**
  125. * identify_insn() - Identify a UMIP-protected instruction
  126. * @insn: Instruction structure with opcode and ModRM byte.
  127. *
  128. * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected
  129. * instruction that can be emulated.
  130. *
  131. * Returns:
  132. *
  133. * On success, a constant identifying a specific UMIP-protected instruction that
  134. * can be emulated.
  135. *
  136. * -EINVAL on error or when not an UMIP-protected instruction that can be
  137. * emulated.
  138. */
  139. static int identify_insn(struct insn *insn)
  140. {
  141. /* By getting modrm we also get the opcode. */
  142. insn_get_modrm(insn);
  143. if (!insn->modrm.nbytes)
  144. return -EINVAL;
  145. /* All the instructions of interest start with 0x0f. */
  146. if (insn->opcode.bytes[0] != 0xf)
  147. return -EINVAL;
  148. if (insn->opcode.bytes[1] == 0x1) {
  149. switch (X86_MODRM_REG(insn->modrm.value)) {
  150. case 0:
  151. return UMIP_INST_SGDT;
  152. case 1:
  153. return UMIP_INST_SIDT;
  154. case 4:
  155. return UMIP_INST_SMSW;
  156. default:
  157. return -EINVAL;
  158. }
  159. } else if (insn->opcode.bytes[1] == 0x0) {
  160. if (X86_MODRM_REG(insn->modrm.value) == 0)
  161. return UMIP_INST_SLDT;
  162. else if (X86_MODRM_REG(insn->modrm.value) == 1)
  163. return UMIP_INST_STR;
  164. else
  165. return -EINVAL;
  166. } else {
  167. return -EINVAL;
  168. }
  169. }
  170. /**
  171. * emulate_umip_insn() - Emulate UMIP instructions and return dummy values
  172. * @insn: Instruction structure with operands
  173. * @umip_inst: A constant indicating the instruction to emulate
  174. * @data: Buffer into which the dummy result is stored
  175. * @data_size: Size of the emulated result
  176. * @x86_64: true if process is 64-bit, false otherwise
  177. *
  178. * Emulate an instruction protected by UMIP and provide a dummy result. The
  179. * result of the emulation is saved in @data. The size of the results depends
  180. * on both the instruction and type of operand (register vs memory address).
  181. * The size of the result is updated in @data_size. Caller is responsible
  182. * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE +
  183. * UMIP_GDT_IDT_LIMIT_SIZE bytes.
  184. *
  185. * Returns:
  186. *
  187. * 0 on success, -EINVAL on error while emulating.
  188. */
  189. static int emulate_umip_insn(struct insn *insn, int umip_inst,
  190. unsigned char *data, int *data_size, bool x86_64)
  191. {
  192. if (!data || !data_size || !insn)
  193. return -EINVAL;
  194. /*
  195. * These two instructions return the base address and limit of the
  196. * global and interrupt descriptor table, respectively. According to the
  197. * Intel Software Development manual, the base address can be 24-bit,
  198. * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is
  199. * 16-bit, the returned value of the base address is supposed to be a
  200. * zero-extended 24-byte number. However, it seems that a 32-byte number
  201. * is always returned irrespective of the operand size.
  202. */
  203. if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
  204. u64 dummy_base_addr;
  205. u16 dummy_limit = 0;
  206. /* SGDT and SIDT do not use registers operands. */
  207. if (X86_MODRM_MOD(insn->modrm.value) == 3)
  208. return -EINVAL;
  209. if (umip_inst == UMIP_INST_SGDT)
  210. dummy_base_addr = UMIP_DUMMY_GDT_BASE;
  211. else
  212. dummy_base_addr = UMIP_DUMMY_IDT_BASE;
  213. /*
  214. * 64-bit processes use the entire dummy base address.
  215. * 32-bit processes use the lower 32 bits of the base address.
  216. * dummy_base_addr is always 64 bits, but we memcpy the correct
  217. * number of bytes from it to the destination.
  218. */
  219. if (x86_64)
  220. *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT;
  221. else
  222. *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT;
  223. memcpy(data + 2, &dummy_base_addr, *data_size);
  224. *data_size += UMIP_GDT_IDT_LIMIT_SIZE;
  225. memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
  226. } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
  227. umip_inst == UMIP_INST_STR) {
  228. unsigned long dummy_value;
  229. if (umip_inst == UMIP_INST_SMSW) {
  230. dummy_value = CR0_STATE;
  231. } else if (umip_inst == UMIP_INST_STR) {
  232. dummy_value = GDT_ENTRY_TSS * 8;
  233. } else if (umip_inst == UMIP_INST_SLDT) {
  234. #ifdef CONFIG_MODIFY_LDT_SYSCALL
  235. down_read(&current->mm->context.ldt_usr_sem);
  236. if (current->mm->context.ldt)
  237. dummy_value = GDT_ENTRY_LDT * 8;
  238. else
  239. dummy_value = 0;
  240. up_read(&current->mm->context.ldt_usr_sem);
  241. #else
  242. dummy_value = 0;
  243. #endif
  244. }
  245. /*
  246. * For these 3 instructions, the number
  247. * of bytes to be copied in the result buffer is determined
  248. * by whether the operand is a register or a memory location.
  249. * If operand is a register, return as many bytes as the operand
  250. * size. If operand is memory, return only the two least
  251. * significant bytes.
  252. */
  253. if (X86_MODRM_MOD(insn->modrm.value) == 3)
  254. *data_size = insn->opnd_bytes;
  255. else
  256. *data_size = 2;
  257. memcpy(data, &dummy_value, *data_size);
  258. } else {
  259. return -EINVAL;
  260. }
  261. return 0;
  262. }
  263. /**
  264. * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR
  265. * @addr: Address that caused the signal
  266. * @regs: Register set containing the instruction pointer
  267. *
  268. * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is
  269. * intended to be used to provide a segmentation fault when the result of the
  270. * UMIP emulation could not be copied to the user space memory.
  271. *
  272. * Returns: none
  273. */
  274. static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
  275. {
  276. struct task_struct *tsk = current;
  277. tsk->thread.cr2 = (unsigned long)addr;
  278. tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
  279. tsk->thread.trap_nr = X86_TRAP_PF;
  280. force_sig_fault(SIGSEGV, SEGV_MAPERR, addr);
  281. if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
  282. return;
  283. umip_pr_err(regs, "segfault in emulation. error%x\n",
  284. X86_PF_USER | X86_PF_WRITE);
  285. }
  286. /**
  287. * fixup_umip_exception() - Fixup a general protection fault caused by UMIP
  288. * @regs: Registers as saved when entering the #GP handler
  289. *
  290. * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection
  291. * fault if executed with CPL > 0 (i.e., from user space). This function fixes
  292. * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR
  293. * and SLDT are not fixed up.
  294. *
  295. * If operands are memory addresses, results are copied to user-space memory as
  296. * indicated by the instruction pointed by eIP using the registers indicated in
  297. * the instruction operands. If operands are registers, results are copied into
  298. * the context that was saved when entering kernel mode.
  299. *
  300. * Returns:
  301. *
  302. * True if emulation was successful; false if not.
  303. */
  304. bool fixup_umip_exception(struct pt_regs *regs)
  305. {
  306. int nr_copied, reg_offset, dummy_data_size, umip_inst;
  307. /* 10 bytes is the maximum size of the result of UMIP instructions */
  308. unsigned char dummy_data[10] = { 0 };
  309. unsigned char buf[MAX_INSN_SIZE];
  310. unsigned long *reg_addr;
  311. void __user *uaddr;
  312. struct insn insn;
  313. if (!regs)
  314. return false;
  315. /*
  316. * Give up on emulation if fetching the instruction failed. Should a
  317. * page fault or a #GP be issued?
  318. */
  319. nr_copied = insn_fetch_from_user(regs, buf);
  320. if (nr_copied <= 0)
  321. return false;
  322. if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
  323. return false;
  324. umip_inst = identify_insn(&insn);
  325. if (umip_inst < 0)
  326. return false;
  327. umip_pr_debug(regs, "%s instruction cannot be used by applications.\n",
  328. umip_insns[umip_inst]);
  329. umip_pr_debug(regs, "For now, expensive software emulation returns the result.\n");
  330. if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
  331. user_64bit_mode(regs)))
  332. return false;
  333. /*
  334. * If operand is a register, write result to the copy of the register
  335. * value that was pushed to the stack when entering into kernel mode.
  336. * Upon exit, the value we write will be restored to the actual hardware
  337. * register.
  338. */
  339. if (X86_MODRM_MOD(insn.modrm.value) == 3) {
  340. reg_offset = insn_get_modrm_rm_off(&insn, regs);
  341. /*
  342. * Negative values are usually errors. In memory addressing,
  343. * the exception is -EDOM. Since we expect a register operand,
  344. * all negative values are errors.
  345. */
  346. if (reg_offset < 0)
  347. return false;
  348. reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
  349. memcpy(reg_addr, dummy_data, dummy_data_size);
  350. } else {
  351. uaddr = insn_get_addr_ref(&insn, regs);
  352. if ((unsigned long)uaddr == -1L)
  353. return false;
  354. nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
  355. if (nr_copied > 0) {
  356. /*
  357. * If copy fails, send a signal and tell caller that
  358. * fault was fixed up.
  359. */
  360. force_sig_info_umip_fault(uaddr, regs);
  361. return true;
  362. }
  363. }
  364. /* increase IP to let the program keep going */
  365. regs->ip += insn.length;
  366. return true;
  367. }