xstate.c
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * xsave/xrstor support.
  4. *
  5. * Author: Suresh Siddha <[email protected]>
  6. */
  7. #include <linux/bitops.h>
  8. #include <linux/compat.h>
  9. #include <linux/cpu.h>
  10. #include <linux/mman.h>
  11. #include <linux/nospec.h>
  12. #include <linux/pkeys.h>
  13. #include <linux/seq_file.h>
  14. #include <linux/proc_fs.h>
  15. #include <linux/vmalloc.h>
  16. #include <asm/fpu/api.h>
  17. #include <asm/fpu/regset.h>
  18. #include <asm/fpu/signal.h>
  19. #include <asm/fpu/xcr.h>
  20. #include <asm/tlbflush.h>
  21. #include <asm/prctl.h>
  22. #include <asm/elf.h>
  23. #include "context.h"
  24. #include "internal.h"
  25. #include "legacy.h"
  26. #include "xstate.h"
  27. #define for_each_extended_xfeature(bit, mask) \
  28. (bit) = FIRST_EXTENDED_XFEATURE; \
  29. for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
  30. /*
  31. * Although we spell it out in here, the Processor Trace
  32. * xfeature is completely unused. We use other mechanisms
  33. * to save/restore PT state in Linux.
  34. */
  35. static const char *xfeature_names[] =
  36. {
  37. "x87 floating point registers" ,
  38. "SSE registers" ,
  39. "AVX registers" ,
  40. "MPX bounds registers" ,
  41. "MPX CSR" ,
  42. "AVX-512 opmask" ,
  43. "AVX-512 Hi256" ,
  44. "AVX-512 ZMM_Hi256" ,
  45. "Processor Trace (unused)" ,
  46. "Protection Keys User registers",
  47. "PASID state",
  48. "unknown xstate feature" ,
  49. "unknown xstate feature" ,
  50. "unknown xstate feature" ,
  51. "unknown xstate feature" ,
  52. "unknown xstate feature" ,
  53. "unknown xstate feature" ,
  54. "AMX Tile config" ,
  55. "AMX Tile data" ,
  56. "unknown xstate feature" ,
  57. };
  58. static unsigned short xsave_cpuid_features[] __initdata = {
  59. [XFEATURE_FP] = X86_FEATURE_FPU,
  60. [XFEATURE_SSE] = X86_FEATURE_XMM,
  61. [XFEATURE_YMM] = X86_FEATURE_AVX,
  62. [XFEATURE_BNDREGS] = X86_FEATURE_MPX,
  63. [XFEATURE_BNDCSR] = X86_FEATURE_MPX,
  64. [XFEATURE_OPMASK] = X86_FEATURE_AVX512F,
  65. [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F,
  66. [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F,
  67. [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
  68. [XFEATURE_PKRU] = X86_FEATURE_PKU,
  69. [XFEATURE_PASID] = X86_FEATURE_ENQCMD,
  70. [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
  71. [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
  72. };
  73. static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
  74. { [ 0 ... XFEATURE_MAX - 1] = -1};
  75. static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
  76. { [ 0 ... XFEATURE_MAX - 1] = -1};
  77. static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
  78. #define XSTATE_FLAG_SUPERVISOR BIT(0)
  79. #define XSTATE_FLAG_ALIGNED64 BIT(1)
  80. /*
  81. * Return whether the system supports a given xfeature.
  82. *
  83. * Also return the name of the (most advanced) feature that the caller requested:
  84. */
  85. int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
  86. {
  87. u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
  88. if (unlikely(feature_name)) {
  89. long xfeature_idx, max_idx;
  90. u64 xfeatures_print;
  91. /*
  92. * So we use FLS here to be able to print the most advanced
  93. * feature that was requested but is missing. So if a driver
  94. * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
  95. * missing AVX feature - this is the most informative message
  96. * to users:
  97. */
  98. if (xfeatures_missing)
  99. xfeatures_print = xfeatures_missing;
  100. else
  101. xfeatures_print = xfeatures_needed;
  102. xfeature_idx = fls64(xfeatures_print)-1;
  103. max_idx = ARRAY_SIZE(xfeature_names)-1;
  104. xfeature_idx = min(xfeature_idx, max_idx);
  105. *feature_name = xfeature_names[xfeature_idx];
  106. }
  107. if (xfeatures_missing)
  108. return 0;
  109. return 1;
  110. }
  111. EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
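/*
 * Usage sketch (editorial illustration, not part of the kernel file): a
 * driver that needs AVX state in the XSAVE buffer could probe for it as
 * below; on failure the returned name identifies the most advanced
 * missing feature, matching the fls64() logic above:
 *
 *	const char *name;
 *
 *	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &name))
 *		pr_warn("AVX xstate not available: missing '%s'\n", name);
 */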
  112. static bool xfeature_is_aligned64(int xfeature_nr)
  113. {
  114. return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
  115. }
  116. static bool xfeature_is_supervisor(int xfeature_nr)
  117. {
  118. return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
  119. }
  120. static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
  121. {
  122. unsigned int offs, i;
  123. /*
  124. * Non-compacted format and legacy features use the cached fixed
  125. * offsets.
  126. */
  127. if (!cpu_feature_enabled(X86_FEATURE_XCOMPACTED) ||
  128. xfeature <= XFEATURE_SSE)
  129. return xstate_offsets[xfeature];
  130. /*
  131. * Compacted format offsets depend on the actual content of the
  132. * compacted xsave area which is determined by the xcomp_bv header
  133. * field.
  134. */
  135. offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
  136. for_each_extended_xfeature(i, xcomp_bv) {
  137. if (xfeature_is_aligned64(i))
  138. offs = ALIGN(offs, 64);
  139. if (i == xfeature)
  140. break;
  141. offs += xstate_sizes[i];
  142. }
  143. return offs;
  144. }
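/*
 * Worked example (editorial illustration, assuming the commonly enumerated
 * sizes): with a compacted xcomp_bv whose extended features are only YMM
 * and OPMASK, the walk above starts at FXSAVE_SIZE + XSAVE_HDR_SIZE =
 * 512 + 64 = 576. YMM therefore lands at offset 576, and OPMASK follows at
 * 576 + xstate_sizes[XFEATURE_YMM] (256 bytes) = 832, rounded up to the
 * next 64-byte boundary first if the CPU flags OPMASK as 64-byte aligned.
 */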
  145. /*
  146. * Enable the extended processor state save/restore feature.
  147. * Called once per CPU onlining.
  148. */
  149. void fpu__init_cpu_xstate(void)
  150. {
  151. if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
  152. return;
  153. cr4_set_bits(X86_CR4_OSXSAVE);
  154. /*
  155. * Must happen after CR4 setup and before xsetbv() to allow KVM
  156. * lazy passthrough. Write independent of the dynamic state static
  157. * key as that does not work on the boot CPU. This also ensures
  158. * that any stale state is wiped out from XFD.
  159. */
  160. if (cpu_feature_enabled(X86_FEATURE_XFD))
  161. wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
  162. /*
  163. * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
  164. * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
  165. * states can be set here.
  166. */
  167. xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
  168. /*
  169. * MSR_IA32_XSS sets supervisor states managed by XSAVES.
  170. */
  171. if (boot_cpu_has(X86_FEATURE_XSAVES)) {
  172. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
  173. xfeatures_mask_independent());
  174. }
  175. }
  176. static bool xfeature_enabled(enum xfeature xfeature)
  177. {
  178. return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
  179. }
  180. /*
  181. * Record the offsets and sizes of various xstates contained
  182. * in the XSAVE state memory layout.
  183. */
  184. static void __init setup_xstate_cache(void)
  185. {
  186. u32 eax, ebx, ecx, edx, i;
  187. /* start at the beginning of the "extended state" */
  188. unsigned int last_good_offset = offsetof(struct xregs_state,
  189. extended_state_area);
  190. /*
  191. * The FP xstates and SSE xstates are legacy states. They are always
  192. * in the fixed offsets in the xsave area in either compacted form
  193. * or standard form.
  194. */
  195. xstate_offsets[XFEATURE_FP] = 0;
  196. xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
  197. xmm_space);
  198. xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
  199. xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
  200. xmm_space);
  201. for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
  202. cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
  203. xstate_sizes[i] = eax;
  204. xstate_flags[i] = ecx;
  205. /*
  206. * If an xfeature is supervisor state, the offset in EBX is
  207. * invalid; leave it at -1.
  208. */
  209. if (xfeature_is_supervisor(i))
  210. continue;
  211. xstate_offsets[i] = ebx;
  212. /*
  213. * In our xstate size checks, we assume that the highest-numbered
  214. * xstate feature has the highest offset in the buffer. Ensure
  215. * it does.
  216. */
  217. WARN_ONCE(last_good_offset > xstate_offsets[i],
  218. "x86/fpu: misordered xstate at %d\n", last_good_offset);
  219. last_good_offset = xstate_offsets[i];
  220. }
  221. }
  222. static void __init print_xstate_feature(u64 xstate_mask)
  223. {
  224. const char *feature_name;
  225. if (cpu_has_xfeatures(xstate_mask, &feature_name))
  226. pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
  227. }
  228. /*
  229. * Print out all the supported xstate features:
  230. */
  231. static void __init print_xstate_features(void)
  232. {
  233. print_xstate_feature(XFEATURE_MASK_FP);
  234. print_xstate_feature(XFEATURE_MASK_SSE);
  235. print_xstate_feature(XFEATURE_MASK_YMM);
  236. print_xstate_feature(XFEATURE_MASK_BNDREGS);
  237. print_xstate_feature(XFEATURE_MASK_BNDCSR);
  238. print_xstate_feature(XFEATURE_MASK_OPMASK);
  239. print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
  240. print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
  241. print_xstate_feature(XFEATURE_MASK_PKRU);
  242. print_xstate_feature(XFEATURE_MASK_PASID);
  243. print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
  244. print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
  245. }
  246. /*
  247. * This check is important because it is easy to get XSTATE_*
  248. * confused with XSTATE_BIT_*.
  249. */
  250. #define CHECK_XFEATURE(nr) do { \
  251. WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \
  252. WARN_ON(nr >= XFEATURE_MAX); \
  253. } while (0)
  254. /*
  255. * Print out xstate component offsets and sizes
  256. */
  257. static void __init print_xstate_offset_size(void)
  258. {
  259. int i;
  260. for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
  261. pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
  262. i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
  263. i, xstate_sizes[i]);
  264. }
  265. }
  266. /*
  267. * This function is called only during boot time when x86 caps are not set
  268. * up and alternative can not be used yet.
  269. */
  270. static __init void os_xrstor_booting(struct xregs_state *xstate)
  271. {
  272. u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
  273. u32 lmask = mask;
  274. u32 hmask = mask >> 32;
  275. int err;
  276. if (cpu_feature_enabled(X86_FEATURE_XSAVES))
  277. XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
  278. else
  279. XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
  280. /*
  281. * We should never fault when copying from a kernel buffer, and the FPU
  282. * state we set at boot time should be valid.
  283. */
  284. WARN_ON_FPU(err);
  285. }
  286. /*
  287. * All supported features have either init state all zeros or are
  288. * handled in setup_init_fpu_buf() individually. This is an explicit
  289. * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
  290. * newly added supported features at build time and make people
  291. * actually look at the init state for the new feature.
  292. */
  293. #define XFEATURES_INIT_FPSTATE_HANDLED \
  294. (XFEATURE_MASK_FP | \
  295. XFEATURE_MASK_SSE | \
  296. XFEATURE_MASK_YMM | \
  297. XFEATURE_MASK_OPMASK | \
  298. XFEATURE_MASK_ZMM_Hi256 | \
  299. XFEATURE_MASK_Hi16_ZMM | \
  300. XFEATURE_MASK_PKRU | \
  301. XFEATURE_MASK_BNDREGS | \
  302. XFEATURE_MASK_BNDCSR | \
  303. XFEATURE_MASK_PASID | \
  304. XFEATURE_MASK_XTILE)
  305. /*
  306. * setup the xstate image representing the init state
  307. */
  308. static void __init setup_init_fpu_buf(void)
  309. {
  310. BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
  311. XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
  312. XFEATURES_INIT_FPSTATE_HANDLED);
  313. if (!boot_cpu_has(X86_FEATURE_XSAVE))
  314. return;
  315. print_xstate_features();
  316. xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);
  317. /*
  318. * Init all the features state with header.xfeatures being 0x0
  319. */
  320. os_xrstor_booting(&init_fpstate.regs.xsave);
  321. /*
  322. * All components are now in init state. Read the state back so
  323. * that init_fpstate contains all non-zero init state. This only
  324. * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
  325. * those use the init optimization which skips writing data for
  326. * components in init state.
  327. *
  328. * XSAVE could be used, but that would require reshuffling the
  329. * data when XSAVEC/S is available because XSAVEC/S uses xstate
  330. * compaction. But doing so is a pointless exercise because most
  331. * components have an all zeros init state except for the legacy
  332. * ones (FP and SSE). Those can be saved with FXSAVE into the
  333. * legacy area. Adding new features requires ensuring that their init
  334. * state is all zeroes, or if not, adding the necessary handling
  335. * here.
  336. */
  337. fxsave(&init_fpstate.regs.fxsave);
  338. }
  339. int xfeature_size(int xfeature_nr)
  340. {
  341. u32 eax, ebx, ecx, edx;
  342. CHECK_XFEATURE(xfeature_nr);
  343. cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
  344. return eax;
  345. }
  346. /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
  347. static int validate_user_xstate_header(const struct xstate_header *hdr,
  348. struct fpstate *fpstate)
  349. {
  350. /* No unknown or supervisor features may be set */
  351. if (hdr->xfeatures & ~fpstate->user_xfeatures)
  352. return -EINVAL;
  353. /* Userspace must use the uncompacted format */
  354. if (hdr->xcomp_bv)
  355. return -EINVAL;
  356. /*
  357. * If 'reserved' is shrunk to add a new field, make sure to validate
  358. * that new field here!
  359. */
  360. BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
  361. /* No reserved bits may be set */
  362. if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
  363. return -EINVAL;
  364. return 0;
  365. }
  366. static void __init __xstate_dump_leaves(void)
  367. {
  368. int i;
  369. u32 eax, ebx, ecx, edx;
  370. static int should_dump = 1;
  371. if (!should_dump)
  372. return;
  373. should_dump = 0;
  374. /*
  375. * Dump out a few leaves past the ones that we support
  376. * just in case there are some goodies up there
  377. */
  378. for (i = 0; i < XFEATURE_MAX + 10; i++) {
  379. cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
  380. pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
  381. XSTATE_CPUID, i, eax, ebx, ecx, edx);
  382. }
  383. }
  384. #define XSTATE_WARN_ON(x, fmt, ...) do { \
  385. if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) { \
  386. __xstate_dump_leaves(); \
  387. } \
  388. } while (0)
  389. #define XCHECK_SZ(sz, nr, nr_macro, __struct) do { \
  390. if ((nr == nr_macro) && \
  391. WARN_ONCE(sz != sizeof(__struct), \
  392. "%s: struct is %zu bytes, cpu state %d bytes\n", \
  393. __stringify(nr_macro), sizeof(__struct), sz)) { \
  394. __xstate_dump_leaves(); \
  395. } \
  396. } while (0)
  397. /**
  398. * check_xtile_data_against_struct - Check tile data state size.
  399. *
  400. * Calculate the state size by multiplying the single tile size, which is
  401. * recorded in a C struct, by the number of tiles that the CPU reports.
  402. * Compare the provided size with the calculation.
  403. *
  404. * @size: The tile data state size
  405. *
  406. * Returns: 0 on success, -EINVAL on mismatch.
  407. */
  408. static int __init check_xtile_data_against_struct(int size)
  409. {
  410. u32 max_palid, palid, state_size;
  411. u32 eax, ebx, ecx, edx;
  412. u16 max_tile;
  413. /*
  414. * Check the maximum palette id:
  415. * eax: the highest numbered palette subleaf.
  416. */
  417. cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
  418. /*
  419. * Cross-check each tile size and find the maximum number of
  420. * supported tiles.
  421. */
  422. for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
  423. u16 tile_size, max;
  424. /*
  425. * Check the tile size info:
  426. * eax[31:16]: bytes per tile
  427. * ebx[31:16]: the max names (or max number of tiles)
  428. */
  429. cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
  430. tile_size = eax >> 16;
  431. max = ebx >> 16;
  432. if (tile_size != sizeof(struct xtile_data)) {
  433. pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
  434. __stringify(XFEATURE_XTILE_DATA),
  435. sizeof(struct xtile_data), tile_size);
  436. __xstate_dump_leaves();
  437. return -EINVAL;
  438. }
  439. if (max > max_tile)
  440. max_tile = max;
  441. }
  442. state_size = sizeof(struct xtile_data) * max_tile;
  443. if (size != state_size) {
  444. pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
  445. __stringify(XFEATURE_XTILE_DATA), state_size, size);
  446. __xstate_dump_leaves();
  447. return -EINVAL;
  448. }
  449. return 0;
  450. }
  451. /*
  452. * We have a C struct for each 'xstate'. We need to ensure
  453. * that our software representation matches what the CPU
  454. * tells us about the state's size.
  455. */
  456. static bool __init check_xstate_against_struct(int nr)
  457. {
  458. /*
  459. * Ask the CPU for the size of the state.
  460. */
  461. int sz = xfeature_size(nr);
  462. /*
  463. * Match each CPU state with the corresponding software
  464. * structure.
  465. */
  466. XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct);
  467. XCHECK_SZ(sz, nr, XFEATURE_BNDREGS, struct mpx_bndreg_state);
  468. XCHECK_SZ(sz, nr, XFEATURE_BNDCSR, struct mpx_bndcsr_state);
  469. XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
  470. XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
  471. XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
  472. XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
  473. XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
  474. XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
  475. /* The tile data size varies between implementations. */
  476. if (nr == XFEATURE_XTILE_DATA)
  477. check_xtile_data_against_struct(sz);
  478. /*
  479. * Make *SURE* to add any feature numbers in below if
  480. * there are "holes" in the xsave state component
  481. * numbers.
  482. */
  483. if ((nr < XFEATURE_YMM) ||
  484. (nr >= XFEATURE_MAX) ||
  485. (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
  486. ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
  487. XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
  488. return false;
  489. }
  490. return true;
  491. }
  492. static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
  493. {
  494. unsigned int topmost = fls64(xfeatures) - 1;
  495. unsigned int offset = xstate_offsets[topmost];
  496. if (topmost <= XFEATURE_SSE)
  497. return sizeof(struct xregs_state);
  498. if (compacted)
  499. offset = xfeature_get_offset(xfeatures, topmost);
  500. return offset + xstate_sizes[topmost];
  501. }
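/*
 * Worked example (editorial illustration, using the offsets and sizes
 * CPUs typically enumerate): for xfeatures = FP | SSE | YMM the topmost
 * feature is XFEATURE_YMM. In the non-compacted layout YMM sits at its
 * fixed enumerated offset (576), so the size is 576 + 256 = 832 bytes.
 * In the compacted layout the offset comes from xfeature_get_offset(),
 * which skips components absent from the mask, so holes in the feature
 * mask do not consume buffer space.
 */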
  502. /*
  503. * This essentially double-checks what the cpu told us about
  504. * how large the XSAVE buffer needs to be. We are recalculating
  505. * it to be safe.
  506. *
  507. * Independent XSAVE features allocate their own buffers and are not
  508. * covered by these checks. Only the size of the buffer for task->fpu
  509. * is checked here.
  510. */
  511. static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
  512. {
  513. bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
  514. bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
  515. unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
  516. int i;
  517. for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
  518. if (!check_xstate_against_struct(i))
  519. return false;
  520. /*
  521. * Supervisor state components can be managed only by
  522. * XSAVES.
  523. */
  524. if (!xsaves && xfeature_is_supervisor(i)) {
  525. XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
  526. return false;
  527. }
  528. }
  529. size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
  530. XSTATE_WARN_ON(size != kernel_size,
  531. "size %u != kernel_size %u\n", size, kernel_size);
  532. return size == kernel_size;
  533. }
  534. /*
  535. * Get total size of enabled xstates in XCR0 | IA32_XSS.
  536. *
  537. * Note the SDM's wording here. "sub-function 0" only enumerates
  538. * the size of the *user* states. If we use it to size a buffer
  539. * that we use 'XSAVES' on, we could potentially overflow the
  540. * buffer because 'XSAVES' saves system states too.
  541. *
  542. * This also takes compaction into account. So this works for
  543. * XSAVEC as well.
  544. */
  545. static unsigned int __init get_compacted_size(void)
  546. {
  547. unsigned int eax, ebx, ecx, edx;
  548. /*
  549. * - CPUID function 0DH, sub-function 1:
  550. * EBX enumerates the size (in bytes) required by
  551. * the XSAVES instruction for an XSAVE area
  552. * containing all the state components
  553. * corresponding to bits currently set in
  554. * XCR0 | IA32_XSS.
  555. *
  556. * When XSAVES is not available but XSAVEC is (virt), then there
  557. * are no supervisor states, but XSAVEC still uses compacted
  558. * format.
  559. */
  560. cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
  561. return ebx;
  562. }
  563. /*
  564. * Get the total size of the enabled xstates without the independent supervisor
  565. * features.
  566. */
  567. static unsigned int __init get_xsave_compacted_size(void)
  568. {
  569. u64 mask = xfeatures_mask_independent();
  570. unsigned int size;
  571. if (!mask)
  572. return get_compacted_size();
  573. /* Disable independent features. */
  574. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
  575. /*
  576. * Ask the hardware what size is required of the buffer.
  577. * This is the size required for the task->fpu buffer.
  578. */
  579. size = get_compacted_size();
  580. /* Re-enable independent features so XSAVES will work on them again. */
  581. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
  582. return size;
  583. }
  584. static unsigned int __init get_xsave_size_user(void)
  585. {
  586. unsigned int eax, ebx, ecx, edx;
  587. /*
  588. * - CPUID function 0DH, sub-function 0:
  589. * EBX enumerates the size (in bytes) required by
  590. * the XSAVE instruction for an XSAVE area
  591. * containing all the *user* state components
  592. * corresponding to bits currently set in XCR0.
  593. */
  594. cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
  595. return ebx;
  596. }
  597. static int __init init_xstate_size(void)
  598. {
  599. /* Recompute the context size for enabled features: */
  600. unsigned int user_size, kernel_size, kernel_default_size;
  601. bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
  602. /* Uncompacted user space size */
  603. user_size = get_xsave_size_user();
  604. /*
  605. * XSAVES kernel size includes supervisor states and uses compacted
  606. * format. XSAVEC uses compacted format, but does not save
  607. * supervisor states.
  608. *
  609. * XSAVE[OPT] do not support supervisor states so kernel and user
  610. * size is identical.
  611. */
  612. if (compacted)
  613. kernel_size = get_xsave_compacted_size();
  614. else
  615. kernel_size = user_size;
  616. kernel_default_size =
  617. xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
  618. if (!paranoid_xstate_size_valid(kernel_size))
  619. return -EINVAL;
  620. fpu_kernel_cfg.max_size = kernel_size;
  621. fpu_user_cfg.max_size = user_size;
  622. fpu_kernel_cfg.default_size = kernel_default_size;
  623. fpu_user_cfg.default_size =
  624. xstate_calculate_size(fpu_user_cfg.default_features, false);
  625. return 0;
  626. }
  627. /*
  628. * We enabled the XSAVE hardware, but something went wrong and
  629. * we can not use it. Disable it.
  630. */
  631. static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
  632. {
  633. fpu_kernel_cfg.max_features = 0;
  634. cr4_clear_bits(X86_CR4_OSXSAVE);
  635. setup_clear_cpu_cap(X86_FEATURE_XSAVE);
  636. /* Restore the legacy size.*/
  637. fpu_kernel_cfg.max_size = legacy_size;
  638. fpu_kernel_cfg.default_size = legacy_size;
  639. fpu_user_cfg.max_size = legacy_size;
  640. fpu_user_cfg.default_size = legacy_size;
  641. /*
  642. * Prevent enabling the static branch which enables writes to the
  643. * XFD MSR.
  644. */
  645. init_fpstate.xfd = 0;
  646. fpstate_reset(&current->thread.fpu);
  647. }
  648. /*
  649. * Enable and initialize the xsave feature.
  650. * Called once per system bootup.
  651. */
  652. void __init fpu__init_system_xstate(unsigned int legacy_size)
  653. {
  654. unsigned int eax, ebx, ecx, edx;
  655. u64 xfeatures;
  656. int err;
  657. int i;
  658. if (!boot_cpu_has(X86_FEATURE_FPU)) {
  659. pr_info("x86/fpu: No FPU detected\n");
  660. return;
  661. }
  662. if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
  663. pr_info("x86/fpu: x87 FPU will use %s\n",
  664. boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
  665. return;
  666. }
  667. if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
  668. WARN_ON_FPU(1);
  669. return;
  670. }
  671. /*
  672. * Find user xstates supported by the processor.
  673. */
  674. cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
  675. fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
  676. /*
  677. * Find supervisor xstates supported by the processor.
  678. */
  679. cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
  680. fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
  681. if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
  682. /*
  683. * This indicates that something really unexpected happened
  684. * with the enumeration. Disable XSAVE and try to continue
  685. * booting without it. This is too early to BUG().
  686. */
  687. pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
  688. fpu_kernel_cfg.max_features);
  689. goto out_disable;
  690. }
  691. /*
  692. * Clear XSAVE features that are disabled in the normal CPUID.
  693. */
  694. for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
  695. unsigned short cid = xsave_cpuid_features[i];
  696. /* Careful: X86_FEATURE_FPU is 0! */
  697. if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
  698. fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
  699. }
  700. if (!cpu_feature_enabled(X86_FEATURE_XFD))
  701. fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
  702. if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
  703. fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
  704. else
  705. fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
  706. XFEATURE_MASK_SUPERVISOR_SUPPORTED;
  707. fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
  708. fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
  709. /* Clean out dynamic features from default */
  710. fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
  711. fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
  712. fpu_user_cfg.default_features = fpu_user_cfg.max_features;
  713. fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
  714. /* Store it for paranoia check at the end */
  715. xfeatures = fpu_kernel_cfg.max_features;
  716. /*
  717. * Initialize the default XFD state in init_fpstate and enable the
  718. * dynamic sizing mechanism if dynamic states are available. The
  719. * static key cannot be enabled here because this runs before
  720. * jump_label_init(). This is delayed to an initcall.
  721. */
  722. init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
  723. /* Set up compaction feature bit */
  724. if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
  725. cpu_feature_enabled(X86_FEATURE_XSAVES))
  726. setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);
  727. /* Enable xstate instructions to be able to continue with initialization: */
  728. fpu__init_cpu_xstate();
  729. /* Cache size, offset and flags for initialization */
  730. setup_xstate_cache();
  731. err = init_xstate_size();
  732. if (err)
  733. goto out_disable;
  734. /* Reset the state for the current task */
  735. fpstate_reset(&current->thread.fpu);
  736. /*
  737. * Update info used for ptrace frames; use standard-format size and no
  738. * supervisor xstates:
  739. */
  740. update_regset_xstate_info(fpu_user_cfg.max_size,
  741. fpu_user_cfg.max_features);
  742. /*
  743. * init_fpstate excludes dynamic states as they are large but init
  744. * state is zero.
  745. */
  746. init_fpstate.size = fpu_kernel_cfg.default_size;
  747. init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
  748. if (init_fpstate.size > sizeof(init_fpstate.regs)) {
  749. pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
  750. sizeof(init_fpstate.regs), init_fpstate.size);
  751. goto out_disable;
  752. }
  753. setup_init_fpu_buf();
  754. /*
  755. * Paranoia check whether something in the setup modified the
  756. * xfeatures mask.
  757. */
  758. if (xfeatures != fpu_kernel_cfg.max_features) {
  759. pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
  760. xfeatures, fpu_kernel_cfg.max_features);
  761. goto out_disable;
  762. }
  763. /*
  764. * CPU capabilities initialization runs before FPU init. So
  765. * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
  766. * functional, set the feature bit so depending code works.
  767. */
  768. setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
  769. print_xstate_offset_size();
  770. pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
  771. fpu_kernel_cfg.max_features,
  772. fpu_kernel_cfg.max_size,
  773. boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
  774. return;
  775. out_disable:
  776. /* something went wrong, try to boot without any XSAVE support */
  777. fpu__init_disable_system_xstate(legacy_size);
  778. }
  779. /*
  780. * Restore minimal FPU state after suspend:
  781. */
  782. void fpu__resume_cpu(void)
  783. {
  784. /*
  785. * Restore XCR0 on xsave capable CPUs:
  786. */
  787. if (cpu_feature_enabled(X86_FEATURE_XSAVE))
  788. xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
  789. /*
  790. * Restore IA32_XSS. The same CPUID bit enumerates support
  791. * of XSAVES and MSR_IA32_XSS.
  792. */
  793. if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
  794. wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
  795. xfeatures_mask_independent());
  796. }
  797. if (fpu_state_size_dynamic())
  798. wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
  799. }
  800. /*
  801. * Given an xstate feature nr, calculate where in the xsave
  802. * buffer the state is. Callers should ensure that the buffer
  803. * is valid.
  804. */
  805. static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  806. {
  807. u64 xcomp_bv = xsave->header.xcomp_bv;
  808. if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
  809. return NULL;
  810. if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
  811. if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
  812. return NULL;
  813. }
  814. return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
  815. }
  816. /*
  817. * Given the xsave area and a state inside, this function returns the
  818. * address of the state.
  819. *
  820. * This is the API that is called to get xstate address in either
  821. * standard format or compacted format of xsave area.
  822. *
  823. * Note that if there is no data for the field in the xsave buffer
  824. * this will return NULL.
  825. *
  826. * Inputs:
  827. * xstate: the thread's storage area for all FPU data
  828. * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
  829. * XFEATURE_SSE, etc...)
  830. * Output:
  831. * address of the state in the xsave area, or NULL if the
  832. * field is not present in the xsave buffer.
  833. */
  834. void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  835. {
  836. /*
  837. * Do we even *have* xsave state?
  838. */
  839. if (!boot_cpu_has(X86_FEATURE_XSAVE))
  840. return NULL;
  841. /*
  842. * We should not ever be requesting features that we
  843. * have not enabled.
  844. */
  845. if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
  846. return NULL;
  847. /*
  848. * This assumes the last 'xsave*' instruction to
  849. * have requested that 'xfeature_nr' be saved.
  850. * If it did not, we might be seeing an old value
  851. * of the field in the buffer.
  852. *
  853. * This can happen because the last 'xsave' did not
  854. * request that this feature be saved (unlikely)
  855. * or because the "init optimization" caused it
  856. * to not be saved.
  857. */
  858. if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
  859. return NULL;
  860. return __raw_xsave_addr(xsave, xfeature_nr);
  861. }
  862. #ifdef CONFIG_ARCH_HAS_PKEYS
  863. /*
  864. * This will go out and modify PKRU register to set the access
  865. * rights for @pkey to @init_val.
  866. */
  867. int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
  868. unsigned long init_val)
  869. {
  870. u32 old_pkru, new_pkru_bits = 0;
  871. int pkey_shift;
  872. /*
  873. * This check implies XSAVE support. OSPKE only gets
  874. * set if we enable XSAVE and we enable PKU in XCR0.
  875. */
  876. if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
  877. return -EINVAL;
  878. /*
  879. * This code should only be called with valid 'pkey'
  880. * values originating from in-kernel users. Complain
  881. * if a bad value is observed.
  882. */
  883. if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
  884. return -EINVAL;
  885. /* Set the bits we need in PKRU: */
  886. if (init_val & PKEY_DISABLE_ACCESS)
  887. new_pkru_bits |= PKRU_AD_BIT;
  888. if (init_val & PKEY_DISABLE_WRITE)
  889. new_pkru_bits |= PKRU_WD_BIT;
  890. /* Shift the bits in to the correct place in PKRU for pkey: */
  891. pkey_shift = pkey * PKRU_BITS_PER_PKEY;
  892. new_pkru_bits <<= pkey_shift;
  893. /* Get old PKRU and mask off any old bits in place: */
  894. old_pkru = read_pkru();
  895. old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
  896. /* Write old part along with new part: */
  897. write_pkru(old_pkru | new_pkru_bits);
  898. return 0;
  899. }
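/*
 * Worked example (editorial illustration): each pkey owns two adjacent
 * PKRU bits, AD (PKRU_AD_BIT = 0x1) and WD (PKRU_WD_BIT = 0x2). For
 * pkey = 3 with init_val = PKEY_DISABLE_WRITE, the code above computes
 * new_pkru_bits = 0x2, shifts it by 3 * PKRU_BITS_PER_PKEY = 6 to get
 * 0x80, clears bits 7:6 of the old PKRU value and ORs the new bits in.
 */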
  900. #endif /* ! CONFIG_ARCH_HAS_PKEYS */
  901. static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
  902. void *init_xstate, unsigned int size)
  903. {
  904. membuf_write(to, from_xstate ? xstate : init_xstate, size);
  905. }
  906. /**
  907. * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
  908. * @to: membuf descriptor
  909. * @fpstate: The fpstate buffer from which to copy
  910. * @xfeatures: The mask of xfeatures to save (XSAVE mode only)
  911. * @pkru_val: The PKRU value to store in the PKRU component
  912. * @copy_mode: The requested copy mode
  913. *
  914. * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
  915. * format, i.e. from the kernel internal hardware dependent storage format
  916. * to the requested @mode. UABI XSTATE is always uncompacted!
  917. *
  918. * It supports partial copy but @to.pos always starts from zero.
  919. */
  920. void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
  921. u64 xfeatures, u32 pkru_val,
  922. enum xstate_copy_mode copy_mode)
  923. {
  924. const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
  925. struct xregs_state *xinit = &init_fpstate.regs.xsave;
  926. struct xregs_state *xsave = &fpstate->regs.xsave;
  927. struct xstate_header header;
  928. unsigned int zerofrom;
  929. u64 mask;
  930. int i;
  931. memset(&header, 0, sizeof(header));
  932. header.xfeatures = xsave->header.xfeatures;
  933. /* Mask out the feature bits depending on copy mode */
  934. switch (copy_mode) {
  935. case XSTATE_COPY_FP:
  936. header.xfeatures &= XFEATURE_MASK_FP;
  937. break;
  938. case XSTATE_COPY_FX:
  939. header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
  940. break;
  941. case XSTATE_COPY_XSAVE:
  942. header.xfeatures &= fpstate->user_xfeatures & xfeatures;
  943. break;
  944. }
  945. /* Copy FP state up to MXCSR */
  946. copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
  947. &xinit->i387, off_mxcsr);
  948. /* Copy MXCSR when SSE or YMM are set in the feature mask */
  949. copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
  950. &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
  951. MXCSR_AND_FLAGS_SIZE);
  952. /* Copy the remaining FP state */
  953. copy_feature(header.xfeatures & XFEATURE_MASK_FP,
  954. &to, &xsave->i387.st_space, &xinit->i387.st_space,
  955. sizeof(xsave->i387.st_space));
  956. /* Copy the SSE state - shared with YMM, but independently managed */
  957. copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
  958. &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
  959. sizeof(xsave->i387.xmm_space));
  960. if (copy_mode != XSTATE_COPY_XSAVE)
  961. goto out;
  962. /* Zero the padding area */
  963. membuf_zero(&to, sizeof(xsave->i387.padding));
  964. /* Copy xsave->i387.sw_reserved */
  965. membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));
  966. /* Copy the user space relevant state of @xsave->header */
  967. membuf_write(&to, &header, sizeof(header));
  968. zerofrom = offsetof(struct xregs_state, extended_state_area);
  969. /*
  970. * This 'mask' indicates which states to copy from fpstate.
  971. * Those extended states that are not present in fpstate are
  972. * either disabled or initialized:
  973. *
  974. * In non-compacted format, disabled features still occupy
  975. * state space but there is no state to copy from in the
  976. * compacted init_fpstate. The gap tracking will zero these
  977. * states.
  978. *
  979. * The extended features have an all zeroes init state. Thus,
  980. * remove them from 'mask' to zero those features in the user
  981. * buffer instead of retrieving them from init_fpstate.
  982. */
  983. mask = header.xfeatures;
  984. for_each_extended_xfeature(i, mask) {
  985. /*
  986. * If there was a feature or alignment gap, zero the space
  987. * in the destination buffer.
  988. */
  989. if (zerofrom < xstate_offsets[i])
  990. membuf_zero(&to, xstate_offsets[i] - zerofrom);
  991. if (i == XFEATURE_PKRU) {
  992. struct pkru_state pkru = {0};
  993. /*
  994. * PKRU is not necessarily up to date in the
  995. * XSAVE buffer. Use the provided value.
  996. */
  997. pkru.pkru = pkru_val;
  998. membuf_write(&to, &pkru, sizeof(pkru));
  999. } else {
  1000. membuf_write(&to,
  1001. __raw_xsave_addr(xsave, i),
  1002. xstate_sizes[i]);
  1003. }
  1004. /*
  1005. * Keep track of the last copied state in the non-compacted
  1006. * target buffer for gap zeroing.
  1007. */
  1008. zerofrom = xstate_offsets[i] + xstate_sizes[i];
  1009. }
  1010. out:
  1011. if (to.left)
  1012. membuf_zero(&to, to.left);
  1013. }
  1014. /**
  1015. * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
  1016. * @to: membuf descriptor
  1017. * @tsk: The task from which to copy the saved xstate
  1018. * @copy_mode: The requested copy mode
  1019. *
  1020. * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
  1021. * format, i.e. from the kernel internal hardware dependent storage format
  1022. * to the requested @mode. UABI XSTATE is always uncompacted!
  1023. *
  1024. * It supports partial copy but @to.pos always starts from zero.
  1025. */
  1026. void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
  1027. enum xstate_copy_mode copy_mode)
  1028. {
  1029. __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
  1030. tsk->thread.fpu.fpstate->user_xfeatures,
  1031. tsk->thread.pkru, copy_mode);
  1032. }
  1033. static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
  1034. const void *kbuf, const void __user *ubuf)
  1035. {
  1036. if (kbuf) {
  1037. memcpy(dst, kbuf + offset, size);
  1038. } else {
  1039. if (copy_from_user(dst, ubuf + offset, size))
  1040. return -EFAULT;
  1041. }
  1042. return 0;
  1043. }
  1044. /**
  1045. * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
  1046. * @fpstate: The fpstate buffer to copy to
  1047. * @kbuf: The UABI format buffer, if it comes from the kernel
  1048. * @ubuf: The UABI format buffer, if it comes from userspace
  1049. * @pkru: The location to write the PKRU value to
  1050. *
  1051. * Converts from the UABI format into the kernel internal hardware
  1052. * dependent format.
  1053. *
  1054. * This function ultimately has three different callers with distinct PKRU
  1055. * behavior.
  1056. * 1. When called from sigreturn the PKRU register will be restored from
  1057. * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to
  1058. * @fpstate is sufficient to cover this case, but the caller will also
  1059. * pass a pointer to the thread_struct's pkru field in @pkru and updating
  1060. * it is harmless.
  1061. * 2. When called from ptrace the PKRU register will be restored from the
  1062. * thread_struct's pkru field. A pointer to that is passed in @pkru.
  1063. * The kernel will restore it manually, so the XRSTOR behavior that resets
  1064. * the PKRU register to the hardware init value (0) if the corresponding
  1065. * xfeatures bit is not set is emulated here.
  1066. * 3. When called from KVM the PKRU register will be restored from the vcpu's
  1067. * pkru field. A pointer to that is passed in @pkru. KVM hasn't used
  1068. * XRSTOR and hasn't had the PKRU resetting behavior described above. To
  1069. * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
  1070. * bit is not set.
  1071. */
  1072. static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
  1073. const void __user *ubuf, u32 *pkru)
  1074. {
  1075. struct xregs_state *xsave = &fpstate->regs.xsave;
  1076. unsigned int offset, size;
  1077. struct xstate_header hdr;
  1078. u64 mask;
  1079. int i;
  1080. offset = offsetof(struct xregs_state, header);
  1081. if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
  1082. return -EFAULT;
  1083. if (validate_user_xstate_header(&hdr, fpstate))
  1084. return -EINVAL;
  1085. /* Validate MXCSR when any of the related features is in use */
  1086. mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM;
  1087. if (hdr.xfeatures & mask) {
  1088. u32 mxcsr[2];
  1089. offset = offsetof(struct fxregs_state, mxcsr);
  1090. if (copy_from_buffer(mxcsr, offset, sizeof(mxcsr), kbuf, ubuf))
  1091. return -EFAULT;
  1092. /* Reserved bits in MXCSR must be zero. */
  1093. if (mxcsr[0] & ~mxcsr_feature_mask)
  1094. return -EINVAL;
  1095. /* SSE and YMM require MXCSR even when FP is not in use. */
  1096. if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
  1097. xsave->i387.mxcsr = mxcsr[0];
  1098. xsave->i387.mxcsr_mask = mxcsr[1];
  1099. }
  1100. }
  1101. for (i = 0; i < XFEATURE_MAX; i++) {
  1102. mask = BIT_ULL(i);
  1103. if (hdr.xfeatures & mask) {
  1104. void *dst = __raw_xsave_addr(xsave, i);
  1105. offset = xstate_offsets[i];
  1106. size = xstate_sizes[i];
  1107. if (copy_from_buffer(dst, offset, size, kbuf, ubuf))
  1108. return -EFAULT;
  1109. }
  1110. }
  1111. if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
  1112. struct pkru_state *xpkru;
  1113. xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);
  1114. *pkru = xpkru->pkru;
  1115. } else {
  1116. /*
  1117. * KVM may pass NULL here to indicate that it does not need
  1118. * PKRU updated.
  1119. */
  1120. if (pkru)
  1121. *pkru = 0;
  1122. }
  1123. /*
  1124. * The state that came in from userspace was user-state only.
  1125. * Mask all the user states out of 'xfeatures':
  1126. */
  1127. xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL;
  1128. /*
  1129. * Add back in the features that came in from userspace:
  1130. */
  1131. xsave->header.xfeatures |= hdr.xfeatures;
  1132. return 0;
  1133. }
  1134. /*
  1135. * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
  1136. * format and copy to the target thread. Used by ptrace and KVM.
  1137. */
  1138. int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
  1139. {
  1140. return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru);
  1141. }
  1142. /*
  1143. * Convert from a sigreturn standard-format user-space buffer to kernel
  1144. * XSAVE[S] format and copy to the target thread. This is called from the
  1145. * sigreturn() and rt_sigreturn() system calls.
  1146. */
  1147. int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
  1148. const void __user *ubuf)
  1149. {
  1150. return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
  1151. }
  1152. static bool validate_independent_components(u64 mask)
  1153. {
  1154. u64 xchk;
  1155. if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
  1156. return false;
  1157. xchk = ~xfeatures_mask_independent();
  1158. if (WARN_ON_ONCE(!mask || mask & xchk))
  1159. return false;
  1160. return true;
  1161. }
  1162. /**
  1163. * xsaves - Save selected components to a kernel xstate buffer
  1164. * @xstate: Pointer to the buffer
  1165. * @mask: Feature mask to select the components to save
  1166. *
  1167. * The @xstate buffer must be 64 byte aligned and correctly initialized as
  1168. * XSAVES does not write the full xstate header. Before first use the
  1169. * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
  1170. * can #GP.
  1171. *
  1172. * The feature mask must be a subset of the independent features.
  1173. */
  1174. void xsaves(struct xregs_state *xstate, u64 mask)
  1175. {
  1176. int err;
  1177. if (!validate_independent_components(mask))
  1178. return;
  1179. XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
  1180. WARN_ON_ONCE(err);
  1181. }
  1182. /**
  1183. * xrstors - Restore selected components from a kernel xstate buffer
  1184. * @xstate: Pointer to the buffer
  1185. * @mask: Feature mask to select the components to restore
  1186. *
  1187. * The @xstate buffer must be 64 byte aligned and correctly initialized
  1188. * otherwise XRSTORS from that buffer can #GP.
  1189. *
  1190. * Proper usage is to restore the state which was saved with
  1191. * xsaves() into @xstate.
  1192. *
  1193. * The feature mask must be a subset of the independent features.
  1194. */
  1195. void xrstors(struct xregs_state *xstate, u64 mask)
  1196. {
  1197. int err;
  1198. if (!validate_independent_components(mask))
  1199. return;
  1200. XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
  1201. WARN_ON_ONCE(err);
  1202. }
  1203. #if IS_ENABLED(CONFIG_KVM)
  1204. void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
  1205. {
  1206. void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
  1207. if (addr)
  1208. memset(addr, 0, xstate_sizes[xfeature]);
  1209. }
  1210. EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
  1211. #endif
  1212. #ifdef CONFIG_X86_64
  1213. #ifdef CONFIG_X86_DEBUG_FPU
  1214. /*
  1215. * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
  1216. * can safely operate on the @fpstate buffer.
  1217. */
  1218. static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
  1219. {
  1220. u64 xfd = __this_cpu_read(xfd_state);
  1221. if (fpstate->xfd == xfd)
  1222. return true;
  1223. /*
  1224. * The XFD MSR does not match fpstate->xfd. That's invalid when
  1225. * the passed in fpstate is current's fpstate.
  1226. */
  1227. if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
  1228. return false;
  1229. /*
  1230. * XRSTOR(S) from init_fpstate are always correct as it will just
  1231. * bring all components into init state and not read from the
  1232. * buffer. XSAVE(S) raises #PF after init.
  1233. */
  1234. if (fpstate == &init_fpstate)
  1235. return rstor;
  1236. /*
  1237. * XSAVE(S): clone(), fpu_swap_kvm_fpu()
  1238. * XRSTORS(S): fpu_swap_kvm_fpu()
  1239. */
  1240. /*
  1241. * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
  1242. * the buffer area for XFD-disabled state components.
  1243. */
  1244. mask &= ~xfd;
  1245. /*
  1246. * Remove features which are valid in fpstate. They
  1247. * have space allocated in fpstate.
  1248. */
  1249. mask &= ~fpstate->xfeatures;
  1250. /*
  1251. * Any remaining state components in 'mask' might be written
  1252. * by XSAVE/XRSTOR. Fail validation if any are found.
  1253. */
  1254. return !mask;
  1255. }
  1256. void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
  1257. {
  1258. WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
  1259. }
  1260. #endif /* CONFIG_X86_DEBUG_FPU */
  1261. static int __init xfd_update_static_branch(void)
  1262. {
  1263. /*
  1264. * If init_fpstate.xfd has bits set then dynamic features are
  1265. * available and the dynamic sizing must be enabled.
  1266. */
  1267. if (init_fpstate.xfd)
  1268. static_branch_enable(&__fpu_state_size_dynamic);
  1269. return 0;
  1270. }
  1271. arch_initcall(xfd_update_static_branch)
  1272. void fpstate_free(struct fpu *fpu)
  1273. {
  1274. if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
  1275. vfree(fpu->fpstate);
  1276. }

/**
 * fpstate_realloc - Reallocate struct fpstate for the requested new features
 *
 * @xfeatures:	A bitmap of xstate features which extend the enabled features
 *		of that task
 * @ksize:	The required size for the kernel buffer
 * @usize:	The required size for user space buffers
 * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
 *
 * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
 * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
 * with large states are likely to live longer.
 *
 * Returns: 0 on success, -ENOMEM on allocation error.
 */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = &current->thread.fpu;
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference, independent of whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	if (in_use)
		xfd_update_state(fpu->fpstate);

	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}

static int validate_sigaltstack(unsigned int usize)
{
	struct task_struct *thread, *leader = current->group_leader;
	unsigned long framesize = get_sigframe_size();

	lockdep_assert_held(&current->sighand->siglock);

	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
	framesize -= fpu_user_cfg.max_size;
	framesize += usize;

	for_each_thread(leader, thread) {
		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
			return -ENOSPC;
	}
	return 0;
}
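
/*
 * Worked example with made-up numbers (illustrative only, not from the
 * original sources; assumes the usual case where @usize does not exceed
 * fpu_user_cfg.max_size): if get_sigframe_size() reports 12288 bytes of
 * which fpu_user_cfg.max_size accounts for 11008, and the projected user
 * state size @usize for the requested feature set is 4096, the projected
 * signal frame size is 12288 - 11008 + 4096 = 5376 bytes. Any thread in
 * the group that registered a sigaltstack smaller than that makes the
 * permission request fail with -ENOSPC; threads without a sigaltstack
 * (sas_ss_size == 0) are not considered.
 */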

static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret = 0;

	/* Check whether fully enabled */
	if ((permitted & requested) == requested)
		return 0;

	/* Calculate the resulting kernel state size */
	mask = permitted | requested;

	/* Take supervisor states into account on the host */
	if (!guest)
		mask |= xfeatures_mask_supervisor();
	ksize = xstate_calculate_size(mask, compacted);

	/* Calculate the resulting user state size */
	mask &= XFEATURE_MASK_USER_SUPPORTED;
	usize = xstate_calculate_size(mask, false);

	if (!guest) {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
	}

	perm = guest ? &fpu->guest_perm : &fpu->perm;
	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;

	return ret;
}

/*
 * Permissions array to map facilities with more than one component
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};

static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];
	if (!requested)
		return -EOPNOTSUPP;

	if ((fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	permitted = xstate_get_group_perm(guest);
	if ((permitted & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (guest && (permitted & FPU_GUEST_PERM_LOCKED))
		ret = -EBUSY;
	else
		ret = __xstate_request_perm(permitted, requested, guest);
	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}

int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(!!guest_fpu) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = &current->group_leader->thread.fpu;
	perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted and the state size is sufficient.
	 * Dropping the lock is safe here: even if more features are added
	 * from another task, the retrieved buffer sizes are valid for the
	 * currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	if (fpstate_realloc(xfd_event, ksize, usize, guest_fpu))
		return -EFAULT;
	return 0;
}
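
/*
 * Note for readers (added for illustration, not from the original
 * sources): in the non-guest case the caller of xfd_enable_feature() is
 * expected to be the #NM (device-not-available) exception handler, which
 * passes the content of MSR_IA32_XFD_ERR as @xfd_err and turns a -EPERM or
 * -EFAULT return into a fatal signal for the faulting task.
 */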
int xfd_enable_feature(u64 xfd_err)
{
	return __xfd_enable_feature(xfd_err, NULL);
}

#else /* CONFIG_X86_64 */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
#endif  /* !CONFIG_X86_64 */

u64 xstate_get_guest_group_perm(void)
{
	return xstate_get_group_perm(true);
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);

/**
 * fpu_xstate_prctl - xstate permission operations
 * @option:	A subfunction of arch_prctl()
 * @arg2:	option argument
 * Return:	0 if successful; otherwise, an error code
 *
 * Option arguments:
 *
 * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
 * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
 * ARCH_REQ_XCOMP_PERM: Facility number requested
 *
 * For facilities which require more than one XSTATE component, the request
 * must be the highest state component number related to that facility,
 * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
 * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
 */
long fpu_xstate_prctl(int option, unsigned long arg2)
{
	u64 __user *uptr = (u64 __user *)arg2;
	u64 permitted, supported;
	unsigned long idx = arg2;
	bool guest = false;

	switch (option) {
	case ARCH_GET_XCOMP_SUPP:
		supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
		return put_user(supported, uptr);

	case ARCH_GET_XCOMP_PERM:
		/*
		 * Lockless snapshot as it can also change right after
		 * dropping the lock.
		 */
		permitted = xstate_get_host_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_GET_XCOMP_GUEST_PERM:
		permitted = xstate_get_guest_group_perm();
		permitted &= XFEATURE_MASK_USER_SUPPORTED;
		return put_user(permitted, uptr);

	case ARCH_REQ_XCOMP_GUEST_PERM:
		guest = true;
		fallthrough;

	case ARCH_REQ_XCOMP_PERM:
		if (!IS_ENABLED(CONFIG_X86_64))
			return -EOPNOTSUPP;

		return xstate_request_perm(idx, guest);

	default:
		return -EINVAL;
	}
}
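
/*
 * Illustrative userspace sketch (added for clarity, not part of this
 * file): a task asks for permission to use AMX tile data before executing
 * its first tile instruction. The numeric component 18 corresponds to
 * XFEATURE_XTILE_DATA as documented in the kernel-doc above; the request
 * goes through the raw arch_prctl() syscall.
 *
 *	#include <stdlib.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <asm/prctl.h>
 *
 *	long ret = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, 18);
 *	if (ret)
 *		exit(1);	// not permitted or unsupported; avoid AMX
 *
 * After a successful request the first actual AMX use raises #NM, the
 * kernel enlarges the fpstate via xfd_enable_feature() and execution
 * resumes transparently.
 */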

#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
 * Report the amount of time elapsed in milliseconds since last AVX512
 * use in the task.
 */
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
	long delta;

	if (!timestamp) {
		/*
		 * Report -1 if no AVX512 usage
		 */
		delta = -1;
	} else {
		delta = (long)(jiffies - timestamp);
		/*
		 * Cap to LONG_MAX if time difference > LONG_MAX
		 */
		if (delta < 0)
			delta = LONG_MAX;
		delta = jiffies_to_msecs(delta);
	}

	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
	seq_putc(m, '\n');
}
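
/*
 * Example /proc/<pid>/arch_status output produced by the function above
 * (values are illustrative): a task that used AVX512 roughly two seconds
 * ago reports
 *
 *	AVX512_elapsed_ms:	2000
 *
 * while a task that never executed AVX512 instructions reports -1.
 */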

/*
 * Report architecture specific information
 */
int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
			struct pid *pid, struct task_struct *task)
{
	/*
	 * Report AVX512 state if the processor and build option support it.
	 */
	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
		avx512_status(m, task);

	return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */