bpf_jit_comp32.c 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Just-In-Time compiler for eBPF bytecode on MIPS.
  4. * Implementation of JIT functions for 32-bit CPUs.
  5. *
  6. * Copyright (c) 2021 Anyfi Networks AB.
  7. * Author: Johan Almbladh <[email protected]>
  8. *
  9. * Based on code and ideas from
  10. * Copyright (c) 2017 Cavium, Inc.
  11. * Copyright (c) 2017 Shubham Bansal <[email protected]>
  12. * Copyright (c) 2011 Mircea Gherzan <[email protected]>
  13. */
  14. #include <linux/math64.h>
  15. #include <linux/errno.h>
  16. #include <linux/filter.h>
  17. #include <linux/bpf.h>
  18. #include <asm/cpu-features.h>
  19. #include <asm/isa-rev.h>
  20. #include <asm/uasm.h>
  21. #include "bpf_jit_comp.h"
  /* The o32 ABI has no a4-a7 argument registers: drop those names */
  #undef MIPS_R_A4
  #undef MIPS_R_A5
  #undef MIPS_R_A6
  #undef MIPS_R_A7

  /* o32 keeps the stack 8-byte aligned */
  #define MIPS_STACK_ALIGNMENT 8

  /*
   * In the o32 ABI the top 16 bytes of a stack frame belong to the callee.
   * They are the home slots for the register arguments a0-a3.
   */
  #define JIT_RESERVED_STACK 16

  /* 64-bit temporary register reserved for the JIT itself */
  #define JIT_REG_TMP MAX_BPF_JIT_REG

  /*
   * Prologue bytes a tail call jumps over: the tail call count (TCC)
   * initialization is always 8 bytes, and big endian adds the 4-byte
   * R0-to-v0 assignment.
   */
  #ifndef __BIG_ENDIAN
  #define JIT_TCALL_SKIP 8
  #else
  #define JIT_TCALL_SKIP 12
  #endif

  /* Registers that carry the return value of a callee */
  #define JIT_RETURN_REGS (BIT(MIPS_R_V0) | BIT(MIPS_R_V1))

  /* Registers whose contents are handed to a callee directly */
  #define JIT_ARG_REGS \
  	(BIT(MIPS_R_A0) | BIT(MIPS_R_A1) | BIT(MIPS_R_A2) | BIT(MIPS_R_A3))

  /* Registers whose contents are handed to a callee via the stack */
  #define JIT_STACK_REGS \
  	(BIT(MIPS_R_T0) | BIT(MIPS_R_T1) | BIT(MIPS_R_T2) | \
  	 BIT(MIPS_R_T3) | BIT(MIPS_R_T4) | BIT(MIPS_R_T5))

  /* All caller-saved registers: return, argument and stack-argument sets */
  #define JIT_CALLER_REGS (JIT_RETURN_REGS | JIT_ARG_REGS | JIT_STACK_REGS)

  /* All callee-saved registers: s0-s7, gp, fp and ra */
  #define JIT_CALLEE_REGS \
  	(BIT(MIPS_R_S0) | BIT(MIPS_R_S1) | BIT(MIPS_R_S2) | BIT(MIPS_R_S3) | \
  	 BIT(MIPS_R_S4) | BIT(MIPS_R_S5) | BIT(MIPS_R_S6) | BIT(MIPS_R_S7) | \
  	 BIT(MIPS_R_GP) | BIT(MIPS_R_FP) | BIT(MIPS_R_RA))
  82. /*
  83. * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
  84. *
  85. * 1) Native register pairs are ordered according to CPU endiannes, following
  86. * the MIPS convention for passing 64-bit arguments and return values.
  87. * 2) The eBPF return value, arguments and callee-saved registers are mapped
  88. * to their native MIPS equivalents.
  89. * 3) Since the 32 highest bits in the eBPF FP register are always zero,
  90. * only one general-purpose register is actually needed for the mapping.
  91. * We use the fp register for this purpose, and map the highest bits to
  92. * the MIPS register r0 (zero).
  93. * 4) We use the MIPS gp and at registers as internal temporary registers
  94. * for constant blinding. The gp register is callee-saved.
  95. * 5) One 64-bit temporary register is mapped for use when sign-extending
  96. * immediate operands. MIPS registers t6-t9 are available to the JIT
  97. * for as temporaries when implementing complex 64-bit operations.
  98. *
  99. * With this scheme all eBPF registers are being mapped to native MIPS
  100. * registers without having to use any stack scratch space. The direct
  101. * register mapping (2) simplifies the handling of function calls.
  102. */
  103. static const u8 bpf2mips32[][2] = {
  104. /* Return value from in-kernel function, and exit value from eBPF */
  105. [BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
  106. /* Arguments from eBPF program to in-kernel function */
  107. [BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
  108. [BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
  109. /* Remaining arguments, to be passed on the stack per O32 ABI */
  110. [BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
  111. [BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
  112. [BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
  113. /* Callee-saved registers that in-kernel function will preserve */
  114. [BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
  115. [BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
  116. [BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
  117. [BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
  118. /* Read-only frame pointer to access the eBPF stack */
  119. #ifdef __BIG_ENDIAN
  120. [BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
  121. #else
  122. [BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
  123. #endif
  124. /* Temporary register for blinding constants */
  125. [BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
  126. /* Temporary register for internal JIT use */
  127. [JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
  128. };
  129. /* Get low CPU register for a 64-bit eBPF register mapping */
  130. static inline u8 lo(const u8 reg[])
  131. {
  132. #ifdef __BIG_ENDIAN
  133. return reg[0];
  134. #else
  135. return reg[1];
  136. #endif
  137. }
  138. /* Get high CPU register for a 64-bit eBPF register mapping */
  139. static inline u8 hi(const u8 reg[])
  140. {
  141. #ifdef __BIG_ENDIAN
  142. return reg[1];
  143. #else
  144. return reg[0];
  145. #endif
  146. }
  147. /*
  148. * Mark a 64-bit CPU register pair as clobbered, it needs to be
  149. * saved/restored by the program if callee-saved.
  150. */
  151. static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
  152. {
  153. clobber_reg(ctx, reg[0]);
  154. clobber_reg(ctx, reg[1]);
  155. }
  156. /* dst = imm (sign-extended) */
  157. static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
  158. {
  159. emit_mov_i(ctx, lo(dst), imm);
  160. if (imm < 0)
  161. emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
  162. else
  163. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  164. clobber_reg64(ctx, dst);
  165. }
  166. /* Zero extension, if verifier does not do it for us */
  167. static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
  168. {
  169. if (!ctx->program->aux->verifier_zext) {
  170. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  171. clobber_reg(ctx, hi(dst));
  172. }
  173. }
  174. /* Load delay slot, if ISA mandates it */
  175. static void emit_load_delay(struct jit_context *ctx)
  176. {
  177. if (!cpu_has_mips_2_3_4_5_r)
  178. emit(ctx, nop);
  179. }
  180. /* ALU immediate operation (64-bit) */
  181. static void emit_alu_i64(struct jit_context *ctx,
  182. const u8 dst[], s32 imm, u8 op)
  183. {
  184. u8 src = MIPS_R_T6;
  185. /*
  186. * ADD/SUB with all but the max negative imm can be handled by
  187. * inverting the operation and the imm value, saving one insn.
  188. */
  189. if (imm > S32_MIN && imm < 0)
  190. switch (op) {
  191. case BPF_ADD:
  192. op = BPF_SUB;
  193. imm = -imm;
  194. break;
  195. case BPF_SUB:
  196. op = BPF_ADD;
  197. imm = -imm;
  198. break;
  199. }
  200. /* Move immediate to temporary register */
  201. emit_mov_i(ctx, src, imm);
  202. switch (op) {
  203. /* dst = dst + imm */
  204. case BPF_ADD:
  205. emit(ctx, addu, lo(dst), lo(dst), src);
  206. emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
  207. emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
  208. if (imm < 0)
  209. emit(ctx, addiu, hi(dst), hi(dst), -1);
  210. break;
  211. /* dst = dst - imm */
  212. case BPF_SUB:
  213. emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
  214. emit(ctx, subu, lo(dst), lo(dst), src);
  215. emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
  216. if (imm < 0)
  217. emit(ctx, addiu, hi(dst), hi(dst), 1);
  218. break;
  219. /* dst = dst | imm */
  220. case BPF_OR:
  221. emit(ctx, or, lo(dst), lo(dst), src);
  222. if (imm < 0)
  223. emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
  224. break;
  225. /* dst = dst & imm */
  226. case BPF_AND:
  227. emit(ctx, and, lo(dst), lo(dst), src);
  228. if (imm >= 0)
  229. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  230. break;
  231. /* dst = dst ^ imm */
  232. case BPF_XOR:
  233. emit(ctx, xor, lo(dst), lo(dst), src);
  234. if (imm < 0) {
  235. emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
  236. emit(ctx, addiu, hi(dst), hi(dst), -1);
  237. }
  238. break;
  239. }
  240. clobber_reg64(ctx, dst);
  241. }
  242. /* ALU register operation (64-bit) */
  243. static void emit_alu_r64(struct jit_context *ctx,
  244. const u8 dst[], const u8 src[], u8 op)
  245. {
  246. switch (BPF_OP(op)) {
  247. /* dst = dst + src */
  248. case BPF_ADD:
  249. if (src == dst) {
  250. emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
  251. emit(ctx, addu, lo(dst), lo(dst), lo(dst));
  252. } else {
  253. emit(ctx, addu, lo(dst), lo(dst), lo(src));
  254. emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
  255. }
  256. emit(ctx, addu, hi(dst), hi(dst), hi(src));
  257. emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
  258. break;
  259. /* dst = dst - src */
  260. case BPF_SUB:
  261. emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
  262. emit(ctx, subu, lo(dst), lo(dst), lo(src));
  263. emit(ctx, subu, hi(dst), hi(dst), hi(src));
  264. emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
  265. break;
  266. /* dst = dst | src */
  267. case BPF_OR:
  268. emit(ctx, or, lo(dst), lo(dst), lo(src));
  269. emit(ctx, or, hi(dst), hi(dst), hi(src));
  270. break;
  271. /* dst = dst & src */
  272. case BPF_AND:
  273. emit(ctx, and, lo(dst), lo(dst), lo(src));
  274. emit(ctx, and, hi(dst), hi(dst), hi(src));
  275. break;
  276. /* dst = dst ^ src */
  277. case BPF_XOR:
  278. emit(ctx, xor, lo(dst), lo(dst), lo(src));
  279. emit(ctx, xor, hi(dst), hi(dst), hi(src));
  280. break;
  281. }
  282. clobber_reg64(ctx, dst);
  283. }
  284. /* ALU invert (64-bit) */
  285. static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
  286. {
  287. emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
  288. emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
  289. emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
  290. emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
  291. clobber_reg64(ctx, dst);
  292. }
  293. /* ALU shift immediate (64-bit) */
  294. static void emit_shift_i64(struct jit_context *ctx,
  295. const u8 dst[], u32 imm, u8 op)
  296. {
  297. switch (BPF_OP(op)) {
  298. /* dst = dst << imm */
  299. case BPF_LSH:
  300. if (imm < 32) {
  301. emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
  302. emit(ctx, sll, lo(dst), lo(dst), imm);
  303. emit(ctx, sll, hi(dst), hi(dst), imm);
  304. emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
  305. } else {
  306. emit(ctx, sll, hi(dst), lo(dst), imm - 32);
  307. emit(ctx, move, lo(dst), MIPS_R_ZERO);
  308. }
  309. break;
  310. /* dst = dst >> imm */
  311. case BPF_RSH:
  312. if (imm < 32) {
  313. emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
  314. emit(ctx, srl, lo(dst), lo(dst), imm);
  315. emit(ctx, srl, hi(dst), hi(dst), imm);
  316. emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
  317. } else {
  318. emit(ctx, srl, lo(dst), hi(dst), imm - 32);
  319. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  320. }
  321. break;
  322. /* dst = dst >> imm (arithmetic) */
  323. case BPF_ARSH:
  324. if (imm < 32) {
  325. emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
  326. emit(ctx, srl, lo(dst), lo(dst), imm);
  327. emit(ctx, sra, hi(dst), hi(dst), imm);
  328. emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
  329. } else {
  330. emit(ctx, sra, lo(dst), hi(dst), imm - 32);
  331. emit(ctx, sra, hi(dst), hi(dst), 31);
  332. }
  333. break;
  334. }
  335. clobber_reg64(ctx, dst);
  336. }
  337. /* ALU shift register (64-bit) */
  338. static void emit_shift_r64(struct jit_context *ctx,
  339. const u8 dst[], u8 src, u8 op)
  340. {
  341. u8 t1 = MIPS_R_T8;
  342. u8 t2 = MIPS_R_T9;
  343. emit(ctx, andi, t1, src, 32); /* t1 = src & 32 */
  344. emit(ctx, beqz, t1, 16); /* PC += 16 if t1 == 0 */
  345. emit(ctx, nor, t2, src, MIPS_R_ZERO); /* t2 = ~src (delay slot) */
  346. switch (BPF_OP(op)) {
  347. /* dst = dst << src */
  348. case BPF_LSH:
  349. /* Next: shift >= 32 */
  350. emit(ctx, sllv, hi(dst), lo(dst), src); /* dh = dl << src */
  351. emit(ctx, move, lo(dst), MIPS_R_ZERO); /* dl = 0 */
  352. emit(ctx, b, 20); /* PC += 20 */
  353. /* +16: shift < 32 */
  354. emit(ctx, srl, t1, lo(dst), 1); /* t1 = dl >> 1 */
  355. emit(ctx, srlv, t1, t1, t2); /* t1 = t1 >> t2 */
  356. emit(ctx, sllv, lo(dst), lo(dst), src); /* dl = dl << src */
  357. emit(ctx, sllv, hi(dst), hi(dst), src); /* dh = dh << src */
  358. emit(ctx, or, hi(dst), hi(dst), t1); /* dh = dh | t1 */
  359. break;
  360. /* dst = dst >> src */
  361. case BPF_RSH:
  362. /* Next: shift >= 32 */
  363. emit(ctx, srlv, lo(dst), hi(dst), src); /* dl = dh >> src */
  364. emit(ctx, move, hi(dst), MIPS_R_ZERO); /* dh = 0 */
  365. emit(ctx, b, 20); /* PC += 20 */
  366. /* +16: shift < 32 */
  367. emit(ctx, sll, t1, hi(dst), 1); /* t1 = dl << 1 */
  368. emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */
  369. emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >> src */
  370. emit(ctx, srlv, hi(dst), hi(dst), src); /* dh = dh >> src */
  371. emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */
  372. break;
  373. /* dst = dst >> src (arithmetic) */
  374. case BPF_ARSH:
  375. /* Next: shift >= 32 */
  376. emit(ctx, srav, lo(dst), hi(dst), src); /* dl = dh >>a src */
  377. emit(ctx, sra, hi(dst), hi(dst), 31); /* dh = dh >>a 31 */
  378. emit(ctx, b, 20); /* PC += 20 */
  379. /* +16: shift < 32 */
  380. emit(ctx, sll, t1, hi(dst), 1); /* t1 = dl << 1 */
  381. emit(ctx, sllv, t1, t1, t2); /* t1 = t1 << t2 */
  382. emit(ctx, srlv, lo(dst), lo(dst), src); /* dl = dl >>a src */
  383. emit(ctx, srav, hi(dst), hi(dst), src); /* dh = dh >> src */
  384. emit(ctx, or, lo(dst), lo(dst), t1); /* dl = dl | t1 */
  385. break;
  386. }
  387. /* +20: Done */
  388. clobber_reg64(ctx, dst);
  389. }
  390. /* ALU mul immediate (64x32-bit) */
  391. static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
  392. {
  393. u8 src = MIPS_R_T6;
  394. u8 tmp = MIPS_R_T9;
  395. switch (imm) {
  396. /* dst = dst * 1 is a no-op */
  397. case 1:
  398. break;
  399. /* dst = dst * -1 */
  400. case -1:
  401. emit_neg_i64(ctx, dst);
  402. break;
  403. case 0:
  404. emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
  405. emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
  406. break;
  407. /* Full 64x32 multiply */
  408. default:
  409. /* hi(dst) = hi(dst) * src(imm) */
  410. emit_mov_i(ctx, src, imm);
  411. if (cpu_has_mips32r1 || cpu_has_mips32r6) {
  412. emit(ctx, mul, hi(dst), hi(dst), src);
  413. } else {
  414. emit(ctx, multu, hi(dst), src);
  415. emit(ctx, mflo, hi(dst));
  416. }
  417. /* hi(dst) = hi(dst) - lo(dst) */
  418. if (imm < 0)
  419. emit(ctx, subu, hi(dst), hi(dst), lo(dst));
  420. /* tmp = lo(dst) * src(imm) >> 32 */
  421. /* lo(dst) = lo(dst) * src(imm) */
  422. if (cpu_has_mips32r6) {
  423. emit(ctx, muhu, tmp, lo(dst), src);
  424. emit(ctx, mulu, lo(dst), lo(dst), src);
  425. } else {
  426. emit(ctx, multu, lo(dst), src);
  427. emit(ctx, mflo, lo(dst));
  428. emit(ctx, mfhi, tmp);
  429. }
  430. /* hi(dst) += tmp */
  431. emit(ctx, addu, hi(dst), hi(dst), tmp);
  432. clobber_reg64(ctx, dst);
  433. break;
  434. }
  435. }
  436. /* ALU mul register (64x64-bit) */
  437. static void emit_mul_r64(struct jit_context *ctx,
  438. const u8 dst[], const u8 src[])
  439. {
  440. u8 acc = MIPS_R_T8;
  441. u8 tmp = MIPS_R_T9;
  442. /* acc = hi(dst) * lo(src) */
  443. if (cpu_has_mips32r1 || cpu_has_mips32r6) {
  444. emit(ctx, mul, acc, hi(dst), lo(src));
  445. } else {
  446. emit(ctx, multu, hi(dst), lo(src));
  447. emit(ctx, mflo, acc);
  448. }
  449. /* tmp = lo(dst) * hi(src) */
  450. if (cpu_has_mips32r1 || cpu_has_mips32r6) {
  451. emit(ctx, mul, tmp, lo(dst), hi(src));
  452. } else {
  453. emit(ctx, multu, lo(dst), hi(src));
  454. emit(ctx, mflo, tmp);
  455. }
  456. /* acc += tmp */
  457. emit(ctx, addu, acc, acc, tmp);
  458. /* tmp = lo(dst) * lo(src) >> 32 */
  459. /* lo(dst) = lo(dst) * lo(src) */
  460. if (cpu_has_mips32r6) {
  461. emit(ctx, muhu, tmp, lo(dst), lo(src));
  462. emit(ctx, mulu, lo(dst), lo(dst), lo(src));
  463. } else {
  464. emit(ctx, multu, lo(dst), lo(src));
  465. emit(ctx, mflo, lo(dst));
  466. emit(ctx, mfhi, tmp);
  467. }
  468. /* hi(dst) = acc + tmp */
  469. emit(ctx, addu, hi(dst), acc, tmp);
  470. clobber_reg64(ctx, dst);
  471. }
  472. /* Helper function for 64-bit modulo */
  473. static u64 jit_mod64(u64 a, u64 b)
  474. {
  475. u64 rem;
  476. div64_u64_rem(a, b, &rem);
  477. return rem;
  478. }
  479. /* ALU div/mod register (64-bit) */
  480. static void emit_divmod_r64(struct jit_context *ctx,
  481. const u8 dst[], const u8 src[], u8 op)
  482. {
  483. const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
  484. const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
  485. const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
  486. int exclude, k;
  487. u32 addr = 0;
  488. /* Push caller-saved registers on stack */
  489. push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  490. 0, JIT_RESERVED_STACK);
  491. /* Put 64-bit arguments 1 and 2 in registers a0-a3 */
  492. for (k = 0; k < 2; k++) {
  493. emit(ctx, move, MIPS_R_T9, src[k]);
  494. emit(ctx, move, r1[k], dst[k]);
  495. emit(ctx, move, r2[k], MIPS_R_T9);
  496. }
  497. /* Emit function call */
  498. switch (BPF_OP(op)) {
  499. /* dst = dst / src */
  500. case BPF_DIV:
  501. addr = (u32)&div64_u64;
  502. break;
  503. /* dst = dst % src */
  504. case BPF_MOD:
  505. addr = (u32)&jit_mod64;
  506. break;
  507. }
  508. emit_mov_i(ctx, MIPS_R_T9, addr);
  509. emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
  510. emit(ctx, nop); /* Delay slot */
  511. /* Store the 64-bit result in dst */
  512. emit(ctx, move, dst[0], r0[0]);
  513. emit(ctx, move, dst[1], r0[1]);
  514. /* Restore caller-saved registers, excluding the computed result */
  515. exclude = BIT(lo(dst)) | BIT(hi(dst));
  516. pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  517. exclude, JIT_RESERVED_STACK);
  518. emit_load_delay(ctx);
  519. clobber_reg64(ctx, dst);
  520. clobber_reg(ctx, MIPS_R_V0);
  521. clobber_reg(ctx, MIPS_R_V1);
  522. clobber_reg(ctx, MIPS_R_RA);
  523. }
  524. /* Swap bytes in a register word */
  525. static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
  526. {
  527. u8 tmp = MIPS_R_T9;
  528. emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
  529. emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */
  530. emit(ctx, srl, dst, src, 8); /* dst = src >> 8 */
  531. emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
  532. emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
  533. }
  534. /* Swap half words in a register word */
  535. static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
  536. {
  537. u8 tmp = MIPS_R_T9;
  538. emit(ctx, sll, tmp, src, 16); /* tmp = src << 16 */
  539. emit(ctx, srl, dst, src, 16); /* dst = src >> 16 */
  540. emit(ctx, or, dst, dst, tmp); /* dst = dst | tmp */
  541. }
  542. /* Swap bytes and truncate a register double word, word or half word */
  543. static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
  544. {
  545. u8 tmp = MIPS_R_T8;
  546. switch (width) {
  547. /* Swap bytes in a double word */
  548. case 64:
  549. if (cpu_has_mips32r2 || cpu_has_mips32r6) {
  550. emit(ctx, rotr, tmp, hi(dst), 16);
  551. emit(ctx, rotr, hi(dst), lo(dst), 16);
  552. emit(ctx, wsbh, lo(dst), tmp);
  553. emit(ctx, wsbh, hi(dst), hi(dst));
  554. } else {
  555. emit_swap16_r(ctx, tmp, lo(dst));
  556. emit_swap16_r(ctx, lo(dst), hi(dst));
  557. emit(ctx, move, hi(dst), tmp);
  558. emit(ctx, lui, tmp, 0xff); /* tmp = 0x00ff0000 */
  559. emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
  560. emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
  561. emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
  562. }
  563. break;
  564. /* Swap bytes in a word */
  565. /* Swap bytes in a half word */
  566. case 32:
  567. case 16:
  568. emit_bswap_r(ctx, lo(dst), width);
  569. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  570. break;
  571. }
  572. clobber_reg64(ctx, dst);
  573. }
  574. /* Truncate a register double word, word or half word */
  575. static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
  576. {
  577. switch (width) {
  578. case 64:
  579. break;
  580. /* Zero-extend a word */
  581. case 32:
  582. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  583. clobber_reg(ctx, hi(dst));
  584. break;
  585. /* Zero-extend a half word */
  586. case 16:
  587. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  588. emit(ctx, andi, lo(dst), lo(dst), 0xffff);
  589. clobber_reg64(ctx, dst);
  590. break;
  591. }
  592. }
  593. /* Load operation: dst = *(size*)(src + off) */
  594. static void emit_ldx(struct jit_context *ctx,
  595. const u8 dst[], u8 src, s16 off, u8 size)
  596. {
  597. switch (size) {
  598. /* Load a byte */
  599. case BPF_B:
  600. emit(ctx, lbu, lo(dst), off, src);
  601. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  602. break;
  603. /* Load a half word */
  604. case BPF_H:
  605. emit(ctx, lhu, lo(dst), off, src);
  606. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  607. break;
  608. /* Load a word */
  609. case BPF_W:
  610. emit(ctx, lw, lo(dst), off, src);
  611. emit(ctx, move, hi(dst), MIPS_R_ZERO);
  612. break;
  613. /* Load a double word */
  614. case BPF_DW:
  615. if (dst[1] == src) {
  616. emit(ctx, lw, dst[0], off + 4, src);
  617. emit(ctx, lw, dst[1], off, src);
  618. } else {
  619. emit(ctx, lw, dst[1], off, src);
  620. emit(ctx, lw, dst[0], off + 4, src);
  621. }
  622. emit_load_delay(ctx);
  623. break;
  624. }
  625. clobber_reg64(ctx, dst);
  626. }
  627. /* Store operation: *(size *)(dst + off) = src */
  628. static void emit_stx(struct jit_context *ctx,
  629. const u8 dst, const u8 src[], s16 off, u8 size)
  630. {
  631. switch (size) {
  632. /* Store a byte */
  633. case BPF_B:
  634. emit(ctx, sb, lo(src), off, dst);
  635. break;
  636. /* Store a half word */
  637. case BPF_H:
  638. emit(ctx, sh, lo(src), off, dst);
  639. break;
  640. /* Store a word */
  641. case BPF_W:
  642. emit(ctx, sw, lo(src), off, dst);
  643. break;
  644. /* Store a double word */
  645. case BPF_DW:
  646. emit(ctx, sw, src[1], off, dst);
  647. emit(ctx, sw, src[0], off + 4, dst);
  648. break;
  649. }
  650. }
  651. /* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
  652. static void emit_atomic_r32(struct jit_context *ctx,
  653. u8 dst, u8 src, s16 off, u8 code)
  654. {
  655. u32 exclude = 0;
  656. u32 addr = 0;
  657. /* Push caller-saved registers on stack */
  658. push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  659. 0, JIT_RESERVED_STACK);
  660. /*
  661. * Argument 1: dst+off if xchg, otherwise src, passed in register a0
  662. * Argument 2: src if xchg, otherwise dst+off, passed in register a1
  663. */
  664. emit(ctx, move, MIPS_R_T9, dst);
  665. if (code == BPF_XCHG) {
  666. emit(ctx, move, MIPS_R_A1, src);
  667. emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
  668. } else {
  669. emit(ctx, move, MIPS_R_A0, src);
  670. emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
  671. }
  672. /* Emit function call */
  673. switch (code) {
  674. case BPF_ADD:
  675. addr = (u32)&atomic_add;
  676. break;
  677. case BPF_ADD | BPF_FETCH:
  678. addr = (u32)&atomic_fetch_add;
  679. break;
  680. case BPF_SUB:
  681. addr = (u32)&atomic_sub;
  682. break;
  683. case BPF_SUB | BPF_FETCH:
  684. addr = (u32)&atomic_fetch_sub;
  685. break;
  686. case BPF_OR:
  687. addr = (u32)&atomic_or;
  688. break;
  689. case BPF_OR | BPF_FETCH:
  690. addr = (u32)&atomic_fetch_or;
  691. break;
  692. case BPF_AND:
  693. addr = (u32)&atomic_and;
  694. break;
  695. case BPF_AND | BPF_FETCH:
  696. addr = (u32)&atomic_fetch_and;
  697. break;
  698. case BPF_XOR:
  699. addr = (u32)&atomic_xor;
  700. break;
  701. case BPF_XOR | BPF_FETCH:
  702. addr = (u32)&atomic_fetch_xor;
  703. break;
  704. case BPF_XCHG:
  705. addr = (u32)&atomic_xchg;
  706. break;
  707. }
  708. emit_mov_i(ctx, MIPS_R_T9, addr);
  709. emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
  710. emit(ctx, nop); /* Delay slot */
  711. /* Update src register with old value, if specified */
  712. if (code & BPF_FETCH) {
  713. emit(ctx, move, src, MIPS_R_V0);
  714. exclude = BIT(src);
  715. clobber_reg(ctx, src);
  716. }
  717. /* Restore caller-saved registers, except any fetched value */
  718. pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  719. exclude, JIT_RESERVED_STACK);
  720. emit_load_delay(ctx);
  721. clobber_reg(ctx, MIPS_R_RA);
  722. }
  723. /* Helper function for 64-bit atomic exchange */
  724. static s64 jit_xchg64(s64 a, atomic64_t *v)
  725. {
  726. return atomic64_xchg(v, a);
  727. }
  728. /* Atomic read-modify-write (64-bit) */
  729. static void emit_atomic_r64(struct jit_context *ctx,
  730. u8 dst, const u8 src[], s16 off, u8 code)
  731. {
  732. const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
  733. const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
  734. u32 exclude = 0;
  735. u32 addr = 0;
  736. /* Push caller-saved registers on stack */
  737. push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  738. 0, JIT_RESERVED_STACK);
  739. /*
  740. * Argument 1: 64-bit src, passed in registers a0-a1
  741. * Argument 2: 32-bit dst+off, passed in register a2
  742. */
  743. emit(ctx, move, MIPS_R_T9, dst);
  744. emit(ctx, move, r1[0], src[0]);
  745. emit(ctx, move, r1[1], src[1]);
  746. emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);
  747. /* Emit function call */
  748. switch (code) {
  749. case BPF_ADD:
  750. addr = (u32)&atomic64_add;
  751. break;
  752. case BPF_ADD | BPF_FETCH:
  753. addr = (u32)&atomic64_fetch_add;
  754. break;
  755. case BPF_SUB:
  756. addr = (u32)&atomic64_sub;
  757. break;
  758. case BPF_SUB | BPF_FETCH:
  759. addr = (u32)&atomic64_fetch_sub;
  760. break;
  761. case BPF_OR:
  762. addr = (u32)&atomic64_or;
  763. break;
  764. case BPF_OR | BPF_FETCH:
  765. addr = (u32)&atomic64_fetch_or;
  766. break;
  767. case BPF_AND:
  768. addr = (u32)&atomic64_and;
  769. break;
  770. case BPF_AND | BPF_FETCH:
  771. addr = (u32)&atomic64_fetch_and;
  772. break;
  773. case BPF_XOR:
  774. addr = (u32)&atomic64_xor;
  775. break;
  776. case BPF_XOR | BPF_FETCH:
  777. addr = (u32)&atomic64_fetch_xor;
  778. break;
  779. case BPF_XCHG:
  780. addr = (u32)&jit_xchg64;
  781. break;
  782. }
  783. emit_mov_i(ctx, MIPS_R_T9, addr);
  784. emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
  785. emit(ctx, nop); /* Delay slot */
  786. /* Update src register with old value, if specified */
  787. if (code & BPF_FETCH) {
  788. emit(ctx, move, lo(src), lo(r0));
  789. emit(ctx, move, hi(src), hi(r0));
  790. exclude = BIT(src[0]) | BIT(src[1]);
  791. clobber_reg64(ctx, src);
  792. }
  793. /* Restore caller-saved registers, except any fetched value */
  794. pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  795. exclude, JIT_RESERVED_STACK);
  796. emit_load_delay(ctx);
  797. clobber_reg(ctx, MIPS_R_RA);
  798. }
  799. /* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
  800. static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
  801. {
  802. const u8 *r0 = bpf2mips32[BPF_REG_0];
  803. /* Push caller-saved registers on stack */
  804. push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  805. JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
  806. /*
  807. * Argument 1: 32-bit dst+off, passed in register a0
  808. * Argument 2: 32-bit r0, passed in register a1
  809. * Argument 3: 32-bit src, passed in register a2
  810. */
  811. emit(ctx, addiu, MIPS_R_T9, dst, off);
  812. emit(ctx, move, MIPS_R_T8, src);
  813. emit(ctx, move, MIPS_R_A1, lo(r0));
  814. emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
  815. emit(ctx, move, MIPS_R_A2, MIPS_R_T8);
  816. /* Emit function call */
  817. emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
  818. emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
  819. emit(ctx, nop); /* Delay slot */
  820. #ifdef __BIG_ENDIAN
  821. emit(ctx, move, lo(r0), MIPS_R_V0);
  822. #endif
  823. /* Restore caller-saved registers, except the return value */
  824. pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  825. JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
  826. emit_load_delay(ctx);
  827. clobber_reg(ctx, MIPS_R_V0);
  828. clobber_reg(ctx, MIPS_R_V1);
  829. clobber_reg(ctx, MIPS_R_RA);
  830. }
  831. /* Atomic compare-and-exchange (64-bit) */
  832. static void emit_cmpxchg_r64(struct jit_context *ctx,
  833. u8 dst, const u8 src[], s16 off)
  834. {
  835. const u8 *r0 = bpf2mips32[BPF_REG_0];
  836. const u8 *r2 = bpf2mips32[BPF_REG_2];
  837. /* Push caller-saved registers on stack */
  838. push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  839. JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
  840. /*
  841. * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
  842. * Argument 2: 64-bit r0, passed in registers a2-a3
  843. * Argument 3: 64-bit src, passed on stack
  844. */
  845. push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
  846. emit(ctx, addiu, MIPS_R_T9, dst, off);
  847. emit(ctx, move, r2[0], r0[0]);
  848. emit(ctx, move, r2[1], r0[1]);
  849. emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
  850. /* Emit function call */
  851. emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
  852. emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
  853. emit(ctx, nop); /* Delay slot */
  854. /* Restore caller-saved registers, except the return value */
  855. pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
  856. JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
  857. emit_load_delay(ctx);
  858. clobber_reg(ctx, MIPS_R_V0);
  859. clobber_reg(ctx, MIPS_R_V1);
  860. clobber_reg(ctx, MIPS_R_RA);
  861. }
  862. /*
  863. * Conditional movz or an emulated equivalent.
  864. * Note that the rs register may be modified.
  865. */
  866. static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
  867. {
  868. if (cpu_has_mips_2) {
  869. emit(ctx, movz, rd, rs, rt); /* rd = rt ? rd : rs */
  870. } else if (cpu_has_mips32r6) {
  871. if (rs != MIPS_R_ZERO)
  872. emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt == 0 */
  873. emit(ctx, selnez, rd, rd, rt); /* rd = 0 if rt != 0 */
  874. if (rs != MIPS_R_ZERO)
  875. emit(ctx, or, rd, rd, rs); /* rd = rd | rs */
  876. } else {
  877. emit(ctx, bnez, rt, 8); /* PC += 8 if rd != 0 */
  878. emit(ctx, nop); /* +0: delay slot */
  879. emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */
  880. }
  881. clobber_reg(ctx, rd);
  882. clobber_reg(ctx, rs);
  883. }
  884. /*
  885. * Conditional movn or an emulated equivalent.
  886. * Note that the rs register may be modified.
  887. */
  888. static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
  889. {
  890. if (cpu_has_mips_2) {
  891. emit(ctx, movn, rd, rs, rt); /* rd = rt ? rs : rd */
  892. } else if (cpu_has_mips32r6) {
  893. if (rs != MIPS_R_ZERO)
  894. emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0 */
  895. emit(ctx, seleqz, rd, rd, rt); /* rd = 0 if rt != 0 */
  896. if (rs != MIPS_R_ZERO)
  897. emit(ctx, or, rd, rd, rs); /* rd = rd | rs */
  898. } else {
  899. emit(ctx, beqz, rt, 8); /* PC += 8 if rd == 0 */
  900. emit(ctx, nop); /* +0: delay slot */
  901. emit(ctx, or, rd, rs, MIPS_R_ZERO); /* +4: rd = rs */
  902. }
  903. clobber_reg(ctx, rd);
  904. clobber_reg(ctx, rs);
  905. }
  906. /* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
  907. static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
  908. const u8 rs[], s64 imm)
  909. {
  910. u8 tmp = MIPS_R_T9;
  911. if (imm < 0) {
  912. emit_mov_i(ctx, rd, imm); /* rd = imm */
  913. emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */
  914. emit(ctx, sltiu, tmp, hi(rs), -1); /* tmp = rsh < ~0U */
  915. emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */
  916. } else { /* imm >= 0 */
  917. if (imm > 0x7fff) {
  918. emit_mov_i(ctx, rd, (s32)imm); /* rd = imm */
  919. emit(ctx, sltu, rd, lo(rs), rd); /* rd = rsl < rd */
  920. } else {
  921. emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
  922. }
  923. emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh */
  924. }
  925. }
  926. /* Emulation of 64-bit sltu rd, rs, rt */
  927. static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
  928. const u8 rs[], const u8 rt[])
  929. {
  930. u8 tmp = MIPS_R_T9;
  931. emit(ctx, sltu, rd, lo(rs), lo(rt)); /* rd = rsl < rtl */
  932. emit(ctx, subu, tmp, hi(rs), hi(rt)); /* tmp = rsh - rth */
  933. emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp); /* rd = 0 if tmp != 0 */
  934. emit(ctx, sltu, tmp, hi(rs), hi(rt)); /* tmp = rsh < rth */
  935. emit(ctx, or, rd, rd, tmp); /* rd = rd | tmp */
  936. }
  937. /* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
  938. static void emit_slti_r64(struct jit_context *ctx, u8 rd,
  939. const u8 rs[], s64 imm)
  940. {
  941. u8 t1 = MIPS_R_T8;
  942. u8 t2 = MIPS_R_T9;
  943. u8 cmp;
  944. /*
  945. * if ((rs < 0) ^ (imm < 0)) t1 = imm >u rsl
  946. * else t1 = rsl <u imm
  947. */
  948. emit_mov_i(ctx, rd, (s32)imm);
  949. emit(ctx, sltu, t1, lo(rs), rd); /* t1 = rsl <u imm */
  950. emit(ctx, sltu, t2, rd, lo(rs)); /* t2 = imm <u rsl */
  951. emit(ctx, srl, rd, hi(rs), 31); /* rd = rsh >> 31 */
  952. if (imm < 0)
  953. emit_movz_r(ctx, t1, t2, rd); /* t1 = rd ? t1 : t2 */
  954. else
  955. emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */
  956. /*
  957. * if ((imm < 0 && rsh != 0xffffffff) ||
  958. * (imm >= 0 && rsh != 0))
  959. * t1 = 0
  960. */
  961. if (imm < 0) {
  962. emit(ctx, addiu, rd, hi(rs), 1); /* rd = rsh + 1 */
  963. cmp = rd;
  964. } else { /* imm >= 0 */
  965. cmp = hi(rs);
  966. }
  967. emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp); /* t1 = 0 if cmp != 0 */
  968. /*
  969. * if (imm < 0) rd = rsh < -1
  970. * else rd = rsh != 0
  971. * rd = rd | t1
  972. */
  973. emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
  974. emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */
  975. }
  976. /* Emulation of 64-bit(slt rd, rs, rt) */
  977. static void emit_slt_r64(struct jit_context *ctx, u8 rd,
  978. const u8 rs[], const u8 rt[])
  979. {
  980. u8 t1 = MIPS_R_T7;
  981. u8 t2 = MIPS_R_T8;
  982. u8 t3 = MIPS_R_T9;
  983. /*
  984. * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
  985. * else t1 = rsl <u rtl
  986. * if (rsh == rth) t1 = 0
  987. */
  988. emit(ctx, sltu, t1, lo(rs), lo(rt)); /* t1 = rsl <u rtl */
  989. emit(ctx, sltu, t2, lo(rt), lo(rs)); /* t2 = rtl <u rsl */
  990. emit(ctx, xor, t3, hi(rs), hi(rt)); /* t3 = rlh ^ rth */
  991. emit(ctx, srl, rd, t3, 31); /* rd = t3 >> 31 */
  992. emit_movn_r(ctx, t1, t2, rd); /* t1 = rd ? t2 : t1 */
  993. emit_movn_r(ctx, t1, MIPS_R_ZERO, t3); /* t1 = 0 if t3 != 0 */
  994. /* rd = (rsh < rth) | t1 */
  995. emit(ctx, slt, rd, hi(rs), hi(rt)); /* rd = rsh <s rth */
  996. emit(ctx, or, rd, rd, t1); /* rd = rd | t1 */
  997. }
  998. /* Jump immediate (64-bit) */
  999. static void emit_jmp_i64(struct jit_context *ctx,
  1000. const u8 dst[], s32 imm, s32 off, u8 op)
  1001. {
  1002. u8 tmp = MIPS_R_T6;
  1003. switch (op) {
  1004. /* No-op, used internally for branch optimization */
  1005. case JIT_JNOP:
  1006. break;
  1007. /* PC += off if dst == imm */
  1008. /* PC += off if dst != imm */
  1009. case BPF_JEQ:
  1010. case BPF_JNE:
  1011. if (imm >= -0x7fff && imm <= 0x8000) {
  1012. emit(ctx, addiu, tmp, lo(dst), -imm);
  1013. } else if ((u32)imm <= 0xffff) {
  1014. emit(ctx, xori, tmp, lo(dst), imm);
  1015. } else { /* Register fallback */
  1016. emit_mov_i(ctx, tmp, imm);
  1017. emit(ctx, xor, tmp, lo(dst), tmp);
  1018. }
  1019. if (imm < 0) { /* Compare sign extension */
  1020. emit(ctx, addu, MIPS_R_T9, hi(dst), 1);
  1021. emit(ctx, or, tmp, tmp, MIPS_R_T9);
  1022. } else { /* Compare zero extension */
  1023. emit(ctx, or, tmp, tmp, hi(dst));
  1024. }
  1025. if (op == BPF_JEQ)
  1026. emit(ctx, beqz, tmp, off);
  1027. else /* BPF_JNE */
  1028. emit(ctx, bnez, tmp, off);
  1029. break;
  1030. /* PC += off if dst & imm */
  1031. /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
  1032. case BPF_JSET:
  1033. case JIT_JNSET:
  1034. if ((u32)imm <= 0xffff) {
  1035. emit(ctx, andi, tmp, lo(dst), imm);
  1036. } else { /* Register fallback */
  1037. emit_mov_i(ctx, tmp, imm);
  1038. emit(ctx, and, tmp, lo(dst), tmp);
  1039. }
  1040. if (imm < 0) /* Sign-extension pulls in high word */
  1041. emit(ctx, or, tmp, tmp, hi(dst));
  1042. if (op == BPF_JSET)
  1043. emit(ctx, bnez, tmp, off);
  1044. else /* JIT_JNSET */
  1045. emit(ctx, beqz, tmp, off);
  1046. break;
  1047. /* PC += off if dst > imm */
  1048. case BPF_JGT:
  1049. emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
  1050. emit(ctx, beqz, tmp, off);
  1051. break;
  1052. /* PC += off if dst >= imm */
  1053. case BPF_JGE:
  1054. emit_sltiu_r64(ctx, tmp, dst, imm);
  1055. emit(ctx, beqz, tmp, off);
  1056. break;
  1057. /* PC += off if dst < imm */
  1058. case BPF_JLT:
  1059. emit_sltiu_r64(ctx, tmp, dst, imm);
  1060. emit(ctx, bnez, tmp, off);
  1061. break;
  1062. /* PC += off if dst <= imm */
  1063. case BPF_JLE:
  1064. emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
  1065. emit(ctx, bnez, tmp, off);
  1066. break;
  1067. /* PC += off if dst > imm (signed) */
  1068. case BPF_JSGT:
  1069. emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
  1070. emit(ctx, beqz, tmp, off);
  1071. break;
  1072. /* PC += off if dst >= imm (signed) */
  1073. case BPF_JSGE:
  1074. emit_slti_r64(ctx, tmp, dst, imm);
  1075. emit(ctx, beqz, tmp, off);
  1076. break;
  1077. /* PC += off if dst < imm (signed) */
  1078. case BPF_JSLT:
  1079. emit_slti_r64(ctx, tmp, dst, imm);
  1080. emit(ctx, bnez, tmp, off);
  1081. break;
  1082. /* PC += off if dst <= imm (signed) */
  1083. case BPF_JSLE:
  1084. emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
  1085. emit(ctx, bnez, tmp, off);
  1086. break;
  1087. }
  1088. }
  1089. /* Jump register (64-bit) */
  1090. static void emit_jmp_r64(struct jit_context *ctx,
  1091. const u8 dst[], const u8 src[], s32 off, u8 op)
  1092. {
  1093. u8 t1 = MIPS_R_T6;
  1094. u8 t2 = MIPS_R_T7;
  1095. switch (op) {
  1096. /* No-op, used internally for branch optimization */
  1097. case JIT_JNOP:
  1098. break;
  1099. /* PC += off if dst == src */
  1100. /* PC += off if dst != src */
  1101. case BPF_JEQ:
  1102. case BPF_JNE:
  1103. emit(ctx, subu, t1, lo(dst), lo(src));
  1104. emit(ctx, subu, t2, hi(dst), hi(src));
  1105. emit(ctx, or, t1, t1, t2);
  1106. if (op == BPF_JEQ)
  1107. emit(ctx, beqz, t1, off);
  1108. else /* BPF_JNE */
  1109. emit(ctx, bnez, t1, off);
  1110. break;
  1111. /* PC += off if dst & src */
  1112. /* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
  1113. case BPF_JSET:
  1114. case JIT_JNSET:
  1115. emit(ctx, and, t1, lo(dst), lo(src));
  1116. emit(ctx, and, t2, hi(dst), hi(src));
  1117. emit(ctx, or, t1, t1, t2);
  1118. if (op == BPF_JSET)
  1119. emit(ctx, bnez, t1, off);
  1120. else /* JIT_JNSET */
  1121. emit(ctx, beqz, t1, off);
  1122. break;
  1123. /* PC += off if dst > src */
  1124. case BPF_JGT:
  1125. emit_sltu_r64(ctx, t1, src, dst);
  1126. emit(ctx, bnez, t1, off);
  1127. break;
  1128. /* PC += off if dst >= src */
  1129. case BPF_JGE:
  1130. emit_sltu_r64(ctx, t1, dst, src);
  1131. emit(ctx, beqz, t1, off);
  1132. break;
  1133. /* PC += off if dst < src */
  1134. case BPF_JLT:
  1135. emit_sltu_r64(ctx, t1, dst, src);
  1136. emit(ctx, bnez, t1, off);
  1137. break;
  1138. /* PC += off if dst <= src */
  1139. case BPF_JLE:
  1140. emit_sltu_r64(ctx, t1, src, dst);
  1141. emit(ctx, beqz, t1, off);
  1142. break;
  1143. /* PC += off if dst > src (signed) */
  1144. case BPF_JSGT:
  1145. emit_slt_r64(ctx, t1, src, dst);
  1146. emit(ctx, bnez, t1, off);
  1147. break;
  1148. /* PC += off if dst >= src (signed) */
  1149. case BPF_JSGE:
  1150. emit_slt_r64(ctx, t1, dst, src);
  1151. emit(ctx, beqz, t1, off);
  1152. break;
  1153. /* PC += off if dst < src (signed) */
  1154. case BPF_JSLT:
  1155. emit_slt_r64(ctx, t1, dst, src);
  1156. emit(ctx, bnez, t1, off);
  1157. break;
  1158. /* PC += off if dst <= src (signed) */
  1159. case BPF_JSLE:
  1160. emit_slt_r64(ctx, t1, src, dst);
  1161. emit(ctx, beqz, t1, off);
  1162. break;
  1163. }
  1164. }
  1165. /* Function call */
  1166. static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
  1167. {
  1168. bool fixed;
  1169. u64 addr;
  1170. /* Decode the call address */
  1171. if (bpf_jit_get_func_addr(ctx->program, insn, false,
  1172. &addr, &fixed) < 0)
  1173. return -1;
  1174. if (!fixed)
  1175. return -1;
  1176. /* Push stack arguments */
  1177. push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);
  1178. /* Emit function call */
  1179. emit_mov_i(ctx, MIPS_R_T9, addr);
  1180. emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
  1181. emit(ctx, nop); /* Delay slot */
  1182. clobber_reg(ctx, MIPS_R_RA);
  1183. clobber_reg(ctx, MIPS_R_V0);
  1184. clobber_reg(ctx, MIPS_R_V1);
  1185. return 0;
  1186. }
  1187. /* Function tail call */
  1188. static int emit_tail_call(struct jit_context *ctx)
  1189. {
  1190. u8 ary = lo(bpf2mips32[BPF_REG_2]);
  1191. u8 ind = lo(bpf2mips32[BPF_REG_3]);
  1192. u8 t1 = MIPS_R_T8;
  1193. u8 t2 = MIPS_R_T9;
  1194. int off;
  1195. /*
  1196. * Tail call:
  1197. * eBPF R1 - function argument (context ptr), passed in a0-a1
  1198. * eBPF R2 - ptr to object with array of function entry points
  1199. * eBPF R3 - array index of function to be called
  1200. * stack[sz] - remaining tail call count, initialized in prologue
  1201. */
  1202. /* if (ind >= ary->map.max_entries) goto out */
  1203. off = offsetof(struct bpf_array, map.max_entries);
  1204. if (off > 0x7fff)
  1205. return -1;
  1206. emit(ctx, lw, t1, off, ary); /* t1 = ary->map.max_entries*/
  1207. emit_load_delay(ctx); /* Load delay slot */
  1208. emit(ctx, sltu, t1, ind, t1); /* t1 = ind < t1 */
  1209. emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0 */
  1210. /* (next insn delay slot) */
  1211. /* if (TCC-- <= 0) goto out */
  1212. emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP); /* t2 = *(SP + size) */
  1213. emit_load_delay(ctx); /* Load delay slot */
  1214. emit(ctx, blez, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 <= 0 */
  1215. emit(ctx, addiu, t2, t2, -1); /* t2-- (delay slot) */
  1216. emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP); /* *(SP + size) = t2 */
  1217. /* prog = ary->ptrs[ind] */
  1218. off = offsetof(struct bpf_array, ptrs);
  1219. if (off > 0x7fff)
  1220. return -1;
  1221. emit(ctx, sll, t1, ind, 2); /* t1 = ind << 2 */
  1222. emit(ctx, addu, t1, t1, ary); /* t1 += ary */
  1223. emit(ctx, lw, t2, off, t1); /* t2 = *(t1 + off) */
  1224. emit_load_delay(ctx); /* Load delay slot */
  1225. /* if (prog == 0) goto out */
  1226. emit(ctx, beqz, t2, get_offset(ctx, 1)); /* PC += off(1) if t2 == 0 */
  1227. emit(ctx, nop); /* Delay slot */
  1228. /* func = prog->bpf_func + 8 (prologue skip offset) */
  1229. off = offsetof(struct bpf_prog, bpf_func);
  1230. if (off > 0x7fff)
  1231. return -1;
  1232. emit(ctx, lw, t1, off, t2); /* t1 = *(t2 + off) */
  1233. emit_load_delay(ctx); /* Load delay slot */
  1234. emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP); /* t1 += skip (8 or 12) */
  1235. /* goto func */
  1236. build_epilogue(ctx, t1);
  1237. return 0;
  1238. }
/*
 * Stack frame layout for a JITed program (stack grows down).
 *
 * Higher address  : Caller's stack frame       :
 *                 :----------------------------:
 *                 : 64-bit eBPF args r3-r5     :
 *                 :----------------------------:
 *                 : Reserved / tail call count :
 *                 +============================+  <--- MIPS sp before call
 *                 | Callee-saved registers,    |
 *                 | including RA and FP        |
 *                 +----------------------------+  <--- eBPF FP (MIPS zero,fp)
 *                 | Local eBPF variables       |
 *                 | allocated by program       |
 *                 +----------------------------+
 *                 | Reserved for caller-saved  |
 *                 | registers                  |
 *                 +----------------------------+
 *                 | Reserved for 64-bit eBPF   |
 *                 | args r3-r5 & args passed   |
 *                 | on stack in kernel calls   |
 * Lower address   +============================+  <--- MIPS sp
 */
  1262. /* Build program prologue to set up the stack and registers */
  1263. void build_prologue(struct jit_context *ctx)
  1264. {
  1265. const u8 *r1 = bpf2mips32[BPF_REG_1];
  1266. const u8 *fp = bpf2mips32[BPF_REG_FP];
  1267. int stack, saved, locals, reserved;
  1268. /*
  1269. * In the unlikely event that the TCC limit is raised to more
  1270. * than 16 bits, it is clamped to the maximum value allowed for
  1271. * the generated code (0xffff). It is better fail to compile
  1272. * instead of degrading gracefully.
  1273. */
  1274. BUILD_BUG_ON(MAX_TAIL_CALL_CNT > 0xffff);
  1275. /*
  1276. * The first two instructions initialize TCC in the reserved (for us)
  1277. * 16-byte area in the parent's stack frame. On a tail call, the
  1278. * calling function jumps into the prologue after these instructions.
  1279. */
  1280. emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
  1281. emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);
  1282. /*
  1283. * Register eBPF R1 contains the 32-bit context pointer argument.
  1284. * A 32-bit argument is always passed in MIPS register a0, regardless
  1285. * of CPU endianness. Initialize R1 accordingly and zero-extend.
  1286. */
  1287. #ifdef __BIG_ENDIAN
  1288. emit(ctx, move, lo(r1), MIPS_R_A0);
  1289. #endif
  1290. /* === Entry-point for tail calls === */
  1291. /* Zero-extend the 32-bit argument */
  1292. emit(ctx, move, hi(r1), MIPS_R_ZERO);
  1293. /* If the eBPF frame pointer was accessed it must be saved */
  1294. if (ctx->accessed & BIT(BPF_REG_FP))
  1295. clobber_reg64(ctx, fp);
  1296. /* Compute the stack space needed for callee-saved registers */
  1297. saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
  1298. saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
  1299. /* Stack space used by eBPF program local data */
  1300. locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
  1301. /*
  1302. * If we are emitting function calls, reserve extra stack space for
  1303. * caller-saved registers and function arguments passed on the stack.
  1304. * The required space is computed automatically during resource
  1305. * usage discovery (pass 1).
  1306. */
  1307. reserved = ctx->stack_used;
  1308. /* Allocate the stack frame */
  1309. stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
  1310. emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);
  1311. /* Store callee-saved registers on stack */
  1312. push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
  1313. /* Initialize the eBPF frame pointer if accessed */
  1314. if (ctx->accessed & BIT(BPF_REG_FP))
  1315. emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);
  1316. ctx->saved_size = saved;
  1317. ctx->stack_size = stack;
  1318. }
  1319. /* Build the program epilogue to restore the stack and registers */
  1320. void build_epilogue(struct jit_context *ctx, int dest_reg)
  1321. {
  1322. /* Restore callee-saved registers from stack */
  1323. pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
  1324. ctx->stack_size - ctx->saved_size);
  1325. /*
  1326. * A 32-bit return value is always passed in MIPS register v0,
  1327. * but on big-endian targets the low part of R0 is mapped to v1.
  1328. */
  1329. #ifdef __BIG_ENDIAN
  1330. emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
  1331. #endif
  1332. /* Jump to the return address and adjust the stack pointer */
  1333. emit(ctx, jr, dest_reg);
  1334. emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
  1335. }
  1336. /* Build one eBPF instruction */
  1337. int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
  1338. {
  1339. const u8 *dst = bpf2mips32[insn->dst_reg];
  1340. const u8 *src = bpf2mips32[insn->src_reg];
  1341. const u8 *res = bpf2mips32[BPF_REG_0];
  1342. const u8 *tmp = bpf2mips32[JIT_REG_TMP];
  1343. u8 code = insn->code;
  1344. s16 off = insn->off;
  1345. s32 imm = insn->imm;
  1346. s32 val, rel;
  1347. u8 alu, jmp;
  1348. switch (code) {
  1349. /* ALU operations */
  1350. /* dst = imm */
  1351. case BPF_ALU | BPF_MOV | BPF_K:
  1352. emit_mov_i(ctx, lo(dst), imm);
  1353. emit_zext_ver(ctx, dst);
  1354. break;
  1355. /* dst = src */
  1356. case BPF_ALU | BPF_MOV | BPF_X:
  1357. if (imm == 1) {
  1358. /* Special mov32 for zext */
  1359. emit_mov_i(ctx, hi(dst), 0);
  1360. } else {
  1361. emit_mov_r(ctx, lo(dst), lo(src));
  1362. emit_zext_ver(ctx, dst);
  1363. }
  1364. break;
  1365. /* dst = -dst */
  1366. case BPF_ALU | BPF_NEG:
  1367. emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
  1368. emit_zext_ver(ctx, dst);
  1369. break;
  1370. /* dst = dst & imm */
  1371. /* dst = dst | imm */
  1372. /* dst = dst ^ imm */
  1373. /* dst = dst << imm */
  1374. /* dst = dst >> imm */
  1375. /* dst = dst >> imm (arithmetic) */
  1376. /* dst = dst + imm */
  1377. /* dst = dst - imm */
  1378. /* dst = dst * imm */
  1379. /* dst = dst / imm */
  1380. /* dst = dst % imm */
  1381. case BPF_ALU | BPF_OR | BPF_K:
  1382. case BPF_ALU | BPF_AND | BPF_K:
  1383. case BPF_ALU | BPF_XOR | BPF_K:
  1384. case BPF_ALU | BPF_LSH | BPF_K:
  1385. case BPF_ALU | BPF_RSH | BPF_K:
  1386. case BPF_ALU | BPF_ARSH | BPF_K:
  1387. case BPF_ALU | BPF_ADD | BPF_K:
  1388. case BPF_ALU | BPF_SUB | BPF_K:
  1389. case BPF_ALU | BPF_MUL | BPF_K:
  1390. case BPF_ALU | BPF_DIV | BPF_K:
  1391. case BPF_ALU | BPF_MOD | BPF_K:
  1392. if (!valid_alu_i(BPF_OP(code), imm)) {
  1393. emit_mov_i(ctx, MIPS_R_T6, imm);
  1394. emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
  1395. } else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
  1396. emit_alu_i(ctx, lo(dst), val, alu);
  1397. }
  1398. emit_zext_ver(ctx, dst);
  1399. break;
  1400. /* dst = dst & src */
  1401. /* dst = dst | src */
  1402. /* dst = dst ^ src */
  1403. /* dst = dst << src */
  1404. /* dst = dst >> src */
  1405. /* dst = dst >> src (arithmetic) */
  1406. /* dst = dst + src */
  1407. /* dst = dst - src */
  1408. /* dst = dst * src */
  1409. /* dst = dst / src */
  1410. /* dst = dst % src */
  1411. case BPF_ALU | BPF_AND | BPF_X:
  1412. case BPF_ALU | BPF_OR | BPF_X:
  1413. case BPF_ALU | BPF_XOR | BPF_X:
  1414. case BPF_ALU | BPF_LSH | BPF_X:
  1415. case BPF_ALU | BPF_RSH | BPF_X:
  1416. case BPF_ALU | BPF_ARSH | BPF_X:
  1417. case BPF_ALU | BPF_ADD | BPF_X:
  1418. case BPF_ALU | BPF_SUB | BPF_X:
  1419. case BPF_ALU | BPF_MUL | BPF_X:
  1420. case BPF_ALU | BPF_DIV | BPF_X:
  1421. case BPF_ALU | BPF_MOD | BPF_X:
  1422. emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
  1423. emit_zext_ver(ctx, dst);
  1424. break;
  1425. /* dst = imm (64-bit) */
  1426. case BPF_ALU64 | BPF_MOV | BPF_K:
  1427. emit_mov_se_i64(ctx, dst, imm);
  1428. break;
  1429. /* dst = src (64-bit) */
  1430. case BPF_ALU64 | BPF_MOV | BPF_X:
  1431. emit_mov_r(ctx, lo(dst), lo(src));
  1432. emit_mov_r(ctx, hi(dst), hi(src));
  1433. break;
  1434. /* dst = -dst (64-bit) */
  1435. case BPF_ALU64 | BPF_NEG:
  1436. emit_neg_i64(ctx, dst);
  1437. break;
  1438. /* dst = dst & imm (64-bit) */
  1439. case BPF_ALU64 | BPF_AND | BPF_K:
  1440. emit_alu_i64(ctx, dst, imm, BPF_OP(code));
  1441. break;
  1442. /* dst = dst | imm (64-bit) */
  1443. /* dst = dst ^ imm (64-bit) */
  1444. /* dst = dst + imm (64-bit) */
  1445. /* dst = dst - imm (64-bit) */
  1446. case BPF_ALU64 | BPF_OR | BPF_K:
  1447. case BPF_ALU64 | BPF_XOR | BPF_K:
  1448. case BPF_ALU64 | BPF_ADD | BPF_K:
  1449. case BPF_ALU64 | BPF_SUB | BPF_K:
  1450. if (imm)
  1451. emit_alu_i64(ctx, dst, imm, BPF_OP(code));
  1452. break;
  1453. /* dst = dst << imm (64-bit) */
  1454. /* dst = dst >> imm (64-bit) */
  1455. /* dst = dst >> imm (64-bit, arithmetic) */
  1456. case BPF_ALU64 | BPF_LSH | BPF_K:
  1457. case BPF_ALU64 | BPF_RSH | BPF_K:
  1458. case BPF_ALU64 | BPF_ARSH | BPF_K:
  1459. if (imm)
  1460. emit_shift_i64(ctx, dst, imm, BPF_OP(code));
  1461. break;
  1462. /* dst = dst * imm (64-bit) */
  1463. case BPF_ALU64 | BPF_MUL | BPF_K:
  1464. emit_mul_i64(ctx, dst, imm);
  1465. break;
  1466. /* dst = dst / imm (64-bit) */
  1467. /* dst = dst % imm (64-bit) */
  1468. case BPF_ALU64 | BPF_DIV | BPF_K:
  1469. case BPF_ALU64 | BPF_MOD | BPF_K:
  1470. /*
  1471. * Sign-extend the immediate value into a temporary register,
  1472. * and then do the operation on this register.
  1473. */
  1474. emit_mov_se_i64(ctx, tmp, imm);
  1475. emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
  1476. break;
  1477. /* dst = dst & src (64-bit) */
  1478. /* dst = dst | src (64-bit) */
  1479. /* dst = dst ^ src (64-bit) */
  1480. /* dst = dst + src (64-bit) */
  1481. /* dst = dst - src (64-bit) */
  1482. case BPF_ALU64 | BPF_AND | BPF_X:
  1483. case BPF_ALU64 | BPF_OR | BPF_X:
  1484. case BPF_ALU64 | BPF_XOR | BPF_X:
  1485. case BPF_ALU64 | BPF_ADD | BPF_X:
  1486. case BPF_ALU64 | BPF_SUB | BPF_X:
  1487. emit_alu_r64(ctx, dst, src, BPF_OP(code));
  1488. break;
  1489. /* dst = dst << src (64-bit) */
  1490. /* dst = dst >> src (64-bit) */
  1491. /* dst = dst >> src (64-bit, arithmetic) */
  1492. case BPF_ALU64 | BPF_LSH | BPF_X:
  1493. case BPF_ALU64 | BPF_RSH | BPF_X:
  1494. case BPF_ALU64 | BPF_ARSH | BPF_X:
  1495. emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
  1496. break;
  1497. /* dst = dst * src (64-bit) */
  1498. case BPF_ALU64 | BPF_MUL | BPF_X:
  1499. emit_mul_r64(ctx, dst, src);
  1500. break;
  1501. /* dst = dst / src (64-bit) */
  1502. /* dst = dst % src (64-bit) */
  1503. case BPF_ALU64 | BPF_DIV | BPF_X:
  1504. case BPF_ALU64 | BPF_MOD | BPF_X:
  1505. emit_divmod_r64(ctx, dst, src, BPF_OP(code));
  1506. break;
  1507. /* dst = htole(dst) */
  1508. /* dst = htobe(dst) */
  1509. case BPF_ALU | BPF_END | BPF_FROM_LE:
  1510. case BPF_ALU | BPF_END | BPF_FROM_BE:
  1511. if (BPF_SRC(code) ==
  1512. #ifdef __BIG_ENDIAN
  1513. BPF_FROM_LE
  1514. #else
  1515. BPF_FROM_BE
  1516. #endif
  1517. )
  1518. emit_bswap_r64(ctx, dst, imm);
  1519. else
  1520. emit_trunc_r64(ctx, dst, imm);
  1521. break;
  1522. /* dst = imm64 */
  1523. case BPF_LD | BPF_IMM | BPF_DW:
  1524. emit_mov_i(ctx, lo(dst), imm);
  1525. emit_mov_i(ctx, hi(dst), insn[1].imm);
  1526. return 1;
  1527. /* LDX: dst = *(size *)(src + off) */
  1528. case BPF_LDX | BPF_MEM | BPF_W:
  1529. case BPF_LDX | BPF_MEM | BPF_H:
  1530. case BPF_LDX | BPF_MEM | BPF_B:
  1531. case BPF_LDX | BPF_MEM | BPF_DW:
  1532. emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
  1533. break;
  1534. /* ST: *(size *)(dst + off) = imm */
  1535. case BPF_ST | BPF_MEM | BPF_W:
  1536. case BPF_ST | BPF_MEM | BPF_H:
  1537. case BPF_ST | BPF_MEM | BPF_B:
  1538. case BPF_ST | BPF_MEM | BPF_DW:
  1539. switch (BPF_SIZE(code)) {
  1540. case BPF_DW:
  1541. /* Sign-extend immediate value into temporary reg */
  1542. emit_mov_se_i64(ctx, tmp, imm);
  1543. break;
  1544. case BPF_W:
  1545. case BPF_H:
  1546. case BPF_B:
  1547. emit_mov_i(ctx, lo(tmp), imm);
  1548. break;
  1549. }
  1550. emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
  1551. break;
  1552. /* STX: *(size *)(dst + off) = src */
  1553. case BPF_STX | BPF_MEM | BPF_W:
  1554. case BPF_STX | BPF_MEM | BPF_H:
  1555. case BPF_STX | BPF_MEM | BPF_B:
  1556. case BPF_STX | BPF_MEM | BPF_DW:
  1557. emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
  1558. break;
  1559. /* Speculation barrier */
  1560. case BPF_ST | BPF_NOSPEC:
  1561. break;
  1562. /* Atomics */
  1563. case BPF_STX | BPF_ATOMIC | BPF_W:
  1564. switch (imm) {
  1565. case BPF_ADD:
  1566. case BPF_ADD | BPF_FETCH:
  1567. case BPF_AND:
  1568. case BPF_AND | BPF_FETCH:
  1569. case BPF_OR:
  1570. case BPF_OR | BPF_FETCH:
  1571. case BPF_XOR:
  1572. case BPF_XOR | BPF_FETCH:
  1573. case BPF_XCHG:
  1574. if (cpu_has_llsc)
  1575. emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
  1576. else /* Non-ll/sc fallback */
  1577. emit_atomic_r32(ctx, lo(dst), lo(src),
  1578. off, imm);
  1579. if (imm & BPF_FETCH)
  1580. emit_zext_ver(ctx, src);
  1581. break;
  1582. case BPF_CMPXCHG:
  1583. if (cpu_has_llsc)
  1584. emit_cmpxchg_r(ctx, lo(dst), lo(src),
  1585. lo(res), off);
  1586. else /* Non-ll/sc fallback */
  1587. emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
  1588. /* Result zero-extension inserted by verifier */
  1589. break;
  1590. default:
  1591. goto notyet;
  1592. }
  1593. break;
  1594. /* Atomics (64-bit) */
  1595. case BPF_STX | BPF_ATOMIC | BPF_DW:
  1596. switch (imm) {
  1597. case BPF_ADD:
  1598. case BPF_ADD | BPF_FETCH:
  1599. case BPF_AND:
  1600. case BPF_AND | BPF_FETCH:
  1601. case BPF_OR:
  1602. case BPF_OR | BPF_FETCH:
  1603. case BPF_XOR:
  1604. case BPF_XOR | BPF_FETCH:
  1605. case BPF_XCHG:
  1606. emit_atomic_r64(ctx, lo(dst), src, off, imm);
  1607. break;
  1608. case BPF_CMPXCHG:
  1609. emit_cmpxchg_r64(ctx, lo(dst), src, off);
  1610. break;
  1611. default:
  1612. goto notyet;
  1613. }
  1614. break;
  1615. /* PC += off if dst == src */
  1616. /* PC += off if dst != src */
  1617. /* PC += off if dst & src */
  1618. /* PC += off if dst > src */
  1619. /* PC += off if dst >= src */
  1620. /* PC += off if dst < src */
  1621. /* PC += off if dst <= src */
  1622. /* PC += off if dst > src (signed) */
  1623. /* PC += off if dst >= src (signed) */
  1624. /* PC += off if dst < src (signed) */
  1625. /* PC += off if dst <= src (signed) */
  1626. case BPF_JMP32 | BPF_JEQ | BPF_X:
  1627. case BPF_JMP32 | BPF_JNE | BPF_X:
  1628. case BPF_JMP32 | BPF_JSET | BPF_X:
  1629. case BPF_JMP32 | BPF_JGT | BPF_X:
  1630. case BPF_JMP32 | BPF_JGE | BPF_X:
  1631. case BPF_JMP32 | BPF_JLT | BPF_X:
  1632. case BPF_JMP32 | BPF_JLE | BPF_X:
  1633. case BPF_JMP32 | BPF_JSGT | BPF_X:
  1634. case BPF_JMP32 | BPF_JSGE | BPF_X:
  1635. case BPF_JMP32 | BPF_JSLT | BPF_X:
  1636. case BPF_JMP32 | BPF_JSLE | BPF_X:
  1637. if (off == 0)
  1638. break;
  1639. setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
  1640. emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
  1641. if (finish_jmp(ctx, jmp, off) < 0)
  1642. goto toofar;
  1643. break;
  1644. /* PC += off if dst == imm */
  1645. /* PC += off if dst != imm */
  1646. /* PC += off if dst & imm */
  1647. /* PC += off if dst > imm */
  1648. /* PC += off if dst >= imm */
  1649. /* PC += off if dst < imm */
  1650. /* PC += off if dst <= imm */
  1651. /* PC += off if dst > imm (signed) */
  1652. /* PC += off if dst >= imm (signed) */
  1653. /* PC += off if dst < imm (signed) */
  1654. /* PC += off if dst <= imm (signed) */
  1655. case BPF_JMP32 | BPF_JEQ | BPF_K:
  1656. case BPF_JMP32 | BPF_JNE | BPF_K:
  1657. case BPF_JMP32 | BPF_JSET | BPF_K:
  1658. case BPF_JMP32 | BPF_JGT | BPF_K:
  1659. case BPF_JMP32 | BPF_JGE | BPF_K:
  1660. case BPF_JMP32 | BPF_JLT | BPF_K:
  1661. case BPF_JMP32 | BPF_JLE | BPF_K:
  1662. case BPF_JMP32 | BPF_JSGT | BPF_K:
  1663. case BPF_JMP32 | BPF_JSGE | BPF_K:
  1664. case BPF_JMP32 | BPF_JSLT | BPF_K:
  1665. case BPF_JMP32 | BPF_JSLE | BPF_K:
  1666. if (off == 0)
  1667. break;
  1668. setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
  1669. if (valid_jmp_i(jmp, imm)) {
  1670. emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
  1671. } else {
  1672. /* Move large immediate to register */
  1673. emit_mov_i(ctx, MIPS_R_T6, imm);
  1674. emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
  1675. }
  1676. if (finish_jmp(ctx, jmp, off) < 0)
  1677. goto toofar;
  1678. break;
  1679. /* PC += off if dst == src */
  1680. /* PC += off if dst != src */
  1681. /* PC += off if dst & src */
  1682. /* PC += off if dst > src */
  1683. /* PC += off if dst >= src */
  1684. /* PC += off if dst < src */
  1685. /* PC += off if dst <= src */
  1686. /* PC += off if dst > src (signed) */
  1687. /* PC += off if dst >= src (signed) */
  1688. /* PC += off if dst < src (signed) */
  1689. /* PC += off if dst <= src (signed) */
  1690. case BPF_JMP | BPF_JEQ | BPF_X:
  1691. case BPF_JMP | BPF_JNE | BPF_X:
  1692. case BPF_JMP | BPF_JSET | BPF_X:
  1693. case BPF_JMP | BPF_JGT | BPF_X:
  1694. case BPF_JMP | BPF_JGE | BPF_X:
  1695. case BPF_JMP | BPF_JLT | BPF_X:
  1696. case BPF_JMP | BPF_JLE | BPF_X:
  1697. case BPF_JMP | BPF_JSGT | BPF_X:
  1698. case BPF_JMP | BPF_JSGE | BPF_X:
  1699. case BPF_JMP | BPF_JSLT | BPF_X:
  1700. case BPF_JMP | BPF_JSLE | BPF_X:
  1701. if (off == 0)
  1702. break;
  1703. setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
  1704. emit_jmp_r64(ctx, dst, src, rel, jmp);
  1705. if (finish_jmp(ctx, jmp, off) < 0)
  1706. goto toofar;
  1707. break;
  1708. /* PC += off if dst == imm */
  1709. /* PC += off if dst != imm */
  1710. /* PC += off if dst & imm */
  1711. /* PC += off if dst > imm */
  1712. /* PC += off if dst >= imm */
  1713. /* PC += off if dst < imm */
  1714. /* PC += off if dst <= imm */
  1715. /* PC += off if dst > imm (signed) */
  1716. /* PC += off if dst >= imm (signed) */
  1717. /* PC += off if dst < imm (signed) */
  1718. /* PC += off if dst <= imm (signed) */
  1719. case BPF_JMP | BPF_JEQ | BPF_K:
  1720. case BPF_JMP | BPF_JNE | BPF_K:
  1721. case BPF_JMP | BPF_JSET | BPF_K:
  1722. case BPF_JMP | BPF_JGT | BPF_K:
  1723. case BPF_JMP | BPF_JGE | BPF_K:
  1724. case BPF_JMP | BPF_JLT | BPF_K:
  1725. case BPF_JMP | BPF_JLE | BPF_K:
  1726. case BPF_JMP | BPF_JSGT | BPF_K:
  1727. case BPF_JMP | BPF_JSGE | BPF_K:
  1728. case BPF_JMP | BPF_JSLT | BPF_K:
  1729. case BPF_JMP | BPF_JSLE | BPF_K:
  1730. if (off == 0)
  1731. break;
  1732. setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
  1733. emit_jmp_i64(ctx, dst, imm, rel, jmp);
  1734. if (finish_jmp(ctx, jmp, off) < 0)
  1735. goto toofar;
  1736. break;
  1737. /* PC += off */
  1738. case BPF_JMP | BPF_JA:
  1739. if (off == 0)
  1740. break;
  1741. if (emit_ja(ctx, off) < 0)
  1742. goto toofar;
  1743. break;
  1744. /* Tail call */
  1745. case BPF_JMP | BPF_TAIL_CALL:
  1746. if (emit_tail_call(ctx) < 0)
  1747. goto invalid;
  1748. break;
  1749. /* Function call */
  1750. case BPF_JMP | BPF_CALL:
  1751. if (emit_call(ctx, insn) < 0)
  1752. goto invalid;
  1753. break;
  1754. /* Function return */
  1755. case BPF_JMP | BPF_EXIT:
  1756. /*
  1757. * Optimization: when last instruction is EXIT
  1758. * simply continue to epilogue.
  1759. */
  1760. if (ctx->bpf_index == ctx->program->len - 1)
  1761. break;
  1762. if (emit_exit(ctx) < 0)
  1763. goto toofar;
  1764. break;
  1765. default:
  1766. invalid:
  1767. pr_err_once("unknown opcode %02x\n", code);
  1768. return -EINVAL;
  1769. notyet:
  1770. pr_info_once("*** NOT YET: opcode %02x ***\n", code);
  1771. return -EFAULT;
  1772. toofar:
  1773. pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
  1774. ctx->bpf_index, code);
  1775. return -E2BIG;
  1776. }
  1777. return 0;
  1778. }