bpf_jit_comp.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF bytecode on MIPS.
 * Implementation of JIT functions common to 32-bit and 64-bit CPUs.
 *
 * Copyright (c) 2021 Anyfi Networks AB.
 * Author: Johan Almbladh <[email protected]>
 *
 * Based on code and ideas from
 * Copyright (c) 2017 Cavium, Inc.
 * Copyright (c) 2017 Shubham Bansal <[email protected]>
 * Copyright (c) 2011 Mircea Gherzan <[email protected]>
 */

/*
 * Code overview
 * =============
 *
 * - bpf_jit_comp.h
 *   Common definitions and utilities.
 *
 * - bpf_jit_comp.c
 *   Implementation of JIT top-level logic and exported JIT API functions.
 *   Implementation of internal operations shared by 32-bit and 64-bit code.
 *   JMP and ALU JIT control code, register control code, shared ALU and
 *   JMP/JMP32 JIT operations.
 *
 * - bpf_jit_comp32.c
 *   Implementation of functions to JIT prologue, epilogue and a single eBPF
 *   instruction for 32-bit MIPS CPUs. The functions use shared operations
 *   where possible, and implement the rest for 32-bit MIPS such as ALU64
 *   operations.
 *
 * - bpf_jit_comp64.c
 *   Ditto, for 64-bit MIPS CPUs.
 *
 * Zero and sign extension
 * ========================
 * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension,
 * but the eBPF instruction set mandates zero extension. We let the verifier
 * insert explicit zero-extensions after 32-bit ALU operations, both for
 * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPS
 * are JITed with sign extensions inserted when so expected.
 *
 * ALU operations
 * ==============
 * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are
 * JITed in the following steps. ALU64 operations on 32-bit MIPS are more
 * complicated and therefore only processed by special implementations in
 * step (3). (A worked example follows this comment block.)
 *
 * 1) valid_alu_i:
 *    Determine if an immediate operation can be emitted as such, or if
 *    we must fall back to the register version.
 *
 * 2) rewrite_alu_i:
 *    Convert BPF operation and immediate value to a canonical form for
 *    JITing. In some degenerate cases this form may be a no-op.
 *
 * 3) emit_alu_{i,i64,r,64}:
 *    Emit instructions for an ALU or ALU64 immediate or register operation.
 *
 * JMP operations
 * ==============
 * JMP and JMP32 operations require a JIT instruction offset table for
 * translating the jump offset. This table is computed by dry-running the
 * JIT without actually emitting anything. However, the computed PC-relative
 * offset may overflow the 18-bit offset field width of the native MIPS
 * branch instruction. In such cases, the long jump is converted into the
 * following sequence.
 *
 *      <branch> !<cond> +2    Inverted PC-relative branch
 *      nop                    Delay slot
 *      j <offset>             Unconditional absolute long jump
 *      nop                    Delay slot
 *
 * Since this converted sequence alters the offset table, all offsets must
 * be re-calculated. This may in turn trigger new branch conversions, so
 * the process is repeated until no further changes are made. Normally it
 * completes in 1-2 iterations. If JIT_MAX_ITERATIONS is reached, we
 * fall back to converting every remaining jump operation. The branch
 * conversion is independent of how the JMP or JMP32 condition is JITed.
 *
 * JMP32 and JMP operations are JITed as follows.
 *
 * 1) setup_jmp_{i,r}:
 *    Convert jump conditional and offset into a form that can be JITed.
 *    This form may be a no-op, a canonical form, or an inverted PC-relative
 *    jump if branch conversion is necessary.
 *
 * 2) valid_jmp_i:
 *    Determine if an immediate operation can be emitted as such, or if
 *    we must fall back to the register version. Applies to JMP32 for 32-bit
 *    MIPS, and both JMP and JMP32 for 64-bit MIPS.
 *
 * 3) emit_jmp_{i,i64,r,r64}:
 *    Emit instructions for a JMP or JMP32 immediate or register operation.
 *
 * 4) finish_jmp_{i,r}:
 *    Emit any instructions needed to finish the jump. This includes a nop
 *    for the delay slot if a branch was emitted, and a long absolute jump
 *    if the branch was converted.
 */
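
/*
 * Worked example (editorial illustration, not part of the original file):
 * a 32-bit BPF_ALU | BPF_DIV | BPF_K instruction with imm = 8 passes
 * through the three ALU steps above roughly as follows.
 *
 *   1) valid_alu_i(BPF_DIV, 8)              -> true (8 is a power of two <= 0x10000)
 *   2) rewrite_alu_i(BPF_DIV, 8, &op, &val) -> op = BPF_RSH, val = 3
 *   3) emit_alu_i(ctx, dst, 3, BPF_RSH)     -> emits "srl dst, dst, 3"
 *
 * Register mapping of dst is handled by the 32-bit and 64-bit back ends.
 */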

#include <linux/limits.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/slab.h>
#include <asm/bitops.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/isa-rev.h>
#include <asm/uasm.h>

#include "bpf_jit_comp.h"

/* Convenience macros for descriptor access */
#define CONVERTED(desc)	((desc) & JIT_DESC_CONVERT)
#define INDEX(desc)	((desc) & ~JIT_DESC_CONVERT)

/*
 * Push registers on the stack, starting at a given depth from the stack
 * pointer and increasing. The next depth to be written is returned.
 */
int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{
	int reg;

	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
		if (mask & BIT(reg)) {
			if ((excl & BIT(reg)) == 0) {
				if (sizeof(long) == 4)
					emit(ctx, sw, reg, depth, MIPS_R_SP);
				else /* sizeof(long) == 8 */
					emit(ctx, sd, reg, depth, MIPS_R_SP);
			}
			depth += sizeof(long);
		}

	ctx->stack_used = max((int)ctx->stack_used, depth);
	return depth;
}

/*
 * Pop registers from the stack, starting at a given depth from the stack
 * pointer and increasing. The next depth to be read is returned.
 */
int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{
	int reg;

	for (reg = 0; reg < BITS_PER_BYTE * sizeof(mask); reg++)
		if (mask & BIT(reg)) {
			if ((excl & BIT(reg)) == 0) {
				if (sizeof(long) == 4)
					emit(ctx, lw, reg, depth, MIPS_R_SP);
				else /* sizeof(long) == 8 */
					emit(ctx, ld, reg, depth, MIPS_R_SP);
			}
			depth += sizeof(long);
		}

	return depth;
}

/* Compute the 28-bit jump target address from a BPF program location */
int get_target(struct jit_context *ctx, u32 loc)
{
	u32 index = INDEX(ctx->descriptors[loc]);
	unsigned long pc = (unsigned long)&ctx->target[ctx->jit_index];
	unsigned long addr = (unsigned long)&ctx->target[index];

	if (!ctx->target)
		return 0;

	if ((addr ^ pc) & ~MIPS_JMP_MASK)
		return -1;

	return addr & MIPS_JMP_MASK;
}

/* Compute the PC-relative offset to a relative BPF program offset */
int get_offset(const struct jit_context *ctx, int off)
{
	return (INDEX(ctx->descriptors[ctx->bpf_index + off]) -
		ctx->jit_index - 1) * sizeof(u32);
}
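
/*
 * Illustration (editorial note, not upstream code): if the descriptor table
 * maps the branch target to JIT index 100 and ctx->jit_index is 90 when the
 * branch is emitted, get_offset() returns (100 - 90 - 1) * 4 = 36 bytes.
 * The extra -1 accounts for MIPS branches being relative to the instruction
 * after the branch, i.e. the delay slot.
 */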

/* dst = imm (register width) */
void emit_mov_i(struct jit_context *ctx, u8 dst, s32 imm)
{
	if (imm >= -0x8000 && imm <= 0x7fff) {
		emit(ctx, addiu, dst, MIPS_R_ZERO, imm);
	} else {
		emit(ctx, lui, dst, (s16)((u32)imm >> 16));
		emit(ctx, ori, dst, dst, (u16)(imm & 0xffff));
	}
	clobber_reg(ctx, dst);
}
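
/*
 * Illustration (editorial note, not upstream code): emit_mov_i() generates
 * one or two instructions depending on the immediate.
 *
 *   emit_mov_i(ctx, dst, 42)          ->  addiu dst, zero, 42
 *   emit_mov_i(ctx, dst, 0x12345678)  ->  lui   dst, 0x1234
 *                                         ori   dst, dst, 0x5678
 */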

/* dst = src (register width) */
void emit_mov_r(struct jit_context *ctx, u8 dst, u8 src)
{
	emit(ctx, ori, dst, src, 0);
	clobber_reg(ctx, dst);
}

/* Validate ALU immediate range */
bool valid_alu_i(u8 op, s32 imm)
{
	switch (BPF_OP(op)) {
	case BPF_NEG:
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
		/* All legal eBPF values are valid */
		return true;
	case BPF_ADD:
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_SUB:
		/* -imm must be 16 bits */
		return imm >= -0x7fff && imm <= 0x8000;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_MUL:
		/* imm must be zero or a positive power of two */
		return imm == 0 || (imm > 0 && is_power_of_2(imm));
	case BPF_DIV:
	case BPF_MOD:
		/* imm must be a 17-bit power of two */
		return (u32)imm <= 0x10000 && is_power_of_2((u32)imm);
	}
	return false;
}

/* Rewrite ALU immediate operation */
bool rewrite_alu_i(u8 op, s32 imm, u8 *alu, s32 *val)
{
	bool act = true;

	switch (BPF_OP(op)) {
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
	case BPF_ADD:
	case BPF_SUB:
	case BPF_OR:
	case BPF_XOR:
		/* imm == 0 is a no-op */
		act = imm != 0;
		break;
	case BPF_MUL:
		if (imm == 1) {
			/* dst * 1 is a no-op */
			act = false;
		} else if (imm == 0) {
			/* dst * 0 is dst & 0 */
			op = BPF_AND;
		} else {
			/* dst * (1 << n) is dst << n */
			op = BPF_LSH;
			imm = ilog2(abs(imm));
		}
		break;
	case BPF_DIV:
		if (imm == 1) {
			/* dst / 1 is a no-op */
			act = false;
		} else {
			/* dst / (1 << n) is dst >> n */
			op = BPF_RSH;
			imm = ilog2(imm);
		}
		break;
	case BPF_MOD:
		/* dst % (1 << n) is dst & ((1 << n) - 1) */
		op = BPF_AND;
		imm--;
		break;
	}

	*alu = op;
	*val = imm;
	return act;
}
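
/*
 * Illustration (editorial note, not upstream code): typical rewrites
 * produced by rewrite_alu_i(), all of which must first pass valid_alu_i().
 *
 *   BPF_MUL, imm = 4   ->  BPF_LSH, imm = 2   (dst * 4  == dst << 2)
 *   BPF_MUL, imm = 0   ->  BPF_AND, imm = 0   (dst * 0  == dst & 0)
 *   BPF_MOD, imm = 16  ->  BPF_AND, imm = 15  (dst % 16 == dst & 15)
 *   BPF_ADD, imm = 0   ->  no-op (returns false, nothing is emitted)
 */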

/* ALU immediate operation (32-bit) */
void emit_alu_i(struct jit_context *ctx, u8 dst, s32 imm, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = -dst */
	case BPF_NEG:
		emit(ctx, subu, dst, MIPS_R_ZERO, dst);
		break;
	/* dst = dst & imm */
	case BPF_AND:
		emit(ctx, andi, dst, dst, (u16)imm);
		break;
	/* dst = dst | imm */
	case BPF_OR:
		emit(ctx, ori, dst, dst, (u16)imm);
		break;
	/* dst = dst ^ imm */
	case BPF_XOR:
		emit(ctx, xori, dst, dst, (u16)imm);
		break;
	/* dst = dst << imm */
	case BPF_LSH:
		emit(ctx, sll, dst, dst, imm);
		break;
	/* dst = dst >> imm */
	case BPF_RSH:
		emit(ctx, srl, dst, dst, imm);
		break;
	/* dst = dst >> imm (arithmetic) */
	case BPF_ARSH:
		emit(ctx, sra, dst, dst, imm);
		break;
	/* dst = dst + imm */
	case BPF_ADD:
		emit(ctx, addiu, dst, dst, imm);
		break;
	/* dst = dst - imm */
	case BPF_SUB:
		emit(ctx, addiu, dst, dst, -imm);
		break;
	}
	clobber_reg(ctx, dst);
}

/* ALU register operation (32-bit) */
void emit_alu_r(struct jit_context *ctx, u8 dst, u8 src, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst & src */
	case BPF_AND:
		emit(ctx, and, dst, dst, src);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ctx, or, dst, dst, src);
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ctx, xor, dst, dst, src);
		break;
	/* dst = dst << src */
	case BPF_LSH:
		emit(ctx, sllv, dst, dst, src);
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		emit(ctx, srlv, dst, dst, src);
		break;
	/* dst = dst >> src (arithmetic) */
	case BPF_ARSH:
		emit(ctx, srav, dst, dst, src);
		break;
	/* dst = dst + src */
	case BPF_ADD:
		emit(ctx, addu, dst, dst, src);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit(ctx, subu, dst, dst, src);
		break;
	/* dst = dst * src */
	case BPF_MUL:
		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
			emit(ctx, mul, dst, dst, src);
		} else {
			emit(ctx, multu, dst, src);
			emit(ctx, mflo, dst);
		}
		break;
	/* dst = dst / src */
	case BPF_DIV:
		if (cpu_has_mips32r6) {
			emit(ctx, divu_r6, dst, dst, src);
		} else {
			emit(ctx, divu, dst, src);
			emit(ctx, mflo, dst);
		}
		break;
	/* dst = dst % src */
	case BPF_MOD:
		if (cpu_has_mips32r6) {
			emit(ctx, modu, dst, dst, src);
		} else {
			emit(ctx, divu, dst, src);
			emit(ctx, mfhi, dst);
		}
		break;
	}
	clobber_reg(ctx, dst);
}

/* Atomic read-modify-write (32-bit) */
void emit_atomic_r(struct jit_context *ctx, u8 dst, u8 src, s16 off, u8 code)
{
	LLSC_sync(ctx);
	emit(ctx, ll, MIPS_R_T9, off, dst);
	switch (code) {
	case BPF_ADD:
	case BPF_ADD | BPF_FETCH:
		emit(ctx, addu, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_AND:
	case BPF_AND | BPF_FETCH:
		emit(ctx, and, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_OR:
	case BPF_OR | BPF_FETCH:
		emit(ctx, or, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_XOR:
	case BPF_XOR | BPF_FETCH:
		emit(ctx, xor, MIPS_R_T8, MIPS_R_T9, src);
		break;
	case BPF_XCHG:
		emit(ctx, move, MIPS_R_T8, src);
		break;
	}
	emit(ctx, sc, MIPS_R_T8, off, dst);
	emit(ctx, LLSC_beqz, MIPS_R_T8, -16 - LLSC_offset);
	emit(ctx, nop); /* Delay slot */

	if (code & BPF_FETCH) {
		emit(ctx, move, src, MIPS_R_T9);
		clobber_reg(ctx, src);
	}
}
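
/*
 * Illustration (editorial note, not upstream code): for an atomic BPF_ADD
 * without BPF_FETCH, emit_atomic_r() produces a standard ll/sc retry loop,
 * roughly:
 *
 *   1: ll    t9, off(dst)        load linked
 *      addu  t8, t9, src         t8 = old value + src
 *      sc    t8, off(dst)        store conditional, t8 = success flag
 *      beqz  t8, 1b              retry if the store was interrupted
 *      nop                       delay slot
 *
 * With BPF_FETCH, the old value in t9 is additionally moved into src.
 */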

/* Atomic compare-and-exchange (32-bit) */
void emit_cmpxchg_r(struct jit_context *ctx, u8 dst, u8 src, u8 res, s16 off)
{
	LLSC_sync(ctx);
	emit(ctx, ll, MIPS_R_T9, off, dst);
	emit(ctx, bne, MIPS_R_T9, res, 12);
	emit(ctx, move, MIPS_R_T8, src);	/* Delay slot */
	emit(ctx, sc, MIPS_R_T8, off, dst);
	emit(ctx, LLSC_beqz, MIPS_R_T8, -20 - LLSC_offset);
	emit(ctx, move, res, MIPS_R_T9);	/* Delay slot */
	clobber_reg(ctx, res);
}

/* Swap bytes and truncate a register word or half word */
void emit_bswap_r(struct jit_context *ctx, u8 dst, u32 width)
{
	u8 tmp = MIPS_R_T8;
	u8 msk = MIPS_R_T9;

	switch (width) {
	/* Swap bytes in a word */
	case 32:
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, wsbh, dst, dst);
			emit(ctx, rotr, dst, dst, 16);
		} else {
			emit(ctx, sll, tmp, dst, 16);    /* tmp = dst << 16 */
			emit(ctx, srl, dst, dst, 16);    /* dst = dst >> 16 */
			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */
			emit(ctx, lui, msk, 0xff);       /* msk = 0x00ff0000 */
			emit(ctx, ori, msk, msk, 0xff);  /* msk = msk | 0xff */
			emit(ctx, and, tmp, dst, msk);   /* tmp = dst & msk */
			emit(ctx, sll, tmp, tmp, 8);     /* tmp = tmp << 8 */
			emit(ctx, srl, dst, dst, 8);     /* dst = dst >> 8 */
			emit(ctx, and, dst, dst, msk);   /* dst = dst & msk */
			emit(ctx, or, dst, dst, tmp);    /* dst = dst | tmp */
		}
		break;
	/* Swap bytes in a half word */
	case 16:
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, wsbh, dst, dst);
			emit(ctx, andi, dst, dst, 0xffff);
		} else {
			emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */
			emit(ctx, srl, tmp, tmp, 8);       /* t = t >> 8 */
			emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */
			emit(ctx, sll, dst, dst, 8);       /* d = d << 8 */
			emit(ctx, or, dst, dst, tmp);      /* d = d | t */
		}
		break;
	}
	clobber_reg(ctx, dst);
}

/* Validate jump immediate range */
bool valid_jmp_i(u8 op, s32 imm)
{
	switch (op) {
	case JIT_JNOP:
		/* Immediate value not used */
		return true;
	case BPF_JEQ:
	case BPF_JNE:
		/* No immediate operation */
		return false;
	case BPF_JSET:
	case JIT_JNSET:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_JGE:
	case BPF_JLT:
	case BPF_JSGE:
	case BPF_JSLT:
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_JGT:
	case BPF_JLE:
	case BPF_JSGT:
	case BPF_JSLE:
		/* imm + 1 must be 16 bits */
		return imm >= -0x8001 && imm <= 0x7ffe;
	}
	return false;
}
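
/*
 * Illustration (editorial note, not upstream code): the "imm + 1" limit for
 * BPF_JGT/JLE/JSGT/JSLE exists because emit_jmp_i() below implements these
 * conditions with slti/sltiu against imm + 1, e.g. "dst > imm" becomes
 * "sltiu t9, dst, imm + 1; beqz t9, off", so imm + 1 itself must fit in the
 * sign-extended 16-bit immediate field of slti/sltiu.
 */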

/* Invert a conditional jump operation */
static u8 invert_jmp(u8 op)
{
	switch (op) {
	case BPF_JA: return JIT_JNOP;
	case BPF_JEQ: return BPF_JNE;
	case BPF_JNE: return BPF_JEQ;
	case BPF_JSET: return JIT_JNSET;
	case BPF_JGT: return BPF_JLE;
	case BPF_JGE: return BPF_JLT;
	case BPF_JLT: return BPF_JGE;
	case BPF_JLE: return BPF_JGT;
	case BPF_JSGT: return BPF_JSLE;
	case BPF_JSGE: return BPF_JSLT;
	case BPF_JSLT: return BPF_JSGE;
	case BPF_JSLE: return BPF_JSGT;
	}
	return 0;
}

/* Prepare a PC-relative jump operation */
static void setup_jmp(struct jit_context *ctx, u8 bpf_op,
		      s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	u32 *descp = &ctx->descriptors[ctx->bpf_index];
	int op = bpf_op;
	int offset = 0;

	/* Do not compute offsets on the first pass */
	if (INDEX(*descp) == 0)
		goto done;

	/* Skip jumps never taken */
	if (bpf_op == JIT_JNOP)
		goto done;

	/* Convert jumps always taken */
	if (bpf_op == BPF_JA)
		*descp |= JIT_DESC_CONVERT;

	/*
	 * Current ctx->jit_index points to the start of the branch preamble.
	 * Since the preamble differs among different branch conditionals,
	 * the current index cannot be used to compute the branch offset.
	 * Instead, we use the offset table value for the next instruction,
	 * which gives the index immediately after the branch delay slot.
	 */
	if (!CONVERTED(*descp)) {
		int target = ctx->bpf_index + bpf_off + 1;
		int origin = ctx->bpf_index + 1;

		offset = (INDEX(ctx->descriptors[target]) -
			  INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32);
	}

	/*
	 * The PC-relative branch offset field on MIPS is 18 bits signed,
	 * so if the computed offset is larger than this we generate an
	 * absolute jump that we skip with an inverted conditional branch.
	 */
	if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) {
		offset = 3 * sizeof(u32);
		op = invert_jmp(bpf_op);
		ctx->changes += !CONVERTED(*descp);
		*descp |= JIT_DESC_CONVERT;
	}

done:
	*jit_off = offset;
	*jit_op = op;
}
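
/*
 * Illustration (editorial note, not upstream code): when setup_jmp() marks a
 * jump as converted, an out-of-range "PC += off if dst == src" ends up being
 * emitted by emit_jmp_r() and finish_jmp() as
 *
 *      bne   dst, src, 1f        inverted condition skips the long jump
 *      nop                       delay slot
 *      j     <target>            absolute jump within the 28-bit segment
 *      nop                       delay slot
 *   1:
 *
 * The fixed jit_off of 3 * sizeof(u32) set above is the distance from the
 * branch delay slot to the label 1: in this sequence.
 */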

/* Prepare a PC-relative jump operation with immediate conditional */
void setup_jmp_i(struct jit_context *ctx, s32 imm, u8 width,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	bool always = false;
	bool never = false;

	switch (bpf_op) {
	case BPF_JEQ:
	case BPF_JNE:
		break;
	case BPF_JSET:
	case BPF_JLT:
		never = imm == 0;
		break;
	case BPF_JGE:
		always = imm == 0;
		break;
	case BPF_JGT:
		never = (u32)imm == U32_MAX;
		break;
	case BPF_JLE:
		always = (u32)imm == U32_MAX;
		break;
	case BPF_JSGT:
		never = imm == S32_MAX && width == 32;
		break;
	case BPF_JSGE:
		always = imm == S32_MIN && width == 32;
		break;
	case BPF_JSLT:
		never = imm == S32_MIN && width == 32;
		break;
	case BPF_JSLE:
		always = imm == S32_MAX && width == 32;
		break;
	}

	if (never)
		bpf_op = JIT_JNOP;
	if (always)
		bpf_op = BPF_JA;

	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}

/* Prepare a PC-relative jump operation with register conditional */
void setup_jmp_r(struct jit_context *ctx, bool same_reg,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	switch (bpf_op) {
	case BPF_JSET:
		break;
	case BPF_JEQ:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JSGE:
	case BPF_JSLE:
		if (same_reg)
			bpf_op = BPF_JA;
		break;
	case BPF_JNE:
	case BPF_JLT:
	case BPF_JGT:
	case BPF_JSGT:
	case BPF_JSLT:
		if (same_reg)
			bpf_op = JIT_JNOP;
		break;
	}

	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}

/* Finish a PC-relative jump operation */
int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off)
{
	/* Emit conditional branch delay slot */
	if (jit_op != JIT_JNOP)
		emit(ctx, nop);

	/*
	 * Emit an absolute long jump with delay slot,
	 * if the PC-relative branch was converted.
	 */
	if (CONVERTED(ctx->descriptors[ctx->bpf_index])) {
		int target = get_target(ctx, ctx->bpf_index + bpf_off + 1);

		if (target < 0)
			return -1;
		emit(ctx, j, target);
		emit(ctx, nop);
	}
	return 0;
}

/* Jump immediate (32-bit) */
void emit_jmp_i(struct jit_context *ctx, u8 dst, s32 imm, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst & imm */
	case BPF_JSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, andi, MIPS_R_T9, dst, (u16)imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm */
	case BPF_JGT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm */
	case BPF_JGE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm */
	case BPF_JLT:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm */
	case BPF_JLE:
		emit(ctx, sltiu, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst > imm (signed) */
	case BPF_JSGT:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= imm (signed) */
	case BPF_JSGE:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < imm (signed) */
	case BPF_JSLT:
		emit(ctx, slti, MIPS_R_T9, dst, imm);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= imm (signed) */
	case BPF_JSLE:
		emit(ctx, slti, MIPS_R_T9, dst, imm + 1);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	}
}

/* Jump register (32-bit) */
void emit_jmp_r(struct jit_context *ctx, u8 dst, u8 src, s32 off, u8 op)
{
	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	case BPF_JEQ:
		emit(ctx, beq, dst, src, off);
		break;
	/* PC += off if dst != src */
	case BPF_JNE:
		emit(ctx, bne, dst, src, off);
		break;
	/* PC += off if dst & src */
	case BPF_JSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case JIT_JNSET:
		emit(ctx, and, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src */
	case BPF_JGT:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src */
	case BPF_JGE:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit(ctx, sltu, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src */
	case BPF_JLE:
		emit(ctx, sltu, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit(ctx, slt, MIPS_R_T9, dst, src);
		emit(ctx, bnez, MIPS_R_T9, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit(ctx, slt, MIPS_R_T9, src, dst);
		emit(ctx, beqz, MIPS_R_T9, off);
		break;
	}
}

/* Jump always */
int emit_ja(struct jit_context *ctx, s16 off)
{
	int target = get_target(ctx, ctx->bpf_index + off + 1);

	if (target < 0)
		return -1;
	emit(ctx, j, target);
	emit(ctx, nop);
	return 0;
}

/* Jump to epilogue */
int emit_exit(struct jit_context *ctx)
{
	int target = get_target(ctx, ctx->program->len);

	if (target < 0)
		return -1;
	emit(ctx, j, target);
	emit(ctx, nop);
	return 0;
}

/* Build the program body from eBPF bytecode */
static int build_body(struct jit_context *ctx)
{
	const struct bpf_prog *prog = ctx->program;
	unsigned int i;

	ctx->stack_used = 0;
	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		u32 *descp = &ctx->descriptors[i];
		int ret;

		access_reg(ctx, insn->src_reg);
		access_reg(ctx, insn->dst_reg);

		ctx->bpf_index = i;
		if (ctx->target == NULL) {
			ctx->changes += INDEX(*descp) != ctx->jit_index;
			*descp &= JIT_DESC_CONVERT;
			*descp |= ctx->jit_index;
		}

		ret = build_insn(insn, ctx);
		if (ret < 0)
			return ret;

		if (ret > 0) {
			i++;
			if (ctx->target == NULL)
				descp[1] = ctx->jit_index;
		}
	}

	/* Store the end offset, where the epilogue begins */
	ctx->descriptors[prog->len] = ctx->jit_index;
	return 0;
}

/* Set the branch conversion flag on all instructions */
static void set_convert_flag(struct jit_context *ctx, bool enable)
{
	const struct bpf_prog *prog = ctx->program;
	u32 flag = enable ? JIT_DESC_CONVERT : 0;
	unsigned int i;

	for (i = 0; i <= prog->len; i++)
		ctx->descriptors[i] = INDEX(ctx->descriptors[i]) | flag;
}

static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *p;

	/* We are guaranteed to have aligned memory. */
	for (p = area; size >= sizeof(u32); size -= sizeof(u32))
		uasm_i_break(&p, BRK_BUG); /* Increments p */
}

bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_prog *tmp, *orig_prog = prog;
	struct bpf_binary_header *header = NULL;
	struct jit_context ctx;
	bool tmp_blinded = false;
	unsigned int tmp_idx;
	unsigned int image_size;
	u8 *image_ptr;
	int tries;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;
	/*
	 * If constant blinding was enabled and we failed during blinding
	 * then we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	tmp = bpf_jit_blind_constants(prog);
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.program = prog;

	/*
	 * If we cannot allocate memory for the descriptors[] table,
	 * we must fall back to the interpreter.
	 */
	ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors),
				  GFP_KERNEL);
	if (ctx.descriptors == NULL)
		goto out_err;

	/* First pass discovers used resources */
	if (build_body(&ctx) < 0)
		goto out_err;

	/*
	 * Second pass computes instruction offsets.
	 * If any PC-relative branches are out of range, a sequence of
	 * a PC-relative branch + a jump is generated, and we have to
	 * try again from the beginning to generate the new offsets.
	 * This is done until no additional conversions are necessary.
	 * The last two iterations are done with all branches being
	 * converted, to guarantee offset table convergence within a
	 * fixed number of iterations.
	 */
	ctx.jit_index = 0;
	build_prologue(&ctx);
	tmp_idx = ctx.jit_index;

	tries = JIT_MAX_ITERATIONS;
	do {
		ctx.jit_index = tmp_idx;
		ctx.changes = 0;
		if (tries == 2)
			set_convert_flag(&ctx, true);
		if (build_body(&ctx) < 0)
			goto out_err;
	} while (ctx.changes > 0 && --tries > 0);

	if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge"))
		goto out_err;

	build_epilogue(&ctx, MIPS_R_RA);

	/* Now we know the size of the structure to make */
	image_size = sizeof(u32) * ctx.jit_index;
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	/*
	 * If we cannot allocate memory for the structure,
	 * we must fall back to the interpreter.
	 */
	if (header == NULL)
		goto out_err;

	/* Actual pass to generate final JIT code */
	ctx.target = (u32 *)image_ptr;
	ctx.jit_index = 0;

	/*
	 * If building the JITed code fails somehow,
	 * we fall back to the interpreter.
	 */
	build_prologue(&ctx);
	if (build_body(&ctx) < 0)
		goto out_err;
	build_epilogue(&ctx, MIPS_R_RA);

	/* Populate line info meta data */
	set_convert_flag(&ctx, false);
	bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]);

	/* Set as read-only exec and flush instruction cache */
	bpf_jit_binary_lock_ro(header);
	flush_icache_range((unsigned long)header,
			   (unsigned long)&ctx.target[ctx.jit_index]);

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx.target);

	prog->bpf_func = (void *)ctx.target;
	prog->jited = 1;
	prog->jited_len = image_size;

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	kfree(ctx.descriptors);
	return prog;

out_err:
	prog = orig_prog;
	if (header)
		bpf_jit_binary_free(header);
	goto out;
}