vc4_validate.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. /*
  2. * Copyright © 2014 Broadcom
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice (including the next
  12. * paragraph) shall be included in all copies or substantial portions of the
  13. * Software.
  14. *
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21. * IN THE SOFTWARE.
  22. */
  23. /**
  24. * DOC: Command list validator for VC4.
  25. *
  26. * Since the VC4 has no IOMMU between it and system memory, a user
  27. * with access to execute command lists could escalate privilege by
  28. * overwriting system memory (drawing to it as a framebuffer) or
  29. * reading system memory it shouldn't (reading it as a vertex buffer
  30. * or index buffer)
  31. *
  32. * We validate binner command lists to ensure that all accesses are
  33. * within the bounds of the GEM objects referenced by the submitted
  34. * job. It explicitly whitelists packets, and looks at the offsets in
  35. * any address fields to make sure they're contained within the BOs
  36. * they reference.
  37. *
  38. * Note that because CL validation is already reading the
  39. * user-submitted CL and writing the validated copy out to the memory
  40. * that the GPU will actually read, this is also where GEM relocation
  41. * processing (turning BO references into actual addresses for the GPU
  42. * to use) happens.
  43. */
  44. #include "uapi/drm/vc4_drm.h"
  45. #include "vc4_drv.h"
  46. #include "vc4_packet.h"
  /*
   * Parameter list shared by every packet validation callback: the job
   * being validated, the packet payload in the validated copy, and the
   * packet payload in the user-supplied (untrusted) command list.
   */
  #define VALIDATE_ARGS \
  	struct vc4_exec_info *exec, \
  	void *validated, \
  	void *untrusted
  /**
   * utile_width() - Width in pixels of a 64-byte microtile.
   * @cpp: Bytes per pixel of the pixel format
   *
   * An unrecognised @cpp is logged and reported as a width of 1.
   */
  static uint32_t
  utile_width(int cpp)
  {
  	if (cpp == 1 || cpp == 2)
  		return 8;
  	if (cpp == 4)
  		return 4;
  	if (cpp == 8)
  		return 2;

  	DRM_ERROR("unknown cpp: %d\n", cpp);
  	return 1;
  }
  /**
   * utile_height() - Height in pixels of a 64-byte microtile.
   * @cpp: Bytes per pixel of the pixel format
   *
   * An unrecognised @cpp is logged and reported as a height of 1.
   */
  static uint32_t
  utile_height(int cpp)
  {
  	if (cpp == 1)
  		return 8;
  	if (cpp == 2 || cpp == 4 || cpp == 8)
  		return 4;

  	DRM_ERROR("unknown cpp: %d\n", cpp);
  	return 1;
  }
  /**
   * size_is_lt() - Decide whether a miplevel uses the lineartile (LT) layout.
   * @width: Width in pixels of the miplevel
   * @height: Height in pixels of the miplevel
   * @cpp: Bytes per pixel of the pixel format
   *
   * Return: true when either dimension is no larger than four microtiles,
   * in which case LT tiling is used instead of the normal T tiling.
   */
  static bool
  size_is_lt(uint32_t width, uint32_t height, int cpp)
  {
  	if (width <= 4 * utile_width(cpp))
  		return true;

  	return height <= 4 * utile_height(cpp);
  }
  98. struct drm_gem_dma_object *
  99. vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
  100. {
  101. struct vc4_dev *vc4 = exec->dev;
  102. struct drm_gem_dma_object *obj;
  103. struct vc4_bo *bo;
  104. if (WARN_ON_ONCE(vc4->is_vc5))
  105. return NULL;
  106. if (hindex >= exec->bo_count) {
  107. DRM_DEBUG("BO index %d greater than BO count %d\n",
  108. hindex, exec->bo_count);
  109. return NULL;
  110. }
  111. obj = exec->bo[hindex];
  112. bo = to_vc4_bo(&obj->base);
  113. if (bo->validated_shader) {
  114. DRM_DEBUG("Trying to use shader BO as something other than "
  115. "a shader\n");
  116. return NULL;
  117. }
  118. return obj;
  119. }
  120. static struct drm_gem_dma_object *
  121. vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
  122. {
  123. return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
  124. }
  125. static bool
  126. validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
  127. {
  128. /* Note that the untrusted pointer passed to these functions is
  129. * incremented past the packet byte.
  130. */
  131. return (untrusted - 1 == exec->bin_u + pos);
  132. }
  /*
   * Size in bytes of a GL shader record, derived from the low four bits of
   * the shader state pointer: bits 0-2 hold the attribute count (0 encodes
   * 8) and bit 3 selects the extended record layout.
   */
  static uint32_t
  gl_shader_rec_size(uint32_t pointer_bits)
  {
  	uint32_t nr_attrs = pointer_bits & 7;

  	if (!nr_attrs)
  		nr_attrs = 8;

  	return (pointer_bits & 8) ? 100 + nr_attrs * 4 : 36 + nr_attrs * 8;
  }
  145. bool
  146. vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_dma_object *fbo,
  147. uint32_t offset, uint8_t tiling_format,
  148. uint32_t width, uint32_t height, uint8_t cpp)
  149. {
  150. struct vc4_dev *vc4 = exec->dev;
  151. uint32_t aligned_width, aligned_height, stride, size;
  152. uint32_t utile_w = utile_width(cpp);
  153. uint32_t utile_h = utile_height(cpp);
  154. if (WARN_ON_ONCE(vc4->is_vc5))
  155. return false;
  156. /* The shaded vertex format stores signed 12.4 fixed point
  157. * (-2048,2047) offsets from the viewport center, so we should
  158. * never have a render target larger than 4096. The texture
  159. * unit can only sample from 2048x2048, so it's even more
  160. * restricted. This lets us avoid worrying about overflow in
  161. * our math.
  162. */
  163. if (width > 4096 || height > 4096) {
  164. DRM_DEBUG("Surface dimensions (%d,%d) too large",
  165. width, height);
  166. return false;
  167. }
  168. switch (tiling_format) {
  169. case VC4_TILING_FORMAT_LINEAR:
  170. aligned_width = round_up(width, utile_w);
  171. aligned_height = height;
  172. break;
  173. case VC4_TILING_FORMAT_T:
  174. aligned_width = round_up(width, utile_w * 8);
  175. aligned_height = round_up(height, utile_h * 8);
  176. break;
  177. case VC4_TILING_FORMAT_LT:
  178. aligned_width = round_up(width, utile_w);
  179. aligned_height = round_up(height, utile_h);
  180. break;
  181. default:
  182. DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format);
  183. return false;
  184. }
  185. stride = aligned_width * cpp;
  186. size = stride * aligned_height;
  187. if (size + offset < size ||
  188. size + offset > fbo->base.size) {
  189. DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
  190. width, height,
  191. aligned_width, aligned_height,
  192. size, offset, fbo->base.size);
  193. return false;
  194. }
  195. return true;
  196. }
  197. static int
  198. validate_flush(VALIDATE_ARGS)
  199. {
  200. if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
  201. DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n");
  202. return -EINVAL;
  203. }
  204. exec->found_flush = true;
  205. return 0;
  206. }
  207. static int
  208. validate_start_tile_binning(VALIDATE_ARGS)
  209. {
  210. if (exec->found_start_tile_binning_packet) {
  211. DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n");
  212. return -EINVAL;
  213. }
  214. exec->found_start_tile_binning_packet = true;
  215. if (!exec->found_tile_binning_mode_config_packet) {
  216. DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
  217. return -EINVAL;
  218. }
  219. return 0;
  220. }
  221. static int
  222. validate_increment_semaphore(VALIDATE_ARGS)
  223. {
  224. if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
  225. DRM_DEBUG("Bin CL must end with "
  226. "VC4_PACKET_INCREMENT_SEMAPHORE\n");
  227. return -EINVAL;
  228. }
  229. exec->found_increment_semaphore_packet = true;
  230. return 0;
  231. }
  232. static int
  233. validate_indexed_prim_list(VALIDATE_ARGS)
  234. {
  235. struct drm_gem_dma_object *ib;
  236. uint32_t length = *(uint32_t *)(untrusted + 1);
  237. uint32_t offset = *(uint32_t *)(untrusted + 5);
  238. uint32_t max_index = *(uint32_t *)(untrusted + 9);
  239. uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
  240. struct vc4_shader_state *shader_state;
  241. /* Check overflow condition */
  242. if (exec->shader_state_count == 0) {
  243. DRM_DEBUG("shader state must precede primitives\n");
  244. return -EINVAL;
  245. }
  246. shader_state = &exec->shader_state[exec->shader_state_count - 1];
  247. if (max_index > shader_state->max_index)
  248. shader_state->max_index = max_index;
  249. ib = vc4_use_handle(exec, 0);
  250. if (!ib)
  251. return -EINVAL;
  252. exec->bin_dep_seqno = max(exec->bin_dep_seqno,
  253. to_vc4_bo(&ib->base)->write_seqno);
  254. if (offset > ib->base.size ||
  255. (ib->base.size - offset) / index_size < length) {
  256. DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n",
  257. offset, length, index_size, ib->base.size);
  258. return -EINVAL;
  259. }
  260. *(uint32_t *)(validated + 5) = ib->dma_addr + offset;
  261. return 0;
  262. }
  263. static int
  264. validate_gl_array_primitive(VALIDATE_ARGS)
  265. {
  266. uint32_t length = *(uint32_t *)(untrusted + 1);
  267. uint32_t base_index = *(uint32_t *)(untrusted + 5);
  268. uint32_t max_index;
  269. struct vc4_shader_state *shader_state;
  270. /* Check overflow condition */
  271. if (exec->shader_state_count == 0) {
  272. DRM_DEBUG("shader state must precede primitives\n");
  273. return -EINVAL;
  274. }
  275. shader_state = &exec->shader_state[exec->shader_state_count - 1];
  276. if (length + base_index < length) {
  277. DRM_DEBUG("primitive vertex count overflow\n");
  278. return -EINVAL;
  279. }
  280. max_index = length + base_index - 1;
  281. if (max_index > shader_state->max_index)
  282. shader_state->max_index = max_index;
  283. return 0;
  284. }
  285. static int
  286. validate_gl_shader_state(VALIDATE_ARGS)
  287. {
  288. uint32_t i = exec->shader_state_count++;
  289. if (i >= exec->shader_state_size) {
  290. DRM_DEBUG("More requests for shader states than declared\n");
  291. return -EINVAL;
  292. }
  293. exec->shader_state[i].addr = *(uint32_t *)untrusted;
  294. exec->shader_state[i].max_index = 0;
  295. if (exec->shader_state[i].addr & ~0xf) {
  296. DRM_DEBUG("high bits set in GL shader rec reference\n");
  297. return -EINVAL;
  298. }
  299. *(uint32_t *)validated = (exec->shader_rec_p +
  300. exec->shader_state[i].addr);
  301. exec->shader_rec_p +=
  302. roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
  303. return 0;
  304. }
  305. static int
  306. validate_tile_binning_config(VALIDATE_ARGS)
  307. {
  308. struct drm_device *dev = exec->exec_bo->base.dev;
  309. struct vc4_dev *vc4 = to_vc4_dev(dev);
  310. uint8_t flags;
  311. uint32_t tile_state_size;
  312. uint32_t tile_count, bin_addr;
  313. int bin_slot;
  314. if (exec->found_tile_binning_mode_config_packet) {
  315. DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
  316. return -EINVAL;
  317. }
  318. exec->found_tile_binning_mode_config_packet = true;
  319. exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
  320. exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
  321. tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
  322. flags = *(uint8_t *)(untrusted + 14);
  323. if (exec->bin_tiles_x == 0 ||
  324. exec->bin_tiles_y == 0) {
  325. DRM_DEBUG("Tile binning config of %dx%d too small\n",
  326. exec->bin_tiles_x, exec->bin_tiles_y);
  327. return -EINVAL;
  328. }
  329. if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
  330. VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
  331. DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags);
  332. return -EINVAL;
  333. }
  334. bin_slot = vc4_v3d_get_bin_slot(vc4);
  335. if (bin_slot < 0) {
  336. if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) {
  337. DRM_ERROR("Failed to allocate binner memory: %d\n",
  338. bin_slot);
  339. }
  340. return bin_slot;
  341. }
  342. /* The slot we allocated will only be used by this job, and is
  343. * free when the job completes rendering.
  344. */
  345. exec->bin_slots |= BIT(bin_slot);
  346. bin_addr = vc4->bin_bo->base.dma_addr + bin_slot * vc4->bin_alloc_size;
  347. /* The tile state data array is 48 bytes per tile, and we put it at
  348. * the start of a BO containing both it and the tile alloc.
  349. */
  350. tile_state_size = 48 * tile_count;
  351. /* Since the tile alloc array will follow us, align. */
  352. exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096);
  353. *(uint8_t *)(validated + 14) =
  354. ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
  355. VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
  356. VC4_BIN_CONFIG_AUTO_INIT_TSDA |
  357. VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
  358. VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
  359. VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
  360. VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
  361. /* tile alloc address. */
  362. *(uint32_t *)(validated + 0) = exec->tile_alloc_offset;
  363. /* tile alloc size. */
  364. *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size -
  365. exec->tile_alloc_offset);
  366. /* tile state address. */
  367. *(uint32_t *)(validated + 8) = bin_addr;
  368. return 0;
  369. }
  370. static int
  371. validate_gem_handles(VALIDATE_ARGS)
  372. {
  373. memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
  374. return 0;
  375. }
  376. #define VC4_DEFINE_PACKET(packet, func) \
  377. [packet] = { packet ## _SIZE, #packet, func }
  378. static const struct cmd_info {
  379. uint16_t len;
  380. const char *name;
  381. int (*func)(struct vc4_exec_info *exec, void *validated,
  382. void *untrusted);
  383. } cmd_info[] = {
  384. VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
  385. VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
  386. VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
  387. VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
  388. VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
  389. validate_start_tile_binning),
  390. VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
  391. validate_increment_semaphore),
  392. VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
  393. validate_indexed_prim_list),
  394. VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
  395. validate_gl_array_primitive),
  396. VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
  397. VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
  398. VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
  399. VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
  400. VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
  401. VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
  402. VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
  403. VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
  404. VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
  405. VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
  406. VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
  407. /* Note: The docs say this was also 105, but it was 106 in the
  408. * initial userland code drop.
  409. */
  410. VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
  411. VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
  412. validate_tile_binning_config),
  413. VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
  414. };
  415. int
  416. vc4_validate_bin_cl(struct drm_device *dev,
  417. void *validated,
  418. void *unvalidated,
  419. struct vc4_exec_info *exec)
  420. {
  421. struct vc4_dev *vc4 = to_vc4_dev(dev);
  422. uint32_t len = exec->args->bin_cl_size;
  423. uint32_t dst_offset = 0;
  424. uint32_t src_offset = 0;
  425. if (WARN_ON_ONCE(vc4->is_vc5))
  426. return -ENODEV;
  427. while (src_offset < len) {
  428. void *dst_pkt = validated + dst_offset;
  429. void *src_pkt = unvalidated + src_offset;
  430. u8 cmd = *(uint8_t *)src_pkt;
  431. const struct cmd_info *info;
  432. if (cmd >= ARRAY_SIZE(cmd_info)) {
  433. DRM_DEBUG("0x%08x: packet %d out of bounds\n",
  434. src_offset, cmd);
  435. return -EINVAL;
  436. }
  437. info = &cmd_info[cmd];
  438. if (!info->name) {
  439. DRM_DEBUG("0x%08x: packet %d invalid\n",
  440. src_offset, cmd);
  441. return -EINVAL;
  442. }
  443. if (src_offset + info->len > len) {
  444. DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x "
  445. "exceeds bounds (0x%08x)\n",
  446. src_offset, cmd, info->name, info->len,
  447. src_offset + len);
  448. return -EINVAL;
  449. }
  450. if (cmd != VC4_PACKET_GEM_HANDLES)
  451. memcpy(dst_pkt, src_pkt, info->len);
  452. if (info->func && info->func(exec,
  453. dst_pkt + 1,
  454. src_pkt + 1)) {
  455. DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n",
  456. src_offset, cmd, info->name);
  457. return -EINVAL;
  458. }
  459. src_offset += info->len;
  460. /* GEM handle loading doesn't produce HW packets. */
  461. if (cmd != VC4_PACKET_GEM_HANDLES)
  462. dst_offset += info->len;
  463. /* When the CL hits halt, it'll stop reading anything else. */
  464. if (cmd == VC4_PACKET_HALT)
  465. break;
  466. }
  467. exec->ct0ea = exec->ct0ca + dst_offset;
  468. if (!exec->found_start_tile_binning_packet) {
  469. DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
  470. return -EINVAL;
  471. }
  472. /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The
  473. * semaphore is used to trigger the render CL to start up, and the
  474. * FLUSH is what caps the bin lists with
  475. * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
  476. * render CL when they get called to) and actually triggers the queued
  477. * semaphore increment.
  478. */
  479. if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
  480. DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
  481. "VC4_PACKET_FLUSH\n");
  482. return -EINVAL;
  483. }
  484. return 0;
  485. }
  486. static bool
  487. reloc_tex(struct vc4_exec_info *exec,
  488. void *uniform_data_u,
  489. struct vc4_texture_sample_info *sample,
  490. uint32_t texture_handle_index, bool is_cs)
  491. {
  492. struct drm_gem_dma_object *tex;
  493. uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
  494. uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
  495. uint32_t p2 = (sample->p_offset[2] != ~0 ?
  496. *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
  497. uint32_t p3 = (sample->p_offset[3] != ~0 ?
  498. *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
  499. uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
  500. uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
  501. uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
  502. uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
  503. uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
  504. uint32_t cpp, tiling_format, utile_w, utile_h;
  505. uint32_t i;
  506. uint32_t cube_map_stride = 0;
  507. enum vc4_texture_data_type type;
  508. tex = vc4_use_bo(exec, texture_handle_index);
  509. if (!tex)
  510. return false;
  511. if (sample->is_direct) {
  512. uint32_t remaining_size = tex->base.size - p0;
  513. if (p0 > tex->base.size - 4) {
  514. DRM_DEBUG("UBO offset greater than UBO size\n");
  515. goto fail;
  516. }
  517. if (p1 > remaining_size - 4) {
  518. DRM_DEBUG("UBO clamp would allow reads "
  519. "outside of UBO\n");
  520. goto fail;
  521. }
  522. *validated_p0 = tex->dma_addr + p0;
  523. return true;
  524. }
  525. if (width == 0)
  526. width = 2048;
  527. if (height == 0)
  528. height = 2048;
  529. if (p0 & VC4_TEX_P0_CMMODE_MASK) {
  530. if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
  531. VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
  532. cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
  533. if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
  534. VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
  535. if (cube_map_stride) {
  536. DRM_DEBUG("Cube map stride set twice\n");
  537. goto fail;
  538. }
  539. cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
  540. }
  541. if (!cube_map_stride) {
  542. DRM_DEBUG("Cube map stride not set\n");
  543. goto fail;
  544. }
  545. }
  546. type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
  547. (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
  548. switch (type) {
  549. case VC4_TEXTURE_TYPE_RGBA8888:
  550. case VC4_TEXTURE_TYPE_RGBX8888:
  551. case VC4_TEXTURE_TYPE_RGBA32R:
  552. cpp = 4;
  553. break;
  554. case VC4_TEXTURE_TYPE_RGBA4444:
  555. case VC4_TEXTURE_TYPE_RGBA5551:
  556. case VC4_TEXTURE_TYPE_RGB565:
  557. case VC4_TEXTURE_TYPE_LUMALPHA:
  558. case VC4_TEXTURE_TYPE_S16F:
  559. case VC4_TEXTURE_TYPE_S16:
  560. cpp = 2;
  561. break;
  562. case VC4_TEXTURE_TYPE_LUMINANCE:
  563. case VC4_TEXTURE_TYPE_ALPHA:
  564. case VC4_TEXTURE_TYPE_S8:
  565. cpp = 1;
  566. break;
  567. case VC4_TEXTURE_TYPE_ETC1:
  568. /* ETC1 is arranged as 64-bit blocks, where each block is 4x4
  569. * pixels.
  570. */
  571. cpp = 8;
  572. width = (width + 3) >> 2;
  573. height = (height + 3) >> 2;
  574. break;
  575. case VC4_TEXTURE_TYPE_BW1:
  576. case VC4_TEXTURE_TYPE_A4:
  577. case VC4_TEXTURE_TYPE_A1:
  578. case VC4_TEXTURE_TYPE_RGBA64:
  579. case VC4_TEXTURE_TYPE_YUV422R:
  580. default:
  581. DRM_DEBUG("Texture format %d unsupported\n", type);
  582. goto fail;
  583. }
  584. utile_w = utile_width(cpp);
  585. utile_h = utile_height(cpp);
  586. if (type == VC4_TEXTURE_TYPE_RGBA32R) {
  587. tiling_format = VC4_TILING_FORMAT_LINEAR;
  588. } else {
  589. if (size_is_lt(width, height, cpp))
  590. tiling_format = VC4_TILING_FORMAT_LT;
  591. else
  592. tiling_format = VC4_TILING_FORMAT_T;
  593. }
  594. if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
  595. tiling_format, width, height, cpp)) {
  596. goto fail;
  597. }
  598. /* The mipmap levels are stored before the base of the texture. Make
  599. * sure there is actually space in the BO.
  600. */
  601. for (i = 1; i <= miplevels; i++) {
  602. uint32_t level_width = max(width >> i, 1u);
  603. uint32_t level_height = max(height >> i, 1u);
  604. uint32_t aligned_width, aligned_height;
  605. uint32_t level_size;
  606. /* Once the levels get small enough, they drop from T to LT. */
  607. if (tiling_format == VC4_TILING_FORMAT_T &&
  608. size_is_lt(level_width, level_height, cpp)) {
  609. tiling_format = VC4_TILING_FORMAT_LT;
  610. }
  611. switch (tiling_format) {
  612. case VC4_TILING_FORMAT_T:
  613. aligned_width = round_up(level_width, utile_w * 8);
  614. aligned_height = round_up(level_height, utile_h * 8);
  615. break;
  616. case VC4_TILING_FORMAT_LT:
  617. aligned_width = round_up(level_width, utile_w);
  618. aligned_height = round_up(level_height, utile_h);
  619. break;
  620. default:
  621. aligned_width = round_up(level_width, utile_w);
  622. aligned_height = level_height;
  623. break;
  624. }
  625. level_size = aligned_width * cpp * aligned_height;
  626. if (offset < level_size) {
  627. DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db "
  628. "overflowed buffer bounds (offset %d)\n",
  629. i, level_width, level_height,
  630. aligned_width, aligned_height,
  631. level_size, offset);
  632. goto fail;
  633. }
  634. offset -= level_size;
  635. }
  636. *validated_p0 = tex->dma_addr + p0;
  637. if (is_cs) {
  638. exec->bin_dep_seqno = max(exec->bin_dep_seqno,
  639. to_vc4_bo(&tex->base)->write_seqno);
  640. }
  641. return true;
  642. fail:
  643. DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
  644. DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
  645. DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
  646. DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
  647. return false;
  648. }
  649. static int
  650. validate_gl_shader_rec(struct drm_device *dev,
  651. struct vc4_exec_info *exec,
  652. struct vc4_shader_state *state)
  653. {
  654. uint32_t *src_handles;
  655. void *pkt_u, *pkt_v;
  656. static const uint32_t shader_reloc_offsets[] = {
  657. 4, /* fs */
  658. 16, /* vs */
  659. 28, /* cs */
  660. };
  661. uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
  662. struct drm_gem_dma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8];
  663. uint32_t nr_attributes, nr_relocs, packet_size;
  664. int i;
  665. nr_attributes = state->addr & 0x7;
  666. if (nr_attributes == 0)
  667. nr_attributes = 8;
  668. packet_size = gl_shader_rec_size(state->addr);
  669. nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
  670. if (nr_relocs * 4 > exec->shader_rec_size) {
  671. DRM_DEBUG("overflowed shader recs reading %d handles "
  672. "from %d bytes left\n",
  673. nr_relocs, exec->shader_rec_size);
  674. return -EINVAL;
  675. }
  676. src_handles = exec->shader_rec_u;
  677. exec->shader_rec_u += nr_relocs * 4;
  678. exec->shader_rec_size -= nr_relocs * 4;
  679. if (packet_size > exec->shader_rec_size) {
  680. DRM_DEBUG("overflowed shader recs copying %db packet "
  681. "from %d bytes left\n",
  682. packet_size, exec->shader_rec_size);
  683. return -EINVAL;
  684. }
  685. pkt_u = exec->shader_rec_u;
  686. pkt_v = exec->shader_rec_v;
  687. memcpy(pkt_v, pkt_u, packet_size);
  688. exec->shader_rec_u += packet_size;
  689. /* Shader recs have to be aligned to 16 bytes (due to the attribute
  690. * flags being in the low bytes), so round the next validated shader
  691. * rec address up. This should be safe, since we've got so many
  692. * relocations in a shader rec packet.
  693. */
  694. BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
  695. exec->shader_rec_v += roundup(packet_size, 16);
  696. exec->shader_rec_size -= packet_size;
  697. for (i = 0; i < shader_reloc_count; i++) {
  698. if (src_handles[i] > exec->bo_count) {
  699. DRM_DEBUG("Shader handle %d too big\n", src_handles[i]);
  700. return -EINVAL;
  701. }
  702. bo[i] = exec->bo[src_handles[i]];
  703. if (!bo[i])
  704. return -EINVAL;
  705. }
  706. for (i = shader_reloc_count; i < nr_relocs; i++) {
  707. bo[i] = vc4_use_bo(exec, src_handles[i]);
  708. if (!bo[i])
  709. return -EINVAL;
  710. }
  711. if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
  712. to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
  713. DRM_DEBUG("Thread mode of CL and FS do not match\n");
  714. return -EINVAL;
  715. }
  716. if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
  717. to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
  718. DRM_DEBUG("cs and vs cannot be threaded\n");
  719. return -EINVAL;
  720. }
  721. for (i = 0; i < shader_reloc_count; i++) {
  722. struct vc4_validated_shader_info *validated_shader;
  723. uint32_t o = shader_reloc_offsets[i];
  724. uint32_t src_offset = *(uint32_t *)(pkt_u + o);
  725. uint32_t *texture_handles_u;
  726. void *uniform_data_u;
  727. uint32_t tex, uni;
  728. *(uint32_t *)(pkt_v + o) = bo[i]->dma_addr + src_offset;
  729. if (src_offset != 0) {
  730. DRM_DEBUG("Shaders must be at offset 0 of "
  731. "the BO.\n");
  732. return -EINVAL;
  733. }
  734. validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
  735. if (!validated_shader)
  736. return -EINVAL;
  737. if (validated_shader->uniforms_src_size >
  738. exec->uniforms_size) {
  739. DRM_DEBUG("Uniforms src buffer overflow\n");
  740. return -EINVAL;
  741. }
  742. texture_handles_u = exec->uniforms_u;
  743. uniform_data_u = (texture_handles_u +
  744. validated_shader->num_texture_samples);
  745. memcpy(exec->uniforms_v, uniform_data_u,
  746. validated_shader->uniforms_size);
  747. for (tex = 0;
  748. tex < validated_shader->num_texture_samples;
  749. tex++) {
  750. if (!reloc_tex(exec,
  751. uniform_data_u,
  752. &validated_shader->texture_samples[tex],
  753. texture_handles_u[tex],
  754. i == 2)) {
  755. return -EINVAL;
  756. }
  757. }
  758. /* Fill in the uniform slots that need this shader's
  759. * start-of-uniforms address (used for resetting the uniform
  760. * stream in the presence of control flow).
  761. */
  762. for (uni = 0;
  763. uni < validated_shader->num_uniform_addr_offsets;
  764. uni++) {
  765. uint32_t o = validated_shader->uniform_addr_offsets[uni];
  766. ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
  767. }
  768. *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
  769. exec->uniforms_u += validated_shader->uniforms_src_size;
  770. exec->uniforms_v += validated_shader->uniforms_size;
  771. exec->uniforms_p += validated_shader->uniforms_size;
  772. }
  773. for (i = 0; i < nr_attributes; i++) {
  774. struct drm_gem_dma_object *vbo =
  775. bo[ARRAY_SIZE(shader_reloc_offsets) + i];
  776. uint32_t o = 36 + i * 8;
  777. uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
  778. uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
  779. uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
  780. uint32_t max_index;
  781. exec->bin_dep_seqno = max(exec->bin_dep_seqno,
  782. to_vc4_bo(&vbo->base)->write_seqno);
  783. if (state->addr & 0x8)
  784. stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
  785. if (vbo->base.size < offset ||
  786. vbo->base.size - offset < attr_size) {
  787. DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n",
  788. offset, attr_size, vbo->base.size);
  789. return -EINVAL;
  790. }
  791. if (stride != 0) {
  792. max_index = ((vbo->base.size - offset - attr_size) /
  793. stride);
  794. if (state->max_index > max_index) {
  795. DRM_DEBUG("primitives use index %d out of "
  796. "supplied %d\n",
  797. state->max_index, max_index);
  798. return -EINVAL;
  799. }
  800. }
  801. *(uint32_t *)(pkt_v + o) = vbo->dma_addr + offset;
  802. }
  803. return 0;
  804. }
  805. int
  806. vc4_validate_shader_recs(struct drm_device *dev,
  807. struct vc4_exec_info *exec)
  808. {
  809. struct vc4_dev *vc4 = to_vc4_dev(dev);
  810. uint32_t i;
  811. int ret = 0;
  812. if (WARN_ON_ONCE(vc4->is_vc5))
  813. return -ENODEV;
  814. for (i = 0; i < exec->shader_state_count; i++) {
  815. ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
  816. if (ret)
  817. return ret;
  818. }
  819. return ret;
  820. }