/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <[email protected]>
 */

#include <linux/list_sort.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/radeon_drm.h>

#include "radeon.h"
#include "radeon_reg.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}

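/**
 * radeon_cs_parser_relocs() - gather and validate the relocation buffers
 * @p: parser structure holding parsing context.
 *
 * Builds the list of buffer objects referenced by the relocation chunk,
 * picks the preferred/allowed domains for each of them, sorts the list
 * by priority and validates it with TTM. Returns 0 on success or a
 * negative error code on failure.
 **/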
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs = kvcalloc(p->nrelocs, sizeof(struct radeon_bo_list),
			     GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;

		/* The first reloc of an UVD job is the msg and that must be in
		 * VRAM, the second reloc is the DPB and for WMV that must be in
		 * VRAM as well. Also put everything into VRAM on AGP cards and older
		 * IGP chips to avoid image corruptions
		 */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i <= 0 || pci_find_capability(p->rdev->pdev, PCI_CAP_ID_AGP) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].preferred_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].preferred_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->rdev, p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].preferred_domains;

			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].preferred_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		/* Objects shared as dma-bufs cannot be moved to VRAM */
		if (p->relocs[i].robj->prime_shared_count) {
			p->relocs[i].allowed_domains &= ~RADEON_GEM_DOMAIN_VRAM;
			if (!p->relocs[i].allowed_domains) {
				DRM_ERROR("BO associated with dma-buf cannot "
					  "be moved to VRAM\n");
				return -EINVAL;
			}
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.num_shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
	if (need_mmap_lock)
		mmap_read_lock(current->mm);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		mmap_read_unlock(current->mm);

	return r;
}

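/**
 * radeon_cs_get_ring() - map a userspace ring id to a kernel ring index
 * @p: parser structure holding parsing context.
 * @ring: ring id from the RADEON_CHUNK_ID_FLAGS chunk
 * @priority: requested ring priority
 *
 * Selects the hardware ring used for this submission based on the ring id,
 * the requested priority and the chip family. Returns 0 on success or
 * -EINVAL for an unknown or unsupported ring.
 **/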
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

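/**
 * radeon_cs_sync_rings() - sync the IB against the validated buffers
 * @p: parser structure holding parsing context.
 *
 * Adds the fences of all validated buffers to the IB's sync object so the
 * command stream waits for earlier users of those buffers. Returns 0 on
 * success or a negative error code on failure.
 **/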
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct dma_resv *resv;

		resv = reloc->robj->tbo.base.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.num_shared);
		if (r)
			return r;
	}
	return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
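/**
 * radeon_cs_parser_init() - initialize the parser from a CS ioctl request
 * @p: parser structure holding parsing context.
 * @data: pointer to the drm_radeon_cs ioctl argument
 *
 * Copies the chunk array from userspace, classifies the chunks (IB, relocs,
 * const IB, flags) and, on KMS, picks the target ring and checks the VM
 * requirements. Returns 0 on success or a negative error code on failure.
 **/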
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	u64 size;
	unsigned i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kvmalloc_array(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			   sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kvcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

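/* list_sort() comparator: order the validated BOs by ascending size. */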
static int cmp_size_smaller_first(void *priv, const struct list_head *a,
				  const struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.resource->num_pages -
		(int)lb->robj->tbo.resource->num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator to backoff the reservation
 *
 * If error is set, then unvalidate the buffers, otherwise just free the
 * memory used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_put(&bo->tbo.base);
		}
	}
	kfree(parser->track);
	kvfree(parser->relocs);
	kvfree(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	kvfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

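/**
 * radeon_cs_ib_chunk() - parse and schedule the IB for non-VM submissions
 * @rdev: radeon device the CS was submitted to
 * @parser: parser structure holding parsing context.
 *
 * Runs the per-ring command stream checker on the IB, syncs against the
 * validated buffers and schedules the IB. Only used when RADEON_CS_USE_VM
 * is not set. Returns 0 on success or a negative error code on failure.
 **/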
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

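/**
 * radeon_bo_vm_update_pte() - update the VM page tables for this submission
 * @p: parser structure holding parsing context.
 * @vm: VM the command stream executes in
 *
 * Updates the page directory, the ring temporary BO mapping and the page
 * table entries of every relocated buffer, and makes the IB wait for the
 * resulting page table updates. Returns 0 on success or a negative error
 * code on failure.
 **/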
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				rdev->ring_tmp_bo.bo->tbo.resource);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, bo->tbo.resource);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);

		r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
		if (r)
			return r;
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

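/**
 * radeon_cs_ib_vm_chunk() - parse and schedule the IB for VM submissions
 * @rdev: radeon device the CS was submitted to
 * @parser: parser structure holding parsing context.
 *
 * Runs the per-ring IB checker on the (const) IB, updates the VM page
 * tables, syncs against the validated buffers and schedules the IB. Only
 * used when RADEON_CS_USE_VM is set. Returns 0 on success or a negative
 * error code on failure.
 **/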
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

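/**
 * radeon_cs_handle_lockup() - translate a lockup error into a GPU reset
 * @rdev: radeon device the CS was submitted to
 * @r: error code returned by the submission path
 *
 * On -EDEADLK the GPU is reset and, if the reset succeeds, -EAGAIN is
 * returned so userspace can resubmit; any other error code is passed
 * through unchanged.
 **/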
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

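/**
 * radeon_cs_ib_fill() - copy the IB chunk(s) into the indirect buffer(s)
 * @rdev: radeon device the CS was submitted to
 * @parser: parser structure holding parsing context.
 *
 * Allocates the IB (and the const IB on SI+ when present), checks the size
 * limits for VM submissions and copies the command stream from userspace
 * (or from kdata when it was already copied). Returns 0 on success or a
 * negative error code on failure.
 **/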
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					   ib_chunk->user_ptr,
					   ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

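/**
 * radeon_cs_ioctl() - command submission (CS) ioctl handler
 * @dev: drm device the CS was submitted to
 * @data: pointer to the drm_radeon_cs ioctl argument
 * @filp: file the CS was submitted through
 *
 * Main command submission entry point: initializes the parser, fills the
 * IBs, validates the buffers and hands the command stream to the selected
 * ring, triggering a GPU reset when a lockup is detected. Returns 0 on
 * success or a negative error code on failure.
 **/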
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: packet index
 *
 * Assumes that chunk_ib_index is properly set. Will return -EINVAL if the
 * packet is bigger than the remaining ib size, or if the packet is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: reloc information
 * @nomm: no memory management for debugging
 *
 * Check if the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}