device.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright 2016-2022 HabanaLabs, Ltd.
  4. * All Rights Reserved.
  5. */
  6. #define pr_fmt(fmt) "habanalabs: " fmt
  7. #include <uapi/misc/habanalabs.h>
  8. #include "habanalabs.h"
  9. #include <linux/pci.h>
  10. #include <linux/hwmon.h>
  11. #include <trace/events/habanalabs.h>
  12. #define HL_RESET_DELAY_USEC 10000 /* 10ms */
  13. enum dma_alloc_type {
  14. DMA_ALLOC_COHERENT,
  15. DMA_ALLOC_CPU_ACCESSIBLE,
  16. DMA_ALLOC_POOL,
  17. };
  18. #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788
  19. /*
  20. * hl_set_dram_bar - sets the DRAM BAR to allow later access to an address
  21. *
  22. * @hdev: pointer to habanalabs device structure.
  23. * @addr: the address the caller wants to access.
  24. * @region: the PCI region.
  25. *
  26. * @return: the old BAR base address on success, U64_MAX for failure.
  27. * The caller should set it back to the old address after use.
  28. *
  29. * In case the bar space does not cover the whole address space,
  30. * the bar base address should be set to allow access to a given address.
  31. * This function can also be called if the bar doesn't need to be set;
  32. * in that case it just won't change the base.
  33. */
  34. static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
  35. {
  36. struct asic_fixed_properties *prop = &hdev->asic_prop;
  37. u64 bar_base_addr, old_base;
  38. if (is_power_of_2(prop->dram_pci_bar_size))
  39. bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
  40. else
  41. bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
  42. prop->dram_pci_bar_size;
  43. old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
  44. /* in case of success we need to update the new BAR base */
  45. if (old_base != U64_MAX)
  46. region->region_base = bar_base_addr;
  47. return old_base;
  48. }
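/*
 * hl_access_sram_dram_region - read/write a SRAM/DRAM address through its PCI BAR
 *
 * For DRAM accesses the BAR is first moved to cover the requested address and
 * is restored to its old base once the access is done.
 */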
  49. static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
  50. enum debugfs_access_type acc_type, enum pci_region region_type)
  51. {
  52. struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
  53. void __iomem *acc_addr;
  54. u64 old_base = 0, rc;
  55. if (region_type == PCI_REGION_DRAM) {
  56. old_base = hl_set_dram_bar(hdev, addr, region);
  57. if (old_base == U64_MAX)
  58. return -EIO;
  59. }
  60. acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
  61. region->offset_in_bar;
  62. switch (acc_type) {
  63. case DEBUGFS_READ8:
  64. *val = readb(acc_addr);
  65. break;
  66. case DEBUGFS_WRITE8:
  67. writeb(*val, acc_addr);
  68. break;
  69. case DEBUGFS_READ32:
  70. *val = readl(acc_addr);
  71. break;
  72. case DEBUGFS_WRITE32:
  73. writel(*val, acc_addr);
  74. break;
  75. case DEBUGFS_READ64:
  76. *val = readq(acc_addr);
  77. break;
  78. case DEBUGFS_WRITE64:
  79. writeq(*val, acc_addr);
  80. break;
  81. }
  82. if (region_type == PCI_REGION_DRAM) {
  83. rc = hl_set_dram_bar(hdev, old_base, region);
  84. if (rc == U64_MAX)
  85. return -EIO;
  86. }
  87. return 0;
  88. }
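/*
 * hl_dma_alloc_common - common wrapper over the ASIC-specific DMA allocation callbacks
 *
 * Dispatches to the coherent / CPU-accessible / pool allocator according to
 * @alloc_type and emits a tracepoint for successful allocations.
 */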
  89. static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
  90. gfp_t flag, enum dma_alloc_type alloc_type,
  91. const char *caller)
  92. {
  93. void *ptr = NULL;
  94. switch (alloc_type) {
  95. case DMA_ALLOC_COHERENT:
  96. ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
  97. break;
  98. case DMA_ALLOC_CPU_ACCESSIBLE:
  99. ptr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
  100. break;
  101. case DMA_ALLOC_POOL:
  102. ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle);
  103. break;
  104. }
  105. if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
  106. trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size,
  107. caller);
  108. return ptr;
  109. }
  110. static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
  111. dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
  112. const char *caller)
  113. {
  114. switch (alloc_type) {
  115. case DMA_ALLOC_COHERENT:
  116. hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
  117. break;
  118. case DMA_ALLOC_CPU_ACCESSIBLE:
  119. hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, cpu_addr);
  120. break;
  121. case DMA_ALLOC_POOL:
  122. hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
  123. break;
  124. }
  125. trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
  126. }
  127. void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
  128. gfp_t flag, const char *caller)
  129. {
  130. return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller);
  131. }
  132. void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
  133. dma_addr_t dma_handle, const char *caller)
  134. {
  135. hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller);
  136. }
  137. void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
  138. dma_addr_t *dma_handle, const char *caller)
  139. {
  140. return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
  141. }
  142. void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
  143. const char *caller)
  144. {
  145. hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
  146. }
  147. void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
  148. dma_addr_t *dma_handle, const char *caller)
  149. {
  150. return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller);
  151. }
  152. void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
  153. const char *caller)
  154. {
  155. hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller);
  156. }
  157. int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
  158. {
  159. struct asic_fixed_properties *prop = &hdev->asic_prop;
  160. struct scatterlist *sg;
  161. int rc, i;
  162. rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0);
  163. if (rc)
  164. return rc;
  165. /* Shift to the device's base physical address of host memory if necessary */
  166. if (prop->device_dma_offset_for_host_access)
  167. for_each_sgtable_dma_sg(sgt, sg, i)
  168. sg->dma_address += prop->device_dma_offset_for_host_access;
  169. return 0;
  170. }
  171. void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
  172. {
  173. struct asic_fixed_properties *prop = &hdev->asic_prop;
  174. struct scatterlist *sg;
  175. int i;
  176. /* Cancel the device's base physical address of host memory if necessary */
  177. if (prop->device_dma_offset_for_host_access)
  178. for_each_sgtable_dma_sg(sgt, sg, i)
  179. sg->dma_address -= prop->device_dma_offset_for_host_access;
  180. dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0);
  181. }
  182. /*
  183. * hl_access_cfg_region - access the config region
  184. *
  185. * @hdev: pointer to habanalabs device structure
  186. * @addr: the address to access
  187. * @val: the value to write from or read to
  188. * @acc_type: the type of access (read/write 64/32)
  189. */
  190. int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
  191. enum debugfs_access_type acc_type)
  192. {
  193. struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG];
  194. u32 val_h, val_l;
  195. if (!IS_ALIGNED(addr, sizeof(u32))) {
  196. dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32));
  197. return -EINVAL;
  198. }
  199. switch (acc_type) {
  200. case DEBUGFS_READ32:
  201. *val = RREG32(addr - cfg_region->region_base);
  202. break;
  203. case DEBUGFS_WRITE32:
  204. WREG32(addr - cfg_region->region_base, *val);
  205. break;
  206. case DEBUGFS_READ64:
  207. val_l = RREG32(addr - cfg_region->region_base);
  208. val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base);
  209. *val = (((u64) val_h) << 32) | val_l;
  210. break;
  211. case DEBUGFS_WRITE64:
  212. WREG32(addr - cfg_region->region_base, lower_32_bits(*val));
  213. WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val));
  214. break;
  215. default:
  216. dev_err(hdev->dev, "access type %d is not supported\n", acc_type);
  217. return -EOPNOTSUPP;
  218. }
  219. return 0;
  220. }
  221. /*
  222. * hl_access_dev_mem - access device memory
  223. *
  224. * @hdev: pointer to habanalabs device structure
  225. * @region_type: the type of the region the address belongs to
  226. * @addr: the address to access
  227. * @val: the value to write from or read to
  228. * @acc_type: the type of access (r/w, 32/64)
  229. */
  230. int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
  231. u64 addr, u64 *val, enum debugfs_access_type acc_type)
  232. {
  233. switch (region_type) {
  234. case PCI_REGION_CFG:
  235. return hl_access_cfg_region(hdev, addr, val, acc_type);
  236. case PCI_REGION_SRAM:
  237. case PCI_REGION_DRAM:
  238. return hl_access_sram_dram_region(hdev, addr, val, acc_type,
  239. region_type);
  240. default:
  241. return -EFAULT;
  242. }
  243. return 0;
  244. }
  245. void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
  246. {
  247. va_list args;
  248. int str_size;
  249. va_start(args, fmt);
  250. /* Calculate formatted string length. Each string is assumed to be null-terminated,
  251. * hence the result is incremented by 1
  252. */
  253. str_size = vsnprintf(NULL, 0, fmt, args) + 1;
  254. va_end(args);
  255. if ((e->actual_size + str_size) < e->allocated_buf_size) {
  256. va_start(args, fmt);
  257. vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
  258. va_end(args);
  259. }
  260. /* Need to update the size even when not updating destination buffer to get the exact size
  261. * of all input strings
  262. */
  263. e->actual_size += str_size;
  264. }
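/*
 * hl_device_status - compute the current device status
 *
 * The status is derived from the reset, needs_reset, disabled and init_done
 * flags of the device structure.
 */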
  265. enum hl_device_status hl_device_status(struct hl_device *hdev)
  266. {
  267. enum hl_device_status status;
  268. if (hdev->reset_info.in_reset) {
  269. if (hdev->reset_info.in_compute_reset)
  270. status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
  271. else
  272. status = HL_DEVICE_STATUS_IN_RESET;
  273. } else if (hdev->reset_info.needs_reset) {
  274. status = HL_DEVICE_STATUS_NEEDS_RESET;
  275. } else if (hdev->disabled) {
  276. status = HL_DEVICE_STATUS_MALFUNCTION;
  277. } else if (!hdev->init_done) {
  278. status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
  279. } else {
  280. status = HL_DEVICE_STATUS_OPERATIONAL;
  281. }
  282. return status;
  283. }
  284. bool hl_device_operational(struct hl_device *hdev,
  285. enum hl_device_status *status)
  286. {
  287. enum hl_device_status current_status;
  288. current_status = hl_device_status(hdev);
  289. if (status)
  290. *status = current_status;
  291. switch (current_status) {
  292. case HL_DEVICE_STATUS_IN_RESET:
  293. case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
  294. case HL_DEVICE_STATUS_MALFUNCTION:
  295. case HL_DEVICE_STATUS_NEEDS_RESET:
  296. return false;
  297. case HL_DEVICE_STATUS_OPERATIONAL:
  298. case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
  299. default:
  300. return true;
  301. }
  302. }
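/*
 * hpriv_release - release callback for the file-private refcount
 *
 * Runs when the last reference to the user's private data is dropped: checks that
 * the device is idle, removes the user from the open-file list and either resets
 * the device or scrubs its memory.
 */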
  303. static void hpriv_release(struct kref *ref)
  304. {
  305. u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
  306. bool device_is_idle = true;
  307. struct hl_fpriv *hpriv;
  308. struct hl_device *hdev;
  309. hpriv = container_of(ref, struct hl_fpriv, refcount);
  310. hdev = hpriv->hdev;
  311. hdev->asic_funcs->send_device_activity(hdev, false);
  312. put_pid(hpriv->taskpid);
  313. hl_debugfs_remove_file(hpriv);
  314. mutex_destroy(&hpriv->ctx_lock);
  315. mutex_destroy(&hpriv->restore_phase_mutex);
  316. if ((!hdev->pldm) && (hdev->pdev) &&
  317. (!hdev->asic_funcs->is_device_idle(hdev,
  318. idle_mask,
  319. HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
  320. dev_err(hdev->dev,
  321. "device not idle after user context is closed (0x%llx_%llx)\n",
  322. idle_mask[1], idle_mask[0]);
  323. device_is_idle = false;
  324. }
  325. /* We need to remove the user from the list to make sure the reset process won't
  326. * try to kill the user process. If we got here, it means there are no
  327. * more driver/device resources that the user process is occupying, so there is
  328. * no need to kill it.
  329. *
  330. * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
  331. * a race between the release and opening the device again. We don't want to let
  332. * a user open the device while a reset is about to happen.
  333. */
  334. mutex_lock(&hdev->fpriv_list_lock);
  335. list_del(&hpriv->dev_node);
  336. mutex_unlock(&hdev->fpriv_list_lock);
  337. if (!device_is_idle || hdev->reset_upon_device_release) {
  338. hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
  339. } else {
  340. int rc = hdev->asic_funcs->scrub_device_mem(hdev);
  341. if (rc)
  342. dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
  343. }
  344. /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
  345. * thread, we don't care because in_reset is marked, so if a user tries to open
  346. * the device it will fail on that, even if compute_ctx is false.
  347. */
  348. mutex_lock(&hdev->fpriv_list_lock);
  349. hdev->is_compute_ctx_active = false;
  350. mutex_unlock(&hdev->fpriv_list_lock);
  351. hdev->compute_ctx_in_release = 0;
  352. /* release the eventfd */
  353. if (hpriv->notifier_event.eventfd)
  354. eventfd_ctx_put(hpriv->notifier_event.eventfd);
  355. mutex_destroy(&hpriv->notifier_event.lock);
  356. kfree(hpriv);
  357. }
  358. void hl_hpriv_get(struct hl_fpriv *hpriv)
  359. {
  360. kref_get(&hpriv->refcount);
  361. }
  362. int hl_hpriv_put(struct hl_fpriv *hpriv)
  363. {
  364. return kref_put(&hpriv->refcount, hpriv_release);
  365. }
  366. /*
  367. * hl_device_release - release function for habanalabs device
  368. *
  369. * @inode: pointer to inode structure
  370. * @filp: pointer to file structure
  371. *
  372. * Called when a process closes a habanalabs device
  373. */
  374. static int hl_device_release(struct inode *inode, struct file *filp)
  375. {
  376. struct hl_fpriv *hpriv = filp->private_data;
  377. struct hl_device *hdev = hpriv->hdev;
  378. filp->private_data = NULL;
  379. if (!hdev) {
  380. pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
  381. put_pid(hpriv->taskpid);
  382. return 0;
  383. }
  384. /* Each pending user interrupt holds the user's context, hence we
  385. * must release them all before calling hl_ctx_mgr_fini().
  386. */
  387. hl_release_pending_user_interrupts(hpriv->hdev);
  388. hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
  389. hl_mem_mgr_fini(&hpriv->mem_mgr);
  390. hdev->compute_ctx_in_release = 1;
  391. if (!hl_hpriv_put(hpriv))
  392. dev_notice(hdev->dev,
  393. "User process closed FD but device still in use\n");
  394. hdev->last_open_session_duration_jif =
  395. jiffies - hdev->last_successful_open_jif;
  396. return 0;
  397. }
  398. static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
  399. {
  400. struct hl_fpriv *hpriv = filp->private_data;
  401. struct hl_device *hdev = hpriv->hdev;
  402. filp->private_data = NULL;
  403. if (!hdev) {
  404. pr_err("Closing FD after device was removed\n");
  405. goto out;
  406. }
  407. mutex_lock(&hdev->fpriv_ctrl_list_lock);
  408. list_del(&hpriv->dev_node);
  409. mutex_unlock(&hdev->fpriv_ctrl_list_lock);
  410. out:
  411. /* release the eventfd */
  412. if (hpriv->notifier_event.eventfd)
  413. eventfd_ctx_put(hpriv->notifier_event.eventfd);
  414. mutex_destroy(&hpriv->notifier_event.lock);
  415. put_pid(hpriv->taskpid);
  416. kfree(hpriv);
  417. return 0;
  418. }
  419. /*
  420. * hl_mmap - mmap function for habanalabs device
  421. *
  422. * @filp: pointer to file structure
  423. * @vma: pointer to vm_area_struct of the process
  424. *
  425. * Called when a process does an mmap on a habanalabs device. Call the relevant mmap
  426. * function at the end of the common code.
  427. */
  428. static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
  429. {
  430. struct hl_fpriv *hpriv = filp->private_data;
  431. struct hl_device *hdev = hpriv->hdev;
  432. unsigned long vm_pgoff;
  433. if (!hdev) {
  434. pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
  435. return -ENODEV;
  436. }
  437. vm_pgoff = vma->vm_pgoff;
  438. switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
  439. case HL_MMAP_TYPE_BLOCK:
  440. vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
  441. return hl_hw_block_mmap(hpriv, vma);
  442. case HL_MMAP_TYPE_CB:
  443. case HL_MMAP_TYPE_TS_BUFF:
  444. return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL);
  445. }
  446. return -EINVAL;
  447. }
  448. static const struct file_operations hl_ops = {
  449. .owner = THIS_MODULE,
  450. .open = hl_device_open,
  451. .release = hl_device_release,
  452. .mmap = hl_mmap,
  453. .unlocked_ioctl = hl_ioctl,
  454. .compat_ioctl = hl_ioctl
  455. };
  456. static const struct file_operations hl_ctrl_ops = {
  457. .owner = THIS_MODULE,
  458. .open = hl_device_open_ctrl,
  459. .release = hl_device_release_ctrl,
  460. .unlocked_ioctl = hl_ioctl_control,
  461. .compat_ioctl = hl_ioctl_control
  462. };
  463. static void device_release_func(struct device *dev)
  464. {
  465. kfree(dev);
  466. }
  467. /*
  468. * device_init_cdev - Initialize cdev and device for habanalabs device
  469. *
  470. * @hdev: pointer to habanalabs device structure
  471. * @hclass: pointer to the class object of the device
  472. * @minor: minor number of the specific device
  473. * @fops: file operations to install for this device
  474. * @name: name of the device as it will appear in the filesystem
  475. * @cdev: pointer to the char device object that will be initialized
  476. * @dev: pointer to the device object that will be initialized
  477. *
  478. * Initialize a cdev and a Linux device for the habanalabs device.
  479. */
  480. static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
  481. int minor, const struct file_operations *fops,
  482. char *name, struct cdev *cdev,
  483. struct device **dev)
  484. {
  485. cdev_init(cdev, fops);
  486. cdev->owner = THIS_MODULE;
  487. *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
  488. if (!*dev)
  489. return -ENOMEM;
  490. device_initialize(*dev);
  491. (*dev)->devt = MKDEV(hdev->major, minor);
  492. (*dev)->class = hclass;
  493. (*dev)->release = device_release_func;
  494. dev_set_drvdata(*dev, hdev);
  495. dev_set_name(*dev, "%s", name);
  496. return 0;
  497. }
  498. static int device_cdev_sysfs_add(struct hl_device *hdev)
  499. {
  500. int rc;
  501. rc = cdev_device_add(&hdev->cdev, hdev->dev);
  502. if (rc) {
  503. dev_err(hdev->dev,
  504. "failed to add a char device to the system\n");
  505. return rc;
  506. }
  507. rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
  508. if (rc) {
  509. dev_err(hdev->dev,
  510. "failed to add a control char device to the system\n");
  511. goto delete_cdev_device;
  512. }
  513. /* hl_sysfs_init() must be done after adding the device to the system */
  514. rc = hl_sysfs_init(hdev);
  515. if (rc) {
  516. dev_err(hdev->dev, "failed to initialize sysfs\n");
  517. goto delete_ctrl_cdev_device;
  518. }
  519. hdev->cdev_sysfs_created = true;
  520. return 0;
  521. delete_ctrl_cdev_device:
  522. cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
  523. delete_cdev_device:
  524. cdev_device_del(&hdev->cdev, hdev->dev);
  525. return rc;
  526. }
  527. static void device_cdev_sysfs_del(struct hl_device *hdev)
  528. {
  529. if (!hdev->cdev_sysfs_created)
  530. goto put_devices;
  531. hl_sysfs_fini(hdev);
  532. cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
  533. cdev_device_del(&hdev->cdev, hdev->dev);
  534. put_devices:
  535. put_device(hdev->dev);
  536. put_device(hdev->dev_ctrl);
  537. }
  538. static void device_hard_reset_pending(struct work_struct *work)
  539. {
  540. struct hl_device_reset_work *device_reset_work =
  541. container_of(work, struct hl_device_reset_work, reset_work.work);
  542. struct hl_device *hdev = device_reset_work->hdev;
  543. u32 flags;
  544. int rc;
  545. flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;
  546. rc = hl_device_reset(hdev, flags);
  547. if ((rc == -EBUSY) && !hdev->device_fini_pending) {
  548. dev_info(hdev->dev,
  549. "Could not reset device. will try again in %u seconds",
  550. HL_PENDING_RESET_PER_SEC);
  551. queue_delayed_work(device_reset_work->wq,
  552. &device_reset_work->reset_work,
  553. msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
  554. }
  555. }
  556. /*
  557. * device_early_init - do some early initialization for the habanalabs device
  558. *
  559. * @hdev: pointer to habanalabs device structure
  560. *
  561. * Install the relevant function pointers and call the early_init function,
  562. * if such a function exists
  563. */
  564. static int device_early_init(struct hl_device *hdev)
  565. {
  566. int i, rc;
  567. char workq_name[32];
  568. switch (hdev->asic_type) {
  569. case ASIC_GOYA:
  570. goya_set_asic_funcs(hdev);
  571. strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
  572. break;
  573. case ASIC_GAUDI:
  574. gaudi_set_asic_funcs(hdev);
  575. strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
  576. break;
  577. case ASIC_GAUDI_SEC:
  578. gaudi_set_asic_funcs(hdev);
  579. strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
  580. break;
  581. case ASIC_GAUDI2:
  582. gaudi2_set_asic_funcs(hdev);
  583. strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
  584. break;
  585. case ASIC_GAUDI2_SEC:
  586. gaudi2_set_asic_funcs(hdev);
  587. strscpy(hdev->asic_name, "GAUDI2 SEC", sizeof(hdev->asic_name));
  588. break;
  589. default:
  590. dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
  591. hdev->asic_type);
  592. return -EINVAL;
  593. }
  594. rc = hdev->asic_funcs->early_init(hdev);
  595. if (rc)
  596. return rc;
  597. rc = hl_asid_init(hdev);
  598. if (rc)
  599. goto early_fini;
  600. if (hdev->asic_prop.completion_queues_count) {
  601. hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
  602. sizeof(struct workqueue_struct *),
  603. GFP_KERNEL);
  604. if (!hdev->cq_wq) {
  605. rc = -ENOMEM;
  606. goto asid_fini;
  607. }
  608. }
  609. for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
  610. snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
  611. hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
  612. if (hdev->cq_wq[i] == NULL) {
  613. dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
  614. rc = -ENOMEM;
  615. goto free_cq_wq;
  616. }
  617. }
  618. hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
  619. if (hdev->eq_wq == NULL) {
  620. dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
  621. rc = -ENOMEM;
  622. goto free_cq_wq;
  623. }
  624. hdev->cs_cmplt_wq = alloc_workqueue("hl-cs-completions", WQ_UNBOUND, 0);
  625. if (!hdev->cs_cmplt_wq) {
  626. dev_err(hdev->dev,
  627. "Failed to allocate CS completions workqueue\n");
  628. rc = -ENOMEM;
  629. goto free_eq_wq;
  630. }
  631. hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
  632. if (!hdev->ts_free_obj_wq) {
  633. dev_err(hdev->dev,
  634. "Failed to allocate Timestamp registration free workqueue\n");
  635. rc = -ENOMEM;
  636. goto free_cs_cmplt_wq;
  637. }
  638. hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
  639. if (!hdev->pf_wq) {
  640. dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
  641. rc = -ENOMEM;
  642. goto free_ts_free_wq;
  643. }
  644. hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
  645. GFP_KERNEL);
  646. if (!hdev->hl_chip_info) {
  647. rc = -ENOMEM;
  648. goto free_pf_wq;
  649. }
  650. rc = hl_mmu_if_set_funcs(hdev);
  651. if (rc)
  652. goto free_chip_info;
  653. hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);
  654. hdev->device_reset_work.wq =
  655. create_singlethread_workqueue("hl_device_reset");
  656. if (!hdev->device_reset_work.wq) {
  657. rc = -ENOMEM;
  658. dev_err(hdev->dev, "Failed to create device reset WQ\n");
  659. goto free_cb_mgr;
  660. }
  661. INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
  662. device_hard_reset_pending);
  663. hdev->device_reset_work.hdev = hdev;
  664. hdev->device_fini_pending = 0;
  665. mutex_init(&hdev->send_cpu_message_lock);
  666. mutex_init(&hdev->debug_lock);
  667. INIT_LIST_HEAD(&hdev->cs_mirror_list);
  668. spin_lock_init(&hdev->cs_mirror_lock);
  669. spin_lock_init(&hdev->reset_info.lock);
  670. INIT_LIST_HEAD(&hdev->fpriv_list);
  671. INIT_LIST_HEAD(&hdev->fpriv_ctrl_list);
  672. mutex_init(&hdev->fpriv_list_lock);
  673. mutex_init(&hdev->fpriv_ctrl_list_lock);
  674. mutex_init(&hdev->clk_throttling.lock);
  675. return 0;
  676. free_cb_mgr:
  677. hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
  678. free_chip_info:
  679. kfree(hdev->hl_chip_info);
  680. free_pf_wq:
  681. destroy_workqueue(hdev->pf_wq);
  682. free_ts_free_wq:
  683. destroy_workqueue(hdev->ts_free_obj_wq);
  684. free_cs_cmplt_wq:
  685. destroy_workqueue(hdev->cs_cmplt_wq);
  686. free_eq_wq:
  687. destroy_workqueue(hdev->eq_wq);
  688. free_cq_wq:
  689. for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
  690. if (hdev->cq_wq[i])
  691. destroy_workqueue(hdev->cq_wq[i]);
  692. kfree(hdev->cq_wq);
  693. asid_fini:
  694. hl_asid_fini(hdev);
  695. early_fini:
  696. if (hdev->asic_funcs->early_fini)
  697. hdev->asic_funcs->early_fini(hdev);
  698. return rc;
  699. }
  700. /*
  701. * device_early_fini - finalize all that was done in device_early_init
  702. *
  703. * @hdev: pointer to habanalabs device structure
  704. *
  705. */
  706. static void device_early_fini(struct hl_device *hdev)
  707. {
  708. int i;
  709. mutex_destroy(&hdev->debug_lock);
  710. mutex_destroy(&hdev->send_cpu_message_lock);
  711. mutex_destroy(&hdev->fpriv_list_lock);
  712. mutex_destroy(&hdev->fpriv_ctrl_list_lock);
  713. mutex_destroy(&hdev->clk_throttling.lock);
  714. hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
  715. kfree(hdev->hl_chip_info);
  716. destroy_workqueue(hdev->pf_wq);
  717. destroy_workqueue(hdev->ts_free_obj_wq);
  718. destroy_workqueue(hdev->cs_cmplt_wq);
  719. destroy_workqueue(hdev->eq_wq);
  720. destroy_workqueue(hdev->device_reset_work.wq);
  721. for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
  722. destroy_workqueue(hdev->cq_wq[i]);
  723. kfree(hdev->cq_wq);
  724. hl_asid_fini(hdev);
  725. if (hdev->asic_funcs->early_fini)
  726. hdev->asic_funcs->early_fini(hdev);
  727. }
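/* The PCI link is considered healthy if the device vendor ID can still be read
 * from config space.
 */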
  728. static bool is_pci_link_healthy(struct hl_device *hdev)
  729. {
  730. u16 vendor_id;
  731. if (!hdev->pdev)
  732. return false;
  733. pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id);
  734. return (vendor_id == PCI_VENDOR_ID_HABANALABS);
  735. }
  736. static void hl_device_heartbeat(struct work_struct *work)
  737. {
  738. struct hl_device *hdev = container_of(work, struct hl_device,
  739. work_heartbeat.work);
  740. if (!hl_device_operational(hdev, NULL))
  741. goto reschedule;
  742. if (!hdev->asic_funcs->send_heartbeat(hdev))
  743. goto reschedule;
  744. if (hl_device_operational(hdev, NULL))
  745. dev_err(hdev->dev, "Device heartbeat failed! PCI link is %s\n",
  746. is_pci_link_healthy(hdev) ? "healthy" : "broken");
  747. hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);
  748. return;
  749. reschedule:
  750. /*
  751. * prev_reset_trigger tracks consecutive fatal h/w errors until first
  752. * heartbeat immediately post reset.
  753. * If control reached here, then at least one heartbeat work has been
  754. * scheduled since last reset/init cycle.
  755. * So if the device is not already in reset cycle, reset the flag
  756. * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
  757. * status for at least one heartbeat. From this point driver restarts
  758. * tracking future consecutive fatal errors.
  759. */
  760. if (!hdev->reset_info.in_reset)
  761. hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
  762. schedule_delayed_work(&hdev->work_heartbeat,
  763. usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
  764. }
  765. /*
  766. * device_late_init - do late initialization for the habanalabs device
  767. *
  768. * @hdev: pointer to habanalabs device structure
  769. *
  770. * Do initialization that either needs the device H/W queues to be active or needs
  771. * to happen after all the rest of the initialization is finished
  772. */
  773. static int device_late_init(struct hl_device *hdev)
  774. {
  775. int rc;
  776. if (hdev->asic_funcs->late_init) {
  777. rc = hdev->asic_funcs->late_init(hdev);
  778. if (rc) {
  779. dev_err(hdev->dev,
  780. "failed late initialization for the H/W\n");
  781. return rc;
  782. }
  783. }
  784. hdev->high_pll = hdev->asic_prop.high_pll;
  785. if (hdev->heartbeat) {
  786. INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
  787. schedule_delayed_work(&hdev->work_heartbeat,
  788. usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
  789. }
  790. hdev->late_init_done = true;
  791. return 0;
  792. }
  793. /*
  794. * device_late_fini - finalize all that was done in device_late_init
  795. *
  796. * @hdev: pointer to habanalabs device structure
  797. *
  798. */
  799. static void device_late_fini(struct hl_device *hdev)
  800. {
  801. if (!hdev->late_init_done)
  802. return;
  803. if (hdev->heartbeat)
  804. cancel_delayed_work_sync(&hdev->work_heartbeat);
  805. if (hdev->asic_funcs->late_fini)
  806. hdev->asic_funcs->late_fini(hdev);
  807. hdev->late_init_done = false;
  808. }
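/* Utilization is reported as the current power draw relative to the range between
 * the default (dc) power and the maximum power, in percent.
 */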
  809. int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
  810. {
  811. u64 max_power, curr_power, dc_power, dividend;
  812. int rc;
  813. max_power = hdev->max_power;
  814. dc_power = hdev->asic_prop.dc_power_default;
  815. rc = hl_fw_cpucp_power_get(hdev, &curr_power);
  816. if (rc)
  817. return rc;
  818. curr_power = clamp(curr_power, dc_power, max_power);
  819. dividend = (curr_power - dc_power) * 100;
  820. *utilization = (u32) div_u64(dividend, (max_power - dc_power));
  821. return 0;
  822. }
  823. int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable)
  824. {
  825. int rc = 0;
  826. mutex_lock(&hdev->debug_lock);
  827. if (!enable) {
  828. if (!hdev->in_debug) {
  829. dev_err(hdev->dev,
  830. "Failed to disable debug mode because device was not in debug mode\n");
  831. rc = -EFAULT;
  832. goto out;
  833. }
  834. if (!hdev->reset_info.hard_reset_pending)
  835. hdev->asic_funcs->halt_coresight(hdev, ctx);
  836. hdev->in_debug = 0;
  837. goto out;
  838. }
  839. if (hdev->in_debug) {
  840. dev_err(hdev->dev,
  841. "Failed to enable debug mode because device is already in debug mode\n");
  842. rc = -EFAULT;
  843. goto out;
  844. }
  845. hdev->in_debug = 1;
  846. out:
  847. mutex_unlock(&hdev->debug_lock);
  848. return rc;
  849. }
  850. static void take_release_locks(struct hl_device *hdev)
  851. {
  852. /* Flush anyone that is inside the critical section of enqueue
  853. * jobs to the H/W
  854. */
  855. hdev->asic_funcs->hw_queues_lock(hdev);
  856. hdev->asic_funcs->hw_queues_unlock(hdev);
  857. /* Flush processes that are sending message to CPU */
  858. mutex_lock(&hdev->send_cpu_message_lock);
  859. mutex_unlock(&hdev->send_cpu_message_lock);
  860. /* Flush anyone that is inside device open */
  861. mutex_lock(&hdev->fpriv_list_lock);
  862. mutex_unlock(&hdev->fpriv_list_lock);
  863. mutex_lock(&hdev->fpriv_ctrl_list_lock);
  864. mutex_unlock(&hdev->fpriv_ctrl_list_lock);
  865. }
  866. static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
  867. bool skip_wq_flush)
  868. {
  869. if (hard_reset)
  870. device_late_fini(hdev);
  871. /*
  872. * Halt the engines and disable interrupts so we won't get any more
  873. * completions from H/W and we won't have any accesses from the
  874. * H/W to the host machine
  875. */
  876. hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
  877. /* Go over all the queues, release all CS and their jobs */
  878. hl_cs_rollback_all(hdev, skip_wq_flush);
  879. /* flush the MMU prefetch workqueue */
  880. flush_workqueue(hdev->pf_wq);
  881. /* Release all pending user interrupts, each pending user interrupt
  882. * holds a reference to user context
  883. */
  884. hl_release_pending_user_interrupts(hdev);
  885. }
  886. /*
  887. * hl_device_suspend - initiate device suspend
  888. *
  889. * @hdev: pointer to habanalabs device structure
  890. *
  891. * Puts the hw in the suspend state (all asics).
  892. * Returns 0 for success or an error on failure.
  893. * Called at driver suspend.
  894. */
  895. int hl_device_suspend(struct hl_device *hdev)
  896. {
  897. int rc;
  898. pci_save_state(hdev->pdev);
  899. /* Block future CS/VM/JOB completion operations */
  900. spin_lock(&hdev->reset_info.lock);
  901. if (hdev->reset_info.in_reset) {
  902. spin_unlock(&hdev->reset_info.lock);
  903. dev_err(hdev->dev, "Can't suspend while in reset\n");
  904. return -EIO;
  905. }
  906. hdev->reset_info.in_reset = 1;
  907. spin_unlock(&hdev->reset_info.lock);
  908. /* This blocks all other stuff that is not blocked by in_reset */
  909. hdev->disabled = true;
  910. take_release_locks(hdev);
  911. rc = hdev->asic_funcs->suspend(hdev);
  912. if (rc)
  913. dev_err(hdev->dev,
  914. "Failed to disable PCI access of device CPU\n");
  915. /* Shut down the device */
  916. pci_disable_device(hdev->pdev);
  917. pci_set_power_state(hdev->pdev, PCI_D3hot);
  918. return 0;
  919. }
  920. /*
  921. * hl_device_resume - initiate device resume
  922. *
  923. * @hdev: pointer to habanalabs device structure
  924. *
  925. * Bring the hw back to operating state (all asics).
  926. * Returns 0 for success or an error on failure.
  927. * Called at driver resume.
  928. */
  929. int hl_device_resume(struct hl_device *hdev)
  930. {
  931. int rc;
  932. pci_set_power_state(hdev->pdev, PCI_D0);
  933. pci_restore_state(hdev->pdev);
  934. rc = pci_enable_device_mem(hdev->pdev);
  935. if (rc) {
  936. dev_err(hdev->dev,
  937. "Failed to enable PCI device in resume\n");
  938. return rc;
  939. }
  940. pci_set_master(hdev->pdev);
  941. rc = hdev->asic_funcs->resume(hdev);
  942. if (rc) {
  943. dev_err(hdev->dev, "Failed to resume device after suspend\n");
  944. goto disable_device;
  945. }
  946. /* 'in_reset' was set to true during suspend, now we must clear it in order
  947. * for hard reset to be performed
  948. */
  949. spin_lock(&hdev->reset_info.lock);
  950. hdev->reset_info.in_reset = 0;
  951. spin_unlock(&hdev->reset_info.lock);
  952. rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
  953. if (rc) {
  954. dev_err(hdev->dev, "Failed to reset device during resume\n");
  955. goto disable_device;
  956. }
  957. return 0;
  958. disable_device:
  959. pci_clear_master(hdev->pdev);
  960. pci_disable_device(hdev->pdev);
  961. return rc;
  962. }
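/*
 * device_kill_open_processes - send SIGKILL to every process that still holds an
 * open file descriptor of the (compute or control) device, then wait for the
 * descriptors to be closed before the reset flow continues.
 */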
  963. static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev)
  964. {
  965. struct task_struct *task = NULL;
  966. struct list_head *fd_list;
  967. struct hl_fpriv *hpriv;
  968. struct mutex *fd_lock;
  969. u32 pending_cnt;
  970. fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
  971. fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
  972. /* Giving time for user to close FD, and for processes that are inside
  973. * hl_device_open to finish
  974. */
  975. if (!list_empty(fd_list))
  976. ssleep(1);
  977. if (timeout) {
  978. pending_cnt = timeout;
  979. } else {
  980. if (hdev->process_kill_trial_cnt) {
  981. /* Processes have been already killed */
  982. pending_cnt = 1;
  983. goto wait_for_processes;
  984. } else {
  985. /* Wait a small period after process kill */
  986. pending_cnt = HL_PENDING_RESET_PER_SEC;
  987. }
  988. }
  989. mutex_lock(fd_lock);
  990. /* This section must be protected because we are dereferencing
  991. * pointers that are freed if the process exits
  992. */
  993. list_for_each_entry(hpriv, fd_list, dev_node) {
  994. task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
  995. if (task) {
  996. dev_info(hdev->dev, "Killing user process pid=%d\n",
  997. task_pid_nr(task));
  998. send_sig(SIGKILL, task, 1);
  999. usleep_range(1000, 10000);
  1000. put_task_struct(task);
  1001. } else {
  1002. /*
  1003. * If we got here, it means that process was killed from outside the driver
  1004. * right after it started looping on fd_list and before get_pid_task, thus
  1005. * we don't need to kill it.
  1006. */
  1007. dev_dbg(hdev->dev,
  1008. "Can't get task struct for user process, assuming process was killed from outside the driver\n");
  1009. }
  1010. }
  1011. mutex_unlock(fd_lock);
  1012. /*
  1013. * We killed the open users, but that doesn't mean they are closed.
  1014. * It could be that they are running a long cleanup phase in the driver
  1015. * e.g. MMU unmappings, or running other long teardown flow even before
  1016. * our cleanup.
  1017. * Therefore we need to wait again to make sure they are closed before
  1018. * continuing with the reset.
  1019. */
  1020. wait_for_processes:
  1021. while ((!list_empty(fd_list)) && (pending_cnt)) {
  1022. dev_dbg(hdev->dev,
  1023. "Waiting for all unmap operations to finish before hard reset\n");
  1024. pending_cnt--;
  1025. ssleep(1);
  1026. }
  1027. /* All processes exited successfully */
  1028. if (list_empty(fd_list))
  1029. return 0;
  1030. /* Give up waiting for processes to exit */
  1031. if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
  1032. return -ETIME;
  1033. hdev->process_kill_trial_cnt++;
  1034. return -EBUSY;
  1035. }
  1036. static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
  1037. {
  1038. struct list_head *fd_list;
  1039. struct hl_fpriv *hpriv;
  1040. struct mutex *fd_lock;
  1041. fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
  1042. fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;
  1043. mutex_lock(fd_lock);
  1044. list_for_each_entry(hpriv, fd_list, dev_node)
  1045. hpriv->hdev = NULL;
  1046. mutex_unlock(fd_lock);
  1047. }
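/*
 * handle_reset_trigger - record the reset cause and trigger, detect repeated
 * triggers, and for a hard reset that is not due to heartbeat and not performed
 * by the F/W, ask the F/W to disable further PCI access.
 */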
  1048. static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
  1049. {
  1050. u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
  1051. /*
  1052. * 'reset cause' is being updated here, because getting here
  1053. * means that it's the 1st time and the last time we're here
  1054. * ('in_reset' makes sure of it). This makes sure that
  1055. * 'reset_cause' will continue holding its 1st recorded reason!
  1056. */
  1057. if (flags & HL_DRV_RESET_HEARTBEAT) {
  1058. hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
  1059. cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
  1060. } else if (flags & HL_DRV_RESET_TDR) {
  1061. hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
  1062. cur_reset_trigger = HL_DRV_RESET_TDR;
  1063. } else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
  1064. hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
  1065. cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
  1066. } else {
  1067. hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
  1068. }
  1069. /*
  1070. * If the reset cause is the same twice in a row, reset_trigger_repeated
  1071. * is set; if this reset is also due to a fatal FW error, the
  1072. * device is put in an unstable state.
  1073. */
  1074. if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
  1075. hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
  1076. hdev->reset_info.reset_trigger_repeated = 0;
  1077. } else {
  1078. hdev->reset_info.reset_trigger_repeated = 1;
  1079. }
  1080. /* If the reset is due to heartbeat, the device CPU is not responsive,
  1081. * in which case there is no point in sending it a PCI disable message.
  1082. *
  1083. * If F/W is performing the reset, no need to send it a message to disable
  1084. * PCI access
  1085. */
  1086. if ((flags & HL_DRV_RESET_HARD) &&
  1087. !(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
  1088. /* Disable PCI access from the device F/W so it won't send
  1089. * us additional interrupts. We disable MSI/MSI-X at
  1090. * the halt_engines function and we can't have the F/W
  1091. * sending us interrupts after that. We need to disable
  1092. * the access here because if the device is marked
  1093. * disabled, the message won't be sent. Also, in case
  1094. * of heartbeat, the device CPU is marked as disabled,
  1095. * so this message won't be sent
  1096. */
  1097. if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0))
  1098. dev_warn(hdev->dev,
  1099. "Failed to disable PCI access by F/W\n");
  1100. }
  1101. }
  1102. /*
  1103. * hl_device_reset - reset the device
  1104. *
  1105. * @hdev: pointer to habanalabs device structure
  1106. * @flags: reset flags.
  1107. *
  1108. * Block future CS and wait for pending CS to be enqueued
  1109. * Call ASIC H/W fini
  1110. * Flush all completions
  1111. * Re-initialize all internal data structures
  1112. * Call ASIC H/W init, late_init
  1113. * Test queues
  1114. * Enable device
  1115. *
  1116. * Returns 0 for success or an error on failure.
  1117. */
  1118. int hl_device_reset(struct hl_device *hdev, u32 flags)
  1119. {
  1120. bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
  1121. reset_upon_device_release = false, schedule_hard_reset = false,
  1122. skip_wq_flush, delay_reset;
  1123. u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
  1124. struct hl_ctx *ctx;
  1125. int i, rc;
  1126. if (!hdev->init_done) {
  1127. dev_err(hdev->dev, "Can't reset before initialization is done\n");
  1128. return 0;
  1129. }
  1130. hard_reset = !!(flags & HL_DRV_RESET_HARD);
  1131. from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
  1132. fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
  1133. skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
  1134. delay_reset = !!(flags & HL_DRV_RESET_DELAY);
  1135. if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
  1136. hard_instead_soft = true;
  1137. hard_reset = true;
  1138. }
  1139. if (hdev->reset_upon_device_release && (flags & HL_DRV_RESET_DEV_RELEASE)) {
  1140. if (hard_reset) {
  1141. dev_crit(hdev->dev,
  1142. "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n");
  1143. return -EINVAL;
  1144. }
  1145. reset_upon_device_release = true;
  1146. goto do_reset;
  1147. }
  1148. if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) {
  1149. hard_instead_soft = true;
  1150. hard_reset = true;
  1151. }
  1152. if (hard_instead_soft)
  1153. dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n");
  1154. do_reset:
  1155. /* Re-entry of reset thread */
  1156. if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
  1157. goto kill_processes;
  1158. /*
  1159. * Prevent concurrency in this function - only one reset should be
  1160. * done at any given time. Only need to perform this if we didn't
  1161. * get from the dedicated hard reset thread
  1162. */
  1163. if (!from_hard_reset_thread) {
  1164. /* Block future CS/VM/JOB completion operations */
  1165. spin_lock(&hdev->reset_info.lock);
  1166. if (hdev->reset_info.in_reset) {
  1167. /* We only allow scheduling of a hard reset during compute reset */
  1168. if (hard_reset && hdev->reset_info.in_compute_reset)
  1169. hdev->reset_info.hard_reset_schedule_flags = flags;
  1170. spin_unlock(&hdev->reset_info.lock);
  1171. return 0;
  1172. }
  1173. /* This still allows the completion of some KDMA ops
  1174. * Update this before in_reset because in_compute_reset implies we are in reset
  1175. */
  1176. hdev->reset_info.in_compute_reset = !hard_reset;
  1177. hdev->reset_info.in_reset = 1;
  1178. spin_unlock(&hdev->reset_info.lock);
  1179. if (delay_reset)
  1180. usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
  1181. handle_reset_trigger(hdev, flags);
  1182. /* This also blocks future CS/VM/JOB completion operations */
  1183. hdev->disabled = true;
  1184. take_release_locks(hdev);
  1185. if (hard_reset)
  1186. dev_info(hdev->dev, "Going to reset device\n");
  1187. else if (reset_upon_device_release)
  1188. dev_dbg(hdev->dev, "Going to reset device after release by user\n");
  1189. else
  1190. dev_dbg(hdev->dev, "Going to reset engines of inference device\n");
  1191. }
  1192. again:
  1193. if ((hard_reset) && (!from_hard_reset_thread)) {
  1194. hdev->reset_info.hard_reset_pending = true;
  1195. hdev->process_kill_trial_cnt = 0;
  1196. hdev->device_reset_work.flags = flags;
  1197. /*
  1198. * Because the reset function can't run from heartbeat work,
  1199. * we need to call the reset function from a dedicated work.
  1200. */
  1201. queue_delayed_work(hdev->device_reset_work.wq,
  1202. &hdev->device_reset_work.reset_work, 0);
  1203. return 0;
  1204. }
  1205. cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
  1206. kill_processes:
  1207. if (hard_reset) {
  1208. /* Kill processes here after CS rollback. This is because the
  1209. * process can't really exit until all its CSs are done, which
  1210. * is what we do in cs rollback
  1211. */
  1212. rc = device_kill_open_processes(hdev, 0, false);
  1213. if (rc == -EBUSY) {
  1214. if (hdev->device_fini_pending) {
  1215. dev_crit(hdev->dev,
  1216. "%s Failed to kill all open processes, stopping hard reset\n",
  1217. dev_name(&(hdev)->pdev->dev));
  1218. goto out_err;
  1219. }
  1220. /* signal reset thread to reschedule */
  1221. return rc;
  1222. }
  1223. if (rc) {
  1224. dev_crit(hdev->dev,
  1225. "%s Failed to kill all open processes, stopping hard reset\n",
  1226. dev_name(&(hdev)->pdev->dev));
  1227. goto out_err;
  1228. }
  1229. /* Flush the Event queue workers to make sure no other thread is
  1230. * reading or writing to registers during the reset
  1231. */
  1232. flush_workqueue(hdev->eq_wq);
  1233. }
  1234. /* Reset the H/W. It will be in idle state after this returns */
  1235. hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
  1236. if (hard_reset) {
  1237. hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
  1238. /* Release kernel context */
  1239. if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
  1240. hdev->kernel_ctx = NULL;
  1241. hl_vm_fini(hdev);
  1242. hl_mmu_fini(hdev);
  1243. hl_eq_reset(hdev, &hdev->event_queue);
  1244. }
  1245. /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
  1246. hl_hw_queue_reset(hdev, hard_reset);
  1247. for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
  1248. hl_cq_reset(hdev, &hdev->completion_queue[i]);
  1249. /* Make sure the context switch phase will run again */
  1250. ctx = hl_get_compute_ctx(hdev);
  1251. if (ctx) {
  1252. atomic_set(&ctx->thread_ctx_switch_token, 1);
  1253. ctx->thread_ctx_switch_wait_token = 0;
  1254. hl_ctx_put(ctx);
  1255. }
  1256. /* Finished tear-down, starting to re-initialize */
  1257. if (hard_reset) {
  1258. hdev->device_cpu_disabled = false;
  1259. hdev->reset_info.hard_reset_pending = false;
  1260. if (hdev->reset_info.reset_trigger_repeated &&
  1261. (hdev->reset_info.prev_reset_trigger ==
  1262. HL_DRV_RESET_FW_FATAL_ERR)) {
  1263. /* if there are 2 back-to-back resets from the FW,
  1264. * ensure the driver puts the device in an unusable state
  1265. */
			dev_crit(hdev->dev,
				"%s Consecutive FW fatal errors received, stopping hard reset\n",
				dev_name(&(hdev)->pdev->dev));
			rc = -EIO;
			goto out_err;
		}

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"%s kernel ctx was alive during hard reset, something is terribly wrong\n",
				dev_name(&(hdev)->pdev->dev));
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->is_compute_ctx_active = false;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	/* F/W security enabled indication might be updated after hard-reset */
	if (hard_reset) {
		rc = hl_fw_read_preboot_status(hdev);
		if (rc)
			goto out_err;
	}

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* If device is not idle fail the reset process */
	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
			idle_mask[1], idle_mask[0]);
		rc = -EIO;
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev, "Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
			goto out_err;
		}

		if (!hdev->asic_prop.fw_security_enabled)
			hl_fw_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->compute_reset_late_init(hdev);
		if (rc) {
			if (reset_upon_device_release)
				dev_err(hdev->dev,
					"Failed late init in reset after device release\n");
			else
				dev_err(hdev->dev, "Failed late init after compute reset\n");
			goto out_err;
		}
	}

	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc);
		return rc;
	}

	spin_lock(&hdev->reset_info.lock);
	hdev->reset_info.in_compute_reset = 0;

	/* Schedule hard reset only if requested and if not already in hard reset.
	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
	 * reset schedule
	 */
	if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
		schedule_hard_reset = true;
	else
		hdev->reset_info.in_reset = 0;

	spin_unlock(&hdev->reset_info.lock);

	hdev->reset_info.needs_reset = false;

	if (hard_reset)
		dev_info(hdev->dev,
			"Successfully finished resetting the %s device\n",
			dev_name(&(hdev)->pdev->dev));
	else
		dev_dbg(hdev->dev,
			"Successfully finished resetting the %s device\n",
			dev_name(&(hdev)->pdev->dev));

	if (hard_reset) {
		hdev->reset_info.hard_reset_cnt++;

		/* After reset is done, we are ready to receive events from
		 * the F/W. We can't do it before because we will ignore events
		 * and if those events are fatal, we won't know about it and
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else if (!reset_upon_device_release) {
		hdev->reset_info.compute_reset_cnt++;
	}

	if (schedule_hard_reset) {
		dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
		flags = hdev->reset_info.hard_reset_schedule_flags;
		hdev->reset_info.hard_reset_schedule_flags = 0;
		hdev->disabled = true;
		hard_reset = true;
		handle_reset_trigger(hdev, flags);
		goto again;
	}

	return 0;

out_err:
	hdev->disabled = true;

	spin_lock(&hdev->reset_info.lock);
	hdev->reset_info.in_compute_reset = 0;

	if (hard_reset) {
		dev_err(hdev->dev,
			"%s Failed to reset! Device is NOT usable\n",
			dev_name(&(hdev)->pdev->dev));
		hdev->reset_info.hard_reset_cnt++;
	} else if (reset_upon_device_release) {
		spin_unlock(&hdev->reset_info.lock);
		dev_err(hdev->dev, "Failed to reset device after user release\n");
		flags |= HL_DRV_RESET_HARD;
		flags &= ~HL_DRV_RESET_DEV_RELEASE;
		hard_reset = true;
		goto again;
	} else {
		spin_unlock(&hdev->reset_info.lock);
		dev_err(hdev->dev, "Failed to do compute reset\n");
		hdev->reset_info.compute_reset_cnt++;
		flags |= HL_DRV_RESET_HARD;
		hard_reset = true;
		goto again;
	}

	hdev->reset_info.in_reset = 0;

	spin_unlock(&hdev->reset_info.lock);

	return rc;
}
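
/*
 * Illustrative sketch (not part of the original file): roughly what the
 * dedicated reset work queued by hl_device_reset() could look like,
 * re-entering the reset flow with the saved flags. It assumes that
 * struct hl_device_reset_work carries a back-pointer to the device next to
 * the saved flags, and that HL_DRV_RESET_FROM_RESET_THR marks re-entry from
 * the reset thread; the function name below is hypothetical.
 */
static void example_device_reset_work_fn(struct work_struct *work)
{
	struct hl_device_reset_work *reset_work =
		container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = reset_work->hdev;	/* assumed back-pointer */
	u32 flags = reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;
	int rc;

	/* Re-enter the reset flow from a regular work context */
	rc = hl_device_reset(hdev, flags);

	/* -EBUSY means open processes are still exiting; retry a bit later */
	if ((rc == -EBUSY) && !hdev->device_fini_pending)
		queue_delayed_work(reset_work->wq, &reset_work->reset_work,
					msecs_to_jiffies(1000));
}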

static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
{
	mutex_lock(&notifier_event->lock);
	notifier_event->events_mask |= event_mask;

	if (notifier_event->eventfd)
		eventfd_signal(notifier_event->eventfd, 1);

	mutex_unlock(&notifier_event->lock);
}

/*
 * hl_notifier_event_send_all - notify all user processes via eventfd
 *
 * @hdev: pointer to habanalabs device structure
 * @event_mask: mask of the occurred event/s
 */
void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
{
	struct hl_fpriv *hpriv;

	mutex_lock(&hdev->fpriv_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event_mask);

	mutex_unlock(&hdev->fpriv_list_lock);

	/* control device */
	mutex_lock(&hdev->fpriv_ctrl_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event_mask);

	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}
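
/*
 * Illustrative sketch (not part of the original file): a typical caller
 * simply ORs together the event bits it wants to report and lets
 * hl_notifier_event_send_all() fan them out to every registered eventfd.
 * The event bit used below is an assumption; the real values live in the
 * uapi header.
 */
static void example_report_reset_event(struct hl_device *hdev)
{
	u64 event_mask = 0;

	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;	/* assumed event bit */

	hl_notifier_event_send_all(hdev, event_mask);
}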

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
	char *name;
	bool add_cdev_sysfs_on_err = false;

	hdev->cdev_idx = hdev->id / 2;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_disabled;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	/* Initialize ASIC function pointers and perform early init */
	rc = device_early_init(hdev);
	if (rc)
		goto free_dev_ctrl;

	user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count +
				hdev->asic_prop.user_interrupt_count;

	if (user_interrupt_cnt) {
		hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt),
						GFP_KERNEL);
		if (!hdev->user_interrupt) {
			rc = -ENOMEM;
			goto early_fini;
		}
	}

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto free_usr_intr_mem;

	/* initialize completion structure for multi CS wait */
	hl_multi_cs_completion_init(hdev);

	/*
	 * Initialize the H/W queues. Must be done before hw_init, because
	 * there the addresses of the kernel queue are being written to the
	 * registers of the device
	 */
	rc = hl_hw_queues_create(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel queues\n");
		goto sw_fini;
	}

	cq_cnt = hdev->asic_prop.completion_queues_count;

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
						sizeof(*hdev->completion_queue),
						GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs,
					sizeof(struct hl_cs *), GFP_KERNEL);
	if (!hdev->shadow_cs_queue) {
		rc = -ENOMEM;
		goto cq_fini;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto free_shadow_cs_queue;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->is_compute_ctx_active = false;

	hdev->asic_funcs->state_dump_init(hdev);

	hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
	hl_debugfs_add_device(hdev);

	/* debugfs nodes are created in hl_ctx_init so it must be called after
	 * hl_debugfs_add_device.
	 */
	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto remove_device_from_debugfs;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	rc = hl_dec_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize the decoder module\n");
		goto cb_pool_fini;
	}

	/*
	 * From this point, override rc (=0) in case of an error to allow
	 * debugging (by adding char devices and creating sysfs nodes as part
	 * of the error flow).
	 */
	add_cdev_sysfs_on_err = true;

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W\n");
		rc = 0;
		goto out_disabled;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive\n");
		rc = 0;
		goto out_disabled;
	}

	rc = device_late_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed late initialization\n");
		rc = 0;
		goto out_disabled;
	}

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / SZ_1G);

	rc = hl_vm_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize memory module\n");
		rc = 0;
		goto out_disabled;
	}

	/*
	 * Expose devices and sysfs nodes to user.
	 * From here there is no need to add char devices and create sysfs nodes
	 * in case of an error.
	 */
	add_cdev_sysfs_on_err = false;
	rc = device_cdev_sysfs_add(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add char devices and sysfs nodes\n");
		rc = 0;
		goto out_disabled;
	}

	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	if (hdev->asic_prop.set_max_power_on_device_init &&
			!hdev->asic_prop.fw_security_enabled)
		hl_fw_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
	 * there we get the information from the device about which
	 * hwmon-related sensors the device supports.
	 * Furthermore, it must be done after adding the device to the system.
	 */
	rc = hl_hwmon_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize hwmon\n");
		rc = 0;
		goto out_disabled;
	}

	dev_notice(hdev->dev,
		"Successfully added device %s to habanalabs driver\n",
		dev_name(&(hdev)->pdev->dev));

	hdev->init_done = true;

	/* After initialization is done, we are ready to receive events from
	 * the F/W. We can't do it before because we will ignore events and if
	 * those events are fatal, we won't know about it and the device will
	 * be operational although it shouldn't be
	 */
	hdev->asic_funcs->enable_events_from_fw(hdev);

	return 0;

cb_pool_fini:
	hl_cb_pool_fini(hdev);
release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
	hl_debugfs_remove_device(hdev);
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
free_shadow_cs_queue:
	kfree(hdev->shadow_cs_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
free_usr_intr_mem:
	kfree(hdev->user_interrupt);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;

	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);

	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device %s is NOT usable !\n",
			hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));
	else
		pr_err("Failed to initialize hl%d. Device %s is NOT usable !\n",
			hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));

	return rc;
}
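
/*
 * Illustrative sketch (not part of the original file): how a PCI probe path
 * might drive hl_device_init(). The helpers create_hdev()/destroy_hdev() and
 * the hl_class pointer are hypothetical names used only for this example.
 */
static int example_probe_one(struct pci_dev *pdev, struct class *hl_class)
{
	struct hl_device *hdev;
	int rc;

	rc = create_hdev(&hdev, pdev);		/* hypothetical allocator */
	if (rc)
		return rc;

	pci_set_drvdata(pdev, hdev);

	rc = hl_device_init(hdev, hl_class);
	if (rc) {
		dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
		pci_set_drvdata(pdev, NULL);
		destroy_hdev(hdev);		/* hypothetical teardown */
		return rc;
	}

	return 0;
}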

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	bool device_in_reset;
	ktime_t timeout;
	u64 reset_sec;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	if (hdev->pldm)
		reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
	else
		reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

	/*
	 * This function competes with the reset function, so try to take the
	 * reset atomic and, if we are already in the middle of a reset, wait
	 * until the reset function is finished. The reset function is designed
	 * to always finish. However, in Gaudi, because of all the network
	 * ports, the hard reset could take between 10-30 seconds
	 */
	timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);

	spin_lock(&hdev->reset_info.lock);
	device_in_reset = !!hdev->reset_info.in_reset;
	if (!device_in_reset)
		hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	while (device_in_reset) {
		usleep_range(50, 200);

		spin_lock(&hdev->reset_info.lock);
		device_in_reset = !!hdev->reset_info.in_reset;
		if (!device_in_reset)
			hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"%s Failed to remove device because reset function did not finish\n",
				dev_name(&(hdev)->pdev->dev));
			return;
		}
	}

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked as disabled,
	 * the message won't be sent. Also, in case of heartbeat, the device
	 * CPU is marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	/* Mark device as disabled */
	hdev->disabled = true;

	take_release_locks(hdev);

	hdev->reset_info.hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	cleanup_resources(hdev, true, false, false);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in cs rollback
	 */
	dev_info(hdev->dev,
		"Waiting for all processes to exit (timeout of %u seconds)",
		HL_PENDING_RESET_LONG_SEC);

	rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all open processes\n");
		device_disable_open_processes(hdev, false);
	}

	rc = device_kill_open_processes(hdev, 0, true);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
		device_disable_open_processes(hdev, true);
	}

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true, false);

	hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

	/* Release kernel context */
	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_debugfs_remove_device(hdev);

	hl_dec_fini(hdev);

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	hl_eq_fini(hdev, &hdev->event_queue);

	kfree(hdev->shadow_cs_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
	kfree(hdev->user_interrupt);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}
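
/*
 * Illustrative sketch (not part of the original file): the matching removal
 * path simply hands the device to hl_device_fini() before dropping the
 * per-device bookkeeping. destroy_hdev() is the same hypothetical helper as
 * in the probe sketch above.
 */
static void example_remove_one(struct pci_dev *pdev)
{
	struct hl_device *hdev = pci_get_drvdata(pdev);

	if (!hdev)
		return;

	hl_device_fini(hdev);

	pci_set_drvdata(pdev, NULL);
	destroy_hdev(hdev);
}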

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}
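
/*
 * Illustrative sketch (not part of the original file): a read-modify-write
 * helper built only on hl_rreg()/hl_wreg() above. Drivers usually wrap this
 * pattern in a macro; the helper name below is hypothetical.
 */
static inline void example_hl_rmw_reg(struct hl_device *hdev, u32 reg,
					u32 mask, u32 val)
{
	u32 tmp = hl_rreg(hdev, reg);

	tmp &= ~mask;		/* clear the field being updated */
	tmp |= (val & mask);	/* set the new value within the field */

	hl_wreg(hdev, reg, tmp);
}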