// SPDX-License-Identifier: GPL-2.0-only
/**
 * IBM Accelerator Family 'GenWQE'
 *
 * (C) Copyright IBM Corp. 2013
 *
 * Author: Frank Haverkamp <[email protected]>
 * Author: Joerg-Stephan Vogt <[email protected]>
 * Author: Michael Jung <[email protected]>
 * Author: Michael Ruettger <[email protected]>
 */

/*
 * Module initialization and PCIe setup. Card health monitoring and
 * recovery functionality. Character device creation and deletion are
 * controlled from here.
 */

#include <linux/types.h>
#include <linux/pci.h>
#include <linux/err.h>
#include <linux/aer.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/device.h>
#include <linux/log2.h>

#include "card_base.h"
#include "card_ddcb.h"

MODULE_AUTHOR("Frank Haverkamp <[email protected]>");
MODULE_AUTHOR("Michael Ruettger <[email protected]>");
MODULE_AUTHOR("Joerg-Stephan Vogt <[email protected]>");
MODULE_AUTHOR("Michael Jung <[email protected]>");
MODULE_DESCRIPTION("GenWQE Card");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("GPL");

static char genwqe_driver_name[] = GENWQE_DEVNAME;
static struct class *class_genwqe;
static struct dentry *debugfs_genwqe;
static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];

/* PCI structure for identifying device by PCI vendor and device ID */
static const struct pci_device_id genwqe_device_table[] = {
        { .vendor      = PCI_VENDOR_ID_IBM,
          .device      = PCI_DEVICE_GENWQE,
          .subvendor   = PCI_SUBVENDOR_ID_IBM,
          .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
          .class       = (PCI_CLASSCODE_GENWQE5 << 8),
          .class_mask  = ~0,
          .driver_data = 0 },

        /* Initial SR-IOV bring-up image */
        { .vendor      = PCI_VENDOR_ID_IBM,
          .device      = PCI_DEVICE_GENWQE,
          .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
          .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
          .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
          .class_mask  = ~0,
          .driver_data = 0 },

        { .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
          .device      = 0x0000,             /* VF Device ID */
          .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
          .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
          .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
          .class_mask  = ~0,
          .driver_data = 0 },

        /* Fixed up image */
        { .vendor      = PCI_VENDOR_ID_IBM,
          .device      = PCI_DEVICE_GENWQE,
          .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
          .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
          .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
          .class_mask  = ~0,
          .driver_data = 0 },

        { .vendor      = PCI_VENDOR_ID_IBM,  /* VF Vendor ID */
          .device      = 0x0000,             /* VF Device ID */
          .subvendor   = PCI_SUBVENDOR_ID_IBM_SRIOV,
          .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5,
          .class       = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
          .class_mask  = ~0,
          .driver_data = 0 },

        /* Even one more ... */
        { .vendor      = PCI_VENDOR_ID_IBM,
          .device      = PCI_DEVICE_GENWQE,
          .subvendor   = PCI_SUBVENDOR_ID_IBM,
          .subdevice   = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
          .class       = (PCI_CLASSCODE_GENWQE5 << 8),
          .class_mask  = ~0,
          .driver_data = 0 },

        { 0, }  /* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, genwqe_device_table);

/**
 * genwqe_dev_alloc() - Create and prepare a new card descriptor
 *
 * Return: Pointer to card descriptor, or ERR_PTR(err) on error
 */
static struct genwqe_dev *genwqe_dev_alloc(void)
{
        unsigned int i = 0, j;
        struct genwqe_dev *cd;

        for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
                if (genwqe_devices[i] == NULL)
                        break;
        }
        if (i >= GENWQE_CARD_NO_MAX)
                return ERR_PTR(-ENODEV);

        cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
        if (!cd)
                return ERR_PTR(-ENOMEM);

        cd->card_idx = i;
        cd->class_genwqe = class_genwqe;
        cd->debugfs_genwqe = debugfs_genwqe;

        /*
         * This comes from the kernel config option and can be
         * overwritten via debugfs.
         */
        cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;

        init_waitqueue_head(&cd->queue_waitq);

        spin_lock_init(&cd->file_lock);
        INIT_LIST_HEAD(&cd->file_list);

        cd->card_state = GENWQE_CARD_UNUSED;
        spin_lock_init(&cd->print_lock);

        cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT;
        cd->kill_timeout = GENWQE_KILL_TIMEOUT;

        for (j = 0; j < GENWQE_MAX_VFS; j++)
                cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC;

        genwqe_devices[i] = cd;
        return cd;
}

static void genwqe_dev_free(struct genwqe_dev *cd)
{
        if (!cd)
                return;

        genwqe_devices[cd->card_idx] = NULL;
        kfree(cd);
}

/**
 * genwqe_bus_reset() - Card recovery
 * @cd: GenWQE device information
 *
 * pci_reset_function() will recover the device and ensure that the
 * registers are accessible again when it completes with success. If
 * it does not succeed, the card stays dead and its registers remain
 * inaccessible.
 */
static int genwqe_bus_reset(struct genwqe_dev *cd)
{
        int rc = 0;
        struct pci_dev *pci_dev = cd->pci_dev;
        void __iomem *mmio;

        if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
                return -EIO;

        mmio = cd->mmio;
        cd->mmio = NULL;
        pci_iounmap(pci_dev, mmio);

        pci_release_mem_regions(pci_dev);

        /*
         * Firmware/BIOS might change memory mapping during bus reset.
         * Settings like enable bus-mastering, ... are backed up and
         * restored by pci_reset_function().
         */
        dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
        rc = pci_reset_function(pci_dev);
        if (rc) {
                dev_err(&pci_dev->dev,
                        "[%s] err: failed reset func (rc %d)\n", __func__, rc);
                return rc;
        }
        dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);

        /*
         * Here is the right spot to clear the register read
         * failure. pci_bus_reset() does this job in real systems.
         */
        cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
                            GENWQE_INJECT_GFIR_FATAL |
                            GENWQE_INJECT_GFIR_INFO);

        rc = pci_request_mem_regions(pci_dev, genwqe_driver_name);
        if (rc) {
                dev_err(&pci_dev->dev,
                        "[%s] err: request bars failed (%d)\n", __func__, rc);
                return -EIO;
        }

        cd->mmio = pci_iomap(pci_dev, 0, 0);
        if (cd->mmio == NULL) {
                dev_err(&pci_dev->dev,
                        "[%s] err: mapping BAR0 failed\n", __func__);
                return -ENOMEM;
        }
        return 0;
}

/*
 * Hardware circumvention section. Certain bitstreams in our test-lab
 * had different kinds of problems. Here is where we adjust those
 * bitstreams to function well with this version of our device driver.
 *
 * These circumventions are applied to the physical function only.
 * The magical numbers below are identifying development/manufacturing
 * versions of the bitstream used on the card.
 *
 * Turn off error reporting for old/manufacturing images.
 */
bool genwqe_need_err_masking(struct genwqe_dev *cd)
{
        return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
}
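
/*
 * Note: the 0xFFFF0ull mask used above and below extracts the bitstream
 * version field from SLU_UNITCFG; the 0x32170ull threshold appears to
 * correspond to the 2013-02-17 bitstream build referenced in the comment
 * of genwqe_recovery_on_fatal_gfir_required() further down.
 */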

static void genwqe_tweak_hardware(struct genwqe_dev *cd)
{
        struct pci_dev *pci_dev = cd->pci_dev;

        /* Mask FIRs for development images */
        if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
            ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
                dev_warn(&pci_dev->dev,
                         "FIRs masked due to bitstream %016llx.%016llx\n",
                         cd->slu_unitcfg, cd->app_unitcfg);

                __genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
                                0xFFFFFFFFFFFFFFFFull);

                __genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
                                0x0000000000000000ull);
        }
}

/**
 * genwqe_recovery_on_fatal_gfir_required() - Version-dependent actions
 * @cd: GenWQE device information
 *
 * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
 * be ignored. This is e.g. true for the bitstream we gave to the card
 * manufacturer, but also for some old bitstreams we released to our
 * test-lab.
 */
int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
{
        return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
}

int genwqe_flash_readback_fails(struct genwqe_dev *cd)
{
        return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
}

/**
 * genwqe_T_psec() - Calculate PF/VF timeout register content
 * @cd: GenWQE device information
 *
 * Note: From a design perspective it turned out to be a bad idea to
 * use codes here to specify the frequency/speed values. An old
 * driver cannot understand new codes and is therefore always a
 * problem. Better is to measure out the value or put the
 * speed/frequency directly into a register which is always a valid
 * value for old as well as for new software.
 */
/* T = 1/f */
static int genwqe_T_psec(struct genwqe_dev *cd)
{
        u16 speed;      /* 1/f -> 250, 200, 166, 175 */
        static const int T[] = { 4000, 5000, 6000, 5714 };

        speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
        if (speed >= ARRAY_SIZE(T))
                return -1;      /* illegal value */

        return T[speed];
}
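
/*
 * Example: a speed code of 2 in SLU_UNITCFG denotes a 166 MHz clock, so
 * genwqe_T_psec() returns 6000 ps (about 6 ns per cycle), which is the
 * value the timeout formulas below refer to.
 */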

/**
 * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
 * @cd: GenWQE device information
 *
 * Do this _after_ card_reset() is called. Otherwise the values will
 * vanish. The settings need to be done when the queues are inactive.
 *
 * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
 * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
 */
static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
{
        u32 T = genwqe_T_psec(cd);
        u64 x;

        if (GENWQE_PF_JOBTIMEOUT_MSEC == 0)
                return false;

        /* PF: large value needed, flash update 2sec per block */
        x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC *
                  16000000000uL/(T * 15)) - 10;
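
        /*
         * The exponent x is obtained by solving the formula from the
         * comment above, timeout = 2^(10+x) * T * 15/16, for x. The
         * millisecond timeout is converted to picoseconds (factor 10^9),
         * which together with the 16/15 term gives the
         * 16000000000uL/(T * 15) expression; ilog2() rounds down.
         * The same derivation is used for the VF timeouts below.
         */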
        genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
                          0xff00 | (x & 0xff), 0);
        return true;
}

/**
 * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
 * @cd: GenWQE device information
 */
static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
{
        struct pci_dev *pci_dev = cd->pci_dev;
        unsigned int vf;
        u32 T = genwqe_T_psec(cd);
        u64 x;
        int totalvfs;

        totalvfs = pci_sriov_get_totalvfs(pci_dev);
        if (totalvfs <= 0)
                return false;

        for (vf = 0; vf < totalvfs; vf++) {

                if (cd->vf_jobtimeout_msec[vf] == 0)
                        continue;

                x = ilog2(cd->vf_jobtimeout_msec[vf] *
                          16000000000uL/(T * 15)) - 10;

                genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
                                  0xff00 | (x & 0xff), vf + 1);
        }
        return true;
}

static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
{
        unsigned int type, e = 0;

        for (type = 0; type < GENWQE_DBG_UNITS; type++) {
                switch (type) {
                case GENWQE_DBG_UNIT0:
                        e = genwqe_ffdc_buff_size(cd, 0);
                        break;
                case GENWQE_DBG_UNIT1:
                        e = genwqe_ffdc_buff_size(cd, 1);
                        break;
                case GENWQE_DBG_UNIT2:
                        e = genwqe_ffdc_buff_size(cd, 2);
                        break;
                case GENWQE_DBG_REGS:
                        e = GENWQE_FFDC_REGS;
                        break;
                }

                /* currently we support only the debug units mentioned here */
                cd->ffdc[type].entries = e;
                cd->ffdc[type].regs =
                        kmalloc_array(e, sizeof(struct genwqe_reg),
                                      GFP_KERNEL);
                /*
                 * regs == NULL is ok; the code using it treats this as
                 * "no regs available". Printing a warning in that case
                 * is fine.
                 */
        }
        return 0;
}

static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
{
        unsigned int type;

        for (type = 0; type < GENWQE_DBG_UNITS; type++) {
                kfree(cd->ffdc[type].regs);
                cd->ffdc[type].regs = NULL;
        }
}

static int genwqe_read_ids(struct genwqe_dev *cd)
{
        int err = 0;
        int slu_id;
        struct pci_dev *pci_dev = cd->pci_dev;

        cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
        if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
                dev_err(&pci_dev->dev,
                        "err: SLUID=%016llx\n", cd->slu_unitcfg);
                err = -EIO;
                goto out_err;
        }

        slu_id = genwqe_get_slu_id(cd);
        if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
                dev_err(&pci_dev->dev,
                        "err: incompatible SLU Architecture %u\n", slu_id);
                err = -ENOENT;
                goto out_err;
        }

        cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
        if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
                dev_err(&pci_dev->dev,
                        "err: APPID=%016llx\n", cd->app_unitcfg);
                err = -EIO;
                goto out_err;
        }
        genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));

        /*
         * Is access to all registers possible? If we are a VF the
         * answer is obvious. If we run fully virtualized, we need to
         * check if we can access all registers. If we do not have
         * full access we will cause an UR and some informational FIRs
         * in the PF, but that should do no harm.
         */
        if (pci_dev->is_virtfn)
                cd->is_privileged = 0;
        else
                cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
                                     != IO_ILLEGAL_VALUE);

out_err:
        return err;
}

static int genwqe_start(struct genwqe_dev *cd)
{
        int err;
        struct pci_dev *pci_dev = cd->pci_dev;

        err = genwqe_read_ids(cd);
        if (err)
                return err;

        if (genwqe_is_privileged(cd)) {
                /* do this after the tweaks. alloc fail is acceptable */
                genwqe_ffdc_buffs_alloc(cd);
                genwqe_stop_traps(cd);
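
                /*
                 * Traps are presumably stopped here so that the FFDC
                 * data captured below is a consistent snapshot; they
                 * are re-enabled by genwqe_start_traps() once all
                 * buffers have been read.
                 */
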
                /* Collect registers e.g. FIRs, UNITIDs, traces ... */
                genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
                                      cd->ffdc[GENWQE_DBG_REGS].entries, 0);

                genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
                                      cd->ffdc[GENWQE_DBG_UNIT0].regs,
                                      cd->ffdc[GENWQE_DBG_UNIT0].entries);

                genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
                                      cd->ffdc[GENWQE_DBG_UNIT1].regs,
                                      cd->ffdc[GENWQE_DBG_UNIT1].entries);

                genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
                                      cd->ffdc[GENWQE_DBG_UNIT2].regs,
                                      cd->ffdc[GENWQE_DBG_UNIT2].entries);

                genwqe_start_traps(cd);

                if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
                        dev_warn(&pci_dev->dev,
                                 "[%s] chip reload/recovery!\n", __func__);

                        /*
                         * Stealth Mode: Reload chip on either hot
                         * reset or PERST.
                         */
                        cd->softreset = 0x7Cull;
                        __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
                                        cd->softreset);

                        err = genwqe_bus_reset(cd);
                        if (err != 0) {
                                dev_err(&pci_dev->dev,
                                        "[%s] err: bus reset failed!\n",
                                        __func__);
                                goto out;
                        }

                        /*
                         * Re-read the IDs because it could happen that
                         * the bitstream load failed!
                         */
                        err = genwqe_read_ids(cd);
                        if (err)
                                goto out;
                }
        }

        err = genwqe_setup_service_layer(cd);  /* does a reset of the card */
        if (err != 0) {
                dev_err(&pci_dev->dev,
                        "[%s] err: could not setup servicelayer!\n", __func__);
                err = -ENODEV;
                goto out;
        }

        if (genwqe_is_privileged(cd)) {  /* code is running _after_ reset */
                genwqe_tweak_hardware(cd);

                genwqe_setup_pf_jtimer(cd);
                genwqe_setup_vf_jtimer(cd);
        }

        err = genwqe_device_create(cd);
        if (err < 0) {
                dev_err(&pci_dev->dev,
                        "err: chdev init failed! (err=%d)\n", err);
                goto out_release_service_layer;
        }
        return 0;

out_release_service_layer:
        genwqe_release_service_layer(cd);
out:
        if (genwqe_is_privileged(cd))
                genwqe_ffdc_buffs_free(cd);
        return -EIO;
}

/**
 * genwqe_stop() - Stop card operation
 * @cd: GenWQE device information
 *
 * Recovery notes:
 *   As long as genwqe_thread runs we might access registers during
 *   error data capture. The same is true for the genwqe_health_thread.
 *   When genwqe_bus_reset() fails this function might be called twice:
 *   first by genwqe_health_thread() and later by genwqe_remove() to
 *   unbind the device. We must be able to survive that.
 *
 * This function must be robust enough to be called twice.
 */
static int genwqe_stop(struct genwqe_dev *cd)
{
        genwqe_finish_queue(cd);            /* no register access */
        genwqe_device_remove(cd);           /* device removed, procs killed */
        genwqe_release_service_layer(cd);   /* here genwqe_thread is stopped */

        if (genwqe_is_privileged(cd)) {
                pci_disable_sriov(cd->pci_dev); /* access pci config space */
                genwqe_ffdc_buffs_free(cd);
        }
        return 0;
}

/**
 * genwqe_recover_card() - Try to recover the card if it is possible
 * @cd: GenWQE device information
 * @fatal_err: Indicate whether to attempt soft reset
 *
 * If fatal_err is set no register access is possible anymore. It is
 * likely that genwqe_start fails in that situation. Proper error
 * handling is required in this case.
 *
 * genwqe_bus_reset() will cause the pci code to call genwqe_remove()
 * and later genwqe_probe() for all virtual functions.
 */
static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
{
        int rc;
        struct pci_dev *pci_dev = cd->pci_dev;

        genwqe_stop(cd);

        /*
         * Make sure chip is not reloaded to maintain FFDC. Write SLU
         * Reset Register, CPLDReset field to 0.
         */
        if (!fatal_err) {
                cd->softreset = 0x70ull;
                __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
        }

        rc = genwqe_bus_reset(cd);
        if (rc != 0) {
                dev_err(&pci_dev->dev,
                        "[%s] err: card recovery impossible!\n", __func__);
                return rc;
        }

        rc = genwqe_start(cd);
        if (rc < 0) {
                dev_err(&pci_dev->dev,
                        "[%s] err: failed to launch device!\n", __func__);
                return rc;
        }
        return 0;
}

static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
{
        *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
        return (*gfir & GFIR_ERR_TRIGGER) &&
                genwqe_recovery_on_fatal_gfir_required(cd);
}
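
/*
 * The condition above is what wakes up genwqe_health_thread(): a fatal
 * bit in GFIR_ERR_TRIGGER is set and the bitstream version is one for
 * which recovery on fatal GFIRs is actually required.
 */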

/**
 * genwqe_fir_checking() - Check the fault isolation registers of the card
 * @cd: GenWQE device information
 *
 * If this code works ok, it can be tried out with the help of the
 * genwqe_poke tool:
 *   sudo ./tools/genwqe_poke 0x8 0xfefefefefef
 *
 * Now the relevant FIRs/sFIRs should be printed out and the driver should
 * invoke recovery (devices are removed and re-added).
 */
static u64 genwqe_fir_checking(struct genwqe_dev *cd)
{
        int j, iterations = 0;
        u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
        u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
        struct pci_dev *pci_dev = cd->pci_dev;
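
        /*
         * Register layout as used below: the unit id selects the
         * register block via address bits 31:24. Within a block, 0x08
         * is the primary FIR, 0x10 the FIR clear register, 0x18 the
         * primary FEC, and 0x100 + 8*j / 0x300 + 8*j the secondary
         * FIR/FEC belonging to FIR bit j.
         */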

healthMonitor:
        iterations++;
        if (iterations > 16) {
                dev_err(&pci_dev->dev, "* exit looping after %d times\n",
                        iterations);
                goto fatal_error;
        }

        gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
        if (gfir != 0x0)
                dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
                        IO_SLC_CFGREG_GFIR, gfir);
        if (gfir == IO_ILLEGAL_VALUE)
                goto fatal_error;

        /*
         * Avoid printing when no GFIR bit is on; this prevents a
         * continuous printout e.g. for the following bug:
         *   FIR set without a secondary FIR / FIR cannot be cleared
         * Comment out the following 'if' to get the prints:
         */
        if (gfir == 0)
                return 0;

        gfir_masked = gfir & GFIR_ERR_TRIGGER;  /* fatal errors */

        for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) { /* 0..2 in zEDC */

                /* read the primary FIR (pfir) */
                fir_addr = (uid << 24) + 0x08;
                fir = __genwqe_readq(cd, fir_addr);
                if (fir == 0x0)
                        continue;  /* no error in this unit */

                dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
                if (fir == IO_ILLEGAL_VALUE)
                        goto fatal_error;

                /* read primary FEC */
                fec_addr = (uid << 24) + 0x18;
                fec = __genwqe_readq(cd, fec_addr);

                dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
                if (fec == IO_ILLEGAL_VALUE)
                        goto fatal_error;

                for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {

                        /* secondary fir empty, skip it */
                        if ((fir & mask) == 0x0)
                                continue;

                        sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
                        sfir = __genwqe_readq(cd, sfir_addr);

                        if (sfir == IO_ILLEGAL_VALUE)
                                goto fatal_error;
                        dev_err(&pci_dev->dev,
                                "* 0x%08x 0x%016llx\n", sfir_addr, sfir);

                        sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
                        sfec = __genwqe_readq(cd, sfec_addr);

                        if (sfec == IO_ILLEGAL_VALUE)
                                goto fatal_error;
                        dev_err(&pci_dev->dev,
                                "* 0x%08x 0x%016llx\n", sfec_addr, sfec);

                        gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
                        if (gfir == IO_ILLEGAL_VALUE)
                                goto fatal_error;

                        /*
                         * GFIR turned on during this routine! Get out
                         * and start over.
                         */
                        if ((gfir_masked == 0x0) &&
                            (gfir & GFIR_ERR_TRIGGER)) {
                                goto healthMonitor;
                        }

                        /* do not clear if we entered with a fatal gfir */
                        if (gfir_masked == 0x0) {

                                /* NEW clear by mask the logged bits */
                                sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
                                __genwqe_writeq(cd, sfir_addr, sfir);

                                dev_dbg(&pci_dev->dev,
                                        "[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n",
                                        sfir_addr, sfir);

                                /*
                                 * Note, these cannot be error FIRs
                                 * since gfir_masked is 0 after sfir
                                 * was read. Also, it is safe to do
                                 * this write if sfir=0. Still need to
                                 * clear the primary. This just means
                                 * there is no secondary FIR.
                                 */

                                /* clear by mask the logged bit. */
                                fir_clr_addr = (uid << 24) + 0x10;
                                __genwqe_writeq(cd, fir_clr_addr, mask);

                                dev_dbg(&pci_dev->dev,
                                        "[HM] Clearing primary FIR 0x%08x with 0x%016llx\n",
                                        fir_clr_addr, mask);
                        }
                }
        }

        gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
        if (gfir == IO_ILLEGAL_VALUE)
                goto fatal_error;

        if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
                /*
                 * Check once more that it didn't go on after all the
                 * FIRs were cleared.
                 */
                dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
                        iterations);
                goto healthMonitor;
        }
        return gfir_masked;

fatal_error:
        return IO_ILLEGAL_VALUE;
}

/**
 * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot
 * @pci_dev: PCI device information struct
 *
 * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this
 * reset method will not work in all cases.
 *
 * Return: 0 on success or error code from pci_set_pcie_reset_state()
 */
static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
{
        int rc;

        /*
         * lock pci config space access from userspace,
         * save state and issue PCIe fundamental reset
         */
        pci_cfg_access_lock(pci_dev);
        pci_save_state(pci_dev);
        rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset);
        if (!rc) {
                /* keep PCIe reset asserted for 250ms */
                msleep(250);
                pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset);
                /* Wait for 2s to reload flash and train the link */
                msleep(2000);
        }
        pci_restore_state(pci_dev);
        pci_cfg_access_unlock(pci_dev);
        return rc;
}

static int genwqe_platform_recovery(struct genwqe_dev *cd)
{
        struct pci_dev *pci_dev = cd->pci_dev;
        int rc;

        dev_info(&pci_dev->dev,
                 "[%s] resetting card for error recovery\n", __func__);

        /* Clear out error injection flags */
        cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
                            GENWQE_INJECT_GFIR_FATAL |
                            GENWQE_INJECT_GFIR_INFO);

        genwqe_stop(cd);

        /* Try recovering the card with a fundamental reset */
        rc = genwqe_pci_fundamental_reset(pci_dev);
        if (!rc) {
                rc = genwqe_start(cd);
                if (!rc)
                        dev_info(&pci_dev->dev,
                                 "[%s] card recovered\n", __func__);
                else
                        dev_err(&pci_dev->dev,
                                "[%s] err: cannot start card services! (err=%d)\n",
                                __func__, rc);
        } else {
                dev_err(&pci_dev->dev,
                        "[%s] card reset failed\n", __func__);
        }

        return rc;
}

/**
 * genwqe_reload_bistream() - reload card bitstream
 * @cd: GenWQE device information
 *
 * Set the appropriate register and call fundamental reset to reload the
 * card bitstream.
 *
 * Return: 0 on success, error code otherwise
 */
static int genwqe_reload_bistream(struct genwqe_dev *cd)
{
        struct pci_dev *pci_dev = cd->pci_dev;
        int rc;

        dev_info(&pci_dev->dev,
                 "[%s] resetting card for bitstream reload\n",
                 __func__);

        genwqe_stop(cd);

        /*
         * Cause a CPLD reprogram with the 'next_bitstream'
         * partition on PCIe hot or fundamental reset
         */
        __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
                        (cd->softreset & 0xcull) | 0x70ull);

        rc = genwqe_pci_fundamental_reset(pci_dev);
        if (rc) {
                /*
                 * A fundamental reset failure can be caused
                 * by lack of support on the arch, so we just
                 * log the error and try to start the card
                 * again.
                 */
                dev_err(&pci_dev->dev,
                        "[%s] err: failed to reset card for bitstream reload\n",
                        __func__);
        }

        rc = genwqe_start(cd);
        if (rc) {
                dev_err(&pci_dev->dev,
                        "[%s] err: cannot start card services! (err=%d)\n",
                        __func__, rc);
                return rc;
        }
        dev_info(&pci_dev->dev,
                 "[%s] card reloaded\n", __func__);
        return 0;
}

/**
 * genwqe_health_thread() - Health checking thread
 * @data: GenWQE device information
 *
 * This thread is only started for the PF of the card.
 *
 * This thread monitors the health of the card. A critical situation
 * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
 * this case we need to be recovered from outside. Writing to
 * registers will very likely not work either.
 *
 * This thread must only exit if kthread_should_stop() becomes true.
 *
 * Condition for the health-thread to trigger:
 *   a) when a kthread_stop() request comes in or
 *   b) a critical GFIR occurred
 *
 * Informational GFIRs are checked and potentially printed every
 * GENWQE_HEALTH_CHECK_INTERVAL seconds.
 */
static int genwqe_health_thread(void *data)
{
        int rc, should_stop = 0;
        struct genwqe_dev *cd = data;
        struct pci_dev *pci_dev = cd->pci_dev;
        u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;

health_thread_begin:
        while (!kthread_should_stop()) {
                rc = wait_event_interruptible_timeout(cd->health_waitq,
                         (genwqe_health_check_cond(cd, &gfir) ||
                          (should_stop = kthread_should_stop())),
                         GENWQE_HEALTH_CHECK_INTERVAL * HZ);

                if (should_stop)
                        break;

                if (gfir == IO_ILLEGAL_VALUE) {
                        dev_err(&pci_dev->dev,
                                "[%s] GFIR=%016llx\n", __func__, gfir);
                        goto fatal_error;
                }

                slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
                if (slu_unitcfg == IO_ILLEGAL_VALUE) {
                        dev_err(&pci_dev->dev,
                                "[%s] SLU_UNITCFG=%016llx\n",
                                __func__, slu_unitcfg);
                        goto fatal_error;
                }

                app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
                if (app_unitcfg == IO_ILLEGAL_VALUE) {
                        dev_err(&pci_dev->dev,
                                "[%s] APP_UNITCFG=%016llx\n",
                                __func__, app_unitcfg);
                        goto fatal_error;
                }

                gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
                if (gfir == IO_ILLEGAL_VALUE) {
                        dev_err(&pci_dev->dev,
                                "[%s] %s: GFIR=%016llx\n", __func__,
                                (gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
                                gfir);
                        goto fatal_error;
                }

                gfir_masked = genwqe_fir_checking(cd);
                if (gfir_masked == IO_ILLEGAL_VALUE)
                        goto fatal_error;

                /*
                 * GFIR ErrorTrigger bits set => reset the card!
                 * Never do this for old/manufacturing images!
                 */
                if ((gfir_masked) && !cd->skip_recovery &&
                    genwqe_recovery_on_fatal_gfir_required(cd)) {

                        cd->card_state = GENWQE_CARD_FATAL_ERROR;

                        rc = genwqe_recover_card(cd, 0);
                        if (rc < 0) {
                                /* FIXME Card is unusable and needs unbind! */
                                goto fatal_error;
                        }
                }

                if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) {
                        /* Userspace requested card bitstream reload */
                        rc = genwqe_reload_bistream(cd);
                        if (rc)
                                goto fatal_error;
                }

                cd->last_gfir = gfir;
                cond_resched();
        }

        return 0;

fatal_error:
        if (cd->use_platform_recovery) {
                /*
                 * Since we use raw accessors, EEH errors won't be
                 * detected by the platform until we do a non-raw MMIO
                 * or config space read.
                 */
                readq(cd->mmio + IO_SLC_CFGREG_GFIR);

                /* Do nothing if the card is undergoing PCI error recovery */
                if (pci_channel_offline(pci_dev))
                        return -EIO;

                /*
                 * If it's supported by the platform, we try a fundamental reset
                 * to recover from a fatal error. Otherwise, we continue to wait
                 * for an external recovery procedure to take care of it.
                 */
                rc = genwqe_platform_recovery(cd);
                if (!rc)
                        goto health_thread_begin;
        }

        dev_err(&pci_dev->dev,
                "[%s] card unusable. Please trigger unbind!\n", __func__);

        /* Bring down logical devices to inform user space via udev remove. */
        cd->card_state = GENWQE_CARD_FATAL_ERROR;
        genwqe_stop(cd);

        /* genwqe_bus_reset() failed. Now wait for genwqe_remove(). */
        while (!kthread_should_stop())
                cond_resched();

        return -EIO;
}

static int genwqe_health_check_start(struct genwqe_dev *cd)
{
        int rc;

        if (GENWQE_HEALTH_CHECK_INTERVAL <= 0)
                return 0;       /* valid for disabling the service */

        /* moved before request_irq() */
        /* init_waitqueue_head(&cd->health_waitq); */

        cd->health_thread = kthread_run(genwqe_health_thread, cd,
                                        GENWQE_DEVNAME "%d_health",
                                        cd->card_idx);
        if (IS_ERR(cd->health_thread)) {
                rc = PTR_ERR(cd->health_thread);
                cd->health_thread = NULL;
                return rc;
        }
        return 0;
}

static int genwqe_health_thread_running(struct genwqe_dev *cd)
{
        return cd->health_thread != NULL;
}

static int genwqe_health_check_stop(struct genwqe_dev *cd)
{
        if (!genwqe_health_thread_running(cd))
                return -EIO;

        kthread_stop(cd->health_thread);
        cd->health_thread = NULL;
        return 0;
}

/**
 * genwqe_pci_setup() - Allocate PCIe related resources for our card
 * @cd: GenWQE device information
 */
static int genwqe_pci_setup(struct genwqe_dev *cd)
{
        int err;
        struct pci_dev *pci_dev = cd->pci_dev;

        err = pci_enable_device_mem(pci_dev);
        if (err) {
                dev_err(&pci_dev->dev,
                        "err: failed to enable pci memory (err=%d)\n", err);
                goto err_out;
        }

        /* Reserve PCI I/O and memory resources */
        err = pci_request_mem_regions(pci_dev, genwqe_driver_name);
        if (err) {
                dev_err(&pci_dev->dev,
                        "[%s] err: request bars failed (%d)\n", __func__, err);
                err = -EIO;
                goto err_disable_device;
        }

        /* check for 64-bit DMA address supported (DAC) */
        /* check for 32-bit DMA address supported (SAC) */
        if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) &&
            dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) {
                dev_err(&pci_dev->dev,
                        "err: neither DMA32 nor DMA64 supported\n");
                err = -EIO;
                goto out_release_resources;
        }

        pci_set_master(pci_dev);
        pci_enable_pcie_error_reporting(pci_dev);

        /* EEH recovery requires PCIe fundamental reset */
        pci_dev->needs_freset = 1;

        /* request complete BAR-0 space (length = 0) */
        cd->mmio_len = pci_resource_len(pci_dev, 0);
        cd->mmio = pci_iomap(pci_dev, 0, 0);
        if (cd->mmio == NULL) {
                dev_err(&pci_dev->dev,
                        "[%s] err: mapping BAR0 failed\n", __func__);
                err = -ENOMEM;
                goto out_release_resources;
        }

        cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
        if (cd->num_vfs < 0)
                cd->num_vfs = 0;

        err = genwqe_read_ids(cd);
        if (err)
                goto out_iounmap;

        return 0;

out_iounmap:
        pci_iounmap(pci_dev, cd->mmio);
out_release_resources:
        pci_release_mem_regions(pci_dev);
err_disable_device:
        pci_disable_device(pci_dev);
err_out:
        return err;
}

/**
 * genwqe_pci_remove() - Free PCIe related resources for our card
 * @cd: GenWQE device information
 */
static void genwqe_pci_remove(struct genwqe_dev *cd)
{
        struct pci_dev *pci_dev = cd->pci_dev;

        if (cd->mmio)
                pci_iounmap(pci_dev, cd->mmio);

        pci_release_mem_regions(pci_dev);
        pci_disable_device(pci_dev);
}

/**
 * genwqe_probe() - Device initialization
 * @pci_dev: PCI device information struct
 * @id: PCI device ID
 *
 * Callable for multiple cards. This function is called on bind.
 *
 * Return: 0 if succeeded, < 0 when failed
 */
static int genwqe_probe(struct pci_dev *pci_dev,
                        const struct pci_device_id *id)
{
        int err;
        struct genwqe_dev *cd;

        genwqe_init_crc32();

        cd = genwqe_dev_alloc();
        if (IS_ERR(cd)) {
                dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
                        (int)PTR_ERR(cd));
                return PTR_ERR(cd);
        }

        dev_set_drvdata(&pci_dev->dev, cd);
        cd->pci_dev = pci_dev;

        err = genwqe_pci_setup(cd);
        if (err < 0) {
                dev_err(&pci_dev->dev,
                        "err: problems with PCI setup (err=%d)\n", err);
                goto out_free_dev;
        }

        err = genwqe_start(cd);
        if (err < 0) {
                dev_err(&pci_dev->dev,
                        "err: cannot start card services! (err=%d)\n", err);
                goto out_pci_remove;
        }

        if (genwqe_is_privileged(cd)) {
                err = genwqe_health_check_start(cd);
                if (err < 0) {
                        dev_err(&pci_dev->dev,
                                "err: cannot start health checking! (err=%d)\n",
                                err);
                        goto out_stop_services;
                }
        }
        return 0;

out_stop_services:
        genwqe_stop(cd);
out_pci_remove:
        genwqe_pci_remove(cd);
out_free_dev:
        genwqe_dev_free(cd);
        return err;
}

/**
 * genwqe_remove() - Called when device is removed (hot-pluggable)
 * @pci_dev: PCI device information struct
 *
 * Also called when the driver is unloaded or the device is unbound,
 * respectively.
 */
static void genwqe_remove(struct pci_dev *pci_dev)
{
        struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);

        genwqe_health_check_stop(cd);

        /*
         * genwqe_stop() must survive if it is called twice
         * sequentially. This happens when the health thread calls it
         * and fails on genwqe_bus_reset().
         */
        genwqe_stop(cd);
        genwqe_pci_remove(cd);
        genwqe_dev_free(cd);
}

/**
 * genwqe_err_error_detected() - Error detection callback
 * @pci_dev: PCI device information struct
 * @state: PCI channel state
 *
 * This callback is called by the PCI subsystem whenever a PCI bus
 * error is detected.
 */
static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
                                                  pci_channel_state_t state)
{
        struct genwqe_dev *cd;

        dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);

        cd = dev_get_drvdata(&pci_dev->dev);
        if (cd == NULL)
                return PCI_ERS_RESULT_DISCONNECT;

        /* Stop the card */
        genwqe_health_check_stop(cd);
        genwqe_stop(cd);

        /*
         * On permanent failure, the PCI code will call device remove
         * after the return of this function.
         * genwqe_stop() can be called twice.
         */
        if (state == pci_channel_io_perm_failure) {
                return PCI_ERS_RESULT_DISCONNECT;
        } else {
                genwqe_pci_remove(cd);
                return PCI_ERS_RESULT_NEED_RESET;
        }
}

static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev)
{
        int rc;
        struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);

        rc = genwqe_pci_setup(cd);
        if (!rc) {
                return PCI_ERS_RESULT_RECOVERED;
        } else {
                dev_err(&pci_dev->dev,
                        "err: problems with PCI setup (err=%d)\n", rc);
                return PCI_ERS_RESULT_DISCONNECT;
        }
}

static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
{
        return PCI_ERS_RESULT_NONE;
}

static void genwqe_err_resume(struct pci_dev *pci_dev)
{
        int rc;
        struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);

        rc = genwqe_start(cd);
        if (!rc) {
                rc = genwqe_health_check_start(cd);
                if (rc)
                        dev_err(&pci_dev->dev,
                                "err: cannot start health checking! (err=%d)\n",
                                rc);
        } else {
                dev_err(&pci_dev->dev,
                        "err: cannot start card services! (err=%d)\n", rc);
        }
}
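
/*
 * .sriov_configure callback: invoked when the number of VFs is set via
 * the sriov_numvfs sysfs attribute. A positive numvfs enables that many
 * VFs (after programming their job timeouts), numvfs == 0 disables
 * SR-IOV again. On success the number of enabled VFs is returned,
 * otherwise a negative errno.
 */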

static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
{
        int rc;
        struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);

        if (numvfs > 0) {
                genwqe_setup_vf_jtimer(cd);
                rc = pci_enable_sriov(dev, numvfs);
                if (rc < 0)
                        return rc;
                return numvfs;
        }
        if (numvfs == 0) {
                pci_disable_sriov(dev);
                return 0;
        }
        return 0;
}

static const struct pci_error_handlers genwqe_err_handler = {
        .error_detected = genwqe_err_error_detected,
        .mmio_enabled   = genwqe_err_result_none,
        .slot_reset     = genwqe_err_slot_reset,
        .resume         = genwqe_err_resume,
};

static struct pci_driver genwqe_driver = {
        .name             = genwqe_driver_name,
        .id_table         = genwqe_device_table,
        .probe            = genwqe_probe,
        .remove           = genwqe_remove,
        .sriov_configure  = genwqe_sriov_configure,
        .err_handler      = &genwqe_err_handler,
};

/**
 * genwqe_devnode() - Set default access mode for genwqe devices.
 * @dev:  Pointer to device (unused)
 * @mode: Carrier to pass-back given mode (permissions)
 *
 * Default mode should be rw for everybody. Do not change default
 * device name.
 */
static char *genwqe_devnode(struct device *dev, umode_t *mode)
{
        if (mode)
                *mode = 0666;
        return NULL;
}

/**
 * genwqe_init_module() - Driver registration and initialization
 */
static int __init genwqe_init_module(void)
{
        int rc;

        class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
        if (IS_ERR(class_genwqe)) {
                pr_err("[%s] create class failed\n", __func__);
                return -ENOMEM;
        }
        class_genwqe->devnode = genwqe_devnode;

        debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);

        rc = pci_register_driver(&genwqe_driver);
        if (rc != 0) {
                pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
                goto err_out0;
        }
        return rc;

err_out0:
        debugfs_remove(debugfs_genwqe);
        class_destroy(class_genwqe);
        return rc;
}

/**
 * genwqe_exit_module() - Driver exit
 */
static void __exit genwqe_exit_module(void)
{
        pci_unregister_driver(&genwqe_driver);
        debugfs_remove(debugfs_genwqe);
        class_destroy(class_genwqe);
}

module_init(genwqe_init_module);
module_exit(genwqe_exit_module);