iov.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * PCI Express I/O Virtualization (IOV) support
  4. * Single Root IOV 1.0
  5. * Address Translation Service 1.0
  6. *
  7. * Copyright (C) 2009 Intel Corporation, Yu Zhao <[email protected]>
  8. */
  9. #include <linux/pci.h>
  10. #include <linux/slab.h>
  11. #include <linux/export.h>
  12. #include <linux/string.h>
  13. #include <linux/delay.h>
  14. #include "pci.h"
  15. #define VIRTFN_ID_LEN 17 /* "virtfn%u\0" for 2^32 - 1 */
  16. int pci_iov_virtfn_bus(struct pci_dev *dev, int vf_id)
  17. {
  18. if (!dev->is_physfn)
  19. return -EINVAL;
  20. return dev->bus->number + ((dev->devfn + dev->sriov->offset +
  21. dev->sriov->stride * vf_id) >> 8);
  22. }
  23. int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
  24. {
  25. if (!dev->is_physfn)
  26. return -EINVAL;
  27. return (dev->devfn + dev->sriov->offset +
  28. dev->sriov->stride * vf_id) & 0xff;
  29. }
  30. EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
  31. int pci_iov_vf_id(struct pci_dev *dev)
  32. {
  33. struct pci_dev *pf;
  34. if (!dev->is_virtfn)
  35. return -EINVAL;
  36. pf = pci_physfn(dev);
  37. return (((dev->bus->number << 8) + dev->devfn) -
  38. ((pf->bus->number << 8) + pf->devfn + pf->sriov->offset)) /
  39. pf->sriov->stride;
  40. }
  41. EXPORT_SYMBOL_GPL(pci_iov_vf_id);
  42. /**
  43. * pci_iov_get_pf_drvdata - Return the drvdata of a PF
  44. * @dev: VF pci_dev
  45. * @pf_driver: Device driver required to own the PF
  46. *
  47. * This must be called from a context that ensures that a VF driver is attached.
  48. * The value returned is invalid once the VF driver completes its remove()
  49. * callback.
  50. *
  51. * Locking is achieved by the driver core. A VF driver cannot be probed until
  52. * pci_enable_sriov() is called and pci_disable_sriov() does not return until
  53. * all VF drivers have completed their remove().
  54. *
  55. * The PF driver must call pci_disable_sriov() before it begins to destroy the
  56. * drvdata.
  57. */
  58. void *pci_iov_get_pf_drvdata(struct pci_dev *dev, struct pci_driver *pf_driver)
  59. {
  60. struct pci_dev *pf_dev;
  61. if (!dev->is_virtfn)
  62. return ERR_PTR(-EINVAL);
  63. pf_dev = dev->physfn;
  64. if (pf_dev->driver != pf_driver)
  65. return ERR_PTR(-EINVAL);
  66. return pci_get_drvdata(pf_dev);
  67. }
  68. EXPORT_SYMBOL_GPL(pci_iov_get_pf_drvdata);
  69. /*
  70. * Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
  71. * change when NumVFs changes.
  72. *
  73. * Update iov->offset and iov->stride when NumVFs is written.
  74. */
  75. static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn)
  76. {
  77. struct pci_sriov *iov = dev->sriov;
  78. pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
  79. pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset);
  80. pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride);
  81. }
  82. /*
  83. * The PF consumes one bus number. NumVFs, First VF Offset, and VF Stride
  84. * determine how many additional bus numbers will be consumed by VFs.
  85. *
  86. * Iterate over all valid NumVFs, validate offset and stride, and calculate
  87. * the maximum number of bus numbers that could ever be required.
  88. */
  89. static int compute_max_vf_buses(struct pci_dev *dev)
  90. {
  91. struct pci_sriov *iov = dev->sriov;
  92. int nr_virtfn, busnr, rc = 0;
  93. for (nr_virtfn = iov->total_VFs; nr_virtfn; nr_virtfn--) {
  94. pci_iov_set_numvfs(dev, nr_virtfn);
  95. if (!iov->offset || (nr_virtfn > 1 && !iov->stride)) {
  96. rc = -EIO;
  97. goto out;
  98. }
  99. busnr = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
  100. if (busnr > iov->max_VF_buses)
  101. iov->max_VF_buses = busnr;
  102. }
  103. out:
  104. pci_iov_set_numvfs(dev, 0);
  105. return rc;
  106. }
  107. static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
  108. {
  109. struct pci_bus *child;
  110. if (bus->number == busnr)
  111. return bus;
  112. child = pci_find_bus(pci_domain_nr(bus), busnr);
  113. if (child)
  114. return child;
  115. child = pci_add_new_bus(bus, NULL, busnr);
  116. if (!child)
  117. return NULL;
  118. pci_bus_insert_busn_res(child, busnr, busnr);
  119. return child;
  120. }
  121. static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus)
  122. {
  123. if (physbus != virtbus && list_empty(&virtbus->devices))
  124. pci_remove_bus(virtbus);
  125. }
  126. resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
  127. {
  128. if (!dev->is_physfn)
  129. return 0;
  130. return dev->sriov->barsz[resno - PCI_IOV_RESOURCES];
  131. }
  132. static void pci_read_vf_config_common(struct pci_dev *virtfn)
  133. {
  134. struct pci_dev *physfn = virtfn->physfn;
  135. /*
  136. * Some config registers are the same across all associated VFs.
  137. * Read them once from VF0 so we can skip reading them from the
  138. * other VFs.
  139. *
  140. * PCIe r4.0, sec 9.3.4.1, technically doesn't require all VFs to
  141. * have the same Revision ID and Subsystem ID, but we assume they
  142. * do.
  143. */
  144. pci_read_config_dword(virtfn, PCI_CLASS_REVISION,
  145. &physfn->sriov->class);
  146. pci_read_config_byte(virtfn, PCI_HEADER_TYPE,
  147. &physfn->sriov->hdr_type);
  148. pci_read_config_word(virtfn, PCI_SUBSYSTEM_VENDOR_ID,
  149. &physfn->sriov->subsystem_vendor);
  150. pci_read_config_word(virtfn, PCI_SUBSYSTEM_ID,
  151. &physfn->sriov->subsystem_device);
  152. }
  153. int pci_iov_sysfs_link(struct pci_dev *dev,
  154. struct pci_dev *virtfn, int id)
  155. {
  156. char buf[VIRTFN_ID_LEN];
  157. int rc;
  158. sprintf(buf, "virtfn%u", id);
  159. rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
  160. if (rc)
  161. goto failed;
  162. rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
  163. if (rc)
  164. goto failed1;
  165. kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
  166. return 0;
  167. failed1:
  168. sysfs_remove_link(&dev->dev.kobj, buf);
  169. failed:
  170. return rc;
  171. }
  172. #ifdef CONFIG_PCI_MSI
  173. static ssize_t sriov_vf_total_msix_show(struct device *dev,
  174. struct device_attribute *attr,
  175. char *buf)
  176. {
  177. struct pci_dev *pdev = to_pci_dev(dev);
  178. u32 vf_total_msix = 0;
  179. device_lock(dev);
  180. if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
  181. goto unlock;
  182. vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
  183. unlock:
  184. device_unlock(dev);
  185. return sysfs_emit(buf, "%u\n", vf_total_msix);
  186. }
  187. static DEVICE_ATTR_RO(sriov_vf_total_msix);
  188. static ssize_t sriov_vf_msix_count_store(struct device *dev,
  189. struct device_attribute *attr,
  190. const char *buf, size_t count)
  191. {
  192. struct pci_dev *vf_dev = to_pci_dev(dev);
  193. struct pci_dev *pdev = pci_physfn(vf_dev);
  194. int val, ret = 0;
  195. if (kstrtoint(buf, 0, &val) < 0)
  196. return -EINVAL;
  197. if (val < 0)
  198. return -EINVAL;
  199. device_lock(&pdev->dev);
  200. if (!pdev->driver || !pdev->driver->sriov_set_msix_vec_count) {
  201. ret = -EOPNOTSUPP;
  202. goto err_pdev;
  203. }
  204. device_lock(&vf_dev->dev);
  205. if (vf_dev->driver) {
  206. /*
  207. * A driver is already attached to this VF and has configured
  208. * itself based on the current MSI-X vector count. Changing
  209. * the vector size could mess up the driver, so block it.
  210. */
  211. ret = -EBUSY;
  212. goto err_dev;
  213. }
  214. ret = pdev->driver->sriov_set_msix_vec_count(vf_dev, val);
  215. err_dev:
  216. device_unlock(&vf_dev->dev);
  217. err_pdev:
  218. device_unlock(&pdev->dev);
  219. return ret ? : count;
  220. }
  221. static DEVICE_ATTR_WO(sriov_vf_msix_count);
  222. #endif
/*
 * NULL-terminated list of sysfs attributes for VF devices.  The MSI-X
 * vector-count attribute is only present when CONFIG_PCI_MSI is enabled.
 */
static struct attribute *sriov_vf_dev_attrs[] = {
#ifdef CONFIG_PCI_MSI
	&dev_attr_sriov_vf_msix_count.attr,
#endif
	NULL,
};
  229. static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
  230. struct attribute *a, int n)
  231. {
  232. struct device *dev = kobj_to_dev(kobj);
  233. struct pci_dev *pdev = to_pci_dev(dev);
  234. if (!pdev->is_virtfn)
  235. return 0;
  236. return a->mode;
  237. }
/*
 * Attribute group tying the VF attribute list to its visibility callback,
 * so the attributes only show up on virtual functions.
 */
const struct attribute_group sriov_vf_dev_attr_group = {
	.attrs = sriov_vf_dev_attrs,
	.is_visible = sriov_vf_attrs_are_visible,
};
  242. int pci_iov_add_virtfn(struct pci_dev *dev, int id)
  243. {
  244. int i;
  245. int rc = -ENOMEM;
  246. u64 size;
  247. struct pci_dev *virtfn;
  248. struct resource *res;
  249. struct pci_sriov *iov = dev->sriov;
  250. struct pci_bus *bus;
  251. bus = virtfn_add_bus(dev->bus, pci_iov_virtfn_bus(dev, id));
  252. if (!bus)
  253. goto failed;
  254. virtfn = pci_alloc_dev(bus);
  255. if (!virtfn)
  256. goto failed0;
  257. virtfn->devfn = pci_iov_virtfn_devfn(dev, id);
  258. virtfn->vendor = dev->vendor;
  259. virtfn->device = iov->vf_device;
  260. virtfn->is_virtfn = 1;
  261. virtfn->physfn = pci_dev_get(dev);
  262. virtfn->no_command_memory = 1;
  263. if (id == 0)
  264. pci_read_vf_config_common(virtfn);
  265. rc = pci_setup_device(virtfn);
  266. if (rc)
  267. goto failed1;
  268. virtfn->dev.parent = dev->dev.parent;
  269. virtfn->multifunction = 0;
  270. for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
  271. res = &dev->resource[i + PCI_IOV_RESOURCES];
  272. if (!res->parent)
  273. continue;
  274. virtfn->resource[i].name = pci_name(virtfn);
  275. virtfn->resource[i].flags = res->flags;
  276. size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
  277. virtfn->resource[i].start = res->start + size * id;
  278. virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
  279. rc = request_resource(res, &virtfn->resource[i]);
  280. BUG_ON(rc);
  281. }
  282. pci_device_add(virtfn, virtfn->bus);
  283. rc = pci_iov_sysfs_link(dev, virtfn, id);
  284. if (rc)
  285. goto failed1;
  286. pci_bus_add_device(virtfn);
  287. return 0;
  288. failed1:
  289. pci_stop_and_remove_bus_device(virtfn);
  290. pci_dev_put(dev);
  291. failed0:
  292. virtfn_remove_bus(dev->bus, bus);
  293. failed:
  294. return rc;
  295. }
  296. void pci_iov_remove_virtfn(struct pci_dev *dev, int id)
  297. {
  298. char buf[VIRTFN_ID_LEN];
  299. struct pci_dev *virtfn;
  300. virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
  301. pci_iov_virtfn_bus(dev, id),
  302. pci_iov_virtfn_devfn(dev, id));
  303. if (!virtfn)
  304. return;
  305. sprintf(buf, "virtfn%u", id);
  306. sysfs_remove_link(&dev->dev.kobj, buf);
  307. /*
  308. * pci_stop_dev() could have been called for this virtfn already,
  309. * so the directory for the virtfn may have been removed before.
  310. * Double check to avoid spurious sysfs warnings.
  311. */
  312. if (virtfn->dev.kobj.sd)
  313. sysfs_remove_link(&virtfn->dev.kobj, "physfn");
  314. pci_stop_and_remove_bus_device(virtfn);
  315. virtfn_remove_bus(dev->bus, virtfn->bus);
  316. /* balance pci_get_domain_bus_and_slot() */
  317. pci_dev_put(virtfn);
  318. pci_dev_put(dev);
  319. }
  320. static ssize_t sriov_totalvfs_show(struct device *dev,
  321. struct device_attribute *attr,
  322. char *buf)
  323. {
  324. struct pci_dev *pdev = to_pci_dev(dev);
  325. return sysfs_emit(buf, "%u\n", pci_sriov_get_totalvfs(pdev));
  326. }
  327. static ssize_t sriov_numvfs_show(struct device *dev,
  328. struct device_attribute *attr,
  329. char *buf)
  330. {
  331. struct pci_dev *pdev = to_pci_dev(dev);
  332. u16 num_vfs;
  333. /* Serialize vs sriov_numvfs_store() so readers see valid num_VFs */
  334. device_lock(&pdev->dev);
  335. num_vfs = pdev->sriov->num_VFs;
  336. device_unlock(&pdev->dev);
  337. return sysfs_emit(buf, "%u\n", num_vfs);
  338. }
  339. /*
  340. * num_vfs > 0; number of VFs to enable
  341. * num_vfs = 0; disable all VFs
  342. *
  343. * Note: SRIOV spec does not allow partial VF
  344. * disable, so it's all or none.
  345. */
  346. static ssize_t sriov_numvfs_store(struct device *dev,
  347. struct device_attribute *attr,
  348. const char *buf, size_t count)
  349. {
  350. struct pci_dev *pdev = to_pci_dev(dev);
  351. int ret = 0;
  352. u16 num_vfs;
  353. if (kstrtou16(buf, 0, &num_vfs) < 0)
  354. return -EINVAL;
  355. if (num_vfs > pci_sriov_get_totalvfs(pdev))
  356. return -ERANGE;
  357. device_lock(&pdev->dev);
  358. if (num_vfs == pdev->sriov->num_VFs)
  359. goto exit;
  360. /* is PF driver loaded */
  361. if (!pdev->driver) {
  362. pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n");
  363. ret = -ENOENT;
  364. goto exit;
  365. }
  366. /* is PF driver loaded w/callback */
  367. if (!pdev->driver->sriov_configure) {
  368. pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n");
  369. ret = -ENOENT;
  370. goto exit;
  371. }
  372. if (num_vfs == 0) {
  373. /* disable VFs */
  374. ret = pdev->driver->sriov_configure(pdev, 0);
  375. goto exit;
  376. }
  377. /* enable VFs */
  378. if (pdev->sriov->num_VFs) {
  379. pci_warn(pdev, "%d VFs already enabled. Disable before enabling %d VFs\n",
  380. pdev->sriov->num_VFs, num_vfs);
  381. ret = -EBUSY;
  382. goto exit;
  383. }
  384. ret = pdev->driver->sriov_configure(pdev, num_vfs);
  385. if (ret < 0)
  386. goto exit;
  387. if (ret != num_vfs)
  388. pci_warn(pdev, "%d VFs requested; only %d enabled\n",
  389. num_vfs, ret);
  390. exit:
  391. device_unlock(&pdev->dev);
  392. if (ret < 0)
  393. return ret;
  394. return count;
  395. }
  396. static ssize_t sriov_offset_show(struct device *dev,
  397. struct device_attribute *attr,
  398. char *buf)
  399. {
  400. struct pci_dev *pdev = to_pci_dev(dev);
  401. return sysfs_emit(buf, "%u\n", pdev->sriov->offset);
  402. }
  403. static ssize_t sriov_stride_show(struct device *dev,
  404. struct device_attribute *attr,
  405. char *buf)
  406. {
  407. struct pci_dev *pdev = to_pci_dev(dev);
  408. return sysfs_emit(buf, "%u\n", pdev->sriov->stride);
  409. }
  410. static ssize_t sriov_vf_device_show(struct device *dev,
  411. struct device_attribute *attr,
  412. char *buf)
  413. {
  414. struct pci_dev *pdev = to_pci_dev(dev);
  415. return sysfs_emit(buf, "%x\n", pdev->sriov->vf_device);
  416. }
  417. static ssize_t sriov_drivers_autoprobe_show(struct device *dev,
  418. struct device_attribute *attr,
  419. char *buf)
  420. {
  421. struct pci_dev *pdev = to_pci_dev(dev);
  422. return sysfs_emit(buf, "%u\n", pdev->sriov->drivers_autoprobe);
  423. }
  424. static ssize_t sriov_drivers_autoprobe_store(struct device *dev,
  425. struct device_attribute *attr,
  426. const char *buf, size_t count)
  427. {
  428. struct pci_dev *pdev = to_pci_dev(dev);
  429. bool drivers_autoprobe;
  430. if (kstrtobool(buf, &drivers_autoprobe) < 0)
  431. return -EINVAL;
  432. pdev->sriov->drivers_autoprobe = drivers_autoprobe;
  433. return count;
  434. }
/*
 * Device attribute objects (dev_attr_sriov_*) built from the show/store
 * handlers above.  Only sriov_numvfs and sriov_drivers_autoprobe are
 * writable; the rest are read-only.
 */
static DEVICE_ATTR_RO(sriov_totalvfs);
static DEVICE_ATTR_RW(sriov_numvfs);
static DEVICE_ATTR_RO(sriov_offset);
static DEVICE_ATTR_RO(sriov_stride);
static DEVICE_ATTR_RO(sriov_vf_device);
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
/*
 * NULL-terminated list of sysfs attributes for PF devices.  The total
 * MSI-X attribute is only present when CONFIG_PCI_MSI is enabled.
 */
static struct attribute *sriov_pf_dev_attrs[] = {
	&dev_attr_sriov_totalvfs.attr,
	&dev_attr_sriov_numvfs.attr,
	&dev_attr_sriov_offset.attr,
	&dev_attr_sriov_stride.attr,
	&dev_attr_sriov_vf_device.attr,
	&dev_attr_sriov_drivers_autoprobe.attr,
#ifdef CONFIG_PCI_MSI
	&dev_attr_sriov_vf_total_msix.attr,
#endif
	NULL,
};
  453. static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
  454. struct attribute *a, int n)
  455. {
  456. struct device *dev = kobj_to_dev(kobj);
  457. if (!dev_is_pf(dev))
  458. return 0;
  459. return a->mode;
  460. }
/*
 * Attribute group tying the PF attribute list to its visibility callback,
 * so the attributes only show up on physical functions.
 */
const struct attribute_group sriov_pf_dev_attr_group = {
	.attrs = sriov_pf_dev_attrs,
	.is_visible = sriov_pf_attrs_are_visible,
};
/*
 * Weak default for the hook called by sriov_enable() before VFs are turned
 * on.  This default does nothing and reports success; being __weak, it can
 * be replaced by another definition at link time.
 */
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	return 0;
}
/*
 * Weak default for the hook called after VFs have been turned off (from
 * sriov_disable() and the sriov_enable() error path).  This default does
 * nothing and reports success; being __weak, it can be replaced by another
 * definition at link time.
 */
int __weak pcibios_sriov_disable(struct pci_dev *pdev)
{
	return 0;
}
  473. static int sriov_add_vfs(struct pci_dev *dev, u16 num_vfs)
  474. {
  475. unsigned int i;
  476. int rc;
  477. if (dev->no_vf_scan)
  478. return 0;
  479. for (i = 0; i < num_vfs; i++) {
  480. rc = pci_iov_add_virtfn(dev, i);
  481. if (rc)
  482. goto failed;
  483. }
  484. return 0;
  485. failed:
  486. while (i--)
  487. pci_iov_remove_virtfn(dev, i);
  488. return rc;
  489. }
  490. static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
  491. {
  492. int rc;
  493. int i;
  494. int nres;
  495. u16 initial;
  496. struct resource *res;
  497. struct pci_dev *pdev;
  498. struct pci_sriov *iov = dev->sriov;
  499. int bars = 0;
  500. int bus;
  501. if (!nr_virtfn)
  502. return 0;
  503. if (iov->num_VFs)
  504. return -EINVAL;
  505. pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
  506. if (initial > iov->total_VFs ||
  507. (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs)))
  508. return -EIO;
  509. if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs ||
  510. (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
  511. return -EINVAL;
  512. nres = 0;
  513. for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
  514. bars |= (1 << (i + PCI_IOV_RESOURCES));
  515. res = &dev->resource[i + PCI_IOV_RESOURCES];
  516. if (res->parent)
  517. nres++;
  518. }
  519. if (nres != iov->nres) {
  520. pci_err(dev, "not enough MMIO resources for SR-IOV\n");
  521. return -ENOMEM;
  522. }
  523. bus = pci_iov_virtfn_bus(dev, nr_virtfn - 1);
  524. if (bus > dev->bus->busn_res.end) {
  525. pci_err(dev, "can't enable %d VFs (bus %02x out of range of %pR)\n",
  526. nr_virtfn, bus, &dev->bus->busn_res);
  527. return -ENOMEM;
  528. }
  529. if (pci_enable_resources(dev, bars)) {
  530. pci_err(dev, "SR-IOV: IOV BARS not allocated\n");
  531. return -ENOMEM;
  532. }
  533. if (iov->link != dev->devfn) {
  534. pdev = pci_get_slot(dev->bus, iov->link);
  535. if (!pdev)
  536. return -ENODEV;
  537. if (!pdev->is_physfn) {
  538. pci_dev_put(pdev);
  539. return -ENOSYS;
  540. }
  541. rc = sysfs_create_link(&dev->dev.kobj,
  542. &pdev->dev.kobj, "dep_link");
  543. pci_dev_put(pdev);
  544. if (rc)
  545. return rc;
  546. }
  547. iov->initial_VFs = initial;
  548. if (nr_virtfn < initial)
  549. initial = nr_virtfn;
  550. rc = pcibios_sriov_enable(dev, initial);
  551. if (rc) {
  552. pci_err(dev, "failure %d from pcibios_sriov_enable()\n", rc);
  553. goto err_pcibios;
  554. }
  555. pci_iov_set_numvfs(dev, nr_virtfn);
  556. iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
  557. pci_cfg_access_lock(dev);
  558. pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
  559. msleep(100);
  560. pci_cfg_access_unlock(dev);
  561. rc = sriov_add_vfs(dev, initial);
  562. if (rc)
  563. goto err_pcibios;
  564. kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
  565. iov->num_VFs = nr_virtfn;
  566. return 0;
  567. err_pcibios:
  568. iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
  569. pci_cfg_access_lock(dev);
  570. pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
  571. ssleep(1);
  572. pci_cfg_access_unlock(dev);
  573. pcibios_sriov_disable(dev);
  574. if (iov->link != dev->devfn)
  575. sysfs_remove_link(&dev->dev.kobj, "dep_link");
  576. pci_iov_set_numvfs(dev, 0);
  577. return rc;
  578. }
  579. static void sriov_del_vfs(struct pci_dev *dev)
  580. {
  581. struct pci_sriov *iov = dev->sriov;
  582. int i;
  583. for (i = 0; i < iov->num_VFs; i++)
  584. pci_iov_remove_virtfn(dev, i);
  585. }
  586. static void sriov_disable(struct pci_dev *dev)
  587. {
  588. struct pci_sriov *iov = dev->sriov;
  589. if (!iov->num_VFs)
  590. return;
  591. sriov_del_vfs(dev);
  592. iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
  593. pci_cfg_access_lock(dev);
  594. pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
  595. ssleep(1);
  596. pci_cfg_access_unlock(dev);
  597. pcibios_sriov_disable(dev);
  598. if (iov->link != dev->devfn)
  599. sysfs_remove_link(&dev->dev.kobj, "dep_link");
  600. iov->num_VFs = 0;
  601. pci_iov_set_numvfs(dev, 0);
  602. }
  603. static int sriov_init(struct pci_dev *dev, int pos)
  604. {
  605. int i, bar64;
  606. int rc;
  607. int nres;
  608. u32 pgsz;
  609. u16 ctrl, total;
  610. struct pci_sriov *iov;
  611. struct resource *res;
  612. struct pci_dev *pdev;
  613. pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
  614. if (ctrl & PCI_SRIOV_CTRL_VFE) {
  615. pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
  616. ssleep(1);
  617. }
  618. ctrl = 0;
  619. list_for_each_entry(pdev, &dev->bus->devices, bus_list)
  620. if (pdev->is_physfn)
  621. goto found;
  622. pdev = NULL;
  623. if (pci_ari_enabled(dev->bus))
  624. ctrl |= PCI_SRIOV_CTRL_ARI;
  625. found:
  626. pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
  627. pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
  628. if (!total)
  629. return 0;
  630. pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
  631. i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
  632. pgsz &= ~((1 << i) - 1);
  633. if (!pgsz)
  634. return -EIO;
  635. pgsz &= ~(pgsz - 1);
  636. pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);
  637. iov = kzalloc(sizeof(*iov), GFP_KERNEL);
  638. if (!iov)
  639. return -ENOMEM;
  640. nres = 0;
  641. for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
  642. res = &dev->resource[i + PCI_IOV_RESOURCES];
  643. /*
  644. * If it is already FIXED, don't change it, something
  645. * (perhaps EA or header fixups) wants it this way.
  646. */
  647. if (res->flags & IORESOURCE_PCI_FIXED)
  648. bar64 = (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
  649. else
  650. bar64 = __pci_read_base(dev, pci_bar_unknown, res,
  651. pos + PCI_SRIOV_BAR + i * 4);
  652. if (!res->flags)
  653. continue;
  654. if (resource_size(res) & (PAGE_SIZE - 1)) {
  655. rc = -EIO;
  656. goto failed;
  657. }
  658. iov->barsz[i] = resource_size(res);
  659. res->end = res->start + resource_size(res) * total - 1;
  660. pci_info(dev, "VF(n) BAR%d space: %pR (contains BAR%d for %d VFs)\n",
  661. i, res, i, total);
  662. i += bar64;
  663. nres++;
  664. }
  665. iov->pos = pos;
  666. iov->nres = nres;
  667. iov->ctrl = ctrl;
  668. iov->total_VFs = total;
  669. iov->driver_max_VFs = total;
  670. pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &iov->vf_device);
  671. iov->pgsz = pgsz;
  672. iov->self = dev;
  673. iov->drivers_autoprobe = true;
  674. pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
  675. pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);
  676. if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END)
  677. iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link);
  678. if (pdev)
  679. iov->dev = pci_dev_get(pdev);
  680. else
  681. iov->dev = dev;
  682. dev->sriov = iov;
  683. dev->is_physfn = 1;
  684. rc = compute_max_vf_buses(dev);
  685. if (rc)
  686. goto fail_max_buses;
  687. return 0;
  688. fail_max_buses:
  689. dev->sriov = NULL;
  690. dev->is_physfn = 0;
  691. failed:
  692. for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
  693. res = &dev->resource[i + PCI_IOV_RESOURCES];
  694. res->flags = 0;
  695. }
  696. kfree(iov);
  697. return rc;
  698. }
  699. static void sriov_release(struct pci_dev *dev)
  700. {
  701. BUG_ON(dev->sriov->num_VFs);
  702. if (dev != dev->sriov->dev)
  703. pci_dev_put(dev->sriov->dev);
  704. kfree(dev->sriov);
  705. dev->sriov = NULL;
  706. }
  707. static void sriov_restore_state(struct pci_dev *dev)
  708. {
  709. int i;
  710. u16 ctrl;
  711. struct pci_sriov *iov = dev->sriov;
  712. pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
  713. if (ctrl & PCI_SRIOV_CTRL_VFE)
  714. return;
  715. /*
  716. * Restore PCI_SRIOV_CTRL_ARI before pci_iov_set_numvfs() because
  717. * it reads offset & stride, which depend on PCI_SRIOV_CTRL_ARI.
  718. */
  719. ctrl &= ~PCI_SRIOV_CTRL_ARI;
  720. ctrl |= iov->ctrl & PCI_SRIOV_CTRL_ARI;
  721. pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, ctrl);
  722. for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
  723. pci_update_resource(dev, i + PCI_IOV_RESOURCES);
  724. pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
  725. pci_iov_set_numvfs(dev, iov->num_VFs);
  726. pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
  727. if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
  728. msleep(100);
  729. }
  730. /**
  731. * pci_iov_init - initialize the IOV capability
  732. * @dev: the PCI device
  733. *
  734. * Returns 0 on success, or negative on failure.
  735. */
  736. int pci_iov_init(struct pci_dev *dev)
  737. {
  738. int pos;
  739. if (!pci_is_pcie(dev))
  740. return -ENODEV;
  741. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
  742. if (pos)
  743. return sriov_init(dev, pos);
  744. return -ENODEV;
  745. }
  746. /**
  747. * pci_iov_release - release resources used by the IOV capability
  748. * @dev: the PCI device
  749. */
  750. void pci_iov_release(struct pci_dev *dev)
  751. {
  752. if (dev->is_physfn)
  753. sriov_release(dev);
  754. }
  755. /**
  756. * pci_iov_remove - clean up SR-IOV state after PF driver is detached
  757. * @dev: the PCI device
  758. */
  759. void pci_iov_remove(struct pci_dev *dev)
  760. {
  761. struct pci_sriov *iov = dev->sriov;
  762. if (!dev->is_physfn)
  763. return;
  764. iov->driver_max_VFs = iov->total_VFs;
  765. if (iov->num_VFs)
  766. pci_warn(dev, "driver left SR-IOV enabled after remove\n");
  767. }
  768. /**
  769. * pci_iov_update_resource - update a VF BAR
  770. * @dev: the PCI device
  771. * @resno: the resource number
  772. *
  773. * Update a VF BAR in the SR-IOV capability of a PF.
  774. */
  775. void pci_iov_update_resource(struct pci_dev *dev, int resno)
  776. {
  777. struct pci_sriov *iov = dev->is_physfn ? dev->sriov : NULL;
  778. struct resource *res = dev->resource + resno;
  779. int vf_bar = resno - PCI_IOV_RESOURCES;
  780. struct pci_bus_region region;
  781. u16 cmd;
  782. u32 new;
  783. int reg;
  784. /*
  785. * The generic pci_restore_bars() path calls this for all devices,
  786. * including VFs and non-SR-IOV devices. If this is not a PF, we
  787. * have nothing to do.
  788. */
  789. if (!iov)
  790. return;
  791. pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &cmd);
  792. if ((cmd & PCI_SRIOV_CTRL_VFE) && (cmd & PCI_SRIOV_CTRL_MSE)) {
  793. dev_WARN(&dev->dev, "can't update enabled VF BAR%d %pR\n",
  794. vf_bar, res);
  795. return;
  796. }
  797. /*
  798. * Ignore unimplemented BARs, unused resource slots for 64-bit
  799. * BARs, and non-movable resources, e.g., those described via
  800. * Enhanced Allocation.
  801. */
  802. if (!res->flags)
  803. return;
  804. if (res->flags & IORESOURCE_UNSET)
  805. return;
  806. if (res->flags & IORESOURCE_PCI_FIXED)
  807. return;
  808. pcibios_resource_to_bus(dev->bus, &region, res);
  809. new = region.start;
  810. new |= res->flags & ~PCI_BASE_ADDRESS_MEM_MASK;
  811. reg = iov->pos + PCI_SRIOV_BAR + 4 * vf_bar;
  812. pci_write_config_dword(dev, reg, new);
  813. if (res->flags & IORESOURCE_MEM_64) {
  814. new = region.start >> 16 >> 16;
  815. pci_write_config_dword(dev, reg + 4, new);
  816. }
  817. }
/*
 * Weak default for the alignment of IOV resource @resno: the size of a
 * single VF BAR as reported by pci_iov_resource_size().  Being __weak, it
 * can be replaced by another definition at link time.
 */
resource_size_t __weak pcibios_iov_resource_alignment(struct pci_dev *dev,
						      int resno)
{
	return pci_iov_resource_size(dev, resno);
}
/**
 * pci_sriov_resource_alignment - get resource alignment for VF BAR
 * @dev: the PCI device
 * @resno: the resource number
 *
 * Returns the alignment of the VF BAR found in the SR-IOV capability, as
 * determined by pcibios_iov_resource_alignment().
 * This is not the same as the resource size, which is defined as
 * the VF BAR size multiplied by the number of VFs. The alignment
 * is just the VF BAR size.
 */
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno)
{
	return pcibios_iov_resource_alignment(dev, resno);
}
  837. /**
  838. * pci_restore_iov_state - restore the state of the IOV capability
  839. * @dev: the PCI device
  840. */
  841. void pci_restore_iov_state(struct pci_dev *dev)
  842. {
  843. if (dev->is_physfn)
  844. sriov_restore_state(dev);
  845. }
  846. /**
  847. * pci_vf_drivers_autoprobe - set PF property drivers_autoprobe for VFs
  848. * @dev: the PCI device
  849. * @auto_probe: set VF drivers auto probe flag
  850. */
  851. void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool auto_probe)
  852. {
  853. if (dev->is_physfn)
  854. dev->sriov->drivers_autoprobe = auto_probe;
  855. }
  856. /**
  857. * pci_iov_bus_range - find bus range used by Virtual Function
  858. * @bus: the PCI bus
  859. *
  860. * Returns max number of buses (exclude current one) used by Virtual
  861. * Functions.
  862. */
  863. int pci_iov_bus_range(struct pci_bus *bus)
  864. {
  865. int max = 0;
  866. struct pci_dev *dev;
  867. list_for_each_entry(dev, &bus->devices, bus_list) {
  868. if (!dev->is_physfn)
  869. continue;
  870. if (dev->sriov->max_VF_buses > max)
  871. max = dev->sriov->max_VF_buses;
  872. }
  873. return max ? max - bus->number : 0;
  874. }
  875. /**
  876. * pci_enable_sriov - enable the SR-IOV capability
  877. * @dev: the PCI device
  878. * @nr_virtfn: number of virtual functions to enable
  879. *
  880. * Returns 0 on success, or negative on failure.
  881. */
  882. int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
  883. {
  884. might_sleep();
  885. if (!dev->is_physfn)
  886. return -ENOSYS;
  887. return sriov_enable(dev, nr_virtfn);
  888. }
  889. EXPORT_SYMBOL_GPL(pci_enable_sriov);
  890. /**
  891. * pci_disable_sriov - disable the SR-IOV capability
  892. * @dev: the PCI device
  893. */
  894. void pci_disable_sriov(struct pci_dev *dev)
  895. {
  896. might_sleep();
  897. if (!dev->is_physfn)
  898. return;
  899. sriov_disable(dev);
  900. }
  901. EXPORT_SYMBOL_GPL(pci_disable_sriov);
  902. /**
  903. * pci_num_vf - return number of VFs associated with a PF device_release_driver
  904. * @dev: the PCI device
  905. *
  906. * Returns number of VFs, or 0 if SR-IOV is not enabled.
  907. */
  908. int pci_num_vf(struct pci_dev *dev)
  909. {
  910. if (!dev->is_physfn)
  911. return 0;
  912. return dev->sriov->num_VFs;
  913. }
  914. EXPORT_SYMBOL_GPL(pci_num_vf);
  915. /**
  916. * pci_vfs_assigned - returns number of VFs are assigned to a guest
  917. * @dev: the PCI device
  918. *
  919. * Returns number of VFs belonging to this device that are assigned to a guest.
  920. * If device is not a physical function returns 0.
  921. */
  922. int pci_vfs_assigned(struct pci_dev *dev)
  923. {
  924. struct pci_dev *vfdev;
  925. unsigned int vfs_assigned = 0;
  926. unsigned short dev_id;
  927. /* only search if we are a PF */
  928. if (!dev->is_physfn)
  929. return 0;
  930. /*
  931. * determine the device ID for the VFs, the vendor ID will be the
  932. * same as the PF so there is no need to check for that one
  933. */
  934. dev_id = dev->sriov->vf_device;
  935. /* loop through all the VFs to see if we own any that are assigned */
  936. vfdev = pci_get_device(dev->vendor, dev_id, NULL);
  937. while (vfdev) {
  938. /*
  939. * It is considered assigned if it is a virtual function with
  940. * our dev as the physical function and the assigned bit is set
  941. */
  942. if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
  943. pci_is_dev_assigned(vfdev))
  944. vfs_assigned++;
  945. vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
  946. }
  947. return vfs_assigned;
  948. }
  949. EXPORT_SYMBOL_GPL(pci_vfs_assigned);
  950. /**
  951. * pci_sriov_set_totalvfs -- reduce the TotalVFs available
  952. * @dev: the PCI PF device
  953. * @numvfs: number that should be used for TotalVFs supported
  954. *
  955. * Should be called from PF driver's probe routine with
  956. * device's mutex held.
  957. *
  958. * Returns 0 if PF is an SRIOV-capable device and
  959. * value of numvfs valid. If not a PF return -ENOSYS;
  960. * if numvfs is invalid return -EINVAL;
  961. * if VFs already enabled, return -EBUSY.
  962. */
  963. int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs)
  964. {
  965. if (!dev->is_physfn)
  966. return -ENOSYS;
  967. if (numvfs > dev->sriov->total_VFs)
  968. return -EINVAL;
  969. /* Shouldn't change if VFs already enabled */
  970. if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE)
  971. return -EBUSY;
  972. dev->sriov->driver_max_VFs = numvfs;
  973. return 0;
  974. }
  975. EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs);
  976. /**
  977. * pci_sriov_get_totalvfs -- get total VFs supported on this device
  978. * @dev: the PCI PF device
  979. *
  980. * For a PCIe device with SRIOV support, return the PCIe
  981. * SRIOV capability value of TotalVFs or the value of driver_max_VFs
  982. * if the driver reduced it. Otherwise 0.
  983. */
  984. int pci_sriov_get_totalvfs(struct pci_dev *dev)
  985. {
  986. if (!dev->is_physfn)
  987. return 0;
  988. return dev->sriov->driver_max_VFs;
  989. }
  990. EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs);
  991. /**
  992. * pci_sriov_configure_simple - helper to configure SR-IOV
  993. * @dev: the PCI device
  994. * @nr_virtfn: number of virtual functions to enable, 0 to disable
  995. *
  996. * Enable or disable SR-IOV for devices that don't require any PF setup
  997. * before enabling SR-IOV. Return value is negative on error, or number of
  998. * VFs allocated on success.
  999. */
  1000. int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn)
  1001. {
  1002. int rc;
  1003. might_sleep();
  1004. if (!dev->is_physfn)
  1005. return -ENODEV;
  1006. if (pci_vfs_assigned(dev)) {
  1007. pci_warn(dev, "Cannot modify SR-IOV while VFs are assigned\n");
  1008. return -EPERM;
  1009. }
  1010. if (nr_virtfn == 0) {
  1011. sriov_disable(dev);
  1012. return 0;
  1013. }
  1014. rc = sriov_enable(dev, nr_virtfn);
  1015. if (rc < 0)
  1016. return rc;
  1017. return nr_virtfn;
  1018. }
  1019. EXPORT_SYMBOL_GPL(pci_sriov_configure_simple);