irqdomain.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
// SPDX-License-Identifier: GPL-2.0
/*
 * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
 *
 * Authors:
 *  Sunil Muthuswamy <[email protected]>
 *  Wei Liu <[email protected]>
 */
  9. #include <linux/pci.h>
  10. #include <linux/irq.h>
  11. #include <asm/mshyperv.h>
  12. static int hv_map_interrupt(union hv_device_id device_id, bool level,
  13. int cpu, int vector, struct hv_interrupt_entry *entry)
  14. {
  15. struct hv_input_map_device_interrupt *input;
  16. struct hv_output_map_device_interrupt *output;
  17. struct hv_device_interrupt_descriptor *intr_desc;
  18. unsigned long flags;
  19. u64 status;
  20. int nr_bank, var_size;
  21. local_irq_save(flags);
  22. input = *this_cpu_ptr(hyperv_pcpu_input_arg);
  23. output = *this_cpu_ptr(hyperv_pcpu_output_arg);
  24. intr_desc = &input->interrupt_descriptor;
  25. memset(input, 0, sizeof(*input));
  26. input->partition_id = hv_current_partition_id;
  27. input->device_id = device_id.as_uint64;
  28. intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
  29. intr_desc->vector_count = 1;
  30. intr_desc->target.vector = vector;
  31. if (level)
  32. intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
  33. else
  34. intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
  35. intr_desc->target.vp_set.valid_bank_mask = 0;
  36. intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
  37. nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
  38. if (nr_bank < 0) {
  39. local_irq_restore(flags);
  40. pr_err("%s: unable to generate VP set\n", __func__);
  41. return EINVAL;
  42. }
  43. intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
  44. /*
  45. * var-sized hypercall, var-size starts after vp_mask (thus
  46. * vp_set.format does not count, but vp_set.valid_bank_mask
  47. * does).
  48. */
  49. var_size = nr_bank + 1;
  50. status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
  51. input, output);
  52. *entry = output->interrupt_entry;
  53. local_irq_restore(flags);
  54. if (!hv_result_success(status))
  55. pr_err("%s: hypercall failed, status %lld\n", __func__, status);
  56. return hv_result(status);
  57. }
  58. static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
  59. {
  60. unsigned long flags;
  61. struct hv_input_unmap_device_interrupt *input;
  62. struct hv_interrupt_entry *intr_entry;
  63. u64 status;
  64. local_irq_save(flags);
  65. input = *this_cpu_ptr(hyperv_pcpu_input_arg);
  66. memset(input, 0, sizeof(*input));
  67. intr_entry = &input->interrupt_entry;
  68. input->partition_id = hv_current_partition_id;
  69. input->device_id = id;
  70. *intr_entry = *old_entry;
  71. status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
  72. local_irq_restore(flags);
  73. return hv_result(status);
  74. }
  75. #ifdef CONFIG_PCI_MSI
  76. struct rid_data {
  77. struct pci_dev *bridge;
  78. u32 rid;
  79. };
  80. static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
  81. {
  82. struct rid_data *rd = data;
  83. u8 bus = PCI_BUS_NUM(rd->rid);
  84. if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) {
  85. rd->bridge = pdev;
  86. rd->rid = alias;
  87. }
  88. return 0;
  89. }
  90. static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
  91. {
  92. union hv_device_id dev_id;
  93. struct rid_data data = {
  94. .bridge = NULL,
  95. .rid = PCI_DEVID(dev->bus->number, dev->devfn)
  96. };
  97. pci_for_each_dma_alias(dev, get_rid_cb, &data);
  98. dev_id.as_uint64 = 0;
  99. dev_id.device_type = HV_DEVICE_TYPE_PCI;
  100. dev_id.pci.segment = pci_domain_nr(dev->bus);
  101. dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
  102. dev_id.pci.bdf.device = PCI_SLOT(data.rid);
  103. dev_id.pci.bdf.function = PCI_FUNC(data.rid);
  104. dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
  105. if (data.bridge) {
  106. int pos;
  107. /*
  108. * Microsoft Hypervisor requires a bus range when the bridge is
  109. * running in PCI-X mode.
  110. *
  111. * To distinguish conventional vs PCI-X bridge, we can check
  112. * the bridge's PCI-X Secondary Status Register, Secondary Bus
  113. * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
  114. * Specification Revision 1.0 5.2.2.1.3.
  115. *
  116. * Value zero means it is in conventional mode, otherwise it is
  117. * in PCI-X mode.
  118. */
  119. pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
  120. if (pos) {
  121. u16 status;
  122. pci_read_config_word(data.bridge, pos +
  123. PCI_X_BRIDGE_SSTATUS, &status);
  124. if (status & PCI_X_SSTATUS_FREQ) {
  125. /* Non-zero, PCI-X mode */
  126. u8 sec_bus, sub_bus;
  127. dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
  128. pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
  129. dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
  130. pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
  131. dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
  132. }
  133. }
  134. }
  135. return dev_id;
  136. }
  137. static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
  138. struct hv_interrupt_entry *entry)
  139. {
  140. union hv_device_id device_id = hv_build_pci_dev_id(dev);
  141. return hv_map_interrupt(device_id, false, cpu, vector, entry);
  142. }
  143. static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
  144. {
  145. /* High address is always 0 */
  146. msg->address_hi = 0;
  147. msg->address_lo = entry->msi_entry.address.as_uint32;
  148. msg->data = entry->msi_entry.data.as_uint32;
  149. }
  150. static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
  151. static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
  152. {
  153. struct msi_desc *msidesc;
  154. struct pci_dev *dev;
  155. struct hv_interrupt_entry out_entry, *stored_entry;
  156. struct irq_cfg *cfg = irqd_cfg(data);
  157. const cpumask_t *affinity;
  158. int cpu;
  159. u64 status;
  160. msidesc = irq_data_get_msi_desc(data);
  161. dev = msi_desc_to_pci_dev(msidesc);
  162. if (!cfg) {
  163. pr_debug("%s: cfg is NULL", __func__);
  164. return;
  165. }
  166. affinity = irq_data_get_effective_affinity_mask(data);
  167. cpu = cpumask_first_and(affinity, cpu_online_mask);
  168. if (data->chip_data) {
  169. /*
  170. * This interrupt is already mapped. Let's unmap first.
  171. *
  172. * We don't use retarget interrupt hypercalls here because
  173. * Microsoft Hypervisor doens't allow root to change the vector
  174. * or specify VPs outside of the set that is initially used
  175. * during mapping.
  176. */
  177. stored_entry = data->chip_data;
  178. data->chip_data = NULL;
  179. status = hv_unmap_msi_interrupt(dev, stored_entry);
  180. kfree(stored_entry);
  181. if (status != HV_STATUS_SUCCESS) {
  182. pr_debug("%s: failed to unmap, status %lld", __func__, status);
  183. return;
  184. }
  185. }
  186. stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
  187. if (!stored_entry) {
  188. pr_debug("%s: failed to allocate chip data\n", __func__);
  189. return;
  190. }
  191. status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
  192. if (status != HV_STATUS_SUCCESS) {
  193. kfree(stored_entry);
  194. return;
  195. }
  196. *stored_entry = out_entry;
  197. data->chip_data = stored_entry;
  198. entry_to_msi_msg(&out_entry, msg);
  199. return;
  200. }
  201. static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
  202. {
  203. return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
  204. }
  205. static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
  206. {
  207. struct hv_interrupt_entry old_entry;
  208. struct msi_msg msg;
  209. u64 status;
  210. if (!irqd->chip_data) {
  211. pr_debug("%s: no chip data\n!", __func__);
  212. return;
  213. }
  214. old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
  215. entry_to_msi_msg(&old_entry, &msg);
  216. kfree(irqd->chip_data);
  217. irqd->chip_data = NULL;
  218. status = hv_unmap_msi_interrupt(dev, &old_entry);
  219. if (status != HV_STATUS_SUCCESS)
  220. pr_err("%s: hypercall failed, status %lld\n", __func__, status);
  221. }
  222. static void hv_msi_free_irq(struct irq_domain *domain,
  223. struct msi_domain_info *info, unsigned int virq)
  224. {
  225. struct irq_data *irqd = irq_get_irq_data(virq);
  226. struct msi_desc *desc;
  227. if (!irqd)
  228. return;
  229. desc = irq_data_get_msi_desc(irqd);
  230. if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
  231. return;
  232. hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
  233. }
  234. /*
  235. * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
  236. * which implement the MSI or MSI-X Capability Structure.
  237. */
  238. static struct irq_chip hv_pci_msi_controller = {
  239. .name = "HV-PCI-MSI",
  240. .irq_unmask = pci_msi_unmask_irq,
  241. .irq_mask = pci_msi_mask_irq,
  242. .irq_ack = irq_chip_ack_parent,
  243. .irq_retrigger = irq_chip_retrigger_hierarchy,
  244. .irq_compose_msi_msg = hv_irq_compose_msi_msg,
  245. .irq_set_affinity = msi_domain_set_affinity,
  246. .flags = IRQCHIP_SKIP_SET_WAKE,
  247. };
  248. static struct msi_domain_ops pci_msi_domain_ops = {
  249. .msi_free = hv_msi_free_irq,
  250. .msi_prepare = pci_msi_prepare,
  251. };
  252. static struct msi_domain_info hv_pci_msi_domain_info = {
  253. .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
  254. MSI_FLAG_PCI_MSIX,
  255. .ops = &pci_msi_domain_ops,
  256. .chip = &hv_pci_msi_controller,
  257. .handler = handle_edge_irq,
  258. .handler_name = "edge",
  259. };
  260. struct irq_domain * __init hv_create_pci_msi_domain(void)
  261. {
  262. struct irq_domain *d = NULL;
  263. struct fwnode_handle *fn;
  264. fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
  265. if (fn)
  266. d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
  267. /* No point in going further if we can't get an irq domain */
  268. BUG_ON(!d);
  269. return d;
  270. }
  271. #endif /* CONFIG_PCI_MSI */
  272. int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
  273. {
  274. union hv_device_id device_id;
  275. device_id.as_uint64 = 0;
  276. device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
  277. device_id.ioapic.ioapic_id = (u8)ioapic_id;
  278. return hv_unmap_interrupt(device_id.as_uint64, entry);
  279. }
  280. EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
  281. int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
  282. struct hv_interrupt_entry *entry)
  283. {
  284. union hv_device_id device_id;
  285. device_id.as_uint64 = 0;
  286. device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
  287. device_id.ioapic.ioapic_id = (u8)ioapic_id;
  288. return hv_map_interrupt(device_id, level, cpu, vector, entry);
  289. }
  290. EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);