
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_

#include <linux/mmzone.h>
#include <linux/range.h>
#include <linux/ioport.h>
#include <linux/percpu-refcount.h>

struct resource;
struct device;

/**
 * struct vmem_altmap - pre-allocated storage for vmemmap_populate
 * @base_pfn: base of the entire dev_pagemap mapping
 * @reserve: pages mapped, but reserved for driver use (relative to @base_pfn)
 * @free: free pages set aside in the mapping for memmap storage
 * @align: pages reserved to meet allocation alignments
 * @alloc: track pages consumed, private to vmemmap_populate()
 */
struct vmem_altmap {
	unsigned long base_pfn;
	const unsigned long end_pfn;
	const unsigned long reserve;
	unsigned long free;
	unsigned long align;
	unsigned long alloc;
};

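/*
 * Example (illustrative sketch, not part of the kernel API): a driver that
 * wants the memmap for a ZONE_DEVICE range stored in the device memory
 * itself might describe that carve-out roughly as below. The PMEM_*
 * identifiers are hypothetical placeholders.
 *
 *	struct vmem_altmap altmap = {
 *		.base_pfn = PHYS_PFN(PMEM_START),
 *		.reserve  = PMEM_LABEL_PFNS,	// pfns kept back for driver metadata
 *		.free     = PMEM_MEMMAP_PFNS,	// pfns offered to vmemmap_populate()
 *	};
 *
 * vmem_altmap_offset() then reports how many leading pfns of the range are
 * set aside (the reserve plus the memmap carve-out) and are therefore not
 * usable as ordinary ZONE_DEVICE pages.
 */
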
/*
 * Specialize ZONE_DEVICE memory into multiple types, each of which has a
 * different usage.
 *
 * MEMORY_DEVICE_PRIVATE:
 * Device memory that is not directly addressable by the CPU: the CPU can
 * neither read nor write private memory. In this case, we do still have
 * struct pages backing the device memory. Doing so simplifies the
 * implementation, but it is important to remember that there are certain
 * points at which the struct page must be treated as an opaque object,
 * rather than a "normal" struct page.
 *
 * A more complete discussion of unaddressable memory may be found in
 * include/linux/hmm.h and Documentation/mm/hmm.rst.
 *
 * MEMORY_DEVICE_COHERENT:
 * Device memory that is cache coherent from both the device's and the CPU's
 * point of view. This is used on platforms that have an advanced system bus
 * (like CAPI or CXL). A driver can hotplug the device memory using
 * ZONE_DEVICE with that memory type. Any page of a process can be migrated
 * to such memory. However, no one should be allowed to pin such memory so
 * that it can always be evicted.
 *
 * MEMORY_DEVICE_FS_DAX:
 * Host memory that has similar access semantics as System RAM, i.e. it is
 * DMA coherent and supports page pinning. In support of coordinating page
 * pinning vs other operations, MEMORY_DEVICE_FS_DAX arranges for a
 * wakeup event whenever a page is unpinned and becomes idle. This
 * wakeup is used to coordinate physical address space management (ex:
 * fs truncate/hole punch) vs pinned pages (ex: device DMA).
 *
 * MEMORY_DEVICE_GENERIC:
 * Host memory that has similar access semantics as System RAM, i.e. it is
 * DMA coherent and supports page pinning. This is for example used by DAX
 * devices that expose memory using a character device.
 *
 * MEMORY_DEVICE_PCI_P2PDMA:
 * Device memory residing in a PCI BAR, intended for use with peer-to-peer
 * transactions.
 */

enum memory_type {
	/* 0 is reserved to catch uninitialized type fields */
	MEMORY_DEVICE_PRIVATE = 1,
	MEMORY_DEVICE_COHERENT,
	MEMORY_DEVICE_FS_DAX,
	MEMORY_DEVICE_GENERIC,
	MEMORY_DEVICE_PCI_P2PDMA,
};

struct dev_pagemap_ops {
	/*
	 * Called once the page refcount reaches 0. The reference count will
	 * be reset to one by the core code after the method is called to
	 * prepare for handing out the page again.
	 */
	void (*page_free)(struct page *page);

	/*
	 * Used for private (un-addressable) device memory only. Must migrate
	 * the page back to a CPU accessible page.
	 */
	vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);

	/*
	 * Handle the memory failure that happens on a range of pfns. Notify
	 * the processes who are using these pfns, and try to recover the
	 * data on them if necessary. mf_flags is finally passed to the
	 * recovery function through the whole notify routine.
	 *
	 * When this is not implemented, or it returns -EOPNOTSUPP, the caller
	 * will fall back to a common handler called mf_generic_kill_procs().
	 */
	int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn,
			      unsigned long nr_pages, int mf_flags);
};

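/*
 * Example (illustrative sketch, not part of the kernel API): a driver for
 * MEMORY_DEVICE_PRIVATE memory would typically provide at least page_free()
 * and migrate_to_ram(). The my_* identifiers below are hypothetical driver
 * functions.
 *
 *	static void my_page_free(struct page *page)
 *	{
 *		// hand the backing device block back to the driver allocator
 *	}
 *
 *	static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
 *	{
 *		// migrate the faulting device-private page back to system RAM
 *		return 0;
 *	}
 *
 *	static const struct dev_pagemap_ops my_pgmap_ops = {
 *		.page_free	= my_page_free,
 *		.migrate_to_ram	= my_migrate_to_ram,
 *	};
 */
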
#define PGMAP_ALTMAP_VALID	(1 << 0)

/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
 * @altmap: pre-allocated/reserved memory for vmemmap allocations
 * @ref: reference count that pins the devm_memremap_pages() mapping
 * @done: completion for @ref
 * @type: memory type: see MEMORY_* in memory_hotplug.h
 * @flags: PGMAP_* flags to specify detailed behavior
 * @vmemmap_shift: structural definition of how the vmemmap page metadata
 *	is populated, specifically the metadata page order.
 *	A zero value (default) uses base pages as the vmemmap metadata
 *	representation. A bigger value will set up compound struct pages
 *	of the requested order value.
 * @ops: method table
 * @owner: an opaque pointer identifying the entity that manages this
 *	instance. Used by various helpers to make sure that no
 *	foreign ZONE_DEVICE memory is accessed.
 * @nr_range: number of ranges to be mapped
 * @range: range to be mapped when nr_range == 1
 * @ranges: array of ranges to be mapped when nr_range > 1
 */
struct dev_pagemap {
	struct vmem_altmap altmap;
	struct percpu_ref ref;
	struct completion done;
	enum memory_type type;
	unsigned int flags;
	unsigned long vmemmap_shift;
	const struct dev_pagemap_ops *ops;
	void *owner;
	int nr_range;
	union {
		struct range range;
		struct range ranges[0];
	};
};

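/*
 * Example (illustrative sketch, not part of the kernel API): minimal setup
 * of a single-range MEMORY_DEVICE_PRIVATE pagemap. The res, my_pgmap_ops and
 * my_driver identifiers are hypothetical, and error handling is abbreviated.
 *
 *	pgmap->type = MEMORY_DEVICE_PRIVATE;
 *	pgmap->range.start = res->start;
 *	pgmap->range.end = res->end;
 *	pgmap->nr_range = 1;
 *	pgmap->ops = &my_pgmap_ops;
 *	pgmap->owner = my_driver;
 *
 *	addr = devm_memremap_pages(dev, pgmap);
 *	if (IS_ERR(addr))
 *		return PTR_ERR(addr);
 */
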
static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap)
{
	return pgmap->ops && pgmap->ops->memory_failure;
}

static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
{
	if (pgmap->flags & PGMAP_ALTMAP_VALID)
		return &pgmap->altmap;
	return NULL;
}

static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap)
{
	return 1 << pgmap->vmemmap_shift;
}

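/*
 * Example (worked arithmetic, assuming 4K base pages): a pagemap created
 * with vmemmap_shift == 9 describes its memory with compound pages of
 * 2^9 = 512 base pages (2MB), so pgmap_vmemmap_nr() returns 512. With the
 * default vmemmap_shift of 0 it returns 1, i.e. plain base pages.
 */
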
static inline bool is_device_private_page(const struct page *page)
{
	return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
		is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
}

static inline bool folio_is_device_private(const struct folio *folio)
{
	return is_device_private_page(&folio->page);
}

static inline bool is_pci_p2pdma_page(const struct page *page)
{
	return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
		is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}

static inline bool is_device_coherent_page(const struct page *page)
{
	return is_zone_device_page(page) &&
		page->pgmap->type == MEMORY_DEVICE_COHERENT;
}

static inline bool folio_is_device_coherent(const struct folio *folio)
{
	return is_device_coherent_page(&folio->page);
}

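/*
 * Example (illustrative sketch): callers typically use the helpers above to
 * decide how a ZONE_DEVICE page may be touched, e.g.:
 *
 *	if (is_device_private_page(page)) {
 *		// unaddressable: do not read or write the contents from the
 *		// CPU; only pgmap->ops->migrate_to_ram() can bring the data
 *		// back to system RAM
 *	} else if (is_pci_p2pdma_page(page)) {
 *		// PCI BAR memory: only meaningful as a peer-to-peer DMA target
 *	}
 */
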
#ifdef CONFIG_ZONE_DEVICE
void zone_device_page_init(struct page *page);
void *memremap_pages(struct dev_pagemap *pgmap, int nid);
void memunmap_pages(struct dev_pagemap *pgmap);
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap);
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
unsigned long memremap_compat_align(void);
#else
static inline void *devm_memremap_pages(struct device *dev,
		struct dev_pagemap *pgmap)
{
	/*
	 * Fail attempts to call devm_memremap_pages() without
	 * ZONE_DEVICE support enabled; this requires callers to fall
	 * back to plain devm_memremap() based on config.
	 */
	WARN_ON_ONCE(1);
	return ERR_PTR(-ENXIO);
}

static inline void devm_memunmap_pages(struct device *dev,
		struct dev_pagemap *pgmap)
{
}

static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap)
{
	return NULL;
}

static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
{
	return false;
}

static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
	return 0;
}

static inline void vmem_altmap_free(struct vmem_altmap *altmap,
		unsigned long nr_pfns)
{
}

/* when memremap_pages() is disabled all archs can remap a single page */
static inline unsigned long memremap_compat_align(void)
{
	return PAGE_SIZE;
}
#endif /* CONFIG_ZONE_DEVICE */

static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
	if (pgmap)
		percpu_ref_put(&pgmap->ref);
}

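/*
 * Example (illustrative sketch): a lookup of the pagemap covering a pfn
 * takes a reference that must be dropped again with put_dev_pagemap():
 *
 *	struct dev_pagemap *pgmap;
 *
 *	pgmap = get_dev_pagemap(pfn, NULL);
 *	if (pgmap) {
 *		// inspect pgmap->type, pgmap->owner, ...
 *		put_dev_pagemap(pgmap);
 *	}
 */
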
#endif /* _LINUX_MEMREMAP_H_ */