Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe: "This is the main pull request for block IO related changes for the 4.16 kernel. Nothing major in this pull request, but a good amount of improvements and fixes all over the map. This contains: - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and Paolo. - Support for SMR zones for deadline and mq-deadline from Damien and Christoph. - Set of fixes for bcache by way of Michael Lyle, including fixes from himself, Kent, Rui, Tang, and Coly. - Series from Matias for lightnvm with fixes from Hans Holmberg, Javier, and Matias. Mostly centered around pblk, and the removal of rrpc 1.2 in preparation for supporting 2.0. - A couple of NVMe pull requests from Christoph. Nothing major in here, just fixes and cleanups, and support for command tracing from Johannes. - Support for blk-throttle for tracking reads and writes separately. From Joseph Qi. A few cleanups/fixes also for blk-throttle from Weiping. - Series from Mike Snitzer that enables dm to register its queue more logically, something that's always been problematic on dm since it's a stacked device. - Series from Ming cleaning up some of the bio accessor use, in preparation for supporting multipage bvecs. - Various fixes from Ming closing up holes around queue mapping and quiescing. - BSD partition fix from Richard Narron, fixing a problem where we can't mount newer (10/11) FreeBSD partitions. - Series from Tejun reworking blk-mq timeout handling. The previous scheme relied on atomic bits, but it had races where we would think a request had timed out if it got reused at the wrong time. - null_blk now supports faking timeouts, to enable us to better exercise and test that functionality separately. From me. - Kill the separate atomic poll bit in the request struct. After this, we don't use the atomic bits on blk-mq anymore at all. From me. - sgl_alloc/free helpers from Bart. - Heavily contended tag case scalability improvement from me. 
- Various little fixes and cleanups from Arnd, Bart, Corentin, Douglas, Eryu, Goldwyn, and myself" * 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits) block: remove smart1,2.h nvme: add tracepoint for nvme_complete_rq nvme: add tracepoint for nvme_setup_cmd nvme-pci: introduce RECONNECTING state to mark initializing procedure nvme-rdma: remove redundant boolean for inline_data nvme: don't free uuid pointer before printing it nvme-pci: Suspend queues after deleting them bsg: use pr_debug instead of hand crafted macros blk-mq-debugfs: don't allow write on attributes with seq_operations set nvme-pci: Fix queue double allocations block: Set BIO_TRACE_COMPLETION on new bio during split blk-throttle: use queue_is_rq_based block: Remove kblockd_schedule_delayed_work{,_on}() blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly() lib/scatterlist: Fix chaining support in sgl_alloc_order() blk-throttle: track read and write request individually block: add bdev_read_only() checks to common helpers block: fail op_is_write() requests to read-only partitions blk-throttle: export io_serviced_recursive, io_service_bytes_recursive ...
2018-01-29 11:51:49 -08:00
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -409,6 +409,10 @@ config HAS_DMA
 	depends on !NO_DMA
 	default y

+config SGL_ALLOC
+	bool
+	default n
+
 config DMA_NOOP_OPS
 	bool
 	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -462,7 +462,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
 		 */
 		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
 		sbq_index_atomic_inc(&sbq->wake_index);
-		wake_up(&ws->wait);
+		wake_up_nr(&ws->wait, wake_batch);
 	}
 }

--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -474,6 +474,133 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages);

+#ifdef CONFIG_SGL_ALLOC
+
+/**
+ * sgl_alloc_order - allocate a scatterlist and its pages
+ * @length: Length in bytes of the scatterlist. Must be at least one
+ * @order: Second argument for alloc_pages(); also used to free on failure
+ * @chainable: Whether or not to allocate an extra element in the scatterlist
+ *	for scatterlist chaining purposes
+ * @gfp: Memory allocation flags
+ * @nent_p: [out] Number of entries in the scatterlist that have pages
+ *
+ * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
+ */
+struct scatterlist *sgl_alloc_order(unsigned long long length,
+				    unsigned int order, bool chainable,
+				    gfp_t gfp, unsigned int *nent_p)
+{
+	struct scatterlist *sgl, *sg;
+	struct page *page;
+	unsigned int nent, nalloc;
+	u32 elem_len;
+
+	nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order);
+	/* Check for integer overflow */
+	if (length > (nent << (PAGE_SHIFT + order)))
+		return NULL;
+	nalloc = nent;
+	if (chainable) {
+		/* Check for integer overflow */
+		if (nalloc + 1 < nalloc)
+			return NULL;
+		nalloc++;
+	}
+	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
+			    (gfp & ~GFP_DMA) | __GFP_ZERO);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, nalloc);
+	sg = sgl;
+	while (length) {
+		elem_len = min_t(u64, length, PAGE_SIZE << order);
+		page = alloc_pages(gfp, order);
+		if (!page) {
+			/* Pages were allocated with @order; free them the same way */
+			sgl_free_order(sgl, order);
+			return NULL;
+		}
+		sg_set_page(sg, page, elem_len, 0);
+		length -= elem_len;
+		sg = sg_next(sg);
+	}
+	WARN_ONCE(length, "length = %lld\n", length);
+	if (nent_p)
+		*nent_p = nent;
+	return sgl;
+}
+EXPORT_SYMBOL(sgl_alloc_order);
+
+/**
+ * sgl_alloc - allocate a scatterlist and its pages using order 0, no chaining
+ * @length: Length in bytes of the scatterlist
+ * @gfp: Memory allocation flags, forwarded to sgl_alloc_order()
+ * @nent_p: [out] Number of entries in the scatterlist
+ *
+ * Returns: A pointer to an initialized scatterlist or %NULL upon failure.
+ */
+struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
+			      unsigned int *nent_p)
+{
+	return sgl_alloc_order(length, 0, false, gfp, nent_p);
+}
+EXPORT_SYMBOL(sgl_alloc);
+
+/**
+ * sgl_free_n_order - release the pages of a scatterlist, then the list itself
+ * @sgl: Scatterlist with one or more elements
+ * @nents: Upper bound on the number of elements whose pages are freed
+ * @order: Order passed to __free_pages() for every page
+ *
+ * Notes:
+ * - When several scatterlists have been chained and each chain element is
+ *   freed on its own, @nents must be exact; otherwise a page could be freed
+ *   twice.
+ * - To free every page of a chained scatterlist in one call, pass a large
+ *   value for @nents.
+ */
+void sgl_free_n_order(struct scatterlist *sgl, int nents, int order)
+{
+	struct scatterlist *cur;
+	struct page *pg;
+	int idx;
+
+	/* Stop after @nents entries or at the end of the list, whichever is first */
+	for (idx = 0, cur = sgl; idx < nents && cur; idx++, cur = sg_next(cur)) {
+		pg = sg_page(cur);
+		if (!pg)
+			continue;
+		__free_pages(pg, order);
+	}
+	kfree(sgl);
+}
+EXPORT_SYMBOL(sgl_free_n_order);
+
+/**
+ * sgl_free_order - free a scatterlist and its pages, up to INT_MAX elements
+ * @sgl: Scatterlist with one or more elements; forwarded to sgl_free_n_order()
+ * @order: Second argument for __free_pages()
+ */
+void sgl_free_order(struct scatterlist *sgl, int order)
+{
+	sgl_free_n_order(sgl, INT_MAX, order);
+}
+EXPORT_SYMBOL(sgl_free_order);
+
+/**
+ * sgl_free - free a scatterlist and its pages, treating each page as order 0
+ * @sgl: Scatterlist with one or more elements; forwarded to sgl_free_order()
+ */
+void sgl_free(struct scatterlist *sgl)
+{
+	sgl_free_order(sgl, 0);
+}
+EXPORT_SYMBOL(sgl_free);
+
+#endif /* CONFIG_SGL_ALLOC */
+
 void __sg_page_iter_start(struct sg_page_iter *piter,
 			  struct scatterlist *sglist, unsigned int nents,
 			  unsigned long pgoffset)