xdp: tracking page_pool resources and safe removal
This patch is needed before we can allow drivers to use page_pool for DMA-mappings. Today with page_pool and XDP return API, it is possible to remove the page_pool object (from rhashtable), while there are still in-flight packet-pages. This is safely handled via RCU and failed lookups in __xdp_return() fallback to call put_page(), when page_pool object is gone. In-case page is still DMA mapped, this will result in page note getting correctly DMA unmapped. To solve this, the page_pool is extended with tracking in-flight pages. And XDP disconnect system queries page_pool and waits, via workqueue, for all in-flight pages to be returned. To avoid killing performance when tracking in-flight pages, the implement use two (unsigned) counters, that in placed on different cache-lines, and can be used to deduct in-flight packets. This is done by mapping the unsigned "sequence" counters onto signed Two's complement arithmetic operations. This is e.g. used by kernel's time_after macros, described in kernel commit1ba3aab303
and5a581b367b
, and also explained in RFC1982. The trick is these two incrementing counters only need to be read and compared, when checking if it's safe to free the page_pool structure. Which will only happen when driver have disconnected RX/alloc side. Thus, on a non-fast-path. It is chosen that page_pool tracking is also enabled for the non-DMA use-case, as this can be used for statistics later. After this patch, using page_pool requires more strict resource "release", e.g. via page_pool_release_page() that was introduced in this patchset, and previous patches implement/fix this more strict requirement. Drivers no-longer call page_pool_destroy(). Drivers already call xdp_rxq_info_unreg() which call xdp_rxq_info_unreg_mem_model(), which will attempt to disconnect the mem id, and if attempt fails schedule the disconnect for later via delayed workqueue. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:

committed by
David S. Miller

parent
29b006a676
commit
99c07c43c4
@@ -38,6 +38,7 @@ struct xdp_mem_allocator {
|
||||
};
|
||||
struct rhash_head node;
|
||||
struct rcu_head rcu;
|
||||
struct delayed_work defer_wq;
|
||||
};
|
||||
|
||||
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
|
||||
@@ -79,13 +80,13 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
|
||||
|
||||
xa = container_of(rcu, struct xdp_mem_allocator, rcu);
|
||||
|
||||
/* Allocator have indicated safe to remove before this is called */
|
||||
if (xa->mem.type == MEM_TYPE_PAGE_POOL)
|
||||
page_pool_free(xa->page_pool);
|
||||
|
||||
/* Allow this ID to be reused */
|
||||
ida_simple_remove(&mem_id_pool, xa->mem.id);
|
||||
|
||||
/* Notice, driver is expected to free the *allocator,
|
||||
* e.g. page_pool, and MUST also use RCU free.
|
||||
*/
|
||||
|
||||
/* Poison memory */
|
||||
xa->mem.id = 0xFFFF;
|
||||
xa->mem.type = 0xF0F0;
|
||||
@@ -94,6 +95,46 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
|
||||
kfree(xa);
|
||||
}
|
||||
|
||||
bool __mem_id_disconnect(int id)
|
||||
{
|
||||
struct xdp_mem_allocator *xa;
|
||||
bool safe_to_remove = true;
|
||||
|
||||
mutex_lock(&mem_id_lock);
|
||||
|
||||
xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
|
||||
if (!xa) {
|
||||
mutex_unlock(&mem_id_lock);
|
||||
WARN(1, "Request remove non-existing id(%d), driver bug?", id);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Detects in-flight packet-pages for page_pool */
|
||||
if (xa->mem.type == MEM_TYPE_PAGE_POOL)
|
||||
safe_to_remove = page_pool_request_shutdown(xa->page_pool);
|
||||
|
||||
if (safe_to_remove &&
|
||||
!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
|
||||
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
|
||||
|
||||
mutex_unlock(&mem_id_lock);
|
||||
return safe_to_remove;
|
||||
}
|
||||
|
||||
#define DEFER_TIME (msecs_to_jiffies(1000))
|
||||
|
||||
static void mem_id_disconnect_defer_retry(struct work_struct *wq)
|
||||
{
|
||||
struct delayed_work *dwq = to_delayed_work(wq);
|
||||
struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
|
||||
|
||||
if (__mem_id_disconnect(xa->mem.id))
|
||||
return;
|
||||
|
||||
/* Still not ready to be disconnected, retry later */
|
||||
schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
|
||||
}
|
||||
|
||||
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
|
||||
{
|
||||
struct xdp_mem_allocator *xa;
|
||||
@@ -112,16 +153,28 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
|
||||
if (id == 0)
|
||||
return;
|
||||
|
||||
if (__mem_id_disconnect(id))
|
||||
return;
|
||||
|
||||
/* Could not disconnect, defer new disconnect attempt to later */
|
||||
mutex_lock(&mem_id_lock);
|
||||
|
||||
xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
|
||||
if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
|
||||
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
|
||||
if (!xa) {
|
||||
mutex_unlock(&mem_id_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
|
||||
mutex_unlock(&mem_id_lock);
|
||||
schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
|
||||
|
||||
/* This unregister operation will also cleanup and destroy the
|
||||
* allocator. The page_pool_free() operation is first called when it's
|
||||
* safe to remove, possibly deferred to a workqueue.
|
||||
*/
|
||||
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
|
||||
{
|
||||
/* Simplify driver cleanup code paths, allow unreg "unused" */
|
||||
|
Reference in New Issue
Block a user