/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Definitions for the 'struct ptr_ring' data structure.
 *
 *	Author:
 *		Michael S. Tsirkin <[email protected]>
 *
 *	Copyright (C) 2016 Red Hat, Inc.
 *
 * This is a limited-size FIFO maintaining pointers in FIFO order, with
 * one CPU producing entries and another consuming them.
 *
 * This implementation tries to minimize cache-contention when there is a
 * single producer and a single consumer CPU.
 */

#ifndef _LINUX_PTR_RING_H
#define _LINUX_PTR_RING_H 1

#ifdef __KERNEL__
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/errno.h>
#endif

struct ptr_ring {
	int producer ____cacheline_aligned_in_smp;
	spinlock_t producer_lock;
	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
	int consumer_tail; /* next entry to invalidate */
	spinlock_t consumer_lock;
	/* Shared consumer/producer data */
	/* Read-only by both the producer and the consumer */
	int size ____cacheline_aligned_in_smp; /* max entries in queue */
	int batch; /* number of entries to consume in a batch */
	void **queue;
};

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 *
 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
 * see e.g. ptr_ring_full.
 */
static inline bool __ptr_ring_full(struct ptr_ring *r)
{
	return r->queue[r->producer];
}

static inline bool ptr_ring_full(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_full(r);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline bool ptr_ring_full_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax(). Callers must hold producer_lock.
 * Callers are responsible for making sure the pointer being queued
 * points to valid data.
 */
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	if (unlikely(!r->size) || r->queue[r->producer])
		return -ENOSPC;

	/* Make sure the pointer we are storing points to valid data. */
	/* Pairs with the dependency ordering in __ptr_ring_consume. */
	smp_wmb();

	WRITE_ONCE(r->queue[r->producer++], ptr);
	if (unlikely(r->producer >= r->size))
		r->producer = 0;
	return 0;
}
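
/*
 * Example (illustrative sketch, not part of the API): a producer
 * busy-waiting for free space while holding producer_lock.  cpu_relax()
 * supplies the compiler barrier required when calling this in a loop;
 * "obj" is a hypothetical caller-owned pointer.
 *
 *	while (__ptr_ring_produce(r, obj))
 *		cpu_relax();
 */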

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * consume in interrupt or BH context, you must disable interrupts/BH when
 * calling this.
 */
static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
	if (likely(r->size))
		return READ_ONCE(r->queue[r->consumer_head]);
	return NULL;
}

/*
 * Test ring empty status without taking any locks.
 *
 * NB: This is only safe to call if the ring is never resized.
 *
 * However, if some other CPU consumes ring entries at the same time, the value
 * returned is not guaranteed to be correct.
 *
 * In this case, to avoid incorrectly detecting the ring
 * as empty, the CPU consuming the ring entries is responsible
 * for either consuming all ring entries until the ring is empty,
 * or synchronizing with some other CPU and causing it to
 * re-test __ptr_ring_empty and/or consume the ring entries
 * after the synchronization point.
 *
 * Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 */
static inline bool __ptr_ring_empty(struct ptr_ring *r)
{
	if (likely(r->size))
		return !r->queue[READ_ONCE(r->consumer_head)];
	return true;
}
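
/*
 * Example (illustrative sketch): the consumer pattern described above,
 * draining the ring until it is empty so a lockless __ptr_ring_empty()
 * test elsewhere cannot stay stale.  "process" is a hypothetical
 * callback; the caller must hold consumer_lock.
 *
 *	void *obj;
 *
 *	while ((obj = __ptr_ring_consume(r)))
 *		process(obj);
 */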

static inline bool ptr_ring_empty(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_empty(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
	/* Fundamentally, what we want to do is update consumer
	 * index and zero out the entry so producer can reuse it.
	 * Doing it naively at each consume would be as simple as:
	 *       consumer = r->consumer;
	 *       r->queue[consumer++] = NULL;
	 *       if (unlikely(consumer >= r->size))
	 *               consumer = 0;
	 *       r->consumer = consumer;
	 * but that is suboptimal when the ring is full as the producer is
	 * writing out new entries in the same cache line.  Defer these
	 * updates until a batch of entries has been consumed.
	 */
	/* Note: we must keep consumer_head valid at all times for
	 * __ptr_ring_empty to work correctly.
	 */
	int consumer_head = r->consumer_head;
	int head = consumer_head++;

	/* Once we have processed enough entries, invalidate them in
	 * the ring all at once so the producer can reuse their space.
	 * We also do this when we reach the end of the ring; not mandatory,
	 * but it helps keep the implementation simple.
	 */
	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
		     consumer_head >= r->size)) {
		/* Zero out entries in reverse order: this way the cache line
		 * that the producer might currently be reading is touched
		 * last; the producer won't make progress and touch other
		 * cache lines besides the first one until we write out all
		 * entries.
		 */
		while (likely(head >= r->consumer_tail))
			r->queue[head--] = NULL;
		r->consumer_tail = consumer_head;
	}
	if (unlikely(consumer_head >= r->size)) {
		consumer_head = 0;
		r->consumer_tail = 0;
	}
	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
	WRITE_ONCE(r->consumer_head, consumer_head);
}
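
/*
 * Worked example (illustrative numbers): with size = 16 and batch = 8,
 * consuming entries 0..6 only advances consumer_head.  Consuming entry 7
 * makes consumer_head - consumer_tail reach the batch size, so entries
 * 7..0 are NULLed in reverse order and consumer_tail becomes 8.  A
 * producer waiting on entry 0 resumes only after the whole batch has
 * been handed back, touching that cache line once rather than per entry.
 */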

static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	/* The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer is up to date. Pairs
	 * with smp_wmb in __ptr_ring_produce.
	 */
	ptr = __ptr_ring_peek(r);
	if (ptr)
		__ptr_ring_discard_one(r);

	return ptr;
}

static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	void *ptr;
	int i;

	for (i = 0; i < n; i++) {
		ptr = __ptr_ring_consume(r);
		if (!ptr)
			break;
		array[i] = ptr;
	}

	return i;
}
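
/*
 * Example (illustrative sketch): draining up to 16 entries per call to
 * amortize locking and index updates.  "process" is a hypothetical
 * callback; the caller must hold consumer_lock (or use the locked
 * ptr_ring_consume_batched variants below).
 *
 *	void *batch[16];
 *	int i, n;
 *
 *	n = __ptr_ring_consume_batched(r, batch, 16);
 *	for (i = 0; i < n; i++)
 *		process(batch[i]);
 */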

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * call this in interrupt or BH context, you must disable interrupts/BH when
 * producing.
 */
static inline void *ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	spin_lock(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_irq(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irq(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_any(struct ptr_ring *r)
{
	unsigned long flags;
	void *ptr;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ptr;
}

static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_bh(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_bh(&r->consumer_lock);

	return ptr;
}

static inline int ptr_ring_consume_batched(struct ptr_ring *r,
					   void **array, int n)
{
	int ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
					       void **array, int n)
{
	int ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
					       void **array, int n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
					      void **array, int n)
{
	int ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Cast to structure type and call a function without discarding from FIFO.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))

#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f; \
	\
	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})
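
/*
 * Example (illustrative sketch): peeking at the head entry without
 * consuming it.  "struct item" and "item_len" are hypothetical.  Because
 * the macros evaluate typeof((f)(NULL)) and __ptr_ring_peek() returns
 * NULL on an empty ring, the helper must tolerate a NULL argument.
 *
 *	static int item_len(void *p)
 *	{
 *		struct item *it = p;
 *
 *		return it ? it->len : 0;
 *	}
 *
 *	len = PTR_RING_PEEK_CALL_ANY(&ring, item_len);
 */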

/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
 * documentation for vmalloc for which of them are legal.
 */
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
		return NULL;
	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
}

static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
{
	r->size = size;
	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
	/* We need to set batch to at least 1 to make the logic
	 * in __ptr_ring_discard_one work correctly.
	 * Batching too much (because the ring is small) would cause a lot of
	 * burstiness. Needs tuning, for now disable batching.
	 */
	if (r->batch > r->size / 2 || !r->batch)
		r->batch = 1;
}
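
/*
 * Worked example (illustrative numbers): with SMP_CACHE_BYTES == 64 and
 * 8-byte pointers, batch = 64 * 2 / 8 = 16.  A ring of size 1024 keeps
 * batch = 16, so entries are invalidated 16 at a time; a ring of size 16
 * would have batch > size / 2, so batching is disabled and batch falls
 * back to 1.
 */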

static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
	if (!r->queue)
		return -ENOMEM;

	__ptr_ring_set_size(r, size);
	r->producer = r->consumer_head = r->consumer_tail = 0;
	spin_lock_init(&r->producer_lock);
	spin_lock_init(&r->consumer_lock);

	return 0;
}
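
/*
 * Example (illustrative sketch): a typical single-producer,
 * single-consumer lifecycle.  "obj" and "free_obj" are hypothetical.
 *
 *	struct ptr_ring ring;
 *	void *obj;
 *
 *	if (ptr_ring_init(&ring, 512, GFP_KERNEL))
 *		return -ENOMEM;
 *
 *	Producer side:
 *	if (ptr_ring_produce(&ring, obj))
 *		... ring full: back off or drop ...
 *
 *	Consumer side:
 *	obj = ptr_ring_consume(&ring);
 *
 *	Teardown (destroys any leftover entries):
 *	ptr_ring_cleanup(&ring, free_obj);
 */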

/*
 * Return entries into ring. Destroy entries that don't fit.
 *
 * Note: this is expected to be a rare slow path operation.
 *
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
				      void (*destroy)(void *))
{
	unsigned long flags;
	int head;

	spin_lock_irqsave(&r->consumer_lock, flags);
	spin_lock(&r->producer_lock);

	if (!r->size)
		goto done;

	/*
	 * Clean out buffered entries (for simplicity). This way the following
	 * code can test entries for NULL and, if an entry is non-NULL, assume
	 * it is valid.
	 */
	head = r->consumer_head - 1;
	while (likely(head >= r->consumer_tail))
		r->queue[head--] = NULL;
	r->consumer_tail = r->consumer_head;

	/*
	 * Go over entries in batch, start moving head back and copy entries.
	 * Stop when we run into previously unconsumed entries.
	 */
	while (n) {
		head = r->consumer_head - 1;
		if (head < 0)
			head = r->size - 1;
		if (r->queue[head]) {
			/* This batch entry will have to be destroyed. */
			goto done;
		}
		r->queue[head] = batch[--n];
		r->consumer_tail = head;
		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
		WRITE_ONCE(r->consumer_head, head);
	}

done:
	/* Destroy all entries left in the batch. */
	while (n)
		destroy(batch[--n]);
	spin_unlock(&r->producer_lock);
	spin_unlock_irqrestore(&r->consumer_lock, flags);
}

static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
					   int size, gfp_t gfp,
					   void (*destroy)(void *))
{
	int producer = 0;
	void **old;
	void *ptr;

	while ((ptr = __ptr_ring_consume(r)))
		if (producer < size)
			queue[producer++] = ptr;
		else if (destroy)
			destroy(ptr);

	if (producer >= size)
		producer = 0;
	__ptr_ring_set_size(r, size);
	r->producer = producer;
	r->consumer_head = 0;
	r->consumer_tail = 0;
	old = r->queue;
	r->queue = queue;

	return old;
}

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
				  void (*destroy)(void *))
{
	unsigned long flags;
	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
	void **old;

	if (!queue)
		return -ENOMEM;

	spin_lock_irqsave(&r->consumer_lock, flags);
	spin_lock(&r->producer_lock);

	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);

	spin_unlock(&r->producer_lock);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	kvfree(old);

	return 0;
}
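
/*
 * Example (illustrative sketch): growing a ring's capacity at runtime.
 * "free_obj" is a hypothetical destructor; it is only called for entries
 * that do not fit in the new ring, which cannot happen when growing.
 * On -ENOMEM the old ring is left intact.
 *
 *	int err = ptr_ring_resize(&ring, 1024, GFP_KERNEL, free_obj);
 *	if (err)
 *		return err;
 */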

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
					   unsigned int nrings,
					   int size,
					   gfp_t gfp, void (*destroy)(void *))
{
	unsigned long flags;
	void ***queues;
	int i;

	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
	if (!queues)
		goto noqueues;

	for (i = 0; i < nrings; ++i) {
		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
		if (!queues[i])
			goto nomem;
	}

	for (i = 0; i < nrings; ++i) {
		spin_lock_irqsave(&rings[i]->consumer_lock, flags);
		spin_lock(&rings[i]->producer_lock);
		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
						  size, gfp, destroy);
		spin_unlock(&rings[i]->producer_lock);
		spin_unlock_irqrestore(&rings[i]->consumer_lock, flags);
	}

	for (i = 0; i < nrings; ++i)
		kvfree(queues[i]);

	kfree(queues);

	return 0;

nomem:
	while (--i >= 0)
		kvfree(queues[i]);

	kfree(queues);

noqueues:
	return -ENOMEM;
}

static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
{
	void *ptr;

	if (destroy)
		while ((ptr = ptr_ring_consume(r)))
			destroy(ptr);
	kvfree(r->queue);
}

#endif /* _LINUX_PTR_RING_H */