libceph: multiple workspaces for CRUSH computations

Replace a global map->crush_workspace (protected by a global mutex)
with a list of workspaces, up to the number of CPUs + 1.
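
In outline, this is a pooled-workspace pattern: a CRUSH computation
takes a workspace off an idle list under a spinlock, allocates a new
one only while fewer than num_possible_cpus() + 1 exist, and otherwise
sleeps until one is returned.  A minimal sketch of that get/put pair
follows; the helper names (get_workspace(), put_workspace(),
alloc_workspace()) and the list_head assumed to be embedded in
struct crush_work are illustrative, not necessarily the patch's exact
code.

/*
 * Sketch only: acquire a workspace for one CRUSH computation.
 * Assumes struct crush_work carries a list_head ("item") so that
 * idle workspaces can be strung on wsm->idle_ws.
 */
static struct crush_work *get_workspace(struct workspace_manager *wsm,
					const struct crush_map *c)
{
	struct crush_work *work;

	for (;;) {
		spin_lock(&wsm->ws_lock);
		if (!list_empty(&wsm->idle_ws)) {
			work = list_first_entry(&wsm->idle_ws,
						struct crush_work, item);
			list_del_init(&work->item);
			wsm->free_ws--;
			spin_unlock(&wsm->ws_lock);
			return work;
		}
		if (atomic_read(&wsm->total_ws) < num_possible_cpus() + 1) {
			atomic_inc(&wsm->total_ws);
			spin_unlock(&wsm->ws_lock);
			work = alloc_workspace(c);  /* kvmalloc + crush_init_workspace() */
			if (work)
				return work;
			atomic_dec(&wsm->total_ws);
		} else {
			spin_unlock(&wsm->ws_lock);
		}
		/* Pool exhausted (or allocation failed): wait for a return. */
		wait_event(wsm->ws_wait, !list_empty(&wsm->idle_ws));
	}
}

static void put_workspace(struct workspace_manager *wsm,
			  struct crush_work *work)
{
	spin_lock(&wsm->ws_lock);
	list_add(&work->item, &wsm->idle_ws);
	wsm->free_ws++;
	spin_unlock(&wsm->ws_lock);
	wake_up(&wsm->ws_wait);
}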

This is based on a patch from Robin Geuze <robing@nl.team.blue>.
Robin and his team observed a 10-20% increase in IOPS at all queue
depths, along with lower CPU usage, on a high-end all-NVMe 100GbE
cluster.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Author: Ilya Dryomov
Date:   2020-08-17 13:45:04 +02:00
Parent: 1c30c90733
Commit: 3986f9a42e
3 changed files with 166 additions and 17 deletions

include/linux/ceph/osdmap.h

@@ -137,6 +137,17 @@ int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
 		       const char *fmt, ...);
 void ceph_oid_destroy(struct ceph_object_id *oid);
 
+struct workspace_manager {
+	struct list_head idle_ws;
+	spinlock_t ws_lock;
+	/* Number of free workspaces */
+	int free_ws;
+	/* Total number of allocated workspaces */
+	atomic_t total_ws;
+	/* Waiters for a free workspace */
+	wait_queue_head_t ws_wait;
+};
+
 struct ceph_pg_mapping {
 	struct rb_node node;
 	struct ceph_pg pgid;
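
The manager above maps directly onto a small init routine; a plausible
sketch (the name workspace_manager_init() is assumed here, not taken
from the diff):

/* Sketch: bring up an empty manager; workspaces are added on demand. */
static void workspace_manager_init(struct workspace_manager *wsm)
{
	INIT_LIST_HEAD(&wsm->idle_ws);
	spin_lock_init(&wsm->ws_lock);
	wsm->free_ws = 0;
	atomic_set(&wsm->total_ws, 0);
	init_waitqueue_head(&wsm->ws_wait);
}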
@@ -184,8 +195,7 @@ struct ceph_osdmap {
 	 * the list of osds that store+replicate them. */
 	struct crush_map *crush;
 
-	struct mutex crush_workspace_mutex;
-	void *crush_workspace;
+	struct workspace_manager crush_wsm;
 };
 
 static inline bool ceph_osd_exists(struct ceph_osdmap *map, int osd)
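
With crush_wsm embedded in struct ceph_osdmap, each mapping request
checks out a private workspace for the duration of its CRUSH
calculation instead of serializing on crush_workspace_mutex, so
computations on different CPUs proceed in parallel.  Roughly, using
the helpers sketched earlier (signature simplified; choose_args
handling and error paths omitted):

/* Sketch only, not the exact code from the patch. */
static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
		    int *result, int result_max,
		    const __u32 *weight, int weight_max)
{
	struct crush_work *work;
	int r;

	work = get_workspace(&map->crush_wsm, map->crush);
	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
			  weight, weight_max, work, NULL);
	put_workspace(&map->crush_wsm, work);
	return r;
}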