ceph: add possible_max_rank and make the code more readable
The m_num_mds here is actually the number for MDSs which are in up:active status, and it will be duplicated to m_num_active_mds, so remove it. Add possible_max_rank to the mdsmap struct and this will be the correctly possible largest rank boundary. Remove the special case for one mds in __mdsmap_get_random_mds(), because the validate mds rank may not always be 0. Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
@@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p)
|
|||||||
seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
|
seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds);
|
||||||
seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
|
seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout);
|
||||||
seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
|
seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose);
|
||||||
for (i = 0; i < mdsmap->m_num_mds; i++) {
|
for (i = 0; i < mdsmap->possible_max_rank; i++) {
|
||||||
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
|
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
|
||||||
int state = mdsmap->m_info[i].state;
|
int state = mdsmap->m_info[i].state;
|
||||||
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
|
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
|
||||||
|
@@ -598,7 +598,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
|
|||||||
{
|
{
|
||||||
struct ceph_mds_session *s;
|
struct ceph_mds_session *s;
|
||||||
|
|
||||||
if (mds >= mdsc->mdsmap->m_num_mds)
|
if (mds >= mdsc->mdsmap->possible_max_rank)
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
|
|
||||||
s = kzalloc(sizeof(*s), GFP_NOFS);
|
s = kzalloc(sizeof(*s), GFP_NOFS);
|
||||||
@@ -1231,7 +1231,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
|
|||||||
struct ceph_mds_session *ts;
|
struct ceph_mds_session *ts;
|
||||||
int i, mds = session->s_mds;
|
int i, mds = session->s_mds;
|
||||||
|
|
||||||
if (mds >= mdsc->mdsmap->m_num_mds)
|
if (mds >= mdsc->mdsmap->possible_max_rank)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
mi = &mdsc->mdsmap->m_info[mds];
|
mi = &mdsc->mdsmap->m_info[mds];
|
||||||
@@ -3785,7 +3785,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
|||||||
dout("check_new_map new %u old %u\n",
|
dout("check_new_map new %u old %u\n",
|
||||||
newmap->m_epoch, oldmap->m_epoch);
|
newmap->m_epoch, oldmap->m_epoch);
|
||||||
|
|
||||||
for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) {
|
for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
|
||||||
if (!mdsc->sessions[i])
|
if (!mdsc->sessions[i])
|
||||||
continue;
|
continue;
|
||||||
s = mdsc->sessions[i];
|
s = mdsc->sessions[i];
|
||||||
@@ -3799,7 +3799,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
|||||||
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
|
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
|
||||||
ceph_session_state_name(s->s_state));
|
ceph_session_state_name(s->s_state));
|
||||||
|
|
||||||
if (i >= newmap->m_num_mds) {
|
if (i >= newmap->possible_max_rank) {
|
||||||
/* force close session for stopped mds */
|
/* force close session for stopped mds */
|
||||||
get_session(s);
|
get_session(s);
|
||||||
__unregister_session(mdsc, s);
|
__unregister_session(mdsc, s);
|
||||||
@@ -3856,7 +3856,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) {
|
for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
|
||||||
s = mdsc->sessions[i];
|
s = mdsc->sessions[i];
|
||||||
if (!s)
|
if (!s)
|
||||||
continue;
|
continue;
|
||||||
|
@@ -14,22 +14,15 @@
|
|||||||
#include "super.h"
|
#include "super.h"
|
||||||
|
|
||||||
#define CEPH_MDS_IS_READY(i, ignore_laggy) \
|
#define CEPH_MDS_IS_READY(i, ignore_laggy) \
|
||||||
(m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy))
|
(m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy)
|
||||||
|
|
||||||
static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
|
static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
|
||||||
{
|
{
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
/*
|
|
||||||
* special case for one mds, no matter it is laggy or
|
|
||||||
* not we have no choice
|
|
||||||
*/
|
|
||||||
if (1 == m->m_num_mds && m->m_info[0].state > 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/* count */
|
/* count */
|
||||||
for (i = 0; i < m->m_num_mds; i++)
|
for (i = 0; i < m->possible_max_rank; i++)
|
||||||
if (CEPH_MDS_IS_READY(i, ignore_laggy))
|
if (CEPH_MDS_IS_READY(i, ignore_laggy))
|
||||||
n++;
|
n++;
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
@@ -37,7 +30,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
|
|||||||
|
|
||||||
/* pick */
|
/* pick */
|
||||||
n = prandom_u32() % n;
|
n = prandom_u32() % n;
|
||||||
for (j = 0, i = 0; i < m->m_num_mds; i++) {
|
for (j = 0, i = 0; i < m->possible_max_rank; i++) {
|
||||||
if (CEPH_MDS_IS_READY(i, ignore_laggy))
|
if (CEPH_MDS_IS_READY(i, ignore_laggy))
|
||||||
j++;
|
j++;
|
||||||
if (j > n)
|
if (j > n)
|
||||||
@@ -55,10 +48,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
|
|||||||
int mds;
|
int mds;
|
||||||
|
|
||||||
mds = __mdsmap_get_random_mds(m, false);
|
mds = __mdsmap_get_random_mds(m, false);
|
||||||
if (mds == m->m_num_mds || mds == -1)
|
if (mds == m->possible_max_rank || mds == -1)
|
||||||
mds = __mdsmap_get_random_mds(m, true);
|
mds = __mdsmap_get_random_mds(m, true);
|
||||||
|
|
||||||
return mds == m->m_num_mds ? -1 : mds;
|
return mds == m->possible_max_rank ? -1 : mds;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define __decode_and_drop_type(p, end, type, bad) \
|
#define __decode_and_drop_type(p, end, type, bad) \
|
||||||
@@ -129,7 +122,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||||||
int err;
|
int err;
|
||||||
u8 mdsmap_v, mdsmap_cv;
|
u8 mdsmap_v, mdsmap_cv;
|
||||||
u16 mdsmap_ev;
|
u16 mdsmap_ev;
|
||||||
u32 possible_max_rank;
|
|
||||||
|
|
||||||
m = kzalloc(sizeof(*m), GFP_NOFS);
|
m = kzalloc(sizeof(*m), GFP_NOFS);
|
||||||
if (!m)
|
if (!m)
|
||||||
@@ -157,24 +149,23 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||||||
m->m_max_mds = ceph_decode_32(p);
|
m->m_max_mds = ceph_decode_32(p);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pick out the active nodes as the m_num_mds, the m_num_mds
|
* pick out the active nodes as the m_num_active_mds, the
|
||||||
* maybe larger than m_max_mds when decreasing the max_mds in
|
* m_num_active_mds maybe larger than m_max_mds when decreasing
|
||||||
* cluster side, in other case it should less than or equal
|
* the max_mds in cluster side, in other case it should less
|
||||||
* to m_max_mds.
|
* than or equal to m_max_mds.
|
||||||
*/
|
*/
|
||||||
m->m_num_mds = n = ceph_decode_32(p);
|
m->m_num_active_mds = n = ceph_decode_32(p);
|
||||||
m->m_num_active_mds = m->m_num_mds;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the possible max rank, it maybe larger than the m->m_num_mds,
|
* the possible max rank, it maybe larger than the m_num_active_mds,
|
||||||
* for example if the mds_max == 2 in the cluster, when the MDS(0)
|
* for example if the mds_max == 2 in the cluster, when the MDS(0)
|
||||||
* was laggy and being replaced by a new MDS, we will temporarily
|
* was laggy and being replaced by a new MDS, we will temporarily
|
||||||
* receive a new mds map with n_num_mds == 1 and the active MDS(1),
|
* receive a new mds map with n_num_mds == 1 and the active MDS(1),
|
||||||
* and the mds rank >= m->m_num_mds.
|
* and the mds rank >= m_num_active_mds.
|
||||||
*/
|
*/
|
||||||
possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds);
|
m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds);
|
||||||
|
|
||||||
m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
|
m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS);
|
||||||
if (!m->m_info)
|
if (!m->m_info)
|
||||||
goto nomem;
|
goto nomem;
|
||||||
|
|
||||||
@@ -248,7 +239,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||||||
ceph_mds_state_name(state),
|
ceph_mds_state_name(state),
|
||||||
laggy ? "(laggy)" : "");
|
laggy ? "(laggy)" : "");
|
||||||
|
|
||||||
if (mds < 0 || mds >= possible_max_rank) {
|
if (mds < 0 || mds >= m->possible_max_rank) {
|
||||||
pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
|
pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -318,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
for (i = 0; i < n; i++) {
|
||||||
s32 mds = ceph_decode_32(p);
|
s32 mds = ceph_decode_32(p);
|
||||||
if (mds >= 0 && mds < m->m_num_mds) {
|
if (mds >= 0 && mds < m->possible_max_rank) {
|
||||||
if (m->m_info[mds].laggy)
|
if (m->m_info[mds].laggy)
|
||||||
num_laggy++;
|
num_laggy++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m->m_num_laggy = num_laggy;
|
m->m_num_laggy = num_laggy;
|
||||||
|
|
||||||
if (n > m->m_num_mds) {
|
if (n > m->possible_max_rank) {
|
||||||
void *new_m_info = krealloc(m->m_info,
|
void *new_m_info = krealloc(m->m_info,
|
||||||
n * sizeof(*m->m_info),
|
n * sizeof(*m->m_info),
|
||||||
GFP_NOFS | __GFP_ZERO);
|
GFP_NOFS | __GFP_ZERO);
|
||||||
@@ -333,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
|
|||||||
goto nomem;
|
goto nomem;
|
||||||
m->m_info = new_m_info;
|
m->m_info = new_m_info;
|
||||||
}
|
}
|
||||||
m->m_num_mds = n;
|
m->possible_max_rank = n;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* inc */
|
/* inc */
|
||||||
@@ -404,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < m->m_num_mds; i++)
|
for (i = 0; i < m->possible_max_rank; i++)
|
||||||
kfree(m->m_info[i].export_targets);
|
kfree(m->m_info[i].export_targets);
|
||||||
kfree(m->m_info);
|
kfree(m->m_info);
|
||||||
kfree(m->m_data_pg_pools);
|
kfree(m->m_data_pg_pools);
|
||||||
@@ -420,7 +411,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
|
|||||||
return false;
|
return false;
|
||||||
if (m->m_num_laggy == m->m_num_active_mds)
|
if (m->m_num_laggy == m->m_num_active_mds)
|
||||||
return false;
|
return false;
|
||||||
for (i = 0; i < m->m_num_mds; i++) {
|
for (i = 0; i < m->possible_max_rank; i++) {
|
||||||
if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
|
if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE)
|
||||||
nr_active++;
|
nr_active++;
|
||||||
}
|
}
|
||||||
|
@@ -26,8 +26,8 @@ struct ceph_mdsmap {
|
|||||||
u32 m_session_autoclose; /* seconds */
|
u32 m_session_autoclose; /* seconds */
|
||||||
u64 m_max_file_size;
|
u64 m_max_file_size;
|
||||||
u32 m_max_mds; /* expected up:active mds number */
|
u32 m_max_mds; /* expected up:active mds number */
|
||||||
int m_num_active_mds; /* actual up:active mds number */
|
u32 m_num_active_mds; /* actual up:active mds number */
|
||||||
int m_num_mds; /* size of m_info array */
|
u32 possible_max_rank; /* possible max rank index */
|
||||||
struct ceph_mds_info *m_info;
|
struct ceph_mds_info *m_info;
|
||||||
|
|
||||||
/* which object pools file data can be stored in */
|
/* which object pools file data can be stored in */
|
||||||
@@ -43,7 +43,7 @@ struct ceph_mdsmap {
|
|||||||
static inline struct ceph_entity_addr *
|
static inline struct ceph_entity_addr *
|
||||||
ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
|
ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
|
||||||
{
|
{
|
||||||
if (w >= m->m_num_mds)
|
if (w >= m->possible_max_rank)
|
||||||
return NULL;
|
return NULL;
|
||||||
return &m->m_info[w].addr;
|
return &m->m_info[w].addr;
|
||||||
}
|
}
|
||||||
@@ -51,14 +51,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
|
|||||||
static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
|
static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
|
||||||
{
|
{
|
||||||
BUG_ON(w < 0);
|
BUG_ON(w < 0);
|
||||||
if (w >= m->m_num_mds)
|
if (w >= m->possible_max_rank)
|
||||||
return CEPH_MDS_STATE_DNE;
|
return CEPH_MDS_STATE_DNE;
|
||||||
return m->m_info[w].state;
|
return m->m_info[w].state;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
|
static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
|
||||||
{
|
{
|
||||||
if (w >= 0 && w < m->m_num_mds)
|
if (w >= 0 && w < m->possible_max_rank)
|
||||||
return m->m_info[w].laggy;
|
return m->m_info[w].laggy;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user