libceph: fix messenger retry
In ancient times, the messenger could both initiate and accept connections. An artifact if that was data structures to store/process an incoming ceph_msg_connect request and send an outgoing ceph_msg_connect_reply. Sadly, the negotiation code was referencing those structures and ignoring important information (like the peer's connect_seq) from the correct ones. Among other things, this fixes tight reconnect loops where the server sends RETRY_SESSION and we (the client) retries with the same connect_seq as last time. This bug pretty easily triggered by injecting socket failures on the MDS and running some fs workload like workunits/direct_io/test_sync_io. Signed-off-by: Sage Weil <sage@inktank.com>
This commit is contained in:
@@ -172,16 +172,8 @@ struct ceph_connection {
|
||||
|
||||
/* connection negotiation temps */
|
||||
char in_banner[CEPH_BANNER_MAX_LEN];
|
||||
union {
|
||||
struct { /* outgoing connection */
|
||||
struct ceph_msg_connect out_connect;
|
||||
struct ceph_msg_connect_reply in_reply;
|
||||
};
|
||||
struct { /* incoming */
|
||||
struct ceph_msg_connect in_connect;
|
||||
struct ceph_msg_connect_reply out_reply;
|
||||
};
|
||||
};
|
||||
struct ceph_msg_connect out_connect;
|
||||
struct ceph_msg_connect_reply in_reply;
|
||||
struct ceph_entity_addr actual_peer_addr;
|
||||
|
||||
/* message out temps */
|
||||
|
Reference in New Issue
Block a user