sgi-xpc: prevent false heartbeat failures
The heartbeat timeout functionality in sgi-xpc is currently not tied to the connection time. If a connection is made while the code is in the last polling window before a timeout would be declared, the next polling window will see the heartbeat as unchanged and initiate a no-heartbeat disconnect.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
a06bba4643
commit
a374c57b07
@@ -90,18 +90,21 @@ struct xpc_rsvd_page {
|
||||
short max_npartitions; /* value of XPC_MAX_PARTITIONS */
|
||||
u8 version;
|
||||
u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */
|
||||
union {
|
||||
unsigned long vars_pa; /* phys address of struct xpc_vars */
|
||||
unsigned long activate_gru_mq_desc_gpa; /* phys addr of */
|
||||
/* activate mq's */
|
||||
/* gru mq descriptor */
|
||||
} sn;
|
||||
unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
|
||||
u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */
|
||||
union {
|
||||
struct {
|
||||
unsigned long vars_pa; /* phys addr */
|
||||
} sn2;
|
||||
struct {
|
||||
unsigned long heartbeat_gpa; /* phys addr */
|
||||
unsigned long activate_gru_mq_desc_gpa; /* phys addr */
|
||||
} uv;
|
||||
} sn;
|
||||
u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */
|
||||
u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */
|
||||
};
|
||||
|
||||
#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */
|
||||
#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */
|
||||
|
||||
/*
|
||||
* Define the structures by which XPC variables can be exported to other
|
||||
@@ -182,6 +185,17 @@ struct xpc_vars_part_sn2 {
|
||||
(XPC_RP_MACH_NASIDS(_rp) + \
|
||||
xpc_nasid_mask_nlongs))
|
||||
|
||||
|
||||
/*
|
||||
* The following structure describes the partition's heartbeat info which
|
||||
* will be periodically read by other partitions to determine whether this
|
||||
* XPC is still 'alive'.
|
||||
*/
|
||||
struct xpc_heartbeat_uv {
|
||||
unsigned long value;
|
||||
unsigned long offline; /* if 0, heartbeat should be changing */
|
||||
};
|
||||
|
||||
/*
|
||||
* Info pertinent to a GRU message queue using a watch list for irq generation.
|
||||
*/
|
||||
@@ -198,7 +212,7 @@ struct xpc_gru_mq_uv {
|
||||
|
||||
/*
|
||||
* The activate_mq is used to send/receive GRU messages that affect XPC's
|
||||
* heartbeat, partition active state, and channel state. This is UV only.
|
||||
* partition active state and channel state. This is uv only.
|
||||
*/
|
||||
struct xpc_activate_mq_msghdr_uv {
|
||||
unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
|
||||
@@ -210,33 +224,26 @@ struct xpc_activate_mq_msghdr_uv {
|
||||
|
||||
/* activate_mq defined message types */
|
||||
#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
|
||||
#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1
|
||||
#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2
|
||||
#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3
|
||||
|
||||
#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4
|
||||
#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5
|
||||
#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1
|
||||
#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2
|
||||
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5
|
||||
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6
|
||||
|
||||
#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10
|
||||
#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11
|
||||
#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 7
|
||||
#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 8
|
||||
|
||||
struct xpc_activate_mq_msg_uv {
|
||||
struct xpc_activate_mq_msghdr_uv hdr;
|
||||
};
|
||||
|
||||
struct xpc_activate_mq_msg_heartbeat_req_uv {
|
||||
struct xpc_activate_mq_msghdr_uv hdr;
|
||||
u64 heartbeat;
|
||||
};
|
||||
|
||||
struct xpc_activate_mq_msg_activate_req_uv {
|
||||
struct xpc_activate_mq_msghdr_uv hdr;
|
||||
unsigned long rp_gpa;
|
||||
unsigned long heartbeat_gpa;
|
||||
unsigned long activate_gru_mq_desc_gpa;
|
||||
};
|
||||
|
||||
@@ -687,6 +694,9 @@ struct xpc_partition_sn2 {
|
||||
};
|
||||
|
||||
struct xpc_partition_uv {
|
||||
unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
|
||||
struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
|
||||
/* partition's heartbeat */
|
||||
unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */
|
||||
/* activate mq's gru mq */
|
||||
/* descriptor */
|
||||
@@ -698,14 +708,12 @@ struct xpc_partition_uv {
|
||||
u8 remote_act_state; /* remote partition's act_state */
|
||||
u8 act_state_req; /* act_state request from remote partition */
|
||||
enum xp_retval reason; /* reason for deactivate act_state request */
|
||||
u64 heartbeat; /* incremented by remote partition */
|
||||
};
|
||||
|
||||
/* struct xpc_partition_uv flags */
|
||||
|
||||
#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001
|
||||
#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001
|
||||
#define XPC_P_ENGAGED_UV 0x00000002
|
||||
#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004
|
||||
|
||||
/* struct xpc_partition_uv act_state change requests */
|
||||
|
||||
@@ -807,7 +815,6 @@ extern int xpc_disengage_timedout;
|
||||
extern int xpc_activate_IRQ_rcvd;
|
||||
extern spinlock_t xpc_activate_IRQ_rcvd_lock;
|
||||
extern wait_queue_head_t xpc_activate_IRQ_wq;
|
||||
extern void *xpc_heartbeating_to_mask;
|
||||
extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
|
||||
extern void xpc_activate_partition(struct xpc_partition *);
|
||||
extern void xpc_activate_kthreads(struct xpc_channel *, int);
|
||||
@@ -825,6 +832,9 @@ extern void (*xpc_increment_heartbeat) (void);
|
||||
extern void (*xpc_offline_heartbeat) (void);
|
||||
extern void (*xpc_online_heartbeat) (void);
|
||||
extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
|
||||
extern void (*xpc_allow_hb) (short);
|
||||
extern void (*xpc_disallow_hb) (short);
|
||||
extern void (*xpc_disallow_all_hbs) (void);
|
||||
extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
|
||||
extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
|
||||
extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
|
||||
@@ -909,40 +919,6 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *,
|
||||
extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
|
||||
extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
|
||||
|
||||
static inline int
|
||||
xpc_hb_allowed(short partid, void *heartbeating_to_mask)
|
||||
{
|
||||
return test_bit(partid, heartbeating_to_mask);
|
||||
}
|
||||
|
||||
static inline int
|
||||
xpc_any_hbs_allowed(void)
|
||||
{
|
||||
DBUG_ON(xpc_heartbeating_to_mask == NULL);
|
||||
return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions);
|
||||
}
|
||||
|
||||
static inline void
|
||||
xpc_allow_hb(short partid)
|
||||
{
|
||||
DBUG_ON(xpc_heartbeating_to_mask == NULL);
|
||||
set_bit(partid, xpc_heartbeating_to_mask);
|
||||
}
|
||||
|
||||
static inline void
|
||||
xpc_disallow_hb(short partid)
|
||||
{
|
||||
DBUG_ON(xpc_heartbeating_to_mask == NULL);
|
||||
clear_bit(partid, xpc_heartbeating_to_mask);
|
||||
}
|
||||
|
||||
static inline void
|
||||
xpc_disallow_all_hbs(void)
|
||||
{
|
||||
DBUG_ON(xpc_heartbeating_to_mask == NULL);
|
||||
bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions);
|
||||
}
|
||||
|
||||
static inline void
|
||||
xpc_wakeup_channel_mgr(struct xpc_partition *part)
|
||||
{
|
||||
|
Reference in New Issue
Block a user