sgi-xpc: prevent false heartbeat failures

The heartbeat timeout functionality in sgi-xpc is currently not trained to
the connection time.  If a connection is made and the code is in the last
polling window prior to doing a timeout, the next polling window will see
the heartbeat as unchanged and initiate a no-heartbeat disconnect.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Robin Holt
2009-04-13 14:40:18 -07:00
committed by Linus Torvalds
parent a06bba4643
commit a374c57b07
4 changed files with 124 additions and 153 deletions

View File

@@ -90,18 +90,21 @@ struct xpc_rsvd_page {
short max_npartitions; /* value of XPC_MAX_PARTITIONS */
u8 version;
u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */
union {
unsigned long vars_pa; /* phys address of struct xpc_vars */
unsigned long activate_gru_mq_desc_gpa; /* phys addr of */
/* activate mq's */
/* gru mq descriptor */
} sn;
unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */
union {
struct {
unsigned long vars_pa; /* phys addr */
} sn2;
struct {
unsigned long heartbeat_gpa; /* phys addr */
unsigned long activate_gru_mq_desc_gpa; /* phys addr */
} uv;
} sn;
u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */
u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */
};
#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */
#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */
/*
* Define the structures by which XPC variables can be exported to other
@@ -182,6 +185,17 @@ struct xpc_vars_part_sn2 {
(XPC_RP_MACH_NASIDS(_rp) + \
xpc_nasid_mask_nlongs))
/*
* The following structure describes the partition's heartbeat info which
* will be periodically read by other partitions to determine whether this
* XPC is still 'alive'.
*/
struct xpc_heartbeat_uv {
/* NOTE(review): presumably the counter that remote partitions sample -- confirm against the code that writes it */
unsigned long value;
unsigned long offline; /* if 0, heartbeat should be changing */
};
/*
* Info pertinent to a GRU message queue using a watch list for irq generation.
*/
@@ -198,7 +212,7 @@ struct xpc_gru_mq_uv {
/*
* The activate_mq is used to send/receive GRU messages that affect XPC's
* heartbeat, partition active state, and channel state. This is UV only.
* partition active state and channel state. This is uv only.
*/
struct xpc_activate_mq_msghdr_uv {
unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
@@ -210,33 +224,26 @@ struct xpc_activate_mq_msghdr_uv {
/* activate_mq defined message types */
#define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0
#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1
#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2
#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3
#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4
#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5
#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1
#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3
#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5
#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6
#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10
#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11
#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 7
#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 8
/*
 * Minimal activate_mq message: it carries nothing beyond the common
 * message header.
 */
struct xpc_activate_mq_msg_uv {
struct xpc_activate_mq_msghdr_uv hdr;
};
/*
 * activate_mq message consisting of the common message header followed by
 * a 64-bit heartbeat field.
 * NOTE(review): presumably the payload for the *_HEARTBEAT_UV message
 * types -- confirm against the sender.
 */
struct xpc_activate_mq_msg_heartbeat_req_uv {
struct xpc_activate_mq_msghdr_uv hdr;
u64 heartbeat;
};
/*
 * activate_mq message consisting of the common message header followed by
 * three unsigned long address fields.
 * NOTE(review): presumably sent with XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV --
 * confirm against the sender.
 */
struct xpc_activate_mq_msg_activate_req_uv {
struct xpc_activate_mq_msghdr_uv hdr;
/* NOTE(review): presumably the address of the sender's reserved page -- confirm */
unsigned long rp_gpa;
/* NOTE(review): presumably the address of the sender's struct xpc_heartbeat_uv -- confirm */
unsigned long heartbeat_gpa;
/* NOTE(review): presumably the address of the sender's activate mq gru mq descriptor -- confirm */
unsigned long activate_gru_mq_desc_gpa;
};
@@ -687,6 +694,9 @@ struct xpc_partition_sn2 {
};
struct xpc_partition_uv {
unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */
struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */
/* partition's heartbeat */
unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */
/* activate mq's gru mq */
/* descriptor */
@@ -698,14 +708,12 @@ struct xpc_partition_uv {
u8 remote_act_state; /* remote partition's act_state */
u8 act_state_req; /* act_state request from remote partition */
enum xp_retval reason; /* reason for deactivate act_state request */
u64 heartbeat; /* incremented by remote partition */
};
/* struct xpc_partition_uv flags */
#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001
#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001
#define XPC_P_ENGAGED_UV 0x00000002
#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004
/* struct xpc_partition_uv act_state change requests */
@@ -807,7 +815,6 @@ extern int xpc_disengage_timedout;
extern int xpc_activate_IRQ_rcvd;
extern spinlock_t xpc_activate_IRQ_rcvd_lock;
extern wait_queue_head_t xpc_activate_IRQ_wq;
extern void *xpc_heartbeating_to_mask;
extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **);
extern void xpc_activate_partition(struct xpc_partition *);
extern void xpc_activate_kthreads(struct xpc_channel *, int);
@@ -825,6 +832,9 @@ extern void (*xpc_increment_heartbeat) (void);
extern void (*xpc_offline_heartbeat) (void);
extern void (*xpc_online_heartbeat) (void);
extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *);
extern void (*xpc_allow_hb) (short);
extern void (*xpc_disallow_hb) (short);
extern void (*xpc_disallow_all_hbs) (void);
extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *);
extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *);
extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *);
@@ -909,40 +919,6 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *,
extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval);
extern void xpc_partition_going_down(struct xpc_partition *, enum xp_retval);
/*
 * Test the bit numbered 'partid' in the caller-supplied mask and hand back
 * whatever test_bit() reports for it.
 */
static inline int
xpc_hb_allowed(short partid, void *heartbeating_to_mask)
{
	int bit_is_set = test_bit(partid, heartbeating_to_mask);

	return bit_is_set;
}
/*
 * Return 1 when at least one of the first xp_max_npartitions bits of
 * xpc_heartbeating_to_mask is set, 0 when that range is empty.
 */
static inline int
xpc_any_hbs_allowed(void)
{
	DBUG_ON(xpc_heartbeating_to_mask == NULL);

	if (bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions))
		return 0;

	return 1;
}
/*
 * Set the bit numbered 'partid' in xpc_heartbeating_to_mask.
 */
static inline void
xpc_allow_hb(short partid)
{
/* debug check on the mask pointer; NOTE(review): DBUG_ON presumably traps when the condition is true -- confirm */
DBUG_ON(xpc_heartbeating_to_mask == NULL);
set_bit(partid, xpc_heartbeating_to_mask);
}
/*
 * Clear the bit numbered 'partid' in xpc_heartbeating_to_mask.
 */
static inline void
xpc_disallow_hb(short partid)
{
/* debug check on the mask pointer; NOTE(review): DBUG_ON presumably traps when the condition is true -- confirm */
DBUG_ON(xpc_heartbeating_to_mask == NULL);
clear_bit(partid, xpc_heartbeating_to_mask);
}
/*
 * Zero the first xp_max_npartitions bits of xpc_heartbeating_to_mask.
 */
static inline void
xpc_disallow_all_hbs(void)
{
/* debug check on the mask pointer; NOTE(review): DBUG_ON presumably traps when the condition is true -- confirm */
DBUG_ON(xpc_heartbeating_to_mask == NULL);
bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions);
}
static inline void
xpc_wakeup_channel_mgr(struct xpc_partition *part)
{