ocfs2/dlm: do not purge lockres that is queued for assert master
When the workqueue is delayed, a lockres may be purged while it is still queued for master assert. This may trigger BUG() as follows:

N1: dlm_get_lockres() -> dlm_do_master_requery
N2: is the master of the lockres, so it queues the assert_master work
N2: dlm_thread() starts running and purges the lockres
N2: dlm_assert_master_worker() sends the assert master message to the other nodes
N1: receives the assert_master message and sets the master to N2

dlmlock_remote() then sends a create_lock message to N2 but receives DLM_IVLOCKID; if it is a RECOVERY lockres, this triggers the BUG().

Another BUG() is triggered when N3 becomes the new master and sends assert_master to N1: N1 triggers the BUG() because the owner doesn't match. So we should not purge a lockres while it is queued for assert master.

Signed-off-by: joyce.xue <xuejiufei@huawei.com>
Reviewed-by: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:

committed by
Linus Torvalds

parent
b9aaac5a6b
commit
ac4fef4d23
@@ -331,6 +331,7 @@ struct dlm_lock_resource
|
||||
u16 state;
|
||||
char lvb[DLM_LVB_LEN];
|
||||
unsigned int inflight_locks;
|
||||
unsigned int inflight_assert_workers;
|
||||
unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
||||
};
|
||||
|
||||
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
|
||||
void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
|
||||
void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
|
||||
struct dlm_lock_resource *res);
|
||||
|
||||
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
|
||||
|
Reference in New Issue
Block a user