md-cluster: Improve md_reload_sb to be less error prone

md_reload_sb is too simplistic and it explicitly needs to determine
the changes made by the writing node. However, there are multiple areas
where a simple reload could fail.

Instead, read the superblock of one of the "good" rdevs and update
the necessary information:

- read the superblock into a newly allocated page, by temporarily
  swapping out rdev->sb_page and calling ->load_super.
- if that fails return
- if it succeeds, call check_sb_changes
  1. iterates over list of active devices and checks the matching
   dev_roles[] value.
   	If that is 'faulty', the device must be  marked as faulty
	 - call md_error to mark the device as faulty. Make sure
	   not to set CHANGE_DEVS and wakeup mddev->thread or else
	   it would initiate a resync process, which is the responsibility
	   of the "primary" node.
	 - clear the Blocked bit
	 - Call remove_and_add_spares() to hot remove the device.
	If the device is 'spare':
	 - call remove_and_add_spares() to get the number of spares
	   added in this operation.
	 - Reduce mddev->degraded to mark the array as not degraded.
  2. reset recovery_cp
- read the rest of the rdevs to update recovery_offset. If recovery_offset
  is equal to MaxSector, call spare_active() to set it In_sync

This required that recovery_offset be initialized to MaxSector, as
opposed to zero so as to communicate the end of sync for a rdev.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
This commit is contained in:
Goldwyn Rodrigues
2015-08-21 10:33:39 -05:00
والد 2910ff17d1
کامیت 70bcecdb15
4فایلهای تغییر یافته به همراه134 افزوده شده و 27 حذف شده

مشاهده پرونده

@@ -427,8 +427,7 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
md_reload_sb(mddev);
md_reload_sb(mddev, le32_to_cpu(msg->raid_slot));
dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
}
@@ -834,11 +833,23 @@ static int metadata_update_finish(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
struct cluster_msg cmsg;
int ret;
struct md_rdev *rdev;
int ret = 0;
memset(&cmsg, 0, sizeof(cmsg));
cmsg.type = cpu_to_le32(METADATA_UPDATED);
ret = __sendmsg(cinfo, &cmsg);
cmsg.raid_slot = -1;
/* Pick up a good active device number to send.
*/
rdev_for_each(rdev, mddev)
if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
break;
}
if (cmsg.raid_slot >= 0)
ret = __sendmsg(cinfo, &cmsg);
else
pr_warn("md-cluster: No good device id found to send\n");
unlock_comm(cinfo);
return ret;
}
@@ -922,15 +933,9 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
static int add_new_disk_finish(struct mddev *mddev)
{
struct cluster_msg cmsg;
struct md_cluster_info *cinfo = mddev->cluster_info;
int ret;
/* Write sb and inform others */
md_update_sb(mddev, 1);
cmsg.type = METADATA_UPDATED;
ret = __sendmsg(cinfo, &cmsg);
unlock_comm(cinfo);
return ret;
return metadata_update_finish(mddev);
}
static int new_disk_ack(struct mddev *mddev, bool ack)