ソースを参照

qcacld-3.0: Fix a race condition in roaming module

A race condition can happen when roaming state machine
transition takes place in the following sequence:

  1) FW indicates 'roam_synch' event to driver, and driver in
     turn, advances its roaming state machine to
     'WLAN_ROAM_SYNCH_IN_PROG', in wlan scheduler context;
  2) HDD layer starts to do 'disconnect'(could be due to NUD
     failure), and thus needs to stop roaming scan offload in
     the following sequence:

     2.1) Post a RSO stop cmd to FW, via the scheduler thread;
     2.2) Advance the roaming state machine to a new state of
          'WLAN_ROAM_RSO_STOPPED';
     2.3) Check if:
          >> the roaming state machine is still in a state of
	     'WLAN_ROAM_SYNCH_IN_PROG'
	  >> the neighbor roaming state is in any of the states
	     below:
	        eCSR_NEIGHBOR_ROAM_STATE_REASSOCIATING;
		eCSR_NEIGHBOR_ROAM_STATE_PREAUTHENTICATING;
		eCSR_NEIGHBOR_ROAM_STATE_PREAUTH_DONE;

	  Here, if any of the conditions becomes true, then
	  HDD will wait for 4 sec to let the roam_synch handler
	  finish its job, otherwise the wait will not happen.
     2.4) Disconnect the current vdev and advance the roaming
          state machine to 'WLAN_ROAM_DEINIT';

In a corner case, race condition can happen in the following
sequence:

 >> thread 1), which executes in wlan scheduler context, starts
    to run first, and thus roaming state machine advances to
    'WLAN_ROAM_SYNCH_IN_PROG', but then gets preempted before
    starting to parse the assoc resp frame attached in the
    roam_synch event;
 >> thread 2) starts to run and posts RSO stop cmd, but the req
    msg cannot be handled right away due to scheduler thread
    is actively running with other tasks, so it gets queued at
    step 2.1);
 >> thread 2) continues to run, and simply advances the roaming
    state machine to 'WLAN_ROAM_RSO_STOPPED' at 2.2), and then
    it will find out none of the conditions listed in 2.3) is
    true, so it choses NOT to wait for 'roam_synch' handler to
    finish;
 >> thread 2) reaches to step 2.4) without waiting at 2.3), and
    roaming state machine advances to 'WLAN_ROAM_DEINIT';
 >> thread 1) continues to run and starts to parse the assoc
    resp frame indicated by FW, and finds that the roaming state
    machine NOT in 'WLAN_ROAM_SYNCH_IN_PROG', and thus goes to
    the wrong way to treat the buffer in a different manner,
    causing a invalid pointer access here.

Fix the racing condition by checking if the roaming state machine
is in 'WLAN_ROAM_SYNCH_IN_PROG' before advancing the same to
'WLAN_ROAM_RSO_STOPPED' in function wlan_hdd_wait_for_roaming().

Change-Id: I202ccb371e9e70a76ef35938c700b60c91b7d3cb
CRs-Fixed: 2761880
wadesong 4 年 前
コミット
65e8c1c219

+ 4 - 0
components/umac/mlme/connection_mgr/core/src/wlan_cm_roam_offload.c

@@ -1104,6 +1104,10 @@ cm_roam_switch_to_roam_sync(struct wlan_objmgr_pdev *pdev,
 					    WLAN_ROAM_SYNCH_IN_PROG);
 			break;
 		}
+		/*
+		 * transition to WLAN_ROAM_SYNCH_IN_PROG not allowed otherwise
+		 * if we're already RSO stopped, fall through to return failure
+		 */
 	case WLAN_ROAM_INIT:
 	case WLAN_ROAM_DEINIT:
 	case WLAN_ROAM_SYNCH_IN_PROG:

+ 10 - 8
core/hdd/src/wlan_hdd_cfg80211.c

@@ -20255,6 +20255,7 @@ static void wlan_hdd_wait_for_roaming(mac_handle_t mac_handle,
 	struct hdd_context *hdd_ctx;
 	unsigned long rc;
 	struct hdd_station_ctx *sta_ctx = WLAN_HDD_GET_STATION_CTX_PTR(adapter);
+	QDF_STATUS status;
 
 	if (adapter->device_mode != QDF_STA_MODE)
 		return;
@@ -20265,18 +20266,16 @@ static void wlan_hdd_wait_for_roaming(mac_handle_t mac_handle,
 	if (sta_ctx->conn_info.conn_state != eConnectionState_Associated)
 		return;
 
-	sme_stop_roaming(mac_handle, adapter->vdev_id,
-			 REASON_DRIVER_DISABLED,
-			 RSO_INVALID_REQUESTOR);
 	/*
 	 * If firmware has already started roaming process, driver
 	 * needs to wait for processing of this disconnect request.
 	 *
 	 */
 	INIT_COMPLETION(adapter->roaming_comp_var);
-	if (hdd_is_roaming_in_progress(hdd_ctx) ||
-	    sme_neighbor_middle_of_roaming(mac_handle,
-					   adapter->vdev_id)) {
+	status = sme_abort_roaming(mac_handle, adapter->vdev_id);
+	if (QDF_STATUS_E_BUSY == status) {
+		hdd_warn("roaming on-going, wait for %d ms before postingg RSO stop cmd",
+			 WLAN_WAIT_TIME_STOP_ROAM);
 		rc = wait_for_completion_timeout(&adapter->roaming_comp_var,
 				msecs_to_jiffies(WLAN_WAIT_TIME_STOP_ROAM));
 		if (!rc) {
@@ -20285,10 +20284,13 @@ static void wlan_hdd_wait_for_roaming(mac_handle_t mac_handle,
 				adapter->vdev_id);
 		}
 
+		sme_stop_roaming(mac_handle, adapter->vdev_id,
+				 REASON_DRIVER_DISABLED, RSO_INVALID_REQUESTOR);
+
 		if (adapter->roam_ho_fail) {
 			INIT_COMPLETION(adapter->disconnect_comp_var);
-			hdd_conn_set_connection_state(adapter,
-					eConnectionState_Disconnecting);
+			hdd_conn_set_connection_state(
+				adapter, eConnectionState_Disconnecting);
 		}
 	}
 }

+ 18 - 0
core/sme/inc/sme_api.h

@@ -995,6 +995,24 @@ QDF_STATUS sme_start_roaming(mac_handle_t mac_handle, uint8_t sessionId,
 			     uint8_t reason,
 			     enum wlan_cm_rso_control_requestor requestor);
 
+/**
+ * sme_abort_roaming() - Try to stop RSO if conditions allowable
+ * This function checks if the current roaming state machine allows RSO stop
+ * cmd to be issued, and stops roaming if allowed, otherwise, indicate to
+ * the caller that a wait is required.
+ *
+ * @mac_handle - The handle returned by mac_open
+ * @session_id - Session identifier
+ *
+ * Returns:
+ * QDF_STATUS_E_BUSY if roam_synch is in progress and upper layer has to wait
+ *                   before RSO stop cmd can be issued;
+ * QDF_STATUS_SUCCESS if roam_synch is not outstanding. RSO stop cmd will be
+ *                    issued with the global SME lock held in this case, and
+ *                    uppler layer doesn't have to do any wait.
+ */
+QDF_STATUS sme_abort_roaming(mac_handle_t mac_handle, uint8_t session_id);
+
 /**
  * sme_set_pcl_for_first_connected_vdev  - Set the vdev pcl for the connected
  * STA vdev

+ 36 - 2
core/sme/src/common/sme_api.c

@@ -6371,11 +6371,11 @@ QDF_STATUS sme_start_roaming(mac_handle_t mac_handle, uint8_t vdev_id,
  * sme_stop_roaming() - Stop roaming for a given sessionId
  *  This is a synchronous call
  *
- * @mac_handle      - The handle returned by mac_open
+ * @mac_handle - The handle returned by mac_open
  * @sessionId - Session Identifier
  *
  * Return QDF_STATUS_SUCCESS on success
- *	   Other status on failure
+ *        Other status on failure
  */
 QDF_STATUS sme_stop_roaming(mac_handle_t mac_handle, uint8_t session_id,
 			    uint8_t reason,
@@ -6392,6 +6392,12 @@ QDF_STATUS sme_stop_roaming(mac_handle_t mac_handle, uint8_t session_id,
 		return QDF_STATUS_E_FAILURE;
 	}
 
+	status = sme_acquire_global_lock(&mac_ctx->sme);
+	if (QDF_IS_STATUS_ERROR(status)) {
+		sme_err("Failed to acquire global SME lock!");
+		return status;
+	}
+
 	if (reason == REASON_DRIVER_DISABLED && requestor)
 		mlme_set_operations_bitmap(mac_ctx->psoc, session_id,
 					   requestor, false);
@@ -6400,9 +6406,37 @@ QDF_STATUS sme_stop_roaming(mac_handle_t mac_handle, uint8_t session_id,
 					    WLAN_ROAM_RSO_STOPPED,
 					    REASON_DRIVER_DISABLED);
 
+	sme_release_global_lock(&mac_ctx->sme);
 	return status;
 }
 
+QDF_STATUS sme_abort_roaming(mac_handle_t mac_handle, uint8_t session_id)
+{
+	struct mac_context *mac_ctx = MAC_CONTEXT(mac_handle);
+	QDF_STATUS status;
+
+	status = sme_acquire_global_lock(&mac_ctx->sme);
+	if (QDF_IS_STATUS_ERROR(status)) {
+		sme_err("Failed to acquire global SME lock!");
+		return status;
+	}
+
+	if (MLME_IS_ROAM_SYNCH_IN_PROGRESS(mac_ctx->psoc, session_id) ||
+	    sme_neighbor_middle_of_roaming(mac_handle, session_id)) {
+		sme_release_global_lock(&mac_ctx->sme);
+		return QDF_STATUS_E_BUSY;
+	}
+
+	/* RSO stop cmd will be issued with global SME lock held to avoid
+	 * any racing conditions with wma/csr layer
+	 */
+	sme_stop_roaming(mac_handle, session_id, REASON_DRIVER_DISABLED,
+			 RSO_INVALID_REQUESTOR);
+
+	sme_release_global_lock(&mac_ctx->sme);
+	return QDF_STATUS_SUCCESS;
+}
+
 /*
  * sme_start_roaming() - Start roaming for a given sessionId
  *  This is a synchronous call

+ 5 - 0
core/sme/src/csr/csr_api_roam.c

@@ -19008,6 +19008,11 @@ csr_roam_switch_to_roam_sync(struct mac_context *mac, uint8_t vdev_id,
 					    WLAN_ROAM_SYNCH_IN_PROG);
 			break;
 		}
+
+		/*
+		 * transition to WLAN_ROAM_SYNCH_IN_PROG not allowed otherwise
+		 * if we're already RSO stopped, fall through to return failure
+		 */
 	case WLAN_ROAM_INIT:
 	case WLAN_ROAM_DEINIT:
 	case WLAN_ROAM_SYNCH_IN_PROG: