[Intel-wired-lan] [net-next PATCH 11/11] ice: convert VF storage to hash table with krefs and RCU

Keller, Jacob E jacob.e.keller at intel.com
Wed Feb 16 01:43:04 UTC 2022



> -----Original Message-----
> From: Keller, Jacob E <jacob.e.keller at intel.com>
> Sent: Tuesday, February 15, 2022 5:31 PM
> To: Intel Wired LAN <intel-wired-lan at lists.osuosl.org>
> Cc: Keller, Jacob E <jacob.e.keller at intel.com>
> Subject: [net-next PATCH 11/11] ice: convert VF storage to hash table with krefs
> and RCU
> 

Oops. Rather embarassing, I forgot an ice_put_vf() in ice_vc_process_vf_msg, which results in all of the VF structures being leaked. I'm carefully reviewing the remaining flows to check if I missed any other ones. (Thanks CONFIG_KMEMLEAK!)

I'll send a v2 with the fixed version soon.

Thanks,
Jake

> The ice driver stores VF structures in a simple array which is allocated
> once at the time of VF creation. The VF structures are then accessed
> from the array by their VF ID. The ID must be between 0 and the number
> of allocated VFs.
> 
> Multiple threads can access this table:
> 
>  * .ndo operations such as .ndo_get_vf_cfg or .ndo_set_vf_trust
>  * interrupts, such as due to messages from the VF using the virtchnl
>    communication
>  * processing such as device reset
>  * commands to add or remove VFs
> 
> The current implementation does not keep track of when all threads are
> done operating on a VF and can potentially result in use-after-free
> issues caused by one thread accessing a VF structure after it has been
> released when removing VFs. Some of these are prevented with various
> state flats and checks.
> 
> In addition, this structure is quite static and does not support a
> planned future where virtualization can be more dynamic. As we begin to
> look at supporting Scalable IOV with the ice driver (as opposed to just
> supporting Single Root IOV), this structure is not sufficient.
> 
> In the future, VFs will be able to be added and removed individually and
> dynamically.
> 
> To allow for this, and to better protect against a whole class of
> use-after-free bugs, replace the VF storage with a combination of a hash
> table and krefs to reference track all of the accesses to VFs through
> the hash table.
> 
> A hash table still allows efficient look up of the VF given its ID, but
> also allows adding and removing VFs. It does not require contiguous VF
> IDs.
> 
> The use of krefs allows the cleanup of the VF memory to be delayed until
> after all threads have released their reference (by calling ice_put_vf).
> 
> To prevent corruption of the hash table, a combination of RCU and the
> mutex table_lock are used. Addition and removal from the hash table use
> the RCU-aware hash macros. This allows simple read-only look ups that
> iterate to locate a single VF can be fast using RCU. Accesses which
> modify the hash table, or which can't take RCU because they sleep, will
> hold the mutex lock.
> 
> By using this design, we have a stronger guarantee that the VF structure
> can't be released until after all threads are finished operating on it.
> We also pave the way for the more dynamic Scalable IOV implementation in
> the future.
> 
> Signed-off-by: Jacob Keller <jacob.e.keller at intel.com>
> ---
>  drivers/net/ethernet/intel/ice/ice_eswitch.c  |  16 +
>  drivers/net/ethernet/intel/ice/ice_ethtool.c  |  13 +-
>  drivers/net/ethernet/intel/ice/ice_lib.c      |  21 +-
>  drivers/net/ethernet/intel/ice/ice_main.c     |   8 +
>  drivers/net/ethernet/intel/ice/ice_repr.c     |   4 +
>  .../ethernet/intel/ice/ice_virtchnl_fdir.c    |   2 +
>  .../net/ethernet/intel/ice/ice_virtchnl_pf.c  | 377 +++++++++++++-----
>  .../net/ethernet/intel/ice/ice_virtchnl_pf.h  |  45 ++-
>  8 files changed, 362 insertions(+), 124 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c
> b/drivers/net/ethernet/intel/ice/ice_eswitch.c
> index c27013afcadb..9a84d746a6c4 100644
> --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
> +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
> @@ -209,6 +209,8 @@ static void ice_eswitch_remap_rings_to_vectors(struct
> ice_pf *pf)
>  		rx_ring->q_vector = q_vector;
>  		rx_ring->next = NULL;
>  		rx_ring->netdev = repr->netdev;
> +
> +		ice_put_vf(vf);
>  	}
>  }
> 
> @@ -223,6 +225,8 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi
> *ctrl_vsi)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf) {
>  		struct ice_vsi *vsi = vf->repr->src_vsi;
> 
> @@ -251,6 +255,8 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf) {
>  		struct ice_vsi *vsi = vf->repr->src_vsi;
> 
> @@ -430,6 +436,8 @@ static void ice_eswitch_napi_del(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf)
>  		netif_napi_del(&vf->repr->q_vector->napi);
>  }
> @@ -443,6 +451,8 @@ static void ice_eswitch_napi_enable(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf)
>  		napi_enable(&vf->repr->q_vector->napi);
>  }
> @@ -456,6 +466,8 @@ static void ice_eswitch_napi_disable(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf)
>  		napi_disable(&vf->repr->q_vector->napi);
>  }
> @@ -629,6 +641,8 @@ static void ice_eswitch_start_all_tx_queues(struct ice_pf
> *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	if (test_bit(ICE_DOWN, pf->state))
>  		return;
> 
> @@ -647,6 +661,8 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	if (test_bit(ICE_DOWN, pf->state))
>  		return;
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> index 83680eaa3565..399625892f9e 100644
> --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
> +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
> @@ -316,15 +316,20 @@ ice_get_eeprom(struct net_device *netdev, struct
> ethtool_eeprom *eeprom,
>   */
>  static bool ice_active_vfs(struct ice_pf *pf)
>  {
> +	bool active = false;
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> -	ice_for_each_vf(pf, bkt, vf) {
> -		if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
> -			return true;
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf) {
> +		if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
> +			active = true;
> +			break;
> +		}
>  	}
> +	rcu_read_unlock();
> 
> -	return false;
> +	return active;
>  }
> 
>  /**
> diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c
> b/drivers/net/ethernet/intel/ice/ice_lib.c
> index 94ba40a34402..a04869380c1b 100644
> --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> @@ -439,8 +439,10 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int
> __always_unused irq, void *d
>  	if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
>  		return IRQ_HANDLED;
> 
> -	ice_for_each_vf(pf, bkt, vf)
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf)
>  		napi_schedule(&vf->repr->q_vector->napi);
> +	rcu_read_unlock();
> 
>  	return IRQ_HANDLED;
>  }
> @@ -1345,11 +1347,17 @@ static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct
> ice_vsi *vsi)
>  {
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> +	int base;
> 
> -	ice_for_each_vf(pf, bkt, vf) {
> -		if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI)
> -			return pf->vsi[vf->ctrl_vsi_idx]->base_vector;
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf) {
> +		if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
> +			base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
> +			rcu_read_unlock();
> +			return base;
> +		}
>  	}
> +	rcu_read_unlock();
> 
>  	return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors,
>  			   ICE_RES_VF_CTRL_VEC_ID);
> @@ -2894,11 +2902,14 @@ static void ice_free_vf_ctrl_res(struct ice_pf *pf,
> struct ice_vsi *vsi)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> -	ice_for_each_vf(pf, bkt, vf) {
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf) {
>  		if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
> +			rcu_read_unlock();
>  			return;
>  		}
>  	}
> +	rcu_read_unlock();
> 
>  	/* No other VFs left that have control VSI. It is now safe to reclaim
>  	 * SW interrupts back to the common pool.
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c
> b/drivers/net/ethernet/intel/ice/ice_main.c
> index 418277550633..a8d4a772b725 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -521,8 +521,10 @@ ice_prepare_for_reset(struct ice_pf *pf, enum
> ice_reset_req reset_type)
>  		ice_vc_notify_reset(pf);
> 
>  	/* Disable VFs until reset is completed */
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf)
>  		ice_set_vf_state_qs_dis(vf);
> +	mutex_unlock(&pf->vfs.table_lock);
> 
>  	if (ice_is_eswitch_mode_switchdev(pf)) {
>  		if (reset_type != ICE_RESET_PFR)
> @@ -1756,6 +1758,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
>  	/* Check to see if one of the VFs caused an MDD event, and then
>  	 * increment counters and set print pending
>  	 */
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf) {
>  		reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
>  		if (reg & VP_MDET_TX_PQM_VALID_M) {
> @@ -1811,6 +1814,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
>  			}
>  		}
>  	}
> +	mutex_unlock(&pf->vfs.table_lock);
> 
>  	ice_print_vfs_mdd_events(pf);
>  }
> @@ -3680,6 +3684,7 @@ static void ice_deinit_pf(struct ice_pf *pf)
>  	mutex_destroy(&pf->sw_mutex);
>  	mutex_destroy(&pf->tc_mutex);
>  	mutex_destroy(&pf->avail_q_mutex);
> +	mutex_destroy(&pf->vfs.table_lock);
> 
>  	if (pf->avail_txqs) {
>  		bitmap_free(pf->avail_txqs);
> @@ -3779,6 +3784,9 @@ static int ice_init_pf(struct ice_pf *pf)
>  		return -ENOMEM;
>  	}
> 
> +	mutex_init(&pf->vfs.table_lock);
> +	hash_init(pf->vfs.table);
> +
>  	return 0;
>  }
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c
> b/drivers/net/ethernet/intel/ice/ice_repr.c
> index ffd428719863..f8db3ca521da 100644
> --- a/drivers/net/ethernet/intel/ice/ice_repr.c
> +++ b/drivers/net/ethernet/intel/ice/ice_repr.c
> @@ -396,6 +396,8 @@ void ice_repr_rem_from_all_vfs(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf)
>  		ice_repr_rem(vf);
>  }
> @@ -410,6 +412,8 @@ int ice_repr_add_for_all_vfs(struct ice_pf *pf)
>  	unsigned int bkt;
>  	int err;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	ice_for_each_vf(pf, bkt, vf) {
>  		err = ice_repr_add(vf);
>  		if (err)
> diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
> b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
> index df7a3f251cd3..07989f1d08ef 100644
> --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
> +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
> @@ -1578,6 +1578,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf)
>  	if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state))
>  		return;
> 
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf) {
>  		struct device *dev = ice_pf_to_dev(pf);
>  		enum virtchnl_fdir_prgm_status status;
> @@ -1634,6 +1635,7 @@ void ice_flush_fdir_ctx(struct ice_pf *pf)
>  		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
>  		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
>  	}
> +	mutex_unlock(&pf->vfs.table_lock);
>  }
> 
>  /**
> diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> index 45fcb95a0807..0e997bfce5da 100644
> --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> @@ -182,21 +182,61 @@ struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
>   * Locate and return a pointer to the VF structure associated with a given ID.
>   * Returns NULL if the ID does not have a valid VF structure associated with
>   * it.
> + *
> + * This function takes a reference to the VF, which must be released by
> + * calling ice_put_vf() once the caller is finished accessing the VF structure
> + * returned.
>   */
>  struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
>  {
> -	if (!pf->vfs.table) {
> -		dev_err(ice_pf_to_dev(pf), "VF table not allocated\n");
> -		return NULL;
> -	}
> +	struct ice_vf *vf;
> 
> -	if (vf_id >= pf->vfs.num_alloc) {
> -		dev_err(ice_pf_to_dev(pf), "Out of range VF ID: %u\n",
> -			vf_id);
> -		return NULL;
> -	}
> +	rcu_read_lock();
> +	hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) {
> +		if (vf->vf_id == vf_id) {
> +			struct ice_vf *found;
> 
> -	return &pf->vfs.table[vf_id];
> +			if (kref_get_unless_zero(&vf->refcnt))
> +				found = vf;
> +			else
> +				found = NULL;
> +
> +			rcu_read_unlock();
> +			return found;
> +		}
> +	}
> +	rcu_read_unlock();
> +
> +	return NULL;
> +}
> +
> +/**
> + * ice_release_vf - Release VF associated with a refcount
> + * @ref: the kref decremented to zero
> + *
> + * Callback function for kref_put to release a VF once its reference count has
> + * hit zero.
> + */
> +static void ice_release_vf(struct kref *ref)
> +{
> +	struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt);
> +
> +	mutex_destroy(&vf->cfg_lock);
> +
> +	kfree_rcu(vf, rcu);
> +}
> +
> +/**
> + * ice_put_vf - Release a reference to a VF
> + * @vf: the VF structure to decrease reference count on
> + *
> + * This must be called after ice_get_vf_by_id() once the reference to the VF
> + * structure is no longer used. Otherwise, the VF structure will never be
> + * freed.
> + */
> +void ice_put_vf(struct ice_vf *vf)
> +{
> +	kref_put(&vf->refcnt, ice_release_vf);
>  }
> 
>  /**
> @@ -210,7 +250,10 @@ struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16
> vf_id)
>   */
>  bool ice_has_vfs(struct ice_pf *pf)
>  {
> -	return pf->vfs.table && pf->vfs.num_alloc > 0;
> +	/* A simple check that the hash table is not empty does not require
> +	 * the mutex or rcu_read_lock.
> +	 */
> +	return !hash_empty(pf->vfs.table);
>  }
> 
>  /**
> @@ -223,10 +266,16 @@ bool ice_has_vfs(struct ice_pf *pf)
>   */
>  u16 ice_get_num_vfs(struct ice_pf *pf)
>  {
> -	if (!pf->vfs.table)
> -		return 0;
> +	struct ice_vf *vf;
> +	unsigned int bkt;
> +	u16 num_vfs = 0;
> 
> -	return pf->vfs.num_alloc;
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf)
> +		num_vfs++;
> +	rcu_read_unlock();
> +
> +	return num_vfs;
>  }
> 
>  /**
> @@ -244,6 +293,32 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf
> *vf)
>  	return 0;
>  }
> 
> +/**
> + * ice_free_vf_entries - Free all VF entries from the hash table
> + * @pf: pointer to the PF structure
> + *
> + * Iterate over the VF hash table, removing and releasing all VF entries.
> + * Called during VF teardown or as cleanup during failed VF initialization.
> + */
> +static void ice_free_vf_entries(struct ice_pf *pf)
> +{
> +	struct ice_vfs *vfs = &pf->vfs;
> +	struct hlist_node *tmp;
> +	struct ice_vf *vf;
> +	unsigned int bkt;
> +
> +	/* Remove all VFs from the hash table and release their main
> +	 * reference. Once all references to the VF are dropped, ice_put_vf()
> +	 * will call ice_release_vf which will remove the VF memory.
> +	 */
> +	lockdep_assert_held(&vfs->table_lock);
> +
> +	hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) {
> +		hash_del_rcu(&vf->entry);
> +		ice_put_vf(vf);
> +	}
> +}
> +
>  /**
>   * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
>   * @pf: pointer to the PF structure
> @@ -260,6 +335,7 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops
> v_opcode,
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf) {
>  		/* Not all vfs are enabled so skip the ones that are not */
>  		if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
> @@ -272,6 +348,7 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops
> v_opcode,
>  		ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg,
>  				      msglen, NULL);
>  	}
> +	mutex_unlock(&pf->vfs.table_lock);
>  }
> 
>  /**
> @@ -546,8 +623,6 @@ void ice_free_vfs(struct ice_pf *pf)
>  	if (!ice_has_vfs(pf))
>  		return;
> 
> -	ice_eswitch_release(pf);
> -
>  	while (test_and_set_bit(ICE_VF_DIS, pf->state))
>  		usleep_range(1000, 2000);
> 
> @@ -560,6 +635,10 @@ void ice_free_vfs(struct ice_pf *pf)
>  	else
>  		dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
> 
> +	mutex_lock(&vfs->table_lock);
> +
> +	ice_eswitch_release(pf);
> +
>  	ice_for_each_vf(pf, bkt, vf) {
>  		mutex_lock(&vf->cfg_lock);
> 
> @@ -595,9 +674,9 @@ void ice_free_vfs(struct ice_pf *pf)
>  		dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
> 
>  	vfs->num_qps_per = 0;
> -	vfs->num_alloc = 0;
> -	devm_kfree(dev, vfs->table);
> -	vfs->table = NULL;
> +	ice_free_vf_entries(pf);
> +
> +	mutex_unlock(&vfs->table_lock);
> 
>  	clear_bit(ICE_VF_DIS, pf->state);
>  	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
> @@ -1200,6 +1279,8 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16
> num_vfs)
>  	int msix_avail_per_vf, msix_avail_for_sriov;
>  	struct device *dev = ice_pf_to_dev(pf);
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	if (!num_vfs || max_valid_res_idx < 0)
>  		return -EINVAL;
> 
> @@ -1507,6 +1588,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
>  	if (!ice_has_vfs(pf))
>  		return false;
> 
> +	mutex_lock(&pf->vfs.table_lock);
> +
>  	/* clear all malicious info if the VFs are getting reset */
>  	ice_for_each_vf(pf, bkt, vf)
>  		if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->vfs.malvfs,
> @@ -1515,8 +1598,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
>  				vf->vf_id);
> 
>  	/* If VFs have been disabled, there is no need to reset */
> -	if (test_and_set_bit(ICE_VF_DIS, pf->state))
> +	if (test_and_set_bit(ICE_VF_DIS, pf->state)) {
> +		mutex_unlock(&pf->vfs.table_lock);
>  		return false;
> +	}
> 
>  	/* Begin reset on all VFs at once */
>  	ice_for_each_vf(pf, bkt, vf)
> @@ -1583,6 +1668,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
>  	ice_flush(hw);
>  	clear_bit(ICE_VF_DIS, pf->state);
> 
> +	mutex_unlock(&pf->vfs.table_lock);
> +
>  	return true;
>  }
> 
> @@ -1735,8 +1822,10 @@ void ice_vc_notify_link_state(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf)
>  		ice_vc_notify_vf_link_state(vf);
> +	mutex_unlock(&pf->vfs.table_lock);
>  }
> 
>  /**
> @@ -1855,6 +1944,8 @@ static int ice_start_vfs(struct ice_pf *pf)
>  	struct ice_vf *vf;
>  	int retval;
> 
> +	lockdep_assert_held(&pf->vfs.table_lock);
> +
>  	it_cnt = 0;
>  	ice_for_each_vf(pf, bkt, vf) {
>  		ice_clear_vf_reset_trigger(vf);
> @@ -1889,18 +1980,38 @@ static int ice_start_vfs(struct ice_pf *pf)
>  }
> 
>  /**
> - * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation
> - * @pf: PF holding reference to all VFs for default configuration
> + * ice_create_vf_entries - Allocate and insert VF entries
> + * @pf: pointer to the PF structure
> + * @num_vfs: the number of VFs to allocate
> + *
> + * Allocate new VF entries and insert them into the hash table. Set some
> + * basic default fields for initializing the new VFs.
> + *
> + * After this function exits, the hash table will have num_vfs entries
> + * inserted.
> + *
> + * Returns 0 on success or an integer error code on failure.
>   */
> -static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
> +static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs)
>  {
> +	struct ice_vfs *vfs = &pf->vfs;
>  	struct ice_vf *vf;
> -	unsigned int bkt;
> -	u16 vf_id = 0;
> +	u16 vf_id;
> +	int err;
> +
> +	lockdep_assert_held(&vfs->table_lock);
> +
> +	for (vf_id = 0; vf_id < num_vfs; vf_id++) {
> +		vf = kzalloc(sizeof(*vf), GFP_KERNEL);
> +		if (!vf) {
> +			err = -ENOMEM;
> +			goto err_free_entries;
> +		}
> +		kref_init(&vf->refcnt);
> 
> -	ice_for_each_vf(pf, bkt, vf) {
>  		vf->pf = pf;
> -		vf->vf_id = vf_id++;
> +		vf->vf_id = vf_id;
> +
>  		vf->vf_sw_id = pf->first_sw;
>  		/* assign default capabilities */
>  		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
> @@ -1917,27 +2028,15 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
>  		ice_vc_set_dflt_vf_ops(&vf->vc_ops);
> 
>  		mutex_init(&vf->cfg_lock);
> +
> +		hash_add_rcu(vfs->table, &vf->entry, vf_id);
>  	}
> -}
> -
> -/**
> - * ice_alloc_vfs - allocate num_vfs in the PF structure
> - * @pf: PF to store the allocated VFs in
> - * @num_vfs: number of VFs to allocate
> - */
> -static int ice_alloc_vfs(struct ice_pf *pf, int num_vfs)
> -{
> -	struct ice_vf *vfs;
> -
> -	vfs = devm_kcalloc(ice_pf_to_dev(pf), num_vfs, sizeof(*vfs),
> -			   GFP_KERNEL);
> -	if (!vfs)
> -		return -ENOMEM;
> -
> -	pf->vfs.table = NULL;
> -	pf->vfs.num_alloc = num_vfs;
> 
>  	return 0;
> +
> +err_free_entries:
> +	ice_free_vf_entries(pf);
> +	return err;
>  }
> 
>  /**
> @@ -1958,14 +2057,10 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
>  	ice_flush(hw);
> 
>  	ret = pci_enable_sriov(pf->pdev, num_vfs);
> -	if (ret) {
> -		pf->vfs.num_alloc = 0;
> -		goto err_unroll_intr;
> -	}
> -
> -	ret = ice_alloc_vfs(pf, num_vfs);
>  	if (ret)
> -		goto err_pci_disable_sriov;
> +		goto err_unroll_intr;
> +
> +	mutex_lock(&pf->vfs.table_lock);
> 
>  	if (ice_set_per_vf_res(pf, num_vfs)) {
>  		dev_err(dev, "Not enough resources for %d VFs, try with fewer
> number of VFs\n",
> @@ -1974,12 +2069,17 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
>  		goto err_unroll_sriov;
>  	}
> 
> -	ice_set_dflt_settings_vfs(pf);
> +	ret = ice_create_vf_entries(pf, num_vfs);
> +	if (ret) {
> +		dev_err(dev, "Failed to allocate VF entries for %d VFs\n",
> +			num_vfs);
> +		goto err_unroll_sriov;
> +	}
> 
>  	if (ice_start_vfs(pf)) {
>  		dev_err(dev, "Failed to start VF(s)\n");
>  		ret = -EAGAIN;
> -		goto err_unroll_sriov;
> +		goto err_unroll_vf_entries;
>  	}
> 
>  	clear_bit(ICE_VF_DIS, pf->state);
> @@ -1992,13 +2092,14 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
>  	if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state))
>  		ice_irq_dynamic_ena(hw, NULL, NULL);
> 
> +	mutex_unlock(&pf->vfs.table_lock);
> +
>  	return 0;
> 
> +err_unroll_vf_entries:
> +	ice_free_vf_entries(pf);
>  err_unroll_sriov:
> -	devm_kfree(dev, pf->vfs.table);
> -	pf->vfs.table = NULL;
> -	pf->vfs.num_alloc = 0;
> -err_pci_disable_sriov:
> +	mutex_unlock(&pf->vfs.table_lock);
>  	pci_disable_sriov(pf->pdev);
>  err_unroll_intr:
>  	/* rearm interrupts here */
> @@ -2133,6 +2234,7 @@ void ice_process_vflr_event(struct ice_pf *pf)
>  	    !ice_has_vfs(pf))
>  		return;
> 
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf) {
>  		u32 reg_idx, bit_idx;
> 
> @@ -2147,6 +2249,7 @@ void ice_process_vflr_event(struct ice_pf *pf)
>  			mutex_unlock(&vf->cfg_lock);
>  		}
>  	}
> +	mutex_unlock(&pf->vfs.table_lock);
>  }
> 
>  /**
> @@ -2166,22 +2269,36 @@ static void ice_vc_reset_vf(struct ice_vf *vf)
>   *
>   * If no VF is found who owns the pfq then return NULL, otherwise return a
>   * pointer to the VF who owns the pfq
> + *
> + * If this function returns non-NULL, it acquires a reference count of the VF
> + * structure. The caller is responsible for calling ice_put_vf() to drop this
> + * reference.
>   */
>  static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
>  {
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> -	ice_for_each_vf(pf, bkt, vf) {
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf) {
>  		struct ice_vsi *vsi;
>  		u16 rxq_idx;
> 
>  		vsi = ice_get_vf_vsi(vf);
> 
>  		ice_for_each_rxq(vsi, rxq_idx)
> -			if (vsi->rxq_map[rxq_idx] == pfq)
> -				return vf;
> +			if (vsi->rxq_map[rxq_idx] == pfq) {
> +				struct ice_vf *found;
> +
> +				if (kref_get_unless_zero(&vf->refcnt))
> +					found = vf;
> +				else
> +					found = NULL;
> +				rcu_read_unlock();
> +				return found;
> +			}
>  	}
> +	rcu_read_unlock();
> 
>  	return NULL;
>  }
> @@ -3006,24 +3123,27 @@ int ice_set_vf_spoofchk(struct net_device *netdev,
> int vf_id, bool ena)
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	vf_vsi = ice_get_vf_vsi(vf);
>  	if (!vf_vsi) {
>  		netdev_err(netdev, "VSI %d for VF %d is null\n",
>  			   vf->lan_vsi_idx, vf->vf_id);
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto out_put_vf;
>  	}
> 
>  	if (vf_vsi->type != ICE_VSI_VF) {
>  		netdev_err(netdev, "Type %d of VSI %d for VF %d is no
> ICE_VSI_VF\n",
>  			   vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
> -		return -ENODEV;
> +		ret = -ENODEV;
> +		goto out_put_vf;
>  	}
> 
>  	if (ena == vf->spoofchk) {
>  		dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF");
> -		return 0;
> +		ret = 0;
> +		goto out_put_vf;
>  	}
> 
>  	if (ena)
> @@ -3036,6 +3156,8 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int
> vf_id, bool ena)
>  	else
>  		vf->spoofchk = ena;
> 
> +out_put_vf:
> +	ice_put_vf(vf);
>  	return ret;
>  }
> 
> @@ -3048,17 +3170,22 @@ int ice_set_vf_spoofchk(struct net_device *netdev,
> int vf_id, bool ena)
>   */
>  bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
>  {
> +	bool is_vf_promisc = false;
>  	struct ice_vf *vf;
>  	unsigned int bkt;
> 
> -	ice_for_each_vf(pf, bkt, vf) {
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf) {
>  		/* found a VF that has promiscuous mode configured */
>  		if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
> -		    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
> -			return true;
> +		    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
> +			is_vf_promisc = true;
> +			break;
> +		}
>  	}
> +	rcu_read_unlock();
> 
> -	return false;
> +	return is_vf_promisc;
>  }
> 
>  /**
> @@ -3085,6 +3212,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct
> ice_vf *vf, u8 *msg)
>  		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
>  		goto error_param;
>  	}
> +	mutex_unlock(&pf->vfs.table_lock);
> 
>  	if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) {
>  		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
> @@ -4210,7 +4338,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int
> vf_id, u16 vlan_id, u8 qos,
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	if (ice_vf_get_port_vlan_prio(vf) == qos &&
>  	    ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto &&
> @@ -4218,7 +4346,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int
> vf_id, u16 vlan_id, u8 qos,
>  		/* duplicate request, so just return success */
>  		dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x
> request\n",
>  			vlan_id, qos, local_vlan_proto);
> -		return 0;
> +		ret = 0;
> +		goto out_put_vf;
>  	}
> 
>  	mutex_lock(&vf->cfg_lock);
> @@ -4233,7 +4362,9 @@ ice_set_vf_port_vlan(struct net_device *netdev, int
> vf_id, u16 vlan_id, u8 qos,
>  	ice_vc_reset_vf(vf);
>  	mutex_unlock(&vf->cfg_lock);
> 
> -	return 0;
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
>  /**
> @@ -5784,6 +5915,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct
> ice_rq_event_info *event)
>  		ice_vc_send_msg_to_vf(vf, v_opcode,
>  				      VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
> NULL,
>  				      0);
> +		ice_put_vf(vf);
>  		return;
>  	}
> 
> @@ -5793,6 +5925,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct
> ice_rq_event_info *event)
>  				      NULL, 0);
>  		dev_err(dev, "Invalid message from VF %d, opcode %d, len %d,
> error %d\n",
>  			vf_id, v_opcode, msglen, err);
> +		ice_put_vf(vf);
>  		return;
>  	}
> 
> @@ -5802,6 +5935,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct
> ice_rq_event_info *event)
>  	if (!mutex_trylock(&vf->cfg_lock)) {
>  		dev_info(dev, "VF %u is being configured in another context that
> will trigger a VFR, so there is no need to handle this message\n",
>  			 vf->vf_id);
> +		ice_put_vf(vf);
>  		return;
>  	}
> 
> @@ -5939,7 +6073,7 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id,
> struct ifla_vf_info *ivi)
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	ivi->vf = vf_id;
>  	ether_addr_copy(ivi->mac, vf->hw_lan_addr.addr);
> @@ -5960,7 +6094,10 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id,
> struct ifla_vf_info *ivi)
>  		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
>  	ivi->max_tx_rate = vf->max_tx_rate;
>  	ivi->min_tx_rate = vf->min_tx_rate;
> -	return 0;
> +
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
>  /**
> @@ -6021,17 +6158,20 @@ int ice_set_vf_mac(struct net_device *netdev, int
> vf_id, u8 *mac)
> 
>  	/* nothing left to do, unicast MAC already set */
>  	if (ether_addr_equal(vf->dev_lan_addr.addr, mac) &&
> -	    ether_addr_equal(vf->hw_lan_addr.addr, mac))
> -		return 0;
> +	    ether_addr_equal(vf->hw_lan_addr.addr, mac)) {
> +		ret = 0;
> +		goto out_put_vf;
> +	}
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	if (ice_unicast_mac_exists(pf, mac)) {
>  		netdev_err(netdev, "Unicast MAC %pM already exists on this PF.
> Preventing setting VF %u unicast MAC address to %pM\n",
>  			   mac, vf_id, mac);
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto out_put_vf;
>  	}
> 
>  	mutex_lock(&vf->cfg_lock);
> @@ -6055,9 +6195,13 @@ int ice_set_vf_mac(struct net_device *netdev, int
> vf_id, u8 *mac)
> 
>  	ice_vc_reset_vf(vf);
>  	mutex_unlock(&vf->cfg_lock);
> -	return 0;
> +
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
> +
>  /**
>   * ice_set_vf_trust
>   * @netdev: network interface device structure
> @@ -6083,11 +6227,13 @@ int ice_set_vf_trust(struct net_device *netdev, int
> vf_id, bool trusted)
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	/* Check if already trusted */
> -	if (trusted == vf->trusted)
> -		return 0;
> +	if (trusted == vf->trusted) {
> +		ret = 0;
> +		goto out_put_vf;
> +	}
> 
>  	mutex_lock(&vf->cfg_lock);
> 
> @@ -6098,7 +6244,9 @@ int ice_set_vf_trust(struct net_device *netdev, int
> vf_id, bool trusted)
> 
>  	mutex_unlock(&vf->cfg_lock);
> 
> -	return 0;
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
>  /**
> @@ -6121,7 +6269,7 @@ int ice_set_vf_link_state(struct net_device *netdev, int
> vf_id, int link_state)
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	switch (link_state) {
>  	case IFLA_VF_LINK_STATE_AUTO:
> @@ -6136,12 +6284,15 @@ int ice_set_vf_link_state(struct net_device *netdev,
> int vf_id, int link_state)
>  		vf->link_up = false;
>  		break;
>  	default:
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto out_put_vf;
>  	}
> 
>  	ice_vc_notify_vf_link_state(vf);
> 
> -	return 0;
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
>  /**
> @@ -6154,8 +6305,10 @@ static int ice_calc_all_vfs_min_tx_rate(struct ice_pf
> *pf)
>  	unsigned int bkt;
>  	int rate = 0;
> 
> -	ice_for_each_vf(pf, bkt, vf)
> +	rcu_read_lock();
> +	ice_for_each_vf_rcu(pf, bkt, vf)
>  		rate += vf->min_tx_rate;
> +	rcu_read_unlock();
> 
>  	return rate;
>  }
> @@ -6217,7 +6370,7 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id, int
> min_tx_rate,
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	vsi = ice_get_vf_vsi(vf);
> 
> @@ -6227,23 +6380,27 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id,
> int min_tx_rate,
>  	if (max_tx_rate && min_tx_rate > max_tx_rate) {
>  		dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max
> Tx rate %d Mbps\n",
>  			min_tx_rate, max_tx_rate);
> -		return -EINVAL;
> +		ret = -EINVAL;
> +		goto out_put_vf;
>  	}
> 
>  	if (min_tx_rate && ice_is_dcb_active(pf)) {
>  		dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate
> limiting not allowed on this PF.\n");
> -		return -EOPNOTSUPP;
> +		ret = -EOPNOTSUPP;
> +		goto out_put_vf;
>  	}
> 
> -	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate))
> -		return -EINVAL;
> +	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
> +		ret = -EINVAL;
> +		goto out_put_vf;
> +	}
> 
>  	if (vf->min_tx_rate != (unsigned int)min_tx_rate) {
>  		ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000);
>  		if (ret) {
>  			dev_err(dev, "Unable to set min-tx-rate for VF %d\n",
>  				vf->vf_id);
> -			return ret;
> +			goto out_put_vf;
>  		}
> 
>  		vf->min_tx_rate = min_tx_rate;
> @@ -6254,13 +6411,15 @@ ice_set_vf_bw(struct net_device *netdev, int vf_id,
> int min_tx_rate,
>  		if (ret) {
>  			dev_err(dev, "Unable to set max-tx-rate for VF %d\n",
>  				vf->vf_id);
> -			return ret;
> +			goto out_put_vf;
>  		}
> 
>  		vf->max_tx_rate = max_tx_rate;
>  	}
> 
> -	return 0;
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
>  /**
> @@ -6284,11 +6443,13 @@ int ice_get_vf_stats(struct net_device *netdev, int
> vf_id,
> 
>  	ret = ice_check_vf_ready_for_cfg(vf);
>  	if (ret)
> -		return ret;
> +		goto out_put_vf;
> 
>  	vsi = ice_get_vf_vsi(vf);
> -	if (!vsi)
> -		return -EINVAL;
> +	if (!vsi) {
> +		ret = -EINVAL;
> +		goto out_put_vf;
> +	}
> 
>  	ice_update_eth_stats(vsi);
>  	stats = &vsi->eth_stats;
> @@ -6306,7 +6467,9 @@ int ice_get_vf_stats(struct net_device *netdev, int
> vf_id,
>  	vf_stats->rx_dropped = stats->rx_discards;
>  	vf_stats->tx_dropped = stats->tx_discards;
> 
> -	return 0;
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return ret;
>  }
> 
>  /**
> @@ -6350,6 +6513,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
> 
>  	pf->vfs.last_printed_mdd_jiffies = jiffies;
> 
> +	mutex_lock(&pf->vfs.table_lock);
>  	ice_for_each_vf(pf, bkt, vf) {
>  		/* only print Rx MDD event message if there are new events */
>  		if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
> @@ -6368,6 +6532,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
>  				 vf->dev_lan_addr.addr);
>  		}
>  	}
> +	mutex_unlock(&pf->vfs.table_lock);
>  }
> 
>  /**
> @@ -6423,9 +6588,8 @@ ice_is_malicious_vf(struct ice_pf *pf, struct
> ice_rq_event_info *event,
>  	if (!vf)
>  		return false;
> 
> -	/* Check if VF is disabled. */
>  	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states))
> -		return false;
> +		goto out_put_vf;
> 
>  	mbxdata.num_msg_proc = num_msg_proc;
>  	mbxdata.num_pending_arq = num_msg_pending;
> @@ -6436,7 +6600,7 @@ ice_is_malicious_vf(struct ice_pf *pf, struct
> ice_rq_event_info *event,
>  	/* check to see if we have a malicious VF */
>  	status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf);
>  	if (status)
> -		return false;
> +		goto out_put_vf;
> 
>  	if (malvf) {
>  		bool report_vf = false;
> @@ -6458,12 +6622,9 @@ ice_is_malicious_vf(struct ice_pf *pf, struct
> ice_rq_event_info *event,
>  					 &vf->dev_lan_addr.addr[0],
>  					 pf_vsi->netdev->dev_addr);
>  		}
> -
> -		return true;
>  	}
> 
> -	/* if there was an error in detection or the VF is not malicious then
> -	 * return false
> -	 */
> -	return false;
> +out_put_vf:
> +	ice_put_vf(vf);
> +	return malvf;
>  }
> diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
> b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
> index 179a4dda38e5..48e28b11bb12 100644
> --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
> +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
> @@ -44,22 +44,45 @@
>   * These functions provide abstraction for interacting with the VF hash table.
>   * In general, direct access to the hash table should be avoided outside of
>   * these functions where possible.
> + *
> + * The VF entries in the hash table are protected by reference counting to
> + * track lifetime of accesses from the table. The ice_get_vf_by_id() function
> + * obtains a reference to the VF structure which must be dropped by using
> + * ice_put_vf().
>   */
> 
>  /**
>   * ice_for_each_vf - Iterate over each VF entry
>   * @pf: pointer to the PF private structure
>   * @bkt: bucket index used for iteration
> - * @entry: pointer to the VF entry currently being processed in the loop.
> + * @vf: pointer to the VF entry currently being processed in the loop.
>   *
>   * The bkt variable is an unsigned integer iterator used to traverse the VF
>   * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is.
>   * Use vf->vf_id to get the id number if needed.
> + *
> + * The caller is expected to be under the table_lock mutex for the entire
> + * loop. Use this iterator if your loop is long or if it might sleep.
>   */
> -#define ice_for_each_vf(pf, bkt, entry)					\
> -	for ((bkt) = 0, (entry) = &(pf)->vfs.table[0];			\
> -	     (bkt) < (pf)->vfs.num_alloc;				\
> -	     (bkt)++, (entry)++)
> +#define ice_for_each_vf(pf, bkt, vf) \
> +	hash_for_each((pf)->vfs.table, (bkt), (vf), entry)
> +
> +/**
> + * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU
> + * @pf: pointer to the PF private structure
> + * @bkt: bucket index used for iteration
> + * @vf: pointer to the VF entry currently being processed in the loop.
> + *
> + * The bkt variable is an unsigned integer iterator used to traverse the VF
> + * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is.
> + * Use vf->vf_id to get the id number if needed.
> + *
> + * The caller is expected to be under rcu_read_lock() for the entire loop.
> + * Only use this iterator if your loop is short and you can guarantee it does
> + * not sleep.
> + */
> +#define ice_for_each_vf_rcu(pf, bkt, vf) \
> +	hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry)
> 
>  /* Specific VF states */
>  enum ice_vf_states {
> @@ -125,8 +148,8 @@ struct ice_vc_vf_ops {
> 
>  /* Virtchnl/SR-IOV config info */
>  struct ice_vfs {
> -	struct ice_vf *table;		/* table of VF entries */
> -	u16 num_alloc;			/* number of allocated VFs */
> +	DECLARE_HASHTABLE(table, 8);	/* table of VF entries */
> +	struct mutex table_lock;	/* Lock for protecting the hash table */
>  	u16 num_supported;		/* max supported VFs on this PF */
>  	u16 num_qps_per;		/* number of queue pairs per VF */
>  	u16 num_msix_per;		/* number of MSi-X vectors per VF */
> @@ -136,6 +159,9 @@ struct ice_vfs {
> 
>  /* VF information structure */
>  struct ice_vf {
> +	struct hlist_node entry;
> +	struct rcu_head rcu;
> +	struct kref refcnt;
>  	struct ice_pf *pf;
> 
>  	/* Used during virtchnl message handling and NDO ops against the VF
> @@ -193,6 +219,7 @@ struct ice_vf {
> 
>  #ifdef CONFIG_PCI_IOV
>  struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id);
> +void ice_put_vf(struct ice_vf *vf);
>  bool ice_has_vfs(struct ice_pf *pf);
>  u16 ice_get_num_vfs(struct ice_pf *pf);
>  struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
> @@ -259,6 +286,10 @@ static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf
> *pf, u16 vf_id)
>  	return NULL;
>  }
> 
> +static inline void ice_put_vf(struct ice_vf *vf)
> +{
> +}
> +
>  static inline bool ice_has_vfs(struct ice_pf *pf)
>  {
>  	return false;
> --
> 2.35.1.129.gb80121027d12



More information about the Intel-wired-lan mailing list