[Intel-wired-lan] [PATCH intel-net 2/5] ice: xsk: allocate separate memory for XDP SW ring

Maciej Fijalkowski maciej.fijalkowski at intel.com
Mon Dec 13 11:14:35 UTC 2021


On Fri, Dec 10, 2021 at 11:24:51PM +0100, Alexander Lobakin wrote:
> From: Alexander Lobakin <alexandr.lobakin at intel.com>
> 
> From: Maciej Fijalkowski <maciej.fijalkowski at intel.com>
> Date: Fri, 10 Dec 2021 15:59:38 +0100
> 
> > Currently, the zero-copy data path is reusing the memory region that was
> > initially allocated for an array of struct ice_rx_buf for its own
> > purposes. This is error prone as it is based on the ice_rx_buf struct
> > always being the same size or bigger than what the zero-copy path needs.
> > There can also be old values present in that array giving rise to errors
> > when the zero-copy path uses it.
> > 
> > Fix this by freeing the ice_rx_buf region and allocating a new array for
> > the zero-copy path that has the right length and is initialized to zero.
> > 
> > Fixes: 57f7f8b6bc0b ("ice: Use xdp_buf instead of rx_buf for xsk zero-copy")
> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski at intel.com>
> > ---
> >  drivers/net/ethernet/intel/ice/ice_base.c | 19 +++++++++++++++
> >  drivers/net/ethernet/intel/ice/ice_txrx.c | 19 ++++++++++-----
> >  drivers/net/ethernet/intel/ice/ice_xsk.c  | 29 ++++++++++++-----------
> >  3 files changed, 47 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
> > index 1efc635cc0f5..56606d477f05 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_base.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_base.c
> > @@ -6,6 +6,18 @@
> >  #include "ice_lib.h"
> >  #include "ice_dcb_lib.h"
> >  
> > +static int ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring)
> > +{
> > +	rx_ring->xdp_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->xdp_buf), GFP_KERNEL);
> > +	return rx_ring->xdp_buf ? 0 : -ENOMEM;
> > +}
> > +
> > +static int ice_alloc_rx_buf(struct ice_rx_ring *rx_ring)
> > +{
> > +	rx_ring->rx_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL);
> > +	return rx_ring->rx_buf ? 0 : -ENOMEM;
> > +}
> > +
> 
> Given that those functions can only return 0 or -ENOMEM, wouldn't it be
> more elegant to make them return bool
> 
> 	return !!rx_ring->rx_buf; /* true on success,
> 				     false otherwise */

Yes, why not.

> }
> 
> 	if (!ice_alloc_rx_buf(rx_ring))
> 		return -ENOMEM;
> 
> ?
> 
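Something like this, then (an untested sketch; ice_alloc_rx_buf_zc()
would get the same treatment):

	static bool ice_alloc_rx_buf(struct ice_rx_ring *rx_ring)
	{
		/* true on success, false on allocation failure */
		rx_ring->rx_buf = kcalloc(rx_ring->count,
					  sizeof(*rx_ring->rx_buf), GFP_KERNEL);

		return !!rx_ring->rx_buf;
	}

and the caller in ice_vsi_cfg_rxq() then maps false back to -ENOMEM:

	if (!ice_alloc_rx_buf(ring))
		return -ENOMEM;
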
> >  /**
> >   * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
> >   * @qs_cfg: gathered variables needed for PF->VSI queues assignment
> > @@ -492,8 +504,12 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
> >  			xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
> >  					 ring->q_index, ring->q_vector->napi.napi_id);
> >  
> > +		kfree(ring->rx_buf);
> >  		ring->xsk_pool = ice_xsk_pool(ring);
> >  		if (ring->xsk_pool) {
> > +			err = ice_alloc_rx_buf_zc(ring);
> > +			if (err)
> > +				return err;
> >  			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
> >  
> >  			ring->rx_buf_len =
> > @@ -508,6 +524,9 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
> >  			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
> >  				 ring->q_index);
> >  		} else {
> > +			err = ice_alloc_rx_buf(ring);
> > +			if (err)
> > +				return err;
> >  			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
> >  				/* coverity[check_return] */
> >  				xdp_rxq_info_reg(&ring->xdp_rxq,
> > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > index bc3ba19dc88f..227513b687b9 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_txrx.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
> > @@ -419,7 +419,10 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
> >  	}
> >  
> >  rx_skip_free:
> > -	memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
> > +	if (rx_ring->xsk_pool)
> > +		memset(rx_ring->xdp_buf, 0, sizeof(*rx_ring->xdp_buf) * rx_ring->count);
> > +	else
> > +		memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);
> 
> Consider using array_size() instead of a plain multiplication in
> both of these:
> 
> 	memset(s, 0, array_size(n, sizeof(*s)));
> 
> Internally it does the same overflow-checked array multiplication as
> kcalloc().

Good stuff, I'll incorporate that.
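
Something like this (untested sketch):

	if (rx_ring->xsk_pool)
		memset(rx_ring->xdp_buf, 0,
		       array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf)));
	else
		memset(rx_ring->rx_buf, 0,
		       array_size(rx_ring->count, sizeof(*rx_ring->rx_buf)));

array_size() saturates to SIZE_MAX on overflow instead of wrapping, so a
bogus count fails loudly rather than silently shrinking the cleared
region.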

> 
> >  
> >  	/* Zero out the descriptor ring */
> >  	size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
> > @@ -446,8 +449,13 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
> >  		if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
> >  			xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
> >  	rx_ring->xdp_prog = NULL;
> > -	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
> > -	rx_ring->rx_buf = NULL;
> > +	if (rx_ring->xsk_pool) {
> > +		kfree(rx_ring->xdp_buf);
> > +		rx_ring->xdp_buf = NULL;
> > +	} else {
> > +		kfree(rx_ring->rx_buf);
> > +		rx_ring->rx_buf = NULL;
> > +	}
> >  
> >  	if (rx_ring->desc) {
> >  		size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
> > @@ -475,8 +483,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
> >  	/* warn if we are about to overwrite the pointer */
> >  	WARN_ON(rx_ring->rx_buf);
> >  	rx_ring->rx_buf =
> > -		devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count,
> > -			     GFP_KERNEL);
> > +		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL);
> >  	if (!rx_ring->rx_buf)
> >  		return -ENOMEM;
> >  
> > @@ -505,7 +512,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
> >  	return 0;
> >  
> >  err:
> > -	devm_kfree(dev, rx_ring->rx_buf);
> > +	kfree(rx_ring->rx_buf);
> >  	rx_ring->rx_buf = NULL;
> >  	return -ENOMEM;
> >  }
> > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > index 75c3e98241e0..5cb61955c1f3 100644
> > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c
> > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
> > @@ -12,6 +12,11 @@
> >  #include "ice_txrx_lib.h"
> >  #include "ice_lib.h"
> >  
> > +static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
> > +{
> > +	return &rx_ring->xdp_buf[idx];
> > +}
> > +
> >  /**
> >   * ice_qp_reset_stats - Resets all stats for rings of given index
> >   * @vsi: VSI that contains rings of interest
> > @@ -372,7 +377,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
> >  	dma_addr_t dma;
> >  
> >  	rx_desc = ICE_RX_DESC(rx_ring, ntu);
> > -	xdp = &rx_ring->xdp_buf[ntu];
> > +	xdp = ice_xdp_buf(rx_ring, ntu);
> >  
> >  	nb_buffs = min_t(u16, count, rx_ring->count - ntu);
> >  	nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
> > @@ -425,9 +430,8 @@ static void ice_bump_ntc(struct ice_rx_ring *rx_ring)
> >   * Returns the skb on success, NULL on failure.
> >   */
> >  static struct sk_buff *
> > -ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr)
> > +ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
> >  {
> > -	struct xdp_buff *xdp = *xdp_arr;
> 
> RCT (reverse Christmas tree) army strikes back lol.

 :(
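
Will reorder the remaining locals in ice_construct_skb_zc() to keep the
reverse Christmas tree (longest declaration line first), something like:

	unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start;
	unsigned int metasize = xdp->data - xdp->data_meta;
	unsigned int datasize = xdp->data_end - xdp->data;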

> 
> >  	unsigned int metasize = xdp->data - xdp->data_meta;
> >  	unsigned int datasize = xdp->data_end - xdp->data;
> >  	unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start;
> > @@ -444,7 +448,6 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr)
> >  		skb_metadata_set(skb, metasize);
> >  
> >  	xsk_buff_free(xdp);
> > -	*xdp_arr = NULL;
> >  	return skb;
> >  }
> >  
> > @@ -521,7 +524,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> >  	while (likely(total_rx_packets < (unsigned int)budget)) {
> >  		union ice_32b_rx_flex_desc *rx_desc;
> >  		unsigned int size, xdp_res = 0;
> > -		struct xdp_buff **xdp;
> > +		struct xdp_buff *xdp;
> >  		struct sk_buff *skb;
> >  		u16 stat_err_bits;
> >  		u16 vlan_tag = 0;
> > @@ -544,18 +547,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
> >  		if (!size)
> >  			break;
> >  
> > -		xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean];
> > -		xsk_buff_set_size(*xdp, size);
> > -		xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool);
> > +		xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
> > +		xsk_buff_set_size(xdp, size);
> > +		xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool);
> >  
> > -		xdp_res = ice_run_xdp_zc(rx_ring, *xdp, xdp_prog, xdp_ring);
> > +		xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring);
> >  		if (xdp_res) {
> >  			if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
> >  				xdp_xmit |= xdp_res;
> >  			else
> > -				xsk_buff_free(*xdp);
> > +				xsk_buff_free(xdp);
> >  
> > -			*xdp = NULL;
> >  			total_rx_bytes += size;
> >  			total_rx_packets++;
> >  			cleaned_count++;
> > @@ -815,10 +817,9 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
> >  	u16 ntu = rx_ring->next_to_use;
> >  
> >  	for ( ; ntc != ntu; ntc = (ntc + 1) & count_mask) {
> > -		struct xdp_buff **xdp = &rx_ring->xdp_buf[ntc];
> > +		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
> >  
> > -		xsk_buff_free(*xdp);
> > -		*xdp = NULL;
> > +		xsk_buff_free(xdp);
> >  	}
> >  }
> >  
> > -- 
> > 2.33.1
> 
> Al

