[Intel-wired-lan] [PATCH iwl-next] e1000e: Avoid DMA re-mapping on RX copybreak

Loktionov, Aleksandr aleksandr.loktionov at intel.com
Mon Apr 27 09:04:33 UTC 2026



> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces at osuosl.org> On Behalf
> Of Matt Vollrath
> Sent: Monday, April 27, 2026 5:32 AM
> To: intel-wired-lan at osuosl.org
> Cc: Matt Vollrath <tactii at gmail.com>
> Subject: [Intel-wired-lan] [PATCH iwl-next] e1000e: Avoid DMA re-mapping on RX copybreak
> 
> This patch factors out DMA re-mapping for skbs which were recycled
> in the RX path due to copybreak or errors. There is only one path
> out of the e1000_clean_rx_irq() loop where the skb is consumed and
> DMA needs to be re-mapped, so don't unmap it before checking the
> conditions.
> 
> The buffer allocation loop is adjusted to not assume that DMA is
> unmapped, handling mapping errors gracefully.
> 
> On systems with IOMMU enabled, the cost of re-mapping DMA is greater
> than the cost of copying data out of the ring buffer. When I use
> this patch and configure e1000e with copybreak=2048, my system with
> IOMMU completes RX twice as fast under load.
> 
> The kludge of unconditional unmapping has existed since this driver
> was introduced in 2007, inherited from the e1000 driver which has
> since factored it out. IOMMU tech was new, at the time.
I think the comma should be removed: "IOMMU tech was new at the time."

Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov at intel.com>

> 
> Tested on an I218-V.
> 
> Assisted-by: Claude:claude-4-7-opus
> Signed-off-by: Matt Vollrath <tactii at gmail.com>
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c | 34 +++++++++++++++----
> ---
>  1 file changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c
> b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 9befdacd6730..b1d6119171df 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -663,6 +663,8 @@ static void e1000_alloc_rx_buffers(struct
> e1000_ring *rx_ring,
>  		skb = buffer_info->skb;
>  		if (skb) {
>  			skb_trim(skb, 0);
> +			if (likely(buffer_info->dma))
> +				goto write_desc;
>  			goto map_skb;
>  		}
> 
> @@ -680,10 +682,12 @@ static void e1000_alloc_rx_buffers(struct
> e1000_ring *rx_ring,
>  						  DMA_FROM_DEVICE);
>  		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
>  			dev_err(&pdev->dev, "Rx DMA map failed\n");
> +			buffer_info->dma = 0;
>  			adapter->rx_dma_failed++;
>  			break;
>  		}
> 
> +write_desc:
>  		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
>  		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info-
> >dma);
> 
> @@ -941,7 +945,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
>  		dma_rmb();	/* read descriptor and rx_buffer_info
> after status DD */
> 
>  		skb = buffer_info->skb;
> -		buffer_info->skb = NULL;
> 
>  		prefetch(skb->data - NET_IP_ALIGN);
> 
> @@ -955,9 +958,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
> 
>  		cleaned = true;
>  		cleaned_count++;
> -		dma_unmap_single(&pdev->dev, buffer_info->dma,
> -				 adapter->rx_buffer_len,
> DMA_FROM_DEVICE);
> -		buffer_info->dma = 0;
> 
>  		length = le16_to_cpu(rx_desc->wb.upper.length);
> 
> @@ -973,8 +973,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
>  		if (adapter->flags2 & FLAG2_IS_DISCARDING) {
>  			/* All receives must fit into a single buffer */
>  			e_dbg("Receive packet consumed multiple
> buffers\n");
> -			/* recycle */
> -			buffer_info->skb = skb;
>  			if (staterr & E1000_RXD_STAT_EOP)
>  				adapter->flags2 &= ~FLAG2_IS_DISCARDING;
>  			goto next_desc;
> @@ -982,8 +980,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
> 
>  		if (unlikely((staterr &
> E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
>  			     !(netdev->features & NETIF_F_RXALL))) {
> -			/* recycle */
> -			buffer_info->skb = skb;
>  			goto next_desc;
>  		}
> 
> @@ -1010,19 +1006,35 @@ static bool e1000_clean_rx_irq(struct
> e1000_ring *rx_ring, int *work_done,
>  			struct sk_buff *new_skb =
>  				napi_alloc_skb(&adapter->napi, length);
>  			if (new_skb) {
> +				dma_sync_single_for_cpu(&pdev->dev,
> +							buffer_info->dma,
> +							adapter-
> >rx_buffer_len,
> +							DMA_FROM_DEVICE);
>  				skb_copy_to_linear_data_offset(new_skb,
>  							       -NET_IP_ALIGN,
>  							       (skb->data -
>  								NET_IP_ALIGN),
>  							       (length +
>  								NET_IP_ALIGN));
> -				/* save the skb in buffer_info as good */
> -				buffer_info->skb = skb;
> +				dma_sync_single_for_device(&pdev->dev,
> +							   buffer_info->dma,
> +							   adapter-
> >rx_buffer_len,
> +							   DMA_FROM_DEVICE);
>  				skb = new_skb;
>  			}
>  			/* else just continue with the old one */
>  		}
> -		/* end copybreak code */
> +
> +		/* If skb was not replaced by copybreak, we are
> consuming
> +		 * the original buffer and must release the DMA
> mapping.
> +		 */
> +		if (skb == buffer_info->skb) {
> +			buffer_info->skb = NULL;
> +			dma_unmap_single(&pdev->dev, buffer_info->dma,
> +					 adapter->rx_buffer_len,
> +					 DMA_FROM_DEVICE);
> +			buffer_info->dma = 0;
> +		}
>  		skb_put(skb, length);
> 
>  		/* Receive Checksum Offload */
> --
> 2.43.0



More information about the Intel-wired-lan mailing list