[Intel-wired-lan] [PATCH iwl-next v3] e1000e: Avoid DMA re-mapping on RX copybreak

Loktionov, Aleksandr aleksandr.loktionov at intel.com
Wed Apr 29 09:18:41 UTC 2026



> -----Original Message-----
> From: Matt Vollrath <tactii at gmail.com>
> Sent: Wednesday, April 29, 2026 3:43 AM
> To: intel-wired-lan at osuosl.org
> Cc: Matt Vollrath <tactii at gmail.com>; Loktionov, Aleksandr
> <aleksandr.loktionov at intel.com>; Paul Menzel <pmenzel at molgen.mpg.de>
> Subject: [PATCH iwl-next v3] e1000e: Avoid DMA re-mapping on RX
> copybreak
> 
> This patch factors out DMA re-mapping for skbs which were recycled in
> the RX path due to copybreak or errors. There is only one path out of
> the e1000_clean_rx_irq() loop where the skb is consumed and DMA needs
> to be re-mapped, so don't unmap it before checking the conditions.
> 
> The buffer allocation loop is adjusted to not assume that DMA is
> unmapped, handling mapping errors gracefully.
> 
> On systems with IOMMU enabled, the cost of re-mapping DMA is greater
> than the cost of copying data out of the ring buffer. When I use this
> patch and configure e1000e with copybreak=2048, my system with IOMMU
> completes RX roughly twice as fast under load.
> 
> Informal performance comparisons were based on Asus Gryphon Z97 which
> includes an I218-V and with a Xeon E3-1240 v3 in the socket.
> ktime_get() measurement was injected into e1000e_poll() wrapping the
> adapter->clean_rx() call. The total time spent in clean_rx() was
> divided
> by work_done to print the average time spent per buffer. iperf3 -R was
> used to saturate the RX path and awk was used for statistics. Control
> revision was set to 7.1-rc1 because iwl-next hadn't been updated yet.
> 
>   rev     | iommu | copybreak | samples | mean (ns) |   stdev
>   7.1-rc1 |   off |         0 |    4748 |    453.72 |  155.82
>   7.1-rc1 |   off |      2048 |    4743 |    554.83 |  103.67
>   7.1-rc1 |    on |         0 |    4751 |   1139.22 |  150.56
> * 7.1-rc1 |    on |      2048 |    4737 |   1267.02 |  184.62
>    +patch |   off |         0 |    4739 |    456.30 |  146.33
>    +patch |   off |      2048 |    4739 |    538.56 |  132.97
>    +patch |    on |         0 |    4769 |   1165.97 |  140.19
> *  +patch |    on |      2048 |    4745 |    562.25 |  171.80
> 
> No surprises here, IOMMU DMA ops are known to be expensive. For most
> users the kernel default is iommu=on and driver default is
> copybreak=256, so unless the workload is small packets, some tuning of
> either knob would be needed to see the full benefit of this change.
> 
> The kludge of unconditional unmapping has existed since this driver
> was introduced in 2007[1], inherited from the e1000 driver which has
> since factored it out[2]. IOMMU tech was new at the time.
> 
> [1] Commit bc7f75fa9788 ("[E1000E]: New pci-express e1000 driver
> (currently for ICH9 devices only)") [2] Commit 2b294b18689c ("e1000:
> perform copybreak ahead of DMA unmap")
> 
> Assisted-by: Claude:claude-4-7-opus
> Signed-off-by: Matt Vollrath <tactii at gmail.com>
> ---
> v3:
> * refactor unmapping bypass, use goto instead of redundant branch
> * remove Aleksandr's sign-off due to logic change
> * benchmark details
> * cite historic commits
> v2:
> * proofread description with Aleksandr
> ---
>  drivers/net/ethernet/intel/e1000e/netdev.c | 32 ++++++++++++++-------
> -
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c
> b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 7ce0cc8ab8f4..62bf85c768d6 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -663,6 +663,8 @@ static void e1000_alloc_rx_buffers(struct
> e1000_ring *rx_ring,
>  		skb = buffer_info->skb;
>  		if (skb) {
>  			skb_trim(skb, 0);
> +			if (likely(buffer_info->dma))
> +				goto write_desc;
>  			goto map_skb;
>  		}
> 
> @@ -680,10 +682,12 @@ static void e1000_alloc_rx_buffers(struct
> e1000_ring *rx_ring,
>  						  DMA_FROM_DEVICE);
>  		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
>  			dev_err(&pdev->dev, "Rx DMA map failed\n");
> +			buffer_info->dma = 0;
>  			adapter->rx_dma_failed++;
>  			break;
>  		}
> 
> +write_desc:
>  		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
>  		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info-
> >dma);
> 
> @@ -941,7 +945,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
>  		dma_rmb();	/* read descriptor and rx_buffer_info after
> status DD */
> 
>  		skb = buffer_info->skb;
> -		buffer_info->skb = NULL;
> 
>  		prefetch(skb->data - NET_IP_ALIGN);
> 
> @@ -955,9 +958,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
> 
>  		cleaned = true;
>  		cleaned_count++;
> -		dma_unmap_single(&pdev->dev, buffer_info->dma,
> -				 adapter->rx_buffer_len, DMA_FROM_DEVICE);
> -		buffer_info->dma = 0;
> 
>  		length = le16_to_cpu(rx_desc->wb.upper.length);
> 
> @@ -973,8 +973,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
>  		if (adapter->flags2 & FLAG2_IS_DISCARDING) {
>  			/* All receives must fit into a single buffer */
>  			e_dbg("Receive packet consumed multiple
> buffers\n");
> -			/* recycle */
> -			buffer_info->skb = skb;
>  			if (staterr & E1000_RXD_STAT_EOP)
>  				adapter->flags2 &= ~FLAG2_IS_DISCARDING;
>  			goto next_desc;
> @@ -982,8 +980,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
> 
>  		if (unlikely((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
> &&
>  			     !(netdev->features & NETIF_F_RXALL))) {
> -			/* recycle */
> -			buffer_info->skb = skb;
>  			goto next_desc;
>  		}
> 
> @@ -1010,19 +1006,33 @@ static bool e1000_clean_rx_irq(struct
> e1000_ring *rx_ring, int *work_done,
>  			struct sk_buff *new_skb =
>  				napi_alloc_skb(&adapter->napi, length);
>  			if (new_skb) {
> +				dma_sync_single_for_cpu(&pdev->dev,
> +							buffer_info->dma,
> +							adapter-
> >rx_buffer_len,
> +							DMA_FROM_DEVICE);
>  				skb_copy_to_linear_data_offset(new_skb,
>  							       -NET_IP_ALIGN,
>  							       (skb->data -
>  								NET_IP_ALIGN),
>  							       (length +
>  								NET_IP_ALIGN));
> -				/* save the skb in buffer_info as good */
> -				buffer_info->skb = skb;
> +				dma_sync_single_for_device(&pdev->dev,
> +							   buffer_info->dma,
> +							   adapter-
> >rx_buffer_len,
> +							   DMA_FROM_DEVICE);
>  				skb = new_skb;
> +				goto copybreak_done;
>  			}
>  			/* else just continue with the old one */
>  		}
> -		/* end copybreak code */
> +
> +		buffer_info->skb = NULL;
> +		dma_unmap_single(&pdev->dev, buffer_info->dma,
> +				 adapter->rx_buffer_len,
> +				 DMA_FROM_DEVICE);
> +		buffer_info->dma = 0;
> +
> +copybreak_done:
>  		skb_put(skb, length);
> 
>  		/* Receive Checksum Offload */
> --
> 2.43.0

Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov at intel.com>



More information about the Intel-wired-lan mailing list