[Intel-wired-lan] [PATCH iwl-next] e1000e: Avoid DMA re-mapping on RX copybreak
Loktionov, Aleksandr
aleksandr.loktionov at intel.com
Mon Apr 27 09:04:33 UTC 2026
> -----Original Message-----
> From: Intel-wired-lan <intel-wired-lan-bounces at osuosl.org> On Behalf
> Of Matt Vollrath
> Sent: Monday, April 27, 2026 5:32 AM
> To: intel-wired-lan at osuosl.org
> Cc: Matt Vollrath <tactii at gmail.com>
> Subject: [Intel-wired-lan] [PATCH iwl-next] e1000e: Avoid DMA re-
> mapping on RX copybreak
>
> This patch factors out DMA re-mapping for skbs which were recycled
> in the RX path due to copybreak or errors. There is only one path
> out of the e1000_clean_rx_irq() loop where the skb is consumed and
> DMA needs to be re-mapped, so don't unmap it before checking the
> conditions.
>
> The buffer allocation loop is adjusted to not assume that DMA is
> unmapped, handling mapping errors gracefully.
>
> On systems with IOMMU enabled, the cost of re-mapping DMA is greater
> than the cost of copying data out of the ring buffer. When I use
> this patch and configure e1000e with copybreak=2048, my system with
> IOMMU completes RX twice as fast under load.
>
> The kludge of unconditional unmapping has existed since this driver
> was introduced in 2007, inherited from the e1000 driver which has
> since factored it out. IOMMU tech was new, at the time.
I think comma should be removed "IOMMU tech was new at the time."
Signed-off-by: Aleksandr Loktionov <aleksandr.loktionov at intel.com>
>
> Tested on an I218-V.
>
> Assisted-by: Claude:claude-4-7-opus
> Signed-off-by: Matt Vollrath <tactii at gmail.com>
> ---
> drivers/net/ethernet/intel/e1000e/netdev.c | 34 +++++++++++++++----
> ---
> 1 file changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c
> b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 9befdacd6730..b1d6119171df 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -663,6 +663,8 @@ static void e1000_alloc_rx_buffers(struct
> e1000_ring *rx_ring,
> skb = buffer_info->skb;
> if (skb) {
> skb_trim(skb, 0);
> + if (likely(buffer_info->dma))
> + goto write_desc;
> goto map_skb;
> }
>
> @@ -680,10 +682,12 @@ static void e1000_alloc_rx_buffers(struct
> e1000_ring *rx_ring,
> DMA_FROM_DEVICE);
> if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
> dev_err(&pdev->dev, "Rx DMA map failed\n");
> + buffer_info->dma = 0;
> adapter->rx_dma_failed++;
> break;
> }
>
> +write_desc:
> rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
> rx_desc->read.buffer_addr = cpu_to_le64(buffer_info-
> >dma);
>
> @@ -941,7 +945,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
> dma_rmb(); /* read descriptor and rx_buffer_info
> after status DD */
>
> skb = buffer_info->skb;
> - buffer_info->skb = NULL;
>
> prefetch(skb->data - NET_IP_ALIGN);
>
> @@ -955,9 +958,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
>
> cleaned = true;
> cleaned_count++;
> - dma_unmap_single(&pdev->dev, buffer_info->dma,
> - adapter->rx_buffer_len,
> DMA_FROM_DEVICE);
> - buffer_info->dma = 0;
>
> length = le16_to_cpu(rx_desc->wb.upper.length);
>
> @@ -973,8 +973,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
> if (adapter->flags2 & FLAG2_IS_DISCARDING) {
> /* All receives must fit into a single buffer */
> e_dbg("Receive packet consumed multiple
> buffers\n");
> - /* recycle */
> - buffer_info->skb = skb;
> if (staterr & E1000_RXD_STAT_EOP)
> adapter->flags2 &= ~FLAG2_IS_DISCARDING;
> goto next_desc;
> @@ -982,8 +980,6 @@ static bool e1000_clean_rx_irq(struct e1000_ring
> *rx_ring, int *work_done,
>
> if (unlikely((staterr &
> E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
> !(netdev->features & NETIF_F_RXALL))) {
> - /* recycle */
> - buffer_info->skb = skb;
> goto next_desc;
> }
>
> @@ -1010,19 +1006,35 @@ static bool e1000_clean_rx_irq(struct
> e1000_ring *rx_ring, int *work_done,
> struct sk_buff *new_skb =
> napi_alloc_skb(&adapter->napi, length);
> if (new_skb) {
> + dma_sync_single_for_cpu(&pdev->dev,
> + buffer_info->dma,
> + adapter-
> >rx_buffer_len,
> + DMA_FROM_DEVICE);
> skb_copy_to_linear_data_offset(new_skb,
> -NET_IP_ALIGN,
> (skb->data -
> NET_IP_ALIGN),
> (length +
> NET_IP_ALIGN));
> - /* save the skb in buffer_info as good */
> - buffer_info->skb = skb;
> + dma_sync_single_for_device(&pdev->dev,
> + buffer_info->dma,
> + adapter-
> >rx_buffer_len,
> + DMA_FROM_DEVICE);
> skb = new_skb;
> }
> /* else just continue with the old one */
> }
> - /* end copybreak code */
> +
> + /* If skb was not replaced by copybreak, we are
> consuming
> + * the original buffer and must release the DMA
> mapping.
> + */
> + if (skb == buffer_info->skb) {
> + buffer_info->skb = NULL;
> + dma_unmap_single(&pdev->dev, buffer_info->dma,
> + adapter->rx_buffer_len,
> + DMA_FROM_DEVICE);
> + buffer_info->dma = 0;
> + }
> skb_put(skb, length);
>
> /* Receive Checksum Offload */
> --
> 2.43.0
More information about the Intel-wired-lan
mailing list