[Intel-wired-lan] [PATCH net v3] igb: Fix XDP with PTP enabled

Maciej Fijalkowski maciej.fijalkowski at intel.com
Thu Apr 22 10:11:29 UTC 2021


On Thu, Apr 22, 2021 at 07:26:17AM +0200, Kurt Kanzenbach wrote:
> When using native XDP with the igb driver, the XDP frame data doesn't point to
> the beginning of the packet. It's off by 16 bytes. Everything works as expected
> with XDP skb mode.
> 
> Actually these 16 bytes are used to store the packet timestamps. Therefore, pull
> the timestamp before executing any XDP operations and adjust all other code
> accordingly. The igc driver does it like that as well.
> 
> Tested with Intel i210 card and AF_XDP sockets.
> 
> Fixes: 9cbc948b5a20 ("igb: add XDP support")
> Signed-off-by: Kurt Kanzenbach <kurt at linutronix.de>
> ---
> 
> Changes since v2:
> 
>  * Check timestamp for validity (Nguyen, Anthony L)
> 
> Changes since v1:
> 
>  * Use xdp_prepare_buff() (Lorenzo Bianconi)
> 
> Changes since RFC:
> 
>  * Removed unused return value definitions (Alexander Duyck)
> 
> Previous versions:
> 
>  * https://lkml.kernel.org/netdev/20210419072332.7246-1-kurt@linutronix.de/
>  * https://lkml.kernel.org/netdev/20210415092145.27322-1-kurt@linutronix.de/
>  * https://lkml.kernel.org/netdev/20210412101713.15161-1-kurt@linutronix.de/
> 
>  drivers/net/ethernet/intel/igb/igb.h      |  3 +-
>  drivers/net/ethernet/intel/igb/igb_main.c | 45 +++++++++++++----------
>  drivers/net/ethernet/intel/igb/igb_ptp.c  | 21 ++++-------
>  3 files changed, 34 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
> index 7bda8c5edea5..72cf967c1a00 100644
> --- a/drivers/net/ethernet/intel/igb/igb.h
> +++ b/drivers/net/ethernet/intel/igb/igb.h
> @@ -748,8 +748,7 @@ void igb_ptp_suspend(struct igb_adapter *adapter);
>  void igb_ptp_rx_hang(struct igb_adapter *adapter);
>  void igb_ptp_tx_hang(struct igb_adapter *adapter);
>  void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
> -int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
> -			struct sk_buff *skb);
> +ktime_t igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va);
>  int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
>  int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
>  void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
> index a45cd2b416c8..13595618f9e3 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -8281,7 +8281,7 @@ static void igb_add_rx_frag(struct igb_ring *rx_ring,
>  static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
>  					 struct igb_rx_buffer *rx_buffer,
>  					 struct xdp_buff *xdp,
> -					 union e1000_adv_rx_desc *rx_desc)
> +					 ktime_t timestamp)
>  {
>  #if (PAGE_SIZE < 8192)
>  	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
> @@ -8301,12 +8301,8 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
>  	if (unlikely(!skb))
>  		return NULL;
>  
> -	if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
> -		if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) {
> -			xdp->data += IGB_TS_HDR_LEN;
> -			size -= IGB_TS_HDR_LEN;
> -		}
> -	}
> +	if (timestamp)
> +		skb_hwtstamps(skb)->hwtstamp = timestamp;
>  
>  	/* Determine available headroom for copy */
>  	headlen = size;
> @@ -8337,7 +8333,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
>  static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
>  				     struct igb_rx_buffer *rx_buffer,
>  				     struct xdp_buff *xdp,
> -				     union e1000_adv_rx_desc *rx_desc)
> +				     ktime_t timestamp)
>  {
>  #if (PAGE_SIZE < 8192)
>  	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
> @@ -8364,11 +8360,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
>  	if (metasize)
>  		skb_metadata_set(skb, metasize);
>  
> -	/* pull timestamp out of packet data */
> -	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
> -		if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb))
> -			__skb_pull(skb, IGB_TS_HDR_LEN);
> -	}
> +	if (timestamp)
> +		skb_hwtstamps(skb)->hwtstamp = timestamp;
>  
>  	/* update buffer offset */
>  #if (PAGE_SIZE < 8192)
> @@ -8683,7 +8676,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
>  	while (likely(total_packets < budget)) {
>  		union e1000_adv_rx_desc *rx_desc;
>  		struct igb_rx_buffer *rx_buffer;
> +		ktime_t timestamp = 0;
> +		int pkt_offset = 0;
>  		unsigned int size;
> +		void *pktbuf;
>  
>  		/* return some buffers to hardware, one at a time is too slow */
>  		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
> @@ -8703,14 +8699,24 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
>  		dma_rmb();
>  
>  		rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
> +		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
> +
> +		/* pull rx packet timestamp if available and valid */
> +		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
> +			timestamp = igb_ptp_rx_pktstamp(rx_ring->q_vector,
> +							pktbuf);
> +
> +			if (timestamp) {
> +				pkt_offset += IGB_TS_HDR_LEN;
> +				size -= IGB_TS_HDR_LEN;
> +			}
> +		}

Small nit: since this is a hot path, maybe we could omit the additional
branch that you're introducing above and make igb_ptp_rx_pktstamp() to
return either 0 for error cases and IGB_TS_HDR_LEN if timestamp was fine?
timestamp itself would be passed as an arg.

So:
		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
			ts_offset = igb_ptp_rx_pktstamp(rx_ring->q_vector,
							pktbuf, &timestamp);
			pkt_offset += ts_offset;
			size -= ts_offset;
		}

Thoughts? I feel like if we see that desc has timestamp enabled then let's
optimize it for successful case.

>  
>  		/* retrieve a buffer from the ring */
>  		if (!skb) {
> -			unsigned int offset = igb_rx_offset(rx_ring);
> -			unsigned char *hard_start;
> +			unsigned char *hard_start = pktbuf - igb_rx_offset(rx_ring);
> +			unsigned int offset = pkt_offset + igb_rx_offset(rx_ring);

Probably we could do something similar in flavour of:
https://lore.kernel.org/bpf/20210118151318.12324-10-maciej.fijalkowski@intel.com/

which broke XDP_REDIRECT and got fixed in:
https://lore.kernel.org/bpf/20210303153928.11764-2-maciej.fijalkowski@intel.com/

You get the idea.

>  
> -			hard_start = page_address(rx_buffer->page) +
> -				     rx_buffer->page_offset - offset;
>  			xdp_prepare_buff(&xdp, hard_start, offset, size, true);
>  #if (PAGE_SIZE > 4096)
>  			/* At larger PAGE_SIZE, frame_sz depend on len size */
> @@ -8733,10 +8739,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
>  		} else if (skb)
>  			igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
>  		else if (ring_uses_build_skb(rx_ring))
> -			skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc);
> +			skb = igb_build_skb(rx_ring, rx_buffer, &xdp,
> +					    timestamp);
>  		else
>  			skb = igb_construct_skb(rx_ring, rx_buffer,
> -						&xdp, rx_desc);
> +						&xdp, timestamp);
>  
>  		/* exit if we failed to retrieve a buffer */
>  		if (!skb) {
> diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
> index 86a576201f5f..8e23df7da641 100644
> --- a/drivers/net/ethernet/intel/igb/igb_ptp.c
> +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
> @@ -856,30 +856,26 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
>  	dev_kfree_skb_any(skb);
>  }
>  
> -#define IGB_RET_PTP_DISABLED 1
> -#define IGB_RET_PTP_INVALID 2
> -
>  /**
>   * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
>   * @q_vector: Pointer to interrupt specific structure
>   * @va: Pointer to address containing Rx buffer
> - * @skb: Buffer containing timestamp and packet
>   *
>   * This function is meant to retrieve a timestamp from the first buffer of an
>   * incoming frame.  The value is stored in little endian format starting on
>   * byte 8
>   *
> - * Returns: 0 if success, nonzero if failure
> + * Returns: 0 on failure, timestamp on success
>   **/
> -int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
> -			struct sk_buff *skb)
> +ktime_t igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va)
>  {
>  	struct igb_adapter *adapter = q_vector->adapter;
> +	struct skb_shared_hwtstamps ts;
>  	__le64 *regval = (__le64 *)va;
>  	int adjust = 0;
>  
>  	if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
> -		return IGB_RET_PTP_DISABLED;
> +		return 0;
>  
>  	/* The timestamp is recorded in little endian format.
>  	 * DWORD: 0        1        2        3
> @@ -888,10 +884,9 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
>  
>  	/* check reserved dwords are zero, be/le doesn't matter for zero */
>  	if (regval[0])
> -		return IGB_RET_PTP_INVALID;
> +		return 0;
>  
> -	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
> -				   le64_to_cpu(regval[1]));
> +	igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
>  
>  	/* adjust timestamp for the RX latency based on link speed */
>  	if (adapter->hw.mac.type == e1000_i210) {
> @@ -907,10 +902,8 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
>  			break;
>  		}
>  	}
> -	skb_hwtstamps(skb)->hwtstamp =
> -		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
>  
> -	return 0;
> +	return ktime_sub_ns(ts.hwtstamp, adjust);
>  }
>  
>  /**
> -- 
> 2.20.1
> 


More information about the Intel-wired-lan mailing list