[Intel-wired-lan] [PATCH next-queue v1 1/3] igc: Fix race condition in PTP tx code

Vinicius Costa Gomes vinicius.gomes at intel.com
Thu Mar 9 21:33:06 UTC 2023


Hi Vladimir,

Sorry for the delay, crazy times around here.

Vladimir Oltean <vladimir.oltean at nxp.com> writes:

> Hi Vinicius,
>
> Some small comments, feel free to ignore them.
>
> On Mon, Feb 27, 2023 at 09:45:32PM -0800, Vinicius Costa Gomes wrote:
>> From: Andre Guedes <andre.guedes at intel.com>
>> 
>> Currently, the igc driver supports timestamping only one tx packet at a
>> time. During the transmission flow, the skb that requires hardware
>> timestamping is saved in adapter->ptp_tx_skb. Once hardware has the
>> timestamp, an interrupt is delivered, and adapter->ptp_tx_work is
>> scheduled. In igc_ptp_tx_work(), we read the timestamp register, update
>> adapter->ptp_tx_skb, and notify the network stack.
>> 
>> While the thread executing the transmission flow (the user process
>> running in kernel mode) and the thread executing ptp_tx_work don't
>> access adapter->ptp_tx_skb concurrently, there are two other places
>> where adapter->ptp_tx_skb is accessed: igc_ptp_tx_hang() and
>> igc_ptp_suspend().
>> 
>> igc_ptp_tx_hang() is executed by the adapter->watchdog_task worker
>> thread which runs periodically so it is possible we have two threads
>> accessing ptp_tx_skb at the same time. Consider the following scenario:
>> right after __IGC_PTP_TX_IN_PROGRESS is set in igc_xmit_frame_ring(),
>> igc_ptp_tx_hang() is executed. Since adapter->ptp_tx_start hasn't been
>> written yet, this is considered a timeout and adapter->ptp_tx_skb is
>> cleaned up.
>> 
>> This patch fixes the issue described above by adding the ptp_tx_lock to
>> protect access to ptp_tx_skb and ptp_tx_start fields from igc_adapter.
>> Since igc_xmit_frame_ring() called in atomic context by the networking
>> stack, ptp_tx_lock is defined as a spinlock.
>> 
>> With the introduction of the ptp_tx_lock, the __IGC_PTP_TX_IN_PROGRESS
>> flag doesn't provide much of a use anymore so this patch gets rid of it.
>> 
>> Signed-off-by: Andre Guedes <andre.guedes at intel.com>
>> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes at intel.com>
>> ---
>
> Shouldn't this have a Fixes: tag and be sent to the 'net' queue, since
> it fixes a bug which can be seen in the real world?
>

As I couldn't reproduce the race condition (the "false" timeout) at all
on my systems, at first I chose not to propose this for 'net'.

But on the other hand, yeah, I don't think this patch will have any side
effects and will only protect against this (possible) issue. So yeah,
will add a 'Fixes:' tag and propose it there.

>>  drivers/net/ethernet/intel/igc/igc.h      |  5 +-
>>  drivers/net/ethernet/intel/igc/igc_main.c |  8 +++-
>>  drivers/net/ethernet/intel/igc/igc_ptp.c  | 57 +++++++++++++++--------
>>  3 files changed, 47 insertions(+), 23 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
>> index df3e26c0cf01..e804a566bdd3 100644
>> --- a/drivers/net/ethernet/intel/igc/igc.h
>> +++ b/drivers/net/ethernet/intel/igc/igc.h
>> @@ -227,6 +227,10 @@ struct igc_adapter {
>>  	struct ptp_clock *ptp_clock;
>>  	struct ptp_clock_info ptp_caps;
>>  	struct work_struct ptp_tx_work;
>> +	/* Access to ptp_tx_skb and ptp_tx_start is protected by the
>> +	 * ptp_tx_lock.
>> +	 */
>> +	spinlock_t ptp_tx_lock;
>>  	struct sk_buff *ptp_tx_skb;
>>  	struct hwtstamp_config tstamp_config;
>>  	unsigned long ptp_tx_start;
>> @@ -401,7 +405,6 @@ enum igc_state_t {
>>  	__IGC_TESTING,
>>  	__IGC_RESETTING,
>>  	__IGC_DOWN,
>> -	__IGC_PTP_TX_IN_PROGRESS,
>>  };
>>  
>>  enum igc_tx_flags {
>> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
>> index 2928a6c73692..84c9c6e09054 100644
>> --- a/drivers/net/ethernet/intel/igc/igc_main.c
>> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
>> @@ -1565,14 +1565,16 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>>  
>>  	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
>>  		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
>> +		unsigned long flags;
>> +
>> +		spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
>>  
>>  		/* FIXME: add support for retrieving timestamps from
>>  		 * the other timer registers before skipping the
>>  		 * timestamping request.
>>  		 */
>>  		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
>> -		    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
>> -					   &adapter->state)) {
>> +		    !adapter->ptp_tx_skb) {
>>  			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
>>  			tx_flags |= IGC_TX_FLAGS_TSTAMP;
>>  
>> @@ -1581,6 +1583,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>>  		} else {
>>  			adapter->tx_hwtstamp_skipped++;
>
> unrelated: when adapter->tstamp_config.tx_type != HWTSTAMP_TX_ON but
> skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP was set, it simply means
> that the timestamp is "not for you". For example igc is a DSA master for
> a PTP-capable switch. Should adapter->tx_hwtstamp_skipped increment in
> that case? I mean, timestamps are skipped, but is it worth bumping a
> user-visible counter for what is essentially the normal thing to do?
>

Good point, I never considered this case.

Incrementing the counter seems wrong, will separate the conditions for
the "not for me" and for the "this is for me and I am busy" cases. 

>>  		}
>> +
>> +		spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
>>  	}
>>  
>>  	if (skb_vlan_tag_present(skb)) {
>> diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
>> index 4e10ced736db..9247395911c9 100644
>> --- a/drivers/net/ethernet/intel/igc/igc_ptp.c
>> +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
>> @@ -603,14 +603,15 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
>>  	return 0;
>>  }
>>  
>> +/* Requires adapter->ptp_tx_lock held by caller. */
>>  static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
>>  {
>>  	struct igc_hw *hw = &adapter->hw;
>>  
>>  	dev_kfree_skb_any(adapter->ptp_tx_skb);
>>  	adapter->ptp_tx_skb = NULL;
>> +	adapter->ptp_tx_start = 0;
>
> what's the reason for setting this to 0? (here, there and everywhere)
>

Not really needed, something I missed from the early version of the
patch. Will fix.

>>  	adapter->tx_hwtstamp_timeouts++;
>> -	clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
>>  	/* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */
>>  	rd32(IGC_TXSTMPH);
>>  	netdev_warn(adapter->netdev, "Tx timestamp timeout\n");
>> @@ -618,20 +619,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
>>  
>>  void igc_ptp_tx_hang(struct igc_adapter *adapter)
>>  {
>> -	bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
>> -					      IGC_PTP_TX_TIMEOUT);
>> -
>> -	if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
>> -		return;
>> -
>> -	/* If we haven't received a timestamp within the timeout, it is
>> -	 * reasonable to assume that it will never occur, so we can unlock the
>> -	 * timestamp bit when this occurs.
>> -	 */
>> -	if (timeout) {
>> -		cancel_work_sync(&adapter->ptp_tx_work);
>> -		igc_ptp_tx_timeout(adapter);
>> -	}
>> +	unsigned long flags;
>> +
>> +	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
>> +
>> +	if (!adapter->ptp_tx_skb)
>> +		goto unlock;
>> +
>> +	if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT))
>> +		goto unlock;
>> +
>> +	igc_ptp_tx_timeout(adapter);
>> +
>> +unlock:
>> +	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
>>  }
>>  
>>  /**
>> @@ -641,6 +642,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter)
>>   * If we were asked to do hardware stamping and such a time stamp is
>>   * available, then it must have been for this skb here because we only
>>   * allow only one such packet into the queue.
>> + *
>> + * Context: Expects adapter->ptp_tx_lock to be held by caller.
>>   */
>>  static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
>>  {
>> @@ -682,7 +685,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
>>  	 * while we're notifying the stack.
>>  	 */
>
> This comment just became obsolete:
>
> 	/* Clear the lock early before calling skb_tstamp_tx so that
> 	 * applications are not woken up before the lock bit is clear. We use
> 	 * a copy of the skb pointer to ensure other threads can't change it
> 	 * while we're notifying the stack.
> 	 */
>
> it gets removed in one of the later patches.
>
> I suppose this doesn't make a real difference unless this change gets
> backported as a fix to stable kernels and the others don't.
>

Will remove the comment in this patch. Good catch.

>>  	adapter->ptp_tx_skb = NULL;
>> -	clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
>> +	adapter->ptp_tx_start = 0;
>>  
>>  	/* Notify the stack and free the skb after we've unlocked */
>>  	skb_tstamp_tx(skb, &shhwtstamps);
>> @@ -701,16 +704,22 @@ static void igc_ptp_tx_work(struct work_struct *work)
>>  	struct igc_adapter *adapter = container_of(work, struct igc_adapter,
>>  						   ptp_tx_work);
>>  	struct igc_hw *hw = &adapter->hw;
>> +	unsigned long flags;
>>  	u32 tsynctxctl;
>>  
>> -	if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
>> -		return;
>> +	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
>> +
>> +	if (!adapter->ptp_tx_skb)
>> +		goto unlock;
>>  
>>  	tsynctxctl = rd32(IGC_TSYNCTXCTL);
>>  	if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0)))
>> -		return;
>> +		goto unlock;
>>  
>>  	igc_ptp_tx_hwtstamp(adapter);
>> +
>> +unlock:
>> +	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
>>  }
>>  
>>  /**
>> @@ -960,6 +969,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
>>  	}
>>  
>>  	spin_lock_init(&adapter->tmreg_lock);
>> +	spin_lock_init(&adapter->ptp_tx_lock);
>>  	INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work);
>>  
>>  	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
>> @@ -1017,13 +1027,20 @@ static void igc_ptm_stop(struct igc_adapter *adapter)
>>   */
>>  void igc_ptp_suspend(struct igc_adapter *adapter)
>>  {
>> +	unsigned long flags;
>> +
>>  	if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
>>  		return;
>>  
>>  	cancel_work_sync(&adapter->ptp_tx_work);
>> +
>> +	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
>> +
>>  	dev_kfree_skb_any(adapter->ptp_tx_skb);
>>  	adapter->ptp_tx_skb = NULL;
>> -	clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
>> +	adapter->ptp_tx_start = 0;
>> +
>> +	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
>>  
>>  	if (pci_device_is_present(adapter->pdev)) {
>>  		igc_ptp_time_save(adapter);
>> -- 
>> 2.39.2
>>

-- 
Vinicius


More information about the Intel-wired-lan mailing list