[Intel-wired-lan] [PATCH next-queue v1 1/3] igc: Fix race condition in PTP tx code
Vladimir Oltean
vladimir.oltean at nxp.com
Tue Feb 28 17:33:57 UTC 2023
Hi Vinicius,
Some small comments, feel free to ignore them.
On Mon, Feb 27, 2023 at 09:45:32PM -0800, Vinicius Costa Gomes wrote:
> From: Andre Guedes <andre.guedes at intel.com>
>
> Currently, the igc driver supports timestamping only one tx packet at a
> time. During the transmission flow, the skb that requires hardware
> timestamping is saved in adapter->ptp_tx_skb. Once hardware has the
> timestamp, an interrupt is delivered, and adapter->ptp_tx_work is
> scheduled. In igc_ptp_tx_work(), we read the timestamp register, update
> adapter->ptp_tx_skb, and notify the network stack.
>
> While the thread executing the transmission flow (the user process
> running in kernel mode) and the thread executing ptp_tx_work don't
> access adapter->ptp_tx_skb concurrently, there are two other places
> where adapter->ptp_tx_skb is accessed: igc_ptp_tx_hang() and
> igc_ptp_suspend().
>
> igc_ptp_tx_hang() is executed by the adapter->watchdog_task worker
> thread which runs periodically so it is possible we have two threads
> accessing ptp_tx_skb at the same time. Consider the following scenario:
> right after __IGC_PTP_TX_IN_PROGRESS is set in igc_xmit_frame_ring(),
> igc_ptp_tx_hang() is executed. Since adapter->ptp_tx_start hasn't been
> written yet, this is considered a timeout and adapter->ptp_tx_skb is
> cleaned up.
>
> This patch fixes the issue described above by adding the ptp_tx_lock to
> protect access to ptp_tx_skb and ptp_tx_start fields from igc_adapter.
> Since igc_xmit_frame_ring() called in atomic context by the networking
> stack, ptp_tx_lock is defined as a spinlock.
>
> With the introduction of the ptp_tx_lock, the __IGC_PTP_TX_IN_PROGRESS
> flag doesn't provide much of a use anymore so this patch gets rid of it.
>
> Signed-off-by: Andre Guedes <andre.guedes at intel.com>
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes at intel.com>
> ---
Shouldn't this have a Fixes: tag and be sent to the 'net' queue, since
it fixes a bug which can be seen in the real world?
> drivers/net/ethernet/intel/igc/igc.h | 5 +-
> drivers/net/ethernet/intel/igc/igc_main.c | 8 +++-
> drivers/net/ethernet/intel/igc/igc_ptp.c | 57 +++++++++++++++--------
> 3 files changed, 47 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
> index df3e26c0cf01..e804a566bdd3 100644
> --- a/drivers/net/ethernet/intel/igc/igc.h
> +++ b/drivers/net/ethernet/intel/igc/igc.h
> @@ -227,6 +227,10 @@ struct igc_adapter {
> struct ptp_clock *ptp_clock;
> struct ptp_clock_info ptp_caps;
> struct work_struct ptp_tx_work;
> + /* Access to ptp_tx_skb and ptp_tx_start is protected by the
> + * ptp_tx_lock.
> + */
> + spinlock_t ptp_tx_lock;
> struct sk_buff *ptp_tx_skb;
> struct hwtstamp_config tstamp_config;
> unsigned long ptp_tx_start;
> @@ -401,7 +405,6 @@ enum igc_state_t {
> __IGC_TESTING,
> __IGC_RESETTING,
> __IGC_DOWN,
> - __IGC_PTP_TX_IN_PROGRESS,
> };
>
> enum igc_tx_flags {
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> index 2928a6c73692..84c9c6e09054 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -1565,14 +1565,16 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
>
> if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
> struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
>
> /* FIXME: add support for retrieving timestamps from
> * the other timer registers before skipping the
> * timestamping request.
> */
> if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
> - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
> - &adapter->state)) {
> + !adapter->ptp_tx_skb) {
> skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
> tx_flags |= IGC_TX_FLAGS_TSTAMP;
>
> @@ -1581,6 +1583,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
> } else {
> adapter->tx_hwtstamp_skipped++;
unrelated: when adapter->tstamp_config.tx_type != HWTSTAMP_TX_ON but
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP was set, it simply means
that the timestamp is "not for you". For example igc is a DSA master for
a PTP-capable switch. Should adapter->tx_hwtstamp_skipped increment in
that case? I mean, timestamps are skipped, but is it worth bumping a
user-visible counter for what is essentially the normal thing to do?
> }
> +
> + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
> }
>
> if (skb_vlan_tag_present(skb)) {
> diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
> index 4e10ced736db..9247395911c9 100644
> --- a/drivers/net/ethernet/intel/igc/igc_ptp.c
> +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
> @@ -603,14 +603,15 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
> return 0;
> }
>
> +/* Requires adapter->ptp_tx_lock held by caller. */
> static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
> {
> struct igc_hw *hw = &adapter->hw;
>
> dev_kfree_skb_any(adapter->ptp_tx_skb);
> adapter->ptp_tx_skb = NULL;
> + adapter->ptp_tx_start = 0;
what's the reason for setting this to 0? (here, there and everywhere)
> adapter->tx_hwtstamp_timeouts++;
> - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
> /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */
> rd32(IGC_TXSTMPH);
> netdev_warn(adapter->netdev, "Tx timestamp timeout\n");
> @@ -618,20 +619,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter)
>
> void igc_ptp_tx_hang(struct igc_adapter *adapter)
> {
> - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
> - IGC_PTP_TX_TIMEOUT);
> -
> - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
> - return;
> -
> - /* If we haven't received a timestamp within the timeout, it is
> - * reasonable to assume that it will never occur, so we can unlock the
> - * timestamp bit when this occurs.
> - */
> - if (timeout) {
> - cancel_work_sync(&adapter->ptp_tx_work);
> - igc_ptp_tx_timeout(adapter);
> - }
> + unsigned long flags;
> +
> + spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
> +
> + if (!adapter->ptp_tx_skb)
> + goto unlock;
> +
> + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT))
> + goto unlock;
> +
> + igc_ptp_tx_timeout(adapter);
> +
> +unlock:
> + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
> }
>
> /**
> @@ -641,6 +642,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter)
> * If we were asked to do hardware stamping and such a time stamp is
> * available, then it must have been for this skb here because we only
> * allow only one such packet into the queue.
> + *
> + * Context: Expects adapter->ptp_tx_lock to be held by caller.
> */
> static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
> {
> @@ -682,7 +685,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
> * while we're notifying the stack.
> */
This comment just became obsolete:
/* Clear the lock early before calling skb_tstamp_tx so that
* applications are not woken up before the lock bit is clear. We use
* a copy of the skb pointer to ensure other threads can't change it
* while we're notifying the stack.
*/
it gets removed in one of the later patches.
I suppose this doesn't make a real difference unless this change gets
backported as a fix to stable kernels and the others don't.
> adapter->ptp_tx_skb = NULL;
> - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
> + adapter->ptp_tx_start = 0;
>
> /* Notify the stack and free the skb after we've unlocked */
> skb_tstamp_tx(skb, &shhwtstamps);
> @@ -701,16 +704,22 @@ static void igc_ptp_tx_work(struct work_struct *work)
> struct igc_adapter *adapter = container_of(work, struct igc_adapter,
> ptp_tx_work);
> struct igc_hw *hw = &adapter->hw;
> + unsigned long flags;
> u32 tsynctxctl;
>
> - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
> - return;
> + spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
> +
> + if (!adapter->ptp_tx_skb)
> + goto unlock;
>
> tsynctxctl = rd32(IGC_TSYNCTXCTL);
> if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0)))
> - return;
> + goto unlock;
>
> igc_ptp_tx_hwtstamp(adapter);
> +
> +unlock:
> + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
> }
>
> /**
> @@ -960,6 +969,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
> }
>
> spin_lock_init(&adapter->tmreg_lock);
> + spin_lock_init(&adapter->ptp_tx_lock);
> INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work);
>
> adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
> @@ -1017,13 +1027,20 @@ static void igc_ptm_stop(struct igc_adapter *adapter)
> */
> void igc_ptp_suspend(struct igc_adapter *adapter)
> {
> + unsigned long flags;
> +
> if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
> return;
>
> cancel_work_sync(&adapter->ptp_tx_work);
> +
> + spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
> +
> dev_kfree_skb_any(adapter->ptp_tx_skb);
> adapter->ptp_tx_skb = NULL;
> - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
> + adapter->ptp_tx_start = 0;
> +
> + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
>
> if (pci_device_is_present(adapter->pdev)) {
> igc_ptp_time_save(adapter);
> --
> 2.39.2
>
More information about the Intel-wired-lan
mailing list