[Intel-wired-lan] [PATCH 1/2] i40e: Fix RS bit update in Tx path and disable force WB workaround
Singhai, Anjali
anjali.singhai at intel.com
Fri Sep 25 19:50:11 UTC 2015
The other patches needed for this patch to fix a tx_timeout issue are
https://patchwork.ozlabs.org/patch/522598/
https://patchwork.ozlabs.org/patch/522954/
Anjali
> -----Original Message-----
> From: Singhai, Anjali
> Sent: Friday, September 25, 2015 12:00 PM
> To: intel-wired-lan at lists.osuosl.org
> Cc: Singhai, Anjali
> Subject: [PATCH 1/2] i40e: Fix RS bit update in Tx path and disable force WB
> workaround
>
> This patch fixes the issue of forcing WB too often causing us to not benefit
> from NAPI.
>
> Without this patch we were forcing WB/arming interrupt too often taking
> away the benefits of NAPI and causing a performance impact.
>
> With this patch we disable force WB in the clean routine for X710 and XL710
> adapters. X722 adapters do not enable interrupt to force a WB and benefit
> from WB_ON_ITR and hence force WB is left enabled for those adapters.
> For XL710 and X710 adapters, if we have fewer than 4 packets pending, a
> software interrupt triggered from the service task will force a WB.
>
> This patch also changes the conditions for setting RS bit as described in code
> comments. This optimizes when the HW does a tail bump and when it does a
> WB. It also optimizes when we do a wmb.
>
> Signed-off-by: Anjali Singhai Jain <anjali.singhai at intel.com>
> ---
> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 126 ++++++++++++++++++---
> -------
> drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +
> 2 files changed, 86 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index d51b8ed..f75db56 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -785,15 +785,21 @@ static bool i40e_clean_tx_irq(struct i40e_ring
> *tx_ring, int budget)
> tx_ring->q_vector->tx.total_bytes += total_bytes;
> tx_ring->q_vector->tx.total_packets += total_packets;
>
> - /* check to see if there are any non-cache aligned descriptors
> - * waiting to be written back, and kick the hardware to force
> - * them to be written back in case of napi polling
> - */
> - if (budget &&
> - !((i & WB_STRIDE) == WB_STRIDE) &&
> - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
> - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
> - tx_ring->arm_wb = true;
> + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
> + unsigned int j = 0;
> + /* check to see if there are < 4 descriptors
> + * waiting to be written back, then kick the hardware to force
> + * them to be written back in case we stay in NAPI.
> + * In this mode on X722 we do not enable Interrupt.
> + */
> + j = i40e_get_tx_pending(tx_ring);
> +
> + if (budget &&
> + ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
> + !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
> + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
> + tx_ring->arm_wb = true;
> + }
>
> if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
> /* schedule immediate reset if we believe we hung */ @@ -
> 2582,6 +2588,9 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring,
> struct sk_buff *skb,
> u32 td_tag = 0;
> dma_addr_t dma;
> u16 gso_segs;
> + u16 desc_count = 0;
> + bool tail_bump = true;
> + bool do_rs = false;
>
> if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
> td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
> @@ -2622,6 +2631,8 @@ static inline void i40e_tx_map(struct i40e_ring
> *tx_ring, struct sk_buff *skb,
>
> tx_desc++;
> i++;
> + desc_count++;
> +
> if (i == tx_ring->count) {
> tx_desc = I40E_TX_DESC(tx_ring, 0);
> i = 0;
> @@ -2641,6 +2652,8 @@ static inline void i40e_tx_map(struct i40e_ring
> *tx_ring, struct sk_buff *skb,
>
> tx_desc++;
> i++;
> + desc_count++;
> +
> if (i == tx_ring->count) {
> tx_desc = I40E_TX_DESC(tx_ring, 0);
> i = 0;
> @@ -2655,34 +2668,6 @@ static inline void i40e_tx_map(struct i40e_ring
> *tx_ring, struct sk_buff *skb,
> tx_bi = &tx_ring->tx_bi[i];
> }
>
> - /* Place RS bit on last descriptor of any packet that spans across the
> - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
> - */
> - if (((i & WB_STRIDE) != WB_STRIDE) &&
> - (first <= &tx_ring->tx_bi[i]) &&
> - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
> - tx_desc->cmd_type_offset_bsz =
> - build_ctob(td_cmd, td_offset, size, td_tag) |
> - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
> - I40E_TXD_QW1_CMD_SHIFT);
> - } else {
> - tx_desc->cmd_type_offset_bsz =
> - build_ctob(td_cmd, td_offset, size, td_tag) |
> - cpu_to_le64((u64)I40E_TXD_CMD <<
> - I40E_TXD_QW1_CMD_SHIFT);
> - }
> -
> - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
> - tx_ring->queue_index),
> - first->bytecount);
> -
> - /* Force memory writes to complete before letting h/w
> - * know there are new descriptors to fetch. (Only
> - * applicable for weak-ordered memory model archs,
> - * such as IA-64).
> - */
> - wmb();
> -
> /* set next_to_watch value indicating a packet is present */
> first->next_to_watch = tx_desc;
>
> @@ -2692,15 +2677,72 @@ static inline void i40e_tx_map(struct i40e_ring
> *tx_ring, struct sk_buff *skb,
>
> tx_ring->next_to_use = i;
>
> + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
> + tx_ring->queue_index),
> + first->bytecount);
> i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
> +
> + /* Algorithm to optimize tail and RS bit setting:
> + * if xmit_more is supported
> + * if xmit_more is true
> + * do not update tail and do not mark RS bit.
> + * if xmit_more is false and last xmit_more was false
> + * if every packet spanned less than 4 desc
> + * then set RS bit on 4th packet and update tail
> + * on every packet
> + * else
> + * update tail and set RS bit on every packet.
> + * if xmit_more is false and last_xmit_more was true
> + * update tail and set RS bit.
> + *
> + * Optimization: wmb to be issued only in case of tail update.
> + * Also optimize the Descriptor WB path for RS bit with the same
> + * algorithm.
> + *
> + * Note: If there are less than 4 packets
> + * pending and interrupts were disabled the service task will
> + * trigger a force WB.
> + */
> + if (skb->xmit_more &&
> + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
> + tx_ring->queue_index))) {
> + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
> + tail_bump = false;
> + } else if (!skb->xmit_more &&
> + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring-
> >netdev,
> + tx_ring->queue_index))
> &&
> + (!(tx_ring->flags &
> I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
> + (tx_ring->packet_stride < WB_STRIDE) &&
> + (desc_count < WB_STRIDE)) {
> + tx_ring->packet_stride++;
> + } else {
> + tx_ring->packet_stride = 0;
> + tx_ring->flags &=
> ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
> + do_rs = true;
> + }
> + if (do_rs)
> + tx_ring->packet_stride = 0;
> +
> + tx_desc->cmd_type_offset_bsz =
> + build_ctob(td_cmd, td_offset, size, td_tag) |
> + cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
> + I40E_TX_DESC_CMD_EOP)
> <<
> +
> I40E_TXD_QW1_CMD_SHIFT);
> +
> /* notify HW of packet */
> - if (!skb->xmit_more ||
> - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
> - tx_ring->queue_index)))
> - writel(i, tx_ring->tail);
> - else
> + if (!tail_bump)
> prefetchw(tx_desc + 1);
>
> + if (tail_bump) {
> + /* Force memory writes to complete before letting h/w
> + * know there are new descriptors to fetch. (Only
> + * applicable for weak-ordered memory model archs,
> + * such as IA-64).
> + */
> + wmb();
> + writel(i, tx_ring->tail);
> + }
> +
> return;
>
> dma_error:
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> index 073464e..4871809 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> @@ -266,10 +266,12 @@ struct i40e_ring {
>
> bool ring_active; /* is ring online or not */
> bool arm_wb; /* do something to arm write back */
> + u8 packet_stride;
>
> u16 flags;
> #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
> #define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1)
> +#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
>
> /* stats structs */
> struct i40e_queue_stats stats;
> --
> 1.8.1.4
More information about the Intel-wired-lan
mailing list