[Intel-wired-lan] [net-next PATCH v4 1/2] ixgbe: add XDP support for pass and drop actions

Alexander Duyck alexander.duyck at gmail.com
Fri Mar 10 20:38:26 UTC 2017


On Fri, Mar 10, 2017 at 11:11 AM, John Fastabend
<john.fastabend at gmail.com> wrote:
> Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
> programs instead of rcu primitives as suggested by Daniel Borkmann and
> Alex Duyck.
>
> Signed-off-by: John Fastabend <john.r.fastabend at intel.com>

Two minor cosmetic complaints below.  However the patch looks good to
me.  Feel free to add this to both patches for the next revision.

Acked-by: Alexander Duyck <alexander.h.duyck at intel.com>

> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h         |    4 -
>  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |    4 -
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  162 +++++++++++++++++++---
>  3 files changed, 143 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index b1ecc26..729f84e 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -273,6 +273,7 @@ struct ixgbe_ring {
>         struct ixgbe_ring *next;        /* pointer to next ring in q_vector */
>         struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
>         struct net_device *netdev;      /* netdev ring belongs to */
> +       struct bpf_prog *xdp_prog;
>         struct device *dev;             /* device for DMA mapping */
>         struct ixgbe_fwd_adapter *l2_accel_priv;
>         void *desc;                     /* descriptor ring memory */
> @@ -510,6 +511,7 @@ struct ixgbe_adapter {
>         unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
>         /* OS defined structs */
>         struct net_device *netdev;
> +       struct bpf_prog *xdp_prog;
>         struct pci_dev *pdev;
>
>         unsigned long state;
> @@ -790,7 +792,7 @@ enum ixgbe_boards {
>  void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
>  void ixgbe_reset(struct ixgbe_adapter *adapter);
>  void ixgbe_set_ethtool_ops(struct net_device *netdev);
> -int ixgbe_setup_rx_resources(struct ixgbe_ring *);
> +int ixgbe_setup_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
>  int ixgbe_setup_tx_resources(struct ixgbe_ring *);
>  void ixgbe_free_rx_resources(struct ixgbe_ring *);
>  void ixgbe_free_tx_resources(struct ixgbe_ring *);
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> index 364c83f..27cf625 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> @@ -1114,7 +1114,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
>                                sizeof(struct ixgbe_ring));
>
>                         temp_ring[i].count = new_rx_count;
> -                       err = ixgbe_setup_rx_resources(&temp_ring[i]);
> +                       err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
>                         if (err) {
>                                 while (i) {
>                                         i--;
> @@ -1747,7 +1747,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
>         rx_ring->netdev = adapter->netdev;
>         rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;
>
> -       err = ixgbe_setup_rx_resources(rx_ring);
> +       err = ixgbe_setup_rx_resources(adapter, rx_ring);
>         if (err) {
>                 ret_val = 4;
>                 goto err_nomem;
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index f14d158..ba89d11 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -49,6 +49,9 @@
>  #include <linux/if_macvlan.h>
>  #include <linux/if_bridge.h>
>  #include <linux/prefetch.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> +#include <linux/atomic.h>
>  #include <scsi/fc/fc_fcoe.h>
>  #include <net/udp_tunnel.h>
>  #include <net/pkt_cls.h>
> @@ -1856,6 +1859,10 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
>   * @rx_desc: pointer to the EOP Rx descriptor
>   * @skb: pointer to current skb being fixed
>   *
> + * Check if the skb is valid in the XDP case it will be an error pointer.
> + * Return true in this case to abort processing and advance to next
> + * descriptor.
> + *
>   * Check for corrupted packet headers caused by senders on the local L2
>   * embedded NIC switch not setting up their Tx Descriptors right.  These
>   * should be very rare.
> @@ -1874,6 +1881,10 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
>  {
>         struct net_device *netdev = rx_ring->netdev;
>
> +       /* XDP packets use error pointer so abort at this point */
> +       if (IS_ERR(skb))
> +               return true;
> +
>         /* verify that the packet does not have any known errors */
>         if (unlikely(ixgbe_test_staterr(rx_desc,
>                                         IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
> @@ -2049,7 +2060,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>                 /* hand second half of page back to the ring */
>                 ixgbe_reuse_rx_page(rx_ring, rx_buffer);
>         } else {
> -               if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
> +               if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
>                         /* the page has been released from the ring */
>                         IXGBE_CB(skb)->page_released = true;
>                 } else {
> @@ -2070,10 +2081,10 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>
>  static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>                                            struct ixgbe_rx_buffer *rx_buffer,
> -                                          union ixgbe_adv_rx_desc *rx_desc,
> -                                          unsigned int size)
> +                                          struct xdp_buff *xdp,
> +                                          union ixgbe_adv_rx_desc *rx_desc)
>  {
> -       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
> +       unsigned int size = xdp->data_end - xdp->data;
>  #if (PAGE_SIZE < 8192)
>         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
>  #else
> @@ -2082,9 +2093,9 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>         struct sk_buff *skb;
>
>         /* prefetch first cache line of first page */
> -       prefetch(va);
> +       prefetch(xdp->data);
>  #if L1_CACHE_BYTES < 128
> -       prefetch(va + L1_CACHE_BYTES);
> +       prefetch(xdp->data + L1_CACHE_BYTES);
>  #endif
>
>         /* allocate a skb to store the frags */
> @@ -2097,7 +2108,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>                         IXGBE_CB(skb)->dma = rx_buffer->dma;
>
>                 skb_add_rx_frag(skb, 0, rx_buffer->page,
> -                               rx_buffer->page_offset,
> +                               xdp->data - page_address(rx_buffer->page),
>                                 size, truesize);
>  #if (PAGE_SIZE < 8192)
>                 rx_buffer->page_offset ^= truesize;
> @@ -2105,7 +2116,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>                 rx_buffer->page_offset += truesize;
>  #endif
>         } else {
> -               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
> +               memcpy(__skb_put(skb, size),
> +                      xdp->data, ALIGN(size, sizeof(long)));

I'm not sure what happened here.  Is this line over 80 characters?  If
not it probably doesn't need to be wrapped.  If it is then you might
want to fix the line wrapping up since it doesn't look right.

>                 rx_buffer->pagecnt_bias++;
>         }
>
> @@ -2114,10 +2126,9 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>
>  static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>                                        struct ixgbe_rx_buffer *rx_buffer,
> -                                      union ixgbe_adv_rx_desc *rx_desc,
> -                                      unsigned int size)
> +                                      struct xdp_buff *xdp,
> +                                      union ixgbe_adv_rx_desc *rx_desc)
>  {
> -       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
>  #if (PAGE_SIZE < 8192)
>         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
>  #else
> @@ -2127,19 +2138,19 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>         struct sk_buff *skb;
>
>         /* prefetch first cache line of first page */
> -       prefetch(va);
> +       prefetch(xdp->data);
>  #if L1_CACHE_BYTES < 128
> -       prefetch(va + L1_CACHE_BYTES);
> +       prefetch(xdp->data + L1_CACHE_BYTES);
>  #endif
>
>         /* build an skb to around the page buffer */
> -       skb = build_skb(va - IXGBE_SKB_PAD, truesize);
> +       skb = build_skb(xdp->data_hard_start, truesize);
>         if (unlikely(!skb))
>                 return NULL;
>
>         /* update pointers within the skb to store the data */
> -       skb_reserve(skb, IXGBE_SKB_PAD);
> -       __skb_put(skb, size);
> +       skb_reserve(skb, xdp->data - xdp->data_hard_start);
> +       __skb_put(skb, xdp->data_end - xdp->data);
>
>         /* record DMA address if this is the start of a chain of buffers */
>         if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
> @@ -2155,6 +2166,41 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>         return skb;
>  }
>
> +#define IXGBE_XDP_PASS 0
> +#define IXGBE_XDP_CONSUMED 1
> +
> +static struct sk_buff *ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
> +                                    struct xdp_buff *xdp)
> +{
> +       int result = IXGBE_XDP_PASS;
> +       struct bpf_prog *xdp_prog;
> +       u32 act;
> +
> +       rcu_read_lock();
> +       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
> +
> +       if (!xdp_prog)
> +               goto xdp_out;
> +
> +       act = bpf_prog_run_xdp(xdp_prog, xdp);
> +       switch (act) {
> +       case XDP_PASS:
> +               break;
> +       default:
> +               bpf_warn_invalid_xdp_action(act);
> +       case XDP_TX:
> +       case XDP_ABORTED:
> +               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
> +               /* fallthrough -- handle aborts by dropping packet */
> +       case XDP_DROP:
> +               result = IXGBE_XDP_CONSUMED;
> +               break;
> +       }
> +xdp_out:
> +       rcu_read_unlock();
> +       return ERR_PTR(-result);
> +}
> +
>  /**
>   * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
>   * @q_vector: structure containing interrupt and ring information
> @@ -2184,6 +2230,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>                 union ixgbe_adv_rx_desc *rx_desc;
>                 struct ixgbe_rx_buffer *rx_buffer;
>                 struct sk_buff *skb;
> +               struct xdp_buff xdp;
>                 unsigned int size;
>
>                 /* return some buffers to hardware, one at a time is too slow */
> @@ -2205,15 +2252,29 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>
>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>
> -               /* retrieve a buffer from the ring */

You can probably leave this comment here.  That is my preference anyway.

> -               if (skb)
> +               if (!skb) {
> +                       xdp.data = page_address(rx_buffer->page) +
> +                                               rx_buffer->page_offset;
> +                       xdp.data_hard_start = xdp.data -
> +                                               ixgbe_rx_offset(rx_ring);
> +                       xdp.data_end = xdp.data + size;
> +
> +                       skb = ixgbe_run_xdp(rx_ring, &xdp);
> +               }
> +
> +               if (IS_ERR(skb)) {
> +                       total_rx_packets++;
> +                       total_rx_bytes += size;
> +                       rx_buffer->pagecnt_bias++;
> +               } else if (skb) {
>                         ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
> -               else if (ring_uses_build_skb(rx_ring))
> +               } else if (ring_uses_build_skb(rx_ring)) {
>                         skb = ixgbe_build_skb(rx_ring, rx_buffer,
> -                                             rx_desc, size);
> -               else
> +                                             &xdp, rx_desc);
> +               } else {
>                         skb = ixgbe_construct_skb(rx_ring, rx_buffer,
> -                                                 rx_desc, size);
> +                                                 &xdp, rx_desc);
> +               }
>
>                 /* exit if we failed to retrieve a buffer */
>                 if (!skb) {
> @@ -6072,7 +6133,8 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
>   *
>   * Returns 0 on success, negative on failure
>   **/
> -int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
> +int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
> +                            struct ixgbe_ring *rx_ring)
>  {
>         struct device *dev = rx_ring->dev;
>         int orig_node = dev_to_node(dev);
> @@ -6109,6 +6171,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
>         rx_ring->next_to_clean = 0;
>         rx_ring->next_to_use = 0;
>
> +       rx_ring->xdp_prog = adapter->xdp_prog;
> +
>         return 0;
>  err:
>         vfree(rx_ring->rx_buffer_info);
> @@ -6132,7 +6196,7 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
>         int i, err = 0;
>
>         for (i = 0; i < adapter->num_rx_queues; i++) {
> -               err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
> +               err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
>                 if (!err)
>                         continue;
>
> @@ -6200,6 +6264,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
>  {
>         ixgbe_clean_rx_ring(rx_ring);
>
> +       rx_ring->xdp_prog = NULL;
>         vfree(rx_ring->rx_buffer_info);
>         rx_ring->rx_buffer_info = NULL;
>
> @@ -9466,6 +9531,54 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
>         return features;
>  }
>
> +static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
> +{
> +       int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
> +       struct ixgbe_adapter *adapter = netdev_priv(dev);
> +       struct bpf_prog *old_prog;
> +
> +       if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
> +               return -EINVAL;
> +
> +       if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
> +               return -EINVAL;
> +
> +       /* verify ixgbe ring attributes are sufficient for XDP */
> +       for (i = 0; i < adapter->num_rx_queues; i++) {
> +               struct ixgbe_ring *ring = adapter->rx_ring[i];
> +
> +               if (ring_is_rsc_enabled(ring))
> +                       return -EINVAL;
> +
> +               if (frame_size > ixgbe_rx_bufsz(ring))
> +                       return -EINVAL;
> +       }
> +
> +       old_prog = xchg(&adapter->xdp_prog, prog);
> +       for (i = 0; i < adapter->num_rx_queues; i++)
> +               xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
> +
> +       if (old_prog)
> +               bpf_prog_put(old_prog);
> +
> +       return 0;
> +}
> +
> +static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
> +{
> +       struct ixgbe_adapter *adapter = netdev_priv(dev);
> +
> +       switch (xdp->command) {
> +       case XDP_SETUP_PROG:
> +               return ixgbe_xdp_setup(dev, xdp->prog);
> +       case XDP_QUERY_PROG:
> +               xdp->prog_attached = !!(adapter->rx_ring[0]->xdp_prog);
> +               return 0;
> +       default:
> +               return -EINVAL;
> +       }
> +}
> +
>  static const struct net_device_ops ixgbe_netdev_ops = {
>         .ndo_open               = ixgbe_open,
>         .ndo_stop               = ixgbe_close,
> @@ -9511,6 +9624,7 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
>         .ndo_udp_tunnel_add     = ixgbe_add_udp_tunnel_port,
>         .ndo_udp_tunnel_del     = ixgbe_del_udp_tunnel_port,
>         .ndo_features_check     = ixgbe_features_check,
> +       .ndo_xdp                = ixgbe_xdp,
>  };
>
>  /**
>


More information about the Intel-wired-lan mailing list