[Intel-wired-lan] [net-next PATCH v4 1/2] ixgbe: add XDP support for pass and drop actions
William Tu
u9012063 at gmail.com
Sat Mar 11 15:49:29 UTC 2017
On Fri, Mar 10, 2017 at 11:11 AM, John Fastabend
<john.fastabend at gmail.com> wrote:
> Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
> programs instead of rcu primitives as suggested by Daniel Borkmann and
> Alex Duyck.
>
> Signed-off-by: John Fastabend <john.r.fastabend at intel.com>
> ---
> drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 -
> drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 4 -
> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 162 +++++++++++++++++++---
> 3 files changed, 143 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index b1ecc26..729f84e 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -273,6 +273,7 @@ struct ixgbe_ring {
> struct ixgbe_ring *next; /* pointer to next ring in q_vector */
> struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
> struct net_device *netdev; /* netdev ring belongs to */
> + struct bpf_prog *xdp_prog;
> struct device *dev; /* device for DMA mapping */
> struct ixgbe_fwd_adapter *l2_accel_priv;
> void *desc; /* descriptor ring memory */
> @@ -510,6 +511,7 @@ struct ixgbe_adapter {
> unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
> /* OS defined structs */
> struct net_device *netdev;
> + struct bpf_prog *xdp_prog;
> struct pci_dev *pdev;
>
> unsigned long state;
> @@ -790,7 +792,7 @@ enum ixgbe_boards {
> void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
> void ixgbe_reset(struct ixgbe_adapter *adapter);
> void ixgbe_set_ethtool_ops(struct net_device *netdev);
> -int ixgbe_setup_rx_resources(struct ixgbe_ring *);
> +int ixgbe_setup_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
> int ixgbe_setup_tx_resources(struct ixgbe_ring *);
> void ixgbe_free_rx_resources(struct ixgbe_ring *);
> void ixgbe_free_tx_resources(struct ixgbe_ring *);
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> index 364c83f..27cf625 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> @@ -1114,7 +1114,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
> sizeof(struct ixgbe_ring));
>
> temp_ring[i].count = new_rx_count;
> - err = ixgbe_setup_rx_resources(&temp_ring[i]);
> + err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
> if (err) {
> while (i) {
> i--;
> @@ -1747,7 +1747,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
> rx_ring->netdev = adapter->netdev;
> rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;
>
> - err = ixgbe_setup_rx_resources(rx_ring);
> + err = ixgbe_setup_rx_resources(adapter, rx_ring);
> if (err) {
> ret_val = 4;
> goto err_nomem;
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index f14d158..ba89d11 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -49,6 +49,9 @@
> #include <linux/if_macvlan.h>
> #include <linux/if_bridge.h>
> #include <linux/prefetch.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> +#include <linux/atomic.h>
> #include <scsi/fc/fc_fcoe.h>
> #include <net/udp_tunnel.h>
> #include <net/pkt_cls.h>
> @@ -1856,6 +1859,10 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
> * @rx_desc: pointer to the EOP Rx descriptor
> * @skb: pointer to current skb being fixed
> *
> + * Check if the skb is valid; in the XDP case it will be an error pointer.
> + * Return true in this case to abort processing and advance to the next
> + * descriptor.
> + *
> * Check for corrupted packet headers caused by senders on the local L2
> * embedded NIC switch not setting up their Tx Descriptors right. These
> * should be very rare.
> @@ -1874,6 +1881,10 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
> {
> struct net_device *netdev = rx_ring->netdev;
>
> + /* XDP packets use error pointer so abort at this point */
> + if (IS_ERR(skb))
> + return true;
> +
> /* verify that the packet does not have any known errors */
> if (unlikely(ixgbe_test_staterr(rx_desc,
> IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
> @@ -2049,7 +2060,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
> /* hand second half of page back to the ring */
> ixgbe_reuse_rx_page(rx_ring, rx_buffer);
> } else {
> - if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
> + if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
> /* the page has been released from the ring */
> IXGBE_CB(skb)->page_released = true;
> } else {
> @@ -2070,10 +2081,10 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>
> static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
> struct ixgbe_rx_buffer *rx_buffer,
> - union ixgbe_adv_rx_desc *rx_desc,
> - unsigned int size)
> + struct xdp_buff *xdp,
> + union ixgbe_adv_rx_desc *rx_desc)
> {
> - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
> + unsigned int size = xdp->data_end - xdp->data;
> #if (PAGE_SIZE < 8192)
> unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
> #else
> @@ -2082,9 +2093,9 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
> struct sk_buff *skb;
>
> /* prefetch first cache line of first page */
> - prefetch(va);
> + prefetch(xdp->data);
> #if L1_CACHE_BYTES < 128
> - prefetch(va + L1_CACHE_BYTES);
> + prefetch(xdp->data + L1_CACHE_BYTES);
> #endif
>
> /* allocate a skb to store the frags */
> @@ -2097,7 +2108,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
> IXGBE_CB(skb)->dma = rx_buffer->dma;
>
> skb_add_rx_frag(skb, 0, rx_buffer->page,
> - rx_buffer->page_offset,
> + xdp->data - page_address(rx_buffer->page),
> size, truesize);
> #if (PAGE_SIZE < 8192)
> rx_buffer->page_offset ^= truesize;
> @@ -2105,7 +2116,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
> rx_buffer->page_offset += truesize;
> #endif
> } else {
> - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
> + memcpy(__skb_put(skb, size),
> + xdp->data, ALIGN(size, sizeof(long)));
> rx_buffer->pagecnt_bias++;
> }
>
> @@ -2114,10 +2126,9 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>
> static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
> struct ixgbe_rx_buffer *rx_buffer,
> - union ixgbe_adv_rx_desc *rx_desc,
> - unsigned int size)
> + struct xdp_buff *xdp,
> + union ixgbe_adv_rx_desc *rx_desc)
> {
> - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
> #if (PAGE_SIZE < 8192)
> unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
> #else
> @@ -2127,19 +2138,19 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
> struct sk_buff *skb;
>
> /* prefetch first cache line of first page */
> - prefetch(va);
> + prefetch(xdp->data);
> #if L1_CACHE_BYTES < 128
> - prefetch(va + L1_CACHE_BYTES);
> + prefetch(xdp->data + L1_CACHE_BYTES);
> #endif
>
> + /* build an skb around the page buffer */
> - skb = build_skb(va - IXGBE_SKB_PAD, truesize);
> + skb = build_skb(xdp->data_hard_start, truesize);
> if (unlikely(!skb))
> return NULL;
>
> /* update pointers within the skb to store the data */
> - skb_reserve(skb, IXGBE_SKB_PAD);
> - __skb_put(skb, size);
> + skb_reserve(skb, xdp->data - xdp->data_hard_start);
> + __skb_put(skb, xdp->data_end - xdp->data);
>
> /* record DMA address if this is the start of a chain of buffers */
> if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
> @@ -2155,6 +2166,41 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
> return skb;
> }
>
> +#define IXGBE_XDP_PASS 0
> +#define IXGBE_XDP_CONSUMED 1
> +
> +static struct sk_buff *ixgbe_run_xdp(struct ixgbe_ring *rx_ring,
> + struct xdp_buff *xdp)
> +{
> + int result = IXGBE_XDP_PASS;
> + struct bpf_prog *xdp_prog;
> + u32 act;
> +
> + rcu_read_lock();
> + xdp_prog = READ_ONCE(rx_ring->xdp_prog);
> +
> + if (!xdp_prog)
> + goto xdp_out;
> +
> + act = bpf_prog_run_xdp(xdp_prog, xdp);
> + switch (act) {
> + case XDP_PASS:
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + case XDP_TX:
> + case XDP_ABORTED:
> + trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
> + /* fallthrough -- handle aborts by dropping packet */
> + case XDP_DROP:
> + result = IXGBE_XDP_CONSUMED;
> + break;
> + }
> +xdp_out:
> + rcu_read_unlock();
> + return ERR_PTR(-result);
> +}
> +
> /**
> * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
> * @q_vector: structure containing interrupt and ring information
> @@ -2184,6 +2230,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
> union ixgbe_adv_rx_desc *rx_desc;
> struct ixgbe_rx_buffer *rx_buffer;
> struct sk_buff *skb;
> + struct xdp_buff xdp;
> unsigned int size;
>
> /* return some buffers to hardware, one at a time is too slow */
> @@ -2205,15 +2252,29 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>
> rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>
> - /* retrieve a buffer from the ring */
> - if (skb)
> + if (!skb) {
> + xdp.data = page_address(rx_buffer->page) +
> + rx_buffer->page_offset;
> + xdp.data_hard_start = xdp.data -
> + ixgbe_rx_offset(rx_ring);
> + xdp.data_end = xdp.data + size;
> +
> + skb = ixgbe_run_xdp(rx_ring, &xdp);
> + }
> +
> + if (IS_ERR(skb)) {
> + total_rx_packets++;
> + total_rx_bytes += size;
> + rx_buffer->pagecnt_bias++;
> + } else if (skb) {
> ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
> - else if (ring_uses_build_skb(rx_ring))
> + } else if (ring_uses_build_skb(rx_ring)) {
> skb = ixgbe_build_skb(rx_ring, rx_buffer,
> - rx_desc, size);
> - else
> + &xdp, rx_desc);
> + } else {
> skb = ixgbe_construct_skb(rx_ring, rx_buffer,
> - rx_desc, size);
> + &xdp, rx_desc);
> + }
>
> /* exit if we failed to retrieve a buffer */
> if (!skb) {
> @@ -6072,7 +6133,8 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
> *
> * Returns 0 on success, negative on failure
> **/
> -int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
> +int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
> + struct ixgbe_ring *rx_ring)
> {
> struct device *dev = rx_ring->dev;
> int orig_node = dev_to_node(dev);
> @@ -6109,6 +6171,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
> rx_ring->next_to_clean = 0;
> rx_ring->next_to_use = 0;
>
> + rx_ring->xdp_prog = adapter->xdp_prog;
> +
> return 0;
> err:
> vfree(rx_ring->rx_buffer_info);
> @@ -6132,7 +6196,7 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
> int i, err = 0;
>
> for (i = 0; i < adapter->num_rx_queues; i++) {
> - err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
> + err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
> if (!err)
> continue;
>
> @@ -6200,6 +6264,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
> {
> ixgbe_clean_rx_ring(rx_ring);
>
> + rx_ring->xdp_prog = NULL;
> vfree(rx_ring->rx_buffer_info);
> rx_ring->rx_buffer_info = NULL;
>
> @@ -9466,6 +9531,54 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
> return features;
> }
>
> +static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
> +{
> + int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
> + struct ixgbe_adapter *adapter = netdev_priv(dev);
> + struct bpf_prog *old_prog;
> +
> + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
> + return -EINVAL;
> +
> + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
> + return -EINVAL;
> +
> + /* verify ixgbe ring attributes are sufficient for XDP */
> + for (i = 0; i < adapter->num_rx_queues; i++) {
> + struct ixgbe_ring *ring = adapter->rx_ring[i];
> +
> + if (ring_is_rsc_enabled(ring))
> + return -EINVAL;
> +
> + if (frame_size > ixgbe_rx_bufsz(ring))
> + return -EINVAL;
> + }
> +
> + old_prog = xchg(&adapter->xdp_prog, prog);
> + for (i = 0; i < adapter->num_rx_queues; i++)
> + xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
> +
> + if (old_prog)
> + bpf_prog_put(old_prog);
> +
> + return 0;
> +}
Since the patch does not support xdp_adjust_head() yet, should we
detect that case and return -EOPNOTSUPP?
--William
> +
> +static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
> +{
> + struct ixgbe_adapter *adapter = netdev_priv(dev);
> +
> + switch (xdp->command) {
> + case XDP_SETUP_PROG:
> + return ixgbe_xdp_setup(dev, xdp->prog);
> + case XDP_QUERY_PROG:
> + xdp->prog_attached = !!(adapter->rx_ring[0]->xdp_prog);
> + return 0;
> + default:
> + return -EINVAL;
> + }
> +}
> +
> static const struct net_device_ops ixgbe_netdev_ops = {
> .ndo_open = ixgbe_open,
> .ndo_stop = ixgbe_close,
> @@ -9511,6 +9624,7 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
> .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port,
> .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port,
> .ndo_features_check = ixgbe_features_check,
> + .ndo_xdp = ixgbe_xdp,
> };
>
> /**
>
More information about the Intel-wired-lan
mailing list