[Intel-wired-lan] [net-next PATCH v2 5/6] i40e: Add TX and RX support in switchdev mode.

Jiri Pirko jiri at resnulli.us
Thu Jan 5 12:56:56 UTC 2017


Tue, Jan 03, 2017 at 07:07:53PM CET, sridhar.samudrala at intel.com wrote:
>In switchdev mode, broadcast filter is not enabled on VFs. The broadcasts and
>unknown frames from VFs are received by the PF and passed to corresponding VF
>port representator netdev.
>A host based switching entity like a linux bridge or OVS redirects these frames
>to the right VFs via VFPR netdevs. Any frames sent via VFPR netdevs are sent as
>directed transmits to the corresponding VFs. To enable directed transmit, skb
>metadata dst is used to pass the VF id and the frame is requeued to call the PFs
>transmit routine.
>
>Small script to demonstrate inter VF pings in switchdev mode.
>PF: enp5s0f0, VFs: enp5s2,enp5s2f1 VFPRs:enp5s0f0-vf0, enp5s0f0-vf1
>
># rmmod i40e; modprobe i40e
># devlink dev eswitch set pci/0000:05:00.0 mode switchdev
># echo 2 > /sys/class/net/enp5s0f0/device/sriov_numvfs
># ip link set enp5s0f0 vf 0 mac 00:11:22:33:44:55
># ip link set enp5s0f0 vf 1 mac 00:11:22:33:44:56
># rmmod i40evf; modprobe i40evf
>
>/* Create 2 namespaces and move the VFs to the corresponding ns. */
># ip netns add ns0
># ip link set enp5s2 netns ns0
># ip netns exec ns0 ip addr add 192.168.1.10/24 dev enp5s2
># ip netns exec ns0 ip link set enp5s2 up
># ip netns add ns1
># ip link set enp5s2f1 netns ns1
># ip netns exec ns1 ip addr add 192.168.1.11/24 dev enp5s2f1
># ip netns exec ns1 ip link set enp5s2f1 up
>
>/* bring up pf and vfpr netdevs */
># ip link set enp5s0f0 up
># ip link set enp5s0f0-vf0 up
># ip link set enp5s0f0-vf1 up
>
>/* Create a linux bridge and add vfpr netdevs to it. */
># ip link add vfpr-br type bridge
># ip link set enp5s0f0-vf0 master vfpr-br
># ip link set enp5s0f0-vf1 master vfpr-br
># ip addr add 192.168.1.1/24 dev vfpr-br
># ip link set vfpr-br up
>
># ip netns exec ns0 ping -c3 192.168.1.11
># ip netns exec ns1 ping -c3 192.168.1.10
>
>Signed-off-by: Sridhar Samudrala <sridhar.samudrala at intel.com>

[...]

>diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>index 352cf7c..b46ddaa 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>@@ -1176,16 +1176,37 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
>  * @rx_ring:  rx ring in play
>  * @skb: packet to send up
>  * @vlan_tag: vlan tag for packet
>+ * @lpbk: is it a loopback frame?
>  **/
> static void i40e_receive_skb(struct i40e_ring *rx_ring,
>-			     struct sk_buff *skb, u16 vlan_tag)
>+			     struct sk_buff *skb, u16 vlan_tag, bool lpbk)
> {
> 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
>+	struct i40e_pf *pf = rx_ring->vsi->back;
>+	struct i40e_vf *vf;
>+	struct ethhdr *eth;
>+	int vf_id;
> 
> 	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
> 	    (vlan_tag & VLAN_VID_MASK))
> 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
> 
>+	if ((pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) || !lpbk)
>+		goto gro_receive;
>+
>+	/* If a loopback packet is received from a VF in switchdev mode, pass the
>+	 * frame to the corresponding VFPR netdev based on the source MAC in the frame.
>+	 */
>+	eth = (struct ethhdr *)skb_mac_header(skb);
>+	for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
>+		vf = &pf->vf[vf_id];
>+		if (ether_addr_equal(eth->h_source, vf->default_lan_addr.addr)) {
>+			skb->dev = vf->vfpr_netdev;

This sucks :( Is't there any identification coming from rx ring that
would tell you what vf this is? To match vrpr according to a single MAC
address seems a bit awkward. What if there is a macvlan configured
on the VF?



>+			break;
>+		}
>+	}
>+
>+gro_receive:
> 	napi_gro_receive(&q_vector->napi, skb);
> }
> 

[...]

>@@ -2998,3 +3064,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
> 
> 	return i40e_xmit_frame_ring(skb, tx_ring);
> }
>+
>+netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb,
>+				        struct net_device *dev)
>+{
>+	struct i40e_vfpr_netdev_priv *priv = netdev_priv(dev);
>+	struct i40e_vf *vf = priv->vf;
>+	struct i40e_pf *pf = vf->pf;
>+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
>+
>+	skb_dst_drop(skb);
>+	dst_hold(&priv->vfpr_dst->dst);
>+	skb_dst_set(skb, &priv->vfpr_dst->dst);
>+	skb->dev = vsi->netdev;

This dst dance seems a bit odd to me. Why don't you just call
i40e_xmit_frame_ring with an extra arg holding the needed metadata?


>+
>+	return dev_queue_xmit(skb);
>+}
>diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>index e065321..850723f 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>@@ -366,6 +366,7 @@ struct i40e_ring_container {
> 
> bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
> netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
>+netdev_tx_t i40e_vfpr_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev);
> void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
> void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
> int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
>diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
>index edc0abd..c136cc9 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
>+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
>@@ -728,6 +728,9 @@ enum i40e_rx_desc_status_bits {
> #define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT  I40E_RX_DESC_STATUS_TSYNVALID_SHIFT
> #define I40E_RXD_QW1_STATUS_TSYNVALID_MASK \
> 				    BIT_ULL(I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT)
>+#define I40E_RXD_QW1_STATUS_LPBK_SHIFT  I40E_RX_DESC_STATUS_LPBK_SHIFT
>+#define I40E_RXD_QW1_STATUS_LPBK_MASK \
>+				BIT_ULL(I40E_RXD_QW1_STATUS_LPBK_SHIFT)
> 
> enum i40e_rx_desc_fltstat_values {
> 	I40E_RX_DESC_FLTSTAT_NO_DATA	= 0,
>diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
>index 0c8687d..f0860ef 100644
>--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
>+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
>@@ -1062,6 +1062,7 @@ static int i40e_vfpr_netdev_stop(struct net_device *dev)
> static const struct net_device_ops i40e_vfpr_netdev_ops = {
> 	.ndo_open       	= i40e_vfpr_netdev_open,
> 	.ndo_stop       	= i40e_vfpr_netdev_stop,
>+	.ndo_start_xmit		= i40e_vfpr_netdev_start_xmit,
> };
> 
> /**
>@@ -1121,16 +1122,22 @@ int i40e_alloc_vfpr_netdev(struct i40e_vf *vf, u16 vf_num)
> 
> 	priv = netdev_priv(vfpr_netdev);
> 	priv->vf = &(pf->vf[vf_num]);
>+	priv->vfpr_dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);

I'm missing a patch here. In net-next, metadata_dst_alloc has 2 args.



More information about the Intel-wired-lan mailing list