[Intel-wired-lan] [PATCH net v2] i40e: Fix crash when rebuild fails in i40e_xdp_setup
Kamil Maziarz
kamil.maziarz at intel.com
Wed Dec 21 12:35:20 UTC 2022
From: Sylwester Dziedziuch <sylwesterx.dziedziuch at intel.com>
When attaching XDP program on i40e driver there was a reset and rebuild
of the interface to reconfigure the queues for XDP operation.
If one of the steps of rebuild failed then the interface was left
in incorrect state that could lead to a crash. If rebuild failed while
getting capabilities from HW such crash occurs:
capability discovery failed, err I40E_ERR_ADMIN_QUEUE_TIMEOUT aq_err OK
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
Call Trace:
? i40e_reconfig_rss_queues+0x120/0x120 [i40e]
dev_xdp_install+0x70/0x100
dev_xdp_attach+0x1d7/0x530
dev_change_xdp_fd+0x1f4/0x230
do_setlink+0x45f/0xf30
? irq_work_interrupt+0xa/0x20
? __nla_validate_parse+0x12d/0x1a0
rtnl_setlink+0xb5/0x120
rtnetlink_rcv_msg+0x2b1/0x360
? sock_has_perm+0x80/0xa0
? rtnl_calcit.isra.42+0x120/0x120
netlink_rcv_skb+0x4c/0x120
netlink_unicast+0x196/0x230
netlink_sendmsg+0x204/0x3d0
sock_sendmsg+0x4c/0x50
__sys_sendto+0xee/0x160
? handle_mm_fault+0xc1/0x1e0
? syscall_trace_enter+0x1fb/0x2c0
? __sys_setsockopt+0xd6/0x1d0
__x64_sys_sendto+0x24/0x30
do_syscall_64+0x5b/0x1a0
entry_SYSCALL_64_after_hwframe+0x65/0xca
RIP: 0033:0x7f3535d99781
Fix this by removing reset and rebuild from i40e_xdp_setup and replace it
by interface down, reconfigure queues and interface up. This way if any
step fails the interface will remain in a correct state.
Fixes: 0c8493d90b6b ("i40e: add XDP support for pass and drop actions")
Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch at intel.com>
Signed-off-by: Piotr Raczynski <piotr.raczynski at intel.com>
Signed-off-by: Andrii Staikov <andrii.staikov at intel.com>
Signed-off-by: Kamil Maziarz <kamil.maziarz at intel.com>
---
v2: don't reinitialize rings while hotswapping program
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 144 +++++++++++++++-----
1 file changed, 108 insertions(+), 36 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 94feea3b2599..d9ec79c31554 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -50,6 +50,8 @@ static int i40e_veb_get_bw_info(struct i40e_veb *veb);
static int i40e_get_capabilities(struct i40e_pf *pf,
enum i40e_admin_queue_opc list_type);
static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf);
+static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi,
+ bool is_xdp);
/* i40e_pci_tbl - PCI Device ID Table
*
@@ -3563,11 +3565,17 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
/* clear the context structure first */
memset(&rx_ctx, 0, sizeof(rx_ctx));
- if (ring->vsi->type == I40E_VSI_MAIN)
- xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+ if (ring->vsi->type == I40E_VSI_MAIN) {
+ if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+ ring->queue_index,
+ ring->q_vector->napi.napi_id);
+ }
ring->xsk_pool = i40e_xsk_pool(ring);
if (ring->xsk_pool) {
+ xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
ring->rx_buf_len =
xsk_pool_get_rx_frame_size(ring->xsk_pool);
/* For AF_XDP ZC, we disallow packets to span on
@@ -13305,6 +13313,34 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
+/**
+ * i40e_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to changed
+ * @prog: XDP program
+ **/
+static void i40e_vsi_assign_bpf_prog(struct i40e_vsi *vsi,
+ struct bpf_prog *prog)
+{
+ int i;
+
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+}
+
+/**
+ * i40e_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
+ * @vsi: VSI to schedule napi on
+ */
+static void i40e_vsi_rx_napi_schedule(struct i40e_vsi *vsi)
+{
+ int i;
+
+ for (i = 0; i < vsi->num_queue_pairs; i++)
+ if (vsi->xdp_rings[i]->xsk_pool)
+ (void)i40e_xsk_wakeup(vsi->netdev, i,
+ XDP_WAKEUP_RX);
+}
+
/**
* i40e_xdp_setup - add/remove an XDP program
* @vsi: VSI to changed
@@ -13315,10 +13351,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+ bool is_xdp_enabled = i40e_enabled_xdp_vsi(vsi);
+ bool if_running = netif_running(vsi->netdev);
+ bool need_reinit = is_xdp_enabled != !!prog;
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
- bool need_reset;
- int i;
+ int ret = 0;
/* Don't allow frames that span over multiple buffers */
if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
@@ -13326,35 +13364,58 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
return -EINVAL;
}
- /* When turning XDP on->off/off->on we reset and rebuild the rings. */
- need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
-
- if (need_reset)
- i40e_prep_for_reset(pf);
-
/* VSI shall be deleted in a moment, just return EINVAL */
if (test_bit(__I40E_IN_REMOVE, pf->state))
return -EINVAL;
old_prog = xchg(&vsi->xdp_prog, prog);
- if (need_reset) {
- if (!prog)
- /* Wait until ndo_xsk_wakeup completes. */
- synchronize_rcu();
- i40e_reset_and_rebuild(pf, true, true);
+ if (!need_reinit)
+ goto assign_prog;
+
+ if (if_running && !test_and_set_bit(__I40E_VSI_DOWN, vsi->state))
+ i40e_down(vsi);
+
+ vsi = i40e_vsi_reinit_setup(vsi, true);
+
+ if (!vsi) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to reinitialize VSI during XDP setup");
+ goto err_vsi_setup;
}
- if (!i40e_enabled_xdp_vsi(vsi) && prog) {
- if (i40e_realloc_rx_bi_zc(vsi, true))
- return -ENOMEM;
- } else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
- if (i40e_realloc_rx_bi_zc(vsi, false))
- return -ENOMEM;
+ /* allocate descriptors */
+ ret = i40e_vsi_setup_tx_resources(vsi);
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to configure TX resources during XDP setup");
+ goto err_setup_tx;
+ }
+ ret = i40e_vsi_setup_rx_resources(vsi);
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to configure RX resources during XDP setup");
+ goto err_setup_rx;
}
- for (i = 0; i < vsi->num_queue_pairs; i++)
- WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+ if (!is_xdp_enabled && prog)
+ ret = i40e_realloc_rx_bi_zc(vsi, true);
+ else if (is_xdp_enabled && !prog)
+ ret = i40e_realloc_rx_bi_zc(vsi, false);
+
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to reallocate RX resources during XDP setup");
+ goto err_vsi_setup;
+ }
+
+ if (if_running) {
+ ret = i40e_up(vsi);
+
+ if (ret) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to open VSI during XDP setup");
+ goto err_vsi_setup;
+ }
+ }
+
+assign_prog:
+ i40e_vsi_assign_bpf_prog(vsi, prog);
if (old_prog)
bpf_prog_put(old_prog);
@@ -13362,13 +13423,20 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
/* Kick start the NAPI context if there is an AF_XDP socket open
* on that queue id. This so that receiving will start.
*/
- if (need_reset && prog)
- for (i = 0; i < vsi->num_queue_pairs; i++)
- if (vsi->xdp_rings[i]->xsk_pool)
- (void)i40e_xsk_wakeup(vsi->netdev, i,
- XDP_WAKEUP_RX);
+ if (need_reinit && prog)
+ i40e_vsi_rx_napi_schedule(vsi);
return 0;
+
+err_setup_rx:
+ i40e_vsi_free_rx_resources(vsi);
+err_setup_tx:
+ i40e_vsi_free_tx_resources(vsi);
+err_vsi_setup:
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+ i40e_vsi_assign_bpf_prog(vsi, old_prog);
+
+ return ret;
}
/**
@@ -14310,13 +14378,14 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
/**
* i40e_vsi_reinit_setup - return and reallocate resources for a VSI
* @vsi: pointer to the vsi.
+ * @is_xdp: flag indicating if this is reinit during XDP setup
*
* This re-allocates a vsi's queue resources.
*
* Returns pointer to the successfully allocated and configured VSI sw struct
* on success, otherwise returns NULL on failure.
**/
-static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
+static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi, bool is_xdp)
{
u16 alloc_queue_pairs;
struct i40e_pf *pf;
@@ -14352,12 +14421,14 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
/* Update the FW view of the VSI. Force a reset of TC and queue
* layout configurations.
*/
- enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
- pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
- pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
- i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
- if (vsi->type == I40E_VSI_MAIN)
- i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
+ if (!is_xdp) {
+ enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
+ pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
+ pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
+ i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
+ if (vsi->type == I40E_VSI_MAIN)
+ i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
+ }
/* assign it some queues */
ret = i40e_alloc_rings(vsi);
@@ -15123,7 +15194,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui
if (pf->lan_vsi == I40E_NO_VSI)
vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
else if (reinit)
- vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
+ vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi],
+ false);
if (!vsi) {
dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
i40e_cloud_filter_exit(pf);
--
2.25.1
More information about the Intel-wired-lan
mailing list