[Intel-wired-lan] [PATCH RFC net-next 32/34] idpf: implement Tx path for AF_XDP

Alexander Lobakin aleksander.lobakin at intel.com
Sat Dec 23 02:55:52 UTC 2023


From: Michal Kubiak <michal.kubiak at intel.com>

Implement Tx handling for AF_XDP feature in zero-copy mode using
the libie XSk infra.

Signed-off-by: Michal Kubiak <michal.kubiak at intel.com>
Co-developed-by: Alexander Lobakin <aleksander.lobakin at intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin at intel.com>
---
 drivers/net/ethernet/intel/idpf/idpf_txrx.c |  44 ++-
 drivers/net/ethernet/intel/idpf/idpf_txrx.h |   4 +
 drivers/net/ethernet/intel/idpf/idpf_xsk.c  | 318 ++++++++++++++++++++
 drivers/net/ethernet/intel/idpf/idpf_xsk.h  |   9 +
 4 files changed, 361 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index e3f59bbe7c90..5ba880c2bedc 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3,6 +3,7 @@
 
 #include "idpf.h"
 #include "idpf_xdp.h"
+#include "idpf_xsk.h"
 
 /**
  * idpf_buf_lifo_push - push a buffer pointer onto stack
@@ -55,30 +56,36 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 	}
 }
 
-/**
- * idpf_tx_buf_rel_all - Free any empty Tx buffers
- * @txq: queue to be cleaned
- */
-static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
+static void idpf_tx_buf_clean(struct idpf_queue *txq)
 {
 	struct libie_sq_onstack_stats ss = { };
 	struct xdp_frame_bulk bq;
-	u16 i;
-
-	/* Buffers already cleared, nothing to do */
-	if (!txq->tx_buf)
-		return;
 
 	xdp_frame_bulk_init(&bq);
 	rcu_read_lock();
 
-	/* Free all the Tx buffer sk_buffs */
-	for (i = 0; i < txq->desc_count; i++)
+	for (u32 i = 0; i < txq->desc_count; i++)
 		libie_tx_complete_any(&txq->tx_buf[i], txq->dev, &bq,
 				      &txq->xdp_tx_active, &ss);
 
 	xdp_flush_frame_bulk(&bq);
 	rcu_read_unlock();
+}
+
+/**
+ * idpf_tx_buf_rel_all - Free any empty Tx buffers
+ * @txq: queue to be cleaned
+ */
+static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
+{
+	/* Buffers already cleared, nothing to do */
+	if (!txq->tx_buf)
+		return;
+
+	if (test_bit(__IDPF_Q_XSK, txq->flags))
+		idpf_xsk_clean_xdpq(txq);
+	else
+		idpf_tx_buf_clean(txq);
 
 	kfree(txq->tx_buf);
 	txq->tx_buf = NULL;
@@ -86,7 +93,7 @@ static void idpf_tx_buf_rel_all(struct idpf_queue *txq)
 	if (!txq->buf_stack.bufs)
 		return;
 
-	for (i = 0; i < txq->buf_stack.size; i++)
+	for (u32 i = 0; i < txq->buf_stack.size; i++)
 		kfree(txq->buf_stack.bufs[i]);
 
 	kfree(txq->buf_stack.bufs);
@@ -105,6 +112,8 @@ void idpf_tx_desc_rel(struct idpf_queue *txq, bool bufq)
 	if (bufq)
 		idpf_tx_buf_rel_all(txq);
 
+	idpf_xsk_clear_queue(txq);
+
 	if (!txq->desc_ring)
 		return;
 
@@ -196,6 +205,7 @@ static int idpf_tx_buf_alloc_all(struct idpf_queue *tx_q)
  */
 int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
 {
+	enum virtchnl2_queue_type type;
 	struct device *dev = tx_q->dev;
 	u32 desc_sz;
 	int err;
@@ -228,6 +238,10 @@ int idpf_tx_desc_alloc(struct idpf_queue *tx_q, bool bufq)
 	tx_q->next_to_clean = 0;
 	set_bit(__IDPF_Q_GEN_CHK, tx_q->flags);
 
+	type = bufq ? VIRTCHNL2_QUEUE_TYPE_TX :
+	       VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+	idpf_xsk_setup_queue(tx_q, type);
+
 	return 0;
 
 err_alloc:
@@ -3802,7 +3816,9 @@ static bool idpf_tx_splitq_clean_all(struct idpf_q_vector *q_vec,
 	for (i = 0; i < num_txq; i++) {
 		struct idpf_queue *cq = q_vec->tx[i];
 
-		if (!test_bit(__IDPF_Q_XDP, cq->flags))
+		if (test_bit(__IDPF_Q_XSK, cq->flags))
+			clean_complete &= idpf_xmit_zc(cq);
+		else if (!test_bit(__IDPF_Q_XDP, cq->flags))
 			clean_complete &= idpf_tx_clean_complq(cq,
 							       budget_per_q,
 							       cleaned);
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index be396f1e346a..d55ff6aaae2b 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -313,6 +313,7 @@ struct idpf_ptype_state {
  * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
  * @__IDPF_Q_POLL_MODE: Enable poll mode
+ * @__IDPF_Q_XSK: Queue used to handle the AF_XDP socket
  * @__IDPF_Q_FLAGS_NBITS: Must be last
  */
 enum idpf_queue_flags_t {
 	__IDPF_Q_GEN_CHK,
@@ -321,6 +322,7 @@ enum idpf_queue_flags_t {
 	__IDPF_Q_SW_MARKER,
 	__IDPF_Q_POLL_MODE,
 	__IDPF_Q_XDP,
+	__IDPF_Q_XSK,
 
 	__IDPF_Q_FLAGS_NBITS,
 };
@@ -574,10 +576,12 @@ struct idpf_queue {
 	union {
 		struct page_pool *hdr_pp;
 		struct idpf_queue **xdpqs;
+		struct xsk_buff_pool *xsk_tx;
 	};
 	union {
 		struct page_pool *pp;
 		struct device *dev;
+		struct xsk_buff_pool *xsk_rx;
 	};
 	union {
 		union virtchnl2_rx_desc *rx;
diff --git a/drivers/net/ethernet/intel/idpf/idpf_xsk.c b/drivers/net/ethernet/intel/idpf/idpf_xsk.c
index 3017680fedb3..6f1870c05948 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_xsk.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_xsk.c
@@ -6,6 +6,89 @@
 #include "idpf.h"
 #include "idpf_xsk.h"
 
+/**
+ * idpf_xsk_setup_queue - set xsk_pool pointer from netdev to the queue structure
+ * @q: queue to use
+ * @t: virtchnl2 queue type of @q
+ *
+ * Assigns the netdev's xsk_pool pointer to @q if supported, NULL otherwise.
+ */
+void idpf_xsk_setup_queue(struct idpf_queue *q, enum virtchnl2_queue_type t)
+{
+	struct idpf_vport_user_config_data *cfg_data;
+	struct idpf_vport *vport = q->vport;
+	struct xsk_buff_pool *pool;
+	bool is_rx = false;
+	int qid;
+
+	__clear_bit(__IDPF_Q_XSK, q->flags);
+
+	if (!idpf_xdp_is_prog_ena(q->vport))
+		return;
+
+	switch (t) {
+	case VIRTCHNL2_QUEUE_TYPE_RX:
+		is_rx = true;
+		qid = q->idx;
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+		is_rx = true;
+		qid = q->rxq_grp->splitq.rxq_sets[0]->rxq.idx;
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_TX:
+		qid = q->idx - q->vport->xdp_txq_offset;
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION:
+		qid = q->txq_grp->txqs[0]->idx - q->vport->xdp_txq_offset;
+		break;
+	default:
+		return;
+	}
+
+	if (!is_rx && !test_bit(__IDPF_Q_XDP, q->flags))
+		return;
+
+	cfg_data = &vport->adapter->vport_config[vport->idx]->user_config;
+
+	if (!test_bit(qid, cfg_data->af_xdp_zc_qps))
+		return;
+
+	pool = xsk_get_pool_from_qid(q->vport->netdev, qid);
+
+	if (pool && is_rx && !xsk_buff_can_alloc(pool, 1))
+		return;
+
+	if (is_rx)
+		q->xsk_rx = pool;
+	else
+		q->xsk_tx = pool;
+
+	__set_bit(__IDPF_Q_XSK, q->flags);
+}
+
+void idpf_xsk_clear_queue(struct idpf_queue *q)
+{
+	struct device *dev;
+
+	if (!__test_and_clear_bit(__IDPF_Q_XSK, q->flags))
+		return;
+
+	switch (q->q_type) {
+	case VIRTCHNL2_QUEUE_TYPE_RX:
+	case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+		dev = q->xsk_rx->dev;
+		q->xsk_rx = NULL;
+		q->dev = dev;
+		break;
+	case VIRTCHNL2_QUEUE_TYPE_TX:
+	case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION:
+		dev = q->xsk_tx->dev;
+		q->xsk_tx = NULL;
+		q->dev = dev;
+		break;
+	}
+}
+
 /**
  * idpf_qp_cfg_qs - Configure all queues contained from a given array.
  * @vport: vport structure
@@ -95,6 +178,23 @@ idpf_qp_clean_qs(struct idpf_vport *vport, struct idpf_queue **qs, int num_qs)
 	}
 }
 
+/**
+ * idpf_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+static void
+idpf_trigger_sw_intr(struct idpf_hw *hw, struct idpf_q_vector *q_vector)
+{
+	struct idpf_intr_reg *intr = &q_vector->intr_reg;
+	u32 val;
+
+	val = intr->dyn_ctl_intena_m | intr->dyn_ctl_itridx_m | /* set no ITR */
+	      intr->dyn_ctl_swint_trig_m | intr->dyn_ctl_sw_itridx_ena_m;
+
+	writel(val, intr->dyn_ctl);
+}
+
 /**
  * idpf_qvec_ena_irq - Enable IRQ for given queue vector
  * @q_vector: queue vector
@@ -472,3 +572,221 @@ int idpf_xsk_pool_setup(struct idpf_vport *vport, struct xsk_buff_pool *pool,
 xsk_exit:
 	return err;
 }
+
+/**
+ * idpf_xsk_clean_xdpq - Clean the XDP Tx queue and its buffer pool queues
+ * @xdpq: XDP Tx queue
+ */
+void idpf_xsk_clean_xdpq(struct idpf_queue *xdpq)
+{
+	u32 ntc = xdpq->next_to_clean, ntu = xdpq->next_to_use;
+	struct device *dev = xdpq->xsk_tx->dev;
+	struct libie_sq_onstack_stats ss = { };
+	struct xdp_frame_bulk bq;
+	u32 xsk_frames = 0;
+
+	xdp_frame_bulk_init(&bq);
+	rcu_read_lock();
+
+	while (ntc != ntu) {
+		struct libie_tx_buffer *tx_buf = &xdpq->tx_buf[ntc];
+
+		if (tx_buf->type)
+			libie_xdp_complete_tx_buf(tx_buf, dev, false, &bq,
+						  &xdpq->xdp_tx_active, &ss);
+		else
+			xsk_frames++;
+
+		if (unlikely(++ntc >= xdpq->desc_count))
+			ntc = 0;
+	}
+
+	xdp_flush_frame_bulk(&bq);
+	rcu_read_unlock();
+
+	if (xsk_frames)
+		xsk_tx_completed(xdpq->xsk_tx, xsk_frames);
+}
+
+/**
+ * idpf_clean_xdp_irq_zc - reclaim completed AF_XDP Tx descriptors via the CQ
+ * @complq: completion queue associated with zero-copy Tx queue
+ */
+static u32 idpf_clean_xdp_irq_zc(struct idpf_queue *complq)
+{
+	struct idpf_splitq_4b_tx_compl_desc *last_rs_desc;
+	struct device *dev = complq->xsk_tx->dev;
+	struct libie_sq_onstack_stats ss = { };
+	int complq_budget = complq->desc_count;
+	u32 ntc = complq->next_to_clean;
+	struct idpf_queue *xdpq = NULL;
+	struct xdp_frame_bulk bq;
+	u32 done_frames = 0;
+	u32 xsk_frames = 0;
+	u32 tx_ntc, cnt;
+	bool gen_flag;
+	int head, i;
+
+	last_rs_desc = &complq->comp_4b[ntc];
+	gen_flag = test_bit(__IDPF_Q_GEN_CHK, complq->flags);
+
+	do {
+		int ctype = idpf_parse_compl_desc(last_rs_desc, complq,
+						  &xdpq, gen_flag);
+
+		if (likely(ctype == IDPF_TXD_COMPLT_RS)) {
+			head = le16_to_cpu(last_rs_desc->q_head_compl_tag.q_head);
+			goto fetch_next_desc;
+		}
+
+		switch (ctype) {
+		case IDPF_TXD_COMPLT_SW_MARKER:
+			idpf_tx_handle_sw_marker(xdpq);
+			break;
+		case -ENODATA:
+			goto clean_xdpq;
+		case -EINVAL:
+			goto fetch_next_desc;
+		default:
+			dev_err(&xdpq->vport->adapter->pdev->dev,
+				"Unsupported completion type for XSK\n");
+			goto fetch_next_desc;
+		}
+
+fetch_next_desc:
+		last_rs_desc++;
+		ntc++;
+		if (unlikely(ntc == complq->desc_count)) {
+			ntc = 0;
+			last_rs_desc = &complq->comp_4b[0];
+			gen_flag = !gen_flag;
+			change_bit(__IDPF_Q_GEN_CHK, complq->flags);
+		}
+		prefetch(last_rs_desc);
+		complq_budget--;
+	} while (likely(complq_budget));
+
+clean_xdpq:
+	complq->next_to_clean = ntc;
+
+	if (!xdpq)
+		return 0;
+
+	cnt = xdpq->desc_count;
+	tx_ntc = xdpq->next_to_clean;
+	done_frames = head >= tx_ntc ? head - tx_ntc :
+				       head + cnt - tx_ntc;
+	if (!done_frames)
+		return 0;
+
+	if (likely(!complq->xdp_tx_active))
+		goto xsk;
+
+	xdp_frame_bulk_init(&bq);
+
+	for (i = 0; i < done_frames; i++) {
+		struct libie_tx_buffer *tx_buf = &xdpq->tx_buf[tx_ntc];
+
+		if (tx_buf->type)
+			libie_xdp_complete_tx_buf(tx_buf, dev, true, &bq,
+						  &xdpq->xdp_tx_active,
+						  &ss);
+		else
+			xsk_frames++;
+
+		if (unlikely(++tx_ntc == cnt))
+			tx_ntc = 0;
+	}
+
+	xdp_flush_frame_bulk(&bq);
+
+xsk:
+	xdpq->next_to_clean += done_frames;
+	if (xdpq->next_to_clean >= cnt)
+		xdpq->next_to_clean -= cnt;
+
+	if (xsk_frames)
+		xsk_tx_completed(xdpq->xsk_tx, xsk_frames);
+
+	return done_frames;
+}
+
+/**
+ * idpf_xsk_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @sq: libie XDP Tx queue abstraction of the XDP queue to produce the
+ *	descriptor on
+ */
+static void idpf_xsk_xmit_pkt(struct libie_xdp_tx_desc desc,
+			      const struct libie_xdp_tx_queue *sq)
+{
+	union idpf_tx_flex_desc *tx_desc = sq->desc_ring;
+	struct idpf_tx_splitq_params tx_params = {
+		.dtype		= IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2,
+		.eop_cmd	= IDPF_TX_DESC_CMD_EOP,
+	};
+
+	tx_desc = &tx_desc[*sq->next_to_use];
+	tx_desc->q.buf_addr = cpu_to_le64(desc.addr);
+
+	idpf_tx_splitq_build_desc(tx_desc, &tx_params,
+				  tx_params.eop_cmd | tx_params.offload.td_cmd,
+				  desc.len);
+}
+
+static u32 idpf_xsk_xmit_prep(void *_xdpq, struct libie_xdp_tx_queue *sq)
+{
+	struct idpf_queue *xdpq = _xdpq;
+
+	libie_xdp_sq_lock(&xdpq->xdp_lock);
+
+	*sq = (struct libie_xdp_tx_queue){
+		.dev		= xdpq->dev,
+		.tx_buf		= xdpq->tx_buf,
+		.desc_ring	= xdpq->desc_ring,
+		.xdp_lock	= &xdpq->xdp_lock,
+		.next_to_use	= &xdpq->next_to_use,
+		.desc_count	= xdpq->desc_count,
+		.xdp_tx_active	= &xdpq->xdp_tx_active,
+	};
+
+	return IDPF_DESC_UNUSED(xdpq);
+}
+
+/**
+ * idpf_xmit_xdpq_zc - take entries from XSK Tx queue and place them onto HW Tx queue
+ * @xdpq: XDP queue to produce the HW Tx descriptors on
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+static bool idpf_xmit_xdpq_zc(struct idpf_queue *xdpq)
+{
+	u32 budget;
+
+	budget = IDPF_DESC_UNUSED(xdpq);
+	budget = min_t(u32, budget, IDPF_QUEUE_QUARTER(xdpq));
+
+	return libie_xsk_xmit_do_bulk(xdpq, xdpq->xsk_tx, budget,
+				      idpf_xsk_xmit_prep, idpf_xsk_xmit_pkt,
+				      idpf_xdp_tx_finalize);
+}
+
+/**
+ * idpf_xmit_zc - perform xmit from all XDP queues assigned to the completion queue
+ * @complq: Completion queue associated with one or more XDP queues
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool idpf_xmit_zc(struct idpf_queue *complq)
+{
+	struct idpf_txq_group *xdpq_grp = complq->txq_grp;
+	bool result = true;
+	int i;
+
+	idpf_clean_xdp_irq_zc(complq);
+
+	for (i = 0; i < xdpq_grp->num_txq; i++)
+		result &= idpf_xmit_xdpq_zc(xdpq_grp->txqs[i]);
+
+	return result;
+}
diff --git a/drivers/net/ethernet/intel/idpf/idpf_xsk.h b/drivers/net/ethernet/intel/idpf/idpf_xsk.h
index 93705900f592..777d6ab7891d 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_xsk.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_xsk.h
@@ -6,9 +6,18 @@
 
 #include <linux/types.h>
 
+enum virtchnl2_queue_type;
+
+struct idpf_queue;
 struct idpf_vport;
 struct xsk_buff_pool;
 
+void idpf_xsk_setup_queue(struct idpf_queue *q, enum virtchnl2_queue_type t);
+void idpf_xsk_clear_queue(struct idpf_queue *q);
+
+void idpf_xsk_clean_xdpq(struct idpf_queue *xdpq);
+bool idpf_xmit_zc(struct idpf_queue *complq);
+
 int idpf_xsk_pool_setup(struct idpf_vport *vport, struct xsk_buff_pool *pool,
 			u32 qid);
 
-- 
2.43.0



More information about the Intel-wired-lan mailing list