[Intel-wired-lan] [next-queue 08/10] ixgbe: process the Tx ipsec offload
Shannon Nelson
shannon.nelson at oracle.com
Thu Dec 7 18:50:24 UTC 2017
On 12/7/2017 9:56 AM, Alexander Duyck wrote:
You've suggested several things here, all good things to look into,
which I will do, most now, some in the near future.
Thanks!
sln
> On Wed, Dec 6, 2017 at 9:43 PM, Shannon Nelson
> <shannon.nelson at oracle.com> wrote:
>> On 12/5/2017 10:13 AM, Alexander Duyck wrote:
>>>
>>> On Mon, Dec 4, 2017 at 9:35 PM, Shannon Nelson
>>> <shannon.nelson at oracle.com> wrote:
>>>>
>>>> If the skb has a security association referenced in the skb, then
>>>> set up the Tx descriptor with the ipsec offload bits. While we're
>>>> here, we fix an oddly named field in the context descriptor struct.
>>>>
>>>> Signed-off-by: Shannon Nelson <shannon.nelson at oracle.com>
>>>> ---
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe.h | 10 +++-
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 77
>>>> ++++++++++++++++++++++++++
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 4 +-
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 38 ++++++++++---
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 +-
>>>> 5 files changed, 118 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>>>> index 77f07dc..68097fe 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>>>> @@ -171,10 +171,11 @@ enum ixgbe_tx_flags {
>>>> IXGBE_TX_FLAGS_CC = 0x08,
>>>> IXGBE_TX_FLAGS_IPV4 = 0x10,
>>>> IXGBE_TX_FLAGS_CSUM = 0x20,
>>>> + IXGBE_TX_FLAGS_IPSEC = 0x40,
>>>>
>>>> /* software defined flags */
>>>> - IXGBE_TX_FLAGS_SW_VLAN = 0x40,
>>>> - IXGBE_TX_FLAGS_FCOE = 0x80,
>>>> + IXGBE_TX_FLAGS_SW_VLAN = 0x80,
>>>> + IXGBE_TX_FLAGS_FCOE = 0x100,
>>>> };
>>>>
>>>> /* VLAN info */
>>>> @@ -1012,12 +1013,17 @@ void ixgbe_init_ipsec_offload(struct
>>>> ixgbe_adapter *adapter);
>>>> void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
>>>> union ixgbe_adv_rx_desc *rx_desc,
>>>> struct sk_buff *skb);
>>>> +int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct sk_buff *skb,
>>>> + __be16 protocol, struct ixgbe_ipsec_tx_data *itd);
>>>> void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
>>>> #else
>>>> static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter
>>>> *adapter) { };
>>>> static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
>>>> union ixgbe_adv_rx_desc *rx_desc,
>>>> struct sk_buff *skb) { };
>>>> +static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
>>>> + struct sk_buff *skb, __be16 protocol,
>>>> + struct ixgbe_ipsec_tx_data *itd) {
>>>> return 0; };
>>>> static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) {
>>>> };
>>>> #endif /* CONFIG_XFRM_OFFLOAD */
>>>> #endif /* _IXGBE_H_ */
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
>>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
>>>> index fd06d9b..2a0dd7a 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
>>>> @@ -703,12 +703,89 @@ static void ixgbe_ipsec_del_sa(struct xfrm_state
>>>> *xs)
>>>> }
>>>> }
>>>>
>>>> +/**
>>>> + * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload
>>>> + * @skb: current data packet
>>>> + * @xs: pointer to transformer state struct
>>>> + **/
>>>> +static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct
>>>> xfrm_state *xs)
>>>> +{
>>>> + if (xs->props.family == AF_INET) {
>>>> + /* Offload with IPv4 options is not supported yet */
>>>> + if (ip_hdr(skb)->ihl > 5)
>>>
>>>
>>> I would make this ihl != 5 instead of "> 5" since smaller values would
>>> be invalid as well.
>>
>>
>> Sure
>>
>>
>>>
>>>> + return false;
>>>> + } else {
>>>> + /* Offload with IPv6 extension headers is not support yet
>>>> */
>>>> + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
>>>> + return false;
>>>> + }
>>>> +
>>>> + return true;
>>>> +}
>>>> +
>>>> static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
>>>> .xdo_dev_state_add = ixgbe_ipsec_add_sa,
>>>> .xdo_dev_state_delete = ixgbe_ipsec_del_sa,
>>>> + .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
>>>> };
>>>>
>>>> /**
>>>> + * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
>>>> + * @tx_ring: outgoing context
>>>> + * @skb: current data packet
>>>> + * @protocol: network protocol
>>>> + * @itd: ipsec Tx data for later use in building context descriptor
>>>> + **/
>>>> +int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct sk_buff *skb,
>>>> + __be16 protocol, struct ixgbe_ipsec_tx_data *itd)
>>>> +{
>>>> + struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
>>>> + struct ixgbe_ipsec *ipsec = adapter->ipsec;
>>>> + struct xfrm_state *xs;
>>>> + struct tx_sa *tsa;
>>>> +
>>>> + if (!skb->sp->len) {
>>>> + netdev_err(tx_ring->netdev, "%s: no xfrm state len =
>>>> %d\n",
>>>> + __func__, skb->sp->len);
>>>> + return 0;
>>>> + }
>>>> +
>>>> + xs = xfrm_input_state(skb);
>>>> + if (!xs) {
>>>> + netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs
>>>> = %p\n",
>>>> + __func__, xs);
>>>> + return 0;
>>>> + }
>>>> +
>>>> + itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
>>>> + if (itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT) {
>>>> + netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d
>>>> handle=%lu\n",
>>>> + __func__, itd->sa_idx,
>>>> xs->xso.offload_handle);
>>>> + return 0;
>>>> + }
>>>> +
>>>> + tsa = &ipsec->tx_tbl[itd->sa_idx];
>>>> + if (!tsa->used) {
>>>> + netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
>>>> + __func__, itd->sa_idx);
>>>> + return 0;
>>>> + }
>>>> +
>>>> + itd->flags = 0;
>>>> + if (xs->id.proto == IPPROTO_ESP) {
>>>> + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
>>>> + IXGBE_ADVTXD_TUCMD_L4T_TCP;
>>>
>>>
>>> Why is the TCP value being set here? This doesn't seem correct either.
>>> This implies TCP a TCP offload. It seems like this should only be
>>> setting ESP.
>>
>>
>> Honestly? Because when I was testing that, it didn't work without it. This
>> was one of the things I was going to come back to when I started working on
>> the csum and tso support.
>
> We might want to try testing with that dropped to see if we need it or
> not. I would suspect not since I would imagine this would cause bad
> things for non-TCP traffic. Also the inner L4 header shouldn't matter
> unless you are trying to offload it.
>
>>>
>>>> + if (protocol == htons(ETH_P_IP))
>>>> + itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
>>>
>>>
>>> Does the IPsec offload need to know if the frame is v4 or v6? I'm just
>>> wondering if it does or not.
>>
>>
>> Yes, I believe this is how it knows how much header to skip to find the ESP
>> header. However, I'll test that and see if it can come out.
>
> Like I mentioned last time it might be better to have this handled in
> ixgbe_tx_csum. If it is harmless we can probably just include it
> there. We should be able to do it in the block after the no_csum
> label. I'd be curious if not doing this up until now might have other
> effects such as impacting RSS since I know the whole reason for us
> having to do the CC stuff anyway was to actually get header split to
> work correctly with PF/VF loopback packets. It wouldn't surprise me if
> setting these fields defines the packet type received on the other
> end.
>
>>> If not then this probably isn't needed.
>>> One thought on this line is you might look at moving it into
>>> ixgbe_tx_csum. If setting the bit is harmless without setting IXSM we
>>> might look at moving it into the end of ixgbe_tx_csum and just make it
>>> compare against first->protocol there.
>>
>>
>>>
>>>> + itd->trailer_len = xs->props.trailer_len;
>>>> + }
>>>> + if (tsa->encrypt)
>>>> + itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
>>>> +
>>>> + return 1;
>>>> +}
>>>> +
>>>> +/**
>>>> * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor
>>>> * @rx_ring: receiving ring
>>>> * @rx_desc: receive data descriptor
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>>>> index f1bfae0..d7875b3 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
>>>> @@ -1261,7 +1261,7 @@ void ixgbe_clear_interrupt_scheme(struct
>>>> ixgbe_adapter *adapter)
>>>> }
>>>>
>>>> void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
>>>> - u32 fcoe_sof_eof, u32 type_tucmd, u32
>>>> mss_l4len_idx)
>>>> + u32 fceof_saidx, u32 type_tucmd, u32
>>>> mss_l4len_idx)
>>>> {
>>>> struct ixgbe_adv_tx_context_desc *context_desc;
>>>> u16 i = tx_ring->next_to_use;
>>>> @@ -1275,7 +1275,7 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring,
>>>> u32 vlan_macip_lens,
>>>> type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
>>>>
>>>> context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
>>>> - context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof);
>>>> + context_desc->fceof_saidx = cpu_to_le32(fceof_saidx);
>>>> context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
>>>> context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
>>>> }
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>>> index 60f9f2d..c857594 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>>>> @@ -7659,9 +7659,10 @@ static void ixgbe_service_task(struct work_struct
>>>> *work)
>>>>
>>>> static int ixgbe_tso(struct ixgbe_ring *tx_ring,
>>>> struct ixgbe_tx_buffer *first,
>>>> - u8 *hdr_len)
>>>> + u8 *hdr_len,
>>>> + struct ixgbe_ipsec_tx_data *itd)
>>>> {
>>>> - u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
>>>> + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx, fceof_saidx = 0;
>>>> struct sk_buff *skb = first->skb;
>>>> union {
>>>> struct iphdr *v4;
>>>> @@ -7740,7 +7741,12 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
>>>> vlan_macip_lens |= (ip.hdr - skb->data) <<
>>>> IXGBE_ADVTXD_MACLEN_SHIFT;
>>>> vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
>>>>
>>>> - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd,
>>>> + if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
>>>> + fceof_saidx |= itd->sa_idx;
>>>> + type_tucmd |= itd->flags | itd->trailer_len;
>>>> + }
>
> So just a thought. Why bother with the TX_FLAGS_CHECK at all? It seems
> like in the case that the flag isn't set you would have itd->sa_idx
> equal to 0 anyway so it would still be the same result wouldn't it? It
> would save you from having to zero both fceof_saidx and itd->sa_idx
> since you could just pass itd->sa_idx and save yourself the extra
> variable.
>
> Also if flags and trailer_len are both being written to the same
> location why not combine them in your structure into one single 32 bit
> entry? It would allow you to essentially reduce this to one OR and you
> could just pass itd->sa_idx directly which should be a pretty
> significant savings in terms of instructions and cycles. Also you
> might want to consider bumping itd->sa_idx up to a 32b value. It will
> possibly cost you a cycle or so to convert the 16b value to a 32b
> value before writing it. If you merge the flags and trailer length you
> should have the space to spare to bump up the size.
>
>>>> +
>>>> + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx,
>>>> type_tucmd,
>>>> mss_l4len_idx);
>>>>
>>>> return 1;
>>>> @@ -7756,10 +7762,12 @@ static inline bool ixgbe_ipv6_csum_is_sctp(struct
>>>> sk_buff *skb)
>>>> }
>>>>
>>>> static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
>>>> - struct ixgbe_tx_buffer *first)
>>>> + struct ixgbe_tx_buffer *first,
>>>> + struct ixgbe_ipsec_tx_data *itd)
>>>> {
>>>> struct sk_buff *skb = first->skb;
>>>> u32 vlan_macip_lens = 0;
>>>> + u32 fceof_saidx = 0;
>>>> u32 type_tucmd = 0;
>>>>
>>>> if (skb->ip_summed != CHECKSUM_PARTIAL) {
>>>> @@ -7800,7 +7808,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring
>>>> *tx_ring,
>>>> vlan_macip_lens |= skb_network_offset(skb) <<
>>>> IXGBE_ADVTXD_MACLEN_SHIFT;
>>>> vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
>>>>
>>>> - ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, 0);
>>>> + if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
>>>> + fceof_saidx |= itd->sa_idx;
>>>> + type_tucmd |= itd->flags | itd->trailer_len;
>>>> + }
>>>> +
>>>> + ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx,
>>>> type_tucmd, 0);
>>>> }
>>>>
>>>> #define IXGBE_SET_FLAG(_input, _flag, _result) \
>>>> @@ -7843,11 +7856,16 @@ static void ixgbe_tx_olinfo_status(union
>>>> ixgbe_adv_tx_desc *tx_desc,
>>>> IXGBE_TX_FLAGS_CSUM,
>>>> IXGBE_ADVTXD_POPTS_TXSM);
>>>>
>>>> - /* enble IPv4 checksum for TSO */
>>>> + /* enable IPv4 checksum for TSO */
>>>> olinfo_status |= IXGBE_SET_FLAG(tx_flags,
>>>> IXGBE_TX_FLAGS_IPV4,
>>>> IXGBE_ADVTXD_POPTS_IXSM);
>>>>
>>>> + /* enable IPsec */
>>>> + olinfo_status |= IXGBE_SET_FLAG(tx_flags,
>>>> + IXGBE_TX_FLAGS_IPSEC,
>>>> + IXGBE_ADVTXD_POPTS_IPSEC);
>>>> +
>>>> /*
>>>> * Check Context must be set if Tx switch is enabled, which it
>>>> * always is for case where virtual functions are running
>>>> @@ -8306,6 +8324,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff
>>>> *skb,
>>>> u32 tx_flags = 0;
>>>> unsigned short f;
>>>> u16 count = TXD_USE_COUNT(skb_headlen(skb));
>>>> + struct ixgbe_ipsec_tx_data ipsec_tx = { 0 };
>>>> __be16 protocol = skb->protocol;
>>>> u8 hdr_len = 0;
>>>>
>>>> @@ -8394,6 +8413,9 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff
>>>> *skb,
>>>> }
>>>> }
>>>>
>>>> + if (skb->sp && ixgbe_ipsec_tx(tx_ring, skb, protocol, &ipsec_tx))
>>>> + tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
>>>
>>>
>>> You might just want to pull the skb->sp check into ixgbe_ipsec_tx and
>>> could pass tx_flags as a part of the first buffer. It doesn't really
>>> matter anyway as most of this will just be inlined so it will all end
>>> up a part of the same function anyway.
>>
>>
>> Since the function is defined in a different .o file, are you sure it will
>> get inlined? I put the skb->sp check here to make sure we don't do an
>> unnecessary jump.
>
> You're right. I forgot you are defining this in a different file.
>
> Still I would like to see this moved down though. Where it is at
> doesn't really flow with everything else since FCoE and this aren't
> likely to ever interact so I would rather us check for FCoE and then
> get into the IPsec logic.
>
>>>
>>> Also I would move this down so that it is handled after the fields in
>>> the first buffer_info structure are set. Then this can ll just fall
>>> inline with the TSO block and get handled there.
>>>
>>>> +
>>>> /* record initial flags and protocol */
>>>> first->tx_flags = tx_flags;
>>>> first->protocol = protocol;
>>>> @@ -8410,11 +8432,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff
>>>> *skb,
>>>> }
>>>>
>>>> #endif /* IXGBE_FCOE */
>>>
>>>
>>> So if you move the function down here it will help to avoid any other
>>> complication. In addition you could follow the same logic that we do
>>> for ixgbe_tso/fso so you could drop the frame instead of transmitting
>>> it if it is requesting a bad offload.
>>
>>
>> Sure
>>
>> sln
>>
>>
>>>
>>>> - tso = ixgbe_tso(tx_ring, first, &hdr_len);
>>>> + tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
>>>> if (tso < 0)
>>>> goto out_drop;
>>>> else if (!tso)
>>>> - ixgbe_tx_csum(tx_ring, first);
>>>> + ixgbe_tx_csum(tx_ring, first, &ipsec_tx);
>>>>
>>>> /* add the ATR filter if ATR is on */
>>>> if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
>>>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
>>>> b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
>>>> index 3df0763..0ac725fa 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
>>>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
>>>> @@ -2856,7 +2856,7 @@ union ixgbe_adv_rx_desc {
>>>> /* Context descriptors */
>>>> struct ixgbe_adv_tx_context_desc {
>>>> __le32 vlan_macip_lens;
>>>> - __le32 seqnum_seed;
>>>> + __le32 fceof_saidx;
>>>> __le32 type_tucmd_mlhl;
>>>> __le32 mss_l4len_idx;
>>>> };
>>>> --
>>>> 2.7.4
>>>>
>>>> _______________________________________________
>>>> Intel-wired-lan mailing list
>>>> Intel-wired-lan at osuosl.org
>>>> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
More information about the Intel-wired-lan
mailing list