Skip to content

Commit de8f3a8

Browse files
borkmann authored and davem330 committed
bpf: add meta pointer for direct access
This work enables generic transfer of metadata from XDP into skb. The basic idea is that we can make use of the fact that the resulting skb must be linear and already comes with a larger headroom for supporting bpf_xdp_adjust_head(), which mangles xdp->data. Here, we base our work on a similar principle and introduce a small helper, bpf_xdp_adjust_meta(), for adjusting a new pointer called xdp->data_meta. Thus, the packet has a flexible and programmable room for meta data, followed by the actual packet data. struct xdp_buff is therefore laid out such that we first point to data_hard_start, then data_meta directly prepended to data, followed by data_end marking the end of the packet. bpf_xdp_adjust_head() takes into account whether we have meta data already prepended and, if so, memmove()s this along with the given offset, provided there's enough room. xdp->data_meta is optional and programs are not required to use it. The rationale is that when we process the packet in XDP (e.g. as a DoS filter), we can push further meta data along with it for the XDP_PASS case, and give the guarantee that a clsact ingress BPF program on the same device can pick this up for further post-processing. Since we work with skb there, we can also set skb->mark, skb->priority or other skb meta data out of BPF; thus, having this scratch space generic and programmable allows for more flexibility than defining a direct 1:1 transfer of potentially new XDP members into skb (it's also more efficient, as we don't need to initialize/handle each of such new members). The facility also works together with GRO aggregation. The scratch space at the head of the packet can be a multiple of 4 bytes, up to 32 bytes large. Drivers not yet supporting xdp->data_meta can simply be set up with xdp->data_meta as xdp->data + 1, as bpf_xdp_adjust_meta() will detect this and bail out, such that the subsequent match against xdp->data for later access is guaranteed to fail.
The verifier treats xdp->data_meta/xdp->data the same way as we treat xdp->data/xdp->data_end pointer comparisons. The requirement for doing the compare against xdp->data is that it hasn't been modified from its original address we got from ctx access. It may have a range marking already from prior successful xdp->data/xdp->data_end pointer comparisons, though. Signed-off-by: Daniel Borkmann <[email protected]> Acked-by: Alexei Starovoitov <[email protected]> Acked-by: John Fastabend <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6aaae2b commit de8f3a8

19 files changed

Lines changed: 297 additions & 42 deletions

File tree

drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
9494

9595
xdp.data_hard_start = *data_ptr - offset;
9696
xdp.data = *data_ptr;
97+
xdp_set_data_meta_invalid(&xdp);
9798
xdp.data_end = *data_ptr + *len;
9899
orig_data = xdp.data;
99100
mapping = rx_buf->mapping - bp->rx_dma_offset;

drivers/net/ethernet/cavium/thunder/nicvf_main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
523523

524524
xdp.data_hard_start = page_address(page);
525525
xdp.data = (void *)cpu_addr;
526+
xdp_set_data_meta_invalid(&xdp);
526527
xdp.data_end = xdp.data + len;
527528
orig_data = xdp.data;
528529

drivers/net/ethernet/intel/i40e/i40e_txrx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2107,6 +2107,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
21072107
if (!skb) {
21082108
xdp.data = page_address(rx_buffer->page) +
21092109
rx_buffer->page_offset;
2110+
xdp_set_data_meta_invalid(&xdp);
21102111
xdp.data_hard_start = xdp.data -
21112112
i40e_rx_offset(rx_ring);
21122113
xdp.data_end = xdp.data + size;

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,6 +2326,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
23262326
if (!skb) {
23272327
xdp.data = page_address(rx_buffer->page) +
23282328
rx_buffer->page_offset;
2329+
xdp_set_data_meta_invalid(&xdp);
23292330
xdp.data_hard_start = xdp.data -
23302331
ixgbe_rx_offset(rx_ring);
23312332
xdp.data_end = xdp.data + size;

drivers/net/ethernet/mellanox/mlx4/en_rx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
762762

763763
xdp.data_hard_start = va - frags[0].page_offset;
764764
xdp.data = va;
765+
xdp_set_data_meta_invalid(&xdp);
765766
xdp.data_end = xdp.data + length;
766767
orig_data = xdp.data;
767768

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
794794
return false;
795795

796796
xdp.data = va + *rx_headroom;
797+
xdp_set_data_meta_invalid(&xdp);
797798
xdp.data_end = xdp.data + *len;
798799
xdp.data_hard_start = va;
799800

drivers/net/ethernet/netronome/nfp/nfp_net_common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,6 +1583,7 @@ static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
15831583

15841584
xdp.data_hard_start = hard_start;
15851585
xdp.data = data + *off;
1586+
xdp_set_data_meta_invalid(&xdp);
15861587
xdp.data_end = data + *off + *len;
15871588

15881589
orig_data = xdp.data;

drivers/net/ethernet/qlogic/qede/qede_fp.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
10041004

10051005
xdp.data_hard_start = page_address(bd->data);
10061006
xdp.data = xdp.data_hard_start + *data_offset;
1007+
xdp_set_data_meta_invalid(&xdp);
10071008
xdp.data_end = xdp.data + *len;
10081009

10091010
/* Queues always have a full reset currently, so for the time

drivers/net/tun.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1468,6 +1468,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
14681468

14691469
xdp.data_hard_start = buf;
14701470
xdp.data = buf + pad;
1471+
xdp_set_data_meta_invalid(&xdp);
14711472
xdp.data_end = xdp.data + len;
14721473
orig_data = xdp.data;
14731474
act = bpf_prog_run_xdp(xdp_prog, &xdp);

drivers/net/virtio_net.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
554554

555555
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
556556
xdp.data = xdp.data_hard_start + xdp_headroom;
557+
xdp_set_data_meta_invalid(&xdp);
557558
xdp.data_end = xdp.data + len;
558559
orig_data = xdp.data;
559560
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -686,6 +687,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
686687
data = page_address(xdp_page) + offset;
687688
xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
688689
xdp.data = data + vi->hdr_len;
690+
xdp_set_data_meta_invalid(&xdp);
689691
xdp.data_end = xdp.data + (len - vi->hdr_len);
690692
act = bpf_prog_run_xdp(xdp_prog, &xdp);
691693

0 commit comments

Comments
 (0)