Skip to content

Commit 70006af

Browse files
committed
Merge branch 'ebpf_skb_fields'
Alexei Starovoitov says: ==================== bpf: allow eBPF access skb fields V1->V2: - refactored field access converter into common helper convert_skb_access() used in both classic and extended BPF - added missing build_bug_on for field 'len' - added comment to uapi/linux/bpf.h as suggested by Daniel - dropped exposing 'ifindex' field for now classic BPF has a way to access skb fields, whereas extended BPF didn't. This patch introduces this ability. Classic BPF can access fields via negative SKF_AD_OFF offset. Positive bpf_ld_abs N is treated as load from packet, whereas bpf_ld_abs -0x1000 + N is treated as skb fields access. Many offsets were hard coded over years: SKF_AD_PROTOCOL, SKF_AD_PKTTYPE, etc. The problem with this approach was that for every new field classic bpf assembler had to be tweaked. I've considered doing the same for extended, but for every new field the LLVM compiler would have to be modified, since it would need to add a new intrinsic. It could be done with a single intrinsic and magic offset or use of inline assembler, but neither is clean from the compiler backend point of view, since they look like calls but shouldn't scratch caller-saved registers. Another approach was to introduce new helper functions like bpf_get_pkt_type() for every field that we want to access, but that is equally ugly for the kernel and slow, since helpers are calls and they are slower than just loads. In theory helper calls can be 'inlined' inside the kernel into direct loads, but since they were calls for user space, the compiler would have to spill registers around such calls anyway. Teaching the compiler to treat such helpers differently is even uglier. There were a few other ideas considered. 
At the end the best seems to be to introduce a user accessible mirror of the in-kernel sk_buff structure: struct __sk_buff { __u32 len; __u32 pkt_type; __u32 mark; __u32 queue_mapping; }; bpf programs will do: int bpf_prog1(struct __sk_buff *skb) { __u32 var = skb->pkt_type; which will be compiled to bpf assembler as: dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type) bpf verifier will check validity of access and will convert it to: dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset)) dst_reg &= 7 since 'pkt_type' is a bitfield. No new instructions added. LLVM doesn't need to be modified. JITs don't change and the verifier already knows when it accesses the 'ctx' pointer. The only thing needed was to convert the user visible offset within __sk_buff to the kernel internal offset within sk_buff. For 'len' and other fields the conversion is trivial. Converting 'pkt_type' takes 2 or 3 instructions depending on endianness. More fields can be exposed by adding to the end of the 'struct __sk_buff'. Like vlan_tci and others can be added later. When the pkt_type field is moved around, goes into a different structure, is removed or its size changes, the function convert_skb_access() would need to be updated and it will cover both classic and extended. Patch 2 updates examples to demonstrate how fields are accessed and adds new tests for the verifier, since it needs to detect a corner case when an attacker is using a single bpf instruction in two branches with different register types. The 4 fields of __sk_buff are already exposed to user space via classic bpf and I believe they're useful in extended as well. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents a498cfe + 614cd3b commit 70006af

10 files changed

Lines changed: 335 additions & 51 deletions

File tree

include/linux/bpf.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ struct bpf_verifier_ops {
103103
* with 'type' (read or write) is allowed
104104
*/
105105
bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
106+
107+
u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
108+
struct bpf_insn *insn);
106109
};
107110

108111
struct bpf_prog_type_list {
@@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f);
133136
void bpf_map_put(struct bpf_map *map);
134137

135138
/* verify correctness of eBPF program */
136-
int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
139+
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
137140
#else
138141
static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
139142
{

include/uapi/linux/bpf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,4 +170,14 @@ enum bpf_func_id {
170170
__BPF_FUNC_MAX_ID,
171171
};
172172

173+
/* user accessible mirror of in-kernel sk_buff.
174+
* new fields can only be added to the end of this structure
175+
*/
176+
struct __sk_buff {
177+
__u32 len;
178+
__u32 pkt_type;
179+
__u32 mark;
180+
__u32 queue_mapping;
181+
};
182+
173183
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/syscall.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr)
519519
goto free_prog;
520520

521521
/* run eBPF verifier */
522-
err = bpf_check(prog, attr);
522+
err = bpf_check(&prog, attr);
523523
if (err < 0)
524524
goto free_used_maps;
525525

kernel/bpf/verifier.c

Lines changed: 136 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env)
16201620
return err;
16211621

16221622
} else if (class == BPF_LDX) {
1623-
if (BPF_MODE(insn->code) != BPF_MEM ||
1624-
insn->imm != 0) {
1625-
verbose("BPF_LDX uses reserved fields\n");
1626-
return -EINVAL;
1627-
}
1623+
enum bpf_reg_type src_reg_type;
1624+
1625+
/* check for reserved fields is already done */
1626+
16281627
/* check src operand */
16291628
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
16301629
if (err)
@@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env)
16431642
if (err)
16441643
return err;
16451644

1645+
src_reg_type = regs[insn->src_reg].type;
1646+
1647+
if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
1648+
/* saw a valid insn
1649+
* dst_reg = *(u32 *)(src_reg + off)
1650+
* use reserved 'imm' field to mark this insn
1651+
*/
1652+
insn->imm = src_reg_type;
1653+
1654+
} else if (src_reg_type != insn->imm &&
1655+
(src_reg_type == PTR_TO_CTX ||
1656+
insn->imm == PTR_TO_CTX)) {
1657+
/* ABuser program is trying to use the same insn
1658+
* dst_reg = *(u32*) (src_reg + off)
1659+
* with different pointer types:
1660+
* src_reg == ctx in one branch and
1661+
* src_reg == stack|map in some other branch.
1662+
* Reject it.
1663+
*/
1664+
verbose("same insn cannot be used with different pointers\n");
1665+
return -EINVAL;
1666+
}
1667+
16461668
} else if (class == BPF_STX) {
16471669
if (BPF_MODE(insn->code) == BPF_XADD) {
16481670
err = check_xadd(env, insn);
@@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
17901812
int i, j;
17911813

17921814
for (i = 0; i < insn_cnt; i++, insn++) {
1815+
if (BPF_CLASS(insn->code) == BPF_LDX &&
1816+
(BPF_MODE(insn->code) != BPF_MEM ||
1817+
insn->imm != 0)) {
1818+
verbose("BPF_LDX uses reserved fields\n");
1819+
return -EINVAL;
1820+
}
1821+
17931822
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
17941823
struct bpf_map *map;
17951824
struct fd f;
@@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
18811910
insn->src_reg = 0;
18821911
}
18831912

1913+
static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
1914+
{
1915+
struct bpf_insn *insn = prog->insnsi;
1916+
int insn_cnt = prog->len;
1917+
int i;
1918+
1919+
for (i = 0; i < insn_cnt; i++, insn++) {
1920+
if (BPF_CLASS(insn->code) != BPF_JMP ||
1921+
BPF_OP(insn->code) == BPF_CALL ||
1922+
BPF_OP(insn->code) == BPF_EXIT)
1923+
continue;
1924+
1925+
/* adjust offset of jmps if necessary */
1926+
if (i < pos && i + insn->off + 1 > pos)
1927+
insn->off += delta;
1928+
else if (i > pos && i + insn->off + 1 < pos)
1929+
insn->off -= delta;
1930+
}
1931+
}
1932+
1933+
/* convert load instructions that access fields of 'struct __sk_buff'
1934+
* into sequence of instructions that access fields of 'struct sk_buff'
1935+
*/
1936+
static int convert_ctx_accesses(struct verifier_env *env)
1937+
{
1938+
struct bpf_insn *insn = env->prog->insnsi;
1939+
int insn_cnt = env->prog->len;
1940+
struct bpf_insn insn_buf[16];
1941+
struct bpf_prog *new_prog;
1942+
u32 cnt;
1943+
int i;
1944+
1945+
if (!env->prog->aux->ops->convert_ctx_access)
1946+
return 0;
1947+
1948+
for (i = 0; i < insn_cnt; i++, insn++) {
1949+
if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
1950+
continue;
1951+
1952+
if (insn->imm != PTR_TO_CTX) {
1953+
/* clear internal mark */
1954+
insn->imm = 0;
1955+
continue;
1956+
}
1957+
1958+
cnt = env->prog->aux->ops->
1959+
convert_ctx_access(insn->dst_reg, insn->src_reg,
1960+
insn->off, insn_buf);
1961+
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
1962+
verbose("bpf verifier is misconfigured\n");
1963+
return -EINVAL;
1964+
}
1965+
1966+
if (cnt == 1) {
1967+
memcpy(insn, insn_buf, sizeof(*insn));
1968+
continue;
1969+
}
1970+
1971+
/* several new insns need to be inserted. Make room for them */
1972+
insn_cnt += cnt - 1;
1973+
new_prog = bpf_prog_realloc(env->prog,
1974+
bpf_prog_size(insn_cnt),
1975+
GFP_USER);
1976+
if (!new_prog)
1977+
return -ENOMEM;
1978+
1979+
new_prog->len = insn_cnt;
1980+
1981+
memmove(new_prog->insnsi + i + cnt, new_prog->insnsi + i + 1,
1982+
sizeof(*insn) * (insn_cnt - i - cnt));
1983+
1984+
/* copy substitute insns in place of load instruction */
1985+
memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
1986+
1987+
/* adjust branches in the whole program */
1988+
adjust_branches(new_prog, i, cnt - 1);
1989+
1990+
/* keep walking new program and skip insns we just inserted */
1991+
env->prog = new_prog;
1992+
insn = new_prog->insnsi + i + cnt - 1;
1993+
i += cnt - 1;
1994+
}
1995+
1996+
return 0;
1997+
}
1998+
18841999
static void free_states(struct verifier_env *env)
18852000
{
18862001
struct verifier_state_list *sl, *sln;
@@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env)
19032018
kfree(env->explored_states);
19042019
}
19052020

1906-
int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
2021+
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
19072022
{
19082023
char __user *log_ubuf = NULL;
19092024
struct verifier_env *env;
19102025
int ret = -EINVAL;
19112026

1912-
if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
2027+
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
19132028
return -E2BIG;
19142029

19152030
/* 'struct verifier_env' can be global, but since it's not small,
@@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
19192034
if (!env)
19202035
return -ENOMEM;
19212036

1922-
env->prog = prog;
2037+
env->prog = *prog;
19232038

19242039
/* grab the mutex to protect few globals used by verifier */
19252040
mutex_lock(&bpf_verifier_lock);
@@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
19512066
if (ret < 0)
19522067
goto skip_full_check;
19532068

1954-
env->explored_states = kcalloc(prog->len,
2069+
env->explored_states = kcalloc(env->prog->len,
19552070
sizeof(struct verifier_state_list *),
19562071
GFP_USER);
19572072
ret = -ENOMEM;
@@ -1968,6 +2083,10 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
19682083
while (pop_stack(env, NULL) >= 0);
19692084
free_states(env);
19702085

2086+
if (ret == 0)
2087+
/* program is valid, convert *(u32*)(ctx + off) accesses */
2088+
ret = convert_ctx_accesses(env);
2089+
19712090
if (log_level && log_len >= log_size - 1) {
19722091
BUG_ON(log_len >= log_size);
19732092
/* verifier log exceeded user supplied buffer */
@@ -1983,18 +2102,18 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
19832102

19842103
if (ret == 0 && env->used_map_cnt) {
19852104
/* if program passed verifier, update used_maps in bpf_prog_info */
1986-
prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
1987-
sizeof(env->used_maps[0]),
1988-
GFP_KERNEL);
2105+
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
2106+
sizeof(env->used_maps[0]),
2107+
GFP_KERNEL);
19892108

1990-
if (!prog->aux->used_maps) {
2109+
if (!env->prog->aux->used_maps) {
19912110
ret = -ENOMEM;
19922111
goto free_log_buf;
19932112
}
19942113

1995-
memcpy(prog->aux->used_maps, env->used_maps,
2114+
memcpy(env->prog->aux->used_maps, env->used_maps,
19962115
sizeof(env->used_maps[0]) * env->used_map_cnt);
1997-
prog->aux->used_map_cnt = env->used_map_cnt;
2116+
env->prog->aux->used_map_cnt = env->used_map_cnt;
19982117

19992118
/* program is valid. Convert pseudo bpf_ld_imm64 into generic
20002119
* bpf_ld_imm64 instructions
@@ -2006,11 +2125,12 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
20062125
if (log_level)
20072126
vfree(log_buf);
20082127
free_env:
2009-
if (!prog->aux->used_maps)
2128+
if (!env->prog->aux->used_maps)
20102129
/* if we didn't copy map pointers into bpf_prog_info, release
20112130
* them now. Otherwise free_bpf_prog_info() will release them.
20122131
*/
20132132
release_maps(env);
2133+
*prog = env->prog;
20142134
kfree(env);
20152135
mutex_unlock(&bpf_verifier_lock);
20162136
return ret;

0 commit comments

Comments
 (0)