Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/Makefile            |    6
-rw-r--r--  kernel/bpf/arraymap.c          |   17
-rw-r--r--  kernel/bpf/bpf_lsm.c           |   65
-rw-r--r--  kernel/bpf/bpf_struct_ops.c    |    9
-rw-r--r--  kernel/bpf/btf.c               |  159
-rw-r--r--  kernel/bpf/btf_iter.c          |    2
-rw-r--r--  kernel/bpf/btf_relocate.c      |    2
-rw-r--r--  kernel/bpf/cgroup.c            |    2
-rw-r--r--  kernel/bpf/core.c              |   21
-rw-r--r--  kernel/bpf/hashtab.c           |   16
-rw-r--r--  kernel/bpf/helpers.c           |   94
-rw-r--r--  kernel/bpf/inode.c             |    4
-rw-r--r--  kernel/bpf/local_storage.c     |    4
-rw-r--r--  kernel/bpf/memalloc.c          |   12
-rw-r--r--  kernel/bpf/relo_core.c         |    2
-rw-r--r--  kernel/bpf/reuseport_array.c   |    2
-rw-r--r--  kernel/bpf/stackmap.c          |  131
-rw-r--r--  kernel/bpf/syscall.c           |   31
-rw-r--r--  kernel/bpf/verifier.c          | 1291
-rw-r--r--  kernel/events/core.c           |    2
-rw-r--r--  kernel/trace/bpf_trace.c       |  108
-rw-r--r--  kernel/trace/trace_syscalls.c  |   12
22 files changed, 1458 insertions(+), 534 deletions(-)
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0291eef9ce92..9b9c151b5c82 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -52,9 +52,3 @@ obj-$(CONFIG_BPF_PRELOAD) += preload/
obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o
obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o
-
-# Some source files are common to libbpf.
-vpath %.c $(srctree)/kernel/bpf:$(srctree)/tools/lib/bpf
-
-$(obj)/%.o: %.c FORCE
- $(call if_changed_rule,cc_o_c)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index feabc0193852..79660e3fca4c 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -73,6 +73,9 @@ int array_map_alloc_check(union bpf_attr *attr)
/* avoid overflow on round_up(map->value_size) */
if (attr->value_size > INT_MAX)
return -E2BIG;
+ /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
+ if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
+ return -E2BIG;
return 0;
}
@@ -494,7 +497,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
if (map->btf_key_type_id)
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
rcu_read_unlock();
}
@@ -515,7 +518,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
seq_puts(m, "}\n");
@@ -600,7 +603,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
- return array->pptrs[index];
+ return (void *)(uintptr_t)array->pptrs[index];
return array_map_elem_ptr(array, index);
}
@@ -619,7 +622,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
- return array->pptrs[index];
+ return (void *)(uintptr_t)array->pptrs[index];
return array_map_elem_ptr(array, index);
}
@@ -632,7 +635,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
- void __percpu **pptr;
+ void __percpu *pptr;
u32 size;
meta.seq = seq;
@@ -648,7 +651,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
if (!info->percpu_value_buf) {
ctx.value = v;
} else {
- pptr = v;
+ pptr = (void __percpu *)(uintptr_t)v;
size = array->elem_size;
for_each_possible_cpu(cpu) {
copy_map_value_long(map, info->percpu_value_buf + off,
@@ -993,7 +996,7 @@ static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
prog_id = prog_fd_array_sys_lookup_elem(ptr);
btf_type_seq_show(map->btf, map->btf_value_type_id,
&prog_id, m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
}
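
Note: the new percpu value-size check above (duplicated in hashtab.c further down) turns a late percpu-allocator failure into an early -E2BIG at map creation time. A minimal illustration, assuming the usual PCPU_MIN_UNIT_SIZE of 32KB (names and numbers are for demonstration only):

    union bpf_attr attr = {
            .map_type    = BPF_MAP_TYPE_PERCPU_ARRAY,
            .key_size    = sizeof(__u32),
            .value_size  = 64 * 1024,  /* round_up(value_size, 8) > PCPU_MIN_UNIT_SIZE */
            .max_entries = 1,
    };
    /* bpf(BPF_MAP_CREATE, &attr, sizeof(attr)) now fails with -E2BIG up front
     * instead of erroring out later inside the percpu allocator.
     */
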
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 08a338e1f231..6292ac5f9bd1 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -11,7 +11,6 @@
#include <linux/lsm_hooks.h>
#include <linux/bpf_lsm.h>
#include <linux/kallsyms.h>
-#include <linux/bpf_verifier.h>
#include <net/bpf_sk_storage.h>
#include <linux/bpf_local_storage.h>
#include <linux/btf_ids.h>
@@ -36,6 +35,24 @@ BTF_SET_START(bpf_lsm_hooks)
#undef LSM_HOOK
BTF_SET_END(bpf_lsm_hooks)
+BTF_SET_START(bpf_lsm_disabled_hooks)
+BTF_ID(func, bpf_lsm_vm_enough_memory)
+BTF_ID(func, bpf_lsm_inode_need_killpriv)
+BTF_ID(func, bpf_lsm_inode_getsecurity)
+BTF_ID(func, bpf_lsm_inode_listsecurity)
+BTF_ID(func, bpf_lsm_inode_copy_up_xattr)
+BTF_ID(func, bpf_lsm_getselfattr)
+BTF_ID(func, bpf_lsm_getprocattr)
+BTF_ID(func, bpf_lsm_setprocattr)
+#ifdef CONFIG_KEYS
+BTF_ID(func, bpf_lsm_key_getsecurity)
+#endif
+#ifdef CONFIG_AUDIT
+BTF_ID(func, bpf_lsm_audit_rule_match)
+#endif
+BTF_ID(func, bpf_lsm_ismaclabel)
+BTF_SET_END(bpf_lsm_disabled_hooks)
+
/* List of LSM hooks that should operate on 'current' cgroup regardless
* of function signature.
*/
@@ -97,15 +114,24 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
{
+ u32 btf_id = prog->aux->attach_btf_id;
+ const char *func_name = prog->aux->attach_func_name;
+
if (!prog->gpl_compatible) {
bpf_log(vlog,
"LSM programs must have a GPL compatible license\n");
return -EINVAL;
}
- if (!btf_id_set_contains(&bpf_lsm_hooks, prog->aux->attach_btf_id)) {
+ if (btf_id_set_contains(&bpf_lsm_disabled_hooks, btf_id)) {
+ bpf_log(vlog, "attach_btf_id %u points to disabled hook %s\n",
+ btf_id, func_name);
+ return -EINVAL;
+ }
+
+ if (!btf_id_set_contains(&bpf_lsm_hooks, btf_id)) {
bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
- prog->aux->attach_btf_id, prog->aux->attach_func_name);
+ btf_id, func_name);
return -EINVAL;
}
@@ -390,3 +416,36 @@ const struct bpf_verifier_ops lsm_verifier_ops = {
.get_func_proto = bpf_lsm_func_proto,
.is_valid_access = btf_ctx_access,
};
+
+/* hooks return 0 or 1 */
+BTF_SET_START(bool_lsm_hooks)
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+BTF_ID(func, bpf_lsm_xfrm_state_pol_flow_match)
+#endif
+#ifdef CONFIG_AUDIT
+BTF_ID(func, bpf_lsm_audit_rule_known)
+#endif
+BTF_ID(func, bpf_lsm_inode_xattr_skipcap)
+BTF_SET_END(bool_lsm_hooks)
+
+int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
+ struct bpf_retval_range *retval_range)
+{
+ /* no return value range for void hooks */
+ if (!prog->aux->attach_func_proto->type)
+ return -EINVAL;
+
+ if (btf_id_set_contains(&bool_lsm_hooks, prog->aux->attach_btf_id)) {
+ retval_range->minval = 0;
+ retval_range->maxval = 1;
+ } else {
+ /* All other available LSM hooks, except task_prctl, return 0
+ * on success and negative error code on failure.
+ * To keep things simple, we only allow bpf progs to return 0
+ * or negative errno for task_prctl too.
+ */
+ retval_range->minval = -MAX_ERRNO;
+ retval_range->maxval = 0;
+ }
+ return 0;
+}
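
Note: as a sketch of what the new retval-range reporting means for program authors (the hook choice and policy helper are assumptions, not part of this diff): an LSM program attached to an ordinary hook may return 0 or a negative errno, matching the [-MAX_ERRNO, 0] range reported by bpf_lsm_get_retval_range(); hooks listed in bool_lsm_hooks are limited to 0 or 1 instead.

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    /* placeholder policy check, purely for illustration */
    static bool deny_this_file(struct file *file) { return false; }

    SEC("lsm/file_open")
    int BPF_PROG(restrict_open, struct file *file)
    {
            if (deny_this_file(file))
                    return -EPERM;      /* negative errno: inside the allowed range */
            return 0;                   /* success: inside the allowed range */
    }

    char LICENSE[] SEC("license") = "GPL";
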
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 0d515ec57aa5..fda3dd2ee984 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -837,7 +837,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
btf_type_seq_show(st_map->btf,
map->btf_vmlinux_value_type_id,
value, m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
kfree(value);
@@ -1040,6 +1040,13 @@ void bpf_struct_ops_put(const void *kdata)
bpf_map_put(&st_map->map);
}
+int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
+{
+ void *func_ptr = *(void **)(st_ops->cfi_stubs + moff);
+
+ return func_ptr ? 0 : -ENOTSUPP;
+}
+
static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
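
Note: a sketch of how a subsystem could consume bpf_struct_ops_supported(); the call site and the chosen member are assumptions for illustration, only the helper's semantics (0 if the member has a CFI stub, -ENOTSUPP otherwise) come from this diff:

    /* moff is the byte offset of the member inside the struct_ops struct */
    int err = bpf_struct_ops_supported(st_ops,
                    offsetof(struct tcp_congestion_ops, cong_control));
    if (err)
            return err;     /* -ENOTSUPP: no stub, member cannot be implemented */
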
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ba91be08763a..8ae092ae1573 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -212,7 +212,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
- BTF_KFUNC_HOOK_CGROUP_SKB,
+ BTF_KFUNC_HOOK_CGROUP,
BTF_KFUNC_HOOK_SCHED_ACT,
BTF_KFUNC_HOOK_SK_SKB,
BTF_KFUNC_HOOK_SOCKET_FILTER,
@@ -790,7 +790,7 @@ const char *btf_str_by_offset(const struct btf *btf, u32 offset)
return NULL;
}
-static bool __btf_name_valid(const struct btf *btf, u32 offset)
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
/* offset must be valid */
const char *src = btf_str_by_offset(btf, offset);
@@ -811,11 +811,6 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset)
return !*src;
}
-static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
-{
- return __btf_name_valid(btf, offset);
-}
-
/* Allow any printable character in DATASEC names */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
@@ -3761,6 +3756,7 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
return -EINVAL;
}
+/* Callers have to ensure the life cycle of btf if it is program BTF */
static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
struct btf_field_info *info)
{
@@ -3789,7 +3785,6 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
field->kptr.dtor = NULL;
id = info->kptr.type_id;
kptr_btf = (struct btf *)btf;
- btf_get(kptr_btf);
goto found_dtor;
}
if (id < 0)
@@ -4631,7 +4626,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env,
}
if (!t->name_off ||
- !__btf_name_valid(env->btf, t->name_off)) {
+ !btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name");
return -EINVAL;
}
@@ -5519,36 +5514,72 @@ static const char *alloc_obj_fields[] = {
static struct btf_struct_metas *
btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
{
- union {
- struct btf_id_set set;
- struct {
- u32 _cnt;
- u32 _ids[ARRAY_SIZE(alloc_obj_fields)];
- } _arr;
- } aof;
struct btf_struct_metas *tab = NULL;
+ struct btf_id_set *aof;
int i, n, id, ret;
BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0);
BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32));
- memset(&aof, 0, sizeof(aof));
+ aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN);
+ if (!aof)
+ return ERR_PTR(-ENOMEM);
+ aof->cnt = 0;
+
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) {
/* Try to find whether this special type exists in user BTF, and
* if so remember its ID so we can easily find it among members
* of structs that we iterate in the next loop.
*/
+ struct btf_id_set *new_aof;
+
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT);
if (id < 0)
continue;
- aof.set.ids[aof.set.cnt++] = id;
+
+ new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_aof) {
+ ret = -ENOMEM;
+ goto free_aof;
+ }
+ aof = new_aof;
+ aof->ids[aof->cnt++] = id;
}
- if (!aof.set.cnt)
+ n = btf_nr_types(btf);
+ for (i = 1; i < n; i++) {
+ /* Try to find if there are kptrs in user BTF and remember their ID */
+ struct btf_id_set *new_aof;
+ struct btf_field_info tmp;
+ const struct btf_type *t;
+
+ t = btf_type_by_id(btf, i);
+ if (!t) {
+ ret = -EINVAL;
+ goto free_aof;
+ }
+
+ ret = btf_find_kptr(btf, t, 0, 0, &tmp);
+ if (ret != BTF_FIELD_FOUND)
+ continue;
+
+ new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!new_aof) {
+ ret = -ENOMEM;
+ goto free_aof;
+ }
+ aof = new_aof;
+ aof->ids[aof->cnt++] = i;
+ }
+
+ if (!aof->cnt) {
+ kfree(aof);
return NULL;
- sort(&aof.set.ids, aof.set.cnt, sizeof(aof.set.ids[0]), btf_id_cmp_func, NULL);
+ }
+ sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL);
- n = btf_nr_types(btf);
for (i = 1; i < n; i++) {
struct btf_struct_metas *new_tab;
const struct btf_member *member;
@@ -5558,17 +5589,13 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
int j, tab_cnt;
t = btf_type_by_id(btf, i);
- if (!t) {
- ret = -EINVAL;
- goto free;
- }
if (!__btf_type_is_struct(t))
continue;
cond_resched();
for_each_member(j, t, member) {
- if (btf_id_set_contains(&aof.set, member->type))
+ if (btf_id_set_contains(aof, member->type))
goto parse;
}
continue;
@@ -5587,7 +5614,8 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type = &tab->types[tab->cnt];
type->btf_id = i;
record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
- BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
+ BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT |
+ BPF_KPTR, t->size);
/* The record cannot be unset, treat it as an error if so */
if (IS_ERR_OR_NULL(record)) {
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
@@ -5596,9 +5624,12 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
type->record = record;
tab->cnt++;
}
+ kfree(aof);
return tab;
free:
btf_struct_metas_free(tab);
+free_aof:
+ kfree(aof);
return ERR_PTR(ret);
}
@@ -6245,12 +6276,11 @@ static struct btf *btf_parse_module(const char *module_name, const void *data,
btf->kernel_btf = true;
snprintf(btf->name, sizeof(btf->name), "%s", module_name);
- btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
+ btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN);
if (!btf->data) {
err = -ENOMEM;
goto errout;
}
- memcpy(btf->data, data, data_size);
btf->data_size = data_size;
err = btf_parse_hdr(env);
@@ -6418,8 +6448,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (arg == nr_args) {
switch (prog->expected_attach_type) {
- case BPF_LSM_CGROUP:
case BPF_LSM_MAC:
+ /* mark we are accessing the return value */
+ info->is_retval = true;
+ fallthrough;
+ case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
@@ -8054,15 +8087,44 @@ BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
BTF_TRACING_TYPE_xxx
#undef BTF_TRACING_TYPE
+/* Validate well-formedness of iter argument type.
+ * On success, return positive BTF ID of iter state's STRUCT type.
+ * On error, negative error is returned.
+ */
+int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx)
+{
+ const struct btf_param *arg;
+ const struct btf_type *t;
+ const char *name;
+ int btf_id;
+
+ if (btf_type_vlen(func) <= arg_idx)
+ return -EINVAL;
+
+ arg = &btf_params(func)[arg_idx];
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!t || !btf_type_is_ptr(t))
+ return -EINVAL;
+ t = btf_type_skip_modifiers(btf, t->type, &btf_id);
+ if (!t || !__btf_type_is_struct(t))
+ return -EINVAL;
+
+ name = btf_name_by_offset(btf, t->name_off);
+ if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
+ return -EINVAL;
+
+ return btf_id;
+}
+
static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
const struct btf_type *func, u32 func_flags)
{
u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
- const char *name, *sfx, *iter_name;
- const struct btf_param *arg;
+ const char *sfx, *iter_name;
const struct btf_type *t;
char exp_name[128];
u32 nr_args;
+ int btf_id;
/* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */
if (!flags || (flags & (flags - 1)))
@@ -8073,28 +8135,21 @@ static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name,
if (nr_args < 1)
return -EINVAL;
- arg = &btf_params(func)[0];
- t = btf_type_skip_modifiers(btf, arg->type, NULL);
- if (!t || !btf_type_is_ptr(t))
- return -EINVAL;
- t = btf_type_skip_modifiers(btf, t->type, NULL);
- if (!t || !__btf_type_is_struct(t))
- return -EINVAL;
-
- name = btf_name_by_offset(btf, t->name_off);
- if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1))
- return -EINVAL;
+ btf_id = btf_check_iter_arg(btf, func, 0);
+ if (btf_id < 0)
+ return btf_id;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to
* fit nicely in stack slots
*/
+ t = btf_type_by_id(btf, btf_id);
if (t->size == 0 || (t->size % 8))
return -EINVAL;
/* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *)
* naming pattern
*/
- iter_name = name + sizeof(ITER_PREFIX) - 1;
+ iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1;
if (flags & KF_ITER_NEW)
sfx = "new";
else if (flags & KF_ITER_NEXT)
@@ -8309,13 +8364,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_STRUCT_OPS:
return BTF_KFUNC_HOOK_STRUCT_OPS;
case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_LSM:
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
- return BTF_KFUNC_HOOK_CGROUP_SKB;
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ return BTF_KFUNC_HOOK_CGROUP;
case BPF_PROG_TYPE_SCHED_ACT:
return BTF_KFUNC_HOOK_SCHED_ACT;
case BPF_PROG_TYPE_SK_SKB:
@@ -8891,6 +8952,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
struct bpf_core_cand_list cands = {};
struct bpf_core_relo_res targ_res;
struct bpf_core_spec *specs;
+ const struct btf_type *type;
int err;
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
@@ -8900,6 +8962,13 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
if (!specs)
return -ENOMEM;
+ type = btf_type_by_id(ctx->btf, relo->type_id);
+ if (!type) {
+ bpf_log(ctx->log, "relo #%u: bad type id %u\n",
+ relo_idx, relo->type_id);
+ return -EINVAL;
+ }
+
if (need_cands) {
struct bpf_cand_cache *cc;
int i;
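
Note: btf_check_iter_arg() factors out the argument validation that btf_check_iter_kfuncs() used to do inline. The shape it accepts, sketched for a hypothetical bpf_iter_foo iterator (only the rules -- ITER_PREFIX naming, pointer-to-struct argument, state size a multiple of 8 -- come from the code above):

    /* state type: name starts with "bpf_iter_", size is a multiple of 8 */
    struct bpf_iter_foo {
            __u64 __opaque[2];
    } __attribute__((aligned(8)));

    /* the validated argument is a pointer to the state struct, and the
     * kfunc names follow the bpf_iter_foo_{new,next,destroy} pattern
     */
    __bpf_kfunc int   bpf_iter_foo_new(struct bpf_iter_foo *it, int start);
    __bpf_kfunc int  *bpf_iter_foo_next(struct bpf_iter_foo *it);
    __bpf_kfunc void  bpf_iter_foo_destroy(struct bpf_iter_foo *it);
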
diff --git a/kernel/bpf/btf_iter.c b/kernel/bpf/btf_iter.c
new file mode 100644
index 000000000000..0e2c66a52df9
--- /dev/null
+++ b/kernel/bpf/btf_iter.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include "../../tools/lib/bpf/btf_iter.c"
diff --git a/kernel/bpf/btf_relocate.c b/kernel/bpf/btf_relocate.c
new file mode 100644
index 000000000000..c12ccbf66507
--- /dev/null
+++ b/kernel/bpf/btf_relocate.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include "../../tools/lib/bpf/btf_relocate.c"
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 8ba73042a239..e7113d700b87 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -2581,6 +2581,8 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
#endif
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
default:
return NULL;
}
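
Note: illustrative only (section name, context type and map layout are assumptions): with the proto exposed through cgroup_current_func_proto(), cgroup-attached programs can now use the classic bpf_current_task_under_cgroup() helper against a BPF_MAP_TYPE_CGROUP_ARRAY:

    struct {
            __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
            __uint(max_entries, 1);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, sizeof(__u32));
    } cgrp_map SEC(".maps");

    SEC("cgroup/dev")
    int dev_filter(struct bpf_cgroup_dev_ctx *ctx)
    {
            /* returns 1 if current runs under the cgroup stored at index 0 */
            if (bpf_current_task_under_cgroup(&cgrp_map, 0) == 1)
                    return 1;       /* allow */
            return 0;               /* deny */
    }
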
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7ee62e38faf0..4e07cc057d6f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2302,6 +2302,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
{
enum bpf_prog_type prog_type = resolve_prog_type(fp);
bool ret;
+ struct bpf_prog_aux *aux = fp->aux;
if (fp->kprobe_override)
return false;
@@ -2311,7 +2312,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
* in the case of devmap and cpumap). Until device checks
* are implemented, prohibit adding dev-bound programs to program maps.
*/
- if (bpf_prog_is_dev_bound(fp->aux))
+ if (bpf_prog_is_dev_bound(aux))
return false;
spin_lock(&map->owner.lock);
@@ -2321,12 +2322,26 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
*/
map->owner.type = prog_type;
map->owner.jited = fp->jited;
- map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
+ map->owner.xdp_has_frags = aux->xdp_has_frags;
+ map->owner.attach_func_proto = aux->attach_func_proto;
ret = true;
} else {
ret = map->owner.type == prog_type &&
map->owner.jited == fp->jited &&
- map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
+ map->owner.xdp_has_frags == aux->xdp_has_frags;
+ if (ret &&
+ map->owner.attach_func_proto != aux->attach_func_proto) {
+ switch (prog_type) {
+ case BPF_PROG_TYPE_TRACING:
+ case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ ret = false;
+ break;
+ default:
+ break;
+ }
+ }
}
spin_unlock(&map->owner.lock);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 06115f8728e8..b14b87463ee0 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -462,6 +462,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
* kmalloc-able later in htab_map_update_elem()
*/
return -E2BIG;
+ /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
+ if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
+ return -E2BIG;
return 0;
}
@@ -1049,14 +1052,15 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
pptr = htab_elem_get_ptr(l_new, key_size);
} else {
/* alloc_percpu zero-fills */
- pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
- if (!pptr) {
+ void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
+
+ if (!ptr) {
bpf_mem_cache_free(&htab->ma, l_new);
l_new = ERR_PTR(-ENOMEM);
goto dec_count;
}
- l_new->ptr_to_pptr = pptr;
- pptr = *(void **)pptr;
+ l_new->ptr_to_pptr = ptr;
+ pptr = *(void __percpu **)ptr;
}
pcpu_init_value(htab, pptr, value, onallcpus);
@@ -1586,7 +1590,7 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
seq_puts(m, ": ");
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
rcu_read_unlock();
}
@@ -2450,7 +2454,7 @@ static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
seq_puts(m, "}\n");
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b5f0adae8293..1a43d06eab28 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -158,6 +158,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.func = bpf_get_smp_processor_id,
.gpl_only = false,
.ret_type = RET_INTEGER,
+ .allow_fastcall = true,
};
BPF_CALL_0(bpf_get_numa_node_id)
@@ -517,16 +518,15 @@ static int __bpf_strtoll(const char *buf, size_t buf_len, u64 flags,
}
BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags,
- long *, res)
+ s64 *, res)
{
long long _res;
int err;
+ *res = 0;
err = __bpf_strtoll(buf, buf_len, flags, &_res);
if (err < 0)
return err;
- if (_res != (long)_res)
- return -ERANGE;
*res = _res;
return err;
}
@@ -538,23 +538,23 @@ const struct bpf_func_proto bpf_strtol_proto = {
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_LONG,
+ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg4_size = sizeof(s64),
};
BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags,
- unsigned long *, res)
+ u64 *, res)
{
unsigned long long _res;
bool is_negative;
int err;
+ *res = 0;
err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative);
if (err < 0)
return err;
if (is_negative)
return -EINVAL;
- if (_res != (unsigned long)_res)
- return -ERANGE;
*res = _res;
return err;
}
@@ -566,7 +566,8 @@ const struct bpf_func_proto bpf_strtoul_proto = {
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_LONG,
+ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg4_size = sizeof(u64),
};
BPF_CALL_3(bpf_strncmp, const char *, s1, u32, s1_sz, const char *, s2)
@@ -714,7 +715,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
if (cpu >= nr_cpu_ids)
return (unsigned long)NULL;
- return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+ return (unsigned long)per_cpu_ptr((const void __percpu *)(const uintptr_t)ptr, cpu);
}
const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
@@ -727,7 +728,7 @@ const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
{
- return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+ return (unsigned long)this_cpu_ptr((const void __percpu *)(const uintptr_t)percpu_ptr);
}
const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
@@ -1618,9 +1619,9 @@ void bpf_wq_cancel_and_free(void *val)
schedule_work(&work->delete_work);
}
-BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
+BPF_CALL_2(bpf_kptr_xchg, void *, dst, void *, ptr)
{
- unsigned long *kptr = map_value;
+ unsigned long *kptr = dst;
/* This helper may be inlined by verifier. */
return xchg(kptr, (unsigned long)ptr);
@@ -1635,7 +1636,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
.ret_btf_id = BPF_PTR_POISON,
- .arg1_type = ARG_PTR_TO_KPTR,
+ .arg1_type = ARG_KPTR_XCHG_DEST,
.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
.arg2_btf_id = BPF_PTR_POISON,
};
@@ -2033,6 +2034,7 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
}
+EXPORT_SYMBOL_GPL(bpf_base_func_proto);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock)
@@ -2457,6 +2459,29 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
return ret;
}
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct cgroup *cgrp;
+
+ if (unlikely(idx >= array->map.max_entries))
+ return -E2BIG;
+
+ cgrp = READ_ONCE(array->ptrs[idx]);
+ if (unlikely(!cgrp))
+ return -EAGAIN;
+
+ return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+ .func = bpf_current_task_under_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+};
+
/**
* bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
* specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
@@ -2938,6 +2963,47 @@ __bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
bpf_mem_free(&bpf_global_ma, kit->bits);
}
+/**
+ * bpf_copy_from_user_str() - Copy a string from an unsafe user address
+ * @dst: Destination address, in kernel space. This buffer must be
+ * at least @dst__sz bytes long.
+ * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL.
+ * @unsafe_ptr__ign: Source address, in user space.
+ * @flags: The only supported flag is BPF_F_PAD_ZEROS
+ *
+ * Copies a NUL-terminated string from userspace to BPF space. If user string is
+ * too long this will still ensure zero termination in the dst buffer unless
+ * buffer size is 0.
+ *
+ * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and
+ * memset all of @dst on failure.
+ */
+__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags)
+{
+ int ret;
+
+ if (unlikely(flags & ~BPF_F_PAD_ZEROS))
+ return -EINVAL;
+
+ if (unlikely(!dst__sz))
+ return 0;
+
+ ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1);
+ if (ret < 0) {
+ if (flags & BPF_F_PAD_ZEROS)
+ memset((char *)dst, 0, dst__sz);
+
+ return ret;
+ }
+
+ if (flags & BPF_F_PAD_ZEROS)
+ memset((char *)dst + ret, 0, dst__sz - ret);
+ else
+ ((char *)dst)[ret] = '\0';
+
+ return ret + 1;
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
@@ -3023,6 +3089,7 @@ BTF_ID_FLAGS(func, bpf_preempt_enable)
BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
@@ -3051,6 +3118,7 @@ static int __init kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set);
ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
ARRAY_SIZE(generic_dtors),
THIS_MODULE);
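
Note: illustrative only (attach point and extern declaration style are assumptions): bpf_copy_from_user_str() is KF_SLEEPABLE, so it can only be called from sleepable programs; a negative return means the copy failed (with @dst fully zeroed when BPF_F_PAD_ZEROS is set), a positive return is the copied length including the trailing NUL:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    extern int bpf_copy_from_user_str(void *dst, __u32 dst__sz,
                                      const void *unsafe_ptr__ign, __u64 flags) __ksym;

    char buf[64];

    SEC("fentry.s/do_sys_openat2")
    int BPF_PROG(copy_name, int dfd, const char *filename)
    {
            int n = bpf_copy_from_user_str(buf, sizeof(buf), filename, BPF_F_PAD_ZEROS);

            if (n < 0)
                    return 0;       /* copy failed, buf is all zeroes */
            /* n bytes copied including NUL, tail of buf zero-padded */
            return 0;
    }

    char LICENSE[] SEC("license") = "GPL";
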
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index af5d2ffadd70..d8fc5eba529d 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -709,10 +709,10 @@ static void seq_print_delegate_opts(struct seq_file *m,
msk = 1ULL << e->val;
if (delegate_msk & msk) {
/* emit lower-case name without prefix */
- seq_printf(m, "%c", first ? '=' : ':');
+ seq_putc(m, first ? '=' : ':');
name += pfx_len;
while (*name) {
- seq_printf(m, "%c", tolower(*name));
+ seq_putc(m, tolower(*name));
name++;
}
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index a04f505aefe9..3969eb0382af 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -431,7 +431,7 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
seq_puts(m, ": ");
btf_type_seq_show(map->btf, map->btf_value_type_id,
&READ_ONCE(storage->buf)->data[0], m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
} else {
seq_puts(m, ": {\n");
for_each_possible_cpu(cpu) {
@@ -439,7 +439,7 @@ static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(storage->percpu_buf, cpu),
m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
seq_puts(m, "}\n");
}
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index dec892ded031..b3858a76e0b3 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -138,8 +138,8 @@ static struct llist_node notrace *__llist_del_first(struct llist_head *head)
static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags)
{
if (c->percpu_size) {
- void **obj = kmalloc_node(c->percpu_size, flags, node);
- void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
+ void __percpu **obj = kmalloc_node(c->percpu_size, flags, node);
+ void __percpu *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags);
if (!obj || !pptr) {
free_percpu(pptr);
@@ -253,7 +253,7 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic)
static void free_one(void *obj, bool percpu)
{
if (percpu) {
- free_percpu(((void **)obj)[1]);
+ free_percpu(((void __percpu **)obj)[1]);
kfree(obj);
return;
}
@@ -509,8 +509,8 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
*/
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
{
- struct bpf_mem_caches *cc, __percpu *pcc;
- struct bpf_mem_cache *c, __percpu *pc;
+ struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
+ struct bpf_mem_cache *c; struct bpf_mem_cache __percpu *pc;
struct obj_cgroup *objcg = NULL;
int cpu, i, unit_size, percpu_size = 0;
@@ -591,7 +591,7 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size)
{
- struct bpf_mem_caches *cc, __percpu *pcc;
+ struct bpf_mem_caches *cc; struct bpf_mem_caches __percpu *pcc;
int cpu, i, unit_size, percpu_size;
struct obj_cgroup *objcg;
struct bpf_mem_cache *c;
diff --git a/kernel/bpf/relo_core.c b/kernel/bpf/relo_core.c
new file mode 100644
index 000000000000..aa822c9fcfde
--- /dev/null
+++ b/kernel/bpf/relo_core.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include "../../tools/lib/bpf/relo_core.c"
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 4b4f9670f1a9..49b8e5a0c6b4 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -308,7 +308,7 @@ put_file_unlock:
spin_unlock_bh(&reuseport_lock);
put_file:
- fput(socket->file);
+ sockfd_put(socket);
return err;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index c99f8e5234ac..3615c06b7dfa 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -124,8 +124,24 @@ free_smap:
return ERR_PTR(err);
}
+static int fetch_build_id(struct vm_area_struct *vma, unsigned char *build_id, bool may_fault)
+{
+ return may_fault ? build_id_parse(vma, build_id, NULL)
+ : build_id_parse_nofault(vma, build_id, NULL);
+}
+
+/*
+ * Expects all id_offs[i].ip values to be set to correct initial IPs.
+ * They will be subsequently:
+ * - either adjusted in place to a file offset, if build ID fetching
+ * succeeds; in this case id_offs[i].build_id is set to correct build ID,
+ * and id_offs[i].status is set to BPF_STACK_BUILD_ID_VALID;
+ * - or IP will be kept intact, if build ID fetching failed; in this case
+ * id_offs[i].build_id is zeroed out and id_offs[i].status is set to
+ * BPF_STACK_BUILD_ID_IP.
+ */
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
- u64 *ips, u32 trace_nr, bool user)
+ u32 trace_nr, bool user, bool may_fault)
{
int i;
struct mmap_unlock_irq_work *work = NULL;
@@ -142,30 +158,28 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
/* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
- id_offs[i].ip = ips[i];
memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
}
return;
}
for (i = 0; i < trace_nr; i++) {
- if (range_in_vma(prev_vma, ips[i], ips[i])) {
+ u64 ip = READ_ONCE(id_offs[i].ip);
+
+ if (range_in_vma(prev_vma, ip, ip)) {
vma = prev_vma;
- memcpy(id_offs[i].build_id, prev_build_id,
- BUILD_ID_SIZE_MAX);
+ memcpy(id_offs[i].build_id, prev_build_id, BUILD_ID_SIZE_MAX);
goto build_id_valid;
}
- vma = find_vma(current->mm, ips[i]);
- if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
+ vma = find_vma(current->mm, ip);
+ if (!vma || fetch_build_id(vma, id_offs[i].build_id, may_fault)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
- id_offs[i].ip = ips[i];
memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
continue;
}
build_id_valid:
- id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
- - vma->vm_start;
+ id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ip - vma->vm_start;
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
prev_vma = vma;
prev_build_id = id_offs[i].build_id;
@@ -216,7 +230,7 @@ static long __bpf_get_stackid(struct bpf_map *map,
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
- u32 hash, id, trace_nr, trace_len;
+ u32 hash, id, trace_nr, trace_len, i;
bool user = flags & BPF_F_USER_STACK;
u64 *ips;
bool hash_matches;
@@ -238,15 +252,18 @@ static long __bpf_get_stackid(struct bpf_map *map,
return id;
if (stack_map_use_build_id(map)) {
+ struct bpf_stack_build_id *id_offs;
+
/* for build_id+offset, pop a bucket before slow cmp */
new_bucket = (struct stack_map_bucket *)
pcpu_freelist_pop(&smap->freelist);
if (unlikely(!new_bucket))
return -ENOMEM;
new_bucket->nr = trace_nr;
- stack_map_get_build_id_offset(
- (struct bpf_stack_build_id *)new_bucket->data,
- ips, trace_nr, user);
+ id_offs = (struct bpf_stack_build_id *)new_bucket->data;
+ for (i = 0; i < trace_nr; i++)
+ id_offs[i].ip = ips[i];
+ stack_map_get_build_id_offset(id_offs, trace_nr, user, false /* !may_fault */);
trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
if (hash_matches && bucket->nr == trace_nr &&
memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@@ -387,7 +404,7 @@ const struct bpf_func_proto bpf_get_stackid_proto_pe = {
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
struct perf_callchain_entry *trace_in,
- void *buf, u32 size, u64 flags)
+ void *buf, u32 size, u64 flags, bool may_fault)
{
u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@@ -405,8 +422,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
if (kernel && user_build_id)
goto clear;
- elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
- : sizeof(u64);
+ elem_size = user_build_id ? sizeof(struct bpf_stack_build_id) : sizeof(u64);
if (unlikely(size % elem_size))
goto clear;
@@ -427,6 +443,9 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
if (sysctl_perf_event_max_stack < max_depth)
max_depth = sysctl_perf_event_max_stack;
+ if (may_fault)
+ rcu_read_lock(); /* need RCU for perf's callchain below */
+
if (trace_in)
trace = trace_in;
else if (kernel && task)
@@ -434,21 +453,34 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
else
trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
crosstask, false);
- if (unlikely(!trace))
- goto err_fault;
- if (trace->nr < skip)
+ if (unlikely(!trace) || trace->nr < skip) {
+ if (may_fault)
+ rcu_read_unlock();
goto err_fault;
+ }
trace_nr = trace->nr - skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
ips = trace->ip + skip;
- if (user && user_build_id)
- stack_map_get_build_id_offset(buf, ips, trace_nr, user);
- else
+ if (user_build_id) {
+ struct bpf_stack_build_id *id_offs = buf;
+ u32 i;
+
+ for (i = 0; i < trace_nr; i++)
+ id_offs[i].ip = ips[i];
+ } else {
memcpy(buf, ips, copy_len);
+ }
+
+ /* trace/ips should not be dereferenced after this point */
+ if (may_fault)
+ rcu_read_unlock();
+
+ if (user_build_id)
+ stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);
if (size > copy_len)
memset(buf + copy_len, 0, size - copy_len);
@@ -464,7 +496,7 @@ clear:
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
- return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
}
const struct bpf_func_proto bpf_get_stack_proto = {
@@ -477,8 +509,24 @@ const struct bpf_func_proto bpf_get_stack_proto = {
.arg4_type = ARG_ANYTHING,
};
-BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
- u32, size, u64, flags)
+BPF_CALL_4(bpf_get_stack_sleepable, struct pt_regs *, regs, void *, buf, u32, size,
+ u64, flags)
+{
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, true /* may_fault */);
+}
+
+const struct bpf_func_proto bpf_get_stack_sleepable_proto = {
+ .func = bpf_get_stack_sleepable,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+};
+
+static long __bpf_get_task_stack(struct task_struct *task, void *buf, u32 size,
+ u64 flags, bool may_fault)
{
struct pt_regs *regs;
long res = -EINVAL;
@@ -488,12 +536,18 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
regs = task_pt_regs(task);
if (regs)
- res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
+ res = __bpf_get_stack(regs, task, NULL, buf, size, flags, may_fault);
put_task_stack(task);
return res;
}
+BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
+ u32, size, u64, flags)
+{
+ return __bpf_get_task_stack(task, buf, size, flags, false /* !may_fault */);
+}
+
const struct bpf_func_proto bpf_get_task_stack_proto = {
.func = bpf_get_task_stack,
.gpl_only = false,
@@ -505,6 +559,23 @@ const struct bpf_func_proto bpf_get_task_stack_proto = {
.arg4_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_get_task_stack_sleepable, struct task_struct *, task, void *, buf,
+ u32, size, u64, flags)
+{
+ return __bpf_get_task_stack(task, buf, size, flags, true /* may_fault */);

+}
+
+const struct bpf_func_proto bpf_get_task_stack_sleepable_proto = {
+ .func = bpf_get_task_stack_sleepable,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
void *, buf, u32, size, u64, flags)
{
@@ -516,7 +587,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
__u64 nr_kernel;
if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
- return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
+ return __bpf_get_stack(regs, NULL, NULL, buf, size, flags, false /* !may_fault */);
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_USER_BUILD_ID)))
@@ -536,7 +607,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
__u64 nr = trace->nr;
trace->nr = nr_kernel;
- err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+ err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
/* restore nr */
trace->nr = nr;
@@ -548,7 +619,7 @@ BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
goto clear;
flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
- err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
+ err = __bpf_get_stack(regs, NULL, trace, buf, size, flags, false /* !may_fault */);
}
return err;
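
Note: the may_fault plumbing above changes how build IDs are resolved, not the output layout: a program requesting build IDs still receives an array of struct bpf_stack_build_id and must check each entry's status. A fragment of a tracing program, for illustration only:

    struct bpf_stack_build_id ids[16];
    long n = bpf_get_stack(ctx, ids, sizeof(ids),
                           BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);

    if (n > 0 && ids[0].status == BPF_STACK_BUILD_ID_VALID) {
            /* ids[0].build_id and ids[0].offset identify the frame's file */
    } else if (n > 0 && ids[0].status == BPF_STACK_BUILD_ID_IP) {
            /* build ID lookup failed, ids[0].ip holds the raw address */
    }
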
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bf6c5f685ea2..8a4117f6d761 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -550,7 +550,8 @@ void btf_record_free(struct btf_record *rec)
case BPF_KPTR_PERCPU:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
- btf_put(rec->fields[i].kptr.btf);
+ if (btf_is_kernel(rec->fields[i].kptr.btf))
+ btf_put(rec->fields[i].kptr.btf);
break;
case BPF_LIST_HEAD:
case BPF_LIST_NODE:
@@ -596,7 +597,8 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
- btf_get(fields[i].kptr.btf);
+ if (btf_is_kernel(fields[i].kptr.btf))
+ btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
goto free;
@@ -733,15 +735,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
}
}
-/* called from workqueue */
-static void bpf_map_free_deferred(struct work_struct *work)
+static void bpf_map_free(struct bpf_map *map)
{
- struct bpf_map *map = container_of(work, struct bpf_map, work);
struct btf_record *rec = map->record;
struct btf *btf = map->btf;
- security_bpf_map_free(map);
- bpf_map_release_memcg(map);
/* implementation dependent freeing */
map->ops->map_free(map);
/* Delay freeing of btf_record for maps, as map_free
@@ -760,6 +758,16 @@ static void bpf_map_free_deferred(struct work_struct *work)
btf_put(btf);
}
+/* called from workqueue */
+static void bpf_map_free_deferred(struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+
+ security_bpf_map_free(map);
+ bpf_map_release_memcg(map);
+ bpf_map_free(map);
+}
+
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic64_dec_and_test(&map->usercnt)) {
@@ -1411,8 +1419,7 @@ static int map_create(union bpf_attr *attr)
free_map_sec:
security_bpf_map_free(map);
free_map:
- btf_put(map->btf);
- map->ops->map_free(map);
+ bpf_map_free(map);
put_token:
bpf_token_put(token);
return err;
@@ -5668,7 +5675,7 @@ static int token_create(union bpf_attr *attr)
return bpf_token_create(attr);
}
-static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
+static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
int err;
@@ -5932,6 +5939,7 @@ static const struct bpf_func_proto bpf_sys_close_proto = {
BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
{
+ *res = 0;
if (flags)
return -EINVAL;
@@ -5952,7 +5960,8 @@ static const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
- .arg4_type = ARG_PTR_TO_LONG,
+ .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg4_size = sizeof(u64),
};
static const struct bpf_func_proto *
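
Note: the bpf_kallsyms_lookup_name() argument change (ARG_PTR_TO_LONG becoming a fixed-size, aligned 8-byte destination) matches the u64 result the helper now zero-initializes unconditionally. From a BPF_PROG_TYPE_SYSCALL program the call looks like this (fragment, for illustration only):

    __u64 addr = 0;
    long err = bpf_kallsyms_lookup_name("bpf_prog_put",
                                        sizeof("bpf_prog_put"), 0, &addr);
    /* err == 0 and addr != 0 on success; the result slot must now be an
     * 8-byte, 8-byte-aligned object to satisfy ARG_PTR_TO_FIXED_SIZE_MEM.
     */
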
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 39d5710c68ad..dd86282ccaa4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -385,11 +385,6 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env,
verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
}
-static bool type_may_be_null(u32 type)
-{
- return type & PTR_MAYBE_NULL;
-}
-
static bool reg_not_null(const struct bpf_reg_state *reg)
{
enum bpf_reg_type type;
@@ -2184,6 +2179,44 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
reg->smin_value = max_t(s64, reg->smin_value, new_smin);
reg->smax_value = min_t(s64, reg->smax_value, new_smax);
}
+
+ /* Here we would like to handle a special case after sign extending load,
+ * when upper bits for a 64-bit range are all 1s or all 0s.
+ *
+ * Upper bits are all 1s when register is in a range:
+ * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff]
+ * Upper bits are all 0s when register is in a range:
+ * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff]
+ * Together this forms are continuous range:
+ * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff]
+ *
+ * Now, suppose that register range is in fact tighter:
+ * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R)
+ * Also suppose that its 32-bit range is positive,
+ * meaning that lower 32-bits of the full 64-bit register
+ * are in the range:
+ * [0x0000_0000, 0x7fff_ffff] (W)
+ *
+ * If this happens, then any value in a range:
+ * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff]
+ * is smaller than a lowest bound of the range (R):
+ * 0xffff_ffff_8000_0000
+ * which means that upper bits of the full 64-bit register
+ * can't be all 1s, when lower bits are in range (W).
+ *
+ * Note that:
+ * - 0xffff_ffff_8000_0000 == (s64)S32_MIN
+ * - 0x0000_0000_7fff_ffff == (s64)S32_MAX
+ * These relations are used in the conditions below.
+ */
+ if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) {
+ reg->smin_value = reg->s32_min_value;
+ reg->smax_value = reg->s32_max_value;
+ reg->umin_value = reg->s32_min_value;
+ reg->umax_value = reg->s32_max_value;
+ reg->var_off = tnum_intersect(reg->var_off,
+ tnum_range(reg->smin_value, reg->smax_value));
+ }
}
static void __reg_deduce_bounds(struct bpf_reg_state *reg)
@@ -2336,6 +2369,25 @@ static void mark_reg_unknown(struct bpf_verifier_env *env,
__mark_reg_unknown(env, regs + regno);
}
+static int __mark_reg_s32_range(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs,
+ u32 regno,
+ s32 s32_min,
+ s32 s32_max)
+{
+ struct bpf_reg_state *reg = regs + regno;
+
+ reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min);
+ reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max);
+
+ reg->smin_value = max_t(s64, reg->smin_value, s32_min);
+ reg->smax_value = min_t(s64, reg->smax_value, s32_max);
+
+ reg_bounds_sync(reg);
+
+ return reg_bounds_sanity_check(env, reg, "s32_range");
+}
+
static void __mark_reg_not_init(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
@@ -3337,9 +3389,87 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
return env->insn_aux_data[insn_idx].jmp_point;
}
+#define LR_FRAMENO_BITS 3
+#define LR_SPI_BITS 6
+#define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1)
+#define LR_SIZE_BITS 4
+#define LR_FRAMENO_MASK ((1ull << LR_FRAMENO_BITS) - 1)
+#define LR_SPI_MASK ((1ull << LR_SPI_BITS) - 1)
+#define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1)
+#define LR_SPI_OFF LR_FRAMENO_BITS
+#define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS)
+#define LINKED_REGS_MAX 6
+
+struct linked_reg {
+ u8 frameno;
+ union {
+ u8 spi;
+ u8 regno;
+ };
+ bool is_reg;
+};
+
+struct linked_regs {
+ int cnt;
+ struct linked_reg entries[LINKED_REGS_MAX];
+};
+
+static struct linked_reg *linked_regs_push(struct linked_regs *s)
+{
+ if (s->cnt < LINKED_REGS_MAX)
+ return &s->entries[s->cnt++];
+
+ return NULL;
+}
+
+/* Use u64 as a vector of 6 10-bit values, use first 4-bits to track
+ * number of elements currently in stack.
+ * Pack one history entry for linked registers as 10 bits in the following format:
+ * - 3-bits frameno
+ * - 6-bits spi_or_reg
+ * - 1-bit is_reg
+ */
+static u64 linked_regs_pack(struct linked_regs *s)
+{
+ u64 val = 0;
+ int i;
+
+ for (i = 0; i < s->cnt; ++i) {
+ struct linked_reg *e = &s->entries[i];
+ u64 tmp = 0;
+
+ tmp |= e->frameno;
+ tmp |= e->spi << LR_SPI_OFF;
+ tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF;
+
+ val <<= LR_ENTRY_BITS;
+ val |= tmp;
+ }
+ val <<= LR_SIZE_BITS;
+ val |= s->cnt;
+ return val;
+}
+
+static void linked_regs_unpack(u64 val, struct linked_regs *s)
+{
+ int i;
+
+ s->cnt = val & LR_SIZE_MASK;
+ val >>= LR_SIZE_BITS;
+
+ for (i = 0; i < s->cnt; ++i) {
+ struct linked_reg *e = &s->entries[i];
+
+ e->frameno = val & LR_FRAMENO_MASK;
+ e->spi = (val >> LR_SPI_OFF) & LR_SPI_MASK;
+ e->is_reg = (val >> LR_IS_REG_OFF) & 0x1;
+ val >>= LR_ENTRY_BITS;
+ }
+}
+
/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
- int insn_flags)
+ int insn_flags, u64 linked_regs)
{
u32 cnt = cur->jmp_history_cnt;
struct bpf_jmp_history_entry *p;
@@ -3355,6 +3485,10 @@ static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_st
"verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
env->insn_idx, env->cur_hist_ent->flags, insn_flags);
env->cur_hist_ent->flags |= insn_flags;
+ WARN_ONCE(env->cur_hist_ent->linked_regs != 0,
+ "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n",
+ env->insn_idx, env->cur_hist_ent->linked_regs);
+ env->cur_hist_ent->linked_regs = linked_regs;
return 0;
}
@@ -3369,6 +3503,7 @@ static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_st
p->idx = env->insn_idx;
p->prev_idx = env->prev_insn_idx;
p->flags = insn_flags;
+ p->linked_regs = linked_regs;
cur->jmp_history_cnt = cnt;
env->cur_hist_ent = p;
@@ -3534,6 +3669,11 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
return bt->reg_masks[bt->frame] & (1 << reg);
}
+static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg)
+{
+ return bt->reg_masks[frame] & (1 << reg);
+}
+
static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
{
return bt->stack_masks[frame] & (1ull << slot);
@@ -3578,6 +3718,42 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
}
}
+/* If any register R in hist->linked_regs is marked as precise in bt,
+ * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
+ */
+static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
+{
+ struct linked_regs linked_regs;
+ bool some_precise = false;
+ int i;
+
+ if (!hist || hist->linked_regs == 0)
+ return;
+
+ linked_regs_unpack(hist->linked_regs, &linked_regs);
+ for (i = 0; i < linked_regs.cnt; ++i) {
+ struct linked_reg *e = &linked_regs.entries[i];
+
+ if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) ||
+ (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) {
+ some_precise = true;
+ break;
+ }
+ }
+
+ if (!some_precise)
+ return;
+
+ for (i = 0; i < linked_regs.cnt; ++i) {
+ struct linked_reg *e = &linked_regs.entries[i];
+
+ if (e->is_reg)
+ bt_set_frame_reg(bt, e->frameno, e->regno);
+ else
+ bt_set_frame_slot(bt, e->frameno, e->spi);
+ }
+}
+
static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
/* For given verifier state backtrack_insn() is called from the last insn to
@@ -3617,6 +3793,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
}
+ /* If there is a history record that some registers gained range at this insn,
+ * propagate precision marks to those registers, so that bt_is_reg_set()
+ * accounts for these registers.
+ */
+ bt_sync_linked_regs(bt, hist);
+
if (class == BPF_ALU || class == BPF_ALU64) {
if (!bt_is_reg_set(bt, dreg))
return 0;
@@ -3846,7 +4028,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
*/
bt_set_reg(bt, dreg);
bt_set_reg(bt, sreg);
- /* else dreg <cond> K
+ } else if (BPF_SRC(insn->code) == BPF_K) {
+ /* dreg <cond> K
* Only dreg still needs precision before
* this insn, so for the K-based conditional
* there is nothing new to be marked.
@@ -3864,6 +4047,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
/* to be analyzed */
return -ENOTSUPP;
}
+ /* Propagate precision marks to linked registers, to account for
+ * registers marked as precise in this function.
+ */
+ bt_sync_linked_regs(bt, hist);
return 0;
}
@@ -3991,96 +4178,6 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
}
}
-static bool idset_contains(struct bpf_idset *s, u32 id)
-{
- u32 i;
-
- for (i = 0; i < s->count; ++i)
- if (s->ids[i] == (id & ~BPF_ADD_CONST))
- return true;
-
- return false;
-}
-
-static int idset_push(struct bpf_idset *s, u32 id)
-{
- if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
- return -EFAULT;
- s->ids[s->count++] = id & ~BPF_ADD_CONST;
- return 0;
-}
-
-static void idset_reset(struct bpf_idset *s)
-{
- s->count = 0;
-}
-
-/* Collect a set of IDs for all registers currently marked as precise in env->bt.
- * Mark all registers with these IDs as precise.
- */
-static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
-{
- struct bpf_idset *precise_ids = &env->idset_scratch;
- struct backtrack_state *bt = &env->bt;
- struct bpf_func_state *func;
- struct bpf_reg_state *reg;
- DECLARE_BITMAP(mask, 64);
- int i, fr;
-
- idset_reset(precise_ids);
-
- for (fr = bt->frame; fr >= 0; fr--) {
- func = st->frame[fr];
-
- bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
- for_each_set_bit(i, mask, 32) {
- reg = &func->regs[i];
- if (!reg->id || reg->type != SCALAR_VALUE)
- continue;
- if (idset_push(precise_ids, reg->id))
- return -EFAULT;
- }
-
- bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
- for_each_set_bit(i, mask, 64) {
- if (i >= func->allocated_stack / BPF_REG_SIZE)
- break;
- if (!is_spilled_scalar_reg(&func->stack[i]))
- continue;
- reg = &func->stack[i].spilled_ptr;
- if (!reg->id)
- continue;
- if (idset_push(precise_ids, reg->id))
- return -EFAULT;
- }
- }
-
- for (fr = 0; fr <= st->curframe; ++fr) {
- func = st->frame[fr];
-
- for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
- reg = &func->regs[i];
- if (!reg->id)
- continue;
- if (!idset_contains(precise_ids, reg->id))
- continue;
- bt_set_frame_reg(bt, fr, i);
- }
- for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
- if (!is_spilled_scalar_reg(&func->stack[i]))
- continue;
- reg = &func->stack[i].spilled_ptr;
- if (!reg->id)
- continue;
- if (!idset_contains(precise_ids, reg->id))
- continue;
- bt_set_frame_slot(bt, fr, i);
- }
- }
-
- return 0;
-}
-
/*
* __mark_chain_precision() backtracks BPF program instruction sequence and
* chain of verifier states making sure that register *regno* (if regno >= 0)
@@ -4213,31 +4310,6 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bt->frame, last_idx, first_idx, subseq_idx);
}
- /* If some register with scalar ID is marked as precise,
- * make sure that all registers sharing this ID are also precise.
- * This is needed to estimate effect of find_equal_scalars().
- * Do this at the last instruction of each state,
- * bpf_reg_state::id fields are valid for these instructions.
- *
- * Allows to track precision in situation like below:
- *
- * r2 = unknown value
- * ...
- * --- state #0 ---
- * ...
- * r1 = r2 // r1 and r2 now share the same ID
- * ...
- * --- state #1 {r1.id = A, r2.id = A} ---
- * ...
- * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
- * ...
- * --- state #2 {r1.id = A, r2.id = A} ---
- * r3 = r10
- * r3 += r1 // need to mark both r1 and r2
- */
- if (mark_precise_scalar_ids(env, st))
- return -EFAULT;
-
if (last_idx < 0) {
/* we are at the entry into subprog, which
* is expected for global funcs, but only if
@@ -4458,7 +4530,7 @@ static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
if (!src_reg->id && !tnum_is_const(src_reg->var_off))
/* Ensure that src_reg has a valid ID that will be copied to
- * dst_reg and then will be used by find_equal_scalars() to
+ * dst_reg and then will be used by sync_linked_regs() to
* propagate min/max range.
*/
src_reg->id = ++env->id_gen;
@@ -4504,6 +4576,31 @@ static int get_reg_width(struct bpf_reg_state *reg)
return fls64(reg->umax_value);
}
+/* See comment for mark_fastcall_pattern_for_call() */
+static void check_fastcall_stack_contract(struct bpf_verifier_env *env,
+ struct bpf_func_state *state, int insn_idx, int off)
+{
+ struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ int i;
+
+ if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern)
+ return;
+	/* An access to the region [max_stack_depth .. fastcall_stack_off)
+	 * comes from something that is not part of the fastcall pattern;
+	 * disable fastcall rewrites for the current subprogram by setting
+	 * fastcall_stack_off to a value smaller than any possible offset.
+	 */
+ subprog->fastcall_stack_off = S16_MIN;
+ /* reset fastcall aux flags within subprogram,
+ * happens at most once per subprogram
+ */
+ for (i = subprog->start; i < (subprog + 1)->start; ++i) {
+ aux[i].fastcall_spills_num = 0;
+ aux[i].fastcall_pattern = 0;
+ }
+}
+
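A minimal user-space sketch of the contract enforced above (the toy_* names and structs are simplified stand-ins, not the kernel types): once a non-pattern instruction touches the fastcall spill region, rewrites stay disabled for the whole subprogram.

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* simplified stand-ins for bpf_subprog_info / bpf_insn_aux_data */
    struct toy_subprog { short fastcall_stack_off; };
    struct toy_aux { bool fastcall_pattern; };

    /* returns true if fastcall rewrites remain possible after this access */
    static bool toy_stack_contract(struct toy_subprog *sp, struct toy_aux *aux, int off)
    {
        if (sp->fastcall_stack_off <= off || aux->fastcall_pattern)
            return true;                   /* harmless access */
        sp->fastcall_stack_off = SHRT_MIN; /* disable rewrites for good */
        return false;
    }

    int main(void)
    {
        struct toy_subprog sp = { .fastcall_stack_off = -40 };
        struct toy_aux pattern = { .fastcall_pattern = true };
        struct toy_aux other = { .fastcall_pattern = false };

        printf("%d\n", toy_stack_contract(&sp, &pattern, -48)); /* 1: spill/fill of the pattern */
        printf("%d\n", toy_stack_contract(&sp, &other, -16));   /* 1: above the fastcall region */
        printf("%d\n", toy_stack_contract(&sp, &other, -48));   /* 0: region touched, disabled */
        return 0;
    }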
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
@@ -4552,6 +4649,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
if (err)
return err;
+ check_fastcall_stack_contract(env, state, insn_idx, off);
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
bool reg_value_fits;
@@ -4627,7 +4725,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
}
if (insn_flags)
- return push_jmp_history(env, env->cur_state, insn_flags);
+ return push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -4686,6 +4784,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
return err;
}
+ check_fastcall_stack_contract(env, state, insn_idx, min_off);
/* Variable offset writes destroy any spilled pointers in range. */
for (i = min_off; i < max_off; i++) {
u8 new_type, *stype;
@@ -4824,6 +4923,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
reg = &reg_state->stack[spi].spilled_ptr;
mark_stack_slot_scratched(env, spi);
+ check_fastcall_stack_contract(env, state, env->insn_idx, off);
if (is_spilled_reg(&reg_state->stack[spi])) {
u8 spill_size = 1;
@@ -4932,7 +5032,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
insn_flags = 0; /* we are not restoring spilled register */
}
if (insn_flags)
- return push_jmp_history(env, env->cur_state, insn_flags);
+ return push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -4984,6 +5084,7 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env,
min_off = reg->smin_value + off;
max_off = reg->smax_value + off;
mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
+ check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off);
return 0;
}
@@ -5589,11 +5690,13 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type,
- struct btf **btf, u32 *btf_id)
+ struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx)
{
struct bpf_insn_access_aux info = {
.reg_type = *reg_type,
.log = &env->log,
+ .is_retval = false,
+ .is_ldsx = is_ldsx,
};
if (env->ops->is_valid_access &&
@@ -5606,6 +5709,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
* type of narrower access.
*/
*reg_type = info.reg_type;
+ *is_retval = info.is_retval;
if (base_type(*reg_type) == PTR_TO_BTF_ID) {
*btf = info.btf;
@@ -6694,10 +6798,20 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
struct bpf_func_state *state,
enum bpf_access_type t)
{
- int min_valid_off;
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx];
+ int min_valid_off, max_bpf_stack;
+
+	/* If the accessing instruction is a spill/fill from a bpf_fastcall pattern,
+	 * add room for all caller saved registers below MAX_BPF_STACK.
+	 * If the bpf_fastcall rewrite does not happen, the maximal stack depth
+	 * is checked by check_max_stack_depth_subprog() instead.
+ */
+ max_bpf_stack = MAX_BPF_STACK;
+ if (aux->fastcall_pattern)
+ max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
if (t == BPF_WRITE || env->allow_uninit_stack)
- min_valid_off = -MAX_BPF_STACK;
+ min_valid_off = -max_bpf_stack;
else
min_valid_off = -state->allocated_stack;
@@ -6774,6 +6888,17 @@ static int check_stack_access_within_bounds(
return grow_stack_state(env, state, -min_off /* size */);
}
+static bool get_func_retval_range(struct bpf_prog *prog,
+ struct bpf_retval_range *range)
+{
+ if (prog->type == BPF_PROG_TYPE_LSM &&
+ prog->expected_attach_type == BPF_LSM_MAC &&
+ !bpf_lsm_get_retval_range(prog, range)) {
+ return true;
+ }
+ return false;
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -6878,6 +7003,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
+ bool is_retval = false;
+ struct bpf_retval_range range;
enum bpf_reg_type reg_type = SCALAR_VALUE;
struct btf *btf = NULL;
u32 btf_id = 0;
@@ -6893,7 +7020,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return err;
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
- &btf_id);
+ &btf_id, &is_retval, is_ldsx);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -6902,7 +7029,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* case, we know the offset is zero.
*/
if (reg_type == SCALAR_VALUE) {
- mark_reg_unknown(env, regs, value_regno);
+ if (is_retval && get_func_retval_range(env->prog, &range)) {
+ err = __mark_reg_s32_range(env, regs, value_regno,
+ range.minval, range.maxval);
+ if (err)
+ return err;
+ } else {
+ mark_reg_unknown(env, regs, value_regno);
+ }
} else {
mark_reg_known_zero(env, regs,
value_regno);
@@ -7666,29 +7800,38 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- struct bpf_map *map_ptr = reg->map_ptr;
struct btf_field *kptr_field;
+ struct bpf_map *map_ptr;
+ struct btf_record *rec;
u32 kptr_off;
+ if (type_is_ptr_alloc_obj(reg->type)) {
+ rec = reg_btf_record(reg);
+ } else { /* PTR_TO_MAP_VALUE */
+ map_ptr = reg->map_ptr;
+ if (!map_ptr->btf) {
+ verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
+ map_ptr->name);
+ return -EINVAL;
+ }
+ rec = map_ptr->record;
+ meta->map_ptr = map_ptr;
+ }
+
if (!tnum_is_const(reg->var_off)) {
verbose(env,
"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
regno);
return -EINVAL;
}
- if (!map_ptr->btf) {
- verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
- map_ptr->name);
- return -EINVAL;
- }
- if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
- verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
+
+ if (!btf_record_has_field(rec, BPF_KPTR)) {
+ verbose(env, "R%d has no valid kptr\n", regno);
return -EINVAL;
}
- meta->map_ptr = map_ptr;
kptr_off = reg->off + reg->var_off.value;
- kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
+ kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR);
if (!kptr_field) {
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
return -EACCES;
@@ -7833,12 +7976,17 @@ static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_ITER_DESTROY;
}
-static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
+static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx,
+ const struct btf_param *arg)
{
/* btf_check_iter_kfuncs() guarantees that first argument of any iter
* kfunc is iter state pointer
*/
- return arg == 0 && is_iter_kfunc(meta);
+ if (is_iter_kfunc(meta))
+ return arg_idx == 0;
+
+ /* iter passed as an argument to a generic kfunc */
+ return btf_param_match_suffix(meta->btf, arg, "__iter");
}
static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
@@ -7846,14 +7994,20 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
const struct btf_type *t;
- const struct btf_param *arg;
- int spi, err, i, nr_slots;
- u32 btf_id;
+ int spi, err, i, nr_slots, btf_id;
- /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
- arg = &btf_params(meta->func_proto)[0];
- t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */
- t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */
+ /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs()
+	 * ensures the struct convention, so no extra BTF validation would be
+	 * needed here. But since iter state can be passed as a parameter to any
+	 * kfunc whose arg has the "__iter" suffix, we need to be a bit more
+	 * conservative here.
+ */
+ btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1);
+ if (btf_id < 0) {
+ verbose(env, "expected valid iter pointer as arg #%d\n", regno);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(meta->btf, btf_id);
nr_slots = t->size / BPF_REG_SIZE;
if (is_iter_new_kfunc(meta)) {
@@ -7875,7 +8029,9 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
if (err)
return err;
} else {
- /* iter_next() or iter_destroy() expect initialized iter state*/
+ /* iter_next() or iter_destroy(), as well as any kfunc
+ * accepting iter argument, expect initialized iter state
+ */
err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
switch (err) {
case 0:
@@ -7989,6 +8145,15 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env,
return 0;
}
+static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ int iter_frameno = meta->iter.frameno;
+ int iter_spi = meta->iter.spi;
+
+ return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+}
+
/* process_iter_next_call() is called when verifier gets to iterator's next
* "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
* to it as just "iter_next()" in comments below.
@@ -8073,12 +8238,10 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
struct bpf_reg_state *cur_iter, *queued_iter;
- int iter_frameno = meta->iter.frameno;
- int iter_spi = meta->iter.spi;
BTF_TYPE_EMIT(struct bpf_iter);
- cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+ cur_iter = get_iter_from_state(cur_st, meta);
if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
@@ -8106,7 +8269,7 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
if (!queued_st)
return -ENOMEM;
- queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+ queued_iter = get_iter_from_state(queued_st, meta);
queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
queued_iter->iter.depth++;
if (prev_st)
@@ -8130,6 +8293,12 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
+static bool arg_type_is_raw_mem(enum bpf_arg_type type)
+{
+ return base_type(type) == ARG_PTR_TO_MEM &&
+ type & MEM_UNINIT;
+}
+
static bool arg_type_is_release(enum bpf_arg_type type)
{
return type & OBJ_RELEASE;
@@ -8140,16 +8309,6 @@ static bool arg_type_is_dynptr(enum bpf_arg_type type)
return base_type(type) == ARG_PTR_TO_DYNPTR;
}
-static int int_ptr_type_to_size(enum bpf_arg_type type)
-{
- if (type == ARG_PTR_TO_INT)
- return sizeof(u32);
- else if (type == ARG_PTR_TO_LONG)
- return sizeof(u64);
-
- return -EINVAL;
-}
-
static int resolve_map_arg_type(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_arg_type *arg_type)
@@ -8222,16 +8381,6 @@ static const struct bpf_reg_types mem_types = {
},
};
-static const struct bpf_reg_types int_ptr_types = {
- .types = {
- PTR_TO_STACK,
- PTR_TO_PACKET,
- PTR_TO_PACKET_META,
- PTR_TO_MAP_KEY,
- PTR_TO_MAP_VALUE,
- },
-};
-
static const struct bpf_reg_types spin_lock_types = {
.types = {
PTR_TO_MAP_VALUE,
@@ -8262,7 +8411,12 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
-static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types kptr_xchg_dest_types = {
+ .types = {
+ PTR_TO_MAP_VALUE,
+ PTR_TO_BTF_ID | MEM_ALLOC
+ }
+};
static const struct bpf_reg_types dynptr_types = {
.types = {
PTR_TO_STACK,
@@ -8287,14 +8441,12 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
[ARG_PTR_TO_MEM] = &mem_types,
[ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
- [ARG_PTR_TO_INT] = &int_ptr_types,
- [ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK] = &stack_ptr_types,
[ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
[ARG_PTR_TO_TIMER] = &timer_types,
- [ARG_PTR_TO_KPTR] = &kptr_types,
+ [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types,
[ARG_PTR_TO_DYNPTR] = &dynptr_types,
};
@@ -8333,7 +8485,8 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
if (base_type(arg_type) == ARG_PTR_TO_MEM)
type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
+ /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */
+ if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) {
type &= ~MEM_ALLOC;
type &= ~MEM_PERCPU;
}
@@ -8426,7 +8579,8 @@ found:
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
return -EFAULT;
}
- if (meta->func_id == BPF_FUNC_kptr_xchg) {
+ /* Check if local kptr in src arg matches kptr in dst arg */
+ if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) {
if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
return -EACCES;
}
@@ -8737,7 +8891,7 @@ skip_type_check:
meta->release_regno = regno;
}
- if (reg->ref_obj_id) {
+ if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
@@ -8849,9 +9003,11 @@ skip_type_check:
*/
meta->raw_mode = arg_type & MEM_UNINIT;
if (arg_type & MEM_FIXED_SIZE) {
- err = check_helper_mem_access(env, regno,
- fn->arg_size[arg], false,
- meta);
+ err = check_helper_mem_access(env, regno, fn->arg_size[arg], false, meta);
+ if (err)
+ return err;
+ if (arg_type & MEM_ALIGNED)
+ err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true);
}
break;
case ARG_CONST_SIZE:
@@ -8876,17 +9032,6 @@ skip_type_check:
if (err)
return err;
break;
- case ARG_PTR_TO_INT:
- case ARG_PTR_TO_LONG:
- {
- int size = int_ptr_type_to_size(arg_type);
-
- err = check_helper_mem_access(env, regno, size, false, meta);
- if (err)
- return err;
- err = check_ptr_alignment(env, reg, 0, size, true);
- break;
- }
case ARG_PTR_TO_CONST_STR:
{
err = check_reg_const_str(env, reg, regno);
@@ -8894,7 +9039,7 @@ skip_type_check:
return err;
break;
}
- case ARG_PTR_TO_KPTR:
+ case ARG_KPTR_XCHG_DEST:
err = process_kptr_func(env, regno, meta);
if (err)
return err;
@@ -9203,15 +9348,15 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
{
int count = 0;
- if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg1_type))
count++;
- if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg2_type))
count++;
- if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg3_type))
count++;
- if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg4_type))
count++;
- if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
+ if (arg_type_is_raw_mem(fn->arg5_type))
count++;
/* We only support one arg being in raw mode at the moment,
@@ -9925,9 +10070,13 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
return is_rbtree_lock_required_kfunc(kfunc_btf_id);
}
-static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
+static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
+ bool return_32bit)
{
- return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
+ if (return_32bit)
+ return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
+ else
+ return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
}
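A stand-alone illustration of the return_32bit distinction introduced above (the toy_* types are simplified stand-ins for bpf_retval_range / bpf_reg_state): a register whose 32-bit bounds fit the range can still fail the 64-bit check when its upper bits are unknown.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct toy_range { int32_t minval, maxval; };
    struct toy_reg { int64_t smin, smax; int32_t s32_min, s32_max; };

    static bool toy_retval_range_within(struct toy_range r, const struct toy_reg *reg,
                                        bool return_32bit)
    {
        if (return_32bit)
            return r.minval <= reg->s32_min && reg->s32_max <= r.maxval;
        return r.minval <= reg->smin && reg->smax <= r.maxval;
    }

    int main(void)
    {
        struct toy_range r = { .minval = -4095, .maxval = 0 };
        /* upper 32 bits unknown, lower 32 bits known to be in [-1, 0] */
        struct toy_reg reg = {
            .smin = INT64_MIN, .smax = INT64_MAX,
            .s32_min = -1, .s32_max = 0,
        };

        printf("64-bit check: %d\n", toy_retval_range_within(r, &reg, false)); /* 0 */
        printf("32-bit check: %d\n", toy_retval_range_within(r, &reg, true));  /* 1 */
        return 0;
    }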
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
@@ -9964,8 +10113,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
if (err)
return err;
- /* enforce R0 return value range */
- if (!retval_range_within(callee->callback_ret_range, r0)) {
+ /* enforce R0 return value range, and bpf_callback_t returns 64bit */
+ if (!retval_range_within(callee->callback_ret_range, r0, false)) {
verbose_invalid_scalar(env, r0, callee->callback_ret_range,
"At callback return", "R0");
return -EINVAL;
@@ -10267,6 +10416,19 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
state->callback_subprogno == subprogno);
}
+static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
+ const struct bpf_func_proto **ptr)
+{
+ if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID)
+ return -ERANGE;
+
+ if (!env->ops->get_func_proto)
+ return -EINVAL;
+
+ *ptr = env->ops->get_func_proto(func_id, env->prog);
+ return *ptr ? 0 : -EINVAL;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -10283,18 +10445,16 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* find function prototype */
func_id = insn->imm;
- if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
- verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
- func_id);
+ err = get_helper_proto(env, insn->imm, &fn);
+ if (err == -ERANGE) {
+ verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
return -EINVAL;
}
- if (env->ops->get_func_proto)
- fn = env->ops->get_func_proto(func_id, env->prog);
- if (!fn) {
+ if (err) {
verbose(env, "program of this type cannot use helper %s#%d\n",
func_id_name(func_id), func_id);
- return -EINVAL;
+ return err;
}
/* eBPF programs must be GPL compatible to use GPL-ed functions */
@@ -11230,7 +11390,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_DYNPTR;
- if (is_kfunc_arg_iter(meta, argno))
+ if (is_kfunc_arg_iter(meta, argno, &args[argno]))
return KF_ARG_PTR_TO_ITER;
if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
@@ -11332,8 +11492,7 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
* btf_struct_ids_match() to walk the struct at the 0th offset, and
* resolve types.
*/
- if (is_kfunc_acquire(meta) ||
- (is_kfunc_release(meta) && reg->ref_obj_id) ||
+ if ((is_kfunc_release(meta) && reg->ref_obj_id) ||
btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
strict_type_match = true;
@@ -11950,7 +12109,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
switch (kf_arg_type) {
case KF_ARG_PTR_TO_CTX:
if (reg->type != PTR_TO_CTX) {
- verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
+ verbose(env, "arg#%d expected pointer to ctx, but got %s\n",
+ i, reg_type_str(env, reg->type));
return -EINVAL;
}
@@ -12673,6 +12833,17 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
+
+ if (is_iter_next_kfunc(&meta)) {
+ struct bpf_reg_state *cur_iter;
+
+ cur_iter = get_iter_from_state(env->cur_state, &meta);
+
+ if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */
+ regs[BPF_REG_0].type |= MEM_RCU;
+ else
+ regs[BPF_REG_0].type |= PTR_TRUSTED;
+ }
}
if (is_kfunc_ret_null(&meta)) {
@@ -14101,7 +14272,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
u64 val = reg_const_value(src_reg, alu32);
if ((dst_reg->id & BPF_ADD_CONST) ||
- /* prevent overflow in find_equal_scalars() later */
+ /* prevent overflow in sync_linked_regs() later */
val > (u32)S32_MAX) {
/*
* If the register already went through rX += val
@@ -14116,7 +14287,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
} else {
/*
* Make sure ID is cleared otherwise dst_reg min/max could be
- * incorrectly propagated into other registers by find_equal_scalars()
+ * incorrectly propagated into other registers by sync_linked_regs()
*/
dst_reg->id = 0;
}
@@ -14266,7 +14437,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
copy_register_state(dst_reg, src_reg);
/* Make sure ID is cleared if src_reg is not in u32
* range otherwise dst_reg min/max could be incorrectly
- * propagated into src_reg by find_equal_scalars()
+ * propagated into src_reg by sync_linked_regs()
*/
if (!is_src_reg_u32)
dst_reg->id = 0;
@@ -15089,14 +15260,66 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
return true;
}
-static void find_equal_scalars(struct bpf_verifier_state *vstate,
- struct bpf_reg_state *known_reg)
+static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg,
+ u32 id, u32 frameno, u32 spi_or_reg, bool is_reg)
+{
+ struct linked_reg *e;
+
+ if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id)
+ return;
+
+ e = linked_regs_push(reg_set);
+ if (e) {
+ e->frameno = frameno;
+ e->is_reg = is_reg;
+ e->regno = spi_or_reg;
+ } else {
+ reg->id = 0;
+ }
+}
+
+/* For all R that are scalar registers or spilled scalar registers
+ * in the verifier state, save R in linked_regs if R->id == id.
+ * If there are too many Rs sharing the same id, reset the id for the leftover Rs.
+ */
+static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id,
+ struct linked_regs *linked_regs)
+{
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ int i, j;
+
+ id = id & ~BPF_ADD_CONST;
+ for (i = vstate->curframe; i >= 0; i--) {
+ func = vstate->frame[i];
+ for (j = 0; j < BPF_REG_FP; j++) {
+ reg = &func->regs[j];
+ __collect_linked_regs(linked_regs, reg, id, i, j, true);
+ }
+ for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
+ if (!is_spilled_reg(&func->stack[j]))
+ continue;
+ reg = &func->stack[j].spilled_ptr;
+ __collect_linked_regs(linked_regs, reg, id, i, j, false);
+ }
+ }
+}
+
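A simplified model of the collect/sync pair above (plain arrays instead of verifier frames, spilled slots and jump-history packing; the toy_* names are invented): registers that share an id are collected once, and a range learned on one of them is later copied to the rest.

    #include <stdint.h>
    #include <stdio.h>

    /* toy scalar register: an id links copies created by rX = rY */
    struct toy_reg { uint32_t id; int64_t smin, smax; };

    struct toy_linked { int cnt; int regno[16]; };

    /* cf. collect_linked_regs(): remember every register sharing 'id' */
    static void toy_collect(struct toy_reg *regs, int n, uint32_t id,
                            struct toy_linked *out)
    {
        for (int i = 0; i < n; i++)
            if (regs[i].id == id && out->cnt < 16)
                out->regno[out->cnt++] = i;
    }

    /* cf. sync_linked_regs(): copy the learned range into every linked register */
    static void toy_sync(struct toy_reg *regs, const struct toy_reg *known,
                         const struct toy_linked *linked)
    {
        for (int i = 0; i < linked->cnt; i++) {
            struct toy_reg *r = &regs[linked->regno[i]];

            if (r != known && r->id == known->id) {
                r->smin = known->smin;
                r->smax = known->smax;
            }
        }
    }

    int main(void)
    {
        /* r1 = r2 gave both the same id; a later branch learns r2 <= 10 */
        struct toy_reg regs[3] = {
            [1] = { .id = 7, .smin = INT64_MIN, .smax = INT64_MAX },
            [2] = { .id = 7, .smin = INT64_MIN, .smax = INT64_MAX },
        };
        struct toy_linked linked = { 0 };

        toy_collect(regs, 3, 7, &linked);
        regs[2].smax = 10;                 /* range learned on the branch */
        toy_sync(regs, &regs[2], &linked);
        printf("r1.smax = %lld\n", (long long)regs[1].smax); /* 10 */
        return 0;
    }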
+/* For all R in linked_regs, copy known_reg range into R
+ * if R->id == known_reg->id.
+ */
+static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg,
+ struct linked_regs *linked_regs)
{
struct bpf_reg_state fake_reg;
- struct bpf_func_state *state;
struct bpf_reg_state *reg;
+ struct linked_reg *e;
+ int i;
- bpf_for_each_reg_in_vstate(vstate, state, reg, ({
+ for (i = 0; i < linked_regs->cnt; ++i) {
+ e = &linked_regs->entries[i];
+ reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno]
+ : &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr;
if (reg->type != SCALAR_VALUE || reg == known_reg)
continue;
if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST))
@@ -15114,7 +15337,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate,
copy_register_state(reg, known_reg);
/*
* Must preserve off, id and add_const flag,
- * otherwise another find_equal_scalars() will be incorrect.
+ * otherwise another sync_linked_regs() will be incorrect.
*/
reg->off = saved_off;
@@ -15122,7 +15345,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate,
scalar_min_max_add(reg, &fake_reg);
reg->var_off = tnum_add(reg->var_off, fake_reg.var_off);
}
- }));
+ }
}
static int check_cond_jmp_op(struct bpf_verifier_env *env,
@@ -15133,6 +15356,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
struct bpf_reg_state *eq_branch_regs;
+ struct linked_regs linked_regs = {};
u8 opcode = BPF_OP(insn->code);
bool is_jmp32;
int pred = -1;
@@ -15247,6 +15471,21 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
return 0;
}
+	/* Push scalar registers sharing the same ID to the jump history;
+ * do this before creating 'other_branch', so that both
+ * 'this_branch' and 'other_branch' share this history
+ * if parent state is created.
+ */
+ if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id)
+ collect_linked_regs(this_branch, src_reg->id, &linked_regs);
+ if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
+ collect_linked_regs(this_branch, dst_reg->id, &linked_regs);
+ if (linked_regs.cnt > 1) {
+ err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
+ if (err)
+ return err;
+ }
+
other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
false);
if (!other_branch)
@@ -15277,13 +15516,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (BPF_SRC(insn->code) == BPF_X &&
src_reg->type == SCALAR_VALUE && src_reg->id &&
!WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
- find_equal_scalars(this_branch, src_reg);
- find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
+ sync_linked_regs(this_branch, src_reg, &linked_regs);
+ sync_linked_regs(other_branch, &other_branch_regs[insn->src_reg], &linked_regs);
}
if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
!WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
- find_equal_scalars(this_branch, dst_reg);
- find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
+ sync_linked_regs(this_branch, dst_reg, &linked_regs);
+ sync_linked_regs(other_branch, &other_branch_regs[insn->dst_reg], &linked_regs);
}
/* if one pointer register is compared to another pointer
@@ -15571,6 +15810,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
int err;
struct bpf_func_state *frame = env->cur_state->frame[0];
const bool is_subprog = frame->subprogno;
+ bool return_32bit = false;
/* LSM and struct_ops func-ptr's return type could be "void" */
if (!is_subprog || frame->in_exception_callback_fn) {
@@ -15676,12 +15916,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
case BPF_PROG_TYPE_LSM:
if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
- /* Regular BPF_PROG_TYPE_LSM programs can return
- * any value.
- */
- return 0;
- }
- if (!env->prog->aux->attach_func_proto->type) {
+ /* no range found, any return value is allowed */
+ if (!get_func_retval_range(env->prog, &range))
+ return 0;
+ /* no restricted range, any return value is allowed */
+ if (range.minval == S32_MIN && range.maxval == S32_MAX)
+ return 0;
+ return_32bit = true;
+ } else if (!env->prog->aux->attach_func_proto->type) {
/* Make sure programs that attach to void
* hooks don't try to modify return value.
*/
@@ -15711,7 +15953,7 @@ enforce_retval:
if (err)
return err;
- if (!retval_range_within(range, reg)) {
+ if (!retval_range_within(range, reg, return_32bit)) {
verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
if (!is_subprog &&
prog->expected_attach_type == BPF_LSM_CGROUP &&
@@ -15877,6 +16119,274 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
return ret;
}
+/* Bitmask with 1s for all caller saved registers */
+#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
+
+/* Return a bitmask specifying which caller saved registers are
+ * clobbered by a call to a helper *as if* this helper follows
+ * bpf_fastcall contract:
+ * - includes R0 if function is non-void;
+ * - includes R1-R5 if the corresponding parameter is described
+ * in the function prototype.
+ */
+static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn)
+{
+ u32 mask;
+ int i;
+
+ mask = 0;
+ if (fn->ret_type != RET_VOID)
+ mask |= BIT(BPF_REG_0);
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
+ if (fn->arg_type[i] != ARG_DONTCARE)
+ mask |= BIT(BPF_REG_1 + i);
+ return mask;
+}
+
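A small worked example of the mask above (toy_fastcall_clobber_mask() is a stand-in that takes a void-return flag and an argument count instead of a bpf_func_proto): a non-void helper with two arguments clobbers r0-r2, so a fastcall pattern around it may only spill/fill r3-r5.

    #include <stdio.h>

    #define BIT(n) (1u << (n))

    static unsigned int toy_fastcall_clobber_mask(int ret_void, int nr_args)
    {
        unsigned int mask = 0;

        if (!ret_void)
            mask |= BIT(0);         /* r0 carries the return value */
        for (int i = 0; i < nr_args; i++)
            mask |= BIT(1 + i);     /* r1..r5 carry the arguments */
        return mask;
    }

    int main(void)
    {
        unsigned int all_caller_saved = BIT(0) | BIT(1) | BIT(2) | BIT(3) | BIT(4) | BIT(5);
        unsigned int clobbered = toy_fastcall_clobber_mask(0, 2);

        printf("clobbered:           0x%x\n", clobbered);                     /* 0x07: r0-r2 */
        printf("expected in pattern: 0x%x\n", ~clobbered & all_caller_saved); /* 0x38: r3-r5 */
        return 0;
    }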
+/* True if do_misc_fixups() replaces calls to helper number 'imm';
+ * the replacement patch is presumed to follow the bpf_fastcall contract
+ * (see mark_fastcall_pattern_for_call() below).
+ */
+static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
+{
+ switch (imm) {
+#ifdef CONFIG_X86_64
+ case BPF_FUNC_get_smp_processor_id:
+ return env->prog->jit_requested && bpf_jit_supports_percpu_insn();
+#endif
+ default:
+ return false;
+ }
+}
+
+/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */
+static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta)
+{
+ u32 vlen, i, mask;
+
+ vlen = btf_type_vlen(meta->func_proto);
+ mask = 0;
+ if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type)))
+ mask |= BIT(BPF_REG_0);
+ for (i = 0; i < vlen; ++i)
+ mask |= BIT(BPF_REG_1 + i);
+ return mask;
+}
+
+/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */
+static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta)
+{
+ if (meta->btf == btf_vmlinux)
+ return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
+ meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast];
+ return false;
+}
+
+/* LLVM defines a bpf_fastcall function attribute.
+ * This attribute means that the function scratches only some of
+ * the caller saved registers defined by the ABI.
+ * For BPF the set of such registers could be defined as follows:
+ * - R0 is scratched only if function is non-void;
+ * - R1-R5 are scratched only if corresponding parameter type is defined
+ * in the function prototype.
+ *
+ * The contract between the kernel and clang allows such functions to be
+ * used while maintaining backwards compatibility with old kernels that
+ * don't understand bpf_fastcall calls:
+ *
+ * - for bpf_fastcall calls clang allocates registers as if the relevant r0-r5
+ * registers are not scratched by the call;
+ *
+ * - as a post-processing step, clang visits each bpf_fastcall call and adds
+ * spill/fill for every live r0-r5;
+ *
+ * - stack offsets used for the spill/fill are allocated as lowest
+ * stack offsets in whole function and are not used for any other
+ * purposes;
+ *
+ * - when kernel loads a program, it looks for such patterns
+ * (bpf_fastcall function surrounded by spills/fills) and checks if
+ * spill/fill stack offsets are used exclusively in fastcall patterns;
+ *
+ * - if so, and if verifier or current JIT inlines the call to the
+ * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary
+ * spill/fill pairs;
+ *
+ * - when old kernel loads a program, presence of spill/fill pairs
+ * keeps BPF program valid, albeit slightly less efficient.
+ *
+ * For example:
+ *
+ * r1 = 1;
+ * r2 = 2;
+ * *(u64 *)(r10 - 8) = r1; r1 = 1;
+ * *(u64 *)(r10 - 16) = r2; r2 = 2;
+ * call %[to_be_inlined] --> call %[to_be_inlined]
+ * r2 = *(u64 *)(r10 - 16); r0 = r1;
+ * r1 = *(u64 *)(r10 - 8); r0 += r2;
+ * r0 = r1; exit;
+ * r0 += r2;
+ * exit;
+ *
+ * The purpose of mark_fastcall_pattern_for_call is to:
+ * - look for such patterns;
+ * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern;
+ * - set env->insn_aux_data[*].fastcall_spills_num for the call instruction;
+ * - update env->subprog_info[*]->fastcall_stack_off to find an offset
+ * at which bpf_fastcall spill/fill stack slots start;
+ * - update env->subprog_info[*]->keep_fastcall_stack.
+ *
+ * The .fastcall_pattern and .fastcall_stack_off are used by
+ * check_fastcall_stack_contract() to check if every stack access to
+ * a fastcall spill/fill stack slot originates from spill/fill
+ * instructions that are members of fastcall patterns.
+ *
+ * If this condition holds for a subprogram, fastcall patterns can
+ * be rewritten by remove_fastcall_spills_fills().
+ * Otherwise bpf_fastcall patterns are not changed in the subprogram
+ * (code, presumably, generated by an older clang version).
+ *
+ * For example, it is *not* safe to remove spill/fill below:
+ *
+ * r1 = 1;
+ * *(u64 *)(r10 - 8) = r1; r1 = 1;
+ * call %[to_be_inlined] --> call %[to_be_inlined]
+ * r1 = *(u64 *)(r10 - 8); r0 = *(u64 *)(r10 - 8); <---- wrong !!!
+ * r0 = *(u64 *)(r10 - 8); r0 += r1;
+ * r0 += r1; exit;
+ * exit;
+ */
+static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
+ struct bpf_subprog_info *subprog,
+ int insn_idx, s16 lowest_off)
+{
+ struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
+ struct bpf_insn *call = &env->prog->insnsi[insn_idx];
+ const struct bpf_func_proto *fn;
+ u32 clobbered_regs_mask = ALL_CALLER_SAVED_REGS;
+ u32 expected_regs_mask;
+ bool can_be_inlined = false;
+ s16 off;
+ int i;
+
+ if (bpf_helper_call(call)) {
+ if (get_helper_proto(env, call->imm, &fn) < 0)
+ /* error would be reported later */
+ return;
+ clobbered_regs_mask = helper_fastcall_clobber_mask(fn);
+ can_be_inlined = fn->allow_fastcall &&
+ (verifier_inlines_helper_call(env, call->imm) ||
+ bpf_jit_inlines_helper_call(call->imm));
+ }
+
+ if (bpf_pseudo_kfunc_call(call)) {
+ struct bpf_kfunc_call_arg_meta meta;
+ int err;
+
+ err = fetch_kfunc_meta(env, call, &meta, NULL);
+ if (err < 0)
+ /* error would be reported later */
+ return;
+
+ clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta);
+ can_be_inlined = is_fastcall_kfunc_call(&meta);
+ }
+
+ if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS)
+ return;
+
+ /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
+ expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;
+
+ /* match pairs of form:
+ *
+ * *(u64 *)(r10 - Y) = rX (where Y % 8 == 0)
+ * ...
+ * call %[to_be_inlined]
+ * ...
+ * rX = *(u64 *)(r10 - Y)
+ */
+ for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) {
+ if (insn_idx - i < 0 || insn_idx + i >= env->prog->len)
+ break;
+ stx = &insns[insn_idx - i];
+ ldx = &insns[insn_idx + i];
+ /* must be a stack spill/fill pair */
+ if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) ||
+ ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) ||
+ stx->dst_reg != BPF_REG_10 ||
+ ldx->src_reg != BPF_REG_10)
+ break;
+ /* must be a spill/fill for the same reg */
+ if (stx->src_reg != ldx->dst_reg)
+ break;
+ /* must be one of the previously unseen registers */
+ if ((BIT(stx->src_reg) & expected_regs_mask) == 0)
+ break;
+ /* must be a spill/fill for the same expected offset,
+ * no need to check offset alignment, BPF_DW stack access
+ * is always 8-byte aligned.
+ */
+ if (stx->off != off || ldx->off != off)
+ break;
+ expected_regs_mask &= ~BIT(stx->src_reg);
+ env->insn_aux_data[insn_idx - i].fastcall_pattern = 1;
+ env->insn_aux_data[insn_idx + i].fastcall_pattern = 1;
+ }
+ if (i == 1)
+ return;
+
+ /* Conditionally set 'fastcall_spills_num' to allow forward
+ * compatibility when more helper functions are marked as
+ * bpf_fastcall at compile time than the current kernel supports, e.g.:
+ *
+ * 1: *(u64 *)(r10 - 8) = r1
+ * 2: call A ;; assume A is bpf_fastcall for current kernel
+ * 3: r1 = *(u64 *)(r10 - 8)
+ * 4: *(u64 *)(r10 - 8) = r1
+ * 5: call B ;; assume B is not bpf_fastcall for current kernel
+ * 6: r1 = *(u64 *)(r10 - 8)
+ *
+ * There is no need to block the bpf_fastcall rewrite for such a program.
+ * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy,
+ * but don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
+ * does not remove spill/fill pair {4,6}.
+ */
+ if (can_be_inlined)
+ env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
+ else
+ subprog->keep_fastcall_stack = 1;
+ subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off);
+}
+
+static int mark_fastcall_patterns(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn *insn;
+ s16 lowest_off;
+ int s, i;
+
+ for (s = 0; s < env->subprog_cnt; ++s, ++subprog) {
+ /* find lowest stack spill offset used in this subprog */
+ lowest_off = 0;
+ for (i = subprog->start; i < (subprog + 1)->start; ++i) {
+ insn = env->prog->insnsi + i;
+ if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) ||
+ insn->dst_reg != BPF_REG_10)
+ continue;
+ lowest_off = min(lowest_off, insn->off);
+ }
+ /* use this offset to find fastcall patterns */
+ for (i = subprog->start; i < (subprog + 1)->start; ++i) {
+ insn = env->prog->insnsi + i;
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ continue;
+ mark_fastcall_pattern_for_call(env, subprog, i, lowest_off);
+ }
+ }
+ return 0;
+}
+
/* Visits the instruction at index t and returns one of the following:
* < 0 - an error occurred
* DONE_EXPLORING - the instruction was fully explored
@@ -16772,7 +17282,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
*
* First verification path is [1-6]:
* - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
- * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
+ * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark
* r7 <= X, because r6 and r7 share same id.
* Next verification path is [1-4, 6].
*
@@ -17566,7 +18076,7 @@ hit:
* the current state.
*/
if (is_jmp_point(env, env->insn_idx))
- err = err ? : push_jmp_history(env, cur, 0);
+ err = err ? : push_jmp_history(env, cur, 0, 0);
err = err ? : propagate_precision(env, &sl->state);
if (err)
return err;
@@ -17834,7 +18344,7 @@ static int do_check(struct bpf_verifier_env *env)
}
if (is_jmp_point(env, env->insn_idx)) {
- err = push_jmp_history(env, state, 0);
+ err = push_jmp_history(env, state, 0, 0);
if (err)
return err;
}
@@ -18767,6 +19277,9 @@ static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta)
for (i = 0; i < insn_cnt; i++, insn++) {
u8 code = insn->code;
+ if (tgt_idx <= i && i < tgt_idx + delta)
+ continue;
+
if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) ||
BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT)
continue;
@@ -19026,9 +19539,11 @@ static int opt_remove_dead_code(struct bpf_verifier_env *env)
return 0;
}
+static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+
static int opt_remove_nops(struct bpf_verifier_env *env)
{
- const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+ const struct bpf_insn ja = NOP;
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
int i, err;
@@ -19153,14 +19668,39 @@ apply_patch_buffer:
*/
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
const struct bpf_verifier_ops *ops = env->ops;
- int i, cnt, size, ctx_field_size, delta = 0;
+ int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
const int insn_cnt = env->prog->len;
- struct bpf_insn insn_buf[16], *insn;
+ struct bpf_insn *epilogue_buf = env->epilogue_buf;
+ struct bpf_insn *insn_buf = env->insn_buf;
+ struct bpf_insn *insn;
u32 target_size, size_default, off;
struct bpf_prog *new_prog;
enum bpf_access_type type;
bool is_narrower_load;
+ int epilogue_idx = 0;
+
+ if (ops->gen_epilogue) {
+ epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
+ -(subprogs[0].stack_depth + 8));
+ if (epilogue_cnt >= INSN_BUF_SIZE) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ } else if (epilogue_cnt) {
+ /* Save the ARG_PTR_TO_CTX for the epilogue to use */
+ cnt = 0;
+ subprogs[0].stack_depth += 8;
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1,
+ -subprogs[0].stack_depth);
+ insn_buf[cnt++] = env->prog->insnsi[0];
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ delta += cnt - 1;
+ }
+ }
if (ops->gen_prologue || env->seen_direct_write) {
if (!ops->gen_prologue) {
@@ -19169,7 +19709,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
- if (cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
@@ -19182,6 +19722,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
}
+ if (delta)
+ WARN_ON(adjust_jmp_off(env->prog, 0, delta));
+
if (bpf_prog_is_offloaded(env->prog->aux))
return 0;
@@ -19214,6 +19757,25 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
env->prog->aux->num_exentries++;
continue;
+ } else if (insn->code == (BPF_JMP | BPF_EXIT) &&
+ epilogue_cnt &&
+ i + delta < subprogs[1].start) {
+ /* Generate epilogue for the main prog */
+ if (epilogue_idx) {
+ /* jump back to the earlier generated epilogue */
+ insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1);
+ cnt = 1;
+ } else {
+ memcpy(insn_buf, epilogue_buf,
+ epilogue_cnt * sizeof(*epilogue_buf));
+ cnt = epilogue_cnt;
+				/* epilogue_idx cannot be 0: there is at
+				 * least one ctx ptr saving insn before the
+				 * epilogue.
+ */
+ epilogue_idx = i + delta;
+ }
+ goto patch_insn_buf;
} else {
continue;
}
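A toy model of the exit-patching strategy above (characters stand in for instructions; the real code also tracks insn deltas and only rewrites exits of the main program): the epilogue body is emitted at the first exit and every later exit becomes a jump back to it.

    #include <stdio.h>

    int main(void)
    {
        /* 'E' = exit, '.' = any other instruction */
        char prog[] = "..E....E..E";
        int epilogue_idx = -1;

        for (int i = 0; prog[i]; i++) {
            if (prog[i] != 'E')
                continue;
            if (epilogue_idx < 0) {
                prog[i] = 'P';      /* first exit: emit the epilogue here */
                epilogue_idx = i;
            } else {
                prog[i] = 'J';      /* later exits: jump back to it */
                printf("exit at %d -> jump to %d\n", i, epilogue_idx);
            }
        }
        printf("%s\n", prog);       /* ..P....J..J */
        return 0;
    }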
@@ -19316,7 +19878,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
target_size = 0;
cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
&target_size);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
(ctx_field_size && !target_size)) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
@@ -19325,7 +19887,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (is_narrower_load && size < target_size) {
u8 shift = bpf_ctx_narrow_access_offset(
off, size, size_default) * 8;
- if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
+ if (shift && cnt + 1 >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier narrow ctx load misconfigured\n");
return -EINVAL;
}
@@ -19350,6 +19912,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->dst_reg, insn->dst_reg,
size * 8, 0);
+patch_insn_buf:
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
@@ -19870,7 +20433,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
const int insn_cnt = prog->len;
const struct bpf_map_ops *ops;
struct bpf_insn_aux_data *aux;
- struct bpf_insn insn_buf[16];
+ struct bpf_insn *insn_buf = env->insn_buf;
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
int i, ret, cnt, delta = 0, cur_subprog = 0;
@@ -19913,13 +20476,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
- /* Make divide-by-zero exceptions impossible. */
+ /* Make sdiv/smod divide-by-minus-one exceptions impossible. */
+ if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) ||
+ insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) ||
+ insn->code == (BPF_ALU | BPF_MOD | BPF_K) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_K)) &&
+ insn->off == 1 && insn->imm == -1) {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ struct bpf_insn *patchlet;
+ struct bpf_insn chk_and_sdiv[] = {
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0),
+ };
+ struct bpf_insn chk_and_smod[] = {
+ BPF_MOV32_IMM(insn->dst_reg, 0),
+ };
+
+ patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
+
+ new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ }
+
+ /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
+ bool is_sdiv = isdiv && insn->off == 1;
+ bool is_smod = !isdiv && insn->off == 1;
struct bpf_insn *patchlet;
struct bpf_insn chk_and_div[] = {
/* [R,W]x div 0 -> 0 */
@@ -19939,10 +20535,62 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
};
+ struct bpf_insn chk_and_sdiv[] = {
+ /* [R,W]x sdiv 0 -> 0
+ * LLONG_MIN sdiv -1 -> LLONG_MIN
+ * INT_MIN sdiv -1 -> INT_MIN
+ */
+ BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 4, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 1, 0),
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_MOV | BPF_K, insn->dst_reg,
+ 0, 0, 0),
+ /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ *insn,
+ };
+ struct bpf_insn chk_and_smod[] = {
+ /* [R,W]x mod 0 -> [R,W]x */
+ /* [R,W]x mod -1 -> 0 */
+ BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
+ BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 3, 1),
+ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 3 + (is64 ? 0 : 1), 1),
+ BPF_MOV32_IMM(insn->dst_reg, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ *insn,
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
+ };
- patchlet = isdiv ? chk_and_div : chk_and_mod;
- cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
- ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+ if (is_sdiv) {
+ patchlet = chk_and_sdiv;
+ cnt = ARRAY_SIZE(chk_and_sdiv);
+ } else if (is_smod) {
+ patchlet = chk_and_smod;
+ cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
+ } else {
+ patchlet = isdiv ? chk_and_div : chk_and_mod;
+ cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
+ ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+ }
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
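The patchlets above pin down corner cases that plain C leaves undefined. A user-space sketch of the resulting BPF semantics (bpf_sdiv()/bpf_smod() are illustrative names, not kernel functions): x sdiv 0 -> 0, x smod 0 -> x, x sdiv -1 -> -x (so LLONG_MIN stays LLONG_MIN), x smod -1 -> 0.

    #include <limits.h>
    #include <stdio.h>

    /* what the rewritten BPF_DIV/BPF_MOD with off == 1 compute; the plain
     * C expressions x / y and x % y would be undefined for y == 0 and for
     * LLONG_MIN with y == -1
     */
    static long long bpf_sdiv(long long x, long long y)
    {
        if (y == 0)
            return 0;
        if (y == -1)                     /* mirrors the BPF_NEG patchlet */
            return x == LLONG_MIN ? LLONG_MIN : -x;
        return x / y;
    }

    static long long bpf_smod(long long x, long long y)
    {
        if (y == 0)
            return x;
        if (y == -1)
            return 0;
        return x % y;
    }

    int main(void)
    {
        printf("%lld\n", bpf_sdiv(LLONG_MIN, -1)); /* LLONG_MIN */
        printf("%lld\n", bpf_sdiv(42, 0));         /* 0 */
        printf("%lld\n", bpf_smod(42, 0));         /* 42 */
        printf("%lld\n", bpf_smod(42, -1));        /* 0 */
        return 0;
    }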
@@ -19989,7 +20637,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
(BPF_MODE(insn->code) == BPF_ABS ||
BPF_MODE(insn->code) == BPF_IND)) {
cnt = env->ops->gen_ld_abs(insn, insn_buf);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
@@ -20282,7 +20930,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
cnt = ops->map_gen_lookup(map_ptr, insn_buf);
if (cnt == -EOPNOTSUPP)
goto patch_map_ops_generic;
- if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
@@ -20384,7 +21032,7 @@ patch_map_ops_generic:
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
/* Implement bpf_get_smp_processor_id() inline. */
if (insn->imm == BPF_FUNC_get_smp_processor_id &&
- prog->jit_requested && bpf_jit_supports_percpu_insn()) {
+ verifier_inlines_helper_call(env, insn->imm)) {
/* BPF_FUNC_get_smp_processor_id inlining is an
* optimization, so if pcpu_hot.cpu_number is ever
* changed in some incompatible and hard to support
@@ -20642,7 +21290,7 @@ static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
int position,
s32 stack_base,
u32 callback_subprogno,
- u32 *cnt)
+ u32 *total_cnt)
{
s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
@@ -20651,55 +21299,56 @@ static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
int reg_loop_cnt = BPF_REG_7;
int reg_loop_ctx = BPF_REG_8;
+ struct bpf_insn *insn_buf = env->insn_buf;
struct bpf_prog *new_prog;
u32 callback_start;
u32 call_insn_offset;
s32 callback_offset;
+ u32 cnt = 0;
/* This represents an inlined version of bpf_iter.c:bpf_loop,
* be careful to modify this code in sync.
*/
- struct bpf_insn insn_buf[] = {
- /* Return error and jump to the end of the patch if
- * expected number of iterations is too big.
- */
- BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
- BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
- BPF_JMP_IMM(BPF_JA, 0, 0, 16),
- /* spill R6, R7, R8 to use these as loop vars */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
- /* initialize loop vars */
- BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
- BPF_MOV32_IMM(reg_loop_cnt, 0),
- BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
- /* loop header,
- * if reg_loop_cnt >= reg_loop_max skip the loop body
- */
- BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
- /* callback call,
- * correct callback offset would be set after patching
- */
- BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
- BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
- BPF_CALL_REL(0),
- /* increment loop counter */
- BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
- /* jump to loop header if callback returned 0 */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
- /* return value of bpf_loop,
- * set R0 to the number of iterations
- */
- BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
- /* restore original values of R6, R7, R8 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
- BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
- };
- *cnt = ARRAY_SIZE(insn_buf);
- new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+ /* Return error and jump to the end of the patch if
+ * expected number of iterations is too big.
+ */
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
+ insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16);
+ /* spill R6, R7, R8 to use these as loop vars */
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
+ insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
+ /* initialize loop vars */
+ insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
+ insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0);
+ insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
+ /* loop header,
+ * if reg_loop_cnt >= reg_loop_max skip the loop body
+ */
+ insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5);
+ /* callback call,
+ * correct callback offset would be set after patching
+ */
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
+ insn_buf[cnt++] = BPF_CALL_REL(0);
+ /* increment loop counter */
+ insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
+ /* jump to loop header if callback returned 0 */
+ insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6);
+ /* return value of bpf_loop,
+ * set R0 to the number of iterations
+ */
+ insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
+ /* restore original values of R6, R7, R8 */
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
+ insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
+
+ *total_cnt = cnt;
+ new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt);
if (!new_prog)
return new_prog;
@@ -20774,6 +21423,40 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env)
return 0;
}
+/* Remove unnecessary spill/fill pairs, members of fastcall pattern,
+ * adjust subprograms stack depth when possible.
+ */
+static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ u32 spills_num;
+ bool modified = false;
+ int i, j;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (aux[i].fastcall_spills_num > 0) {
+ spills_num = aux[i].fastcall_spills_num;
+ /* NOPs would be removed by opt_remove_nops() */
+ for (j = 1; j <= spills_num; ++j) {
+ *(insn - j) = NOP;
+ *(insn + j) = NOP;
+ }
+ modified = true;
+ }
+ if ((subprog + 1)->start == i + 1) {
+ if (modified && !subprog->keep_fastcall_stack)
+ subprog->stack_depth = -subprog->fastcall_stack_off;
+ subprog++;
+ modified = false;
+ }
+ }
+
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
@@ -21047,6 +21730,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
u32 btf_id, member_idx;
struct btf *btf;
const char *mname;
+ int err;
if (!prog->gpl_compatible) {
verbose(env, "struct ops programs must have a GPL compatible license\n");
@@ -21094,8 +21778,15 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
+ err = bpf_struct_ops_supported(st_ops, __btf_member_bit_offset(t, member) / 8);
+ if (err) {
+ verbose(env, "attach to unsupported member %s of struct %s\n",
+ mname, st_ops->name);
+ return err;
+ }
+
if (st_ops->check_member) {
- int err = st_ops->check_member(t, member, prog);
+ err = st_ops->check_member(t, member, prog);
if (err) {
verbose(env, "attach to unsupported member %s of struct %s\n",
@@ -21706,6 +22397,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret < 0)
goto skip_full_check;
+ ret = mark_fastcall_patterns(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = do_check_main(env);
ret = ret ?: do_check_subprogs(env);
@@ -21715,6 +22410,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
skip_full_check:
kvfree(env->explored_states);
+ /* might decrease stack depth, keep it before passes that
+ * allocate additional slots.
+ */
+ if (ret == 0)
+ ret = remove_fastcall_spills_fills(env);
+
if (ret == 0)
ret = check_max_stack_depth(env);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4f03eb908e7f..0016f5ff9ba2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8964,7 +8964,7 @@ got_name:
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
if (atomic_read(&nr_build_id_events))
- build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
+ build_id_parse_nofault(vma, mmap_event->build_id, &mmap_event->build_id_size);
perf_iterate_sb(perf_event_mmap_output,
mmap_event,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ac0a01cc8634..a582cd25ca87 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -24,7 +24,6 @@
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>
-#include <linux/fileattr.h>
#include <net/bpf_sk_storage.h>
@@ -798,29 +797,6 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = {
.ret_btf_id = &bpf_task_pt_regs_ids[0],
};
-BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
-{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
- struct cgroup *cgrp;
-
- if (unlikely(idx >= array->map.max_entries))
- return -E2BIG;
-
- cgrp = READ_ONCE(array->ptrs[idx]);
- if (unlikely(!cgrp))
- return -EAGAIN;
-
- return task_under_cgroup_hierarchy(current, cgrp);
-}
-
-static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
- .func = bpf_current_task_under_cgroup,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_ANYTHING,
-};
-
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
@@ -1226,7 +1202,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_LONG,
+ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg3_size = sizeof(u64),
};
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
@@ -1242,7 +1219,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = {
.func = get_func_ret,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_LONG,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
+ .arg2_size = sizeof(u64),
};
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
@@ -1439,73 +1417,6 @@ static int __init bpf_key_sig_kfuncs_init(void)
late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
-/* filesystem kfuncs */
-__bpf_kfunc_start_defs();
-
-/**
- * bpf_get_file_xattr - get xattr of a file
- * @file: file to get xattr from
- * @name__str: name of the xattr
- * @value_p: output buffer of the xattr value
- *
- * Get xattr *name__str* of *file* and store the output in *value_ptr*.
- *
- * For security reasons, only *name__str* with prefix "user." is allowed.
- *
- * Return: 0 on success, a negative value on error.
- */
-__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
- struct bpf_dynptr *value_p)
-{
- struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
- struct dentry *dentry;
- u32 value_len;
- void *value;
- int ret;
-
- if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- return -EPERM;
-
- value_len = __bpf_dynptr_size(value_ptr);
- value = __bpf_dynptr_data_rw(value_ptr, value_len);
- if (!value)
- return -EINVAL;
-
- dentry = file_dentry(file);
- ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ);
- if (ret)
- return ret;
- return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len);
-}
-
-__bpf_kfunc_end_defs();
-
-BTF_KFUNCS_START(fs_kfunc_set_ids)
-BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_KFUNCS_END(fs_kfunc_set_ids)
-
-static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
-{
- if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id))
- return 0;
-
- /* Only allow to attach from LSM hooks, to avoid recursion */
- return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0;
-}
-
-static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &fs_kfunc_set_ids,
- .filter = bpf_get_file_xattr_filter,
-};
-
-static int __init bpf_fs_kfuncs_init(void)
-{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
-}
-
-late_initcall(bpf_fs_kfuncs_init);
-
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1548,8 +1459,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_numa_node_id_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_write_user:
@@ -1578,6 +1487,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
return &bpf_cgrp_storage_delete_proto;
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
#endif
case BPF_FUNC_send_signal:
return &bpf_send_signal_proto;
@@ -1598,7 +1509,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
case BPF_FUNC_get_task_stack:
- return &bpf_get_task_stack_proto;
+ return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
+ : &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return &bpf_copy_from_user_proto;
case BPF_FUNC_copy_from_user_task:
@@ -1654,7 +1566,7 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
case BPF_FUNC_get_stack:
- return &bpf_get_stack_proto;
+ return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
@@ -3299,7 +3211,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
struct bpf_run_ctx *old_run_ctx;
int err = 0;
- if (link->task && current->mm != link->task->mm)
+ if (link->task && !same_thread_group(current, link->task))
return 0;
if (sleepable)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 9c581d6da843..785733245ead 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -564,6 +564,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re
BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));
/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
+ perf_fetch_caller_regs(regs);
*(struct pt_regs **)&param = regs;
param.syscall_nr = rec->nr;
for (i = 0; i < sys_data->nb_args; i++)
@@ -575,6 +576,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
+ struct pt_regs *fake_regs;
struct hlist_head *head;
unsigned long args[6];
bool valid_prog_array;
@@ -602,7 +604,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
size = ALIGN(size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- rec = perf_trace_buf_alloc(size, NULL, &rctx);
+ rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
if (!rec)
return;
@@ -611,7 +613,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
- !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
+ !perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||
hlist_empty(head)) {
perf_swevent_put_recursion_context(rctx);
return;
@@ -666,6 +668,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg
} __aligned(8) param;
/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
+ perf_fetch_caller_regs(regs);
*(struct pt_regs **)&param = regs;
param.syscall_nr = rec->nr;
param.ret = rec->ret;
@@ -676,6 +679,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
+ struct pt_regs *fake_regs;
struct hlist_head *head;
bool valid_prog_array;
int syscall_nr;
@@ -701,7 +705,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- rec = perf_trace_buf_alloc(size, NULL, &rctx);
+ rec = perf_trace_buf_alloc(size, &fake_regs, &rctx);
if (!rec)
return;
@@ -709,7 +713,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->ret = syscall_get_return_value(current, regs);
if ((valid_prog_array &&
- !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) ||
+ !perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) ||
hlist_empty(head)) {
perf_swevent_put_recursion_context(rctx);
return;