author     Dave Airlie <airlied@redhat.com>    2022-11-24 11:05:43 +1000
committer  Dave Airlie <airlied@redhat.com>    2022-11-24 11:05:43 +1000
commit     d47f9580839eb6fe568e38b2084d94887fbf5ce0 (patch)
tree       2493555057f8e5c850590d7438fdbf4472666201 /kernel
parent     3d335a523b938a445a674be24d1dd5c7a4c86fb6 (diff)
parent     eb7081409f94a9a8608593d0fb63a1aa3d6f95d8 (diff)
Backmerge tag 'v6.1-rc6' into drm-next

Linux 6.1-rc6

This is needed for drm-misc-next and tegra.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/btf.c                       5
-rw-r--r--  kernel/bpf/dispatcher.c               22
-rw-r--r--  kernel/bpf/memalloc.c                 18
-rw-r--r--  kernel/bpf/percpu_freelist.c          23
-rw-r--r--  kernel/bpf/verifier.c                 32
-rw-r--r--  kernel/events/core.c                  26
-rw-r--r--  kernel/events/hw_breakpoint_test.c     4
-rw-r--r--  kernel/kprobes.c                      13
-rw-r--r--  kernel/power/hibernate.c               2
-rw-r--r--  kernel/rcu/tree.c                     10
-rw-r--r--  kernel/rseq.c                         19
-rw-r--r--  kernel/sched/core.c                   52
-rw-r--r--  kernel/trace/fprobe.c                  5
-rw-r--r--  kernel/trace/ftrace.c                 21
-rw-r--r--  kernel/trace/kprobe_event_gen_test.c  66
-rw-r--r--  kernel/trace/rethook.c                 4
-rw-r--r--  kernel/trace/ring_buffer.c            82
-rw-r--r--  kernel/trace/synth_event_gen_test.c   16
-rw-r--r--  kernel/trace/trace.c                  12
-rw-r--r--  kernel/trace/trace_eprobe.c            8
-rw-r--r--  kernel/trace/trace_events_synth.c      5
-rw-r--r--  kernel/trace/trace_syscalls.c          2
22 files changed, 289 insertions(+), 158 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index eba603cec2c5..35c07afac924 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4436,6 +4436,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
return -EINVAL;
}
+ if (btf_type_is_resolve_source_only(ret_type)) {
+ btf_verifier_log_type(env, t, "Invalid return type");
+ return -EINVAL;
+ }
+
if (btf_type_needs_resolve(ret_type) &&
!env_type_is_resolved(env, ret_type_id)) {
err = btf_resolve(env, ret_type, ret_type_id);
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index fa64b80b8bca..c19719f48ce0 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -4,6 +4,7 @@
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/static_call.h>
/* The BPF dispatcher is a multiway branch code generator. The
* dispatcher is a mechanism to avoid the performance penalty of an
@@ -104,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *b
static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
{
- void *old, *new, *tmp;
- u32 noff;
- int err;
-
- if (!prev_num_progs) {
- old = NULL;
- noff = 0;
- } else {
- old = d->image + d->image_off;
+ void *new, *tmp;
+ u32 noff = 0;
+
+ if (prev_num_progs)
noff = d->image_off ^ (PAGE_SIZE / 2);
- }
new = d->num_progs ? d->image + noff : NULL;
tmp = d->num_progs ? d->rw_image + noff : NULL;
@@ -128,11 +123,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
return;
}
- err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new);
- if (err || !new)
- return;
+ __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func);
- d->image_off = noff;
+ if (new)
+ d->image_off = noff;
}
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
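The __BPF_DISPATCHER_UPDATE() argument `new ?: (void *)&bpf_dispatcher_nop_func` above relies on GNU C's binary `?:` operator, which evaluates to its left operand when that is non-NULL and to the right operand otherwise. A minimal userspace sketch of that fallback selection, with hypothetical names standing in for the dispatcher image and nop function:

	#include <stdio.h>

	static void nop_func(void) { }			/* stands in for bpf_dispatcher_nop_func */
	static void prog_func(void) { puts("dispatched"); }

	int main(void)
	{
		void (*image)(void) = NULL;
		void (*target)(void);

		target = image ?: nop_func;	/* GNU extension: x ?: y means x ? x : y */
		target();			/* no output: image is NULL, nop runs */

		image = prog_func;
		target = image ?: nop_func;
		target();			/* prints "dispatched" */
		return 0;
	}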
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5f83be1d2018..4901fa1048cd 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -418,14 +418,17 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
/* No progs are using this bpf_mem_cache, but htab_map_free() called
* bpf_mem_cache_free() for all remaining elements and they can be in
* free_by_rcu or in waiting_for_gp lists, so drain those lists now.
+ *
+ * Except for waiting_for_gp list, there are no concurrent operations
+ * on these lists, so it is safe to use __llist_del_all().
*/
llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
free_one(c, llnode);
llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
free_one(c, llnode);
- llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist))
+ llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
free_one(c, llnode);
- llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra))
+ llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
free_one(c, llnode);
}
@@ -493,6 +496,16 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
rcu_in_progress = 0;
for_each_possible_cpu(cpu) {
c = per_cpu_ptr(ma->cache, cpu);
+ /*
+ * refill_work may be unfinished for a PREEMPT_RT kernel,
+ * in which irq work is invoked in a per-CPU RT thread.
+ * It is also possible, on a kernel where
+ * arch_irq_work_has_interrupt() is false, for the irq
+ * work to be invoked from a timer interrupt. So wait for
+ * the completion of the irq work to ease the handling of
+ * concurrency.
+ */
+ irq_work_sync(&c->refill_work);
drain_mem_cache(c);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
@@ -507,6 +520,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
cc = per_cpu_ptr(ma->caches, cpu);
for (i = 0; i < NUM_CACHES; i++) {
c = &cc->cache[i];
+ irq_work_sync(&c->refill_work);
drain_mem_cache(c);
rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
}
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index b6e7f5c5b9ab..034cf87b54e9 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -100,22 +100,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
u32 nr_elems)
{
struct pcpu_freelist_head *head;
- int i, cpu, pcpu_entries;
+ unsigned int cpu, cpu_idx, i, j, n, m;
- pcpu_entries = nr_elems / num_possible_cpus() + 1;
- i = 0;
+ n = nr_elems / num_possible_cpus();
+ m = nr_elems % num_possible_cpus();
+ cpu_idx = 0;
for_each_possible_cpu(cpu) {
-again:
head = per_cpu_ptr(s->freelist, cpu);
- /* No locking required as this is not visible yet. */
- pcpu_freelist_push_node(head, buf);
- i++;
- buf += elem_size;
- if (i == nr_elems)
- break;
- if (i % pcpu_entries)
- goto again;
+ j = n + (cpu_idx < m ? 1 : 0);
+ for (i = 0; i < j; i++) {
+ /* No locking required as this is not visible yet. */
+ pcpu_freelist_push_node(head, buf);
+ buf += elem_size;
+ }
+ cpu_idx++;
}
}
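The rewritten loop above replaces the `goto again` round-robin with a closed-form split: every CPU gets n = nr_elems / num_possible_cpus() nodes, and the first m = nr_elems % num_possible_cpus() CPUs get one extra, so per-CPU counts differ by at most one. A standalone sketch of that arithmetic (hypothetical split_even helper, plain userspace C):

	#include <stdio.h>

	static void split_even(unsigned int nr_elems, unsigned int nr_cpus)
	{
		unsigned int n = nr_elems / nr_cpus;	/* guaranteed share per CPU */
		unsigned int m = nr_elems % nr_cpus;	/* remainder, one extra each */
		unsigned int cpu;

		for (cpu = 0; cpu < nr_cpus; cpu++)
			printf("cpu%u: %u elems\n", cpu, n + (cpu < m ? 1 : 0));
	}

	int main(void)
	{
		split_even(10, 4);	/* prints 3, 3, 2, 2: at most one apart */
		return 0;
	}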
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 014ee0953dbd..264b3dc714cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1027,12 +1027,17 @@ out:
*/
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
+ void *new_arr;
+
if (!new_n || old_n == new_n)
goto out;
- arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
- if (!arr)
+ new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
+ if (!new_arr) {
+ kfree(arr);
return NULL;
+ }
+ arr = new_arr;
if (new_n > old_n)
memset(arr + old_n * size, 0, (new_n - old_n) * size);
@@ -6618,8 +6623,12 @@ static int release_reference(struct bpf_verifier_env *env,
return err;
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg->ref_obj_id == ref_obj_id)
- __mark_reg_unknown(env, reg);
+ if (reg->ref_obj_id == ref_obj_id) {
+ if (!env->allow_ptr_leaks)
+ __mark_reg_not_init(env, reg);
+ else
+ __mark_reg_unknown(env, reg);
+ }
}));
return 0;
@@ -6736,11 +6745,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* Transfer references to the callee */
err = copy_reference_state(callee, caller);
if (err)
- return err;
+ goto err_out;
err = set_callee_state_cb(env, caller, callee, *insn_idx);
if (err)
- return err;
+ goto err_out;
clear_caller_saved_regs(env, caller->regs);
@@ -6757,6 +6766,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
print_verifier_state(env, callee, true);
}
return 0;
+
+err_out:
+ free_func_state(callee);
+ state->frame[state->curframe + 1] = NULL;
+ return err;
}
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
@@ -6946,6 +6960,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
+ callee->callback_ret_range = tnum_range(0, 1);
return 0;
}
@@ -6969,8 +6984,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return -EINVAL;
}
- state->curframe--;
- caller = state->frame[state->curframe];
+ caller = state->frame[state->curframe - 1];
if (callee->in_callback_fn) {
/* enforce R0 return value range [0, 1]. */
struct tnum range = callee->callback_ret_range;
@@ -7009,7 +7023,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
}
/* clear everything in the callee */
free_func_state(callee);
- state->frame[state->curframe + 1] = NULL;
+ state->frame[state->curframe--] = NULL;
return 0;
}
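The realloc_array() hunk above follows the standard realloc idiom: never assign the result over the only pointer to the buffer, because on failure the original allocation is still live and would otherwise leak. A userspace sketch of the fixed shape, with libc realloc standing in for krealloc_array (which additionally checks new_n * size for overflow):

	#include <stdlib.h>
	#include <string.h>

	static void *realloc_or_free(void *arr, size_t old_n, size_t new_n, size_t size)
	{
		void *new_arr = realloc(arr, new_n * size);

		if (!new_arr) {
			free(arr);	/* realloc failed, arr is still valid: release it */
			return NULL;
		}
		if (new_n > old_n)	/* zero the grown tail, as the patch does */
			memset((char *)new_arr + old_n * size, 0,
			       (new_n - old_n) * size);
		return new_arr;
	}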
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 01933db7629c..884871427a94 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9306,14 +9306,27 @@ static int __perf_event_overflow(struct perf_event *event,
}
if (event->attr.sigtrap) {
- /*
- * Should not be able to return to user space without processing
- * pending_sigtrap (kernel events can overflow multiple times).
- */
- WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel);
+ unsigned int pending_id = 1;
+
+ if (regs)
+ pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
if (!event->pending_sigtrap) {
- event->pending_sigtrap = 1;
+ event->pending_sigtrap = pending_id;
local_inc(&event->ctx->nr_pending);
+ } else if (event->attr.exclude_kernel) {
+ /*
+ * Should not be able to return to user space without
+ * consuming pending_sigtrap; with exceptions:
+ *
+ * 1. Where !exclude_kernel, events can overflow again
+ * in the kernel without returning to user space.
+ *
+ * 2. Events that can overflow again before the IRQ-
+ * work without user space progress (e.g. hrtimer).
+ * To approximate progress (with false negatives),
+ * check 32-bit hash of the current IP.
+ */
+ WARN_ON_ONCE(event->pending_sigtrap != pending_id);
}
event->pending_addr = data->addr;
irq_work_queue(&event->pending_irq);
@@ -9846,6 +9859,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
perf_sample_data_init(&data, 0, 0);
data.raw = &raw;
+ data.sample_flags |= PERF_SAMPLE_RAW;
perf_trace_buf_update(record, event_type);
diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c
index 5ced822df788..c57610f52bb4 100644
--- a/kernel/events/hw_breakpoint_test.c
+++ b/kernel/events/hw_breakpoint_test.c
@@ -295,11 +295,11 @@ static int test_init(struct kunit *test)
{
/* Most test cases want 2 distinct CPUs. */
if (num_online_cpus() < 2)
- return -EINVAL;
+ kunit_skip(test, "not enough cpus");
/* Want the system to not use breakpoints elsewhere. */
if (hw_breakpoint_is_used())
- return -EBUSY;
+ kunit_skip(test, "hw breakpoint already in use");
return 0;
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3220b0a2fb4a..3050631e528d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1766,7 +1766,13 @@ static int __unregister_kprobe_top(struct kprobe *p)
if ((list_p != p) && (list_p->post_handler))
goto noclean;
}
- ap->post_handler = NULL;
+ /*
+ * For the kprobe-on-ftrace case, we keep the
+ * post_handler setting to identify this aggrprobe
+ * armed with kprobe_ipmodify_ops.
+ */
+ if (!kprobe_ftrace(ap))
+ ap->post_handler = NULL;
}
noclean:
/*
@@ -2429,8 +2435,11 @@ int enable_kprobe(struct kprobe *kp)
if (!kprobes_all_disarmed && kprobe_disabled(p)) {
p->flags &= ~KPROBE_FLAG_DISABLED;
ret = arm_kprobe(p);
- if (ret)
+ if (ret) {
p->flags |= KPROBE_FLAG_DISABLED;
+ if (p != kp)
+ kp->flags |= KPROBE_FLAG_DISABLED;
+ }
}
out:
mutex_unlock(&kprobe_mutex);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f58a0aa92310..793c55a2becb 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -645,7 +645,7 @@ static void power_down(void)
int error;
if (hibernation_mode == HIBERNATION_SUSPEND) {
- error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+ error = suspend_devices_and_enter(mem_sleep_current);
if (error) {
hibernation_mode = hibernation_ops ?
HIBERNATION_PLATFORM :
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6bb8e72bc815..93416afebd59 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1403,30 +1403,32 @@ static void rcu_poll_gp_seq_end(unsigned long *snap)
// where caller does not hold the root rcu_node structure's lock.
static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
{
+ unsigned long flags;
struct rcu_node *rnp = rcu_get_root();
if (rcu_init_invoked()) {
lockdep_assert_irqs_enabled();
- raw_spin_lock_irq_rcu_node(rnp);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
rcu_poll_gp_seq_start(snap);
if (rcu_init_invoked())
- raw_spin_unlock_irq_rcu_node(rnp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
// Make the polled API aware of the end of a grace period, but where
// caller does not hold the root rcu_node structure's lock.
static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
{
+ unsigned long flags;
struct rcu_node *rnp = rcu_get_root();
if (rcu_init_invoked()) {
lockdep_assert_irqs_enabled();
- raw_spin_lock_irq_rcu_node(rnp);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
}
rcu_poll_gp_seq_end(snap);
if (rcu_init_invoked())
- raw_spin_unlock_irq_rcu_node(rnp);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/*
diff --git a/kernel/rseq.c b/kernel/rseq.c
index bda8175f8f99..d38ab944105d 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -171,12 +171,27 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
return 0;
}
+static bool rseq_warn_flags(const char *str, u32 flags)
+{
+ u32 test_flags;
+
+ if (!flags)
+ return false;
+ test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
+ if (test_flags)
+ pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
+ test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
+ if (test_flags)
+ pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
+ return true;
+}
+
static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
{
u32 flags, event_mask;
int ret;
- if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags)
+ if (rseq_warn_flags("rseq_cs", cs_flags))
return -EINVAL;
/* Get thread flags. */
@@ -184,7 +199,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
if (ret)
return ret;
- if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags)
+ if (rseq_warn_flags("rseq", flags))
return -EINVAL;
/*
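rseq_warn_flags() above partitions a nonzero flags word with one mask: bits inside RSEQ_CS_NO_RESTART_FLAGS are known-but-deprecated, bits outside it are unknown, and either case now warns once while the caller still fails with -EINVAL, instead of tripping WARN_ON_ONCE. A userspace sketch of the mask split (the 0x7 mask value here is an assumption for illustration):

	#include <stdio.h>
	#include <stdbool.h>

	#define NO_RESTART_FLAGS 0x7u	/* assumed stand-in for RSEQ_CS_NO_RESTART_FLAGS */

	static bool warn_flags(const char *str, unsigned int flags)
	{
		unsigned int test_flags;

		if (!flags)
			return false;
		test_flags = flags & NO_RESTART_FLAGS;	/* deprecated but known bits */
		if (test_flags)
			printf("Deprecated flags (%u) in %s\n", test_flags, str);
		test_flags = flags & ~NO_RESTART_FLAGS;	/* anything else is unknown */
		if (test_flags)
			printf("Unknown flags (%u) in %s\n", test_flags, str);
		return true;	/* any nonzero flags word is rejected */
	}

	int main(void)
	{
		warn_flags("rseq_cs", 0x9);	/* bit 0 deprecated, bit 3 unknown */
		return 0;
	}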
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cb2aa2b54c7a..daff72f00385 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4200,6 +4200,40 @@ out:
return success;
}
+static bool __task_needs_rq_lock(struct task_struct *p)
+{
+ unsigned int state = READ_ONCE(p->__state);
+
+ /*
+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
+ * the task is blocked. Make sure to check @state since ttwu() can drop
+ * locks at the end, see ttwu_queue_wakelist().
+ */
+ if (state == TASK_RUNNING || state == TASK_WAKING)
+ return true;
+
+ /*
+ * Ensure we load p->on_rq after p->__state, otherwise it would be
+ * possible to, falsely, observe p->on_rq == 0.
+ *
+ * See try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+ if (p->on_rq)
+ return true;
+
+#ifdef CONFIG_SMP
+ /*
+ * Ensure the task has finished __schedule() and will not be referenced
+ * anymore. Again, see try_to_wake_up() for a longer comment.
+ */
+ smp_rmb();
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
+#endif
+
+ return false;
+}
+
/**
* task_call_func - Invoke a function on task in fixed state
* @p: Process for which the function is to be invoked, can be @current.
@@ -4217,28 +4251,12 @@ out:
int task_call_func(struct task_struct *p, task_call_f func, void *arg)
{
struct rq *rq = NULL;
- unsigned int state;
struct rq_flags rf;
int ret;
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
- state = READ_ONCE(p->__state);
-
- /*
- * Ensure we load p->on_rq after p->__state, otherwise it would be
- * possible to, falsely, observe p->on_rq == 0.
- *
- * See try_to_wake_up() for a longer comment.
- */
- smp_rmb();
-
- /*
- * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when
- * the task is blocked. Make sure to check @state since ttwu() can drop
- * locks at the end, see ttwu_queue_wakelist().
- */
- if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq)
+ if (__task_needs_rq_lock(p))
rq = __task_rq_lock(p, &rf);
/*
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index aac63ca9c3d1..e8143e368074 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -141,6 +141,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
return -E2BIG;
fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
+ if (!fp->rethook)
+ return -ENOMEM;
for (i = 0; i < size; i++) {
struct fprobe_rethook_node *node;
@@ -301,7 +303,8 @@ int unregister_fprobe(struct fprobe *fp)
{
int ret;
- if (!fp || fp->ops.func != fprobe_handler)
+ if (!fp || (fp->ops.saved_func != fprobe_handler &&
+ fp->ops.saved_func != fprobe_kprobe_handler))
return -EINVAL;
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fbf2543111c0..33236241f236 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1289,6 +1289,7 @@ static int ftrace_add_mod(struct trace_array *tr,
if (!ftrace_mod)
return -ENOMEM;
+ INIT_LIST_HEAD(&ftrace_mod->list);
ftrace_mod->func = kstrdup(func, GFP_KERNEL);
ftrace_mod->module = kstrdup(module, GFP_KERNEL);
ftrace_mod->enable = enable;
@@ -3028,18 +3029,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
command |= FTRACE_UPDATE_TRACE_FUNC;
}
- if (!command || !ftrace_enabled) {
- /*
- * If these are dynamic or per_cpu ops, they still
- * need their data freed. Since, function tracing is
- * not currently active, we can just free them
- * without synchronizing all CPUs.
- */
- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- goto free_ops;
-
- return 0;
- }
+ if (!command || !ftrace_enabled)
+ goto out;
/*
* If the ops uses a trampoline, then it needs to be
@@ -3076,6 +3067,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
removed_ops = NULL;
ops->flags &= ~FTRACE_OPS_FL_REMOVING;
+out:
/*
* Dynamic ops may be freed, we must make sure that all
* callers are done before leaving this function.
@@ -3103,7 +3095,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
if (IS_ENABLED(CONFIG_PREEMPTION))
synchronize_rcu_tasks();
- free_ops:
ftrace_trampoline_free(ops);
}
@@ -3200,7 +3191,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
/* if we can't allocate this size, try something smaller */
if (!order)
return -ENOMEM;
- order >>= 1;
+ order--;
goto again;
}
@@ -7401,7 +7392,7 @@ void __init ftrace_init(void)
}
pr_info("ftrace: allocating %ld entries in %ld pages\n",
- count, count / ENTRIES_PER_PAGE + 1);
+ count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
ret = ftrace_process_locs(NULL,
__start_mcount_loc,
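The pr_info() change above fixes an off-by-one in the page estimate: `count / ENTRIES_PER_PAGE + 1` reports one page too many whenever count is an exact multiple of ENTRIES_PER_PAGE, while DIV_ROUND_UP() rounds up only when a partial page remains. A quick check of the arithmetic:

	#include <assert.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		assert(100 / 50 + 1 == 3);		/* old formula: one page too many */
		assert(DIV_ROUND_UP(100, 50) == 2);	/* exact multiple handled */
		assert(DIV_ROUND_UP(101, 50) == 3);	/* partial page still counted */
		return 0;
	}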
diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index 80e04a1e1977..c736487fc0e4 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -73,6 +73,10 @@ static struct trace_event_file *gen_kretprobe_test;
#define KPROBE_GEN_TEST_ARG3 NULL
#endif
+static bool trace_event_file_is_valid(struct trace_event_file *input)
+{
+ return input && !IS_ERR(input);
+}
/*
* Test to make sure we can create a kprobe event, then add more
@@ -100,20 +104,20 @@ static int __init test_gen_kprobe_cmd(void)
KPROBE_GEN_TEST_FUNC,
KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
if (ret)
- goto free;
+ goto out;
/* Use kprobe_event_add_fields to add the rest of the fields */
ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
if (ret)
- goto free;
+ goto out;
/*
* This actually creates the event.
*/
ret = kprobe_event_gen_cmd_end(&cmd);
if (ret)
- goto free;
+ goto out;
/*
* Now get the gen_kprobe_test event file. We need to prevent
@@ -136,13 +140,13 @@ static int __init test_gen_kprobe_cmd(void)
goto delete;
}
out:
+ kfree(buf);
return ret;
delete:
+ if (trace_event_file_is_valid(gen_kprobe_test))
+ gen_kprobe_test = NULL;
/* We got an error after creating the event, delete it */
ret = kprobe_event_delete("gen_kprobe_test");
- free:
- kfree(buf);
-
goto out;
}
@@ -170,14 +174,14 @@ static int __init test_gen_kretprobe_cmd(void)
KPROBE_GEN_TEST_FUNC,
"$retval");
if (ret)
- goto free;
+ goto out;
/*
* This actually creates the event.
*/
ret = kretprobe_event_gen_cmd_end(&cmd);
if (ret)
- goto free;
+ goto out;
/*
* Now get the gen_kretprobe_test event file. We need to
@@ -201,13 +205,13 @@ static int __init test_gen_kretprobe_cmd(void)
goto delete;
}
out:
+ kfree(buf);
return ret;
delete:
+ if (trace_event_file_is_valid(gen_kretprobe_test))
+ gen_kretprobe_test = NULL;
/* We got an error after creating the event, delete it */
ret = kprobe_event_delete("gen_kretprobe_test");
- free:
- kfree(buf);
-
goto out;
}
@@ -221,10 +225,12 @@ static int __init kprobe_event_gen_test_init(void)
ret = test_gen_kretprobe_cmd();
if (ret) {
- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
- "kprobes",
- "gen_kretprobe_test", false));
- trace_put_event_file(gen_kretprobe_test);
+ if (trace_event_file_is_valid(gen_kretprobe_test)) {
+ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
+ "kprobes",
+ "gen_kretprobe_test", false));
+ trace_put_event_file(gen_kretprobe_test);
+ }
WARN_ON(kprobe_event_delete("gen_kretprobe_test"));
}
@@ -233,24 +239,30 @@ static int __init kprobe_event_gen_test_init(void)
static void __exit kprobe_event_gen_test_exit(void)
{
- /* Disable the event or you can't remove it */
- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
- "kprobes",
- "gen_kprobe_test", false));
+ if (trace_event_file_is_valid(gen_kprobe_test)) {
+ /* Disable the event or you can't remove it */
+ WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
+ "kprobes",
+ "gen_kprobe_test", false));
+
+ /* Now give the file and instance back */
+ trace_put_event_file(gen_kprobe_test);
+ }
- /* Now give the file and instance back */
- trace_put_event_file(gen_kprobe_test);
/* Now unregister and free the event */
WARN_ON(kprobe_event_delete("gen_kprobe_test"));
- /* Disable the event or you can't remove it */
- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
- "kprobes",
- "gen_kretprobe_test", false));
+ if (trace_event_file_is_valid(gen_kretprobe_test)) {
+ /* Disable the event or you can't remove it */
+ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
+ "kprobes",
+ "gen_kretprobe_test", false));
+
+ /* Now give the file and instance back */
+ trace_put_event_file(gen_kretprobe_test);
+ }
- /* Now give the file and instance back */
- trace_put_event_file(gen_kretprobe_test);
/* Now unregister and free the event */
WARN_ON(kprobe_event_delete("gen_kretprobe_test"));
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index c69d82273ce7..32c3dfdb4d6a 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -83,8 +83,10 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler)
{
struct rethook *rh = kzalloc(sizeof(struct rethook), GFP_KERNEL);
- if (!rh || !handler)
+ if (!rh || !handler) {
+ kfree(rh);
return NULL;
+ }
rh->data = data;
rh->handler = handler;
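The fix above leans on kfree(NULL) being a no-op, so a single error path covers both a failed allocation and a NULL handler (where the fresh allocation must be undone). Libc free(NULL) carries the same guarantee; a sketch of the pattern with hypothetical names:

	#include <stdlib.h>

	struct hook { void *data; void (*handler)(void *); };

	static struct hook *hook_alloc(void *data, void (*handler)(void *))
	{
		struct hook *h = calloc(1, sizeof(*h));

		if (!h || !handler) {
			free(h);	/* free(NULL) is defined to do nothing */
			return NULL;
		}
		h->data = data;
		h->handler = handler;
		return h;
	}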
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 199759c73519..b21bf14bae9b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -519,6 +519,7 @@ struct ring_buffer_per_cpu {
local_t committing;
local_t commits;
local_t pages_touched;
+ local_t pages_lost;
local_t pages_read;
long last_pages_touch;
size_t shortest_full;
@@ -894,10 +895,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
{
size_t read;
+ size_t lost;
size_t cnt;
read = local_read(&buffer->buffers[cpu]->pages_read);
+ lost = local_read(&buffer->buffers[cpu]->pages_lost);
cnt = local_read(&buffer->buffers[cpu]->pages_touched);
+
+ if (WARN_ON_ONCE(cnt < lost))
+ return 0;
+
+ cnt -= lost;
+
/* The reader can read an empty page, but not more than that */
if (cnt < read) {
WARN_ON_ONCE(read > cnt + 1);
@@ -907,6 +916,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
return cnt - read;
}
+static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ size_t nr_pages;
+ size_t dirty;
+
+ nr_pages = cpu_buffer->nr_pages;
+ if (!nr_pages || !full)
+ return true;
+
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+
+ return (dirty * 100) > (full * nr_pages);
+}
+
/*
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
*
@@ -937,6 +961,9 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *rbwork;
+ if (!buffer)
+ return;
+
if (cpu == RING_BUFFER_ALL_CPUS) {
/* Wake up individual ones too. One level recursion */
@@ -945,7 +972,15 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
rbwork = &buffer->irq_work;
} else {
+ if (WARN_ON_ONCE(!buffer->buffers))
+ return;
+ if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
+ return;
+
cpu_buffer = buffer->buffers[cpu];
+ /* The CPU buffer may not have been initialized yet */
+ if (!cpu_buffer)
+ return;
rbwork = &cpu_buffer->irq_work;
}
@@ -1035,22 +1070,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
!ring_buffer_empty_cpu(buffer, cpu)) {
unsigned long flags;
bool pagebusy;
- size_t nr_pages;
- size_t dirty;
+ bool done;
if (!full)
break;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- nr_pages = cpu_buffer->nr_pages;
- dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+ done = !pagebusy && full_hit(buffer, cpu, full);
+
if (!cpu_buffer->shortest_full ||
cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (!pagebusy &&
- (!nr_pages || (dirty * 100) > full * nr_pages))
+ if (done)
break;
}
@@ -1076,6 +1109,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* @cpu: the cpu buffer to wait on
* @filp: the file descriptor
* @poll_table: The poll descriptor
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
@@ -1085,14 +1119,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* zero otherwise.
*/
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
- struct file *filp, poll_table *poll_table)
+ struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
- if (cpu == RING_BUFFER_ALL_CPUS)
+ if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
- else {
+ full = 0;
+ } else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -EINVAL;
@@ -1100,8 +1135,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
work = &cpu_buffer->irq_work;
}
- poll_wait(filp, &work->waiters, poll_table);
- work->waiters_pending = true;
+ if (full) {
+ poll_wait(filp, &work->full_waiters, poll_table);
+ work->full_waiters_pending = true;
+ } else {
+ poll_wait(filp, &work->waiters, poll_table);
+ work->waiters_pending = true;
+ }
+
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -1117,6 +1158,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
*/
smp_mb();
+ if (full)
+ return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
+
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
@@ -1758,9 +1802,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
free_buffer_page(cpu_buffer->reader_page);
- rb_head_page_deactivate(cpu_buffer);
-
if (head) {
+ rb_head_page_deactivate(cpu_buffer);
+
list_for_each_entry_safe(bpage, tmp, head, list) {
list_del_init(&bpage->list);
free_buffer_page(bpage);
@@ -1996,6 +2040,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
*/
local_add(page_entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_inc(&cpu_buffer->pages_lost);
}
/*
@@ -2480,6 +2525,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
*/
local_add(entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_inc(&cpu_buffer->pages_lost);
/*
* The entries will be zeroed out when we move the
@@ -3144,10 +3190,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
static __always_inline void
rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
- size_t nr_pages;
- size_t dirty;
- size_t full;
-
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies its own memory barriers */
@@ -3171,10 +3213,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
- full = cpu_buffer->shortest_full;
- nr_pages = cpu_buffer->nr_pages;
- dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
- if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+ if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full))
return;
cpu_buffer->irq_work.wakeup_full = true;
@@ -5237,6 +5276,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
local_set(&cpu_buffer->pages_touched, 0);
+ local_set(&cpu_buffer->pages_lost, 0);
local_set(&cpu_buffer->pages_read, 0);
cpu_buffer->last_pages_touch = 0;
cpu_buffer->shortest_full = 0;
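full_hit(), added earlier in this file, encodes "more than @full percent of the pages are dirty" purely in integer arithmetic, avoiding division: `(dirty * 100) > (full * nr_pages)`. A runnable check of the boundary, assuming 16 pages and a 25% threshold:

	#include <assert.h>
	#include <stdbool.h>
	#include <stddef.h>

	static bool full_hit_example(size_t dirty, size_t nr_pages, int full)
	{
		if (!nr_pages || !full)		/* no pages or no threshold: always "hit" */
			return true;
		return (dirty * 100) > ((size_t)full * nr_pages);
	}

	int main(void)
	{
		assert(!full_hit_example(4, 16, 25));	/* 400 > 400 is false: exactly 25% */
		assert(full_hit_example(5, 16, 25));	/* 500 > 400: just over a quarter */
		return 0;
	}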
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 0b15e975d2c2..8d77526892f4 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void)
/* Now generate a gen_synth_test event */
ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals));
- out:
+ free:
+ kfree(buf);
return ret;
delete:
/* We got an error after creating the event, delete it */
synth_event_delete("gen_synth_test");
- free:
- kfree(buf);
-
- goto out;
+ goto free;
}
/*
@@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void)
/* Now trace an empty_synth_test event */
ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals));
- out:
+ free:
+ kfree(buf);
return ret;
delete:
/* We got an error after creating the event, delete it */
synth_event_delete("empty_synth_test");
- free:
- kfree(buf);
-
- goto out;
+ goto free;
}
static struct synth_field_desc create_synth_test_fields[] = {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 47a44b055a1d..a7fe0e115272 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6657,6 +6657,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_unlock(&trace_types_lock);
free_cpumask_var(iter->started);
+ kfree(iter->fmt);
mutex_destroy(&iter->mutex);
kfree(iter);
@@ -6681,7 +6682,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
return EPOLLIN | EPOLLRDNORM;
else
return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
- filp, poll_table);
+ filp, poll_table, iter->tr->buffer_percent);
}
static __poll_t
@@ -7802,6 +7803,7 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
int len)
{
struct tracing_log_err *err;
+ char *cmd;
if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
err = alloc_tracing_log_err(len);
@@ -7810,12 +7812,12 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
return err;
}
-
+ cmd = kzalloc(len, GFP_KERNEL);
+ if (!cmd)
+ return ERR_PTR(-ENOMEM);
err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
kfree(err->cmd);
- err->cmd = kzalloc(len, GFP_KERNEL);
- if (!err->cmd)
- return ERR_PTR(-ENOMEM);
+ err->cmd = cmd;
list_del(&err->list);
return err;
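The get_tracing_log_err() change above applies the allocate-before-free rule: acquire the replacement buffer first, so an allocation failure leaves the reused entry's old cmd intact instead of freeing it and then bailing out with a dangling pointer. A userspace sketch of the fixed ordering (hypothetical log_err type):

	#include <stdlib.h>

	struct log_err { char *cmd; };

	static int replace_cmd(struct log_err *err, size_t len)
	{
		char *cmd = calloc(1, len);	/* allocate the replacement first */

		if (!cmd)
			return -1;	/* failure: err->cmd untouched, still owned */
		free(err->cmd);		/* only now release the old buffer */
		err->cmd = cmd;
		return 0;
	}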
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 5dd0617e5df6..352b65e2b910 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -52,6 +52,7 @@ static void trace_event_probe_cleanup(struct trace_eprobe *ep)
kfree(ep->event_system);
if (ep->event)
trace_event_put_ref(ep->event);
+ kfree(ep->filter_str);
kfree(ep);
}
@@ -563,6 +564,9 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
{
struct eprobe_data *edata = data->private_data;
+ if (unlikely(!rec))
+ return;
+
__eprobe_trace_func(edata, rec);
}
@@ -642,7 +646,7 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file)
INIT_LIST_HEAD(&trigger->list);
if (ep->filter_str) {
- ret = create_event_filter(file->tr, file->event_call,
+ ret = create_event_filter(file->tr, ep->event,
ep->filter_str, false, &filter);
if (ret)
goto error;
@@ -900,7 +904,7 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[
static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[])
{
- struct event_filter *dummy;
+ struct event_filter *dummy = NULL;
int i, ret, len = 0;
char *p;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index e310052dc83c..29fbfb27c2b2 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -828,10 +828,9 @@ static int register_synth_event(struct synth_event *event)
}
ret = set_synth_event_print_fmt(call);
- if (ret < 0) {
+ /* unregister_trace_event() will be called inside */
+ if (ret < 0)
trace_remove_event_call(call);
- goto err;
- }
out:
return ret;
err:
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index b69e207012c9..942ddbdace4a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -201,8 +201,6 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
return trace_handle_return(s);
}
-extern char *__bad_type_size(void);
-
#define SYSCALL_FIELD(_type, _name) { \
.type = #_type, .name = #_name, \
.size = sizeof(_type), .align = __alignof__(_type), \