selftests/bpf: Add benchmark for local_storage RCU Tasks Trace usage

This benchmark measures grace period latency and kthread cpu usage of RCU Tasks Trace when many processes are creating/deleting BPF local_storage. Intent here is to quantify improvement on these metrics after Paul's recent RCU Tasks patches [0]. Specifically, fork 15k tasks which call a bpf prog that creates/destroys task local_storage and sleep in a loop, resulting in many call_rcu_tasks_trace calls. To determine grace period latency, trace time elapsed between rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp; for cpu usage look at rcu_tasks_trace_kthread's stime in /proc/PID/stat. On my virtualized test environment (Skylake, 8 cpus) benchmark results demonstrate significant improvement: BEFORE Paul's patches: SUMMARY tasks_trace grace period latency avg 22298.551 us stddev 1302.165 us SUMMARY ticks per tasks_trace grace period avg 2.291 stddev 0.324 AFTER Paul's patches: SUMMARY tasks_trace grace period latency avg 16969.197 us stddev 2525.053 us SUMMARY ticks per tasks_trace grace period avg 1.146 stddev 0.178 Note that since these patches are not in bpf-next benchmarking was done by cherry-picking this patch onto rcu tree. [0] https://lore.kernel.org/rcu/20220620225402.GA3842369@paulmck-ThinkPad-P17-Gen-1/ Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Paul E. McKenney <paulmck@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20220705190018.3239050-1-davemarchevsky@fb.com
author: Dave Marchevsky <davemarchevsky@fb.com> 2022-07-05 12:00:18 -0700
committer: Daniel Borkmann <daniel@iogearbox.net> 2022-07-07 16:35:21 +0200
commit: 2b4b2621fd6401865b31b9f403e4b936b7439e94 (patch)
tree: fa77994c04a9e5317a655a39f7f11cfbf64ee493 /tools/testing/selftests/bpf/benchs
parent: 935dc35c75318fa213d26808ad8bb130fb0b486e (diff)
2 files changed, 292 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
new file mode 100644
index 000000000000..43f109d93130
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+
+#include <sys/prctl.h>
+#include "local_storage_rcu_tasks_trace_bench.skel.h"
+#include "bench.h"
+
+#include <signal.h>
+
+static struct {
+	__u32 nr_procs;		/* how many sleeper processes setup forks */
+	__u32 kthread_pid;	/* pid whose /proc stat is sampled; 0 disables sampling */
+	bool quiet;		/* when set, report_progress prints nothing per iteration */
+} args = {
+	.nr_procs = 1000,
+	.kthread_pid = 0,
+	.quiet = false,
+};
+
+enum {	/* argp option keys dispatched on in parse_arg() */
+	ARG_NR_PROCS = 7000,
+	ARG_KTHREAD_PID,	/* 7001 */
+	ARG_QUIET,		/* 7002 */
+};
+
+static const struct argp_option opts[] = {
+	{ .name = "nr_procs", .key = ARG_NR_PROCS, .arg = "NR_PROCS",
+	  .doc = "Set number of user processes to spin up" },
+	{ .name = "kthread_pid", .key = ARG_KTHREAD_PID, .arg = "PID",
+	  .doc = "Pid of rcu_tasks_trace kthread for ticks tracking" },
+	{ .name = "quiet", .key = ARG_QUIET, .arg = "{0,1}",
+	  .doc = "If true, don't report progress" },
+	{},	/* all-zero entry terminates the table */
+};
+
+/* argp callback: validate one option's value and store it in 'args'. */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	char *end;	/* first char strtol did not consume; must be NUL for a clean parse */
+	long ret;
+
+	switch (key) {
+	case ARG_NR_PROCS:
+		ret = strtol(arg, &end, 10);
+		if (end == arg || *end || ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid nr_procs\n");
+			argp_usage(state);
+		}
+		args.nr_procs = ret;
+		break;
+	case ARG_KTHREAD_PID:
+		ret = strtol(arg, &end, 10);
+		if (end == arg || *end || ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid kthread_pid\n");
+			argp_usage(state);
+		}
+		args.kthread_pid = ret;
+		break;
+	case ARG_QUIET:
+		ret = strtol(arg, &end, 10);
+		if (end == arg || *end || ret < 0 || ret > 1) {
+			fprintf(stderr, "invalid quiet %ld\n", ret);
+			argp_usage(state);
+		}
+		args.quiet = ret;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+const struct argp bench_local_storage_rcu_tasks_trace_argp = {
+	.parser = parse_arg,	/* callback invoked per parsed option */
+	.options = opts,	/* table of options this benchmark accepts */
+};
+
+#define MAX_SLEEP_PROCS 150000	/* largest nr_procs that validate() accepts */
+
+/* Reject configurations this benchmark cannot run; exits the process on the first one found. */
+static void validate(void)
+{
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		goto fail;
+	} else if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		goto fail;
+	} else if (args.nr_procs > MAX_SLEEP_PROCS) {
+		fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", MAX_SLEEP_PROCS);
+		goto fail;
+	}
+	return;
+fail:
+	exit(1);
+}
+
+/* Return stime (field 15 of /proc/PID/stat) for args.kthread_pid, or -1 if no pid is set. */
+static long kthread_pid_ticks(void)
+{
+	char procfs_path[100], line[512], *p, *rest = NULL;
+	long stime;
+	FILE *f;
+
+	if (!args.kthread_pid)
+		return -1;
+
+	snprintf(procfs_path, sizeof(procfs_path), "/proc/%u/stat", args.kthread_pid);
+	f = fopen(procfs_path, "r");
+	if (!f) {
+		fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
+		exit(1);
+	}
+	p = fgets(line, sizeof(line), f);
+	fclose(f);
+	/* comm (field 2) may contain spaces, so resume parsing after its last ')' */
+	for (; p && *p; p++)
+		if (*p == ')')
+			rest = p + 1;
+	if (!rest || sscanf(rest, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
+		fprintf(stderr, "couldn't parse %s, exiting\n", procfs_path);
+		exit(1);
+	}
+	return stime;
+}
+
+static struct {
+	struct local_storage_rcu_tasks_trace_bench *skel;	/* skeleton loaded by setup */
+	long prev_kthread_stime;	/* last kthread_pid_ticks() sample; refreshed by measure() */
+} ctx;
+
+static void sleep_and_loop(void)
+{
+	for (;;) {	/* never returns: nap 0-3 seconds, issue getpgid, repeat */
+		sleep(rand() % 4);
+		syscall(__NR_getpgid);
+	}
+}
+
+/* Benchmark setup hook: fork args.nr_procs sleeper children, then load the
+ * BPF skeleton, take a baseline stime sample and attach the three programs.
+ * Any failure exits the process.
+ */
+static void local_storage_tasks_trace_setup(void)
+{
+	struct bpf_program *progs[3];
+	int n, rc, child, parent_pid;
+
+	parent_pid = getpid();
+
+	for (n = 0; n < args.nr_procs; n++) {
+		child = fork();
+		if (child < 0) {
+			fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", n,
+				args.nr_procs);
+			exit(1);
+		}
+
+		if (child > 0)
+			continue;
+
+		/* Child from here on: request SIGKILL when the parent dies */
+		rc = prctl(PR_SET_PDEATHSIG, SIGKILL);
+		if (rc < 0) {
+			fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
+			exit(1);
+		}
+
+		/* A different parent pid means the runner exited before prctl was set */
+		if (getppid() != parent_pid) {
+			fprintf(stderr, "Runner died while spinning up procs, exiting\n");
+			exit(1);
+		}
+		sleep_and_loop();
+	}
+	printf("Spun up %u procs (our pid %d)\n", args.nr_procs, parent_pid);
+
+	setup_libbpf();
+
+	ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "Error doing open_and_load, exiting\n");
+		exit(1);
+	}
+
+	/* Baseline stime sample, taken before any program is attached */
+	ctx.prev_kthread_stime = kthread_pid_ticks();
+
+	progs[0] = ctx.skel->progs.get_local;
+	progs[1] = ctx.skel->progs.pregp_step;
+	progs[2] = ctx.skel->progs.postgp;
+	for (n = 0; n < 3; n++) {
+		if (bpf_program__attach(progs[n]))
+			continue;
+		fprintf(stderr, "Error attaching bpf program\n");
+		exit(1);
+	}
+}
+
+static void measure(struct bench_res *res)
+{
+	long now;	/* kthread stime sampled after the two counters are collected */
+
+	res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);	/* 0 is swapped in */
+	res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);	/* 0 is swapped in */
+	now = kthread_pid_ticks();
+	res->stime = now - ctx.prev_kthread_stime;	/* delta since the previous sample */
+	ctx.prev_kthread_stime = now;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;	/* no consumer-side work: returns immediately, input is unused */
+}
+
+static void *producer(void *input)
+{
+	for (;;)	/* issue getpgid forever; the return below is never reached */
+		syscall(__NR_getpgid);
+	return NULL;
+}
+
+/* Exit if the BPF side set 'unexpected'; otherwise print per-iteration averages unless quiet. */
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+	if (ctx.skel->bss->unexpected) {
+		fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp). "
+			"Data can't be trusted, exiting\n");
+		exit(1);
+	}
+
+	if (args.quiet)
+		return;
+	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
+	       iter, res->gp_ns / (double)res->gp_ct);
+	printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
+	       iter, res->stime / (double)res->gp_ct);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+	struct basic_stats stats;	/* reused for both SUMMARY lines */
+
+	grace_period_latency_basic_stats(res, res_cnt, &stats);
+	printf("SUMMARY tasks_trace grace period latency"
+	       "\tavg %.3lf us\tstddev %.3lf us\n", stats.mean, stats.stddev);
+	grace_period_ticks_basic_stats(res, res_cnt, &stats);
+	printf("SUMMARY ticks per tasks_trace grace period"
+	       "\tavg %.3lf\tstddev %.3lf\n", stats.mean, stats.stddev);
+}
+
+/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
+ * of RCU Tasks-Trace.
+ *
+ * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
+ * from sleep() loop, and creating/destroying BPF task-local storage on wakeup.
+ * The number of forked tasks is configurable.
+ *
+ * Exercising code paths which call call_rcu_tasks_trace while there are many
+ * thousands of tasks on the system should result in RCU Tasks-Trace having to
+ * do a noticeable amount of work.
+ *
+ * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
+ * after the grace period has ended, or by measuring grace period latency.
+ *
+ * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step
+ * and rcu_tasks_trace_postgp functions to measure grace period latency and
+ * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks.
+ */
+const struct bench bench_local_storage_tasks_trace = {
+	.name = "local-storage-tasks-trace",	/* same name the run script passes to ./bench */
+	.validate = validate,			/* producer/consumer count and nr_procs checks */
+	.setup = local_storage_tasks_trace_setup,	/* forks sleepers, loads and attaches BPF */
+	.producer_thread = producer,		/* loops on getpgid */
+	.consumer_thread = consumer,		/* returns immediately */
+	.measure = measure,			/* collects grace period counters and stime delta */
+	.report_progress = report_progress,	/* per-iteration output, silenced by quiet */
+	.report_final = report_final,		/* SUMMARY lines */
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
new file mode 100755
index 000000000000..5dac1f02892c
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Run the local-storage-tasks-trace benchmark, sampling the rcu_tasks_trace kthread.
+kthread_pid=$(pgrep rcu_tasks_trace_kthread)
+
+if [ -z "$kthread_pid" ]; then
+	echo "error: Couldn't find rcu_tasks_trace_kthread" >&2
+	exit 1
+fi
+
+./bench --nr_procs 15000 --kthread_pid "$kthread_pid" -d 600 --quiet 1 local-storage-tasks-trace
author	Dave Marchevsky <davemarchevsky@fb.com>	2022-07-05 12:00:18 -0700
committer	Daniel Borkmann <daniel@iogearbox.net>	2022-07-07 16:35:21 +0200
commit	2b4b2621fd6401865b31b9f403e4b936b7439e94 (patch)
tree	fa77994c04a9e5317a655a39f7f11cfbf64ee493 /tools/testing/selftests/bpf/benchs
parent	935dc35c75318fa213d26808ad8bb130fb0b486e (diff)