diff options
author | Adolfo Ochagavía <github@adolfo.ochagavia.nl> | 2023-09-05 11:25:36 +0200 |
---|---|---|
committer | ctz <jpixton@gmail.com> | 2023-09-05 13:34:27 +0000 |
commit | 8c914559f8b31c2db642322a8c87b95ccfd9b2f3 (patch) | |
tree | abb76f1b91579ccb4709a152e80b6faf6de0b349 /ci-bench | |
parent | c9a397446231854c5af439081875ef53fe4339c8 (diff) |
Show detailed icount diff for scenarios with noteworthy diffs
Diffstat (limited to 'ci-bench')
-rw-r--r-- | ci-bench/README.md | 15 | ||||
-rw-r--r-- | ci-bench/src/cachegrind.rs | 99 | ||||
-rw-r--r-- | ci-bench/src/main.rs | 96 |
3 files changed, 167 insertions, 43 deletions
diff --git a/ci-bench/README.md b/ci-bench/README.md index 3187adac..d869c484 100644 --- a/ci-bench/README.md +++ b/ci-bench/README.md @@ -12,8 +12,9 @@ important bits. _Note: this step requires having `valgrind` in your path._ -Use `cargo run --release -- run-all > out.csv` to generate a CSV with the instruction counts for -the different scenarios we support. The result should look like the following: +Use `cargo run --release -- run-all --output-dir foo` to generate the results inside the `foo` +directory. Within that directory, you will find an `icounts.csv` file with the instruction counts +for the different scenarios we support. It should look like the following: ```csv handshake_no_resume_1.2_rsa_aes_server,11327015 @@ -31,12 +32,14 @@ handshake_no_resume_1.3_rsa_aes_client,4212770 ... ``` +In the `cachegrind` subdirectory you will find output files emitted by the `cachegrind` tool, which +are useful to report detailed instruction count differences when comparing two benchmark runs. + ### Comparing results -Use `cargo run --release -- compare out1.csv out2.csv`. It will output a report using -GitHub-flavored markdown (used by the CI itself to give feedback about PRs). We currently -consider differences of 0.2% to be significant, but might tweak it in the future after we gain -experience with the benchmarking setup. +Use `cargo run --release -- compare foo bar`. It will output a report using GitHub-flavored markdown +(used by the CI itself to give feedback about PRs). We currently consider differences of 0.2% to be +significant, but might tweak it in the future after we gain experience with the benchmarking setup. 
### Supported scenarios diff --git a/ci-bench/src/cachegrind.rs b/ci-bench/src/cachegrind.rs index e674fbf0..064aeeb1 100644 --- a/ci-bench/src/cachegrind.rs +++ b/ci-bench/src/cachegrind.rs @@ -9,12 +9,17 @@ use anyhow::Context; use crate::benchmark::Benchmark; use crate::Side; +/// The subdirectory in which the cachegrind output should be stored +const CACHEGRIND_OUTPUT_SUBDIR: &str = "cachegrind"; + /// A cachegrind-based benchmark runner pub struct CachegrindRunner { /// The path to the ci-bench executable /// /// This is necessary because the cachegrind runner works by spawning child processes executable: String, + /// The directory where the cachegrind output will be stored + output_dir: PathBuf, /// The amount of instructions that are executed upon startup of the child process, before /// actually running one of the benchmarks /// @@ -24,9 +29,13 @@ pub struct CachegrindRunner { impl CachegrindRunner { /// Returns a new cachegrind-based benchmark runner - pub fn new(executable: String) -> anyhow::Result<Self> { + pub fn new(executable: String, output_dir: PathBuf) -> anyhow::Result<Self> { Self::ensure_cachegrind_available()?; + let cachegrind_output_dir = output_dir.join(CACHEGRIND_OUTPUT_SUBDIR); + std::fs::create_dir_all(&cachegrind_output_dir) + .context("Failed to create cachegrind output directory")?; + // We don't care about the side here, so let's use `Server` just to choose something let overhead_instructions = Self::run_bench_side( &executable, @@ -35,12 +44,14 @@ impl CachegrindRunner { "calibration", Stdio::piped(), Stdio::piped(), + &cachegrind_output_dir, )? 
.wait_and_get_instr_count() .context("Unable to count overhead instructions")?; Ok(CachegrindRunner { executable, + output_dir: cachegrind_output_dir, overhead_instructions, }) } @@ -61,6 +72,7 @@ impl CachegrindRunner { &bench.name_with_side(Side::Server), Stdio::piped(), Stdio::piped(), + &self.output_dir, ) .context("server side bench crashed")?; @@ -71,6 +83,7 @@ impl CachegrindRunner { &bench.name_with_side(Side::Client), Stdio::from(server.process.stdout.take().unwrap()), Stdio::from(server.process.stdin.take().unwrap()), + &self.output_dir, ) .context("client side bench crashed")?; @@ -116,10 +129,9 @@ impl CachegrindRunner { name: &str, stdin: Stdio, stdout: Stdio, + output_dir: &Path, ) -> anyhow::Result<BenchSubprocess> { - let output_file = PathBuf::from(format!("target/cachegrind/cachegrind.out.{}", name)); - std::fs::create_dir_all(output_file.parent().unwrap()) - .context("Failed to create cachegrind output directory")?; + let cachegrind_output_file = output_dir.join(name); // Run under setarch to disable ASLR, to reduce noise let mut cmd = Command::new("setarch"); @@ -133,7 +145,10 @@ impl CachegrindRunner { // keep stderr free of noise, to see any errors from the child process) .arg("--log-file=/dev/null") // The file where the instruction counts will be stored - .arg(format!("--cachegrind-out-file={}", output_file.display())) + .arg(format!( + "--cachegrind-out-file={}", + cachegrind_output_file.display() + )) .arg(executable) .arg("run-single") .arg(benchmark_index.to_string()) @@ -146,7 +161,7 @@ impl CachegrindRunner { Ok(BenchSubprocess { process: child, - output_file, + cachegrind_output_file, }) } } @@ -156,7 +171,7 @@ struct BenchSubprocess { /// The benchmark's child process, running under cachegrind process: Child, /// Cachegrind's output file for this benchmark - output_file: PathBuf, + cachegrind_output_file: PathBuf, } impl BenchSubprocess { @@ -173,9 +188,7 @@ impl BenchSubprocess { ); } - let instruction_count = 
parse_cachegrind_output(&self.output_file)?; - std::fs::remove_file(&self.output_file).ok(); - + let instruction_count = parse_cachegrind_output(&self.cachegrind_output_file)?; Ok(instruction_count) } } @@ -216,3 +229,69 @@ impl Sub for InstructionCounts { } } } + +/// Returns the detailed instruction diff between the baseline and the candidate +pub fn diff(baseline: &Path, candidate: &Path, scenario: &str) -> anyhow::Result<String> { + // The latest version of valgrind has deprecated cg_diff, which has been superseded by + // cg_annotate. Many systems are running older versions, though, so we are sticking with cg_diff + // for the time being. + + let tmp_path = Path::new("target/ci-bench-tmp"); + let tmp = File::create(tmp_path).context("cannot create temp file for cg_diff")?; + + // cg_diff generates a diff between two cachegrind output files in a custom format that is not + // user-friendly + let cg_diff = Command::new("cg_diff") + .arg( + baseline + .join(CACHEGRIND_OUTPUT_SUBDIR) + .join(scenario), + ) + .arg( + candidate + .join(CACHEGRIND_OUTPUT_SUBDIR) + .join(scenario), + ) + .stdout(Stdio::from(tmp)) + .spawn() + .context("cannot spawn cg_diff subprocess")? 
+ .wait() + .context("error waiting for cg_diff to finish")?; + + if !cg_diff.success() { + anyhow::bail!( + "cg_diff finished with an error (code = {:?})", + cg_diff.code() + ) + } + + // cg_annotate transforms the output of cg_diff into something a user can understand + let cg_annotate = Command::new("cg_annotate") + .arg(tmp_path) + .arg("--auto=no") + .output() + .context("error waiting for cg_annotate to finish")?; + + if !cg_annotate.status.success() { + anyhow::bail!( + "cg_annotate finished with an error (code = {:?})", + cg_annotate.status.code() + ) + } + + let annotated = + String::from_utf8(cg_annotate.stdout).context("cg_annotate produced invalid UTF8")?; + + // Discard lines before the first `Ir` header + let mut diff = String::new(); + for line in annotated + .trim() + .lines() + .skip_while(|l| l.trim() != "Ir") + { + diff.push_str(line); + diff.push('\n'); + } + + Ok(diff) +} diff --git a/ci-bench/src/main.rs b/ci-bench/src/main.rs index 066b7187..5b0e8dad 100644 --- a/ci-bench/src/main.rs +++ b/ci-bench/src/main.rs @@ -56,6 +56,9 @@ const RESUMED_HANDSHAKE_RUNS: usize = 30; /// The threshold at which instruction count changes are considered relevant const CHANGE_THRESHOLD: f64 = 0.002; // 0.2% +/// The name of the file where the instruction counts are stored after a `run-all` run +const ICOUNTS_FILENAME: &str = "icounts.csv"; + #[derive(Parser)] #[command(about)] pub struct Cli { @@ -66,15 +69,18 @@ pub struct Cli { #[derive(Subcommand)] pub enum Command { /// Run all benchmarks and prints the measured CPU instruction counts in CSV format - RunAll, + RunAll { + #[arg(short, long)] + output_dir: Option<PathBuf>, + }, /// Run a single benchmark at the provided index (used by the bench runner to start each benchmark in its own process) RunSingle { index: u32, side: Side }, /// Compare the results from two previous benchmark runs and print a user-friendly markdown overview Compare { - /// Path to a CSV file obtained from a previous `run-all` execution 
- baseline_input: PathBuf, - /// Path to a CSV file obtained from a previous `run-all` execution - candidate_input: PathBuf, + /// Path to the directory with the results of a previous `run-all` execution + baseline_dir: PathBuf, + /// Path to the directory with the results of a previous `run-all` execution + candidate_dir: PathBuf, }, } @@ -99,13 +105,16 @@ fn main() -> anyhow::Result<()> { let cli = Cli::parse(); match cli.command { - Command::RunAll => { + Command::RunAll { output_dir } => { let executable = std::env::args().next().unwrap(); - let results = run_all(executable, &benchmarks)?; + let output_dir = output_dir.unwrap_or("target/ci-bench".into()); + let results = run_all(executable, output_dir.clone(), &benchmarks)?; // Output results in CSV (note: not using a library here to avoid extra dependencies) + let mut csv_file = File::create(output_dir.join(ICOUNTS_FILENAME)) + .context("cannot create output csv file")?; for (name, instr_count) in results { - println!("{name},{instr_count}"); + writeln!(csv_file, "{name},{instr_count}")?; } } Command::RunSingle { index, side } => { @@ -167,12 +176,12 @@ fn main() -> anyhow::Result<()> { mem::forget(stdout); } Command::Compare { - baseline_input, - candidate_input, + baseline_dir, + candidate_dir, } => { - let baseline = read_results(baseline_input.as_ref())?; - let candidate = read_results(candidate_input.as_ref())?; - let result = compare_results(&baseline, &candidate); + let baseline = read_results(&baseline_dir.join(ICOUNTS_FILENAME))?; + let candidate = read_results(&candidate_dir.join(ICOUNTS_FILENAME))?; + let result = compare_results(&baseline_dir, &candidate_dir, &baseline, &candidate)?; print_report(&result); if !result.noteworthy.is_empty() { @@ -275,9 +284,13 @@ fn add_benchmark_group(benchmarks: &mut Vec<Benchmark>, params: BenchmarkParams) } /// Run all the provided benches under cachegrind to retrieve their instruction count -pub fn run_all(executable: String, benches: &[Benchmark]) -> 
anyhow::Result<Vec<(String, u64)>> { +pub fn run_all( + executable: String, + output_dir: PathBuf, + benches: &[Benchmark], +) -> anyhow::Result<Vec<(String, u64)>> { // Run the benchmarks in parallel - let cachegrind = CachegrindRunner::new(executable)?; + let cachegrind = CachegrindRunner::new(executable, output_dir)?; let results: Vec<_> = benches .par_iter() .enumerate() @@ -497,8 +510,10 @@ fn run_bench<T: BenchStepper>(mut stepper: T, kind: BenchmarkKind) -> anyhow::Re /// The results of a comparison between two `run-all` executions struct CompareResult { - /// Results that probably indicate a real change in performance and should be highlighted - noteworthy: Vec<Diff>, + /// Results that probably indicate a real change in performance and should be highlighted. + /// + /// The string is a detailed diff between the instruction counts obtained from cachegrind. + noteworthy: Vec<(Diff, String)>, /// Results within the noise threshold negligible: Vec<Diff>, /// Benchmark scenarios present in the candidate but missing in the baseline @@ -517,7 +532,10 @@ struct Diff { /// Reads the (benchmark, instruction count) pairs from previous CSV output fn read_results(path: &Path) -> anyhow::Result<HashMap<String, u64>> { - let file = fs::File::open(path).context("CSV file for comparison not found")?; + let file = File::open(path).context(format!( + "CSV file for comparison not found: {}", + path.display() + ))?; let mut measurements = HashMap::new(); for line in BufReader::new(file).lines() { @@ -543,9 +561,11 @@ fn read_results(path: &Path) -> anyhow::Result<HashMap<String, u64>> { /// Returns an internal representation of the comparison between the baseline and the candidate /// measurements fn compare_results( + baseline_dir: &Path, + candidate_dir: &Path, baseline: &HashMap<String, u64>, candidate: &HashMap<String, u64>, -) -> CompareResult { +) -> anyhow::Result<CompareResult> { let mut diffs = Vec::new(); let mut missing = Vec::new(); for (scenario, &instr_count) in 
candidate { @@ -573,11 +593,18 @@ fn compare_results( }); let (noteworthy, negligible) = split_on_threshold(&diffs); - CompareResult { - noteworthy, + + let mut noteworthy_with_details = Vec::new(); + for diff in noteworthy { + let detailed_diff = cachegrind::diff(baseline_dir, candidate_dir, &diff.scenario)?; + noteworthy_with_details.push((diff, detailed_diff)); + } + + Ok(CompareResult { + noteworthy: noteworthy_with_details, negligible, missing_in_baseline: missing, - } + }) } /// Prints a report of the comparison to stdout, using GitHub-flavored markdown @@ -594,23 +621,38 @@ fn print_report(result: &CompareResult) { } } - println!("### Noteworthy instruction count differences"); + println!("## Noteworthy instruction count differences"); if result.noteworthy.is_empty() { println!( "_There are no noteworthy instruction count differences (i.e. above {}%)_", CHANGE_THRESHOLD * 100.0 ); } else { - table(&result.noteworthy, true); + table( + result + .noteworthy + .iter() + .map(|(diff, _)| diff), + true, + ); + println!("<details>"); + println!("<summary>Details per scenario</summary>\n"); + for (diff, detailed_diff) in &result.noteworthy { + println!("#### {}", diff.scenario); + println!("```"); + println!("{detailed_diff}"); + println!("```"); + } + println!("</details>\n") } - println!("### Other instruction count differences"); + println!("## Other instruction count differences"); if result.negligible.is_empty() { println!("_There are no other instruction count differences_"); } else { println!("<details>"); println!("<summary>Click to expand</summary>\n"); - table(&result.negligible, false); + table(result.negligible.iter(), false); println!("</details>\n") } } @@ -633,7 +675,7 @@ fn split_on_threshold(diffs: &[Diff]) -> (Vec<Diff>, Vec<Diff>) { } /// Renders the diffs as a markdown table -fn table(diffs: &[Diff], emoji_feedback: bool) { +fn table<'a>(diffs: impl Iterator<Item = &'a Diff>, emoji_feedback: bool) { println!("| Scenario | Baseline | Candidate | 
Diff |"); println!("| --- | ---: | ---: | ---: |"); for diff in diffs { |