summaryrefslogtreecommitdiff
path: root/ci-bench
diff options
context:
space:
mode:
authorAdolfo OchagavĂ­a <github@adolfo.ochagavia.nl>2023-09-05 11:25:36 +0200
committerctz <jpixton@gmail.com>2023-09-05 13:34:27 +0000
commit8c914559f8b31c2db642322a8c87b95ccfd9b2f3 (patch)
treeabb76f1b91579ccb4709a152e80b6faf6de0b349 /ci-bench
parentc9a397446231854c5af439081875ef53fe4339c8 (diff)
Show detailed icount diff for scenarios with noteworthy diffs
Diffstat (limited to 'ci-bench')
-rw-r--r--ci-bench/README.md15
-rw-r--r--ci-bench/src/cachegrind.rs99
-rw-r--r--ci-bench/src/main.rs96
3 files changed, 167 insertions, 43 deletions
diff --git a/ci-bench/README.md b/ci-bench/README.md
index 3187adac..d869c484 100644
--- a/ci-bench/README.md
+++ b/ci-bench/README.md
@@ -12,8 +12,9 @@ important bits.
_Note: this step requires having `valgrind` in your path._
-Use `cargo run --release -- run-all > out.csv` to generate a CSV with the instruction counts for
-the different scenarios we support. The result should look like the following:
+Use `cargo run --release -- run-all --output-dir foo` to generate the results inside the `foo`
+directory. Within that directory, you will find an `icounts.csv` file with the instruction counts
+for the different scenarios we support. It should look like the following:
```csv
handshake_no_resume_1.2_rsa_aes_server,11327015
@@ -31,12 +32,14 @@ handshake_no_resume_1.3_rsa_aes_client,4212770
...
```
+In the `cachegrind` subdirectory you will find output files emitted by the `cachegrind` tool, which
+are useful to report detailed instruction count differences when comparing two benchmark runs.
+
### Comparing results
-Use `cargo run --release -- compare out1.csv out2.csv`. It will output a report using
-GitHub-flavored markdown (used by the CI itself to give feedback about PRs). We currently
-consider differences of 0.2% to be significant, but might tweak it in the future after we gain
-experience with the benchmarking setup.
+Use `cargo run --release -- compare foo bar`. It will output a report using GitHub-flavored markdown
+(used by the CI itself to give feedback about PRs). We currently consider differences of 0.2% to be
+significant, but might tweak it in the future after we gain experience with the benchmarking setup.
### Supported scenarios
diff --git a/ci-bench/src/cachegrind.rs b/ci-bench/src/cachegrind.rs
index e674fbf0..064aeeb1 100644
--- a/ci-bench/src/cachegrind.rs
+++ b/ci-bench/src/cachegrind.rs
@@ -9,12 +9,17 @@ use anyhow::Context;
use crate::benchmark::Benchmark;
use crate::Side;
+/// The subdirectory in which the cachegrind output should be stored
+const CACHEGRIND_OUTPUT_SUBDIR: &str = "cachegrind";
+
/// A cachegrind-based benchmark runner
pub struct CachegrindRunner {
/// The path to the ci-bench executable
///
/// This is necessary because the cachegrind runner works by spawning child processes
executable: String,
+ /// The directory where the cachegrind output will be stored
+ output_dir: PathBuf,
/// The amount of instructions that are executed upon startup of the child process, before
/// actually running one of the benchmarks
///
@@ -24,9 +29,13 @@ pub struct CachegrindRunner {
impl CachegrindRunner {
/// Returns a new cachegrind-based benchmark runner
- pub fn new(executable: String) -> anyhow::Result<Self> {
+ pub fn new(executable: String, output_dir: PathBuf) -> anyhow::Result<Self> {
Self::ensure_cachegrind_available()?;
+ let cachegrind_output_dir = output_dir.join(CACHEGRIND_OUTPUT_SUBDIR);
+ std::fs::create_dir_all(&cachegrind_output_dir)
+ .context("Failed to create cachegrind output directory")?;
+
// We don't care about the side here, so let's use `Server` just to choose something
let overhead_instructions = Self::run_bench_side(
&executable,
@@ -35,12 +44,14 @@ impl CachegrindRunner {
"calibration",
Stdio::piped(),
Stdio::piped(),
+ &cachegrind_output_dir,
)?
.wait_and_get_instr_count()
.context("Unable to count overhead instructions")?;
Ok(CachegrindRunner {
executable,
+ output_dir: cachegrind_output_dir,
overhead_instructions,
})
}
@@ -61,6 +72,7 @@ impl CachegrindRunner {
&bench.name_with_side(Side::Server),
Stdio::piped(),
Stdio::piped(),
+ &self.output_dir,
)
.context("server side bench crashed")?;
@@ -71,6 +83,7 @@ impl CachegrindRunner {
&bench.name_with_side(Side::Client),
Stdio::from(server.process.stdout.take().unwrap()),
Stdio::from(server.process.stdin.take().unwrap()),
+ &self.output_dir,
)
.context("client side bench crashed")?;
@@ -116,10 +129,9 @@ impl CachegrindRunner {
name: &str,
stdin: Stdio,
stdout: Stdio,
+ output_dir: &Path,
) -> anyhow::Result<BenchSubprocess> {
- let output_file = PathBuf::from(format!("target/cachegrind/cachegrind.out.{}", name));
- std::fs::create_dir_all(output_file.parent().unwrap())
- .context("Failed to create cachegrind output directory")?;
+ let cachegrind_output_file = output_dir.join(name);
// Run under setarch to disable ASLR, to reduce noise
let mut cmd = Command::new("setarch");
@@ -133,7 +145,10 @@ impl CachegrindRunner {
// keep stderr free of noise, to see any errors from the child process)
.arg("--log-file=/dev/null")
// The file where the instruction counts will be stored
- .arg(format!("--cachegrind-out-file={}", output_file.display()))
+ .arg(format!(
+ "--cachegrind-out-file={}",
+ cachegrind_output_file.display()
+ ))
.arg(executable)
.arg("run-single")
.arg(benchmark_index.to_string())
@@ -146,7 +161,7 @@ impl CachegrindRunner {
Ok(BenchSubprocess {
process: child,
- output_file,
+ cachegrind_output_file,
})
}
}
@@ -156,7 +171,7 @@ struct BenchSubprocess {
/// The benchmark's child process, running under cachegrind
process: Child,
/// Cachegrind's output file for this benchmark
- output_file: PathBuf,
+ cachegrind_output_file: PathBuf,
}
impl BenchSubprocess {
@@ -173,9 +188,7 @@ impl BenchSubprocess {
);
}
- let instruction_count = parse_cachegrind_output(&self.output_file)?;
- std::fs::remove_file(&self.output_file).ok();
-
+ let instruction_count = parse_cachegrind_output(&self.cachegrind_output_file)?;
Ok(instruction_count)
}
}
@@ -216,3 +229,69 @@ impl Sub for InstructionCounts {
}
}
}
+
+/// Returns the detailed instruction diff between the baseline and the candidate
+pub fn diff(baseline: &Path, candidate: &Path, scenario: &str) -> anyhow::Result<String> {
+ // The latest version of valgrind has deprecated cg_diff, which has been superseded by
+ // cg_annotate. Many systems are running older versions, though, so we are sticking with cg_diff
+ // for the time being.
+
+ let tmp_path = Path::new("target/ci-bench-tmp");
+ let tmp = File::create(tmp_path).context("cannot create temp file for cg_diff")?;
+
+ // cg_diff generates a diff between two cachegrind output files in a custom format that is not
+ // user-friendly
+ let cg_diff = Command::new("cg_diff")
+ .arg(
+ baseline
+ .join(CACHEGRIND_OUTPUT_SUBDIR)
+ .join(scenario),
+ )
+ .arg(
+ candidate
+ .join(CACHEGRIND_OUTPUT_SUBDIR)
+ .join(scenario),
+ )
+ .stdout(Stdio::from(tmp))
+ .spawn()
+ .context("cannot spawn cg_diff subprocess")?
+ .wait()
+ .context("error waiting for cg_diff to finish")?;
+
+ if !cg_diff.success() {
+ anyhow::bail!(
+ "cg_diff finished with an error (code = {:?})",
+ cg_diff.code()
+ )
+ }
+
+ // cg_annotate transforms the output of cg_diff into something a user can understand
+ let cg_annotate = Command::new("cg_annotate")
+ .arg(tmp_path)
+ .arg("--auto=no")
+ .output()
+ .context("error waiting for cg_annotate to finish")?;
+
+ if !cg_annotate.status.success() {
+ anyhow::bail!(
+ "cg_annotate finished with an error (code = {:?})",
+ cg_annotate.status.code()
+ )
+ }
+
+ let annotated =
+ String::from_utf8(cg_annotate.stdout).context("cg_annotate produced invalid UTF8")?;
+
+ // Discard lines before the first `Ir` header
+ let mut diff = String::new();
+ for line in annotated
+ .trim()
+ .lines()
+ .skip_while(|l| l.trim() != "Ir")
+ {
+ diff.push_str(line);
+ diff.push('\n');
+ }
+
+ Ok(diff)
+}
diff --git a/ci-bench/src/main.rs b/ci-bench/src/main.rs
index 066b7187..5b0e8dad 100644
--- a/ci-bench/src/main.rs
+++ b/ci-bench/src/main.rs
@@ -56,6 +56,9 @@ const RESUMED_HANDSHAKE_RUNS: usize = 30;
/// The threshold at which instruction count changes are considered relevant
const CHANGE_THRESHOLD: f64 = 0.002; // 0.2%
+/// The name of the file where the instruction counts are stored after a `run-all` run
+const ICOUNTS_FILENAME: &str = "icounts.csv";
+
#[derive(Parser)]
#[command(about)]
pub struct Cli {
@@ -66,15 +69,18 @@ pub struct Cli {
#[derive(Subcommand)]
pub enum Command {
/// Run all benchmarks and prints the measured CPU instruction counts in CSV format
- RunAll,
+ RunAll {
+ #[arg(short, long)]
+ output_dir: Option<PathBuf>,
+ },
/// Run a single benchmark at the provided index (used by the bench runner to start each benchmark in its own process)
RunSingle { index: u32, side: Side },
/// Compare the results from two previous benchmark runs and print a user-friendly markdown overview
Compare {
- /// Path to a CSV file obtained from a previous `run-all` execution
- baseline_input: PathBuf,
- /// Path to a CSV file obtained from a previous `run-all` execution
- candidate_input: PathBuf,
+ /// Path to the directory with the results of a previous `run-all` execution
+ baseline_dir: PathBuf,
+ /// Path to the directory with the results of a previous `run-all` execution
+ candidate_dir: PathBuf,
},
}
@@ -99,13 +105,16 @@ fn main() -> anyhow::Result<()> {
let cli = Cli::parse();
match cli.command {
- Command::RunAll => {
+ Command::RunAll { output_dir } => {
let executable = std::env::args().next().unwrap();
- let results = run_all(executable, &benchmarks)?;
+ let output_dir = output_dir.unwrap_or("target/ci-bench".into());
+ let results = run_all(executable, output_dir.clone(), &benchmarks)?;
// Output results in CSV (note: not using a library here to avoid extra dependencies)
+ let mut csv_file = File::create(output_dir.join(ICOUNTS_FILENAME))
+ .context("cannot create output csv file")?;
for (name, instr_count) in results {
- println!("{name},{instr_count}");
+ writeln!(csv_file, "{name},{instr_count}")?;
}
}
Command::RunSingle { index, side } => {
@@ -167,12 +176,12 @@ fn main() -> anyhow::Result<()> {
mem::forget(stdout);
}
Command::Compare {
- baseline_input,
- candidate_input,
+ baseline_dir,
+ candidate_dir,
} => {
- let baseline = read_results(baseline_input.as_ref())?;
- let candidate = read_results(candidate_input.as_ref())?;
- let result = compare_results(&baseline, &candidate);
+ let baseline = read_results(&baseline_dir.join(ICOUNTS_FILENAME))?;
+ let candidate = read_results(&candidate_dir.join(ICOUNTS_FILENAME))?;
+ let result = compare_results(&baseline_dir, &candidate_dir, &baseline, &candidate)?;
print_report(&result);
if !result.noteworthy.is_empty() {
@@ -275,9 +284,13 @@ fn add_benchmark_group(benchmarks: &mut Vec<Benchmark>, params: BenchmarkParams)
}
/// Run all the provided benches under cachegrind to retrieve their instruction count
-pub fn run_all(executable: String, benches: &[Benchmark]) -> anyhow::Result<Vec<(String, u64)>> {
+pub fn run_all(
+ executable: String,
+ output_dir: PathBuf,
+ benches: &[Benchmark],
+) -> anyhow::Result<Vec<(String, u64)>> {
// Run the benchmarks in parallel
- let cachegrind = CachegrindRunner::new(executable)?;
+ let cachegrind = CachegrindRunner::new(executable, output_dir)?;
let results: Vec<_> = benches
.par_iter()
.enumerate()
@@ -497,8 +510,10 @@ fn run_bench<T: BenchStepper>(mut stepper: T, kind: BenchmarkKind) -> anyhow::Re
/// The results of a comparison between two `run-all` executions
struct CompareResult {
- /// Results that probably indicate a real change in performance and should be highlighted
- noteworthy: Vec<Diff>,
+ /// Results that probably indicate a real change in performance and should be highlighted.
+ ///
+ /// The string is a detailed diff between the instruction counts obtained from cachegrind.
+ noteworthy: Vec<(Diff, String)>,
/// Results within the noise threshold
negligible: Vec<Diff>,
/// Benchmark scenarios present in the candidate but missing in the baseline
@@ -517,7 +532,10 @@ struct Diff {
/// Reads the (benchmark, instruction count) pairs from previous CSV output
fn read_results(path: &Path) -> anyhow::Result<HashMap<String, u64>> {
- let file = fs::File::open(path).context("CSV file for comparison not found")?;
+ let file = File::open(path).context(format!(
+ "CSV file for comparison not found: {}",
+ path.display()
+ ))?;
let mut measurements = HashMap::new();
for line in BufReader::new(file).lines() {
@@ -543,9 +561,11 @@ fn read_results(path: &Path) -> anyhow::Result<HashMap<String, u64>> {
/// Returns an internal representation of the comparison between the baseline and the candidate
/// measurements
fn compare_results(
+ baseline_dir: &Path,
+ candidate_dir: &Path,
baseline: &HashMap<String, u64>,
candidate: &HashMap<String, u64>,
-) -> CompareResult {
+) -> anyhow::Result<CompareResult> {
let mut diffs = Vec::new();
let mut missing = Vec::new();
for (scenario, &instr_count) in candidate {
@@ -573,11 +593,18 @@ fn compare_results(
});
let (noteworthy, negligible) = split_on_threshold(&diffs);
- CompareResult {
- noteworthy,
+
+ let mut noteworthy_with_details = Vec::new();
+ for diff in noteworthy {
+ let detailed_diff = cachegrind::diff(baseline_dir, candidate_dir, &diff.scenario)?;
+ noteworthy_with_details.push((diff, detailed_diff));
+ }
+
+ Ok(CompareResult {
+ noteworthy: noteworthy_with_details,
negligible,
missing_in_baseline: missing,
- }
+ })
}
/// Prints a report of the comparison to stdout, using GitHub-flavored markdown
@@ -594,23 +621,38 @@ fn print_report(result: &CompareResult) {
}
}
- println!("### Noteworthy instruction count differences");
+ println!("## Noteworthy instruction count differences");
if result.noteworthy.is_empty() {
println!(
"_There are no noteworthy instruction count differences (i.e. above {}%)_",
CHANGE_THRESHOLD * 100.0
);
} else {
- table(&result.noteworthy, true);
+ table(
+ result
+ .noteworthy
+ .iter()
+ .map(|(diff, _)| diff),
+ true,
+ );
+ println!("<details>");
+ println!("<summary>Details per scenario</summary>\n");
+ for (diff, detailed_diff) in &result.noteworthy {
+ println!("#### {}", diff.scenario);
+ println!("```");
+ println!("{detailed_diff}");
+ println!("```");
+ }
+ println!("</details>\n")
}
- println!("### Other instruction count differences");
+ println!("## Other instruction count differences");
if result.negligible.is_empty() {
println!("_There are no other instruction count differences_");
} else {
println!("<details>");
println!("<summary>Click to expand</summary>\n");
- table(&result.negligible, false);
+ table(result.negligible.iter(), false);
println!("</details>\n")
}
}
@@ -633,7 +675,7 @@ fn split_on_threshold(diffs: &[Diff]) -> (Vec<Diff>, Vec<Diff>) {
}
/// Renders the diffs as a markdown table
-fn table(diffs: &[Diff], emoji_feedback: bool) {
+fn table<'a>(diffs: impl Iterator<Item = &'a Diff>, emoji_feedback: bool) {
println!("| Scenario | Baseline | Candidate | Diff |");
println!("| --- | ---: | ---: | ---: |");
for diff in diffs {