Reduce instruction counting scope to minimum

This eliminates: - for resumption and transfer tests, the full handshake - for all tests, one-time setup costs (eg, RSA private key validation)
author: Joe Birr-Pixton <jpixton@gmail.com> 2024-09-16 15:05:56 +0100
committer: Joe Birr-Pixton <jpixton@gmail.com> 2024-09-20 12:27:49 +0000
commit: 7d08edbcd1f65ff8fa3b580dc931063657e0b680 (patch)
tree: 6a0d08e3a6a59b7ef277994eab5e13b3a08cfa5d
parent: cd2910123d13ee9d5961bf15f7d1fccc410945f2 (diff)
5 files changed, 60 insertions, 7 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index a14209f6..127991c3 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -439,6 +439,11 @@ jobs:
         uses: actions/checkout@v4
         with:
           persist-credentials: false
+
+      - name: Install valgrind
+        if: runner.os == 'Linux'
+        run: sudo apt-get update && sudo apt-get install -y valgrind
+
       - name: Install rust toolchain
         uses: dtolnay/rust-toolchain@stable
         with:
@@ -458,6 +463,11 @@ jobs:
         uses: actions/checkout@v4
         with:
           persist-credentials: false
+
+      - name: Install valgrind
+        if: runner.os == 'Linux'
+        run: sudo apt-get update && sudo apt-get install -y valgrind
+
       - name: Install rust toolchain
         uses: dtolnay/rust-toolchain@nightly
         with:
diff --git a/Cargo.lock b/Cargo.lock
index b321c1f7..c8b83e6b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -657,6 +657,15 @@ dependencies = [
 ]
 
 [[package]]
+name = "crabgrind"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bdbd43e4f32a9681a504577db2d4ea7d3f7b1bf2e97955561af98501ab600508"
+dependencies = [
+ "cc",
+]
+
+[[package]]
 name = "crossbeam-deque"
 version = "0.8.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2144,6 +2153,7 @@ dependencies = [
  "async-trait",
  "byteorder",
  "clap",
+ "crabgrind",
  "fxhash",
  "itertools 0.13.0",
  "rayon",
diff --git a/ci-bench/Cargo.toml b/ci-bench/Cargo.toml
index c1322f13..51b70528 100644
--- a/ci-bench/Cargo.toml
+++ b/ci-bench/Cargo.toml
@@ -20,3 +20,6 @@ rustls-pemfile = "2"
 
 [target.'cfg(not(target_env = "msvc"))'.dependencies]
 tikv-jemallocator = "0.6"
+
+[target.'cfg(target_os = "linux")'.dependencies]
+crabgrind = "=0.1.9" # compatible with valgrind package on GHA ubuntu-latest
diff --git a/ci-bench/src/callgrind.rs b/ci-bench/src/callgrind.rs
index 6ce003da..434e1770 100644
--- a/ci-bench/src/callgrind.rs
+++ b/ci-bench/src/callgrind.rs
@@ -140,6 +140,8 @@ impl CallgrindRunner {
             .arg("-R")
             .arg("valgrind")
             .arg("--tool=callgrind")
+            // Do not count instructions from the start, instead this is controlled by `CountInstructions`
+            .arg("--collect-atstart=no")
             // Disable the cache simulation, since we are only interested in instruction counts
             .arg("--cache-sim=no")
             // Save callgrind's logs, which would otherwise be printed to stderr (we want to
@@ -288,3 +290,25 @@ pub fn diff(baseline: &Path, candidate: &Path, scenario: &str) -> anyhow::Result
          Candidate output:\n{string_candidate}\n"
     ))
 }
+
+/// A RAII-like object for enabling callgrind instruction counting.
+///
+/// Warning: must not be nested.
+///
+/// Instructions outside the scope of these objects are not counted.
+pub(crate) struct CountInstructions;
+
+impl CountInstructions {
+    pub(crate) fn start() -> Self {
+        #[cfg(target_os = "linux")]
+        crabgrind::callgrind::toggle_collect();
+        CountInstructions
+    }
+}
+
+impl Drop for CountInstructions {
+    fn drop(&mut self) {
+        #[cfg(target_os = "linux")]
+        crabgrind::callgrind::toggle_collect();
+    }
+}
diff --git a/ci-bench/src/main.rs b/ci-bench/src/main.rs
index e1ede865..dc1174d3 100644
--- a/ci-bench/src/main.rs
+++ b/ci-bench/src/main.rs
@@ -27,7 +27,7 @@ use crate::benchmark::{
     get_reported_instr_count, validate_benchmarks, Benchmark, BenchmarkKind, BenchmarkParams,
     ResumptionKind,
 };
-use crate::callgrind::CallgrindRunner;
+use crate::callgrind::{CallgrindRunner, CountInstructions};
 use crate::util::async_io::{self, AsyncRead, AsyncWrite};
 use crate::util::transport::{
     read_handshake_message, read_plaintext_to_end_bounded, send_handshake_message,
@@ -645,17 +645,20 @@ impl BenchStepper for ServerSideStepper<'_> {
 
 /// Runs the benchmark using the provided stepper
 async fn run_bench<T: BenchStepper>(mut stepper: T, kind: BenchmarkKind) -> anyhow::Result<()> {
-    let mut endpoint = stepper.handshake().await?;
-
     match kind {
         BenchmarkKind::Handshake(ResumptionKind::No) => {
-            // Nothing else to do here, since the handshake already happened
-            black_box(endpoint);
+            // Just count instructions for one handshake.
+            let _count = CountInstructions::start();
+            black_box(stepper.handshake().await?);
         }
         BenchmarkKind::Handshake(_) => {
-            // The handshake performed above was non-resumed, because the client didn't have a
-            // session ID / ticket; from now on we can perform resumed handshakes. We do it multiple
+            // The first handshake performed is non-resumed, because the client didn't have a
+            // session ID / ticket.  This is not measured.
+            stepper.handshake().await?;
+
+            // From now on we can perform resumed handshakes. We do it multiple
             // times, for reasons explained in the comments to `RESUMED_HANDSHAKE_RUNS`.
+            let _count = CountInstructions::start();
             for _ in 0..RESUMED_HANDSHAKE_RUNS {
                 // Wait for the endpoints to sync (i.e. the server must have discarded the previous
                 // connection and be ready for a new handshake, otherwise the client will start a
@@ -668,6 +671,9 @@ async fn run_bench<T: BenchStepper>(mut stepper: T, kind: BenchmarkKind) -> anyh
             }
         }
         BenchmarkKind::Transfer => {
+            // Measurement includes the transfer, but not the handshake.
+            let mut endpoint = stepper.handshake().await?;
+            let _count = CountInstructions::start();
             stepper
                 .transmit_data(&mut endpoint)
                 .await?;
author	Joe Birr-Pixton <jpixton@gmail.com>	2024-09-16 15:05:56 +0100
committer	Joe Birr-Pixton <jpixton@gmail.com>	2024-09-20 12:27:49 +0000
commit	7d08edbcd1f65ff8fa3b580dc931063657e0b680 (patch)
tree	6a0d08e3a6a59b7ef277994eab5e13b3a08cfa5d
parent	cd2910123d13ee9d5961bf15f7d1fccc410945f2 (diff)