Fix UB/crash in new SeqnoToTimeMapping::CopyFromSeqnoRange (#12293)

Summary: After https://github.com/facebook/rocksdb/issues/12253 this function has crashed in the crash test, in its call to `std::copy`. I haven't reproduced the crash directly, but `std::copy` probably has undefined behavior if the starting iterator is after the ending iterator, which was possible. I've fixed the logic to deal with that case and to add an assertion to check that precondition of `std::copy` (which appears can be unchecked by `std::copy` itself even with UBSAN+ASAN). Also added some unit tests etc. that were unfinished for https://github.com/facebook/rocksdb/issues/12253, and slightly tweak SeqnoToTimeMapping::EnforceMaxTimeSpan handling of zero time span case. This is intended for patching 8.11. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12293 Test Plan: tests added. Will trigger ~20 runs of the crash test job that saw the crash. https://fburl.com/ci/5iiizvfa Reviewed By: jowlyzhang Differential Revision: D53090422 Pulled By: pdillinger fbshipit-source-id: 69d60b1847d9c7e4ae62b153011c2040405db461
author: Peter Dillinger <peterd@meta.com> 2024-01-25 11:27:15 -0800
committer: Peter Dillinger <peterd@meta.com> 2024-01-25 13:26:31 -0800
commit: d9e7f6a3b9087e67cbbede41abe39d8e10d8b191 (patch)
tree: ab462176a9fb95bb05ce1f9337bb81e48a3e437b
parent: bf61a59a5bf9115b5a9362400aea4900554c31f7 (diff)
3 files changed, 215 insertions, 11 deletions
diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc
index 45796a2d5..23b0874a7 100644
--- a/db/seqno_time_test.cc
+++ b/db/seqno_time_test.cc
@@ -1052,6 +1052,114 @@ TEST_F(SeqnoTimeTest, MappingAppend) {
   ASSERT_EQ(test.TEST_GetLastEntry(), P({11, 250}));
 }
 
+TEST_F(SeqnoTimeTest, CapacityLimits) {
+  using P = SeqnoToTimeMapping::SeqnoTimePair;
+  SeqnoToTimeMapping test;
+
+  test.SetCapacity(3);
+  EXPECT_TRUE(test.Append(10, 300));
+  EXPECT_TRUE(test.Append(20, 400));
+  EXPECT_TRUE(test.Append(30, 500));
+  EXPECT_TRUE(test.Append(40, 600));
+  // Capacity 3 is small enough that the non-strict limit is
+  // equal to the strict limit.
+  EXPECT_EQ(3U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({40, 600}));
+
+  // Same for Capacity 2
+  test.SetCapacity(2);
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({40, 600}));
+
+  EXPECT_TRUE(test.Append(50, 700));
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({50, 700}));
+
+  // Capacity 1 is difficult to work with internally, so is
+  // coerced to 2.
+  test.SetCapacity(1);
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({50, 700}));
+
+  EXPECT_TRUE(test.Append(60, 800));
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({60, 800}));
+
+  // Capacity 0 means throw everything away
+  test.SetCapacity(0);
+  EXPECT_EQ(0U, test.Size());
+
+  EXPECT_FALSE(test.Append(70, 900));
+  EXPECT_EQ(0U, test.Size());
+
+  // Unlimited capacity
+  test.SetCapacity(UINT64_MAX);
+  for (unsigned i = 1; i <= 10101U; i++) {
+    EXPECT_TRUE(test.Append(i, 11U * i));
+  }
+  EXPECT_EQ(10101U, test.Size());
+}
+
+TEST_F(SeqnoTimeTest, TimeSpanLimits) {
+  SeqnoToTimeMapping test;
+
+  // Default: no limit
+  for (unsigned i = 1; i <= 63U; i++) {
+    EXPECT_TRUE(test.Append(1000 + i, uint64_t{1} << i));
+  }
+  // None dropped.
+  EXPECT_EQ(63U, test.Size());
+
+  test.Clear();
+
+  // Explicit no limit
+  test.SetMaxTimeSpan(UINT64_MAX);
+  for (unsigned i = 1; i <= 63U; i++) {
+    EXPECT_TRUE(test.Append(1000 + i, uint64_t{1} << i));
+  }
+  // None dropped.
+  EXPECT_EQ(63U, test.Size());
+
+  // We generally keep 2 entries as long as the configured max time span
+  // is non-zero
+  test.SetMaxTimeSpan(10);
+  EXPECT_EQ(2U, test.Size());
+
+  test.SetMaxTimeSpan(1);
+  EXPECT_EQ(2U, test.Size());
+
+  // But go down to 1 entry if the max time span is zero
+  test.SetMaxTimeSpan(0);
+  EXPECT_EQ(1U, test.Size());
+
+  EXPECT_TRUE(test.Append(2000, (uint64_t{1} << 63) + 42U));
+  EXPECT_EQ(1U, test.Size());
+
+  test.Clear();
+
+  // Test more typical behavior. Note that one entry at or beyond the max span
+  // is kept.
+  test.SetMaxTimeSpan(100);
+  EXPECT_TRUE(test.Append(1001, 123));
+  EXPECT_TRUE(test.Append(1002, 134));
+  EXPECT_TRUE(test.Append(1003, 150));
+  EXPECT_TRUE(test.Append(1004, 189));
+  EXPECT_TRUE(test.Append(1005, 220));
+  EXPECT_EQ(5U, test.Size());
+  EXPECT_TRUE(test.Append(1006, 233));
+  EXPECT_EQ(6U, test.Size());
+  EXPECT_TRUE(test.Append(1007, 234));
+  EXPECT_EQ(6U, test.Size());
+  EXPECT_TRUE(test.Append(1008, 235));
+  EXPECT_EQ(7U, test.Size());
+  EXPECT_TRUE(test.Append(1009, 300));
+  EXPECT_EQ(6U, test.Size());
+  EXPECT_TRUE(test.Append(1010, 350));
+  EXPECT_EQ(3U, test.Size());
+  EXPECT_TRUE(test.Append(1011, 470));
+  EXPECT_EQ(2U, test.Size());
+}
+
 TEST_F(SeqnoTimeTest, ProximalFunctions) {
   SeqnoToTimeMapping test;
   test.SetCapacity(10);
@@ -1201,6 +1309,93 @@ TEST_F(SeqnoTimeTest, PrePopulate) {
   }
 }
 
+TEST_F(SeqnoTimeTest, CopyFromSeqnoRange) {
+  SeqnoToTimeMapping test_from;
+  SeqnoToTimeMapping test_to;
+
+  // With zero to draw from
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 100, 100);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  // With one to draw from
+  EXPECT_TRUE(test_from.Append(10, 500));
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // Includes one entry before range
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 100, 100);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // Includes one entry before range (even if somewhat nonsensical)
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 9);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 10);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // With more to draw from
+  EXPECT_TRUE(test_from.Append(20, 600));
+  EXPECT_TRUE(test_from.Append(30, 700));
+  EXPECT_TRUE(test_from.Append(40, 800));
+  EXPECT_TRUE(test_from.Append(50, 900));
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
+  EXPECT_EQ(test_to.Size(), 5U);
+
+  // Includes one entry before range
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 100, 100);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 19, 19);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // Includes one entry before range (even if somewhat nonsensical)
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 9);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 10);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 20, 20);
+  EXPECT_EQ(test_to.Size(), 2U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 20, 29);
+  EXPECT_EQ(test_to.Size(), 2U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 20, 30);
+  EXPECT_EQ(test_to.Size(), 3U);
+}
+
 TEST_F(SeqnoTimeTest, EnforceWithNow) {
   constexpr uint64_t kMaxTimeSpan = 420;
   SeqnoToTimeMapping test;
diff --git a/db/seqno_to_time_mapping.cc b/db/seqno_to_time_mapping.cc
index 7fd11e90a..e7bee3e46 100644
--- a/db/seqno_to_time_mapping.cc
+++ b/db/seqno_to_time_mapping.cc
@@ -72,20 +72,16 @@ SequenceNumber SeqnoToTimeMapping::GetProximalSeqnoBeforeTime(
 void SeqnoToTimeMapping::EnforceMaxTimeSpan(uint64_t now) {
   assert(enforced_);  // at least sorted
   uint64_t cutoff_time;
+  if (pairs_.size() <= 1) {
+    return;
+  }
   if (now > 0) {
-    if (pairs_.size() <= 1) {
-      return;
-    }
     if (now < max_time_span_) {
       // Nothing eligible to prune / avoid underflow
       return;
     }
     cutoff_time = now - max_time_span_;
   } else {
-    if (pairs_.size() <= 2) {
-      // Need to keep at least two if we don't know the current time
-      return;
-    }
     const auto& last = pairs_.back();
     if (last.time < max_time_span_) {
       // Nothing eligible to prune / avoid underflow
@@ -108,7 +104,6 @@ void SeqnoToTimeMapping::EnforceCapacity(bool strict) {
     return;
   }
   // Treat cap of 1 as 2 to work with the below algorithm (etc.)
-  // TODO: unit test
   if (strict_cap == 1) {
     strict_cap = 2;
   }
@@ -400,12 +395,16 @@ void SeqnoToTimeMapping::CopyFromSeqnoRange(const SeqnoToTimeMapping& src,
                                             SequenceNumber to_seqno) {
   bool orig_empty = Empty();
   auto src_it = src.FindGreaterEqSeqno(from_seqno);
+  // Allow nonsensical ranges like [1000, 0] which might show up e.g. for
+  // an SST file with no entries.
+  auto src_it_end =
+      to_seqno < from_seqno ? src_it : src.FindGreaterSeqno(to_seqno);
   // To best answer GetProximalTimeBeforeSeqno(from_seqno) we need an entry
   // with a seqno before that (if available)
   if (src_it != src.pairs_.begin()) {
     --src_it;
   }
-  auto src_it_end = src.FindGreaterSeqno(to_seqno);
+  assert(src_it <= src_it_end);
   std::copy(src_it, src_it_end, std::back_inserter(pairs_));
 
   if (!orig_empty || max_time_span_ < UINT64_MAX || capacity_ < UINT64_MAX) {
@@ -420,6 +419,7 @@ bool SeqnoToTimeMapping::Append(SequenceNumber seqno, uint64_t time) {
   bool added = false;
   if (seqno == 0) {
     // skip seq number 0, which may have special meaning, like zeroed out data
+    // TODO: consider changing?
   } else if (pairs_.empty()) {
     enforced_ = true;
     pairs_.push_back({seqno, time});
diff --git a/db/seqno_to_time_mapping.h b/db/seqno_to_time_mapping.h
index d30a991d6..0332d2f90 100644
--- a/db/seqno_to_time_mapping.h
+++ b/db/seqno_to_time_mapping.h
@@ -125,12 +125,13 @@ class SeqnoToTimeMapping {
   // under enforcement mode, the structure will maintian only one entry older
   // than the newest entry time minus max_time_span, so that
   // GetProximalSeqnoBeforeTime queries back to that time return a good result.
-  // UINT64_MAX == unlimited. Returns *this.
+  // UINT64_MAX == unlimited. 0 == retain just one latest entry. Returns *this.
   SeqnoToTimeMapping& SetMaxTimeSpan(uint64_t max_time_span);
 
   // Set the nominal capacity under enforcement mode. The structure is allowed
   // to grow some reasonable fraction larger but will automatically compact
-  // down to this size. UINT64_MAX == unlimited. Returns *this.
+  // down to this size. UINT64_MAX == unlimited. 0 == retain nothing.
+  // Returns *this.
   SeqnoToTimeMapping& SetCapacity(uint64_t capacity);
 
   // ==== Modifiers, enforced ==== //
@@ -243,6 +244,14 @@ class SeqnoToTimeMapping {
 
   std::deque<SeqnoTimePair> pairs_;
 
+  // Whether this object is in the "enforced" state. Between calls to public
+  // functions, enforced_==true means that
+  // * `pairs_` is sorted
+  // * The capacity limit (non-strict) is met
+  // * The time span limit is met
+  // However, some places within the implementation (Append()) will temporarily
+  // violate those last two conditions while enforced_==true. See also the
+  // Enforce*() and Sort*() private functions below.
   bool enforced_ = true;
 
   void EnforceMaxTimeSpan(uint64_t now = 0);
author	Peter Dillinger <peterd@meta.com>	2024-01-25 11:27:15 -0800
committer	Peter Dillinger <peterd@meta.com>	2024-01-25 13:26:31 -0800
commit	d9e7f6a3b9087e67cbbede41abe39d8e10d8b191 (patch)
tree	ab462176a9fb95bb05ce1f9337bb81e48a3e437b
parent	bf61a59a5bf9115b5a9362400aea4900554c31f7 (diff)