author     Yu Zhang <yuzhangyu@fb.com>     2024-02-06 18:35:36 -0800
committer  Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>     2024-02-06 18:35:36 -0800
commit     e3e8fbb497240dca68820604ef463065146f9fe2 (patch)
tree       00a3c119526c56a337c71be9d99943edd7396480
parent     0088f777889bd627b4332e9851e15c2c460c89fa (diff)
Add separate range classes for internal usage (#12071)
Summary: Introduce separate range classes, `UserKeyRange` and `UserKeyRangePtr`, for internal use. The existing `Range` class is used both in public APIs like `DB::GetApproximateSizes`, `DB::GetApproximateMemTableStats`, and `DB::GetPropertiesOfTablesInRange`, and in internal implementations like `ColumnFamilyData::RangesOverlapWithMemtables` and `VersionSet::GetPropertiesOfTablesInRange`. These call sites have different expectations of what keys the range contains. Public API users are expected to populate the range with user keys without timestamps, in the same way that point lookup and range scan APIs expect key input without timestamps. The internal implementations expect user keys in a format compatible with the user comparator, i.e. user keys with timestamps.

This PR contains:
1) introducing the counterpart range classes `UserKeyRange` and `UserKeyRangePtr` for internal implementation, while leaving the existing `Range` and `RangePtr` classes for public APIs only. Internal implementations are updated to use the new classes.
2) adding user-defined timestamp support for the `DB::GetPropertiesOfTablesInRange` and `DeleteFilesInRanges` APIs.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/12071

Test Plan: existing tests, plus new tests for the `DB::GetPropertiesOfTablesInRange` and `DeleteFilesInRanges` APIs with user-defined timestamps enabled. The change in external_sst_file_ingestion_job does not yet have test coverage with user-defined timestamps enabled; one will be added in a follow-up PR that adds file ingestion support for UDT.

Reviewed By: ltamasi

Differential Revision: D53292608

Pulled By: jowlyzhang

fbshipit-source-id: 9a9279e23c640a6d8f8232636501a95aef7638b8
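To illustrate the intended split between the public `Range` and the new internal `UserKeyRange` (a minimal sketch, not part of this commit; `MakeInternalRange` and `AppendU64TsForIllustration` are hypothetical names, and the timestamp encoding is simplified, whereas the commit itself performs this conversion with the `MaybeAddTimestampsToRange` helper):

    #include <cstdint>
    #include <string>

    #include "db/dbformat.h"    // UserKeyRange, added by this commit
    #include "rocksdb/slice.h"

    namespace {
    // Hypothetical helper for illustration only: append an 8-byte timestamp to
    // a user key. A real implementation must match the encoding expected by
    // the timestamp-aware user comparator.
    void AppendU64TsForIllustration(std::string* key, uint64_t ts) {
      key->append(reinterpret_cast<const char*>(&ts), sizeof(ts));
    }
    }  // namespace

    // Build an internal UserKeyRange from the timestamp-less user keys that a
    // public-API caller would place in a Range.
    ROCKSDB_NAMESPACE::UserKeyRange MakeInternalRange(
        const ROCKSDB_NAMESPACE::Slice& start_no_ts,
        const ROCKSDB_NAMESPACE::Slice& limit_no_ts, size_t ts_sz,
        std::string* start_buf, std::string* limit_buf) {
      if (ts_sz == 0) {
        // No user-defined timestamp: public and internal key formats coincide.
        return {start_no_ts, limit_no_ts};
      }
      // Cover every timestamped version of the bounds: maximum timestamp on
      // the start key and minimum timestamp on the limit key, mirroring the
      // AppendUserKeyWithMaxTimestamp / AppendUserKeyWithMinTimestamp logic
      // that this commit replaces.
      start_buf->assign(start_no_ts.data(), start_no_ts.size());
      AppendU64TsForIllustration(start_buf, UINT64_MAX);
      limit_buf->assign(limit_no_ts.data(), limit_no_ts.size());
      AppendU64TsForIllustration(limit_buf, 0);
      return {*start_buf, *limit_buf};
    }

Widening the bounds this way keeps inclusive range checks such as `RangesOverlapWithMemtables` covering all timestamped versions of the boundary keys.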
-rw-r--r--  db/column_family.cc                                              2
-rw-r--r--  db/column_family.h                                               2
-rw-r--r--  db/compaction/compaction_job.cc                                  8
-rw-r--r--  db/db_compaction_test.cc                                       102
-rw-r--r--  db/db_impl/db_impl.cc                                           47
-rw-r--r--  db/db_impl/db_impl_compaction_flush.cc                           2
-rw-r--r--  db/db_table_properties_test.cc                                 145
-rw-r--r--  db/dbformat.h                                                   27
-rw-r--r--  db/external_sst_file_ingestion_job.cc                           39
-rw-r--r--  db/version_set.cc                                                8
-rw-r--r--  db/version_set.h                                                 2
-rw-r--r--  unreleased_history/new_features/udt_support_for_some_apis.md    1
12 files changed, 275 insertions, 110 deletions
diff --git a/db/column_family.cc b/db/column_family.cc
index 1bdd8cf3d..94830ce7d 100644
--- a/db/column_family.cc
+++ b/db/column_family.cc
@@ -1179,7 +1179,7 @@ bool ColumnFamilyData::RangeOverlapWithCompaction(
}
Status ColumnFamilyData::RangesOverlapWithMemtables(
- const autovector<Range>& ranges, SuperVersion* super_version,
+ const autovector<UserKeyRange>& ranges, SuperVersion* super_version,
bool allow_data_in_errors, bool* overlap) {
assert(overlap != nullptr);
*overlap = false;
diff --git a/db/column_family.h b/db/column_family.h
index ecad83b0b..ce05c45a2 100644
--- a/db/column_family.h
+++ b/db/column_family.h
@@ -400,7 +400,7 @@ class ColumnFamilyData {
// duration of this function.
//
// Thread-safe
- Status RangesOverlapWithMemtables(const autovector<Range>& ranges,
+ Status RangesOverlapWithMemtables(const autovector<UserKeyRange>& ranges,
SuperVersion* super_version,
bool allow_data_in_errors, bool* overlap);
diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc
index 7464bd6fa..30a25d42a 100644
--- a/db/compaction/compaction_job.cc
+++ b/db/compaction/compaction_job.cc
@@ -450,14 +450,6 @@ void CompactionJob::ReleaseSubcompactionResources() {
ShrinkSubcompactionResources(extra_num_subcompaction_threads_reserved_);
}
-struct RangeWithSize {
- Range range;
- uint64_t size;
-
- RangeWithSize(const Slice& a, const Slice& b, uint64_t s = 0)
- : range(a, b), size(s) {}
-};
-
void CompactionJob::GenSubcompactionBoundaries() {
// The goal is to find some boundary keys so that we can evenly partition
// the compaction input data into max_subcompactions ranges.
diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index ba8261d20..7827f393b 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -1870,13 +1870,60 @@ TEST_F(DBCompactionTest, ManualCompactionWithUnorderedWrite) {
ASSERT_EQ(Get("foo"), "v2");
}
-TEST_F(DBCompactionTest, DeleteFileRange) {
+// Test params:
+// 1) whether to enable user-defined timestamps.
+class DBDeleteFileRangeTest : public DBTestBase,
+ public testing::WithParamInterface<bool> {
+ public:
+ DBDeleteFileRangeTest()
+ : DBTestBase("db_delete_file_range_test", /*env_do_fsync=*/true) {}
+
+ void SetUp() override { enable_udt_ = GetParam(); }
+
+ protected:
+ void PutKeyValue(const Slice& key, const Slice& value) {
+ if (enable_udt_) {
+ EXPECT_OK(db_->Put(WriteOptions(), key, min_ts_, value));
+ } else {
+ EXPECT_OK(Put(key, value));
+ }
+ }
+
+ std::string GetValue(const std::string& key) {
+ ReadOptions roptions;
+ std::string result;
+ if (enable_udt_) {
+ roptions.timestamp = &min_ts_;
+ }
+ Status s = db_->Get(roptions, key, &result);
+ EXPECT_TRUE(s.ok());
+ return result;
+ }
+
+ Status MaybeGetValue(const std::string& key, std::string* result) {
+ ReadOptions roptions;
+ if (enable_udt_) {
+ roptions.timestamp = &min_ts_;
+ }
+ Status s = db_->Get(roptions, key, result);
+ EXPECT_TRUE(s.IsNotFound() || s.ok());
+ return s;
+ }
+
+ bool enable_udt_ = false;
+ Slice min_ts_ = MinU64Ts();
+};
+
+TEST_P(DBDeleteFileRangeTest, DeleteFileRange) {
Options options = CurrentOptions();
options.write_buffer_size = 10 * 1024 * 1024;
options.max_bytes_for_level_multiplier = 2;
options.num_levels = 4;
options.level0_file_num_compaction_trigger = 3;
options.max_background_compactions = 3;
+ if (enable_udt_) {
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
+ }
DestroyAndReopen(options);
int32_t value_size = 10 * 1024; // 10 KB
@@ -1888,14 +1935,14 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
// file 1 [0 => 100]
for (int32_t i = 0; i < 100; i++) {
values[i] = rnd.RandomString(value_size);
- ASSERT_OK(Put(Key(i), values[i]));
+ PutKeyValue(Key(i), values[i]);
}
ASSERT_OK(Flush());
// file 2 [100 => 300]
for (int32_t i = 100; i < 300; i++) {
values[i] = rnd.RandomString(value_size);
- ASSERT_OK(Put(Key(i), values[i]));
+ PutKeyValue(Key(i), values[i]);
}
ASSERT_OK(Flush());
@@ -1911,7 +1958,7 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
// file 3 [ 0 => 200]
for (int32_t i = 0; i < 200; i++) {
values[i] = rnd.RandomString(value_size);
- ASSERT_OK(Put(Key(i), values[i]));
+ PutKeyValue(Key(i), values[i]);
}
ASSERT_OK(Flush());
@@ -1923,7 +1970,7 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
}
values[j] = rnd.RandomString(value_size);
- ASSERT_OK(Put(Key(j), values[j]));
+ PutKeyValue(Key(j), values[j]);
}
}
ASSERT_OK(Flush());
@@ -1948,11 +1995,10 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
int32_t deleted_count = 0;
for (int32_t i = 0; i < 4300; i++) {
if (i < 1000 || i > 2000) {
- ASSERT_EQ(Get(Key(i)), values[i]);
+ ASSERT_EQ(GetValue(Key(i)), values[i]);
} else {
- ReadOptions roptions;
std::string result;
- Status s = db_->Get(roptions, Key(i), &result);
+ Status s = MaybeGetValue(Key(i), &result);
ASSERT_TRUE(s.IsNotFound() || s.ok());
if (s.IsNotFound()) {
deleted_count++;
@@ -1982,7 +2028,7 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
for (int32_t i = 0; i < 4300; i++) {
ReadOptions roptions;
std::string result;
- ASSERT_TRUE(db_->Get(roptions, Key(i), &result).IsNotFound());
+ ASSERT_TRUE(MaybeGetValue(Key(i), &result).IsNotFound());
deleted_count2++;
}
ASSERT_GT(deleted_count2, deleted_count);
@@ -1990,13 +2036,16 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
ASSERT_GT(old_num_files, new_num_files);
}
-TEST_F(DBCompactionTest, DeleteFilesInRanges) {
+TEST_P(DBDeleteFileRangeTest, DeleteFilesInRanges) {
Options options = CurrentOptions();
options.write_buffer_size = 10 * 1024 * 1024;
options.max_bytes_for_level_multiplier = 2;
options.num_levels = 4;
options.max_background_compactions = 3;
options.disable_auto_compactions = true;
+ if (enable_udt_) {
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
+ }
DestroyAndReopen(options);
int32_t value_size = 10 * 1024; // 10 KB
@@ -2009,7 +2058,7 @@ TEST_F(DBCompactionTest, DeleteFilesInRanges) {
for (auto j = 0; j < 100; j++) {
auto k = i * 100 + j;
values[k] = rnd.RandomString(value_size);
- ASSERT_OK(Put(Key(k), values[k]));
+ PutKeyValue(Key(k), values[k]);
}
ASSERT_OK(Flush());
}
@@ -2024,7 +2073,7 @@ TEST_F(DBCompactionTest, DeleteFilesInRanges) {
for (auto i = 0; i < 10; i += 2) {
for (auto j = 0; j < 100; j++) {
auto k = i * 100 + j;
- ASSERT_OK(Put(Key(k), values[k]));
+ PutKeyValue(Key(k), values[k]);
}
ASSERT_OK(Flush());
}
@@ -2050,13 +2099,12 @@ TEST_F(DBCompactionTest, DeleteFilesInRanges) {
// Keys [0, 300) should not exist.
for (auto i = 0; i < 300; i++) {
- ReadOptions ropts;
std::string result;
- auto s = db_->Get(ropts, Key(i), &result);
+ auto s = MaybeGetValue(Key(i), &result);
ASSERT_TRUE(s.IsNotFound());
}
for (auto i = 300; i < 1000; i++) {
- ASSERT_EQ(Get(Key(i)), values[i]);
+ ASSERT_EQ(GetValue(Key(i)), values[i]);
}
}
@@ -2078,16 +2126,15 @@ TEST_F(DBCompactionTest, DeleteFilesInRanges) {
// Keys [600, 900) should not exist.
for (auto i = 600; i < 900; i++) {
- ReadOptions ropts;
std::string result;
- auto s = db_->Get(ropts, Key(i), &result);
+ auto s = MaybeGetValue(Key(i), &result);
ASSERT_TRUE(s.IsNotFound());
}
for (auto i = 300; i < 600; i++) {
- ASSERT_EQ(Get(Key(i)), values[i]);
+ ASSERT_EQ(GetValue(Key(i)), values[i]);
}
for (auto i = 900; i < 1000; i++) {
- ASSERT_EQ(Get(Key(i)), values[i]);
+ ASSERT_EQ(GetValue(Key(i)), values[i]);
}
}
@@ -2098,15 +2145,14 @@ TEST_F(DBCompactionTest, DeleteFilesInRanges) {
ASSERT_EQ("", FilesPerLevel(0));
for (auto i = 0; i < 1000; i++) {
- ReadOptions ropts;
std::string result;
- auto s = db_->Get(ropts, Key(i), &result);
+ auto s = MaybeGetValue(Key(i), &result);
ASSERT_TRUE(s.IsNotFound());
}
}
}
-TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
+TEST_P(DBDeleteFileRangeTest, DeleteFileRangeFileEndpointsOverlapBug) {
// regression test for #2833: groups of files whose user-keys overlap at the
// endpoints could be split by `DeleteFilesInRange`. This caused old data to
// reappear, either because a new version of the key was removed, or a range
@@ -2118,6 +2164,9 @@ TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
Options options = CurrentOptions();
options.level0_file_num_compaction_trigger = kNumL0Files;
options.target_file_size_base = 1 << 10; // 1KB
+ if (enable_udt_) {
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
+ }
DestroyAndReopen(options);
// The snapshot prevents key 1 from having its old version dropped. The low
@@ -2141,8 +2190,8 @@ TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
std::string vals[kNumL0Files];
for (int i = 0; i < kNumL0Files; ++i) {
vals[i] = rnd.RandomString(kValSize);
- ASSERT_OK(Put(Key(i), vals[i]));
- ASSERT_OK(Put(Key(i + 1), vals[i]));
+ PutKeyValue(Key(i), vals[i]);
+ PutKeyValue(Key(i + 1), vals[i]);
ASSERT_OK(Flush());
if (i == 0) {
snapshot = db_->GetSnapshot();
@@ -2155,11 +2204,14 @@ TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
std::string begin_str = Key(0), end_str = Key(1);
Slice begin = begin_str, end = end_str;
ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
- ASSERT_EQ(vals[1], Get(Key(1)));
+ ASSERT_EQ(vals[1], GetValue(Key(1)));
db_->ReleaseSnapshot(snapshot);
}
+INSTANTIATE_TEST_CASE_P(DBDeleteFileRangeTest, DBDeleteFileRangeTest,
+ ::testing::Bool());
+
TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) {
int32_t trivial_move = 0;
int32_t non_trivial_move = 0;
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 86c1f989b..b458913c0 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -4370,7 +4370,25 @@ Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family,
// TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
- auto s = version->GetPropertiesOfTablesInRange(read_options, range, n, props);
+ const Comparator* const ucmp = cfd->user_comparator();
+ assert(ucmp);
+ size_t ts_sz = ucmp->timestamp_size();
+
+ autovector<UserKeyRange> ukey_ranges;
+ std::vector<std::string> keys;
+ ukey_ranges.reserve(n);
+ keys.reserve(2 * n);
+ // Add timestamp if needed
+ for (size_t i = 0; i < n; i++) {
+ auto [start, limit] = MaybeAddTimestampsToRange(
+ &range[i].start, &range[i].limit, ts_sz, &keys.emplace_back(),
+ &keys.emplace_back(), /*exclusive_end=*/false);
+ assert(start.has_value());
+ assert(limit.has_value());
+ ukey_ranges.emplace_back(start.value(), limit.value());
+ }
+ auto s =
+ version->GetPropertiesOfTablesInRange(read_options, ukey_ranges, props);
// Decrement the ref count
mutex_.Lock();
@@ -4887,6 +4905,24 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
Status status = Status::OK();
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
ColumnFamilyData* cfd = cfh->cfd();
+ const Comparator* ucmp = cfd->user_comparator();
+ assert(ucmp);
+ const size_t ts_sz = ucmp->timestamp_size();
+ autovector<UserKeyRangePtr> ukey_ranges;
+ std::vector<std::string> keys;
+ std::vector<Slice> key_slices;
+ ukey_ranges.reserve(n);
+ keys.reserve(2 * n);
+ key_slices.reserve(2 * n);
+ for (size_t i = 0; i < n; i++) {
+ auto [start, limit] = MaybeAddTimestampsToRange(
+ ranges[i].start, ranges[i].limit, ts_sz, &keys.emplace_back(),
+ &keys.emplace_back(), !include_end);
+ assert((ranges[i].start != nullptr) == start.has_value());
+ assert((ranges[i].limit != nullptr) == limit.has_value());
+ ukey_ranges.emplace_back(start, limit);
+ }
+
VersionEdit edit;
std::set<FileMetaData*> deleted_files;
JobContext job_context(next_job_id_.fetch_add(1), true);
@@ -4895,8 +4931,9 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
Version* input_version = cfd->current();
auto* vstorage = input_version->storage_info();
- for (size_t r = 0; r < n; r++) {
- auto begin = ranges[r].start, end = ranges[r].limit;
+ for (const auto& range : ukey_ranges) {
+ auto begin = range.start.has_value() ? &range.start.value() : nullptr;
+ auto end = range.limit.has_value() ? &range.limit.value() : nullptr;
for (int i = 1; i < cfd->NumberLevels(); i++) {
if (vstorage->LevelFiles(i).empty() ||
!vstorage->OverlapInLevel(i, begin, end)) {
@@ -4930,8 +4967,8 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
continue;
}
if (!include_end && end != nullptr &&
- cfd->user_comparator()->Compare(level_file->largest.user_key(),
- *end) == 0) {
+ (ucmp->CompareWithoutTimestamp(level_file->largest.user_key(),
+ *end) == 0)) {
continue;
}
edit.SetColumnFamily(cfd->GetID());
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index a613c2aa2..5c5c769e9 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -1140,7 +1140,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
// TODO(ajkr): We could also optimize away the flush in certain cases where
// one/both sides of the interval are unbounded. But it requires more
// changes to RangesOverlapWithMemtables.
- Range range(*begin, *end);
+ UserKeyRange range(*begin, *end);
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
s = cfd->RangesOverlapWithMemtables(
{range}, super_version, immutable_db_options_.allow_data_in_errors,
diff --git a/db/db_table_properties_test.cc b/db/db_table_properties_test.cc
index 41b14111d..e3d56423f 100644
--- a/db/db_table_properties_test.cc
+++ b/db/db_table_properties_test.cc
@@ -59,9 +59,6 @@ class DBTablePropertiesTest : public DBTestBase,
public:
DBTablePropertiesTest()
: DBTestBase("db_table_properties_test", /*env_do_fsync=*/false) {}
- TablePropertiesCollection TestGetPropertiesOfTablesInRange(
- std::vector<Range> ranges, std::size_t* num_properties = nullptr,
- std::size_t* num_files = nullptr);
};
TEST_F(DBTablePropertiesTest, GetPropertiesOfAllTablesTest) {
@@ -236,49 +233,109 @@ TEST_F(DBTablePropertiesTest, CreateOnDeletionCollectorFactory) {
ASSERT_EQ(0.5, del_factory->GetDeletionRatio());
}
-TablePropertiesCollection
-DBTablePropertiesTest::TestGetPropertiesOfTablesInRange(
- std::vector<Range> ranges, std::size_t* num_properties,
- std::size_t* num_files) {
- // Since we deref zero element in the vector it can not be empty
- // otherwise we pass an address to some random memory
- EXPECT_GT(ranges.size(), 0U);
- // run the query
- TablePropertiesCollection props;
- EXPECT_OK(db_->GetPropertiesOfTablesInRange(
- db_->DefaultColumnFamily(), ranges.data(), ranges.size(), &props));
-
- // Make sure that we've received properties for those and for those files
- // only which fall within requested ranges
- std::vector<LiveFileMetaData> vmd;
- db_->GetLiveFilesMetaData(&vmd);
- for (auto& md : vmd) {
- std::string fn = md.db_path + md.name;
- bool in_range = false;
- for (auto& r : ranges) {
- // smallestkey < limit && largestkey >= start
- if (r.limit.compare(md.smallestkey) >= 0 &&
- r.start.compare(md.largestkey) <= 0) {
- in_range = true;
- EXPECT_GT(props.count(fn), 0);
- }
+// Test params:
+// 1) whether to enable user-defined timestamps
+class DBTablePropertiesInRangeTest : public DBTestBase,
+ public testing::WithParamInterface<bool> {
+ public:
+ DBTablePropertiesInRangeTest()
+ : DBTestBase("db_table_properties_in_range_test",
+ /*env_do_fsync=*/false) {}
+
+ void SetUp() override { enable_udt_ = GetParam(); }
+
+ protected:
+ void PutKeyValue(const Slice& key, const Slice& value) {
+ if (enable_udt_) {
+ EXPECT_OK(db_->Put(WriteOptions(), key, min_ts_, value));
+ } else {
+ EXPECT_OK(Put(key, value));
}
- if (!in_range) {
- EXPECT_EQ(props.count(fn), 0);
+ }
+
+ std::string GetValue(const std::string& key) {
+ ReadOptions roptions;
+ std::string result;
+ if (enable_udt_) {
+ roptions.timestamp = &min_ts_;
}
+ Status s = db_->Get(roptions, key, &result);
+ EXPECT_TRUE(s.ok());
+ return result;
}
- if (num_properties) {
- *num_properties = props.size();
+ Status MaybeGetValue(const std::string& key, std::string* result) {
+ ReadOptions roptions;
+ if (enable_udt_) {
+ roptions.timestamp = &min_ts_;
+ }
+ Status s = db_->Get(roptions, key, result);
+ EXPECT_TRUE(s.IsNotFound() || s.ok());
+ return s;
}
- if (num_files) {
- *num_files = vmd.size();
+ TablePropertiesCollection TestGetPropertiesOfTablesInRange(
+ std::vector<Range> ranges, std::size_t* num_properties = nullptr,
+ std::size_t* num_files = nullptr) {
+ // Since we deref zero element in the vector it can not be empty
+ // otherwise we pass an address to some random memory
+ EXPECT_GT(ranges.size(), 0U);
+ // run the query
+ TablePropertiesCollection props;
+ ColumnFamilyHandle* default_cf = db_->DefaultColumnFamily();
+ EXPECT_OK(db_->GetPropertiesOfTablesInRange(default_cf, &ranges[0],
+ ranges.size(), &props));
+
+ const Comparator* ucmp = default_cf->GetComparator();
+ EXPECT_NE(ucmp, nullptr);
+ const size_t ts_sz = ucmp->timestamp_size();
+ const size_t range_size = ranges.size();
+ autovector<UserKeyRange> ukey_ranges;
+ std::vector<std::string> keys;
+ ukey_ranges.reserve(range_size);
+ keys.reserve(range_size * 2);
+ for (auto& r : ranges) {
+ auto [start, limit] = MaybeAddTimestampsToRange(
+ &r.start, &r.limit, ts_sz, &keys.emplace_back(), &keys.emplace_back(),
+ /*exclusive_end=*/false);
+ EXPECT_TRUE(start.has_value());
+ EXPECT_TRUE(limit.has_value());
+ ukey_ranges.emplace_back(start.value(), limit.value());
+ }
+ // Make sure that we've received properties for those and for those files
+ // only which fall within requested ranges
+ std::vector<LiveFileMetaData> vmd;
+ db_->GetLiveFilesMetaData(&vmd);
+ for (auto& md : vmd) {
+ std::string fn = md.db_path + md.name;
+ bool in_range = false;
+ for (auto& r : ukey_ranges) {
+ if (ucmp->Compare(r.start, md.largestkey) <= 0 &&
+ ucmp->Compare(r.limit, md.smallestkey) >= 0) {
+ in_range = true;
+ EXPECT_GT(props.count(fn), 0);
+ }
+ }
+ if (!in_range) {
+ EXPECT_EQ(props.count(fn), 0);
+ }
+ }
+
+ if (num_properties) {
+ *num_properties = props.size();
+ }
+
+ if (num_files) {
+ *num_files = vmd.size();
+ }
+ return props;
}
- return props;
-}
-TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {
+ bool enable_udt_ = false;
+ Slice min_ts_ = MinU64Ts();
+};
+
+TEST_P(DBTablePropertiesInRangeTest, GetPropertiesOfTablesInRange) {
// Fixed random sead
Random rnd(301);
@@ -296,17 +353,21 @@ TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {
options.hard_pending_compaction_bytes_limit = 16 * 1024;
options.num_levels = 8;
options.env = env_;
+ bool udt_enabled = GetParam();
+ if (udt_enabled) {
+ options.comparator = test::BytewiseComparatorWithU64TsWrapper();
+ }
DestroyAndReopen(options);
// build a decent LSM
for (int i = 0; i < 10000; i++) {
- ASSERT_OK(Put(test::RandomKey(&rnd, 5), rnd.RandomString(102)));
+ PutKeyValue(test::RandomKey(&rnd, 5), rnd.RandomString(102));
}
ASSERT_OK(Flush());
ASSERT_OK(dbfull()->TEST_WaitForCompact());
if (NumTableFilesAtLevel(0) == 0) {
- ASSERT_OK(Put(test::RandomKey(&rnd, 5), rnd.RandomString(102)));
+ PutKeyValue(test::RandomKey(&rnd, 5), rnd.RandomString(102));
ASSERT_OK(Flush());
}
@@ -371,6 +432,10 @@ TEST_F(DBTablePropertiesTest, GetPropertiesOfTablesInRange) {
}
}
+INSTANTIATE_TEST_CASE_P(DBTablePropertiesInRangeTest,
+ DBTablePropertiesInRangeTest,
+ ::testing::Values(true, false));
+
TEST_F(DBTablePropertiesTest, GetColumnFamilyNameProperty) {
std::string kExtraCfName = "pikachu";
CreateAndReopenWithCF({kExtraCfName}, CurrentOptions());
diff --git a/db/dbformat.h b/db/dbformat.h
index 5f69cc00b..4ae509973 100644
--- a/db/dbformat.h
+++ b/db/dbformat.h
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <memory>
+#include <optional>
#include <string>
#include <utility>
@@ -77,6 +78,32 @@ enum ValueType : unsigned char {
extern const ValueType kValueTypeForSeek;
extern const ValueType kValueTypeForSeekForPrev;
+// A range of user keys used internally by RocksDB. Also see `Range` used by
+// public APIs.
+struct UserKeyRange {
+ // In case of user_defined timestamp, if enabled, `start` and `limit` should
+ // include user_defined timestamps.
+ Slice start;
+ Slice limit;
+
+ UserKeyRange() = default;
+ UserKeyRange(const Slice& s, const Slice& l) : start(s), limit(l) {}
+};
+
+// A range of user keys used internally by RocksDB. Also see `RangePtr` used by
+// public APIs.
+struct UserKeyRangePtr {
+ // In case of user_defined timestamp, if enabled, `start` and `limit` should
+ // point to key with timestamp part.
+ // An optional range start, if missing, indicating a start before all keys.
+ std::optional<Slice> start;
+ // An optional range end, if missing, indicating an end after all keys.
+ std::optional<Slice> limit;
+
+ UserKeyRangePtr(const std::optional<Slice>& s, const std::optional<Slice>& l)
+ : start(s), limit(l) {}
+};
+
// Checks whether a type is an inline value type
// (i.e. a type used in memtable skiplist and sst file datablock).
inline bool IsValueType(ValueType t) {
diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc
index a5a607462..a07bb15d3 100644
--- a/db/external_sst_file_ingestion_job.cc
+++ b/db/external_sst_file_ingestion_job.cc
@@ -327,31 +327,22 @@ Status ExternalSstFileIngestionJob::Prepare(
Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
SuperVersion* super_version) {
- autovector<Range> ranges;
- autovector<std::string> keys;
+ size_t n = files_to_ingest_.size();
+ autovector<UserKeyRange> ranges;
+ std::vector<std::string> keys;
+ ranges.reserve(n);
+ keys.reserve(2 * n);
size_t ts_sz = cfd_->user_comparator()->timestamp_size();
- if (ts_sz) {
- // Check all ranges [begin, end] inclusively. Add maximum
- // timestamp to include all `begin` keys, and add minimal timestamp to
- // include all `end` keys.
- for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
- std::string begin_str;
- std::string end_str;
- AppendUserKeyWithMaxTimestamp(
- &begin_str, file_to_ingest.smallest_internal_key.user_key(), ts_sz);
- AppendUserKeyWithMinTimestamp(
- &end_str, file_to_ingest.largest_internal_key.user_key(), ts_sz);
- keys.emplace_back(std::move(begin_str));
- keys.emplace_back(std::move(end_str));
- }
- for (size_t i = 0; i < files_to_ingest_.size(); ++i) {
- ranges.emplace_back(keys[2 * i], keys[2 * i + 1]);
- }
- } else {
- for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
- ranges.emplace_back(file_to_ingest.smallest_internal_key.user_key(),
- file_to_ingest.largest_internal_key.user_key());
- }
+ // Check all ranges [begin, end] inclusively.
+ for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
+ Slice start_ukey = file_to_ingest.smallest_internal_key.user_key();
+ Slice end_ukey = file_to_ingest.largest_internal_key.user_key();
+ auto [start, end] = MaybeAddTimestampsToRange(
+ &start_ukey, &end_ukey, ts_sz, &keys.emplace_back(),
+ &keys.emplace_back(), /*exclusive_end=*/false);
+ assert(start.has_value());
+ assert(end.has_value());
+ ranges.emplace_back(start.value(), end.value());
}
Status status = cfd_->RangesOverlapWithMemtables(
ranges, super_version, db_options_.allow_data_in_errors, flush_needed);
diff --git a/db/version_set.cc b/db/version_set.cc
index 4d6a8ba32..b6814556f 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1717,13 +1717,13 @@ Status Version::GetPropertiesOfAllTables(const ReadOptions& read_options,
}
Status Version::GetPropertiesOfTablesInRange(
- const ReadOptions& read_options, const Range* range, std::size_t n,
+ const ReadOptions& read_options, const autovector<UserKeyRange>& ranges,
TablePropertiesCollection* props) const {
for (int level = 0; level < storage_info_.num_non_empty_levels(); level++) {
- for (decltype(n) i = 0; i < n; i++) {
+ for (const auto& range : ranges) {
// Convert user_key into a corresponding internal key.
- InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
- InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
+ InternalKey k1(range.start, kMaxSequenceNumber, kValueTypeForSeek);
+ InternalKey k2(range.limit, kMaxSequenceNumber, kValueTypeForSeek);
std::vector<FileMetaData*> files;
storage_info_.GetOverlappingInputs(level, &k1, &k2, &files, -1, nullptr,
false);
diff --git a/db/version_set.h b/db/version_set.h
index 376e220a9..12304348f 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -980,7 +980,7 @@ class Version {
Status GetPropertiesOfAllTables(const ReadOptions& read_options,
TablePropertiesCollection* props, int level);
Status GetPropertiesOfTablesInRange(const ReadOptions& read_options,
- const Range* range, std::size_t n,
+ const autovector<UserKeyRange>& ranges,
TablePropertiesCollection* props) const;
// Print summary of range delete tombstones in SST files into out_str,
diff --git a/unreleased_history/new_features/udt_support_for_some_apis.md b/unreleased_history/new_features/udt_support_for_some_apis.md
new file mode 100644
index 000000000..88bf2a160
--- /dev/null
+++ b/unreleased_history/new_features/udt_support_for_some_apis.md
@@ -0,0 +1 @@
+Add support for user-defined timestamps in APIs `DeleteFilesInRanges` and `GetPropertiesOfTablesInRange`. \ No newline at end of file
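
For reference (an editorial sketch, not part of the diff above): a caller of the now UDT-aware `DeleteFilesInRange` convenience API still passes plain user keys without timestamps, just as the updated tests do; the key values below are made up.

    #include "rocksdb/convenience.h"
    #include "rocksdb/db.h"
    #include "rocksdb/slice.h"

    using ROCKSDB_NAMESPACE::DB;
    using ROCKSDB_NAMESPACE::DeleteFilesInRange;
    using ROCKSDB_NAMESPACE::Slice;
    using ROCKSDB_NAMESPACE::Status;

    // Delete files fully contained in [begin, end) on a column family that
    // uses a 64-bit user-defined timestamp; with this change the DB appends
    // the timestamps internally before checking file boundaries.
    Status DeleteOldFiles(DB* db) {
      Slice begin("key000000");
      Slice end("key000100");
      return DeleteFilesInRange(db, db->DefaultColumnFamily(), &begin, &end,
                                /*include_end=*/false);
    }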