summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--db/db_basic_test.cc57
-rw-r--r--db/db_impl/compacted_db_impl.cc92
-rw-r--r--db/db_impl/compacted_db_impl.h21
-rw-r--r--db/db_impl/db_impl.cc308
-rw-r--r--db/db_impl/db_impl.h26
-rw-r--r--db/db_test.cc18
-rw-r--r--db/db_with_timestamp_basic_test.cc6
-rw-r--r--include/rocksdb/db.h247
-rw-r--r--include/rocksdb/utilities/stackable_db.h22
-rw-r--r--java/src/test/java/org/rocksdb/VerifyChecksumsTest.java5
-rw-r--r--table/block_based/block_based_table_reader_sync_and_async.h1
-rw-r--r--unreleased_history/public_api_changes/consolidate_get_mget.md1
-rw-r--r--utilities/blob_db/blob_db.h33
-rw-r--r--utilities/blob_db/blob_db_impl.cc64
-rw-r--r--utilities/blob_db/blob_db_impl.h9
-rw-r--r--utilities/transactions/write_prepared_txn_db.cc43
-rw-r--r--utilities/transactions/write_prepared_txn_db.h11
-rw-r--r--utilities/ttl/db_ttl_impl.cc36
-rw-r--r--utilities/ttl/db_ttl_impl.h12
19 files changed, 328 insertions, 684 deletions
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index f20706333..9c2af8358 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -4504,68 +4504,35 @@ TEST_P(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
SetTimeElapseOnlySleepOnReopen(&options);
ReopenWithColumnFamilies(GetCFNames(), options);
- // Test the non-batched version of MultiGet with multiple column
- // families
+ // Test batched MultiGet with an IO delay in the first data block read.
+ // Both keys in the first CF should succeed as they're in the same data
+ // block and would form one batch, and we check for deadline between
+ // batches.
std::vector<std::string> key_str;
size_t i;
- for (i = 0; i < 5; ++i) {
+ for (i = 0; i < 10; ++i) {
key_str.emplace_back(Key(static_cast<int>(i)));
}
std::vector<ColumnFamilyHandle*> cfs(key_str.size());
- ;
std::vector<Slice> keys(key_str.size());
- std::vector<std::string> values(key_str.size());
+ std::vector<PinnableSlice> pin_values(keys.size());
+
for (i = 0; i < key_str.size(); ++i) {
- cfs[i] = handles_[i];
+ // 2 keys per CF
+ cfs[i] = handles_[i / 2];
keys[i] = Slice(key_str[i].data(), key_str[i].size());
}
-
ReadOptions ro;
ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
ro.async_io = GetParam();
// Delay the first IO
fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0);
- std::vector<Status> statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
- // The first key is successful because we check after the lookup, but
- // subsequent keys fail due to deadline exceeded
- CheckStatus(statuses, 1);
-
- // Clear the cache
- cache->SetCapacity(0);
- cache->SetCapacity(1048576);
- // Test non-batched Multiget with multiple column families and
- // introducing an IO delay in one of the middle CFs
- key_str.clear();
- for (i = 0; i < 10; ++i) {
- key_str.emplace_back(Key(static_cast<int>(i)));
- }
- cfs.resize(key_str.size());
- keys.resize(key_str.size());
- values.resize(key_str.size());
- for (i = 0; i < key_str.size(); ++i) {
- // 2 keys per CF
- cfs[i] = handles_[i / 2];
- keys[i] = Slice(key_str[i].data(), key_str[i].size());
- }
- ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
- fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 1);
- statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
- CheckStatus(statuses, 3);
-
- // Test batched MultiGet with an IO delay in the first data block read.
- // Both keys in the first CF should succeed as they're in the same data
- // block and would form one batch, and we check for deadline between
- // batches.
- std::vector<PinnableSlice> pin_values(keys.size());
- cache->SetCapacity(0);
- cache->SetCapacity(1048576);
- statuses.clear();
- statuses.resize(keys.size());
- ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
- fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0);
+ std::vector<Status> statuses(key_str.size());
dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
pin_values.data(), statuses.data());
+ // The first two keys (the first CF's batch) succeed because the deadline is
+ // checked between batches; subsequent keys fail due to deadline exceeded
CheckStatus(statuses, 2);
// Similar to the previous one, but an IO delay in the third CF data block
diff --git a/db/db_impl/compacted_db_impl.cc b/db/db_impl/compacted_db_impl.cc
index 6c714e1d9..d1c2db17b 100644
--- a/db/db_impl/compacted_db_impl.cc
+++ b/db/db_impl/compacted_db_impl.cc
@@ -33,12 +33,6 @@ size_t CompactedDBImpl::FindFile(const Slice& key) {
files_.files);
}
-Status CompactedDBImpl::Get(const ReadOptions& options, ColumnFamilyHandle*,
- const Slice& key, PinnableSlice* value) {
- return Get(options, /*column_family*/ nullptr, key, value,
- /*timestamp*/ nullptr);
-}
-
Status CompactedDBImpl::Get(const ReadOptions& _read_options,
ColumnFamilyHandle*, const Slice& key,
PinnableSlice* value, std::string* timestamp) {
@@ -108,62 +102,59 @@ Status CompactedDBImpl::Get(const ReadOptions& _read_options,
return Status::NotFound();
}
-std::vector<Status> CompactedDBImpl::MultiGet(
- const ReadOptions& options, const std::vector<ColumnFamilyHandle*>&,
- const std::vector<Slice>& keys, std::vector<std::string>* values) {
- return MultiGet(options, keys, values, /*timestamps*/ nullptr);
-}
-
-std::vector<Status> CompactedDBImpl::MultiGet(
- const ReadOptions& _read_options, const std::vector<ColumnFamilyHandle*>&,
- const std::vector<Slice>& keys, std::vector<std::string>* values,
- std::vector<std::string>* timestamps) {
+void CompactedDBImpl::MultiGet(const ReadOptions& _read_options,
+ size_t num_keys,
+ ColumnFamilyHandle** /*column_families*/,
+ const Slice* keys, PinnableSlice* values,
+ std::string* timestamps, Status* statuses,
+ const bool /*sorted_input*/) {
assert(user_comparator_);
- size_t num_keys = keys.size();
+ Status s;
if (_read_options.io_activity != Env::IOActivity::kUnknown &&
_read_options.io_activity != Env::IOActivity::kMultiGet) {
- Status s = Status::InvalidArgument(
+ s = Status::InvalidArgument(
"Can only call MultiGet with `ReadOptions::io_activity` is "
"`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
- return std::vector<Status>(num_keys, s);
}
ReadOptions read_options(_read_options);
- if (read_options.io_activity == Env::IOActivity::kUnknown) {
- read_options.io_activity = Env::IOActivity::kMultiGet;
- }
-
- if (read_options.timestamp) {
- Status s =
- FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp));
- if (!s.ok()) {
- return std::vector<Status>(num_keys, s);
+ if (s.ok()) {
+ if (read_options.io_activity == Env::IOActivity::kUnknown) {
+ read_options.io_activity = Env::IOActivity::kMultiGet;
}
- if (read_options.timestamp->size() > 0) {
- s = FailIfReadCollapsedHistory(cfd_, cfd_->GetSuperVersion(),
- *(read_options.timestamp));
- if (!s.ok()) {
- return std::vector<Status>(num_keys, s);
+
+ if (read_options.timestamp) {
+ s = FailIfTsMismatchCf(DefaultColumnFamily(), *(read_options.timestamp));
+ if (s.ok()) {
+ if (read_options.timestamp->size() > 0) {
+ s = FailIfReadCollapsedHistory(cfd_, cfd_->GetSuperVersion(),
+ *(read_options.timestamp));
+ }
}
+ } else {
+ s = FailIfCfHasTs(DefaultColumnFamily());
}
- } else {
- Status s = FailIfCfHasTs(DefaultColumnFamily());
- if (!s.ok()) {
- return std::vector<Status>(num_keys, s);
+ }
+
+ if (!s.ok()) {
+ for (size_t i = 0; i < num_keys; ++i) {
+ statuses[i] = s;
}
+ return;
}
// Clear the timestamps for returning results so that we can distinguish
// between tombstone or key that has never been written
if (timestamps) {
- for (auto& ts : *timestamps) {
- ts.clear();
+ for (size_t i = 0; i < num_keys; ++i) {
+ timestamps[i].clear();
}
}
GetWithTimestampReadCallback read_cb(kMaxSequenceNumber);
autovector<TableReader*, 16> reader_list;
- for (const auto& key : keys) {
+ for (size_t i = 0; i < num_keys; ++i) {
+ const Slice& key = keys[i];
LookupKey lkey(key, kMaxSequenceNumber, read_options.timestamp);
const FdWithKeyRange& f = files_.files[FindFile(lkey.user_key())];
if (user_comparator_->CompareWithoutTimestamp(
@@ -177,30 +168,26 @@ std::vector<Status> CompactedDBImpl::MultiGet(
reader_list.push_back(f.fd.table_reader);
}
}
- std::vector<Status> statuses(num_keys, Status::NotFound());
- values->resize(num_keys);
- if (timestamps) {
- timestamps->resize(num_keys);
+ for (size_t i = 0; i < num_keys; ++i) {
+ statuses[i] = Status::NotFound();
}
int idx = 0;
for (auto* r : reader_list) {
if (r != nullptr) {
- PinnableSlice pinnable_val;
- std::string& value = (*values)[idx];
+ PinnableSlice& pinnable_val = values[idx];
LookupKey lkey(keys[idx], kMaxSequenceNumber, read_options.timestamp);
- std::string* timestamp = timestamps ? &(*timestamps)[idx] : nullptr;
+ std::string* timestamp = timestamps ? &timestamps[idx] : nullptr;
GetContext get_context(
user_comparator_, nullptr, nullptr, nullptr, GetContext::kNotFound,
lkey.user_key(), &pinnable_val, /*columns=*/nullptr,
user_comparator_->timestamp_size() > 0 ? timestamp : nullptr, nullptr,
nullptr, true, nullptr, nullptr, nullptr, nullptr, &read_cb);
- Status s =
+ Status status =
r->Get(read_options, lkey.internal_key(), &get_context, nullptr);
- assert(static_cast<size_t>(idx) < statuses.size());
- if (!s.ok() && !s.IsNotFound()) {
- statuses[idx] = s;
+ assert(static_cast<size_t>(idx) < num_keys);
+ if (!status.ok() && !status.IsNotFound()) {
+ statuses[idx] = status;
} else {
- value.assign(pinnable_val.data(), pinnable_val.size());
if (get_context.State() == GetContext::kFound) {
statuses[idx] = Status::OK();
}
@@ -208,7 +195,6 @@ std::vector<Status> CompactedDBImpl::MultiGet(
}
++idx;
}
- return statuses;
}
Status CompactedDBImpl::Init(const Options& options) {
diff --git a/db/db_impl/compacted_db_impl.h b/db/db_impl/compacted_db_impl.h
index 17d8c9bfc..03853a5dd 100644
--- a/db/db_impl/compacted_db_impl.h
+++ b/db/db_impl/compacted_db_impl.h
@@ -27,26 +27,17 @@ class CompactedDBImpl : public DBImpl {
// Implementations of the DB interface
using DB::Get;
Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const Slice& key, PinnableSlice* value) override;
-
- Status Get(const ReadOptions& _read_options,
- ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value, std::string* timestamp) override;
+ const Slice& key, PinnableSlice* value,
+ std::string* timestamp) override;
using DB::MultiGet;
// Note that CompactedDBImpl::MultiGet is not the optimized version of
// MultiGet to use.
// TODO: optimize CompactedDBImpl::MultiGet, see DBImpl::MultiGet for details.
- std::vector<Status> MultiGet(const ReadOptions& options,
- const std::vector<ColumnFamilyHandle*>&,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override;
-
- std::vector<Status> MultiGet(const ReadOptions& _read_options,
- const std::vector<ColumnFamilyHandle*>&,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values,
- std::vector<std::string>* timestamps) override;
+ void MultiGet(const ReadOptions& options, size_t num_keys,
+ ColumnFamilyHandle** column_families, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
+ Status* statuses, const bool sorted_input) override;
using DBImpl::Put;
Status Put(const WriteOptions& /*options*/,
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 7c71bb752..b0465815e 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -2020,12 +2020,6 @@ ColumnFamilyHandle* DBImpl::PersistentStatsColumnFamily() const {
return persist_stats_cf_handle_;
}
-Status DBImpl::Get(const ReadOptions& read_options,
- ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value) {
- return Get(read_options, column_family, key, value, /*timestamp=*/nullptr);
-}
-
Status DBImpl::GetImpl(const ReadOptions& read_options,
ColumnFamilyHandle* column_family, const Slice& key,
PinnableSlice* value) {
@@ -2497,250 +2491,6 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
return s;
}
-std::vector<Status> DBImpl::MultiGet(
- const ReadOptions& read_options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys, std::vector<std::string>* values) {
- return MultiGet(read_options, column_family, keys, values,
- /*timestamps=*/nullptr);
-}
-
-std::vector<Status> DBImpl::MultiGet(
- const ReadOptions& _read_options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys, std::vector<std::string>* values,
- std::vector<std::string>* timestamps) {
- PERF_CPU_TIMER_GUARD(get_cpu_nanos, immutable_db_options_.clock);
- StopWatch sw(immutable_db_options_.clock, stats_, DB_MULTIGET);
- PERF_TIMER_GUARD(get_snapshot_time);
-
- size_t num_keys = keys.size();
- assert(column_family.size() == num_keys);
- std::vector<Status> stat_list(num_keys);
-
- if (_read_options.io_activity != Env::IOActivity::kUnknown &&
- _read_options.io_activity != Env::IOActivity::kMultiGet) {
- Status s = Status::InvalidArgument(
- "Can only call MultiGet with `ReadOptions::io_activity` is "
- "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
-
- for (size_t i = 0; i < num_keys; ++i) {
- stat_list[i] = s;
- }
- return stat_list;
- }
-
- ReadOptions read_options(_read_options);
- if (read_options.io_activity == Env::IOActivity::kUnknown) {
- read_options.io_activity = Env::IOActivity::kMultiGet;
- }
-
- bool should_fail = false;
- for (size_t i = 0; i < num_keys; ++i) {
- assert(column_family[i]);
- if (read_options.timestamp) {
- stat_list[i] =
- FailIfTsMismatchCf(column_family[i], *(read_options.timestamp));
- if (!stat_list[i].ok()) {
- should_fail = true;
- }
- } else {
- stat_list[i] = FailIfCfHasTs(column_family[i]);
- if (!stat_list[i].ok()) {
- should_fail = true;
- }
- }
- }
-
- if (should_fail) {
- for (auto& s : stat_list) {
- if (s.ok()) {
- s = Status::Incomplete(
- "DB not queried due to invalid argument(s) in the same MultiGet");
- }
- }
- return stat_list;
- }
-
- if (tracer_) {
- // TODO: This mutex should be removed later, to improve performance when
- // tracing is enabled.
- InstrumentedMutexLock lock(&trace_mutex_);
- if (tracer_) {
- // TODO: maybe handle the tracing status?
- tracer_->MultiGet(column_family, keys).PermitUncheckedError();
- }
- }
-
- UnorderedMap<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
- column_family.size());
- for (auto cf : column_family) {
- auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(cf);
- auto cfd = cfh->cfd();
- if (multiget_cf_data.find(cfd->GetID()) == multiget_cf_data.end()) {
- multiget_cf_data.emplace(cfd->GetID(),
- MultiGetColumnFamilyData(cfh, nullptr));
- }
- }
-
- std::function<MultiGetColumnFamilyData*(
- UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&)>
- iter_deref_lambda =
- [](UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&
- cf_iter) { return &cf_iter->second; };
-
- SequenceNumber consistent_seqnum;
- bool sv_from_thread_local;
- Status status =
- MultiCFSnapshot<UnorderedMap<uint32_t, MultiGetColumnFamilyData>>(
- read_options, nullptr, iter_deref_lambda, &multiget_cf_data,
- &consistent_seqnum, &sv_from_thread_local);
-
- if (!status.ok()) {
- for (auto& s : stat_list) {
- if (s.ok()) {
- s = status;
- }
- }
- return stat_list;
- }
-
- TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum1");
- TEST_SYNC_POINT("DBImpl::MultiGet:AfterGetSeqNum2");
-
- // Contain a list of merge operations if merge occurs.
- MergeContext merge_context;
-
- // Note: this always resizes the values array
- values->resize(num_keys);
- if (timestamps) {
- timestamps->resize(num_keys);
- }
-
- // Keep track of bytes that we read for statistics-recording later
- uint64_t bytes_read = 0;
- PERF_TIMER_STOP(get_snapshot_time);
-
- // For each of the given keys, apply the entire "get" process as follows:
- // First look in the memtable, then in the immutable memtable (if any).
- // s is both in/out. When in, s could either be OK or MergeInProgress.
- // merge_operands will contain the sequence of merges in the latter case.
- size_t num_found = 0;
- size_t keys_read;
- uint64_t curr_value_size = 0;
-
- GetWithTimestampReadCallback timestamp_read_callback(0);
- ReadCallback* read_callback = nullptr;
- if (read_options.timestamp && read_options.timestamp->size() > 0) {
- timestamp_read_callback.Refresh(consistent_seqnum);
- read_callback = &timestamp_read_callback;
- }
-
- for (keys_read = 0; keys_read < num_keys; ++keys_read) {
- merge_context.Clear();
- Status& s = stat_list[keys_read];
- std::string* value = &(*values)[keys_read];
- std::string* timestamp = timestamps ? &(*timestamps)[keys_read] : nullptr;
-
- LookupKey lkey(keys[keys_read], consistent_seqnum, read_options.timestamp);
- auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(
- column_family[keys_read]);
- SequenceNumber max_covering_tombstone_seq = 0;
- auto mgd_iter = multiget_cf_data.find(cfh->cfd()->GetID());
- assert(mgd_iter != multiget_cf_data.end());
- auto mgd = mgd_iter->second;
- auto super_version = mgd.super_version;
- bool skip_memtable =
- (read_options.read_tier == kPersistedTier &&
- has_unpersisted_data_.load(std::memory_order_relaxed));
- bool done = false;
- if (!skip_memtable) {
- if (super_version->mem->Get(
- lkey, value, /*columns=*/nullptr, timestamp, &s, &merge_context,
- &max_covering_tombstone_seq, read_options,
- false /* immutable_memtable */, read_callback)) {
- done = true;
- RecordTick(stats_, MEMTABLE_HIT);
- } else if (super_version->imm->Get(lkey, value, /*columns=*/nullptr,
- timestamp, &s, &merge_context,
- &max_covering_tombstone_seq,
- read_options, read_callback)) {
- done = true;
- RecordTick(stats_, MEMTABLE_HIT);
- }
- }
- if (!done) {
- PinnableSlice pinnable_val;
- PERF_TIMER_GUARD(get_from_output_files_time);
- PinnedIteratorsManager pinned_iters_mgr;
- super_version->current->Get(read_options, lkey, &pinnable_val,
- /*columns=*/nullptr, timestamp, &s,
- &merge_context, &max_covering_tombstone_seq,
- &pinned_iters_mgr, /*value_found=*/nullptr,
- /*key_exists=*/nullptr,
- /*seq=*/nullptr, read_callback);
- value->assign(pinnable_val.data(), pinnable_val.size());
- RecordTick(stats_, MEMTABLE_MISS);
- }
-
- if (s.ok()) {
- const auto& merge_threshold = read_options.merge_operand_count_threshold;
- if (merge_threshold.has_value() &&
- merge_context.GetNumOperands() > merge_threshold.value()) {
- s = Status::OkMergeOperandThresholdExceeded();
- }
-
- bytes_read += value->size();
- num_found++;
-
- curr_value_size += value->size();
- if (curr_value_size > read_options.value_size_soft_limit) {
- while (++keys_read < num_keys) {
- stat_list[keys_read] = Status::Aborted();
- }
- break;
- }
- }
- if (read_options.deadline.count() &&
- immutable_db_options_.clock->NowMicros() >
- static_cast<uint64_t>(read_options.deadline.count())) {
- break;
- }
- }
-
- if (keys_read < num_keys) {
- // The only reason to break out of the loop is when the deadline is
- // exceeded
- assert(immutable_db_options_.clock->NowMicros() >
- static_cast<uint64_t>(read_options.deadline.count()));
- for (++keys_read; keys_read < num_keys; ++keys_read) {
- stat_list[keys_read] = Status::TimedOut();
- }
- }
-
- // Post processing (decrement reference counts and record statistics)
- PERF_TIMER_GUARD(get_post_process_time);
-
- for (auto mgd_iter : multiget_cf_data) {
- auto mgd = mgd_iter.second;
- if (sv_from_thread_local) {
- ReturnAndCleanupSuperVersion(mgd.cfd, mgd.super_version);
- } else {
- TEST_SYNC_POINT("DBImpl::MultiGet::BeforeLastTryUnRefSV");
- CleanupSuperVersion(mgd.super_version);
- }
- }
- RecordTick(stats_, NUMBER_MULTIGET_CALLS);
- RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys);
- RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found);
- RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read);
- RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read);
- PERF_COUNTER_ADD(multiget_read_bytes, bytes_read);
- PERF_TIMER_STOP(get_post_process_time);
-
- return stat_list;
-}
-
template <class T>
Status DBImpl::MultiCFSnapshot(
const ReadOptions& read_options, ReadCallback* callback,
@@ -2889,7 +2639,8 @@ Status DBImpl::MultiCFSnapshot(
}
}
- // Keep track of bytes that we read for statistics-recording later
+ TEST_SYNC_POINT("DBImpl::MultiCFSnapshot:AfterGetSeqNum1");
+ TEST_SYNC_POINT("DBImpl::MultiCFSnapshot:AfterGetSeqNum2");
PERF_TIMER_STOP(get_snapshot_time);
*sv_from_thread_local = !last_try;
if (!s.ok()) {
@@ -2898,14 +2649,6 @@ Status DBImpl::MultiCFSnapshot(
return s;
}
-void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
- ColumnFamilyHandle** column_families, const Slice* keys,
- PinnableSlice* values, Status* statuses,
- const bool sorted_input) {
- MultiGet(read_options, num_keys, column_families, keys, values,
- /* timestamps */ nullptr, statuses, sorted_input);
-}
-
void DBImpl::MultiGet(const ReadOptions& _read_options, const size_t num_keys,
ColumnFamilyHandle** column_families, const Slice* keys,
PinnableSlice* values, std::string* timestamps,
@@ -3122,38 +2865,23 @@ void DBImpl::PrepareMultiGetKeys(
CompareKeyContext());
}
-void DBImpl::MultiGet(const ReadOptions& read_options,
- ColumnFamilyHandle* column_family, const size_t num_keys,
- const Slice* keys, PinnableSlice* values,
- Status* statuses, const bool sorted_input) {
- MultiGet(read_options, column_family, num_keys, keys, values,
- /* timestamps */ nullptr, statuses, sorted_input);
-}
-
-void DBImpl::MultiGet(const ReadOptions& _read_options,
- ColumnFamilyHandle* column_family, const size_t num_keys,
- const Slice* keys, PinnableSlice* values,
- std::string* timestamps, Status* statuses,
- const bool sorted_input) {
- if (_read_options.io_activity != Env::IOActivity::kUnknown &&
- _read_options.io_activity != Env::IOActivity::kMultiGet) {
- Status s = Status::InvalidArgument(
- "Can only call MultiGet with `ReadOptions::io_activity` is "
- "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
- for (size_t i = 0; i < num_keys; ++i) {
- if (statuses[i].ok()) {
- statuses[i] = s;
- }
- }
- return;
- }
-
- ReadOptions read_options(_read_options);
- if (read_options.io_activity == Env::IOActivity::kUnknown) {
- read_options.io_activity = Env::IOActivity::kMultiGet;
+void DB::MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family,
+ const size_t num_keys, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
+ Status* statuses, const bool sorted_input) {
+ // Use std::array, if possible, to avoid memory allocation overhead
+ if (num_keys > MultiGetContext::MAX_BATCH_SIZE) {
+ std::vector<ColumnFamilyHandle*> column_families(num_keys, column_family);
+ MultiGet(options, num_keys, column_families.data(), keys, values,
+ timestamps, statuses, sorted_input);
+ } else {
+ std::array<ColumnFamilyHandle*, MultiGetContext::MAX_BATCH_SIZE>
+ column_families;
+ std::fill(column_families.begin(), column_families.begin() + num_keys,
+ column_family);
+ MultiGet(options, num_keys, column_families.data(), keys, values,
+ timestamps, statuses, sorted_input);
}
- MultiGetCommon(read_options, column_family, num_keys, keys, values,
- /* columns */ nullptr, timestamps, statuses, sorted_input);
}
void DBImpl::MultiGetCommon(const ReadOptions& read_options,
diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h
index 1601129cd..eb0f6e043 100644
--- a/db/db_impl/db_impl.h
+++ b/db/db_impl/db_impl.h
@@ -232,8 +232,6 @@ class DBImpl : public DB {
Status Write(const WriteOptions& options, WriteBatch* updates) override;
using DB::Get;
- Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const Slice& key, PinnableSlice* value) override;
Status Get(const ReadOptions& _read_options,
ColumnFamilyHandle* column_family, const Slice& key,
PinnableSlice* value, std::string* timestamp) override;
@@ -261,17 +259,6 @@ class DBImpl : public DB {
}
using DB::MultiGet;
- std::vector<Status> MultiGet(
- const ReadOptions& options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override;
- std::vector<Status> MultiGet(
- const ReadOptions& _read_options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys, std::vector<std::string>* values,
- std::vector<std::string>* timestamps) override;
-
// This MultiGet is a batched version, which may be faster than calling Get
// multiple times, especially if the keys have some spatial locality that
// enables them to be queried in the same SST files/set of files. The larger
@@ -279,19 +266,6 @@ class DBImpl : public DB {
// The values and statuses parameters are arrays with number of elements
// equal to keys.size(). This allows the storage for those to be alloacted
// by the caller on the stack for small batches
- void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const size_t num_keys, const Slice* keys, PinnableSlice* values,
- Status* statuses, const bool sorted_input = false) override;
- void MultiGet(const ReadOptions& _read_options,
- ColumnFamilyHandle* column_family, const size_t num_keys,
- const Slice* keys, PinnableSlice* values,
- std::string* timestamps, Status* statuses,
- const bool sorted_input = false) override;
-
- void MultiGet(const ReadOptions& options, const size_t num_keys,
- ColumnFamilyHandle** column_families, const Slice* keys,
- PinnableSlice* values, Status* statuses,
- const bool sorted_input = false) override;
void MultiGet(const ReadOptions& _read_options, const size_t num_keys,
ColumnFamilyHandle** column_families, const Slice* keys,
PinnableSlice* values, std::string* timestamps,
diff --git a/db/db_test.cc b/db/db_test.cc
index b1054b6c8..7f9eda724 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -3098,7 +3098,8 @@ class ModelDB : public DB {
}
using DB::Get;
Status Get(const ReadOptions& /*options*/, ColumnFamilyHandle* /*cf*/,
- const Slice& key, PinnableSlice* /*value*/) override {
+ const Slice& key, PinnableSlice* /*value*/,
+ std::string* /*timestamp*/) override {
return Status::NotSupported(key);
}
@@ -3112,14 +3113,13 @@ class ModelDB : public DB {
}
using DB::MultiGet;
- std::vector<Status> MultiGet(
- const ReadOptions& /*options*/,
- const std::vector<ColumnFamilyHandle*>& /*column_family*/,
- const std::vector<Slice>& keys,
- std::vector<std::string>* /*values*/) override {
- std::vector<Status> s(keys.size(),
- Status::NotSupported("Not implemented."));
- return s;
+ void MultiGet(const ReadOptions& /*options*/, const size_t num_keys,
+ ColumnFamilyHandle** /*column_families*/, const Slice* /*keys*/,
+ PinnableSlice* /*values*/, std::string* /*timestamps*/,
+ Status* statuses, const bool /*sorted_input*/) override {
+ for (size_t i = 0; i < num_keys; ++i) {
+ statuses[i] = Status::NotSupported("Not implemented.");
+ }
}
using DB::IngestExternalFile;
diff --git a/db/db_with_timestamp_basic_test.cc b/db/db_with_timestamp_basic_test.cc
index c7dbae20c..2d5b08832 100644
--- a/db/db_with_timestamp_basic_test.cc
+++ b/db/db_with_timestamp_basic_test.cc
@@ -2610,10 +2610,10 @@ TEST_F(DataVisibilityTest, MultiGetWithoutSnapshot) {
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->LoadDependency({
- {"DBImpl::MultiGet:AfterGetSeqNum1",
+ {"DBImpl::MultiCFSnapshot:AfterGetSeqNum1",
"DataVisibilityTest::MultiGetWithoutSnapshot:BeforePut"},
{"DataVisibilityTest::MultiGetWithoutSnapshot:AfterPut",
- "DBImpl::MultiGet:AfterGetSeqNum2"},
+ "DBImpl::MultiCFSnapshot:AfterGetSeqNum2"},
});
SyncPoint::GetInstance()->EnableProcessing();
port::Thread writer_thread([this]() {
@@ -4737,4 +4737,4 @@ int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
-} \ No newline at end of file
+}
diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h
index 214a718eb..7fe13278e 100644
--- a/include/rocksdb/db.h
+++ b/include/rocksdb/db.h
@@ -549,53 +549,69 @@ class DB {
// any, or an empty value otherwise.
//
// If timestamp is enabled and a non-null timestamp pointer is passed in,
- // timestamp is returned.
+ // timestamp is returned. If the underlying DB implementation doesn't
+ // support returning timestamp and the timestamp argument is non-null,
+ // a Status::NotSupported() error will be returned.
//
// Returns OK on success. Returns NotFound and an empty value in "*value" if
// there is no entry for "key". Returns some other non-OK status on error.
+ // NOTE: Pure virtual => was virtual before
+ virtual Status Get(const ReadOptions& options,
+ ColumnFamilyHandle* column_family, const Slice& key,
+ PinnableSlice* value, std::string* timestamp) = 0;
+
+ // The timestamp of the key is returned if a non-null timestamp pointer is
+ // passed, and value is returned as a string
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual inline Status Get(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
- std::string* value) {
+ std::string* value, std::string* timestamp) final {
assert(value != nullptr);
PinnableSlice pinnable_val(value);
assert(!pinnable_val.IsPinned());
- auto s = Get(options, column_family, key, &pinnable_val);
+ auto s = Get(options, column_family, key, &pinnable_val, timestamp);
if (s.ok() && pinnable_val.IsPinned()) {
value->assign(pinnable_val.data(), pinnable_val.size());
} // else value is already assigned
return s;
}
+
+ // No timestamp, and value is returned in a PinnableSlice
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual Status Get(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value) = 0;
- virtual Status Get(const ReadOptions& options, const Slice& key,
- std::string* value) {
- return Get(options, DefaultColumnFamily(), key, value);
+ PinnableSlice* value) final {
+ return Get(options, column_family, key, value, nullptr);
}
- // Get() methods that return timestamp. Derived DB classes don't need to worry
- // about this group of methods if they don't care about timestamp feature.
+ // No timestamp, and the value is returned as a string
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual inline Status Get(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
- std::string* value, std::string* timestamp) {
+ std::string* value) final {
assert(value != nullptr);
PinnableSlice pinnable_val(value);
assert(!pinnable_val.IsPinned());
- auto s = Get(options, column_family, key, &pinnable_val, timestamp);
+ auto s = Get(options, column_family, key, &pinnable_val);
if (s.ok() && pinnable_val.IsPinned()) {
value->assign(pinnable_val.data(), pinnable_val.size());
} // else value is already assigned
return s;
}
- virtual Status Get(const ReadOptions& /*options*/,
- ColumnFamilyHandle* /*column_family*/,
- const Slice& /*key*/, PinnableSlice* /*value*/,
- std::string* /*timestamp*/) {
- return Status::NotSupported(
- "Get() that returns timestamp is not implemented.");
+
+ // Gets a key in the default column family, returns the value as a string,
+ // and no timestamp returned
+ // NOTE: virtual final => disallow override (was previously allowed)
+ virtual Status Get(const ReadOptions& options, const Slice& key,
+ std::string* value) final {
+ return Get(options, DefaultColumnFamily(), key, value);
}
+
+ // Gets a key in the default column family, returns the value as a string,
+ // and timestamp of the key is returned if timestamp parameter is non-null
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual Status Get(const ReadOptions& options, const Slice& key,
- std::string* value, std::string* timestamp) {
+ std::string* value, std::string* timestamp) final {
return Get(options, DefaultColumnFamily(), key, value, timestamp);
}
@@ -650,9 +666,10 @@ class DB {
int* number_of_operands) = 0;
// Consistent Get of many keys across column families without the need
- // for an explicit snapshot. NOTE: the implementation of this MultiGet API
- // does not have the performance benefits of the void-returning MultiGet
- // functions.
+ // for an explicit snapshot. The main differences between this set of
+ // MultiGet APIs and the batched MultiGet APIs that follow are -
+ // 1. The APIs take std::vector instead of C style array pointers
+ // 2. Values are returned as std::string rather than PinnableSlice
//
// If keys[i] does not exist in the database, then the i'th returned
// status will be one for which Status::IsNotFound() is true, and
@@ -662,35 +679,67 @@ class DB {
//
// (*values) will always be resized to be the same size as (keys).
// Similarly, the number of returned statuses will be the number of keys.
+ // If timestamps is non-null, the vector pointed to by it will be resized to
+ // the number of keys and filled with timestamps of the keys on return.
// Note: keys will not be "de-duplicated". Duplicate keys will return
// duplicate values in order, and may return different status values
// in case there are errors.
+ // NOTE: virtual final => disallow override (was previously allowed)
+ virtual std::vector<Status> MultiGet(
+ const ReadOptions& options,
+ const std::vector<ColumnFamilyHandle*>& column_families,
+ const std::vector<Slice>& keys, std::vector<std::string>* values,
+ std::vector<std::string>* timestamps) final {
+ size_t num_keys = keys.size();
+ std::vector<Status> statuses(num_keys);
+ std::vector<PinnableSlice> pin_values(num_keys);
+
+ values->resize(num_keys);
+ if (timestamps) {
+ timestamps->resize(num_keys);
+ }
+ MultiGet(options, num_keys,
+ const_cast<ColumnFamilyHandle**>(column_families.data()),
+ keys.data(), pin_values.data(),
+ timestamps ? timestamps->data() : nullptr, statuses.data(),
+ /*sorted_input=*/false);
+ for (size_t i = 0; i < num_keys; ++i) {
+ if (statuses[i].ok()) {
+ (*values)[i].assign(pin_values[i].data(), pin_values[i].size());
+ }
+ }
+ return statuses;
+ }
+
+ // No timestamps are returned
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual std::vector<Status> MultiGet(
const ReadOptions& options,
const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
+ const std::vector<Slice>& keys, std::vector<std::string>* values) final {
+ values->resize(keys.size());
+ return MultiGet(options, column_family, keys, values, nullptr);
+ }
+
+ // MultiGet for default column family, no timestamps returned
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual std::vector<Status> MultiGet(const ReadOptions& options,
const std::vector<Slice>& keys,
- std::vector<std::string>* values) {
+ std::vector<std::string>* values) final {
+ values->resize(keys.size());
return MultiGet(
options,
std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
keys, values);
}
+ // MultiGet for default column family
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual std::vector<Status> MultiGet(
- const ReadOptions& /*options*/,
- const std::vector<ColumnFamilyHandle*>& /*column_family*/,
- const std::vector<Slice>& keys, std::vector<std::string>* /*values*/,
- std::vector<std::string>* /*timestamps*/) {
- return std::vector<Status>(
- keys.size(), Status::NotSupported(
- "MultiGet() returning timestamps not implemented."));
- }
- virtual std::vector<Status> MultiGet(const ReadOptions& options,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values,
- std::vector<std::string>* timestamps) {
+ const ReadOptions& options, const std::vector<Slice>& keys,
+ std::vector<std::string>* values,
+ std::vector<std::string>* timestamps) final {
+ values->resize(keys.size());
return MultiGet(
options,
std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
@@ -705,123 +754,59 @@ class DB {
// benefits.
// Parameters -
// options - ReadOptions
- // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
- // passed to the API are restricted to a single column family
// num_keys - Number of keys to lookup
+ // column_families - Pointer to C style array of ColumnFamilyHandle* that
+ // the keys belong to.
// keys - Pointer to C style array of key Slices with num_keys elements
// values - Pointer to C style array of PinnableSlices with num_keys elements
+ // timestamps - Pointer to C style array of std::string that, if non-null and
+ // timestamps are enabled, will be filled with timestamps of the
+ // keys on return. The array should be sized to num_keys entries
+ // by the caller.
// statuses - Pointer to C style array of Status with num_keys elements
// sorted_input - If true, it means the input keys are already sorted by key
// order, so the MultiGet() API doesn't have to sort them
// again. If false, the keys will be copied and sorted
// internally by the API - the input array will not be
// modified
- virtual void MultiGet(const ReadOptions& options,
- ColumnFamilyHandle* column_family,
- const size_t num_keys, const Slice* keys,
- PinnableSlice* values, Status* statuses,
- const bool /*sorted_input*/ = false) {
- std::vector<ColumnFamilyHandle*> cf;
- std::vector<Slice> user_keys;
- std::vector<Status> status;
- std::vector<std::string> vals;
- for (size_t i = 0; i < num_keys; ++i) {
- cf.emplace_back(column_family);
- user_keys.emplace_back(keys[i]);
- }
- status = MultiGet(options, cf, user_keys, &vals);
- std::copy(status.begin(), status.end(), statuses);
- for (auto& value : vals) {
- values->PinSelf(value);
- values++;
- }
- }
+ // NOTE: Pure virtual => was virtual (optional). If the concrete
+ // implementation
+ // doesn't support returning timestamps, and the timestamps parameter is
+ // non-null, it should return Status::NotSupported() for all the keys.
+ virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
+ ColumnFamilyHandle** column_families, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
+ Status* statuses, const bool sorted_input = false) = 0;
+ // MultiGet for single column family
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual void MultiGet(const ReadOptions& options,
ColumnFamilyHandle* column_family,
const size_t num_keys, const Slice* keys,
PinnableSlice* values, std::string* timestamps,
- Status* statuses, const bool /*sorted_input*/ = false) {
- std::vector<ColumnFamilyHandle*> cf;
- std::vector<Slice> user_keys;
- std::vector<Status> status;
- std::vector<std::string> vals;
- std::vector<std::string> tss;
+ Status* statuses,
+ const bool sorted_input = false) final;
- for (size_t i = 0; i < num_keys; ++i) {
- cf.emplace_back(column_family);
- user_keys.emplace_back(keys[i]);
- }
- status = MultiGet(options, cf, user_keys, &vals, &tss);
- std::copy(status.begin(), status.end(), statuses);
- std::copy(tss.begin(), tss.end(), timestamps);
- for (auto& value : vals) {
- values->PinSelf(value);
- values++;
- }
+ // MultiGet for single column family, no timestamps returned
+ // NOTE: virtual final => disallow override (was previously allowed)
+ virtual void MultiGet(const ReadOptions& options,
+ ColumnFamilyHandle* column_family,
+ const size_t num_keys, const Slice* keys,
+ PinnableSlice* values, Status* statuses,
+ const bool sorted_input = false) final {
+ MultiGet(options, column_family, num_keys, keys, values, nullptr, statuses,
+ sorted_input);
}
- // Overloaded MultiGet API that improves performance by batching operations
- // in the read path for greater efficiency. Currently, only the block based
- // table format with full filters are supported. Other table formats such
- // as plain table, block based table with block based filters and
- // partitioned indexes will still work, but will not get any performance
- // benefits.
- // Parameters -
- // options - ReadOptions
- // column_family - ColumnFamilyHandle* that the keys belong to. All the keys
- // passed to the API are restricted to a single column family
- // num_keys - Number of keys to lookup
- // keys - Pointer to C style array of key Slices with num_keys elements
- // values - Pointer to C style array of PinnableSlices with num_keys elements
- // statuses - Pointer to C style array of Status with num_keys elements
- // sorted_input - If true, it means the input keys are already sorted by key
- // order, so the MultiGet() API doesn't have to sort them
- // again. If false, the keys will be copied and sorted
- // internally by the API - the input array will not be
- // modified
+ // Multiple column families, no timestamps returned
+ // NOTE: virtual final => disallow override (was previously allowed)
virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
ColumnFamilyHandle** column_families, const Slice* keys,
PinnableSlice* values, Status* statuses,
- const bool /*sorted_input*/ = false) {
- std::vector<ColumnFamilyHandle*> cf;
- std::vector<Slice> user_keys;
- std::vector<Status> status;
- std::vector<std::string> vals;
-
- for (size_t i = 0; i < num_keys; ++i) {
- cf.emplace_back(column_families[i]);
- user_keys.emplace_back(keys[i]);
- }
- status = MultiGet(options, cf, user_keys, &vals);
- std::copy(status.begin(), status.end(), statuses);
- for (auto& value : vals) {
- values->PinSelf(value);
- values++;
- }
- }
- virtual void MultiGet(const ReadOptions& options, const size_t num_keys,
- ColumnFamilyHandle** column_families, const Slice* keys,
- PinnableSlice* values, std::string* timestamps,
- Status* statuses, const bool /*sorted_input*/ = false) {
- std::vector<ColumnFamilyHandle*> cf;
- std::vector<Slice> user_keys;
- std::vector<Status> status;
- std::vector<std::string> vals;
- std::vector<std::string> tss;
-
- for (size_t i = 0; i < num_keys; ++i) {
- cf.emplace_back(column_families[i]);
- user_keys.emplace_back(keys[i]);
- }
- status = MultiGet(options, cf, user_keys, &vals, &tss);
- std::copy(status.begin(), status.end(), statuses);
- std::copy(tss.begin(), tss.end(), timestamps);
- for (auto& value : vals) {
- values->PinSelf(value);
- values++;
- }
+ const bool sorted_input = false) final {
+ MultiGet(options, num_keys, column_families, keys, values, nullptr,
+ statuses, sorted_input);
}
// Batched MultiGet-like API that returns wide-column entities from a single
diff --git a/include/rocksdb/utilities/stackable_db.h b/include/rocksdb/utilities/stackable_db.h
index 7b45188b3..437b6cb49 100644
--- a/include/rocksdb/utilities/stackable_db.h
+++ b/include/rocksdb/utilities/stackable_db.h
@@ -97,8 +97,9 @@ class StackableDB : public DB {
using DB::Get;
Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const Slice& key, PinnableSlice* value) override {
- return db_->Get(options, column_family, key, value);
+ const Slice& key, PinnableSlice* value,
+ std::string* timestamp) override {
+ return db_->Get(options, column_family, key, value, timestamp);
}
using DB::GetEntity;
@@ -120,19 +121,12 @@ class StackableDB : public DB {
}
using DB::MultiGet;
- std::vector<Status> MultiGet(
- const ReadOptions& options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override {
- return db_->MultiGet(options, column_family, keys, values);
- }
-
- void MultiGet(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const size_t num_keys, const Slice* keys, PinnableSlice* values,
+ void MultiGet(const ReadOptions& options, const size_t num_keys,
+ ColumnFamilyHandle** column_families, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
Status* statuses, const bool sorted_input = false) override {
- return db_->MultiGet(options, column_family, num_keys, keys, values,
- statuses, sorted_input);
+ return db_->MultiGet(options, num_keys, column_families, keys, values,
+ timestamps, statuses, sorted_input);
}
using DB::MultiGetEntity;
diff --git a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java b/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java
index ddc2a456f..d1a5fa5f9 100644
--- a/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java
+++ b/java/src/test/java/org/rocksdb/VerifyChecksumsTest.java
@@ -166,9 +166,10 @@ public class VerifyChecksumsTest {
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
if (verifyFlag) {
// We don't need to be exact - we are checking that the checksums happen
- // exactly how many depends on block size etc etc, so may not be entirely stable
+ // exactly how many depends on block size, MultiGet batching etc etc,
+ // so may not be entirely stable
System.out.println(MessageFormat.format("verify=true {0}", afterOperationsCount));
- assertThat(afterOperationsCount).isGreaterThan(beforeOperationsCount + 20);
+ assertThat(afterOperationsCount).isGreaterThan(beforeOperationsCount);
} else {
System.out.println(MessageFormat.format("verify=false {0}", afterOperationsCount));
assertThat(afterOperationsCount).isEqualTo(beforeOperationsCount);
diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h
index e3ad6ea1b..c4483a0de 100644
--- a/table/block_based/block_based_table_reader_sync_and_async.h
+++ b/table/block_based/block_based_table_reader_sync_and_async.h
@@ -222,6 +222,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
// beyond the payload size.
s = VerifyBlockChecksum(footer, data + req_offset, handle.size(),
rep_->file->file_name(), handle.offset());
+ RecordTick(ioptions.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
}
} else if (!use_shared_buffer) {
diff --git a/unreleased_history/public_api_changes/consolidate_get_mget.md b/unreleased_history/public_api_changes/consolidate_get_mget.md
new file mode 100644
index 000000000..99b6ae2ce
--- /dev/null
+++ b/unreleased_history/public_api_changes/consolidate_get_mget.md
@@ -0,0 +1 @@
+Declare one Get and one MultiGet variant as pure virtual, and make all the other variants non-overridable. The methods required to be implemented by derived classes of DB allow returning timestamps. It is up to the implementation to check and return an error if timestamps are not supported. The non-batched MultiGet APIs are reimplemented in terms of batched MultiGet, so callers might see a performance improvement.
diff --git a/utilities/blob_db/blob_db.h b/utilities/blob_db/blob_db.h
index 12208dfde..d7eaf0fae 100644
--- a/utilities/blob_db/blob_db.h
+++ b/utilities/blob_db/blob_db.h
@@ -138,7 +138,8 @@ class BlobDB : public StackableDB {
using ROCKSDB_NAMESPACE::StackableDB::Get;
Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const Slice& key, PinnableSlice* value) override = 0;
+ const Slice& key, PinnableSlice* value,
+ std::string* timestamp) override = 0;
// Get value and expiration.
virtual Status Get(const ReadOptions& options,
@@ -149,36 +150,6 @@ class BlobDB : public StackableDB {
return Get(options, DefaultColumnFamily(), key, value, expiration);
}
- using ROCKSDB_NAMESPACE::StackableDB::MultiGet;
- std::vector<Status> MultiGet(const ReadOptions& options,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override = 0;
- std::vector<Status> MultiGet(
- const ReadOptions& options,
- const std::vector<ColumnFamilyHandle*>& column_families,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override {
- for (auto column_family : column_families) {
- if (column_family->GetID() != DefaultColumnFamily()->GetID()) {
- return std::vector<Status>(
- column_families.size(),
- Status::NotSupported(
- "Blob DB doesn't support non-default column family."));
- }
- }
- return MultiGet(options, keys, values);
- }
- void MultiGet(const ReadOptions& /*options*/,
- ColumnFamilyHandle* /*column_family*/, const size_t num_keys,
- const Slice* /*keys*/, PinnableSlice* /*values*/,
- Status* statuses,
- const bool /*sorted_input*/ = false) override {
- for (size_t i = 0; i < num_keys; ++i) {
- statuses[i] =
- Status::NotSupported("Blob DB doesn't support batched MultiGet");
- }
- }
-
using ROCKSDB_NAMESPACE::StackableDB::SingleDelete;
Status SingleDelete(const WriteOptions& /*wopts*/,
ColumnFamilyHandle* /*column_family*/,
diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc
index 6fded8441..5d3cf03c9 100644
--- a/utilities/blob_db/blob_db_impl.cc
+++ b/utilities/blob_db/blob_db_impl.cc
@@ -1409,27 +1409,43 @@ Status BlobDBImpl::AppendBlob(const WriteOptions& write_options,
return s;
}
-std::vector<Status> BlobDBImpl::MultiGet(const ReadOptions& _read_options,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) {
+void BlobDBImpl::MultiGet(const ReadOptions& _read_options, size_t num_keys,
+ ColumnFamilyHandle** column_families,
+ const Slice* keys, PinnableSlice* values,
+ std::string* timestamps, Status* statuses,
+ const bool /*sorted_input*/) {
StopWatch multiget_sw(clock_, statistics_, BLOB_DB_MULTIGET_MICROS);
RecordTick(statistics_, BLOB_DB_NUM_MULTIGET);
// Get a snapshot to avoid blob file get deleted between we
// fetch and index entry and reading from the file.
- std::vector<Status> statuses;
- std::size_t num_keys = keys.size();
- statuses.reserve(num_keys);
- if (_read_options.io_activity != Env::IOActivity::kUnknown &&
- _read_options.io_activity != Env::IOActivity::kMultiGet) {
- Status s = Status::InvalidArgument(
- "Can only call MultiGet with `ReadOptions::io_activity` is "
- "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
+ {
+ Status s;
+ if (_read_options.io_activity != Env::IOActivity::kUnknown &&
+ _read_options.io_activity != Env::IOActivity::kMultiGet) {
+ s = Status::InvalidArgument(
+ "Can only call MultiGet with `ReadOptions::io_activity` is "
+ "`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
+ } else if (timestamps) {
+ s = Status::NotSupported(
+ "MultiGet() returning timestamps not implemented.");
+ }
+ if (s.ok()) {
+ for (size_t i = 0; i < num_keys; ++i) {
+ if (column_families[i]->GetID() != DefaultColumnFamily()->GetID()) {
+ s = Status::NotSupported(
+ "Blob DB doesn't support non-default column family.");
+ break;
+ }
+ }
+ }
- for (size_t i = 0; i < num_keys; ++i) {
- statuses.push_back(s);
+ if (!s.ok()) {
+ for (size_t i = 0; i < num_keys; ++i) {
+ statuses[i] = s;
+ }
+ return;
}
- return statuses;
}
ReadOptions read_options(_read_options);
@@ -1438,19 +1454,14 @@ std::vector<Status> BlobDBImpl::MultiGet(const ReadOptions& _read_options,
}
bool snapshot_created = SetSnapshotIfNeeded(&read_options);
- values->clear();
- values->reserve(keys.size());
- PinnableSlice value;
- for (size_t i = 0; i < keys.size(); i++) {
- statuses.push_back(
- GetImpl(read_options, DefaultColumnFamily(), keys[i], &value));
- values->push_back(value.ToString());
- value.Reset();
+ for (size_t i = 0; i < num_keys; i++) {
+ PinnableSlice& value = values[i];
+ statuses[i] = GetImpl(read_options, DefaultColumnFamily(), keys[i], &value);
}
if (snapshot_created) {
db_->ReleaseSnapshot(read_options.snapshot);
}
- return statuses;
+ return;
}
bool BlobDBImpl::SetSnapshotIfNeeded(ReadOptions* read_options) {
@@ -1654,13 +1665,18 @@ Status BlobDBImpl::GetRawBlobFromFile(const Slice& key, uint64_t file_number,
Status BlobDBImpl::Get(const ReadOptions& _read_options,
ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value) {
+ PinnableSlice* value, std::string* timestamp) {
if (_read_options.io_activity != Env::IOActivity::kUnknown &&
_read_options.io_activity != Env::IOActivity::kGet) {
return Status::InvalidArgument(
"Can only call Get with `ReadOptions::io_activity` is "
"`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
}
+ if (timestamp) {
+ return Status::NotSupported(
+ "Get() that returns timestamp is not implemented.");
+ }
+
ReadOptions read_options(_read_options);
if (read_options.io_activity == Env::IOActivity::kUnknown) {
read_options.io_activity = Env::IOActivity::kGet;
diff --git a/utilities/blob_db/blob_db_impl.h b/utilities/blob_db/blob_db_impl.h
index 65763ad18..6cbc0d594 100644
--- a/utilities/blob_db/blob_db_impl.h
+++ b/utilities/blob_db/blob_db_impl.h
@@ -105,7 +105,7 @@ class BlobDBImpl : public BlobDB {
using BlobDB::Get;
Status Get(const ReadOptions& _read_options,
ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value) override;
+ PinnableSlice* value, std::string* timestamp) override;
Status Get(const ReadOptions& _read_options,
ColumnFamilyHandle* column_family, const Slice& key,
@@ -123,9 +123,10 @@ class BlobDBImpl : public BlobDB {
}
using BlobDB::MultiGet;
- std::vector<Status> MultiGet(const ReadOptions& _read_options,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override;
+ void MultiGet(const ReadOptions& _read_options, size_t num_keys,
+ ColumnFamilyHandle** column_families, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
+ Status* statuses, const bool sorted_input) override;
using BlobDB::Write;
Status Write(const WriteOptions& opts, WriteBatch* updates) override;
diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc
index be69fa537..f6b83f32f 100644
--- a/utilities/transactions/write_prepared_txn_db.cc
+++ b/utilities/transactions/write_prepared_txn_db.cc
@@ -249,13 +249,18 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig,
Status WritePreparedTxnDB::Get(const ReadOptions& _read_options,
ColumnFamilyHandle* column_family,
- const Slice& key, PinnableSlice* value) {
+ const Slice& key, PinnableSlice* value,
+ std::string* timestamp) {
if (_read_options.io_activity != Env::IOActivity::kUnknown &&
_read_options.io_activity != Env::IOActivity::kGet) {
return Status::InvalidArgument(
"Can only call Get with `ReadOptions::io_activity` is "
"`Env::IOActivity::kUnknown` or `Env::IOActivity::kGet`");
}
+ if (timestamp) {
+ return Status::NotSupported(
+ "Get() that returns timestamp is not implemented");
+ }
ReadOptions read_options(_read_options);
if (read_options.io_activity == Env::IOActivity::kUnknown) {
read_options.io_activity = Env::IOActivity::kGet;
@@ -325,24 +330,34 @@ void WritePreparedTxnDB::UpdateCFComparatorMap(ColumnFamilyHandle* h) {
handle_map_.reset(handle_map);
}
-std::vector<Status> WritePreparedTxnDB::MultiGet(
- const ReadOptions& _read_options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys, std::vector<std::string>* values) {
+void WritePreparedTxnDB::MultiGet(const ReadOptions& _read_options,
+ const size_t num_keys,
+ ColumnFamilyHandle** column_families,
+ const Slice* keys, PinnableSlice* values,
+ std::string* timestamps, Status* statuses,
+ const bool /*sorted_input*/) {
assert(values);
- size_t num_keys = keys.size();
- std::vector<Status> stat_list(num_keys);
+ Status s;
if (_read_options.io_activity != Env::IOActivity::kUnknown &&
_read_options.io_activity != Env::IOActivity::kMultiGet) {
- Status s = Status::InvalidArgument(
+ s = Status::InvalidArgument(
"Can only call MultiGet with `ReadOptions::io_activity` is "
"`Env::IOActivity::kUnknown` or `Env::IOActivity::kMultiGet`");
+ }
+
+ if (s.ok()) {
+ if (timestamps) {
+ s = Status::NotSupported(
+ "MultiGet() returning timestamps not implemented.");
+ }
+ }
+ if (!s.ok()) {
for (size_t i = 0; i < num_keys; ++i) {
- stat_list[i] = s;
+ statuses[i] = s;
}
- return stat_list;
+ return;
}
ReadOptions read_options(_read_options);
@@ -350,13 +365,11 @@ std::vector<Status> WritePreparedTxnDB::MultiGet(
read_options.io_activity = Env::IOActivity::kMultiGet;
}
- values->resize(num_keys);
-
for (size_t i = 0; i < num_keys; ++i) {
- stat_list[i] =
- this->GetImpl(read_options, column_family[i], keys[i], &(*values)[i]);
+ statuses[i] =
+ this->GetImpl(read_options, column_families[i], keys[i], &values[i]);
}
- return stat_list;
+ return;
}
// Struct to hold ownership of snapshot and read callback for iterator cleanup.
diff --git a/utilities/transactions/write_prepared_txn_db.h b/utilities/transactions/write_prepared_txn_db.h
index 5e1f0233a..ae0cb5194 100644
--- a/utilities/transactions/write_prepared_txn_db.h
+++ b/utilities/transactions/write_prepared_txn_db.h
@@ -84,14 +84,13 @@ class WritePreparedTxnDB : public PessimisticTransactionDB {
using DB::Get;
Status Get(const ReadOptions& _read_options,
ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value) override;
+ PinnableSlice* value, std::string* timestamp) override;
using DB::MultiGet;
- std::vector<Status> MultiGet(
- const ReadOptions& _read_options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override;
+ void MultiGet(const ReadOptions& _read_options, const size_t num_keys,
+ ColumnFamilyHandle** column_families, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
+ Status* statuses, const bool sorted_input) override;
using DB::NewIterator;
Iterator* NewIterator(const ReadOptions& _read_options,
diff --git a/utilities/ttl/db_ttl_impl.cc b/utilities/ttl/db_ttl_impl.cc
index e4bff7826..f2c02e860 100644
--- a/utilities/ttl/db_ttl_impl.cc
+++ b/utilities/ttl/db_ttl_impl.cc
@@ -493,7 +493,11 @@ Status DBWithTTLImpl::Put(const WriteOptions& options,
Status DBWithTTLImpl::Get(const ReadOptions& options,
ColumnFamilyHandle* column_family, const Slice& key,
- PinnableSlice* value) {
+ PinnableSlice* value, std::string* timestamp) {
+ if (timestamp) {
+ return Status::NotSupported(
+ "Get() that returns timestamp is not supported");
+ }
Status st = db_->Get(options, column_family, key, value);
if (!st.ok()) {
return st;
@@ -505,22 +509,34 @@ Status DBWithTTLImpl::Get(const ReadOptions& options,
return StripTS(value);
}
-std::vector<Status> DBWithTTLImpl::MultiGet(
- const ReadOptions& options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys, std::vector<std::string>* values) {
- auto statuses = db_->MultiGet(options, column_family, keys, values);
- for (size_t i = 0; i < keys.size(); ++i) {
+void DBWithTTLImpl::MultiGet(const ReadOptions& options, const size_t num_keys,
+ ColumnFamilyHandle** column_families,
+ const Slice* keys, PinnableSlice* values,
+ std::string* timestamps, Status* statuses,
+ const bool /*sorted_input*/) {
+ if (timestamps) {
+ for (size_t i = 0; i < num_keys; ++i) {
+ statuses[i] = Status::NotSupported(
+ "MultiGet() returning timestamps not implemented.");
+ }
+ return;
+ }
+
+ db_->MultiGet(options, num_keys, column_families, keys, values, timestamps,
+ statuses);
+ for (size_t i = 0; i < num_keys; ++i) {
if (!statuses[i].ok()) {
continue;
}
- statuses[i] = SanityCheckTimestamp((*values)[i]);
+ PinnableSlice tmp_val = std::move(values[i]);
+ values[i].PinSelf(tmp_val);
+ assert(!values[i].IsPinned());
+ statuses[i] = SanityCheckTimestamp(values[i]);
if (!statuses[i].ok()) {
continue;
}
- statuses[i] = StripTS(&(*values)[i]);
+ statuses[i] = StripTS(&values[i]);
}
- return statuses;
}
bool DBWithTTLImpl::KeyMayExist(const ReadOptions& options,
diff --git a/utilities/ttl/db_ttl_impl.h b/utilities/ttl/db_ttl_impl.h
index f0aef147c..731cd3955 100644
--- a/utilities/ttl/db_ttl_impl.h
+++ b/utilities/ttl/db_ttl_impl.h
@@ -53,14 +53,14 @@ class DBWithTTLImpl : public DBWithTTL {
using StackableDB::Get;
Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
- const Slice& key, PinnableSlice* value) override;
+ const Slice& key, PinnableSlice* value,
+ std::string* timestamp) override;
using StackableDB::MultiGet;
- std::vector<Status> MultiGet(
- const ReadOptions& options,
- const std::vector<ColumnFamilyHandle*>& column_family,
- const std::vector<Slice>& keys,
- std::vector<std::string>* values) override;
+ void MultiGet(const ReadOptions& options, const size_t num_keys,
+ ColumnFamilyHandle** column_families, const Slice* keys,
+ PinnableSlice* values, std::string* timestamps,
+ Status* statuses, const bool sorted_input) override;
using StackableDB::KeyMayExist;
bool KeyMayExist(const ReadOptions& options,