summaryrefslogtreecommitdiff
path: root/db/db_impl/db_impl.h
diff options
context:
space:
mode:
authorJay Huh <jewoongh@meta.com>2024-04-24 15:28:55 -0700
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>2024-04-24 15:28:55 -0700
commit1fca175eece9213a07f99973bae8e9a7d6aea93c (patch)
treefb0f6c63aafac82a1da081a79908793efa203101 /db/db_impl/db_impl.h
parent6807da0b44f28a0b22e5d32c7950aa9a6d5cb4bb (diff)
MultiCFSnapshot for NewIterators() API (#12573)
Summary: As mentioned in https://github.com/facebook/rocksdb/issues/12561 and https://github.com/facebook/rocksdb/issues/12566 , `NewIterators()` API has not been providing consistent view of the db across multiple column families. This PR addresses it by utilizing `MultiCFSnapshot()` function which has been used for `MultiGet()` APIs. To be able to obtain the thread-local super version with ref, `sv_exclusive_access` parameter has been added to `MultiCFSnapshot()` so that we could call `GetReferencedSuperVersion()` or `GetAndRefSuperVersion()` depending on the param and support `Refresh()` API for MultiCfIterators Pull Request resolved: https://github.com/facebook/rocksdb/pull/12573 Test Plan: **Unit Tests Added** ``` ./db_iterator_test --gtest_filter="*IteratorsConsistentView*" ``` ``` ./multi_cf_iterator_test -- --gtest_filter="*ConsistentView*" ``` **Performance Check** Setup ``` make -j64 release TEST_TMPDIR=/dev/shm/db_bench ./db_bench -benchmarks="filluniquerandom" -key_size=32 -value_size=512 -num=10000000 -compression_type=none ``` Run ``` TEST_TMPDIR=/dev/shm/db_bench ./db_bench -use_existing_db=1 -benchmarks="multireadrandom" -cache_size=10485760000 ``` Before the change ``` DB path: [/dev/shm/db_bench/dbbench] multireadrandom : 6.374 micros/op 156892 ops/sec 6.374 seconds 1000000 operations; (0 of 1000000 found) ``` After the change ``` DB path: [/dev/shm/db_bench/dbbench] multireadrandom : 6.265 micros/op 159627 ops/sec 6.265 seconds 1000000 operations; (0 of 1000000 found) ``` Reviewed By: jowlyzhang Differential Revision: D56444066 Pulled By: jaykorean fbshipit-source-id: 327ce73c072da30c221e18d4f3389f49115b8f99
Diffstat (limited to 'db/db_impl/db_impl.h')
-rw-r--r--db/db_impl/db_impl.h23
1 files changed, 17 insertions, 6 deletions
diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h
index ae9a8d5a1..504d7ec60 100644
--- a/db/db_impl/db_impl.h
+++ b/db/db_impl/db_impl.h
@@ -2355,18 +2355,20 @@ class DBImpl : public DB {
// A structure to contain ColumnFamilyData and the SuperVersion obtained for
// the consistent view of DB
- struct ColumnFamilyDataSuperVersionPair {
+ struct ColumnFamilySuperVersionPair {
+ ColumnFamilyHandleImpl* cfh;
ColumnFamilyData* cfd;
// SuperVersion for the column family obtained in a manner that ensures a
// consistent view across all column families in the DB
SuperVersion* super_version;
- ColumnFamilyDataSuperVersionPair(ColumnFamilyHandle* column_family,
- SuperVersion* sv)
- : cfd(static_cast<ColumnFamilyHandleImpl*>(column_family)->cfd()),
+ ColumnFamilySuperVersionPair(ColumnFamilyHandle* column_family,
+ SuperVersion* sv)
+ : cfh(static_cast<ColumnFamilyHandleImpl*>(column_family)),
+ cfd(cfh->cfd()),
super_version(sv) {}
- ColumnFamilyDataSuperVersionPair() = default;
+ ColumnFamilySuperVersionPair() = default;
};
// A common function to obtain a consistent snapshot, which can be implicit
@@ -2380,9 +2382,17 @@ class DBImpl : public DB {
// If callback is non-null, the callback is refreshed with the snapshot
// sequence number
//
+ // `extra_sv_ref` indicates whether the thread-local SuperVersion should be
+ // obtained with an extra ref (by GetReferencedSuperVersion()) or not (by
+ // GetAndRefSuperVersion()). For instance, a point lookup like MultiGet does
+ // not require the SuperVersion to be re-acquired throughout the entire
+ // invocation (no extra ref needed), while MultiCfIterators may need the
+ // SuperVersion to be updated during Refresh() (requires an extra ref).
+ //
// `sv_from_thread_local` being set to false indicates that the SuperVersion
// is obtained from the ColumnFamilyData, whereas true indicates that it is
// thread-local.
+ //
// A non-OK status will be returned if, for a column family that enables the
// user-defined timestamp feature, the specified `ReadOptions.timestamp`
// attempts to read collapsed history.
@@ -2390,7 +2400,8 @@ class DBImpl : public DB {
Status MultiCFSnapshot(const ReadOptions& read_options,
ReadCallback* callback,
IterDerefFuncType iter_deref_func, T* cf_list,
- SequenceNumber* snapshot, bool* sv_from_thread_local);
+ bool extra_sv_ref, SequenceNumber* snapshot,
+ bool* sv_from_thread_local);
// The actual implementation of the batching MultiGet. The caller is expected
// to have acquired the SuperVersion and pass in a snapshot sequence number