summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJermy Li <javaloveme@gmail.com>2022-03-15 09:50:21 -0700
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>2022-03-15 09:50:21 -0700
commit3da8236837d63a74bf3adae56ae32515674240d8 (patch)
tree340eb28c37636688a1229e934f7a99cbb1dcd4ee
parentbbdaf63d0fccca7579936d997fa41aeabeb46ad9 (diff)
fix: Reusing-Iterator reads stale keys after DeleteRange() performed (#9258)
Summary: fix https://github.com/facebook/rocksdb/issues/9255 Pull Request resolved: https://github.com/facebook/rocksdb/pull/9258 Reviewed By: pdillinger Differential Revision: D34879684 Pulled By: ajkr fbshipit-source-id: 5934f4b7524dc27ecdf1430e0456a0fc02958fc7
-rw-r--r--HISTORY.md1
-rw-r--r--db/arena_wrapped_db_iter.cc69
-rw-r--r--db/db_range_del_test.cc28
3 files changed, 76 insertions, 22 deletions
diff --git a/HISTORY.md b/HISTORY.md
index f24b33d15..d9e08b413 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -16,6 +16,7 @@
* Fixed a bug that DisableManualCompaction may assert when disable an unscheduled manual compaction.
* Fixed a potential timer crash when open close DB concurrently.
* Fixed a race condition for `alive_log_files_` in non-two-write-queues mode. The race is between the write_thread_ in WriteToWAL() and another thread executing `FindObsoleteFiles()`. The race condition will be caught if `__glibcxx_requires_nonempty` is enabled.
+* Fixed a bug that `Iterator::Refresh()` reads stale keys after DeleteRange() performed.
### Public API changes
* Remove BlockBasedTableOptions.hash_index_allow_collision which already takes no effect.
diff --git a/db/arena_wrapped_db_iter.cc b/db/arena_wrapped_db_iter.cc
index 520588afe..20d4655be 100644
--- a/db/arena_wrapped_db_iter.cc
+++ b/db/arena_wrapped_db_iter.cc
@@ -58,30 +58,55 @@ Status ArenaWrappedDBIter::Refresh() {
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
- if (sv_number_ != cur_sv_number) {
- Env* env = db_iter_->env();
- db_iter_->~DBIter();
- arena_.~Arena();
- new (&arena_) Arena();
+ while (true) {
+ if (sv_number_ != cur_sv_number) {
+ Env* env = db_iter_->env();
+ db_iter_->~DBIter();
+ arena_.~Arena();
+ new (&arena_) Arena();
- SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
- SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
- if (read_callback_) {
- read_callback_->Refresh(latest_seq);
- }
- Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
- sv->current, latest_seq,
- sv->mutable_cf_options.max_sequential_skip_in_iterations,
- cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
- allow_refresh_);
+ SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
+ SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
+ if (read_callback_) {
+ read_callback_->Refresh(latest_seq);
+ }
+ Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
+ sv->current, latest_seq,
+ sv->mutable_cf_options.max_sequential_skip_in_iterations,
+ cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
+ allow_refresh_);
- InternalIterator* internal_iter = db_impl_->NewInternalIterator(
- read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
- latest_seq, /* allow_unprepared_value */ true);
- SetIterUnderDBIter(internal_iter);
- } else {
- db_iter_->set_sequence(db_impl_->GetLatestSequenceNumber());
- db_iter_->set_valid(false);
+ InternalIterator* internal_iter = db_impl_->NewInternalIterator(
+ read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
+ latest_seq, /* allow_unprepared_value */ true);
+ SetIterUnderDBIter(internal_iter);
+ break;
+ } else {
+ SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
+ // Refresh range-tombstones in MemTable
+ if (!read_options_.ignore_range_deletions) {
+ SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
+ ReadRangeDelAggregator* range_del_agg =
+ db_iter_->GetRangeDelAggregator();
+ std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
+ range_del_iter.reset(
+ sv->mem->NewRangeTombstoneIterator(read_options_, latest_seq));
+ range_del_agg->AddTombstones(std::move(range_del_iter));
+ cfd_->ReturnThreadLocalSuperVersion(sv);
+ }
+ // Refresh latest sequence number
+ db_iter_->set_sequence(latest_seq);
+ db_iter_->set_valid(false);
+ // Check again if the latest super version number is changed
+ uint64_t latest_sv_number = cfd_->GetSuperVersionNumber();
+ if (latest_sv_number != cur_sv_number) {
+ // If the super version number is changed after refreshing,
+ // fallback to Re-Init the InternalIterator
+ cur_sv_number = latest_sv_number;
+ continue;
+ }
+ break;
+ }
}
return Status::OK();
}
diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc
index 683b701f0..845114339 100644
--- a/db/db_range_del_test.cc
+++ b/db/db_range_del_test.cc
@@ -1724,6 +1724,34 @@ TEST_F(DBRangeDelTest, OverlappedKeys) {
ASSERT_EQ(0, NumTableFilesAtLevel(1));
}
+TEST_F(DBRangeDelTest, IteratorRefresh) {
+ // Refreshing an iterator after a range tombstone is added should cause the
+ // deleted range of keys to disappear.
+ for (bool sv_changed : {false, true}) {
+ ASSERT_OK(db_->Put(WriteOptions(), "key1", "value1"));
+ ASSERT_OK(db_->Put(WriteOptions(), "key2", "value2"));
+
+ auto* iter = db_->NewIterator(ReadOptions());
+ ASSERT_OK(iter->status());
+
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
+ "key2", "key3"));
+
+ if (sv_changed) {
+ ASSERT_OK(db_->Flush(FlushOptions()));
+ }
+
+ ASSERT_OK(iter->Refresh());
+ ASSERT_OK(iter->status());
+ iter->SeekToFirst();
+ ASSERT_EQ("key1", iter->key());
+ iter->Next();
+ ASSERT_FALSE(iter->Valid());
+
+ delete iter;
+ }
+}
+
#endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE