From 98c33cb8e39412ffdc4aa9b0ec97204dbd28acb9 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Thu, 19 Sep 2024 14:05:21 -0700 Subject: Steps toward making IDENTITY file obsolete (#13019) Summary: * Set write_dbid_to_manifest=true by default * Add new option write_identity_file (default true) that allows us to opt-in to future behavior without identity file * Refactor related DB open code to minimize code duplication _Recommend hiding whitespace changes for review_ Intended follow-up: add support to ldb for reading and even replacing the DB identity in the manifest. Could be a variant of `update_manifest` command or based on it. Pull Request resolved: https://github.com/facebook/rocksdb/pull/13019 Test Plan: unit tests and stress test updated for new functionality Reviewed By: anand1976 Differential Revision: D62898229 Pulled By: pdillinger fbshipit-source-id: c08b25cf790610b034e51a9de0dc78b921abbcf0 --- db/c.cc | 9 ++ db/c_test.c | 13 +- db/compaction/compaction_job_test.cc | 15 +- db/db_basic_test.cc | 162 +++++++++++++--------- db/db_impl/db_impl.h | 9 +- db/db_impl/db_impl_files.cc | 63 ++++----- db/db_impl/db_impl_open.cc | 48 ++++--- db/flush_job_test.cc | 13 +- db/version_set.cc | 1 + db/version_set_test.cc | 6 +- db_stress_tool/db_stress_common.h | 1 + db_stress_tool/db_stress_gflags.cc | 4 + db_stress_tool/db_stress_test_base.cc | 1 + include/rocksdb/c.h | 4 + include/rocksdb/options.h | 28 ++-- java/src/test/java/org/rocksdb/DBOptionsTest.java | 4 +- java/src/test/java/org/rocksdb/OptionsTest.java | 4 +- java/src/test/java/org/rocksdb/RocksDBTest.java | 2 +- options/db_options.cc | 7 + options/db_options.h | 1 + test_util/testutil.cc | 2 + test_util/testutil.h | 2 + tools/db_crashtest.py | 7 + unreleased_history/behavior_changes/db_id.md | 1 + 24 files changed, 254 insertions(+), 153 deletions(-) create mode 100644 unreleased_history/behavior_changes/db_id.md diff --git a/db/c.cc b/db/c.cc index 0858568af..9b6db666d 100644 --- a/db/c.cc +++ b/db/c.cc @@ -4075,6 +4075,15 @@ void rocksdb_options_set_write_dbid_to_manifest( opt->rep.write_dbid_to_manifest = write_dbid_to_manifest; } +unsigned char rocksdb_options_get_write_identity_file(rocksdb_options_t* opt) { + return opt->rep.write_identity_file; +} + +void rocksdb_options_set_write_identity_file( + rocksdb_options_t* opt, unsigned char write_identity_file) { + opt->rep.write_identity_file = write_identity_file; +} + unsigned char rocksdb_options_get_track_and_verify_wals_in_manifest( rocksdb_options_t* opt) { return opt->rep.track_and_verify_wals_in_manifest; diff --git a/db/c_test.c b/db/c_test.c index ebf39cb0a..eb9886ba4 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -772,6 +772,8 @@ int main(int argc, char** argv) { rocksdb_options_set_write_buffer_size(options, 100000); rocksdb_options_set_paranoid_checks(options, 1); rocksdb_options_set_max_open_files(options, 10); + /* Compatibility with how test was written */ + rocksdb_options_set_write_dbid_to_manifest(options, 0); table_options = rocksdb_block_based_options_create(); rocksdb_block_based_options_set_block_cache(table_options, cache); @@ -962,15 +964,24 @@ int main(int argc, char** argv) { rocksdb_options_t* options_dbid_in_manifest = rocksdb_options_create(); rocksdb_options_set_create_if_missing(options_dbid_in_manifest, 1); + rocksdb_options_set_write_dbid_to_manifest(options_dbid_in_manifest, false); unsigned char write_to_manifest = rocksdb_options_get_write_dbid_to_manifest(options_dbid_in_manifest); CheckCondition(!write_to_manifest); rocksdb_options_set_write_dbid_to_manifest(options_dbid_in_manifest, true); - CheckCondition(!write_to_manifest); write_to_manifest = rocksdb_options_get_write_dbid_to_manifest(options_dbid_in_manifest); CheckCondition(write_to_manifest); + rocksdb_options_set_write_identity_file(options_dbid_in_manifest, true); + unsigned char write_identity_file = + rocksdb_options_get_write_identity_file(options_dbid_in_manifest); + CheckCondition(write_identity_file); + rocksdb_options_set_write_identity_file(options_dbid_in_manifest, false); + write_identity_file = + rocksdb_options_get_write_identity_file(options_dbid_in_manifest); + CheckCondition(!write_identity_file); + db = rocksdb_open(options_dbid_in_manifest, dbbackupname, &err); CheckNoError(err); diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index bbc0fe4cf..8e85a9f96 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -545,15 +545,14 @@ class CompactionJobTestBase : public testing::Test { ASSERT_OK(s); db_options_.info_log = info_log; - versions_.reset(new VersionSet( - dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", - /*error_handler=*/nullptr, /*read_only=*/false)); + versions_.reset( + new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + test::kUnitTestDbId, /*db_session_id=*/"", + /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr, /*read_only=*/false)); compaction_job_stats_.Reset(); - ASSERT_OK( - SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); VersionEdit new_db; new_db.SetLogNumber(0); diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index bb8a132ae..ef497c114 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -688,76 +688,100 @@ TEST_F(DBBasicTest, IdentityAcrossRestarts) { constexpr size_t kMinIdSize = 10; do { for (bool with_manifest : {false, true}) { - std::string idfilename = IdentityFileName(dbname_); - std::string id1, tmp; - ASSERT_OK(db_->GetDbIdentity(id1)); - ASSERT_GE(id1.size(), kMinIdSize); - - Options options = CurrentOptions(); - options.write_dbid_to_manifest = with_manifest; - Reopen(options); - std::string id2; - ASSERT_OK(db_->GetDbIdentity(id2)); - // id2 should match id1 because identity was not regenerated - ASSERT_EQ(id1, id2); - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id2); - - // Recover from deleted/missing IDENTITY - ASSERT_OK(env_->DeleteFile(idfilename)); - Reopen(options); - std::string id3; - ASSERT_OK(db_->GetDbIdentity(id3)); - if (with_manifest) { - // id3 should match id1 because identity was restored from manifest - ASSERT_EQ(id1, id3); - } else { - // id3 should NOT match id1 because identity was regenerated - ASSERT_NE(id1, id3); - ASSERT_GE(id3.size(), kMinIdSize); - } - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id3); - - // Recover from truncated IDENTITY - { - std::unique_ptr w; - ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions())); - ASSERT_OK(w->Close()); - } - Reopen(options); - std::string id4; - ASSERT_OK(db_->GetDbIdentity(id4)); - if (with_manifest) { - // id4 should match id1 because identity was restored from manifest - ASSERT_EQ(id1, id4); - } else { - // id4 should NOT match id1 because identity was regenerated - ASSERT_NE(id1, id4); - ASSERT_GE(id4.size(), kMinIdSize); - } - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id4); - - // Recover from overwritten IDENTITY - std::string silly_id = "asdf123456789"; - { - std::unique_ptr w; - ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions())); - ASSERT_OK(w->Append(silly_id)); - ASSERT_OK(w->Close()); - } - Reopen(options); - std::string id5; - ASSERT_OK(db_->GetDbIdentity(id5)); - if (with_manifest) { - // id4 should match id1 because identity was restored from manifest - ASSERT_EQ(id1, id5); - } else { - ASSERT_EQ(id5, silly_id); + for (bool write_file : {false, true}) { + std::string idfilename = IdentityFileName(dbname_); + std::string id1, tmp; + ASSERT_OK(db_->GetDbIdentity(id1)); + ASSERT_GE(id1.size(), kMinIdSize); + + Options options = CurrentOptions(); + options.write_dbid_to_manifest = with_manifest; + options.write_identity_file = true; // initially + Reopen(options); + std::string id2; + ASSERT_OK(db_->GetDbIdentity(id2)); + // id2 should match id1 because identity was not regenerated + ASSERT_EQ(id1, id2); + ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); + ASSERT_EQ(tmp, id2); + + if (write_file) { + // Recover from deleted/missing IDENTITY + ASSERT_OK(env_->DeleteFile(idfilename)); + } else { + // Transition to no IDENTITY file + options.write_identity_file = false; + if (!with_manifest) { + // Incompatible options, should fail + ASSERT_NOK(TryReopen(options)); + // Back to a usable config and continue + options.write_identity_file = true; + Reopen(options); + continue; + } + } + Reopen(options); + std::string id3; + ASSERT_OK(db_->GetDbIdentity(id3)); + if (with_manifest) { + // id3 should match id1 because identity was restored from manifest + ASSERT_EQ(id1, id3); + } else { + // id3 should NOT match id1 because identity was regenerated + ASSERT_NE(id1, id3); + ASSERT_GE(id3.size(), kMinIdSize); + } + if (write_file) { + ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); + ASSERT_EQ(tmp, id3); + + // Recover from truncated IDENTITY + std::unique_ptr w; + ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions())); + ASSERT_OK(w->Close()); + } else { + ASSERT_TRUE(env_->FileExists(idfilename).IsNotFound()); + } + Reopen(options); + std::string id4; + ASSERT_OK(db_->GetDbIdentity(id4)); + if (with_manifest) { + // id4 should match id1 because identity was restored from manifest + ASSERT_EQ(id1, id4); + } else { + // id4 should NOT match id1 because identity was regenerated + ASSERT_NE(id1, id4); + ASSERT_GE(id4.size(), kMinIdSize); + } + std::string silly_id = "asdf123456789"; + if (write_file) { + ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); + ASSERT_EQ(tmp, id4); + + // Recover from overwritten IDENTITY + std::unique_ptr w; + ASSERT_OK(env_->NewWritableFile(idfilename, &w, EnvOptions())); + ASSERT_OK(w->Append(silly_id)); + ASSERT_OK(w->Close()); + } else { + ASSERT_TRUE(env_->FileExists(idfilename).IsNotFound()); + } + Reopen(options); + std::string id5; + ASSERT_OK(db_->GetDbIdentity(id5)); + if (with_manifest) { + // id4 should match id1 because identity was restored from manifest + ASSERT_EQ(id1, id5); + } else { + ASSERT_EQ(id5, silly_id); + } + if (write_file) { + ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); + ASSERT_EQ(tmp, id5); + } else { + ASSERT_TRUE(env_->FileExists(idfilename).IsNotFound()); + } } - ASSERT_OK(ReadFileToString(env_, idfilename, &tmp)); - ASSERT_EQ(tmp, id5); } } while (ChangeCompactOptions()); } diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 66b324825..218c1851e 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -1584,11 +1584,12 @@ class DBImpl : public DB { virtual bool OwnTablesAndLogs() const { return true; } - // Setup DB identity file, and write DB ID to manifest if necessary. + // Read/create DB identity file (as appropriate), and write DB ID to + // version_edit if provided. Status SetupDBId(const WriteOptions& write_options, bool read_only, - RecoveryContext* recovery_ctx); - // Assign db_id_ and write DB ID to manifest if necessary. - void SetDBId(std::string&& id, bool read_only, RecoveryContext* recovery_ctx); + bool is_new_db, VersionEdit* version_edit); + // Assign db_id_ and write DB ID to version_edit if provided. + void SetDBId(std::string&& id, bool read_only, VersionEdit* version_edit); // Collect a deduplicated collection of paths used by this DB, including // dbname_, DBOptions.db_paths, ColumnFamilyOptions.cf_paths. diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index bb0ff3985..3812e1fcb 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -953,59 +953,60 @@ uint64_t PrecomputeMinLogNumberToKeep2PC( } void DBImpl::SetDBId(std::string&& id, bool read_only, - RecoveryContext* recovery_ctx) { + VersionEdit* version_edit) { assert(db_id_.empty()); assert(!id.empty()); db_id_ = std::move(id); - if (!read_only && immutable_db_options_.write_dbid_to_manifest) { - assert(recovery_ctx != nullptr); + if (!read_only && version_edit) { + assert(version_edit != nullptr); assert(versions_->GetColumnFamilySet() != nullptr); - VersionEdit edit; - edit.SetDBId(db_id_); + version_edit->SetDBId(db_id_); versions_->db_id_ = db_id_; - recovery_ctx->UpdateVersionEdits( - versions_->GetColumnFamilySet()->GetDefault(), edit); } } Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only, - RecoveryContext* recovery_ctx) { + bool is_new_db, VersionEdit* version_edit) { Status s; - // Check for the IDENTITY file and create it if not there or - // broken or not matching manifest - std::string db_id_in_file; - s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr); - if (s.ok()) { - s = GetDbIdentityFromIdentityFile(&db_id_in_file); - if (s.ok() && !db_id_in_file.empty()) { - if (db_id_.empty()) { - // Loaded from file and wasn't already known from manifest - SetDBId(std::move(db_id_in_file), read_only, recovery_ctx); - return s; - } else if (db_id_ == db_id_in_file) { - // Loaded from file and matches manifest - return s; + if (!is_new_db) { + // Check for the IDENTITY file and create it if not there or + // broken or not matching manifest + std::string db_id_in_file; + s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr); + if (s.ok()) { + s = GetDbIdentityFromIdentityFile(&db_id_in_file); + if (s.ok() && !db_id_in_file.empty()) { + if (db_id_.empty()) { + // Loaded from file and wasn't already known from manifest + SetDBId(std::move(db_id_in_file), read_only, version_edit); + return s; + } else if (db_id_ == db_id_in_file) { + // Loaded from file and matches manifest + return s; + } } } - } - if (s.IsNotFound()) { - s = Status::OK(); - } - if (!s.ok()) { - assert(s.IsIOError()); - return s; + if (s.IsNotFound()) { + s = Status::OK(); + } + if (!s.ok()) { + assert(s.IsIOError()); + return s; + } } // Otherwise IDENTITY file is missing or no good. // Generate new id if needed if (db_id_.empty()) { - SetDBId(env_->GenerateUniqueId(), read_only, recovery_ctx); + SetDBId(env_->GenerateUniqueId(), read_only, version_edit); } // Persist it to IDENTITY file if allowed - if (!read_only) { + if (!read_only && immutable_db_options_.write_identity_file) { s = SetIdentityFile(write_options, env_, dbname_, immutable_db_options_.metadata_write_temperature, db_id_); } + // NOTE: an obsolete IDENTITY file with write_identity_file=false is handled + // elsewhere, so that it's only deleted after successful recovery return s; } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 4ce2e82e8..4fb100876 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -289,28 +289,25 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { "start_time and end_time cannot be the same"); } } + + if (!db_options.write_dbid_to_manifest && !db_options.write_identity_file) { + return Status::InvalidArgument( + "write_dbid_to_manifest and write_identity_file cannot both be false"); + } return Status::OK(); } Status DBImpl::NewDB(std::vector* new_filenames) { - VersionEdit new_db; + VersionEdit new_db_edit; const WriteOptions write_options(Env::IOActivity::kDBOpen); - Status s = SetIdentityFile(write_options, env_, dbname_, - immutable_db_options_.metadata_write_temperature); + Status s = SetupDBId(write_options, /*read_only=*/false, /*is_new_db=*/true, + &new_db_edit); if (!s.ok()) { return s; } - if (immutable_db_options_.write_dbid_to_manifest) { - std::string temp_db_id; - s = GetDbIdentityFromIdentityFile(&temp_db_id); - if (!s.ok()) { - return s; - } - new_db.SetDBId(temp_db_id); - } - new_db.SetLogNumber(0); - new_db.SetNextFile(2); - new_db.SetLastSequence(0); + new_db_edit.SetLogNumber(0); + new_db_edit.SetNextFile(2); + new_db_edit.SetLastSequence(0); ROCKS_LOG_INFO(immutable_db_options_.info_log, "Creating manifest 1 \n"); const std::string manifest = DescriptorFileName(dbname_, 1); @@ -342,7 +339,7 @@ Status DBImpl::NewDB(std::vector* new_filenames) { tmp_set.Contains(FileType::kDescriptorFile))); log::Writer log(std::move(file_writer), 0, false); std::string record; - new_db.EncodeTo(&record); + new_db_edit.EncodeTo(&record); s = log.AddRecord(write_options, record); if (s.ok()) { s = SyncManifest(&immutable_db_options_, write_options, log.file()); @@ -528,7 +525,7 @@ Status DBImpl::Recover( } assert(s.ok()); } - assert(db_id_.empty()); + assert(is_new_db || db_id_.empty()); Status s; bool missing_table_file = false; if (!immutable_db_options_.best_efforts_recovery) { @@ -674,7 +671,17 @@ Status DBImpl::Recover( } } } - s = SetupDBId(write_options, read_only, recovery_ctx); + if (is_new_db) { + // Already set up DB ID in NewDB + } else if (immutable_db_options_.write_dbid_to_manifest && recovery_ctx) { + VersionEdit edit; + s = SetupDBId(write_options, read_only, is_new_db, &edit); + recovery_ctx->UpdateVersionEdits( + versions_->GetColumnFamilySet()->GetDefault(), edit); + } else { + s = SetupDBId(write_options, read_only, is_new_db, nullptr); + } + assert(!s.ok() || !db_id_.empty()); ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB ID: %s\n", db_id_.c_str()); if (s.ok() && !read_only) { s = MaybeUpdateNextFileNumber(recovery_ctx); @@ -2132,6 +2139,13 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, s = impl->LogAndApplyForRecovery(recovery_ctx); } + if (s.ok() && !impl->immutable_db_options_.write_identity_file) { + // On successful recovery, delete an obsolete IDENTITY file to avoid DB ID + // inconsistency + impl->env_->DeleteFile(IdentityFileName(impl->dbname_)) + .PermitUncheckedError(); + } + if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) { impl->mutex_.AssertHeld(); s = impl->InitPersistStatsColumnFamily(); diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index d407e4815..ce21ec648 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -142,12 +142,13 @@ class FlushJobTestBase : public testing::Test { column_families.emplace_back(cf_name, cf_options_); } - versions_.reset(new VersionSet( - dbname_, &db_options_, env_options_, table_cache_.get(), - &write_buffer_manager_, &write_controller_, - /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, - /*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"", - /*error_handler=*/nullptr, /*read_only=*/false)); + versions_.reset( + new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(), + &write_buffer_manager_, &write_controller_, + /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, + test::kUnitTestDbId, /*db_session_id=*/"", + /*daily_offpeak_time_utc=*/"", + /*error_handler=*/nullptr, /*read_only=*/false)); EXPECT_OK(versions_->Recover(column_families, false)); } diff --git a/db/version_set.cc b/db/version_set.cc index c2fa7aad7..c42a50cba 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5143,6 +5143,7 @@ VersionSet::VersionSet( fs_(_db_options->fs, io_tracer), clock_(_db_options->clock), dbname_(dbname), + db_id_(db_id), db_options_(_db_options), next_file_number_(2), manifest_file_number_(0), // Filled by Recover() diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 4f3665fba..4bc9806e2 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1282,6 +1282,8 @@ class VersionSetTestBase { assert(column_families != nullptr); assert(last_seqno != nullptr); assert(log_writer != nullptr); + ASSERT_OK( + SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { DBOptions tmp_db_options; @@ -1426,8 +1428,6 @@ class VersionSetTestBase { void NewDB() { SequenceNumber last_seqno; std::unique_ptr log_writer; - ASSERT_OK( - SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); PrepareManifest(&column_families_, &last_seqno, &log_writer); log_writer.reset(); CreateCurrentFile(); @@ -3890,6 +3890,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, assert(column_families != nullptr); assert(last_seqno != nullptr); assert(log_writer != nullptr); + ASSERT_OK( + SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown)); const std::string manifest = DescriptorFileName(dbname_, 1); const auto& fs = env_->GetFileSystem(); std::unique_ptr file_writer; diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 67f82808f..8ae432cc7 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -260,6 +260,7 @@ DECLARE_bool(use_full_merge_v1); DECLARE_int32(sync_wal_one_in); DECLARE_bool(avoid_unnecessary_blocking_io); DECLARE_bool(write_dbid_to_manifest); +DECLARE_bool(write_identity_file); DECLARE_bool(avoid_flush_during_recovery); DECLARE_uint64(max_write_batch_group_size_bytes); DECLARE_bool(level_compaction_dynamic_level_bytes); diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index bb2d9d453..c23a163a8 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -993,6 +993,10 @@ DEFINE_bool(write_dbid_to_manifest, ROCKSDB_NAMESPACE::Options().write_dbid_to_manifest, "Write DB_ID to manifest"); +DEFINE_bool(write_identity_file, + ROCKSDB_NAMESPACE::Options().write_identity_file, + "Write DB_ID to IDENTITY file"); + DEFINE_bool(avoid_flush_during_recovery, ROCKSDB_NAMESPACE::Options().avoid_flush_during_recovery, "Avoid flush during recovery"); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index b8ab0cc4f..5d4839df8 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -4044,6 +4044,7 @@ void InitializeOptionsFromFlags( options.manual_wal_flush = FLAGS_manual_wal_flush_one_in > 0 ? true : false; options.avoid_unnecessary_blocking_io = FLAGS_avoid_unnecessary_blocking_io; options.write_dbid_to_manifest = FLAGS_write_dbid_to_manifest; + options.write_identity_file = FLAGS_write_identity_file; options.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery; options.max_write_batch_group_size_bytes = FLAGS_max_write_batch_group_size_bytes; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 9f91f7643..ed403a6ea 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1644,6 +1644,10 @@ extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_write_dbid_to_manifest(rocksdb_options_t*); extern ROCKSDB_LIBRARY_API void rocksdb_options_set_write_dbid_to_manifest( rocksdb_options_t*, unsigned char); +extern ROCKSDB_LIBRARY_API unsigned char +rocksdb_options_get_write_identity_file(rocksdb_options_t*); +extern ROCKSDB_LIBRARY_API void rocksdb_options_set_write_identity_file( + rocksdb_options_t*, unsigned char); extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_track_and_verify_wals_in_manifest(rocksdb_options_t*); diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index fb272663b..507f9bab8 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1380,16 +1380,24 @@ struct DBOptions { // ReadOptions::background_purge_on_iterator_cleanup. bool avoid_unnecessary_blocking_io = false; - // Historically DB ID has always been stored in Identity File in DB folder. - // If this flag is true, the DB ID is written to Manifest file in addition - // to the Identity file. By doing this 2 problems are solved - // 1. We don't checksum the Identity file where as Manifest file is. - // 2. Since the source of truth for DB is Manifest file DB ID will sit with - // the source of truth. Previously the Identity file could be copied - // independent of Manifest and that can result in wrong DB ID. - // We recommend setting this flag to true. - // Default: false - bool write_dbid_to_manifest = false; + // The DB unique ID can be saved in the DB manifest (preferred, this option) + // or an IDENTITY file (historical, deprecated), or both. If this option is + // set to false (old behavior), then write_identity_file must be set to true. + // The manifest is preferred because + // 1. The IDENTITY file is not checksummed, so it is not as safe against + // corruption. + // 2. The IDENTITY file may or may not be copied with the DB (e.g. not + // copied by BackupEngine), so is not reliable for the provenance of a DB. + // This option might eventually be obsolete and removed as Identity files + // are phased out. + bool write_dbid_to_manifest = true; + + // It is expected that the Identity file will be obsoleted by recording + // DB ID in the manifest (see write_dbid_to_manifest). Setting this to true + // maintains the historical behavior of writing an Identity file, while + // setting to false is expected to be the future default. This option might + // eventually be obsolete and removed as Identity files are phased out. + bool write_identity_file = true; // The number of bytes to prefetch when reading the log. This is mostly useful // for reading a remotely located log, as it can save the number of diff --git a/java/src/test/java/org/rocksdb/DBOptionsTest.java b/java/src/test/java/org/rocksdb/DBOptionsTest.java index 189acdb4a..bf71704cb 100644 --- a/java/src/test/java/org/rocksdb/DBOptionsTest.java +++ b/java/src/test/java/org/rocksdb/DBOptionsTest.java @@ -793,9 +793,9 @@ public class DBOptionsTest { @Test public void writeDbidToManifest() { try (final DBOptions options = new DBOptions()) { - assertThat(options.writeDbidToManifest()).isEqualTo(false); - assertThat(options.setWriteDbidToManifest(true)).isEqualTo(options); assertThat(options.writeDbidToManifest()).isEqualTo(true); + assertThat(options.setWriteDbidToManifest(false)).isEqualTo(options); + assertThat(options.writeDbidToManifest()).isEqualTo(false); } } diff --git a/java/src/test/java/org/rocksdb/OptionsTest.java b/java/src/test/java/org/rocksdb/OptionsTest.java index 9d2316737..38f8243fd 100644 --- a/java/src/test/java/org/rocksdb/OptionsTest.java +++ b/java/src/test/java/org/rocksdb/OptionsTest.java @@ -1379,9 +1379,9 @@ public class OptionsTest { @Test public void writeDbidToManifest() { try (final Options options = new Options()) { - assertThat(options.writeDbidToManifest()).isEqualTo(false); - assertThat(options.setWriteDbidToManifest(true)).isEqualTo(options); assertThat(options.writeDbidToManifest()).isEqualTo(true); + assertThat(options.setWriteDbidToManifest(false)).isEqualTo(options); + assertThat(options.writeDbidToManifest()).isEqualTo(false); } } diff --git a/java/src/test/java/org/rocksdb/RocksDBTest.java b/java/src/test/java/org/rocksdb/RocksDBTest.java index 1459f03b0..42bb4ab3c 100644 --- a/java/src/test/java/org/rocksdb/RocksDBTest.java +++ b/java/src/test/java/org/rocksdb/RocksDBTest.java @@ -1641,7 +1641,7 @@ public class RocksDBTest { try (final RocksDB db = RocksDB.open(options, dbPath)) { final RocksDB.LiveFiles livefiles = db.getLiveFiles(true); assertThat(livefiles).isNotNull(); - assertThat(livefiles.manifestFileSize).isEqualTo(70); + assertThat(livefiles.manifestFileSize).isEqualTo(116); assertThat(livefiles.files.size()).isEqualTo(3); assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT"); assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005"); diff --git a/options/db_options.cc b/options/db_options.cc index 2678bb5a7..0b880f4b9 100644 --- a/options/db_options.cc +++ b/options/db_options.cc @@ -407,6 +407,10 @@ static std::unordered_map {offsetof(struct ImmutableDBOptions, write_dbid_to_manifest), OptionType::kBoolean, OptionVerificationType::kNormal, OptionTypeFlags::kNone}}, + {"write_identity_file", + {offsetof(struct ImmutableDBOptions, write_identity_file), + OptionType::kBoolean, OptionVerificationType::kNormal, + OptionTypeFlags::kNone}}, {"log_readahead_size", {offsetof(struct ImmutableDBOptions, log_readahead_size), OptionType::kSizeT, OptionVerificationType::kNormal, @@ -772,6 +776,7 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options) avoid_unnecessary_blocking_io(options.avoid_unnecessary_blocking_io), persist_stats_to_disk(options.persist_stats_to_disk), write_dbid_to_manifest(options.write_dbid_to_manifest), + write_identity_file(options.write_identity_file), log_readahead_size(options.log_readahead_size), file_checksum_gen_factory(options.file_checksum_gen_factory), best_efforts_recovery(options.best_efforts_recovery), @@ -947,6 +952,8 @@ void ImmutableDBOptions::Dump(Logger* log) const { persist_stats_to_disk); ROCKS_LOG_HEADER(log, " Options.write_dbid_to_manifest: %d", write_dbid_to_manifest); + ROCKS_LOG_HEADER(log, " Options.write_identity_file: %d", + write_identity_file); ROCKS_LOG_HEADER( log, " Options.log_readahead_size: %" ROCKSDB_PRIszt, log_readahead_size); diff --git a/options/db_options.h b/options/db_options.h index 7e0752626..842fefca8 100644 --- a/options/db_options.h +++ b/options/db_options.h @@ -89,6 +89,7 @@ struct ImmutableDBOptions { bool avoid_unnecessary_blocking_io; bool persist_stats_to_disk; bool write_dbid_to_manifest; + bool write_identity_file; size_t log_readahead_size; std::shared_ptr file_checksum_gen_factory; bool best_efforts_recovery; diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 40531586a..59a07e4a2 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -773,4 +773,6 @@ void RegisterTestLibrary(const std::string& arg) { ObjectRegistry::Default()->AddLibrary("test", RegisterTestObjects, arg); } } + +const std::string kUnitTestDbId = "UnitTest"; } // namespace ROCKSDB_NAMESPACE::test diff --git a/test_util/testutil.h b/test_util/testutil.h index 9e0499cb3..2d693b5f2 100644 --- a/test_util/testutil.h +++ b/test_util/testutil.h @@ -904,5 +904,7 @@ struct ReadOptionsNoIo : public ReadOptions { }; extern const ReadOptionsNoIo kReadOptionsNoIo; +extern const std::string kUnitTestDbId; + } // namespace test } // namespace ROCKSDB_NAMESPACE diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 038e5fd53..f88c0be19 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -209,6 +209,7 @@ default_params = { "use_write_buffer_manager": lambda: random.randint(0, 1), "avoid_unnecessary_blocking_io": random.randint(0, 1), "write_dbid_to_manifest": random.randint(0, 1), + "write_identity_file": random.randint(0, 1), "avoid_flush_during_recovery": lambda: random.choice( [1 if t == 0 else 0 for t in range(0, 8)] ), @@ -959,6 +960,12 @@ def finalize_and_sanitize(src_params): and dest_params.get("use_timed_put_one_in") == 1 ): dest_params["use_timed_put_one_in"] = 3 + if ( + dest_params.get("write_dbid_to_manifest") == 0 + and dest_params.get("write_identity_file") == 0 + ): + # At least one must be true + dest_params["write_dbid_to_manifest"] = 1 return dest_params diff --git a/unreleased_history/behavior_changes/db_id.md b/unreleased_history/behavior_changes/db_id.md new file mode 100644 index 000000000..5536f3ce5 --- /dev/null +++ b/unreleased_history/behavior_changes/db_id.md @@ -0,0 +1 @@ +* Set `write_dbid_to_manifest=true` by default. This means DB ID will now be preserved through backups, checkpoints, etc. by default. Also add `write_identity_file` option which can be set to false for anticipated future behavior. -- cgit v1.2.3-70-g09d2