summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Dillinger <peterd@meta.com>2024-08-23 19:49:25 -0700
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>2024-08-23 19:49:25 -0700
commit96340dbce2ef295a1b48fc086ff8bf4b0356531b (patch)
treec95d14f1dd1fa9b4e3dadfef7a9c81cc2e98b900
parentd6aed64de4f7ceb32183eee938cae6ba1f152bc9 (diff)
Options for file temperature for more files (#12957)
Summary: We have a request to use the cold tier as primary source of truth for the DB, and to best support such use cases and to complement the existing options controlling SST file temperatures, we add two new DB options: * `metadata_write_temperature` for DB "small" files that don't contain much user data * `wal_write_temperature` for WALs. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12957 Test Plan: Unit test included, though it's hard to be sure we've covered all the places Reviewed By: jowlyzhang Differential Revision: D61664815 Pulled By: pdillinger fbshipit-source-id: 8e19c9dd8fd2db059bb15f74938d6bc12002e82b
-rw-r--r--db/compaction/compaction_job_test.cc6
-rw-r--r--db/db_impl/db_impl_files.cc4
-rw-r--r--db/db_impl/db_impl_open.cc14
-rw-r--r--db/db_test2.cc230
-rw-r--r--db/db_test_util.h11
-rw-r--r--db/flush_job_test.cc6
-rw-r--r--db/version_set.cc10
-rw-r--r--db/version_set_test.cc98
-rw-r--r--env/file_system.cc6
-rw-r--r--file/filename.cc14
-rw-r--r--file/filename.h3
-rw-r--r--include/rocksdb/advanced_options.h2
-rw-r--r--include/rocksdb/file_system.h7
-rw-r--r--include/rocksdb/options.h10
-rw-r--r--options/db_options.cc16
-rw-r--r--options/db_options.h2
-rw-r--r--options/options_helper.cc9
-rw-r--r--options/options_parser.cc5
-rw-r--r--options/options_settable_test.cc7
19 files changed, 382 insertions, 78 deletions
diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc
index 11a757fd6..bbc0fe4cf 100644
--- a/db/compaction/compaction_job_test.cc
+++ b/db/compaction/compaction_job_test.cc
@@ -552,7 +552,8 @@ class CompactionJobTestBase : public testing::Test {
/*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"",
/*error_handler=*/nullptr, /*read_only=*/false));
compaction_job_stats_.Reset();
- ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
+ ASSERT_OK(
+ SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
VersionEdit new_db;
new_db.SetLogNumber(0);
@@ -575,7 +576,8 @@ class CompactionJobTestBase : public testing::Test {
}
ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file.
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ Temperature::kUnknown, nullptr);
ASSERT_OK(s);
diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc
index dd4bf411c..0db729368 100644
--- a/db/db_impl/db_impl_files.cc
+++ b/db/db_impl/db_impl_files.cc
@@ -970,7 +970,9 @@ Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
}
// Persist it to IDENTITY file if allowed
if (!read_only) {
- s = SetIdentityFile(write_options, env_, dbname_, db_id_);
+ s = SetIdentityFile(write_options, env_, dbname_,
+ immutable_db_options_.metadata_write_temperature,
+ db_id_);
}
return s;
}
diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc
index 8ef024e7b..a58a142d7 100644
--- a/db/db_impl/db_impl_open.cc
+++ b/db/db_impl/db_impl_open.cc
@@ -295,7 +295,8 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
VersionEdit new_db;
const WriteOptions write_options(Env::IOActivity::kDBOpen);
- Status s = SetIdentityFile(write_options, env_, dbname_);
+ Status s = SetIdentityFile(write_options, env_, dbname_,
+ immutable_db_options_.metadata_write_temperature);
if (!s.ok()) {
return s;
}
@@ -319,6 +320,12 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
}
std::unique_ptr<FSWritableFile> file;
FileOptions file_options = fs_->OptimizeForManifestWrite(file_options_);
+ // DB option takes precedence when not kUnknown
+ if (immutable_db_options_.metadata_write_temperature !=
+ Temperature::kUnknown) {
+ file_options.temperature =
+ immutable_db_options_.metadata_write_temperature;
+ }
s = NewWritableFile(fs_.get(), manifest, &file, file_options);
if (!s.ok()) {
return s;
@@ -344,6 +351,7 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
if (s.ok()) {
// Make "CURRENT" file that points to the new manifest file.
s = SetCurrentFile(write_options, fs_.get(), dbname_, 1,
+ immutable_db_options_.metadata_write_temperature,
directories_.GetDbDir());
if (new_filenames) {
new_filenames->emplace_back(
@@ -1936,6 +1944,10 @@ IOStatus DBImpl::CreateWAL(const WriteOptions& write_options,
BuildDBOptions(immutable_db_options_, mutable_db_options_);
FileOptions opt_file_options =
fs_->OptimizeForLogWrite(file_options_, db_options);
+ // DB option takes precedence when not kUnknown
+ if (immutable_db_options_.wal_write_temperature != Temperature::kUnknown) {
+ opt_file_options.temperature = immutable_db_options_.wal_write_temperature;
+ }
std::string wal_dir = immutable_db_options_.GetWalDir();
std::string log_fname = LogFileName(wal_dir, log_file_num);
diff --git a/db/db_test2.cc b/db/db_test2.cc
index e6a3adf9b..f380144c6 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -10,6 +10,7 @@
#include <atomic>
#include <cstdlib>
#include <functional>
+#include <iostream>
#include <memory>
#include "db/db_test_util.h"
@@ -26,6 +27,7 @@
#include "rocksdb/utilities/replayer.h"
#include "rocksdb/wal_filter.h"
#include "test_util/testutil.h"
+#include "util/defer.h"
#include "util/random.h"
#include "utilities/fault_injection_env.h"
@@ -6544,6 +6546,234 @@ TEST_P(RenameCurrentTest, Compaction) {
ASSERT_EQ("d_value", Get("d"));
}
+TEST_F(DBTest2, VariousFileTemperatures) {
+ constexpr size_t kNumberFileTypes = static_cast<size_t>(kBlobFile) + 1U;
+
+ struct MyTestFS : public FileTemperatureTestFS {
+ explicit MyTestFS(const std::shared_ptr<FileSystem>& fs)
+ : FileTemperatureTestFS(fs) {
+ Reset();
+ }
+
+ IOStatus NewWritableFile(const std::string& fname, const FileOptions& opts,
+ std::unique_ptr<FSWritableFile>* result,
+ IODebugContext* dbg) override {
+ IOStatus ios =
+ FileTemperatureTestFS::NewWritableFile(fname, opts, result, dbg);
+ if (ios.ok()) {
+ uint64_t number;
+ FileType type;
+ if (ParseFileName(GetFileName(fname), &number, "LOG", &type)) {
+ if (type == kTableFile) {
+ // Not checked here
+ } else if (type == kWalFile) {
+ if (opts.temperature != expected_wal_temperature) {
+ std::cerr << "Attempt to open " << fname << " with temperature "
+ << temperature_to_string[opts.temperature]
+ << " rather than "
+ << temperature_to_string[expected_wal_temperature]
+ << std::endl;
+ assert(false);
+ }
+ } else if (type == kDescriptorFile) {
+ if (opts.temperature != expected_manifest_temperature) {
+ std::cerr << "Attempt to open " << fname << " with temperature "
+ << temperature_to_string[opts.temperature]
+ << " rather than "
+ << temperature_to_string[expected_wal_temperature]
+ << std::endl;
+ assert(false);
+ }
+ } else if (opts.temperature != expected_other_metadata_temperature) {
+ std::cerr << "Attempt to open " << fname << " with temperature "
+ << temperature_to_string[opts.temperature]
+ << " rather than "
+ << temperature_to_string[expected_wal_temperature]
+ << std::endl;
+ assert(false);
+ }
+ UpdateCount(type, 1);
+ }
+ }
+ return ios;
+ }
+
+ IOStatus RenameFile(const std::string& src, const std::string& dst,
+ const IOOptions& options,
+ IODebugContext* dbg) override {
+ IOStatus ios = FileTemperatureTestFS::RenameFile(src, dst, options, dbg);
+ if (ios.ok()) {
+ uint64_t number;
+ FileType src_type;
+ FileType dst_type;
+ assert(ParseFileName(GetFileName(src), &number, "LOG", &src_type));
+ assert(ParseFileName(GetFileName(dst), &number, "LOG", &dst_type));
+
+ UpdateCount(src_type, -1);
+ UpdateCount(dst_type, 1);
+ }
+ return ios;
+ }
+
+ void UpdateCount(FileType type, int delta) {
+ size_t i = static_cast<size_t>(type);
+ assert(i < kNumberFileTypes);
+ counts[i].FetchAddRelaxed(delta);
+ }
+
+ std::map<FileType, size_t> PopCounts() {
+ std::map<FileType, size_t> ret;
+ for (size_t i = 0; i < kNumberFileTypes; ++i) {
+ int c = counts[i].ExchangeRelaxed(0);
+ if (c > 0) {
+ ret[static_cast<FileType>(i)] = c;
+ }
+ }
+ return ret;
+ }
+
+ FileOptions OptimizeForLogWrite(
+ const FileOptions& file_options,
+ const DBOptions& /*db_options*/) const override {
+ FileOptions opts = file_options;
+ if (optimize_wal_temperature != Temperature::kUnknown) {
+ opts.temperature = optimize_wal_temperature;
+ }
+ return opts;
+ }
+
+ FileOptions OptimizeForManifestWrite(
+ const FileOptions& file_options) const override {
+ FileOptions opts = file_options;
+ if (optimize_manifest_temperature != Temperature::kUnknown) {
+ opts.temperature = optimize_manifest_temperature;
+ }
+ return opts;
+ }
+
+ void Reset() {
+ optimize_manifest_temperature = Temperature::kUnknown;
+ optimize_wal_temperature = Temperature::kUnknown;
+ expected_manifest_temperature = Temperature::kUnknown;
+ expected_other_metadata_temperature = Temperature::kUnknown;
+ expected_wal_temperature = Temperature::kUnknown;
+ for (auto& c : counts) {
+ c.StoreRelaxed(0);
+ }
+ }
+
+ Temperature optimize_manifest_temperature;
+ Temperature optimize_wal_temperature;
+ Temperature expected_manifest_temperature;
+ Temperature expected_other_metadata_temperature;
+ Temperature expected_wal_temperature;
+ std::array<RelaxedAtomic<int>, kNumberFileTypes> counts;
+ };
+
+ // We don't have enough non-unknown temps to confidently distinguish that
+ // a specific setting caused a specific outcome, in a single run. This is a
+ // reasonable work-around without blowing up test time. Only returns
+ // non-unknown temperatures.
+ auto RandomTemp = [] {
+ static std::vector<Temperature> temps = {
+ Temperature::kHot, Temperature::kWarm, Temperature::kCold};
+ return temps[Random::GetTLSInstance()->Uniform(
+ static_cast<int>(temps.size()))];
+ };
+
+ auto test_fs = std::make_shared<MyTestFS>(env_->GetFileSystem());
+ std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, test_fs));
+ for (bool use_optimize : {false, true}) {
+ std::cerr << "use_optimize: " << std::to_string(use_optimize) << std::endl;
+ for (bool use_temp_options : {false, true}) {
+ std::cerr << "use_temp_options: " << std::to_string(use_temp_options)
+ << std::endl;
+
+ Options options = CurrentOptions();
+ // Currently require for last level temperature
+ options.compaction_style = kCompactionStyleUniversal;
+ options.env = env.get();
+ test_fs->Reset();
+ if (use_optimize) {
+ test_fs->optimize_manifest_temperature = RandomTemp();
+ test_fs->expected_manifest_temperature =
+ test_fs->optimize_manifest_temperature;
+ test_fs->optimize_wal_temperature = RandomTemp();
+ test_fs->expected_wal_temperature = test_fs->optimize_wal_temperature;
+ }
+ if (use_temp_options) {
+ options.metadata_write_temperature = RandomTemp();
+ test_fs->expected_manifest_temperature =
+ options.metadata_write_temperature;
+ test_fs->expected_other_metadata_temperature =
+ options.metadata_write_temperature;
+ options.wal_write_temperature = RandomTemp();
+ test_fs->expected_wal_temperature = options.wal_write_temperature;
+ options.last_level_temperature = RandomTemp();
+ options.default_write_temperature = RandomTemp();
+ }
+
+ DestroyAndReopen(options);
+ Defer closer([&] { Close(); });
+
+ using FTC = std::map<FileType, size_t>;
+ // Files on DB startup
+ ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
+ {kDescriptorFile, 2},
+ {kCurrentFile, 2},
+ {kIdentityFile, 1},
+ {kOptionsFile, 1}}));
+
+ // Temperature count map
+ using TCM = std::map<Temperature, size_t>;
+ ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), TCM({}));
+
+ ASSERT_OK(Put("foo", "1"));
+ ASSERT_OK(Put("bar", "1"));
+ ASSERT_OK(Flush());
+ ASSERT_OK(Put("foo", "2"));
+ ASSERT_OK(Put("bar", "2"));
+ ASSERT_OK(Flush());
+
+ ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
+ TCM({{options.default_write_temperature, 2}}));
+
+ ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
+
+ ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(),
+ TCM({{options.last_level_temperature, 1}}));
+
+ ASSERT_OK(Put("foo", "3"));
+ ASSERT_OK(Put("bar", "3"));
+ ASSERT_OK(Flush());
+
+ // Just in memtable/WAL
+ ASSERT_OK(Put("dog", "3"));
+
+ {
+ TCM expected;
+ expected[options.default_write_temperature] += 1;
+ expected[options.last_level_temperature] += 1;
+ ASSERT_EQ(test_fs->CountCurrentSstFilesByTemp(), expected);
+ }
+
+ // New files during operation
+ ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 3}, {kTableFile, 4}}));
+
+ Reopen(options);
+
+ // New files during re-open/recovery
+ ASSERT_EQ(test_fs->PopCounts(), FTC({{kWalFile, 1},
+ {kTableFile, 1},
+ {kDescriptorFile, 1},
+ {kCurrentFile, 1},
+ {kOptionsFile, 1}}));
+
+ Destroy(options);
+ }
+ }
+}
+
TEST_F(DBTest2, LastLevelTemperature) {
class TestListener : public EventListener {
public:
diff --git a/db/db_test_util.h b/db/db_test_util.h
index 47b1667ea..36a461534 100644
--- a/db/db_test_util.h
+++ b/db/db_test_util.h
@@ -831,6 +831,15 @@ class FileTemperatureTestFS : public FileSystemWrapper {
return count;
}
+ std::map<Temperature, size_t> CountCurrentSstFilesByTemp() {
+ MutexLock lock(&mu_);
+ std::map<Temperature, size_t> ret;
+ for (const auto& e : current_sst_file_temperatures_) {
+ ret[e.second]++;
+ }
+ return ret;
+ }
+
void OverrideSstFileTemperature(uint64_t number, Temperature temp) {
MutexLock lock(&mu_);
current_sst_file_temperatures_[number] = temp;
@@ -842,7 +851,7 @@ class FileTemperatureTestFS : public FileSystemWrapper {
requested_sst_file_temperatures_;
std::map<uint64_t, Temperature> current_sst_file_temperatures_;
- std::string GetFileName(const std::string& fname) {
+ static std::string GetFileName(const std::string& fname) {
auto filename = fname.substr(fname.find_last_of(kFilePathSeparator) + 1);
// workaround only for Windows that the file path could contain both Windows
// FilePathSeparator and '/'
diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc
index 3ffb77d53..d407e4815 100644
--- a/db/flush_job_test.cc
+++ b/db/flush_job_test.cc
@@ -68,7 +68,8 @@ class FlushJobTestBase : public testing::Test {
}
void NewDB() {
- ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
+ ASSERT_OK(
+ SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
VersionEdit new_db;
new_db.SetLogNumber(0);
@@ -114,7 +115,8 @@ class FlushJobTestBase : public testing::Test {
}
ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file.
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ Temperature::kUnknown, nullptr);
ASSERT_OK(s);
}
diff --git a/db/version_set.cc b/db/version_set.cc
index fec847630..e81165a3d 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -5511,6 +5511,10 @@ Status VersionSet::ProcessManifestWrites(
std::unique_ptr<log::Writer> new_desc_log_ptr;
{
FileOptions opt_file_opts = fs_->OptimizeForManifestWrite(file_options_);
+ // DB option (in file_options_) takes precedence when not kUnknown
+ if (file_options_.temperature != Temperature::kUnknown) {
+ opt_file_opts.temperature = file_options_.temperature;
+ }
mu->Unlock();
TEST_SYNC_POINT("VersionSet::LogAndApply:WriteManifestStart");
TEST_SYNC_POINT_CALLBACK("VersionSet::LogAndApply:WriteManifest", nullptr);
@@ -5637,9 +5641,9 @@ Status VersionSet::ProcessManifestWrites(
assert(manifest_io_status.ok());
}
if (s.ok() && new_descriptor_log) {
- io_s = SetCurrentFile(write_options, fs_.get(), dbname_,
- pending_manifest_file_number_,
- dir_contains_current_file);
+ io_s = SetCurrentFile(
+ write_options, fs_.get(), dbname_, pending_manifest_file_number_,
+ file_options_.temperature, dir_contains_current_file);
if (!io_s.ok()) {
s = io_s;
// Quarantine old manifest file in case new manifest file's CURRENT file
diff --git a/db/version_set_test.cc b/db/version_set_test.cc
index 94ee5f2e5..4f3665fba 100644
--- a/db/version_set_test.cc
+++ b/db/version_set_test.cc
@@ -1415,16 +1415,22 @@ class VersionSetTestBase {
}
}
+ void CreateCurrentFile() {
+ // Make "CURRENT" file point to the new manifest file.
+ ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ Temperature::kUnknown,
+ /* dir_contains_current_file */ nullptr));
+ }
+
// Create DB with 3 column families.
void NewDB() {
SequenceNumber last_seqno;
std::unique_ptr<log::Writer> log_writer;
- ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
+ ASSERT_OK(
+ SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
PrepareManifest(&column_families_, &last_seqno, &log_writer);
log_writer.reset();
- // Make "CURRENT" file point to the new manifest file.
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
EXPECT_OK(versions_->Recover(column_families_, false));
EXPECT_EQ(column_families_.size(),
@@ -2600,7 +2606,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
edits_[i].MarkAtomicGroup(--remaining);
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
+ CreateCurrentFile();
}
void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) {
@@ -2612,7 +2618,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
edits_[i].MarkAtomicGroup(--remaining);
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
+ CreateCurrentFile();
}
void SetupCorruptedAtomicGroup(int atomic_group_size) {
@@ -2626,7 +2632,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
}
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
+ CreateCurrentFile();
}
void SetupIncorrectAtomicGroup(int atomic_group_size) {
@@ -2642,7 +2648,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
}
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
+ CreateCurrentFile();
}
void SetupTestSyncPoints() {
@@ -3408,8 +3414,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
SequenceNumber last_seqno;
std::unique_ptr<log::Writer> log_writer;
PrepareManifest(&column_families, &last_seqno, &log_writer);
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
EXPECT_OK(versions_->Recover(column_families, false /* read_only */));
EXPECT_EQ(column_families.size(),
@@ -3431,7 +3436,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
cfd_to_drop->Ref();
drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID());
mutex_.Lock();
- s = versions_->LogAndApply(
+ Status s = versions_->LogAndApply(
cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), read_options,
write_options, &drop_cf_edit, &mutex_, nullptr);
mutex_.Unlock();
@@ -3541,9 +3546,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase,
TEST_F(EmptyDefaultCfNewManifest, Recover) {
PrepareManifest(nullptr, nullptr, &log_writer_);
log_writer_.reset();
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
- /* dir_contains_current_file */ nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
std::vector<ColumnFamilyDescriptor> column_families;
@@ -3552,7 +3555,7 @@ TEST_F(EmptyDefaultCfNewManifest, Recover) {
cf_options_);
std::string db_id;
bool has_missing_table_file = false;
- s = versions_->TryRecoverFromOneManifest(
+ Status s = versions_->TryRecoverFromOneManifest(
manifest_path, column_families, false, &db_id, &has_missing_table_file);
ASSERT_OK(s);
ASSERT_FALSE(has_missing_table_file);
@@ -3573,7 +3576,8 @@ class VersionSetTestEmptyDb
assert(nullptr != log_writer);
VersionEdit new_db;
if (db_options_.write_dbid_to_manifest) {
- ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
+ ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_,
+ Temperature::kUnknown));
DBOptions tmp_db_options;
tmp_db_options.env = env_;
std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
@@ -3606,9 +3610,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) {
db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
PrepareManifest(nullptr, nullptr, &log_writer_);
log_writer_.reset();
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
- /* dir_contains_current_file */ nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3623,9 +3625,9 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) {
std::string db_id;
bool has_missing_table_file = false;
- s = versions_->TryRecoverFromOneManifest(manifest_path, column_families,
- read_only, &db_id,
- &has_missing_table_file);
+ Status s = versions_->TryRecoverFromOneManifest(
+ manifest_path, column_families, read_only, &db_id,
+ &has_missing_table_file);
auto iter =
std::find(cf_names.begin(), cf_names.end(), kDefaultColumnFamilyName);
if (iter == cf_names.end()) {
@@ -3651,9 +3653,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) {
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
- /* dir_contains_current_file */ nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3699,9 +3699,7 @@ TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) {
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
- /* dir_contains_current_file */ nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3758,9 +3756,7 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) {
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
- /* dir_contains_current_file */ nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3816,9 +3812,7 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) {
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
- /* dir_contains_current_file */ nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -4025,15 +4019,14 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
log_writer_.reset();
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
std::string db_id;
bool has_missing_table_file = false;
- s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
- /*read_only=*/false, &db_id,
- &has_missing_table_file);
+ Status s = versions_->TryRecoverFromOneManifest(
+ manifest_path, column_families_,
+ /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s);
ASSERT_TRUE(has_missing_table_file);
for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
@@ -4083,15 +4076,14 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
log_writer_.reset();
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
std::string db_id;
bool has_missing_table_file = false;
- s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
- /*read_only=*/false, &db_id,
- &has_missing_table_file);
+ Status s = versions_->TryRecoverFromOneManifest(
+ manifest_path, column_families_,
+ /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s);
ASSERT_TRUE(has_missing_table_file);
for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
@@ -4137,15 +4129,14 @@ TEST_F(VersionSetTestMissingFiles, NoFileMissing) {
WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
log_writer_.reset();
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
std::string db_id;
bool has_missing_table_file = false;
- s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
- /*read_only=*/false, &db_id,
- &has_missing_table_file);
+ Status s = versions_->TryRecoverFromOneManifest(
+ manifest_path, column_families_,
+ /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s);
ASSERT_FALSE(has_missing_table_file);
for (ColumnFamilyData* cfd : *(versions_->GetColumnFamilySet())) {
@@ -4266,15 +4257,14 @@ class BestEffortsRecoverIncompleteVersionTest
/*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>(),
blob_files);
log_writer_.reset();
- Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
- ASSERT_OK(s);
+ CreateCurrentFile();
std::string manifest_path;
VerifyManifest(&manifest_path);
std::string db_id;
bool has_missing_table_file = false;
- s = versions_->TryRecoverFromOneManifest(manifest_path, column_families_,
- /*read_only=*/false, &db_id,
- &has_missing_table_file);
+ Status s = versions_->TryRecoverFromOneManifest(
+ manifest_path, column_families_,
+ /*read_only=*/false, &db_id, &has_missing_table_file);
ASSERT_OK(s);
ASSERT_TRUE(has_missing_table_file);
}
diff --git a/env/file_system.cc b/env/file_system.cc
index 27c7207f0..1f02f7a7e 100644
--- a/env/file_system.cc
+++ b/env/file_system.cc
@@ -181,10 +181,10 @@ FileOptions FileSystem::OptimizeForBlobFileRead(
IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
const std::string& fname, bool should_sync,
- const IOOptions& io_options) {
+ const IOOptions& io_options,
+ const FileOptions& file_options) {
std::unique_ptr<FSWritableFile> file;
- EnvOptions soptions;
- IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr);
+ IOStatus s = fs->NewWritableFile(fname, file_options, &file, nullptr);
if (!s.ok()) {
return s;
}
diff --git a/file/filename.cc b/file/filename.cc
index b34a0e113..45cbf9d76 100644
--- a/file/filename.cc
+++ b/file/filename.cc
@@ -388,6 +388,7 @@ bool ParseFileName(const std::string& fname, uint64_t* number,
IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
const std::string& dbname, uint64_t descriptor_number,
+ Temperature temp,
FSDirectory* dir_contains_current_file) {
// Remove leading "dbname/" and add newline to manifest file name
std::string manifest = DescriptorFileName(dbname, descriptor_number);
@@ -397,8 +398,11 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
std::string tmp = TempFileName(dbname, descriptor_number);
IOOptions opts;
IOStatus s = PrepareIOFromWriteOptions(write_options, opts);
+ FileOptions file_opts;
+ file_opts.temperature = temp;
if (s.ok()) {
- s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts);
+ s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts,
+ file_opts);
}
TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s);
if (s.ok()) {
@@ -423,7 +427,8 @@ IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
}
Status SetIdentityFile(const WriteOptions& write_options, Env* env,
- const std::string& dbname, const std::string& db_id) {
+ const std::string& dbname, Temperature temp,
+ const std::string& db_id) {
std::string id;
if (db_id.empty()) {
id = env->GenerateUniqueId();
@@ -437,8 +442,11 @@ Status SetIdentityFile(const WriteOptions& write_options, Env* env,
Status s;
IOOptions opts;
s = PrepareIOFromWriteOptions(write_options, opts);
+ FileOptions file_opts;
+ file_opts.temperature = temp;
if (s.ok()) {
- s = WriteStringToFile(env, id, tmp, true, &opts);
+ s = WriteStringToFile(env->GetFileSystem().get(), id, tmp,
+ /*should_sync=*/true, opts, file_opts);
}
if (s.ok()) {
s = env->RenameFile(tmp, identify_file_name);
diff --git a/file/filename.h b/file/filename.h
index 56bbd78d5..5a52c745a 100644
--- a/file/filename.h
+++ b/file/filename.h
@@ -161,11 +161,12 @@ bool ParseFileName(const std::string& filename, uint64_t* number,
// when
IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
const std::string& dbname, uint64_t descriptor_number,
+ Temperature temp,
FSDirectory* dir_contains_current_file);
// Make the IDENTITY file for the db
Status SetIdentityFile(const WriteOptions& write_options, Env* env,
- const std::string& dbname,
+ const std::string& dbname, Temperature temp,
const std::string& db_id = {});
// Sync manifest file `file`.
diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h
index 11f971c24..3761923ce 100644
--- a/include/rocksdb/advanced_options.h
+++ b/include/rocksdb/advanced_options.h
@@ -813,7 +813,7 @@ struct AdvancedColumnFamilyOptions {
// If this option is set, when creating the last level files, pass this
// temperature to FileSystem used. Should be no-op for default FileSystem
// and users need to plug in their own FileSystem to take advantage of it.
- // When using FIFO compaction, this option is ignored.
+ // Currently only compatible with universal compaction.
//
// Dynamically changeable through the SetOptions() API
Temperature last_level_temperature = Temperature::kUnknown;
diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h
index 8d21c9194..042b38305 100644
--- a/include/rocksdb/file_system.h
+++ b/include/rocksdb/file_system.h
@@ -195,7 +195,9 @@ struct FileOptions : EnvOptions {
FileOptions() : EnvOptions(), handoff_checksum_type(ChecksumType::kCRC32c) {}
FileOptions(const DBOptions& opts)
- : EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {}
+ : EnvOptions(opts),
+ temperature(opts.metadata_write_temperature),
+ handoff_checksum_type(ChecksumType::kCRC32c) {}
FileOptions(const EnvOptions& opts)
: EnvOptions(opts), handoff_checksum_type(ChecksumType::kCRC32c) {}
@@ -1952,7 +1954,8 @@ class FSDirectoryWrapper : public FSDirectory {
// A utility routine: write "data" to the named file.
IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
const std::string& fname, bool should_sync = false,
- const IOOptions& io_options = IOOptions());
+ const IOOptions& io_options = IOOptions(),
+ const FileOptions& file_options = FileOptions());
// A utility routine: read contents of named file into *data
IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h
index 8223c4a13..4b2564083 100644
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1580,6 +1580,16 @@ struct DBOptions {
// Default 100ms
uint64_t follower_catchup_retry_wait_ms = 100;
+ // When DB files other than SST, blob and WAL files are created, use this
+ // filesystem temperature. (See also `wal_write_temperature` and various
+ // `*_temperature` CF options.) When not `kUnknown`, this overrides any
+ // temperature set by OptimizeForManifestWrite functions.
+ Temperature metadata_write_temperature = Temperature::kUnknown;
+
+ // Use this filesystem temperature when creating WAL files. When not
+ // `kUnknown`, this overrides any temperature set by OptimizeForLogWrite
+ // functions.
+ Temperature wal_write_temperature = Temperature::kUnknown;
// End EXPERIMENTAL
};
diff --git a/options/db_options.cc b/options/db_options.cc
index 8eb28c1ed..2678bb5a7 100644
--- a/options/db_options.cc
+++ b/options/db_options.cc
@@ -576,6 +576,14 @@ static std::unordered_map<std::string, OptionTypeInfo>
{offsetof(struct ImmutableDBOptions, follower_catchup_retry_wait_ms),
OptionType::kUInt64T, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
+ {"metadata_write_temperature",
+ {offsetof(struct ImmutableDBOptions, metadata_write_temperature),
+ OptionType::kTemperature, OptionVerificationType::kNormal,
+ OptionTypeFlags::kNone}},
+ {"wal_write_temperature",
+ {offsetof(struct ImmutableDBOptions, wal_write_temperature),
+ OptionType::kTemperature, OptionVerificationType::kNormal,
+ OptionTypeFlags::kNone}},
};
const std::string OptionsHelper::kDBOptionsName = "DBOptions";
@@ -778,7 +786,9 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
follower_refresh_catchup_period_ms(
options.follower_refresh_catchup_period_ms),
follower_catchup_retry_count(options.follower_catchup_retry_count),
- follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms) {
+ follower_catchup_retry_wait_ms(options.follower_catchup_retry_wait_ms),
+ metadata_write_temperature(options.metadata_write_temperature),
+ wal_write_temperature(options.wal_write_temperature) {
fs = env->GetFileSystem();
clock = env->GetSystemClock().get();
logger = info_log.get();
@@ -956,6 +966,10 @@ void ImmutableDBOptions::Dump(Logger* log) const {
db_host_id.c_str());
ROCKS_LOG_HEADER(log, " Options.enforce_single_del_contracts: %s",
enforce_single_del_contracts ? "true" : "false");
+ ROCKS_LOG_HEADER(log, " Options.metadata_write_temperature: %s",
+ temperature_to_string[metadata_write_temperature].c_str());
+ ROCKS_LOG_HEADER(log, " Options.wal_write_temperature: %s",
+ temperature_to_string[wal_write_temperature].c_str());
}
bool ImmutableDBOptions::IsWalDirSameAsDBPath() const {
diff --git a/options/db_options.h b/options/db_options.h
index 5de6ab498..7e0752626 100644
--- a/options/db_options.h
+++ b/options/db_options.h
@@ -103,6 +103,8 @@ struct ImmutableDBOptions {
uint64_t follower_refresh_catchup_period_ms;
uint64_t follower_catchup_retry_count;
uint64_t follower_catchup_retry_wait_ms;
+ Temperature metadata_write_temperature;
+ Temperature wal_write_temperature;
// Beginning convenience/helper objects that are not part of the base
// DBOptions
diff --git a/options/options_helper.cc b/options/options_helper.cc
index ec62dd1f5..011f47b98 100644
--- a/options/options_helper.cc
+++ b/options/options_helper.cc
@@ -180,6 +180,15 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
options.enforce_single_del_contracts =
immutable_db_options.enforce_single_del_contracts;
options.daily_offpeak_time_utc = mutable_db_options.daily_offpeak_time_utc;
+ options.follower_refresh_catchup_period_ms =
+ immutable_db_options.follower_refresh_catchup_period_ms;
+ options.follower_catchup_retry_count =
+ immutable_db_options.follower_catchup_retry_count;
+ options.follower_catchup_retry_wait_ms =
+ immutable_db_options.follower_catchup_retry_wait_ms;
+ options.metadata_write_temperature =
+ immutable_db_options.metadata_write_temperature;
+ options.wal_write_temperature = immutable_db_options.wal_write_temperature;
return options;
}
diff --git a/options/options_parser.cc b/options/options_parser.cc
index ec32f7644..4e249908b 100644
--- a/options/options_parser.cc
+++ b/options/options_parser.cc
@@ -69,8 +69,9 @@ Status PersistRocksDBOptions(const WriteOptions& write_options,
}
std::unique_ptr<FSWritableFile> wf;
- Status s =
- fs->NewWritableFile(file_name, FileOptions(), &wf, nullptr);
+ FileOptions file_options;
+ file_options.temperature = db_opt.metadata_write_temperature;
+ Status s = fs->NewWritableFile(file_name, file_options, &wf, nullptr);
if (!s.ok()) {
return s;
}
diff --git a/options/options_settable_test.cc b/options/options_settable_test.cc
index 2bf349b1c..67aab055e 100644
--- a/options/options_settable_test.cc
+++ b/options/options_settable_test.cc
@@ -367,7 +367,12 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
"lowest_used_cache_tier=kNonVolatileBlockTier;"
"allow_data_in_errors=false;"
"enforce_single_del_contracts=false;"
- "daily_offpeak_time_utc=08:30-19:00;",
+ "daily_offpeak_time_utc=08:30-19:00;"
+ "follower_refresh_catchup_period_ms=123;"
+ "follower_catchup_retry_count=456;"
+ "follower_catchup_retry_wait_ms=789;"
+ "metadata_write_temperature=kCold;"
+ "wal_write_temperature=kHot;",
new_options));
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),