summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--db/blob/blob_file_builder.cc35
-rw-r--r--db/blob/blob_file_builder.h11
-rw-r--r--db/blob/blob_file_builder_test.cc39
-rw-r--r--db/blob/blob_file_cache_test.cc9
-rw-r--r--db/blob/blob_file_reader_test.cc11
-rw-r--r--db/blob/blob_log_writer.cc83
-rw-r--r--db/blob/blob_log_writer.h18
-rw-r--r--db/blob/blob_source_test.cc9
-rw-r--r--db/builder.cc55
-rw-r--r--db/builder.h5
-rw-r--r--db/column_family.cc2
-rw-r--r--db/compaction/compaction_job.cc22
-rw-r--r--db/compaction/compaction_job_test.cc12
-rw-r--r--db/compaction/compaction_outputs.cc9
-rw-r--r--db/convenience.cc2
-rw-r--r--db/db_basic_test.cc3
-rw-r--r--db/db_impl/db_impl.cc203
-rw-r--r--db/db_impl/db_impl.h80
-rw-r--r--db/db_impl/db_impl_compaction_flush.cc79
-rw-r--r--db/db_impl/db_impl_experimental.cc6
-rw-r--r--db/db_impl/db_impl_files.cc9
-rw-r--r--db/db_impl/db_impl_open.cc98
-rw-r--r--db/db_impl/db_impl_write.cc63
-rw-r--r--db/db_iter.cc1
-rw-r--r--db/db_iter.h5
-rw-r--r--db/db_sst_test.cc11
-rw-r--r--db/db_test2.cc2
-rw-r--r--db/db_wal_test.cc4
-rw-r--r--db/experimental.cc6
-rw-r--r--db/external_sst_file_ingestion_job.cc6
-rw-r--r--db/fault_injection_test.cc2
-rw-r--r--db/flush_job.cc48
-rw-r--r--db/flush_job_test.cc8
-rw-r--r--db/import_column_family_job.cc2
-rw-r--r--db/internal_stats.cc12
-rw-r--r--db/log_test.cc28
-rw-r--r--db/log_writer.cc184
-rw-r--r--db/log_writer.h18
-rw-r--r--db/memtable.cc2
-rw-r--r--db/memtable_list.cc14
-rw-r--r--db/repair.cc52
-rw-r--r--db/table_properties_collector_test.cc13
-rw-r--r--db/version_set.cc55
-rw-r--r--db/version_set.h36
-rw-r--r--db/version_set_test.cc116
-rw-r--r--db/version_util.h10
-rw-r--r--db/wal_manager_test.cc7
-rw-r--r--db/write_batch.cc4
-rw-r--r--db/write_thread.h2
-rw-r--r--db_stress_tool/db_stress_env_wrapper.h166
-rw-r--r--db_stress_tool/db_stress_listener.cc12
-rw-r--r--db_stress_tool/multi_ops_txns_stress.cc18
-rw-r--r--env/env.cc5
-rw-r--r--env/env_test.cc2
-rw-r--r--env/file_system.cc9
-rw-r--r--file/file_util.cc15
-rw-r--r--file/file_util.h8
-rw-r--r--file/filename.cc44
-rw-r--r--file/filename.h7
-rw-r--r--file/writable_file_writer.cc182
-rw-r--r--file/writable_file_writer.h51
-rw-r--r--include/rocksdb/env.h4
-rw-r--r--include/rocksdb/file_system.h3
-rw-r--r--include/rocksdb/options.h37
-rw-r--r--include/rocksdb/sst_file_reader.h2
-rw-r--r--include/rocksdb/statistics.h8
-rw-r--r--java/rocksjni/portal.h17
-rw-r--r--java/src/main/java/org/rocksdb/HistogramType.java8
-rw-r--r--logging/env_logger.h6
-rw-r--r--monitoring/persistent_stats_history.cc3
-rw-r--r--monitoring/statistics.cc4
-rw-r--r--options/options.cc7
-rw-r--r--options/options_parser.cc55
-rw-r--r--options/options_parser.h6
-rw-r--r--options/options_test.cc18
-rw-r--r--table/block_based/block_based_table_builder.cc25
-rw-r--r--table/block_based/block_based_table_reader.cc6
-rw-r--r--table/block_based/block_based_table_reader_test.cc9
-rw-r--r--table/block_based/data_block_hash_index_test.cc6
-rw-r--r--table/block_fetcher_test.cc10
-rw-r--r--table/cuckoo/cuckoo_table_builder.cc13
-rw-r--r--table/cuckoo/cuckoo_table_builder_test.cc22
-rw-r--r--table/cuckoo/cuckoo_table_reader.cc2
-rw-r--r--table/cuckoo/cuckoo_table_reader_test.cc5
-rw-r--r--table/mock_table.cc2
-rw-r--r--table/plain/plain_table_builder.cc10
-rw-r--r--table/plain/plain_table_key_coding.cc12
-rw-r--r--table/plain/plain_table_reader.cc4
-rw-r--r--table/sst_file_dumper.cc13
-rw-r--r--table/sst_file_writer.cc31
-rw-r--r--table/table_builder.h5
-rw-r--r--table/table_reader_bench.cc12
-rw-r--r--table/table_test.cc93
-rw-r--r--test_util/testutil.cc13
-rw-r--r--tools/db_bench_tool_test.cc6
-rw-r--r--tools/ldb_cmd.cc6
-rw-r--r--tools/simulated_hybrid_file_system.cc5
-rw-r--r--tools/sst_dump_test.cc12
-rw-r--r--unreleased_history/behavior_changes/blob_file_write_micros.md1
-rw-r--r--unreleased_history/bug_fixes/blob_tickers.md1
-rw-r--r--unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md1
-rw-r--r--util/file_checksum_helper.cc2
-rw-r--r--util/file_reader_writer_test.cc124
-rw-r--r--util/log_write_bench.cc6
-rw-r--r--utilities/backup/backup_engine.cc10
-rw-r--r--utilities/backup/backup_engine_test.cc2
-rw-r--r--utilities/blob_db/blob_compaction_filter.cc10
-rw-r--r--utilities/blob_db/blob_db.h2
-rw-r--r--utilities/blob_db/blob_db_impl.cc90
-rw-r--r--utilities/blob_db/blob_db_impl.h27
-rw-r--r--utilities/blob_db/blob_db_listener.h8
-rw-r--r--utilities/blob_db/blob_file.cc10
-rw-r--r--utilities/blob_db/blob_file.h5
-rw-r--r--utilities/cache_dump_load_impl.h10
-rw-r--r--utilities/fault_injection_fs.cc3
-rw-r--r--utilities/options/options_util_test.cc13
-rw-r--r--utilities/simulator_cache/sim_cache.cc6
-rw-r--r--utilities/trace/file_trace_reader_writer.cc2
-rw-r--r--utilities/transactions/pessimistic_transaction_db.cc1
-rw-r--r--utilities/transactions/write_prepared_txn.cc1
-rw-r--r--utilities/transactions/write_prepared_txn_db.cc1
-rw-r--r--utilities/transactions/write_unprepared_txn.cc2
-rw-r--r--utilities/transactions/write_unprepared_txn_db.cc2
123 files changed, 1822 insertions, 1048 deletions
diff --git a/db/blob/blob_file_builder.cc b/db/blob/blob_file_builder.cc
index 35269fdb5..dceb90cee 100644
--- a/db/blob/blob_file_builder.cc
+++ b/db/blob/blob_file_builder.cc
@@ -34,9 +34,9 @@ BlobFileBuilder::BlobFileBuilder(
VersionSet* versions, FileSystem* fs,
const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options, const FileOptions* file_options,
- std::string db_id, std::string db_session_id, int job_id,
- uint32_t column_family_id, const std::string& column_family_name,
- Env::IOPriority io_priority, Env::WriteLifeTimeHint write_hint,
+ const WriteOptions* write_options, std::string db_id,
+ std::string db_session_id, int job_id, uint32_t column_family_id,
+ const std::string& column_family_name, Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
@@ -44,18 +44,18 @@ BlobFileBuilder::BlobFileBuilder(
std::vector<BlobFileAddition>* blob_file_additions)
: BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, fs,
immutable_options, mutable_cf_options, file_options,
- db_id, db_session_id, job_id, column_family_id,
- column_family_name, io_priority, write_hint, io_tracer,
- blob_callback, creation_reason, blob_file_paths,
- blob_file_additions) {}
+ write_options, db_id, db_session_id, job_id,
+ column_family_id, column_family_name, write_hint,
+ io_tracer, blob_callback, creation_reason,
+ blob_file_paths, blob_file_additions) {}
BlobFileBuilder::BlobFileBuilder(
std::function<uint64_t()> file_number_generator, FileSystem* fs,
const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options, const FileOptions* file_options,
- std::string db_id, std::string db_session_id, int job_id,
- uint32_t column_family_id, const std::string& column_family_name,
- Env::IOPriority io_priority, Env::WriteLifeTimeHint write_hint,
+ const WriteOptions* write_options, std::string db_id,
+ std::string db_session_id, int job_id, uint32_t column_family_id,
+ const std::string& column_family_name, Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
@@ -69,12 +69,12 @@ BlobFileBuilder::BlobFileBuilder(
blob_compression_type_(mutable_cf_options->blob_compression_type),
prepopulate_blob_cache_(mutable_cf_options->prepopulate_blob_cache),
file_options_(file_options),
+ write_options_(write_options),
db_id_(std::move(db_id)),
db_session_id_(std::move(db_session_id)),
job_id_(job_id),
column_family_id_(column_family_id),
column_family_name_(column_family_name),
- io_priority_(io_priority),
write_hint_(write_hint),
io_tracer_(io_tracer),
blob_callback_(blob_callback),
@@ -87,6 +87,7 @@ BlobFileBuilder::BlobFileBuilder(
assert(fs_);
assert(immutable_options_);
assert(file_options_);
+ assert(write_options_);
assert(blob_file_paths_);
assert(blob_file_paths_->empty());
assert(blob_file_additions_);
@@ -207,14 +208,14 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {
blob_file_paths_->emplace_back(std::move(blob_file_path));
assert(file);
- file->SetIOPriority(io_priority_);
+ file->SetIOPriority(write_options_->rate_limiter_priority);
file->SetWriteLifeTimeHint(write_hint_);
FileTypeSet tmp_set = immutable_options_->checksum_handoff_file_types;
Statistics* const statistics = immutable_options_->stats;
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), blob_file_paths_->back(), *file_options_,
immutable_options_->clock, io_tracer_, statistics,
- immutable_options_->listeners,
+ Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS, immutable_options_->listeners,
immutable_options_->file_checksum_gen_factory.get(),
tmp_set.Contains(FileType::kBlobFile), false));
@@ -231,7 +232,7 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {
expiration_range);
{
- Status s = blob_log_writer->WriteHeader(header);
+ Status s = blob_log_writer->WriteHeader(*write_options_, header);
TEST_SYNC_POINT_CALLBACK(
"BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader", &s);
@@ -296,7 +297,8 @@ Status BlobFileBuilder::WriteBlobToFile(const Slice& key, const Slice& blob,
uint64_t key_offset = 0;
- Status s = writer_->AddRecord(key, blob, &key_offset, blob_offset);
+ Status s =
+ writer_->AddRecord(*write_options_, key, blob, &key_offset, blob_offset);
TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AddRecord", &s);
@@ -321,7 +323,8 @@ Status BlobFileBuilder::CloseBlobFile() {
std::string checksum_method;
std::string checksum_value;
- Status s = writer_->AppendFooter(footer, &checksum_method, &checksum_value);
+ Status s = writer_->AppendFooter(*write_options_, footer, &checksum_method,
+ &checksum_value);
TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AppendFooter", &s);
diff --git a/db/blob/blob_file_builder.h b/db/blob/blob_file_builder.h
index 8e7aab502..6ba7181aa 100644
--- a/db/blob/blob_file_builder.h
+++ b/db/blob/blob_file_builder.h
@@ -13,6 +13,7 @@
#include "rocksdb/advanced_options.h"
#include "rocksdb/compression_type.h"
#include "rocksdb/env.h"
+#include "rocksdb/options.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/types.h"
@@ -36,11 +37,11 @@ class BlobFileBuilder {
BlobFileBuilder(VersionSet* versions, FileSystem* fs,
const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options,
- const FileOptions* file_options, std::string db_id,
+ const FileOptions* file_options,
+ const WriteOptions* write_options, std::string db_id,
std::string db_session_id, int job_id,
uint32_t column_family_id,
const std::string& column_family_name,
- Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
@@ -51,11 +52,11 @@ class BlobFileBuilder {
BlobFileBuilder(std::function<uint64_t()> file_number_generator,
FileSystem* fs, const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options,
- const FileOptions* file_options, std::string db_id,
+ const FileOptions* file_options,
+ const WriteOptions* write_options, std::string db_id,
std::string db_session_id, int job_id,
uint32_t column_family_id,
const std::string& column_family_name,
- Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
@@ -92,12 +93,12 @@ class BlobFileBuilder {
CompressionType blob_compression_type_;
PrepopulateBlobCache prepopulate_blob_cache_;
const FileOptions* file_options_;
+ const WriteOptions* write_options_;
const std::string db_id_;
const std::string db_session_id_;
int job_id_;
uint32_t column_family_id_;
std::string column_family_name_;
- Env::IOPriority io_priority_;
Env::WriteLifeTimeHint write_hint_;
std::shared_ptr<IOTracer> io_tracer_;
BlobFileCompletionCallback* blob_callback_;
diff --git a/db/blob/blob_file_builder_test.cc b/db/blob/blob_file_builder_test.cc
index 5882e219f..8a2ecff13 100644
--- a/db/blob/blob_file_builder_test.cc
+++ b/db/blob/blob_file_builder_test.cc
@@ -43,6 +43,7 @@ class BlobFileBuilderTest : public testing::Test {
mock_env_.reset(MockEnv::Create(Env::Default()));
fs_ = mock_env_->GetFileSystem().get();
clock_ = mock_env_->GetSystemClock().get();
+ write_options_.rate_limiter_priority = Env::IO_HIGH;
}
void VerifyBlobFile(uint64_t blob_file_number,
@@ -113,6 +114,7 @@ class BlobFileBuilderTest : public testing::Test {
FileSystem* fs_;
SystemClock* clock_;
FileOptions file_options_;
+ WriteOptions write_options_;
};
TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
@@ -136,7 +138,6 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -144,8 +145,8 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
@@ -221,7 +222,6 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -229,8 +229,8 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
@@ -309,7 +309,6 @@ TEST_F(BlobFileBuilderTest, InlinedValues) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -317,8 +316,8 @@ TEST_F(BlobFileBuilderTest, InlinedValues) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
@@ -364,7 +363,6 @@ TEST_F(BlobFileBuilderTest, Compression) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -372,8 +370,8 @@ TEST_F(BlobFileBuilderTest, Compression) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
@@ -448,7 +446,6 @@ TEST_F(BlobFileBuilderTest, CompressionError) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -456,8 +453,8 @@ TEST_F(BlobFileBuilderTest, CompressionError) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
@@ -528,7 +525,6 @@ TEST_F(BlobFileBuilderTest, Checksum) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -536,8 +532,8 @@ TEST_F(BlobFileBuilderTest, Checksum) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
@@ -589,11 +585,13 @@ class BlobFileBuilderIOErrorTest
BlobFileBuilderIOErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
fs_ = mock_env_->GetFileSystem().get();
+ write_options_.rate_limiter_priority = Env::IO_HIGH;
}
std::unique_ptr<Env> mock_env_;
FileSystem* fs_;
FileOptions file_options_;
+ WriteOptions write_options_;
std::string sync_point_;
};
@@ -626,7 +624,6 @@ TEST_P(BlobFileBuilderIOErrorTest, IOError) {
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
- constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
@@ -634,8 +631,8 @@ TEST_P(BlobFileBuilderIOErrorTest, IOError) {
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
- &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id,
- column_family_id, column_family_name, io_priority, write_hint,
+ &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/,
+ job_id, column_family_id, column_family_name, write_hint,
nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
diff --git a/db/blob/blob_file_cache_test.cc b/db/blob/blob_file_cache_test.cc
index 8c3c56de9..edfeb7e81 100644
--- a/db/blob/blob_file_cache_test.cc
+++ b/db/blob/blob_file_cache_test.cc
@@ -57,7 +57,7 @@ void WriteBlobFile(uint32_t column_family_id,
BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
expiration_range);
- ASSERT_OK(blob_log_writer.WriteHeader(header));
+ ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header));
constexpr char key[] = "key";
constexpr char blob[] = "blob";
@@ -67,7 +67,8 @@ void WriteBlobFile(uint32_t column_family_id,
uint64_t key_offset = 0;
uint64_t blob_offset = 0;
- ASSERT_OK(blob_log_writer.AddRecord(key, blob, &key_offset, &blob_offset));
+ ASSERT_OK(blob_log_writer.AddRecord(WriteOptions(), key, blob, &key_offset,
+ &blob_offset));
BlobLogFooter footer;
footer.blob_count = 1;
@@ -76,8 +77,8 @@ void WriteBlobFile(uint32_t column_family_id,
std::string checksum_method;
std::string checksum_value;
- ASSERT_OK(
- blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
+ ASSERT_OK(blob_log_writer.AppendFooter(WriteOptions(), footer,
+ &checksum_method, &checksum_value));
}
} // anonymous namespace
diff --git a/db/blob/blob_file_reader_test.cc b/db/blob/blob_file_reader_test.cc
index b6049d1ef..b42b86685 100644
--- a/db/blob/blob_file_reader_test.cc
+++ b/db/blob/blob_file_reader_test.cc
@@ -63,7 +63,7 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
BlobLogHeader header(column_family_id, compression, has_ttl,
expiration_range_header);
- ASSERT_OK(blob_log_writer.WriteHeader(header));
+ ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header));
std::vector<std::string> compressed_blobs(num);
std::vector<Slice> blobs_to_write(num);
@@ -91,7 +91,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
for (size_t i = 0; i < num; ++i) {
uint64_t key_offset = 0;
- ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset,
+ ASSERT_OK(blob_log_writer.AddRecord(WriteOptions(), keys[i],
+ blobs_to_write[i], &key_offset,
&blob_offsets[i]));
}
@@ -101,8 +102,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
std::string checksum_method;
std::string checksum_value;
- ASSERT_OK(
- blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
+ ASSERT_OK(blob_log_writer.AppendFooter(WriteOptions(), footer,
+ &checksum_method, &checksum_value));
}
// Creates a test blob file with a single blob in it. Note: this method
@@ -473,7 +474,7 @@ TEST_F(BlobFileReaderTest, Malformed) {
BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
expiration_range);
- ASSERT_OK(blob_log_writer.WriteHeader(header));
+ ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header));
}
constexpr HistogramImpl* blob_file_read_hist = nullptr;
diff --git a/db/blob/blob_log_writer.cc b/db/blob/blob_log_writer.cc
index bf5ef27c1..d1768f902 100644
--- a/db/blob/blob_log_writer.cc
+++ b/db/blob/blob_log_writer.cc
@@ -33,35 +33,49 @@ BlobLogWriter::BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest,
BlobLogWriter::~BlobLogWriter() = default;
-Status BlobLogWriter::Sync() {
+Status BlobLogWriter::Sync(const WriteOptions& write_options) {
TEST_SYNC_POINT("BlobLogWriter::Sync");
StopWatch sync_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS);
- Status s = dest_->Sync(use_fsync_);
- RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED);
+ IOOptions opts;
+ Status s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ s = dest_->Sync(opts, use_fsync_);
+ }
+ if (s.ok()) {
+ RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED);
+ }
return s;
}
-Status BlobLogWriter::WriteHeader(BlobLogHeader& header) {
+Status BlobLogWriter::WriteHeader(const WriteOptions& write_options,
+ BlobLogHeader& header) {
assert(block_offset_ == 0);
assert(last_elem_type_ == kEtNone);
std::string str;
header.EncodeTo(&str);
- Status s = dest_->Append(Slice(str));
+ IOOptions opts;
+ Status s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ s = dest_->Append(opts, Slice(str));
+ }
if (s.ok()) {
block_offset_ += str.size();
if (do_flush_) {
- s = dest_->Flush();
+ s = dest_->Flush(opts);
}
}
last_elem_type_ = kEtFileHdr;
- RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
- BlobLogHeader::kSize);
+ if (s.ok()) {
+ RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
+ BlobLogHeader::kSize);
+ }
return s;
}
-Status BlobLogWriter::AppendFooter(BlobLogFooter& footer,
+Status BlobLogWriter::AppendFooter(const WriteOptions& write_options,
+ BlobLogFooter& footer,
std::string* checksum_method,
std::string* checksum_value) {
assert(block_offset_ != 0);
@@ -75,14 +89,17 @@ Status BlobLogWriter::AppendFooter(BlobLogFooter& footer,
s.PermitUncheckedError();
return Status::IOError("Seen Error. Skip closing.");
} else {
- s = dest_->Append(Slice(str));
+ IOOptions opts;
+ s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ s = dest_->Append(opts, Slice(str));
+ }
if (s.ok()) {
block_offset_ += str.size();
-
- s = Sync();
+ s = Sync(write_options);
if (s.ok()) {
- s = dest_->Close();
+ s = dest_->Close(opts);
if (s.ok()) {
assert(!!checksum_method == !!checksum_value);
@@ -111,12 +128,15 @@ Status BlobLogWriter::AppendFooter(BlobLogFooter& footer,
}
last_elem_type_ = kEtFileFooter;
- RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
- BlobLogFooter::kSize);
+ if (s.ok()) {
+ RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
+ BlobLogFooter::kSize);
+ }
return s;
}
-Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val,
+Status BlobLogWriter::AddRecord(const WriteOptions& write_options,
+ const Slice& key, const Slice& val,
uint64_t expiration, uint64_t* key_offset,
uint64_t* blob_offset) {
assert(block_offset_ != 0);
@@ -125,11 +145,13 @@ Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val,
std::string buf;
ConstructBlobHeader(&buf, key, val, expiration);
- Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset);
+ Status s =
+ EmitPhysicalRecord(write_options, buf, key, val, key_offset, blob_offset);
return s;
}
-Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val,
+Status BlobLogWriter::AddRecord(const WriteOptions& write_options,
+ const Slice& key, const Slice& val,
uint64_t* key_offset, uint64_t* blob_offset) {
assert(block_offset_ != 0);
assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord);
@@ -137,7 +159,8 @@ Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val,
std::string buf;
ConstructBlobHeader(&buf, key, val, 0);
- Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset);
+ Status s =
+ EmitPhysicalRecord(write_options, buf, key, val, key_offset, blob_offset);
return s;
}
@@ -150,28 +173,34 @@ void BlobLogWriter::ConstructBlobHeader(std::string* buf, const Slice& key,
record.EncodeHeaderTo(buf);
}
-Status BlobLogWriter::EmitPhysicalRecord(const std::string& headerbuf,
+Status BlobLogWriter::EmitPhysicalRecord(const WriteOptions& write_options,
+ const std::string& headerbuf,
const Slice& key, const Slice& val,
uint64_t* key_offset,
uint64_t* blob_offset) {
- StopWatch write_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS);
- Status s = dest_->Append(Slice(headerbuf));
+ IOOptions opts;
+ Status s = WritableFileWriter::PrepareIOOptions(write_options, opts);
if (s.ok()) {
- s = dest_->Append(key);
+ s = dest_->Append(opts, Slice(headerbuf));
}
if (s.ok()) {
- s = dest_->Append(val);
+ s = dest_->Append(opts, key);
+ }
+ if (s.ok()) {
+ s = dest_->Append(opts, val);
}
if (do_flush_ && s.ok()) {
- s = dest_->Flush();
+ s = dest_->Flush(opts);
}
*key_offset = block_offset_ + BlobLogRecord::kHeaderSize;
*blob_offset = *key_offset + key.size();
block_offset_ = *blob_offset + val.size();
last_elem_type_ = kEtRecord;
- RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
- BlobLogRecord::kHeaderSize + key.size() + val.size());
+ if (s.ok()) {
+ RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
+ BlobLogRecord::kHeaderSize + key.size() + val.size());
+ }
return s;
}
diff --git a/db/blob/blob_log_writer.h b/db/blob/blob_log_writer.h
index c1f9f31ad..0ba4f9c2a 100644
--- a/db/blob/blob_log_writer.h
+++ b/db/blob/blob_log_writer.h
@@ -43,20 +43,24 @@ class BlobLogWriter {
static void ConstructBlobHeader(std::string* buf, const Slice& key,
const Slice& val, uint64_t expiration);
- Status AddRecord(const Slice& key, const Slice& val, uint64_t* key_offset,
+ Status AddRecord(const WriteOptions& write_options, const Slice& key,
+ const Slice& val, uint64_t* key_offset,
uint64_t* blob_offset);
- Status AddRecord(const Slice& key, const Slice& val, uint64_t expiration,
- uint64_t* key_offset, uint64_t* blob_offset);
+ Status AddRecord(const WriteOptions& write_options, const Slice& key,
+ const Slice& val, uint64_t expiration, uint64_t* key_offset,
+ uint64_t* blob_offset);
- Status EmitPhysicalRecord(const std::string& headerbuf, const Slice& key,
+ Status EmitPhysicalRecord(const WriteOptions& write_options,
+ const std::string& headerbuf, const Slice& key,
const Slice& val, uint64_t* key_offset,
uint64_t* blob_offset);
- Status AppendFooter(BlobLogFooter& footer, std::string* checksum_method,
+ Status AppendFooter(const WriteOptions& write_options, BlobLogFooter& footer,
+ std::string* checksum_method,
std::string* checksum_value);
- Status WriteHeader(BlobLogHeader& header);
+ Status WriteHeader(const WriteOptions& write_options, BlobLogHeader& header);
WritableFileWriter* file() { return dest_.get(); }
@@ -64,7 +68,7 @@ class BlobLogWriter {
uint64_t get_log_number() const { return log_number_; }
- Status Sync();
+ Status Sync(const WriteOptions& write_options);
private:
std::unique_ptr<WritableFileWriter> dest_;
diff --git a/db/blob/blob_source_test.cc b/db/blob/blob_source_test.cc
index 258d2da5e..9fc1931c1 100644
--- a/db/blob/blob_source_test.cc
+++ b/db/blob/blob_source_test.cc
@@ -65,7 +65,7 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
BlobLogHeader header(column_family_id, compression, has_ttl,
expiration_range_header);
- ASSERT_OK(blob_log_writer.WriteHeader(header));
+ ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header));
std::vector<std::string> compressed_blobs(num);
std::vector<Slice> blobs_to_write(num);
@@ -93,7 +93,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
for (size_t i = 0; i < num; ++i) {
uint64_t key_offset = 0;
- ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset,
+ ASSERT_OK(blob_log_writer.AddRecord(WriteOptions(), keys[i],
+ blobs_to_write[i], &key_offset,
&blob_offsets[i]));
}
@@ -103,8 +104,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options,
std::string checksum_method;
std::string checksum_value;
- ASSERT_OK(
- blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
+ ASSERT_OK(blob_log_writer.AppendFooter(WriteOptions(), footer,
+ &checksum_method, &checksum_value));
}
} // anonymous namespace
diff --git a/db/builder.cc b/db/builder.cc
index d3040ee9e..f9cc2a5ea 100644
--- a/db/builder.cc
+++ b/db/builder.cc
@@ -32,6 +32,7 @@
#include "options/options_helper.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
@@ -57,8 +58,8 @@ TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions,
Status BuildTable(
const std::string& dbname, VersionSet* versions,
const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
- const FileOptions& file_options, const ReadOptions& read_options,
- TableCache* table_cache, InternalIterator* iter,
+ const FileOptions& file_options, TableCache* table_cache,
+ InternalIterator* iter,
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
range_del_iters,
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
@@ -69,9 +70,8 @@ Status BuildTable(
IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCreationReason blob_creation_reason,
const SeqnoToTimeMapping& seqno_to_time_mapping, EventLogger* event_logger,
- int job_id, const Env::IOPriority io_priority,
- TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
- const std::string* full_history_ts_low,
+ int job_id, TableProperties* table_properties,
+ Env::WriteLifeTimeHint write_hint, const std::string* full_history_ts_low,
BlobFileCompletionCallback* blob_callback, Version* version,
uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
uint64_t* memtable_garbage_bytes) {
@@ -164,11 +164,11 @@ Status BuildTable(
table_file_created = true;
FileTypeSet tmp_set = ioptions.checksum_handoff_file_types;
- file->SetIOPriority(io_priority);
+ file->SetIOPriority(tboptions.write_options.rate_limiter_priority);
file->SetWriteLifeTimeHint(write_hint);
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, ioptions.clock, io_tracer,
- ioptions.stats, ioptions.listeners,
+ ioptions.stats, Histograms::SST_WRITE_MICROS, ioptions.listeners,
ioptions.file_checksum_gen_factory.get(),
tmp_set.Contains(FileType::kTableFile), false));
@@ -188,10 +188,11 @@ Status BuildTable(
blob_file_additions)
? new BlobFileBuilder(
versions, fs, &ioptions, &mutable_cf_options, &file_options,
- tboptions.db_id, tboptions.db_session_id, job_id,
- tboptions.column_family_id, tboptions.column_family_name,
- io_priority, write_hint, io_tracer, blob_callback,
- blob_creation_reason, &blob_file_paths, blob_file_additions)
+ &(tboptions.write_options), tboptions.db_id,
+ tboptions.db_session_id, job_id, tboptions.column_family_id,
+ tboptions.column_family_name, write_hint, io_tracer,
+ blob_callback, blob_creation_reason, &blob_file_paths,
+ blob_file_additions)
: nullptr);
const std::atomic<bool> kManualCompactionCanceledFalse{false};
@@ -244,7 +245,11 @@ Status BuildTable(
}
// TODO(noetzli): Update stats after flush, too.
- if (io_priority == Env::IO_HIGH &&
+ // TODO(hx235): Replace `rate_limiter_priority` with `io_activity` for
+ // flush IO in repair when we have an `Env::IOActivity` enum for it
+ if ((tboptions.write_options.io_activity == Env::IOActivity::kFlush ||
+ tboptions.write_options.io_activity == Env::IOActivity::kDBOpen ||
+ tboptions.write_options.rate_limiter_priority == Env::IO_HIGH) &&
IOSTATS(bytes_written) >= kReportFlushIOStatsEvery) {
ThreadStatusUtil::SetThreadOperationProperty(
ThreadStatus::FLUSH_BYTES_WRITTEN, IOSTATS(bytes_written));
@@ -275,7 +280,7 @@ Status BuildTable(
SizeApproximationOptions approx_opts;
approx_opts.files_size_error_margin = 0.1;
meta->compensated_range_deletion_size += versions->ApproximateSize(
- approx_opts, read_options, version, kv.first.Encode(),
+ approx_opts, tboptions.read_options, version, kv.first.Encode(),
tombstone_end.Encode(), 0 /* start_level */, -1 /* end_level */,
TableReaderCaller::kFlush);
}
@@ -346,13 +351,16 @@ Status BuildTable(
// Finish and check for file errors
TEST_SYNC_POINT("BuildTable:BeforeSyncTable");
- if (s.ok() && !empty) {
+ IOOptions opts;
+ *io_status =
+ WritableFileWriter::PrepareIOOptions(tboptions.write_options, opts);
+ if (s.ok() && io_status->ok() && !empty) {
StopWatch sw(ioptions.clock, ioptions.stats, TABLE_SYNC_MICROS);
- *io_status = file_writer->Sync(ioptions.use_fsync);
+ *io_status = file_writer->Sync(opts, ioptions.use_fsync);
}
TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile");
if (s.ok() && io_status->ok() && !empty) {
- *io_status = file_writer->Close();
+ *io_status = file_writer->Close(opts);
}
if (s.ok() && io_status->ok() && !empty) {
// Add the checksum information to file metadata.
@@ -396,9 +404,9 @@ Status BuildTable(
// No matter whether use_direct_io_for_flush_and_compaction is true,
// the goal is to cache it here for further user reads.
std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
- read_options, file_options, tboptions.internal_comparator, *meta,
- nullptr /* range_del_agg */, mutable_cf_options.prefix_extractor,
- nullptr,
+ tboptions.read_options, file_options, tboptions.internal_comparator,
+ *meta, nullptr /* range_del_agg */,
+ mutable_cf_options.prefix_extractor, nullptr,
(internal_stats == nullptr) ? nullptr
: internal_stats->GetFileReadHist(0),
TableReaderCaller::kFlush, /*arena=*/nullptr,
@@ -436,8 +444,13 @@ Status BuildTable(
constexpr IODebugContext* dbg = nullptr;
if (table_file_created) {
- Status ignored = fs->DeleteFile(fname, IOOptions(), dbg);
- ignored.PermitUncheckedError();
+ IOOptions opts;
+ Status prepare =
+ WritableFileWriter::PrepareIOOptions(tboptions.write_options, opts);
+ if (prepare.ok()) {
+ Status ignored = fs->DeleteFile(fname, opts, dbg);
+ ignored.PermitUncheckedError();
+ }
}
assert(blob_file_additions || blob_file_paths.empty());
diff --git a/db/builder.h b/db/builder.h
index 6a6a1866a..96d87677b 100644
--- a/db/builder.h
+++ b/db/builder.h
@@ -53,8 +53,8 @@ TableBuilder* NewTableBuilder(const TableBuilderOptions& tboptions,
extern Status BuildTable(
const std::string& dbname, VersionSet* versions,
const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions,
- const FileOptions& file_options, const ReadOptions& read_options,
- TableCache* table_cache, InternalIterator* iter,
+ const FileOptions& file_options, TableCache* table_cache,
+ InternalIterator* iter,
std::vector<std::unique_ptr<FragmentedRangeTombstoneIterator>>
range_del_iters,
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
@@ -66,7 +66,6 @@ extern Status BuildTable(
BlobFileCreationReason blob_creation_reason,
const SeqnoToTimeMapping& seqno_to_time_mapping,
EventLogger* event_logger = nullptr, int job_id = 0,
- const Env::IOPriority io_priority = Env::IO_HIGH,
TableProperties* table_properties = nullptr,
Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
const std::string* full_history_ts_low = nullptr,
diff --git a/db/column_family.cc b/db/column_family.cc
index 1e61dfab2..bad6ec889 100644
--- a/db/column_family.cc
+++ b/db/column_family.cc
@@ -1168,7 +1168,7 @@ Status ColumnFamilyData::RangesOverlapWithMemtables(
*overlap = false;
// Create an InternalIterator over all unflushed memtables
Arena arena;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions read_opts;
read_opts.total_order_seek = true;
MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc
index 99b099759..9d1a45f5b 100644
--- a/db/compaction/compaction_job.cc
+++ b/db/compaction/compaction_job.cc
@@ -1130,6 +1130,9 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
// (b) CompactionFilter::Decision::kRemoveAndSkipUntil.
read_options.total_order_seek = true;
+ const WriteOptions write_options(Env::IOPriority::IO_LOW,
+ Env::IOActivity::kCompaction);
+
// Remove the timestamps from boundaries because boundaries created in
// GenSubcompactionBoundaries doesn't strip away the timestamp.
size_t ts_sz = cfd->user_comparator()->timestamp_size();
@@ -1264,8 +1267,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
? new BlobFileBuilder(
versions_, fs_.get(),
sub_compact->compaction->immutable_options(),
- mutable_cf_options, &file_options_, db_id_, db_session_id_,
- job_id_, cfd->GetID(), cfd->GetName(), Env::IOPriority::IO_LOW,
+ mutable_cf_options, &file_options_, &write_options, db_id_,
+ db_session_id_, job_id_, cfd->GetID(), cfd->GetName(),
write_hint_, io_tracer_, blob_callback_,
BlobFileCreationReason::kCompaction, &blob_file_paths,
sub_compact->Current().GetBlobFileAdditionsPtr())
@@ -1710,6 +1713,8 @@ Status CompactionJob::InstallCompactionResults(
db_mutex_->AssertHeld();
const ReadOptions read_options(Env::IOActivity::kCompaction);
+ const WriteOptions write_options(Env::IOActivity::kCompaction);
+
auto* compaction = compact_->compaction;
assert(compaction);
@@ -1792,8 +1797,9 @@ Status CompactionJob::InstallCompactionResults(
};
return versions_->LogAndApply(
- compaction->column_family_data(), mutable_cf_options, read_options, edit,
- db_mutex_, db_directory_, /*new_descriptor_log=*/false,
+ compaction->column_family_data(), mutable_cf_options, read_options,
+ write_options, edit, db_mutex_, db_directory_,
+ /*new_descriptor_log=*/false,
/*column_family_options=*/nullptr, manifest_wcb);
}
@@ -1943,13 +1949,17 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
sub_compact->compaction->immutable_options()->listeners;
outputs.AssignFileWriter(new WritableFileWriter(
std::move(writable_file), fname, fo_copy, db_options_.clock, io_tracer_,
- db_options_.stats, listeners, db_options_.file_checksum_gen_factory.get(),
+ db_options_.stats, Histograms::SST_WRITE_MICROS, listeners,
+ db_options_.file_checksum_gen_factory.get(),
tmp_set.Contains(FileType::kTableFile), false));
// TODO(hx235): pass in the correct `oldest_key_time` instead of `0`
+ const ReadOptions read_options(Env::IOActivity::kCompaction);
+ const WriteOptions write_options(Env::IOActivity::kCompaction);
TableBuilderOptions tboptions(
*cfd->ioptions(), *(sub_compact->compaction->mutable_cf_options()),
- cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(),
+ read_options, write_options, cfd->internal_comparator(),
+ cfd->int_tbl_prop_collector_factories(),
sub_compact->compaction->output_compression(),
sub_compact->compaction->output_compression_opts(), cfd->GetID(),
cfd->GetName(), sub_compact->compaction->output_level(),
diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc
index 886bcb6e1..bd805358e 100644
--- a/db/compaction/compaction_job_test.cc
+++ b/db/compaction/compaction_job_test.cc
@@ -295,9 +295,12 @@ class CompactionJobTestBase : public testing::Test {
Status s = WritableFileWriter::Create(fs_, table_name, FileOptions(),
&file_writer, nullptr);
ASSERT_OK(s);
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> table_builder(
cf_options_.table_factory->NewTableBuilder(
TableBuilderOptions(*cfd_->ioptions(), mutable_cf_options_,
+ read_options, write_options,
cfd_->internal_comparator(),
cfd_->int_tbl_prop_collector_factories(),
CompressionType::kNoCompression,
@@ -394,7 +397,7 @@ class CompactionJobTestBase : public testing::Test {
mutex_.Lock();
EXPECT_OK(versions_->LogAndApply(
versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
- read_options_, &edit, &mutex_, nullptr));
+ read_options_, write_options_, &edit, &mutex_, nullptr));
mutex_.Unlock();
}
@@ -549,7 +552,7 @@ class CompactionJobTestBase : public testing::Test {
/*db_id=*/"", /*db_session_id=*/"", /*daily_offpeak_time_utc=*/"",
/*error_handler=*/nullptr, /*read_only=*/false));
compaction_job_stats_.Reset();
- ASSERT_OK(SetIdentityFile(env_, dbname_));
+ ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
VersionEdit new_db;
new_db.SetLogNumber(0);
@@ -568,11 +571,11 @@ class CompactionJobTestBase : public testing::Test {
log::Writer log(std::move(file_writer), 0, false);
std::string record;
new_db.EncodeTo(&record);
- s = log.AddRecord(record);
+ s = log.AddRecord(WriteOptions(), record);
}
ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file.
- s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
@@ -736,6 +739,7 @@ class CompactionJobTestBase : public testing::Test {
MutableCFOptions mutable_cf_options_;
MutableDBOptions mutable_db_options_;
const ReadOptions read_options_;
+ const WriteOptions write_options_;
std::shared_ptr<Cache> table_cache_;
WriteController write_controller_;
WriteBufferManager write_buffer_manager_;
diff --git a/db/compaction/compaction_outputs.cc b/db/compaction/compaction_outputs.cc
index eb76cd849..9ad2b3a0d 100644
--- a/db/compaction/compaction_outputs.cc
+++ b/db/compaction/compaction_outputs.cc
@@ -62,12 +62,15 @@ IOStatus CompactionOutputs::WriterSyncClose(const Status& input_status,
Statistics* statistics,
bool use_fsync) {
IOStatus io_s;
- if (input_status.ok()) {
+ IOOptions opts;
+ io_s = WritableFileWriter::PrepareIOOptions(
+ WriteOptions(Env::IOActivity::kCompaction), opts);
+ if (input_status.ok() && io_s.ok()) {
StopWatch sw(clock, statistics, COMPACTION_OUTFILE_SYNC_MICROS);
- io_s = file_writer_->Sync(use_fsync);
+ io_s = file_writer_->Sync(opts, use_fsync);
}
if (input_status.ok() && io_s.ok()) {
- io_s = file_writer_->Close();
+ io_s = file_writer_->Close(opts);
}
if (input_status.ok() && io_s.ok()) {
diff --git a/db/convenience.cc b/db/convenience.cc
index 08bddc8e8..9e78adc74 100644
--- a/db/convenience.cc
+++ b/db/convenience.cc
@@ -34,7 +34,7 @@ Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
Status VerifySstFileChecksum(const Options& options,
const EnvOptions& env_options,
const std::string& file_path) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
return VerifySstFileChecksum(options, env_options, read_options, file_path);
}
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index 0c8ae6033..5f7b2a0b0 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -3126,7 +3126,8 @@ TEST_F(DBBasicTest, LastSstFileNotInManifest) {
// Manually add a sst file.
constexpr uint64_t kSstFileNumber = 100;
const std::string kSstFile = MakeTableFileName(dbname_, kSstFileNumber);
- ASSERT_OK(WriteStringToFile(env_, /* data = */ "bad sst file content",
+ ASSERT_OK(WriteStringToFile(env_,
+ /* data = */ "bad sst file content",
/* fname = */ kSstFile,
/* should_sync = */ true));
ASSERT_OK(env_->FileExists(kSstFile));
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 8084f02cf..36b18f9a2 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -333,8 +333,10 @@ Status DBImpl::Resume() {
Status DBImpl::ResumeImpl(DBRecoverContext context) {
mutex_.AssertHeld();
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
WaitForBackgroundWork();
Status s;
@@ -373,8 +375,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) {
assert(cfh);
ColumnFamilyData* cfd = cfh->cfd();
const MutableCFOptions& cf_opts = *cfd->GetLatestMutableCFOptions();
- s = versions_->LogAndApply(cfd, cf_opts, read_options, &edit, &mutex_,
- directories_.GetDbDir());
+ s = versions_->LogAndApply(cfd, cf_opts, read_options, write_options,
+ &edit, &mutex_, directories_.GetDbDir());
if (!s.ok()) {
io_s = versions_->io_status();
if (!io_s.ok()) {
@@ -716,23 +718,26 @@ Status DBImpl::CloseHelper() {
Status DBImpl::CloseImpl() { return CloseHelper(); }
DBImpl::~DBImpl() {
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
+
// TODO: remove this.
init_logger_creation_s_.PermitUncheckedError();
InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
- if (closed_) {
- return;
- }
+ if (!closed_) {
+ closed_ = true;
- closed_ = true;
+ {
+ const Status s = MaybeReleaseTimestampedSnapshotsAndCheck();
+ s.PermitUncheckedError();
+ }
- {
- const Status s = MaybeReleaseTimestampedSnapshotsAndCheck();
- s.PermitUncheckedError();
+ closing_status_ = CloseImpl();
+ closing_status_.PermitUncheckedError();
}
-
- closing_status_ = CloseImpl();
- closing_status_.PermitUncheckedError();
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
}
void DBImpl::MaybeIgnoreError(Status* s) const {
@@ -807,7 +812,9 @@ Status DBImpl::StartPeriodicTaskScheduler() {
return s;
}
-Status DBImpl::RegisterRecordSeqnoTimeWorker(bool is_new_db) {
+Status DBImpl::RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ bool is_new_db) {
options_mutex_.AssertHeld();
uint64_t min_preserve_seconds = std::numeric_limits<uint64_t>::max();
@@ -890,7 +897,8 @@ Status DBImpl::RegisterRecordSeqnoTimeWorker(bool is_new_db) {
VersionEdit edit;
edit.SetLastSequence(kMax);
s = versions_->LogAndApplyToDefaultColumnFamily(
- {}, &edit, &mutex_, directories_.GetDbDir());
+ read_options, write_options, &edit, &mutex_,
+ directories_.GetDbDir());
if (!s.ok() && versions_->io_status().IsIOError()) {
s = error_handler_.SetBGError(versions_->io_status(),
BackgroundErrorReason::kManifestWrite);
@@ -1000,6 +1008,7 @@ void DBImpl::PersistStats() {
stats_slice_initialized_ = true;
std::swap(stats_slice_, stats_map);
if (s.ok()) {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions wo;
wo.low_pri = true;
wo.no_slowdown = true;
@@ -1214,8 +1223,10 @@ FSDirectory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const {
Status DBImpl::SetOptions(
ColumnFamilyHandle* column_family,
const std::unordered_map<std::string, std::string>& options_map) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
auto* cfd =
static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
if (options_map.empty()) {
@@ -1238,14 +1249,15 @@ Status DBImpl::SetOptions(
new_options = *cfd->GetLatestMutableCFOptions();
// Append new version to recompute compaction score.
VersionEdit dummy_edit;
- s = versions_->LogAndApply(cfd, new_options, read_options, &dummy_edit,
- &mutex_, directories_.GetDbDir());
+ s = versions_->LogAndApply(cfd, new_options, read_options, write_options,
+ &dummy_edit, &mutex_, directories_.GetDbDir());
// Trigger possible flush/compactions. This has to be before we persist
// options to file, otherwise there will be a deadlock with writer
// thread.
InstallSuperVersionAndScheduleWork(cfd, &sv_context, new_options);
- persist_options_status = WriteOptionsFile(true /*db_mutex_already_held*/);
+ persist_options_status =
+ WriteOptionsFile(write_options, true /*db_mutex_already_held*/);
bg_cv_.SignalAll();
}
}
@@ -1424,7 +1436,8 @@ Status DBImpl::SetDBOptions(
}
write_thread_.ExitUnbatched(&w);
}
- persist_options_status = WriteOptionsFile(true /*db_mutex_already_held*/);
+ persist_options_status =
+ WriteOptionsFile(WriteOptions(), true /*db_mutex_already_held*/);
} else {
// To get here, we must have had invalid options and will not attempt to
// persist the options, which means the status is "OK/Uninitialized.
@@ -1476,14 +1489,14 @@ int DBImpl::FindMinimumEmptyLevelFitting(
return minimum_level;
}
-Status DBImpl::FlushWAL(bool sync) {
+Status DBImpl::FlushWAL(const WriteOptions& write_options, bool sync) {
if (manual_wal_flush_) {
IOStatus io_s;
{
// We need to lock log_write_mutex_ since logs_ might change concurrently
InstrumentedMutexLock wl(&log_write_mutex_);
log::Writer* cur_log_writer = logs_.back().writer;
- io_s = cur_log_writer->WriteBuffer();
+ io_s = cur_log_writer->WriteBuffer(write_options);
}
if (!io_s.ok()) {
ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
@@ -1556,11 +1569,22 @@ Status DBImpl::SyncWAL() {
RecordTick(stats_, WAL_FILE_SYNCED);
Status status;
IOStatus io_s;
- for (log::Writer* log : logs_to_sync) {
- io_s = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync);
- if (!io_s.ok()) {
- status = io_s;
- break;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ const ReadOptions read_options;
+ const WriteOptions write_options;
+ IOOptions opts;
+ io_s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (!io_s.ok()) {
+ status = io_s;
+ }
+ if (io_s.ok()) {
+ for (log::Writer* log : logs_to_sync) {
+ io_s =
+ log->file()->SyncWithoutFlush(opts, immutable_db_options_.use_fsync);
+ if (!io_s.ok()) {
+ status = io_s;
+ break;
+ }
}
}
if (!io_s.ok()) {
@@ -1589,9 +1613,7 @@ Status DBImpl::SyncWAL() {
}
if (status.ok() && synced_wals.IsWalAddition()) {
InstrumentedMutexLock l(&mutex_);
- // TODO: plumb Env::IOActivity
- const ReadOptions read_options;
- status = ApplyWALToManifest(read_options, &synced_wals);
+ status = ApplyWALToManifest(read_options, write_options, &synced_wals);
}
TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2");
@@ -1600,12 +1622,14 @@ Status DBImpl::SyncWAL() {
}
Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options,
+ const WriteOptions& write_options,
VersionEdit* synced_wals) {
// not empty, write to MANIFEST.
mutex_.AssertHeld();
Status status = versions_->LogAndApplyToDefaultColumnFamily(
- read_options, synced_wals, &mutex_, directories_.GetDbDir());
+ read_options, write_options, synced_wals, &mutex_,
+ directories_.GetDbDir());
if (!status.ok() && versions_->io_status().IsIOError()) {
status = error_handler_.SetBGError(versions_->io_status(),
BackgroundErrorReason::kManifestWrite);
@@ -3486,6 +3510,7 @@ void DBImpl::MultiGetEntity(const ReadOptions& _read_options, size_t num_keys,
}
Status DBImpl::WrapUpCreateColumnFamilies(
+ const ReadOptions& read_options, const WriteOptions& write_options,
const std::vector<const ColumnFamilyOptions*>& cf_options) {
// NOTE: this function is skipped for create_missing_column_families and
// DB::Open, so new functionality here might need to go into Open also.
@@ -3498,26 +3523,32 @@ Status DBImpl::WrapUpCreateColumnFamilies(
}
}
// Attempt both follow-up actions even if one fails
- Status s = WriteOptionsFile(false /*db_mutex_already_held*/);
+ Status s = WriteOptionsFile(write_options, false /*db_mutex_already_held*/);
if (register_worker) {
- s.UpdateIfOk(RegisterRecordSeqnoTimeWorker(/*from_db_open=*/false));
+ s.UpdateIfOk(RegisterRecordSeqnoTimeWorker(read_options, write_options,
+ /* is_new_db */ false));
}
return s;
}
-Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options,
+Status DBImpl::CreateColumnFamily(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ const ColumnFamilyOptions& cf_options,
const std::string& column_family,
ColumnFamilyHandle** handle) {
assert(handle != nullptr);
InstrumentedMutexLock ol(&options_mutex_);
- Status s = CreateColumnFamilyImpl(cf_options, column_family, handle);
+ Status s = CreateColumnFamilyImpl(read_options, write_options, cf_options,
+ column_family, handle);
if (s.ok()) {
- s.UpdateIfOk(WrapUpCreateColumnFamilies({&cf_options}));
+ s.UpdateIfOk(
+ WrapUpCreateColumnFamilies(read_options, write_options, {&cf_options}));
}
return s;
}
Status DBImpl::CreateColumnFamilies(
+ const ReadOptions& read_options, const WriteOptions& write_options,
const ColumnFamilyOptions& cf_options,
const std::vector<std::string>& column_family_names,
std::vector<ColumnFamilyHandle*>* handles) {
@@ -3529,7 +3560,8 @@ Status DBImpl::CreateColumnFamilies(
bool success_once = false;
for (size_t i = 0; i < num_cf; i++) {
ColumnFamilyHandle* handle;
- s = CreateColumnFamilyImpl(cf_options, column_family_names[i], &handle);
+ s = CreateColumnFamilyImpl(read_options, write_options, cf_options,
+ column_family_names[i], &handle);
if (!s.ok()) {
break;
}
@@ -3537,12 +3569,14 @@ Status DBImpl::CreateColumnFamilies(
success_once = true;
}
if (success_once) {
- s.UpdateIfOk(WrapUpCreateColumnFamilies({&cf_options}));
+ s.UpdateIfOk(
+ WrapUpCreateColumnFamilies(read_options, write_options, {&cf_options}));
}
return s;
}
Status DBImpl::CreateColumnFamilies(
+ const ReadOptions& read_options, const WriteOptions& write_options,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles) {
assert(handles != nullptr);
@@ -3555,7 +3589,8 @@ Status DBImpl::CreateColumnFamilies(
cf_opts.reserve(num_cf);
for (size_t i = 0; i < num_cf; i++) {
ColumnFamilyHandle* handle;
- s = CreateColumnFamilyImpl(column_families[i].options,
+ s = CreateColumnFamilyImpl(read_options, write_options,
+ column_families[i].options,
column_families[i].name, &handle);
if (!s.ok()) {
break;
@@ -3565,17 +3600,18 @@ Status DBImpl::CreateColumnFamilies(
cf_opts.push_back(&column_families[i].options);
}
if (success_once) {
- s.UpdateIfOk(WrapUpCreateColumnFamilies(cf_opts));
+ s.UpdateIfOk(
+ WrapUpCreateColumnFamilies(read_options, write_options, cf_opts));
}
return s;
}
-Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
+Status DBImpl::CreateColumnFamilyImpl(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ const ColumnFamilyOptions& cf_options,
const std::string& column_family_name,
ColumnFamilyHandle** handle) {
options_mutex_.AssertHeld();
- // TODO: plumb Env::IOActivity
- const ReadOptions read_options;
Status s;
*handle = nullptr;
@@ -3619,7 +3655,7 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
// LogAndApply will both write the creation in MANIFEST and create
// ColumnFamilyData object
s = versions_->LogAndApply(nullptr, MutableCFOptions(cf_options),
- read_options, &edit, &mutex_,
+ read_options, write_options, &edit, &mutex_,
directories_.GetDbDir(), false, &cf_options);
write_thread_.ExitUnbatched(&w);
}
@@ -3668,7 +3704,8 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) {
InstrumentedMutexLock ol(&options_mutex_);
Status s = DropColumnFamilyImpl(column_family);
if (s.ok()) {
- s = WriteOptionsFile(false /*db_mutex_already_held*/);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ s = WriteOptionsFile(WriteOptions(), false /*db_mutex_already_held*/);
}
return s;
}
@@ -3686,8 +3723,9 @@ Status DBImpl::DropColumnFamilies(
success_once = true;
}
if (success_once) {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
Status persist_options_status =
- WriteOptionsFile(false /*db_mutex_already_held*/);
+ WriteOptionsFile(WriteOptions(), false /*db_mutex_already_held*/);
if (s.ok() && !persist_options_status.ok()) {
s = persist_options_status;
}
@@ -3696,8 +3734,10 @@ Status DBImpl::DropColumnFamilies(
}
Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
auto cfd = cfh->cfd();
if (cfd->GetID() == 0) {
@@ -3721,7 +3761,7 @@ Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) {
WriteThread::Writer w;
write_thread_.EnterUnbatched(&w, &mutex_);
s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
- read_options, &edit, &mutex_,
+ read_options, write_options, &edit, &mutex_,
directories_.GetDbDir());
write_thread_.ExitUnbatched(&w);
}
@@ -3748,7 +3788,8 @@ Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) {
if (cfd->ioptions()->preserve_internal_time_seconds > 0 ||
cfd->ioptions()->preclude_last_level_data_seconds > 0) {
- s = RegisterRecordSeqnoTimeWorker(/*from_db_open=*/false);
+ s = RegisterRecordSeqnoTimeWorker(read_options, write_options,
+ /* is_new_db */ false);
}
if (s.ok()) {
@@ -3779,7 +3820,7 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options,
// falsify later if key-may-exist but can't fetch value
*value_found = true;
}
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions roptions = read_options;
roptions.read_tier = kBlockCacheTier; // read from block cache only
PinnableSlice pinnable_val;
@@ -4298,7 +4339,7 @@ Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
version->Ref();
mutex_.Unlock();
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = version->GetPropertiesOfAllTables(read_options, props);
@@ -4322,7 +4363,7 @@ Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family,
version->Ref();
mutex_.Unlock();
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = version->GetPropertiesOfTablesInRange(read_options, range, n, props);
@@ -4664,7 +4705,7 @@ Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options,
SuperVersion* sv = GetAndRefSuperVersion(cfd);
v = sv->current;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
for (int i = 0; i < n; i++) {
// Add timestamp if needed
@@ -4728,8 +4769,10 @@ Status DBImpl::GetUpdatesSince(
}
Status DBImpl::DeleteFile(std::string name) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
uint64_t number;
FileType type;
WalFileType log_type;
@@ -4809,7 +4852,7 @@ Status DBImpl::DeleteFile(std::string name) {
edit.SetColumnFamily(cfd->GetID());
edit.DeleteFile(level, number);
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
- read_options, &edit, &mutex_,
+ read_options, write_options, &edit, &mutex_,
directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(cfd,
@@ -4832,8 +4875,10 @@ Status DBImpl::DeleteFile(std::string name) {
Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
const RangePtr* ranges, size_t n,
bool include_end) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
Status status = Status::OK();
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
ColumnFamilyData* cfd = cfh->cfd();
@@ -4901,7 +4946,7 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
}
input_version->Ref();
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
- read_options, &edit, &mutex_,
+ read_options, write_options, &edit, &mutex_,
directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(cfd,
@@ -5315,7 +5360,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
return result;
}
-Status DBImpl::WriteOptionsFile(bool db_mutex_already_held) {
+Status DBImpl::WriteOptionsFile(const WriteOptions& write_options,
+ bool db_mutex_already_held) {
options_mutex_.AssertHeld();
if (db_mutex_already_held) {
@@ -5349,8 +5395,8 @@ Status DBImpl::WriteOptionsFile(bool db_mutex_already_held) {
std::string file_name =
TempOptionsFileName(GetName(), versions_->NewFileNumber());
- Status s = PersistRocksDBOptions(db_options, cf_names, cf_opts, file_name,
- fs_.get());
+ Status s = PersistRocksDBOptions(write_options, db_options, cf_names, cf_opts,
+ file_name, fs_.get());
if (s.ok()) {
s = RenameTempFileToOptionsFile(file_name);
@@ -5543,7 +5589,7 @@ Status DBImpl::GetLatestSequenceForKey(
MergeContext merge_context;
SequenceNumber max_covering_tombstone_seq = 0;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions read_options;
SequenceNumber current_seq = versions_->LastSequence();
@@ -5699,8 +5745,10 @@ Status DBImpl::IngestExternalFile(
Status DBImpl::IngestExternalFiles(
const std::vector<IngestExternalFileArg>& args) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
if (args.empty()) {
return Status::InvalidArgument("ingestion arg list is empty");
}
@@ -5918,9 +5966,10 @@ Status DBImpl::IngestExternalFiles(
}
assert(0 == num_entries);
}
- status = versions_->LogAndApply(cfds_to_commit, mutable_cf_options_list,
- read_options, edit_lists, &mutex_,
- directories_.GetDbDir());
+ status = versions_->LogAndApply(
+ cfds_to_commit, mutable_cf_options_list, read_options, write_options,
+
+ edit_lists, &mutex_, directories_.GetDbDir());
// It is safe to update VersionSet last seqno here after LogAndApply since
// LogAndApply persists last sequence number from VersionEdits,
// which are from file's largest seqno and not from VersionSet.
@@ -6022,8 +6071,10 @@ Status DBImpl::CreateColumnFamilyWithImport(
ColumnFamilyHandle** handle) {
assert(handle != nullptr);
assert(*handle == nullptr);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
std::string cf_comparator_name = options.comparator->Name();
size_t total_file_num = 0;
@@ -6039,7 +6090,8 @@ Status DBImpl::CreateColumnFamilyWithImport(
}
// Create column family.
- auto status = CreateColumnFamily(options, column_family_name, handle);
+ auto status = CreateColumnFamily(read_options, write_options, options,
+ column_family_name, handle);
if (!status.ok()) {
return status;
}
@@ -6075,8 +6127,8 @@ Status DBImpl::CreateColumnFamilyWithImport(
next_file_number = versions_->FetchAddFileNumber(total_file_num);
auto cf_options = cfd->GetLatestMutableCFOptions();
status =
- versions_->LogAndApply(cfd, *cf_options, read_options, &dummy_edit,
- &mutex_, directories_.GetDbDir());
+ versions_->LogAndApply(cfd, *cf_options, read_options, write_options,
+ &dummy_edit, &mutex_, directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options);
}
@@ -6113,8 +6165,8 @@ Status DBImpl::CreateColumnFamilyWithImport(
if (status.ok()) {
auto cf_options = cfd->GetLatestMutableCFOptions();
status = versions_->LogAndApply(cfd, *cf_options, read_options,
- import_job.edit(), &mutex_,
- directories_.GetDbDir());
+ write_options, import_job.edit(),
+ &mutex_, directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(cfd, &sv_context, *cf_options);
}
@@ -6198,6 +6250,7 @@ Status DBImpl::ClipColumnFamily(ColumnFamilyHandle* column_family,
empty_after_delete = true;
} else {
const Comparator* const ucmp = column_family->GetComparator();
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions wo;
// Delete [smallest_user_key, clip_begin_key)
if (ucmp->Compare(smallest_user_key, begin_key) < 0) {
@@ -6518,8 +6571,10 @@ Status DBImpl::ReserveFileNumbersBeforeIngestion(
ColumnFamilyData* cfd, uint64_t num,
std::unique_ptr<std::list<uint64_t>::iterator>& pending_output_elem,
uint64_t* next_file_number) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
Status s;
SuperVersionContext dummy_sv_ctx(true /* create_superversion */);
assert(nullptr != next_file_number);
@@ -6537,8 +6592,8 @@ Status DBImpl::ReserveFileNumbersBeforeIngestion(
// reuse the file number that has already assigned to the internal file,
// and this will overwrite the external file. To protect the external
// file, we have to make sure the file number will never being reused.
- s = versions_->LogAndApply(cfd, *cf_options, read_options, &dummy_edit,
- &mutex_, directories_.GetDbDir());
+ s = versions_->LogAndApply(cfd, *cf_options, read_options, write_options,
+ &dummy_edit, &mutex_, directories_.GetDbDir());
if (s.ok()) {
InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options);
}
diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h
index 34a5f3398..8853033ae 100644
--- a/db/db_impl/db_impl.h
+++ b/db/db_impl/db_impl.h
@@ -321,14 +321,41 @@ class DBImpl : public DB {
virtual Status CreateColumnFamily(const ColumnFamilyOptions& cf_options,
const std::string& column_family,
- ColumnFamilyHandle** handle) override;
+ ColumnFamilyHandle** handle) override {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ return CreateColumnFamily(ReadOptions(), WriteOptions(), cf_options,
+ column_family, handle);
+ }
+ virtual Status CreateColumnFamily(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ const ColumnFamilyOptions& cf_options,
+ const std::string& column_family,
+ ColumnFamilyHandle** handle);
virtual Status CreateColumnFamilies(
const ColumnFamilyOptions& cf_options,
const std::vector<std::string>& column_family_names,
- std::vector<ColumnFamilyHandle*>* handles) override;
+ std::vector<ColumnFamilyHandle*>* handles) override {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ return CreateColumnFamilies(ReadOptions(), WriteOptions(), cf_options,
+ column_family_names, handles);
+ }
+ virtual Status CreateColumnFamilies(
+ const ReadOptions& read_options, const WriteOptions& write_options,
+ const ColumnFamilyOptions& cf_options,
+ const std::vector<std::string>& column_family_names,
+ std::vector<ColumnFamilyHandle*>* handles);
+
virtual Status CreateColumnFamilies(
const std::vector<ColumnFamilyDescriptor>& column_families,
- std::vector<ColumnFamilyHandle*>* handles) override;
+ std::vector<ColumnFamilyHandle*>* handles) override {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ return CreateColumnFamilies(ReadOptions(), WriteOptions(), column_families,
+ handles);
+ }
+ virtual Status CreateColumnFamilies(
+ const ReadOptions& read_options, const WriteOptions& write_options,
+ const std::vector<ColumnFamilyDescriptor>& column_families,
+ std::vector<ColumnFamilyHandle*>* handles);
virtual Status DropColumnFamily(ColumnFamilyHandle* column_family) override;
virtual Status DropColumnFamilies(
const std::vector<ColumnFamilyHandle*>& column_families) override;
@@ -440,7 +467,12 @@ class DBImpl : public DB {
virtual Status Flush(
const FlushOptions& options,
const std::vector<ColumnFamilyHandle*>& column_families) override;
- virtual Status FlushWAL(bool sync) override;
+ virtual Status FlushWAL(bool sync) override {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ return FlushWAL(WriteOptions(), sync);
+ }
+
+ virtual Status FlushWAL(const WriteOptions& write_options, bool sync);
bool WALBufferIsEmpty();
virtual Status SyncWAL() override;
virtual Status LockWAL() override;
@@ -1406,7 +1438,8 @@ class DBImpl : public DB {
// Persist options to options file. Must be holding options_mutex_.
// Will lock DB mutex if !db_mutex_already_held.
- Status WriteOptionsFile(bool db_mutex_already_held);
+ Status WriteOptionsFile(const WriteOptions& write_options,
+ bool db_mutex_already_held);
Status CompactRangeInternal(const CompactRangeOptions& options,
ColumnFamilyHandle* column_family,
@@ -1532,7 +1565,8 @@ class DBImpl : public DB {
virtual bool OwnTablesAndLogs() const { return true; }
// Setup DB identity file, and write DB ID to manifest if necessary.
- Status SetupDBId(bool read_only, RecoveryContext* recovery_ctx);
+ Status SetupDBId(const WriteOptions& write_options, bool read_only,
+ RecoveryContext* recovery_ctx);
// Assign db_id_ and write DB ID to manifest if necessary.
void SetDBId(std::string&& id, bool read_only, RecoveryContext* recovery_ctx);
@@ -1659,7 +1693,8 @@ class DBImpl : public DB {
return w;
}
Status ClearWriter() {
- Status s = writer->WriteBuffer();
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ Status s = writer->WriteBuffer(WriteOptions());
delete writer;
writer = nullptr;
return s;
@@ -1835,12 +1870,15 @@ class DBImpl : public DB {
const Status CreateArchivalDirectory();
// Create a column family, without some of the follow-up work yet
- Status CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
+ Status CreateColumnFamilyImpl(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ const ColumnFamilyOptions& cf_options,
const std::string& cf_name,
ColumnFamilyHandle** handle);
// Follow-up work to user creating a column family or (families)
Status WrapUpCreateColumnFamilies(
+ const ReadOptions& read_options, const WriteOptions& write_options,
const std::vector<const ColumnFamilyOptions*>& cf_options);
Status DropColumnFamilyImpl(ColumnFamilyHandle* column_family);
@@ -1872,7 +1910,8 @@ class DBImpl : public DB {
void ReleaseFileNumberFromPendingOutputs(
std::unique_ptr<std::list<uint64_t>::iterator>& v);
- IOStatus SyncClosedLogs(JobContext* job_context, VersionEdit* synced_wals,
+ IOStatus SyncClosedLogs(const WriteOptions& write_options,
+ JobContext* job_context, VersionEdit* synced_wals,
bool error_recovery_in_prog);
// Flush the in-memory write buffer to storage. Switches to a new
@@ -2058,12 +2097,10 @@ class DBImpl : public DB {
WriteBatch* tmp_batch, WriteBatch** merged_batch,
size_t* write_with_wal, WriteBatch** to_be_cached_state);
- // rate_limiter_priority is used to charge `DBOptions::rate_limiter`
- // for automatic WAL flush (`Options::manual_wal_flush` == false)
- // associated with this WriteToWAL
- IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer,
- uint64_t* log_used, uint64_t* log_size,
- Env::IOPriority rate_limiter_priority,
+ IOStatus WriteToWAL(const WriteBatch& merged_batch,
+ const WriteOptions& write_options,
+ log::Writer* log_writer, uint64_t* log_used,
+ uint64_t* log_size,
LogFileNumberSize& log_file_number_size);
IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group,
@@ -2175,7 +2212,9 @@ class DBImpl : public DB {
// Cancel scheduled periodic tasks
Status CancelPeriodicTaskScheduler();
- Status RegisterRecordSeqnoTimeWorker(bool is_new_db);
+ Status RegisterRecordSeqnoTimeWorker(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ bool is_new_db);
void PrintStatistics();
@@ -2203,7 +2242,9 @@ class DBImpl : public DB {
// helper function to call after some of the logs_ were synced
void MarkLogsSynced(uint64_t up_to, bool synced_dir, VersionEdit* edit);
- Status ApplyWALToManifest(const ReadOptions& read_options, VersionEdit* edit);
+ Status ApplyWALToManifest(const ReadOptions& read_options,
+ const WriteOptions& write_options,
+ VersionEdit* edit);
// WALs with log number up to up_to are not synced successfully.
void MarkLogsNotSynced(uint64_t up_to);
@@ -2275,8 +2316,9 @@ class DBImpl : public DB {
size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; }
- IOStatus CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
- size_t preallocate_block_size, log::Writer** new_log);
+ IOStatus CreateWAL(const WriteOptions& write_options, uint64_t log_file_num,
+ uint64_t recycle_log_number, size_t preallocate_block_size,
+ log::Writer** new_log);
// Validate self-consistency of DB options
static Status ValidateOptions(const DBOptions& db_options);
diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc
index 71c23de95..a47f095b9 100644
--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -19,6 +19,10 @@
#include "monitoring/perf_context_imp.h"
#include "monitoring/thread_status_updater.h"
#include "monitoring/thread_status_util.h"
+#include "rocksdb/file_system.h"
+#include "rocksdb/io_status.h"
+#include "rocksdb/options.h"
+#include "rocksdb/table.h"
#include "test_util/sync_point.h"
#include "util/cast_util.h"
#include "util/coding.h"
@@ -112,7 +116,8 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
return true;
}
-IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
+IOStatus DBImpl::SyncClosedLogs(const WriteOptions& write_options,
+ JobContext* job_context,
VersionEdit* synced_wals,
bool error_recovery_in_prog) {
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
@@ -143,7 +148,13 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
if (error_recovery_in_prog) {
log->file()->reset_seen_error();
}
- io_s = log->file()->Sync(immutable_db_options_.use_fsync);
+
+ IOOptions io_options;
+ io_s = WritableFileWriter::PrepareIOOptions(write_options, io_options);
+ if (!io_s.ok()) {
+ break;
+ }
+ io_s = log->file()->Sync(io_options, immutable_db_options_.use_fsync);
if (!io_s.ok()) {
break;
}
@@ -152,16 +163,21 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
if (error_recovery_in_prog) {
log->file()->reset_seen_error();
}
- io_s = log->Close();
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ io_s = log->Close(WriteOptions());
if (!io_s.ok()) {
break;
}
}
}
if (io_s.ok()) {
- io_s = directories_.GetWalDir()->FsyncWithDirOptions(
- IOOptions(), nullptr,
- DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+ IOOptions io_options;
+ io_s = WritableFileWriter::PrepareIOOptions(write_options, io_options);
+ if (io_s.ok()) {
+ io_s = directories_.GetWalDir()->FsyncWithDirOptions(
+ io_options, nullptr,
+ DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+ }
}
TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock",
@@ -199,6 +215,8 @@ Status DBImpl::FlushMemTableToOutputFile(
assert(cfd->imm()->IsFlushPending());
assert(versions_);
assert(versions_->GetColumnFamilySet());
+ const ReadOptions read_options(Env::IOActivity::kFlush);
+ const WriteOptions write_options(Env::IOActivity::kFlush);
// If there are more than one column families, we need to make sure that
// all the log files except the most recent one are synced. Otherwise if
// the host crashes after flushing and before WAL is persistent, the
@@ -265,13 +283,12 @@ Status DBImpl::FlushMemTableToOutputFile(
VersionEdit synced_wals;
bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
mutex_.Unlock();
- log_io_s =
- SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog);
+ log_io_s = SyncClosedLogs(write_options, job_context, &synced_wals,
+ error_recovery_in_prog);
mutex_.Lock();
if (log_io_s.ok() && synced_wals.IsWalAddition()) {
- const ReadOptions read_options(Env::IOActivity::kFlush);
- log_io_s =
- status_to_io_status(ApplyWALToManifest(read_options, &synced_wals));
+ log_io_s = status_to_io_status(
+ ApplyWALToManifest(read_options, write_options, &synced_wals));
TEST_SYNC_POINT_CALLBACK("DBImpl::FlushMemTableToOutputFile:CommitWal:1",
nullptr);
}
@@ -465,6 +482,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
const autovector<BGFlushArg>& bg_flush_args, bool* made_progress,
JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri) {
mutex_.AssertHeld();
+ const ReadOptions read_options(Env::IOActivity::kFlush);
+ const WriteOptions write_options(Env::IOActivity::kFlush);
autovector<ColumnFamilyData*> cfds;
for (const auto& arg : bg_flush_args) {
@@ -552,13 +571,12 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
VersionEdit synced_wals;
bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
mutex_.Unlock();
- log_io_s =
- SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog);
+ log_io_s = SyncClosedLogs(write_options, job_context, &synced_wals,
+ error_recovery_in_prog);
mutex_.Lock();
if (log_io_s.ok() && synced_wals.IsWalAddition()) {
- const ReadOptions read_options(Env::IOActivity::kFlush);
- log_io_s =
- status_to_io_status(ApplyWALToManifest(read_options, &synced_wals));
+ log_io_s = status_to_io_status(
+ ApplyWALToManifest(read_options, write_options, &synced_wals));
}
if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
@@ -653,9 +671,14 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
// Sync on all distinct output directories.
for (auto dir : distinct_output_dirs) {
if (dir != nullptr) {
- Status error_status = dir->FsyncWithDirOptions(
- IOOptions(), nullptr,
- DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+ IOOptions io_options;
+ Status error_status =
+ WritableFileWriter::PrepareIOOptions(write_options, io_options);
+ if (error_status.ok()) {
+ error_status = dir->FsyncWithDirOptions(
+ io_options, nullptr,
+ DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
+ }
if (!error_status.ok()) {
s = error_status;
break;
@@ -1049,8 +1072,10 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
edit.SetColumnFamily(cfd->GetID());
edit.SetFullHistoryTsLow(ts_low);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
TEST_SYNC_POINT_CALLBACK("DBImpl::IncreaseFullHistoryTsLowImpl:BeforeEdit",
&edit);
@@ -1064,7 +1089,7 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
}
Status s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
- read_options, &edit, &mutex_,
+ read_options, write_options, &edit, &mutex_,
directories_.GetDbDir());
if (!s.ok()) {
return s;
@@ -1754,6 +1779,7 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
}
const ReadOptions read_options(Env::IOActivity::kCompaction);
+ const WriteOptions write_options(Env::IOActivity::kCompaction);
SuperVersionContext sv_context(/* create_superversion */ true);
@@ -1870,9 +1896,9 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
edit.DebugString().data());
- Status status =
- versions_->LogAndApply(cfd, mutable_cf_options, read_options, &edit,
- &mutex_, directories_.GetDbDir());
+ Status status = versions_->LogAndApply(cfd, mutable_cf_options,
+ read_options, write_options, &edit,
+ &mutex_, directories_.GetDbDir());
cfd->compaction_picker()->UnregisterCompaction(c.get());
c.reset();
@@ -3480,6 +3506,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Start");
const ReadOptions read_options(Env::IOActivity::kCompaction);
+ const WriteOptions write_options(Env::IOActivity::kCompaction);
bool is_manual = (manual_compaction != nullptr);
std::unique_ptr<Compaction> c;
@@ -3692,7 +3719,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
}
status = versions_->LogAndApply(
c->column_family_data(), *c->mutable_cf_options(), read_options,
- c->edit(), &mutex_, directories_.GetDbDir(),
+ write_options, c->edit(), &mutex_, directories_.GetDbDir(),
/*new_descriptor_log=*/false, /*column_family_options=*/nullptr,
[&c, &compaction_released](const Status& s) {
c->ReleaseCompactionFiles(s);
@@ -3766,7 +3793,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
}
status = versions_->LogAndApply(
c->column_family_data(), *c->mutable_cf_options(), read_options,
- c->edit(), &mutex_, directories_.GetDbDir(),
+ write_options, c->edit(), &mutex_, directories_.GetDbDir(),
/*new_descriptor_log=*/false, /*column_family_options=*/nullptr,
[&c, &compaction_released](const Status& s) {
c->ReleaseCompactionFiles(s);
diff --git a/db/db_impl/db_impl_experimental.cc b/db/db_impl/db_impl_experimental.cc
index 442cb4767..c90df262e 100644
--- a/db/db_impl/db_impl_experimental.cc
+++ b/db/db_impl/db_impl_experimental.cc
@@ -61,8 +61,10 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
"PromoteL0 FAILED. Invalid target level %d\n", target_level);
return Status::InvalidArgument("Invalid target level");
}
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
Status status;
VersionEdit edit;
JobContext job_context(next_job_id_.fetch_add(1), true);
@@ -143,7 +145,7 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
}
status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
- read_options, &edit, &mutex_,
+ read_options, write_options, &edit, &mutex_,
directories_.GetDbDir());
if (status.ok()) {
InstallSuperVersionAndScheduleWork(cfd,
diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc
index bd4879647..3519ecec1 100644
--- a/db/db_impl/db_impl_files.cc
+++ b/db/db_impl/db_impl_files.cc
@@ -18,6 +18,7 @@
#include "file/sst_file_manager_impl.h"
#include "logging/logging.h"
#include "port/port.h"
+#include "rocksdb/options.h"
#include "util/autovector.h"
#include "util/defer.h"
@@ -510,7 +511,8 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
// Close WALs before trying to delete them.
for (const auto w : state.logs_to_free) {
// TODO: maybe check the return value of Close.
- auto s = w->Close();
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ auto s = w->Close(WriteOptions());
s.PermitUncheckedError();
}
@@ -925,7 +927,8 @@ void DBImpl::SetDBId(std::string&& id, bool read_only,
}
}
-Status DBImpl::SetupDBId(bool read_only, RecoveryContext* recovery_ctx) {
+Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
+ RecoveryContext* recovery_ctx) {
Status s;
// Check for the IDENTITY file and create it if not there or
// broken or not matching manifest
@@ -958,7 +961,7 @@ Status DBImpl::SetupDBId(bool read_only, RecoveryContext* recovery_ctx) {
}
// Persist it to IDENTITY file if allowed
if (!read_only) {
- s = SetIdentityFile(env_, dbname_, db_id_);
+ s = SetIdentityFile(write_options, env_, dbname_, db_id_);
}
return s;
}
diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc
index 074fa8621..438911862 100644
--- a/db/db_impl/db_impl_open.cc
+++ b/db/db_impl/db_impl_open.cc
@@ -21,6 +21,7 @@
#include "monitoring/persistent_stats_history.h"
#include "monitoring/thread_status_util.h"
#include "options/options_helper.h"
+#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "rocksdb/wal_filter.h"
#include "test_util/sync_point.h"
@@ -309,7 +310,8 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) {
Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
VersionEdit new_db;
- Status s = SetIdentityFile(env_, dbname_);
+ const WriteOptions write_options(Env::IOActivity::kDBOpen);
+ Status s = SetIdentityFile(write_options, env_, dbname_);
if (!s.ok()) {
return s;
}
@@ -339,20 +341,23 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
immutable_db_options_.manifest_preallocation_size);
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), manifest, file_options, immutable_db_options_.clock,
- io_tracer_, nullptr /* stats */, immutable_db_options_.listeners,
- nullptr, tmp_set.Contains(FileType::kDescriptorFile),
+ io_tracer_, nullptr /* stats */,
+ Histograms::HISTOGRAM_ENUM_MAX /* hist_type */,
+ immutable_db_options_.listeners, nullptr,
+ tmp_set.Contains(FileType::kDescriptorFile),
tmp_set.Contains(FileType::kDescriptorFile)));
log::Writer log(std::move(file_writer), 0, false);
std::string record;
new_db.EncodeTo(&record);
- s = log.AddRecord(record);
+ s = log.AddRecord(write_options, record);
if (s.ok()) {
- s = SyncManifest(&immutable_db_options_, log.file());
+ s = SyncManifest(&immutable_db_options_, write_options, log.file());
}
}
if (s.ok()) {
// Make "CURRENT" file that points to the new manifest file.
- s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir());
+ s = SetCurrentFile(write_options, fs_.get(), dbname_, 1,
+ directories_.GetDbDir());
if (new_filenames) {
new_filenames->emplace_back(
manifest.substr(manifest.find_last_of("/\\") + 1));
@@ -418,6 +423,7 @@ Status DBImpl::Recover(
uint64_t* recovered_seq, RecoveryContext* recovery_ctx) {
mutex_.AssertHeld();
+ const WriteOptions write_options(Env::IOActivity::kDBOpen);
bool tmp_is_new_db = false;
bool& is_new_db = recovery_ctx ? recovery_ctx->is_new_db_ : tmp_is_new_db;
assert(db_lock_ == nullptr);
@@ -642,7 +648,7 @@ Status DBImpl::Recover(
}
}
}
- s = SetupDBId(read_only, recovery_ctx);
+ s = SetupDBId(write_options, read_only, recovery_ctx);
ROCKS_LOG_INFO(immutable_db_options_.info_log, "DB ID: %s\n", db_id_.c_str());
if (s.ok() && !read_only) {
s = DeleteUnreferencedSstFiles(recovery_ctx);
@@ -872,8 +878,9 @@ Status DBImpl::PersistentStatsProcessFormatVersion() {
if (s.ok()) {
ColumnFamilyOptions cfo;
OptimizeForPersistentStats(&cfo);
- s = CreateColumnFamilyImpl(cfo, kPersistentStatsColumnFamilyName,
- &handle);
+ s = CreateColumnFamilyImpl(ReadOptions(Env::IOActivity::kDBOpen),
+ WriteOptions(Env::IOActivity::kDBOpen), cfo,
+ kPersistentStatsColumnFamilyName, &handle);
}
if (s.ok()) {
persist_stats_cf_handle_ = static_cast<ColumnFamilyHandleImpl*>(handle);
@@ -895,6 +902,7 @@ Status DBImpl::PersistentStatsProcessFormatVersion() {
std::to_string(kStatsCFCompatibleFormatVersion));
}
if (s.ok()) {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions wo;
wo.low_pri = true;
wo.no_slowdown = true;
@@ -926,7 +934,9 @@ Status DBImpl::InitPersistStatsColumnFamily() {
ColumnFamilyHandle* handle = nullptr;
ColumnFamilyOptions cfo;
OptimizeForPersistentStats(&cfo);
- s = CreateColumnFamilyImpl(cfo, kPersistentStatsColumnFamilyName, &handle);
+ s = CreateColumnFamilyImpl(ReadOptions(Env::IOActivity::kDBOpen),
+ WriteOptions(Env::IOActivity::kDBOpen), cfo,
+ kPersistentStatsColumnFamilyName, &handle);
persist_stats_cf_handle_ = static_cast<ColumnFamilyHandleImpl*>(handle);
mutex_.Lock();
}
@@ -937,9 +947,12 @@ Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) {
mutex_.AssertHeld();
assert(versions_->descriptor_log_ == nullptr);
const ReadOptions read_options(Env::IOActivity::kDBOpen);
- Status s = versions_->LogAndApply(
- recovery_ctx.cfds_, recovery_ctx.mutable_cf_opts_, read_options,
- recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir());
+ const WriteOptions write_options(Env::IOActivity::kDBOpen);
+
+ Status s = versions_->LogAndApply(recovery_ctx.cfds_,
+ recovery_ctx.mutable_cf_opts_, read_options,
+ write_options, recovery_ctx.edit_lists_,
+ &mutex_, directories_.GetDbDir());
if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) {
mutex_.Unlock();
for (const auto& stale_sst_file : recovery_ctx.files_to_delete_) {
@@ -1665,9 +1678,11 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
}
IOStatus io_s;
+ const ReadOptions read_option(Env::IOActivity::kDBOpen);
+ const WriteOptions write_option(Env::IO_HIGH, Env::IOActivity::kDBOpen);
TableBuilderOptions tboptions(
- *cfd->ioptions(), mutable_cf_options, cfd->internal_comparator(),
- cfd->int_tbl_prop_collector_factories(),
+ *cfd->ioptions(), mutable_cf_options, read_option, write_option,
+ cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(),
GetCompressionFlush(*cfd->ioptions(), mutable_cf_options),
mutable_cf_options.compression_opts, cfd->GetID(), cfd->GetName(),
0 /* level */, false /* is_bottommost */,
@@ -1677,16 +1692,15 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
SeqnoToTimeMapping empty_seqno_to_time_mapping;
Version* version = cfd->current();
version->Ref();
- const ReadOptions read_option(Env::IOActivity::kDBOpen);
uint64_t num_input_entries = 0;
s = BuildTable(
dbname_, versions_.get(), immutable_db_options_, tboptions,
- file_options_for_compaction_, read_option, cfd->table_cache(),
- iter.get(), std::move(range_del_iters), &meta, &blob_file_additions,
+ file_options_for_compaction_, cfd->table_cache(), iter.get(),
+ std::move(range_del_iters), &meta, &blob_file_additions,
snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber,
snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s,
io_tracer_, BlobFileCreationReason::kRecovery,
- empty_seqno_to_time_mapping, &event_logger_, job_id, Env::IO_HIGH,
+ empty_seqno_to_time_mapping, &event_logger_, job_id,
nullptr /* table_properties */, write_hint,
nullptr /*full_history_ts_low*/, &blob_callback_, version,
&num_input_entries);
@@ -1888,7 +1902,8 @@ Status DB::OpenAndTrimHistory(
return s;
}
-IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
+IOStatus DBImpl::CreateWAL(const WriteOptions& write_options,
+ uint64_t log_file_num, uint64_t recycle_log_number,
size_t preallocate_block_size,
log::Writer** new_log) {
IOStatus io_s;
@@ -1922,14 +1937,15 @@ IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types;
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(lfile), log_fname, opt_file_options,
- immutable_db_options_.clock, io_tracer_, nullptr /* stats */, listeners,
- nullptr, tmp_set.Contains(FileType::kWalFile),
+ immutable_db_options_.clock, io_tracer_, nullptr /* stats */,
+ Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, listeners, nullptr,
+ tmp_set.Contains(FileType::kWalFile),
tmp_set.Contains(FileType::kWalFile)));
*new_log = new log::Writer(std::move(file_writer), log_file_num,
immutable_db_options_.recycle_log_file_num > 0,
immutable_db_options_.manual_wal_flush,
immutable_db_options_.wal_compression);
- io_s = (*new_log)->AddCompressionTypeRecord();
+ io_s = (*new_log)->AddCompressionTypeRecord(write_options);
}
return io_s;
}
@@ -1938,6 +1954,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
const bool seq_per_batch, const bool batch_per_txn) {
+ const WriteOptions write_options(Env::IOActivity::kDBOpen);
+ const ReadOptions read_options(Env::IOActivity::kDBOpen);
+
Status s = ValidateOptionsByTable(db_options, column_families);
if (!s.ok()) {
return s;
@@ -2014,7 +2033,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
log::Writer* new_log = nullptr;
const size_t preallocate_block_size =
impl->GetWalPreallocateBlockSize(max_write_buffer_size);
- s = impl->CreateWAL(new_log_number, 0 /*recycle_log_number*/,
+ s = impl->CreateWAL(write_options, new_log_number, 0 /*recycle_log_number*/,
preallocate_block_size, &new_log);
if (s.ok()) {
InstrumentedMutexLock wl(&impl->log_write_mutex_);
@@ -2039,21 +2058,25 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
if (recovered_seq != kMaxSequenceNumber) {
WriteBatch empty_batch;
WriteBatchInternal::SetSequence(&empty_batch, recovered_seq);
- WriteOptions write_options;
uint64_t log_used, log_size;
log::Writer* log_writer = impl->logs_.back().writer;
LogFileNumberSize& log_file_number_size = impl->alive_log_files_.back();
assert(log_writer->get_log_number() == log_file_number_size.number);
impl->mutex_.AssertHeld();
- s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size,
- Env::IO_TOTAL, log_file_number_size);
+ s = impl->WriteToWAL(empty_batch, write_options, log_writer, &log_used,
+ &log_size, log_file_number_size);
if (s.ok()) {
// Need to fsync, otherwise it might get lost after a power reset.
- s = impl->FlushWAL(false);
+ s = impl->FlushWAL(write_options, false);
TEST_SYNC_POINT_CALLBACK("DBImpl::Open::BeforeSyncWAL", /*arg=*/&s);
+ IOOptions opts;
if (s.ok()) {
- s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync);
+ s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ }
+ if (s.ok()) {
+ s = log_writer->file()->Sync(opts,
+ impl->immutable_db_options_.use_fsync);
}
}
}
@@ -2084,7 +2107,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
impl->mutex_.Unlock();
// NOTE: the work normally done in WrapUpCreateColumnFamilies will
// be done separately below.
- s = impl->CreateColumnFamilyImpl(cf.options, cf.name, &handle);
+ s = impl->CreateColumnFamilyImpl(read_options, write_options,
+ cf.options, cf.name, &handle);
impl->mutex_.Lock();
if (s.ok()) {
handles->push_back(handle);
@@ -2136,7 +2160,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
// Persist RocksDB Options before scheduling the compaction.
// The WriteOptionsFile() will release and lock the mutex internally.
persist_options_status =
- impl->WriteOptionsFile(true /*db_mutex_already_held*/);
+ impl->WriteOptionsFile(write_options, true /*db_mutex_already_held*/);
*dbptr = impl;
impl->opened_successfully_ = true;
impl->DeleteObsoleteFiles();
@@ -2236,12 +2260,17 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
impl);
LogFlush(impl->immutable_db_options_.info_log);
if (!impl->WALBufferIsEmpty()) {
- s = impl->FlushWAL(false);
+ s = impl->FlushWAL(write_options, false);
if (s.ok()) {
// Sync is needed otherwise WAL buffered data might get lost after a
// power reset.
log::Writer* log_writer = impl->logs_.back().writer;
- s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync);
+ IOOptions opts;
+ s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ s = log_writer->file()->Sync(opts,
+ impl->immutable_db_options_.use_fsync);
+ }
}
}
if (s.ok() && !persist_options_status.ok()) {
@@ -2258,7 +2287,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
s = impl->StartPeriodicTaskScheduler();
}
if (s.ok()) {
- s = impl->RegisterRecordSeqnoTimeWorker(recovery_ctx.is_new_db_);
+ s = impl->RegisterRecordSeqnoTimeWorker(read_options, write_options,
+ recovery_ctx.is_new_db_);
}
impl->options_mutex_.Unlock();
if (!s.ok()) {
diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc
index 34a7de122..c29240e08 100644
--- a/db/db_impl/db_impl_write.cc
+++ b/db/db_impl/db_impl_write.cc
@@ -620,9 +620,9 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
log_write_mutex_.Unlock();
if (status.ok() && synced_wals.IsWalAddition()) {
InstrumentedMutexLock l(&mutex_);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
- status = ApplyWALToManifest(read_options, &synced_wals);
+ status = ApplyWALToManifest(read_options, write_options, &synced_wals);
}
// Requesting sync with two_write_queues_ is expected to be very rare. We
@@ -783,9 +783,9 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
}
if (w.status.ok() && synced_wals.IsWalAddition()) {
InstrumentedMutexLock l(&mutex_);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
- w.status = ApplyWALToManifest(read_options, &synced_wals);
+ w.status = ApplyWALToManifest(read_options, write_options, &synced_wals);
}
write_thread_.ExitAsBatchGroupLeader(wal_write_group, w.status);
}
@@ -1318,9 +1318,9 @@ Status DBImpl::MergeBatch(const WriteThread::WriteGroup& write_group,
// When two_write_queues_ is disabled, this function is called from the only
// write thread. Otherwise this must be called holding log_write_mutex_.
IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
+ const WriteOptions& write_options,
log::Writer* log_writer, uint64_t* log_used,
uint64_t* log_size,
- Env::IOPriority rate_limiter_priority,
LogFileNumberSize& log_file_number_size) {
assert(log_size != nullptr);
@@ -1343,12 +1343,11 @@ IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch,
log_write_mutex_.Lock();
}
IOStatus io_s = log_writer->MaybeAddUserDefinedTimestampSizeRecord(
- versions_->GetColumnFamiliesTimestampSizeForRecord(),
- rate_limiter_priority);
+ write_options, versions_->GetColumnFamiliesTimestampSizeForRecord());
if (!io_s.ok()) {
return io_s;
}
- io_s = log_writer->AddRecord(log_entry, rate_limiter_priority);
+ io_s = log_writer->AddRecord(write_options, log_entry);
if (UNLIKELY(needs_locking)) {
log_write_mutex_.Unlock();
@@ -1391,9 +1390,13 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
WriteBatchInternal::SetSequence(merged_batch, sequence);
uint64_t log_size;
- io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size,
- write_group.leader->rate_limiter_priority,
- log_file_number_size);
+
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ WriteOptions write_options;
+ write_options.rate_limiter_priority =
+ write_group.leader->rate_limiter_priority;
+ io_s = WriteToWAL(*merged_batch, write_options, log_writer, log_used,
+ &log_size, log_file_number_size);
if (to_be_cached_state) {
cached_recoverable_state_ = *to_be_cached_state;
cached_recoverable_state_empty_ = false;
@@ -1420,10 +1423,17 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
log_write_mutex_.Lock();
}
- for (auto& log : logs_) {
- io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync);
- if (!io_s.ok()) {
- break;
+ if (io_s.ok()) {
+ for (auto& log : logs_) {
+ IOOptions opts;
+ io_s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (!io_s.ok()) {
+ break;
+ }
+ io_s = log.writer->file()->Sync(opts, immutable_db_options_.use_fsync);
+ if (!io_s.ok()) {
+ break;
+ }
}
}
@@ -1496,9 +1506,13 @@ IOStatus DBImpl::ConcurrentWriteToWAL(
assert(log_writer->get_log_number() == log_file_number_size.number);
uint64_t log_size;
- io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size,
- write_group.leader->rate_limiter_priority,
- log_file_number_size);
+
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ WriteOptions write_options;
+ write_options.rate_limiter_priority =
+ write_group.leader->rate_limiter_priority;
+ io_s = WriteToWAL(*merged_batch, write_options, log_writer, log_used,
+ &log_size, log_file_number_size);
if (to_be_cached_state) {
cached_recoverable_state_ = *to_be_cached_state;
cached_recoverable_state_empty_ = false;
@@ -2117,8 +2131,10 @@ void DBImpl::NotifyOnMemTableSealed(ColumnFamilyData* /*cfd*/,
// two_write_queues_ is true (This is to simplify the reasoning.)
Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
mutex_.AssertHeld();
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
log::Writer* new_log = nullptr;
MemTable* new_mem = nullptr;
IOStatus io_s;
@@ -2165,8 +2181,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
if (creating_new_log) {
// TODO: Write buffer size passed in should be max of all CF's instead
// of mutable_cf_options.write_buffer_size.
- io_s = CreateWAL(new_log_number, recycle_log_number, preallocate_block_size,
- &new_log);
+ io_s = CreateWAL(write_options, new_log_number, recycle_log_number,
+ preallocate_block_size, &new_log);
if (s.ok()) {
s = io_s;
}
@@ -2203,7 +2219,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
// In recovery path, we force another try of writing WAL buffer.
cur_log_writer->file()->reset_seen_error();
}
- io_s = cur_log_writer->WriteBuffer();
+ io_s = cur_log_writer->WriteBuffer(write_options);
if (s.ok()) {
s = io_s;
}
@@ -2271,7 +2287,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
VersionEdit wal_deletion;
wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
s = versions_->LogAndApplyToDefaultColumnFamily(
- read_options, &wal_deletion, &mutex_, directories_.GetDbDir());
+ read_options, write_options, &wal_deletion, &mutex_,
+ directories_.GetDbDir());
if (!s.ok() && versions_->io_status().IsIOError()) {
s = error_handler_.SetBGError(versions_->io_status(),
BackgroundErrorReason::kManifestWrite);
diff --git a/db/db_iter.cc b/db/db_iter.cc
index 507bb2577..4687031f9 100644
--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@@ -201,6 +201,7 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key,
// TODO: consider moving ReadOptions from ArenaWrappedDBIter to DBIter to
// avoid having to copy options back and forth.
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions read_options;
read_options.read_tier = read_tier_;
read_options.fill_cache = fill_cache_;
diff --git a/db/db_iter.h b/db/db_iter.h
index 5022405c3..9a1649c34 100644
--- a/db/db_iter.h
+++ b/db/db_iter.h
@@ -126,6 +126,10 @@ class DBIter final : public Iterator {
void operator=(const DBIter&) = delete;
~DBIter() override {
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(
+ ThreadStatus::OperationType::OP_UNKNOWN);
// Release pinned data if any
if (pinned_iters_mgr_.PinningEnabled()) {
pinned_iters_mgr_.ReleasePinnedData();
@@ -134,6 +138,7 @@ class DBIter final : public Iterator {
ResetInternalKeysSkippedCounter();
local_stats_.BumpGlobalStatistics(statistics_);
iter_.DeleteIter(arena_mode_);
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
}
void SetIter(InternalIterator* iter) {
assert(iter_.iter() == nullptr);
diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc
index 7590aa2f1..8432831fe 100644
--- a/db/db_sst_test.cc
+++ b/db/db_sst_test.cc
@@ -957,15 +957,18 @@ TEST_F(DBSSTTest, OpenDBWithExistingTrashAndObsoleteSstFile) {
// Add some trash files to the db directory so the DB can clean them up
ASSERT_OK(env_->CreateDirIfMissing(dbname_));
- ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash"));
- ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash"));
- ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash"));
+ ASSERT_OK(
+ WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash", false));
+ ASSERT_OK(
+ WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash", false));
+ ASSERT_OK(
+ WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash", false));
// Manually add an obsolete sst file. Obsolete SST files are discovered and
// deleted upon recovery.
constexpr uint64_t kSstFileNumber = 100;
const std::string kObsoleteSstFile =
MakeTableFileName(dbname_, kSstFileNumber);
- ASSERT_OK(WriteStringToFile(env_, "abc", kObsoleteSstFile));
+ ASSERT_OK(WriteStringToFile(env_, "abc", kObsoleteSstFile, false));
// Reopen the DB and verify that it deletes existing trash files and obsolete
// SST files with rate limiting.
diff --git a/db/db_test2.cc b/db/db_test2.cc
index e471685b2..c7cc88a46 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -5691,7 +5691,7 @@ TEST_F(DBTest2, CrashInRecoveryMultipleCF) {
ASSERT_OK(ReadFileToString(env_, fname, &file_content));
file_content[400] = 'h';
file_content[401] = 'a';
- ASSERT_OK(WriteStringToFile(env_, file_content, fname));
+ ASSERT_OK(WriteStringToFile(env_, file_content, fname, false));
break;
}
}
diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc
index 88c6d1aac..ecb185808 100644
--- a/db/db_wal_test.cc
+++ b/db/db_wal_test.cc
@@ -1561,7 +1561,7 @@ class RecoveryTestHelper {
new log::Writer(std::move(file_writer), current_log_number,
db_options.recycle_log_file_num > 0, false,
db_options.wal_compression);
- ASSERT_OK(log_writer->AddCompressionTypeRecord());
+ ASSERT_OK(log_writer->AddCompressionTypeRecord(WriteOptions()));
current_log_writer.reset(log_writer);
WriteBatch batch;
@@ -1574,7 +1574,7 @@ class RecoveryTestHelper {
ASSERT_OK(batch.Put(key, value));
WriteBatchInternal::SetSequence(&batch, seq);
ASSERT_OK(current_log_writer->AddRecord(
- WriteBatchInternal::Contents(&batch)));
+ WriteOptions(), WriteBatchInternal::Contents(&batch)));
versions->SetLastAllocatedSequence(seq);
versions->SetLastPublishedSequence(seq);
versions->SetLastSequence(seq);
diff --git a/db/experimental.cc b/db/experimental.cc
index f6f920b2c..44816e710 100644
--- a/db/experimental.cc
+++ b/db/experimental.cc
@@ -38,8 +38,9 @@ Status UpdateManifestForFilesState(
const DBOptions& db_opts, const std::string& db_name,
const std::vector<ColumnFamilyDescriptor>& column_families,
const UpdateManifestForFilesStateOptions& opts) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
OfflineManifestWriter w(db_opts, db_name);
Status s = w.Recover(column_families);
@@ -117,7 +118,8 @@ Status UpdateManifestForFilesState(
std::unique_ptr<FSDirectory> db_dir;
s = fs->NewDirectory(db_name, IOOptions(), &db_dir, nullptr);
if (s.ok()) {
- s = w.LogAndApply(read_options, cfd, &edit, db_dir.get());
+ s = w.LogAndApply(read_options, write_options, cfd, &edit,
+ db_dir.get());
}
if (s.ok()) {
++cfs_updated;
diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc
index a4a194714..e17c4097a 100644
--- a/db/external_sst_file_ingestion_job.cc
+++ b/db/external_sst_file_ingestion_job.cc
@@ -710,7 +710,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
// If customized readahead size is needed, we can pass a user option
// all the way to here. Right now we just rely on the default readahead
// to keep things simple.
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
ro.readahead_size = ingestion_options_.verify_checksums_readahead_size;
status = table_reader->VerifyChecksum(
@@ -764,7 +764,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
file_to_ingest->num_range_deletions = props->num_range_deletions;
ParsedInternalKey key;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
std::unique_ptr<InternalIterator> iter(table_reader->NewIterator(
ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr,
@@ -902,7 +902,7 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
bool overlap_with_db = false;
Arena arena;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
ro.total_order_seek = true;
int target_level = 0;
diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc
index d888dfde1..17b4c0342 100644
--- a/db/fault_injection_test.cc
+++ b/db/fault_injection_test.cc
@@ -572,7 +572,7 @@ TEST_P(FaultInjectionTest, NoDuplicateTrailingEntries) {
edit.SetColumnFamily(0);
std::string buf;
assert(edit.EncodeTo(&buf));
- const Status s = log_writer->AddRecord(buf);
+ const Status s = log_writer->AddRecord(WriteOptions(), buf);
ASSERT_NOK(s);
}
diff --git a/db/flush_job.cc b/db/flush_job.cc
index a3e168823..ff6cf36ef 100644
--- a/db/flush_job.cc
+++ b/db/flush_job.cc
@@ -409,7 +409,7 @@ Status FlushJob::MemPurge() {
// Create two iterators, one for the memtable data (contains
// info from puts + deletes), and one for the memtable
// Range Tombstones (from DeleteRanges).
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
ro.total_order_seek = true;
Arena arena;
@@ -701,8 +701,8 @@ bool FlushJob::MemPurgeDecider(double threshold) {
// Cochran formula for determining sample size.
// 95% confidence interval, 7% precision.
// n0 = (1.96*1.96)*0.25/(0.07*0.07) = 196.0
- // TODO: plumb Env::IOActivity
double n0 = 196.0;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
ro.total_order_seek = true;
@@ -961,29 +961,30 @@ Status FlushJob::WriteLevel0Table() {
const std::string* const full_history_ts_low =
(full_history_ts_low_.empty()) ? nullptr : &full_history_ts_low_;
+ const ReadOptions read_options(Env::IOActivity::kFlush);
+ const WriteOptions write_options(io_priority, Env::IOActivity::kFlush);
TableBuilderOptions tboptions(
- *cfd_->ioptions(), mutable_cf_options_, cfd_->internal_comparator(),
- cfd_->int_tbl_prop_collector_factories(), output_compression_,
- mutable_cf_options_.compression_opts, cfd_->GetID(), cfd_->GetName(),
- 0 /* level */, false /* is_bottommost */,
- TableFileCreationReason::kFlush, oldest_key_time, current_time,
- db_id_, db_session_id_, 0 /* target_file_size */,
- meta_.fd.GetNumber());
+ *cfd_->ioptions(), mutable_cf_options_, read_options, write_options,
+ cfd_->internal_comparator(), cfd_->int_tbl_prop_collector_factories(),
+ output_compression_, mutable_cf_options_.compression_opts,
+ cfd_->GetID(), cfd_->GetName(), 0 /* level */,
+ false /* is_bottommost */, TableFileCreationReason::kFlush,
+ oldest_key_time, current_time, db_id_, db_session_id_,
+ 0 /* target_file_size */, meta_.fd.GetNumber());
const SequenceNumber job_snapshot_seq =
job_context_->GetJobSnapshotSequence();
- const ReadOptions read_options(Env::IOActivity::kFlush);
- s = BuildTable(dbname_, versions_, db_options_, tboptions, file_options_,
- read_options, cfd_->table_cache(), iter.get(),
- std::move(range_del_iters), &meta_, &blob_file_additions,
- existing_snapshots_, earliest_write_conflict_snapshot_,
- job_snapshot_seq, snapshot_checker_,
- mutable_cf_options_.paranoid_file_checks,
- cfd_->internal_stats(), &io_s, io_tracer_,
- BlobFileCreationReason::kFlush, seqno_to_time_mapping_,
- event_logger_, job_context_->job_id, io_priority,
- &table_properties_, write_hint, full_history_ts_low,
- blob_callback_, base_, &num_input_entries,
- &memtable_payload_bytes, &memtable_garbage_bytes);
+
+ s = BuildTable(
+ dbname_, versions_, db_options_, tboptions, file_options_,
+ cfd_->table_cache(), iter.get(), std::move(range_del_iters), &meta_,
+ &blob_file_additions, existing_snapshots_,
+ earliest_write_conflict_snapshot_, job_snapshot_seq,
+ snapshot_checker_, mutable_cf_options_.paranoid_file_checks,
+ cfd_->internal_stats(), &io_s, io_tracer_,
+ BlobFileCreationReason::kFlush, seqno_to_time_mapping_, event_logger_,
+ job_context_->job_id, &table_properties_, write_hint,
+ full_history_ts_low, blob_callback_, base_, &num_input_entries,
+ &memtable_payload_bytes, &memtable_garbage_bytes);
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:s", &s);
// TODO: Cleanup io_status in BuildTable and table builders
assert(!s.ok() || io_s.ok());
@@ -1177,8 +1178,9 @@ Status FlushJob::MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT() {
VersionEdit edit;
edit.SetColumnFamily(cfd_->GetID());
edit.SetFullHistoryTsLow(new_full_history_ts_low);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
return versions_->LogAndApply(cfd_, *cfd_->GetLatestMutableCFOptions(),
- ReadOptions(), &edit, db_mutex_,
+ ReadOptions(), WriteOptions(), &edit, db_mutex_,
output_file_directory_);
}
diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc
index 95cde2d4d..1838a9389 100644
--- a/db/flush_job_test.cc
+++ b/db/flush_job_test.cc
@@ -55,7 +55,7 @@ class FlushJobTestBase : public testing::Test {
}
void NewDB() {
- ASSERT_OK(SetIdentityFile(env_, dbname_));
+ ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
VersionEdit new_db;
new_db.SetLogNumber(0);
@@ -89,19 +89,19 @@ class FlushJobTestBase : public testing::Test {
log::Writer log(std::move(file_writer), 0, false);
std::string record;
new_db.EncodeTo(&record);
- s = log.AddRecord(record);
+ s = log.AddRecord(WriteOptions(), record);
ASSERT_OK(s);
for (const auto& e : new_cfs) {
record.clear();
e.EncodeTo(&record);
- s = log.AddRecord(record);
+ s = log.AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
}
ASSERT_OK(s);
// Make "CURRENT" file that points to the new manifest file.
- s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
}
diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc
index f7b8a50ae..fcd38f24b 100644
--- a/db/import_column_family_job.cc
+++ b/db/import_column_family_job.cc
@@ -355,7 +355,7 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
// in file_meta.
if (file_meta.smallest.empty()) {
assert(file_meta.largest.empty());
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
std::unique_ptr<InternalIterator> iter(table_reader->NewIterator(
ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr,
diff --git a/db/internal_stats.cc b/db/internal_stats.cc
index 6ef4b4302..d4cf19dcf 100644
--- a/db/internal_stats.cc
+++ b/db/internal_stats.cc
@@ -1155,7 +1155,7 @@ bool InternalStats::HandleSsTables(std::string* value, Slice /*suffix*/) {
bool InternalStats::HandleAggregatedTableProperties(std::string* value,
Slice /*suffix*/) {
std::shared_ptr<const TableProperties> tp;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = cfd_->current()->GetAggregatedTableProperties(read_options, &tp);
if (!s.ok()) {
@@ -1177,7 +1177,7 @@ static std::map<std::string, std::string> MapUint64ValuesToString(
bool InternalStats::HandleAggregatedTablePropertiesMap(
std::map<std::string, std::string>* values, Slice /*suffix*/) {
std::shared_ptr<const TableProperties> tp;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = cfd_->current()->GetAggregatedTableProperties(read_options, &tp);
if (!s.ok()) {
@@ -1195,7 +1195,7 @@ bool InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* values,
return false;
}
std::shared_ptr<const TableProperties> tp;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = cfd_->current()->GetAggregatedTableProperties(
read_options, &tp, static_cast<int>(level));
@@ -1214,7 +1214,7 @@ bool InternalStats::HandleAggregatedTablePropertiesAtLevelMap(
return false;
}
std::shared_ptr<const TableProperties> tp;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = cfd_->current()->GetAggregatedTableProperties(
read_options, &tp, static_cast<int>(level));
@@ -1418,7 +1418,7 @@ bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value,
bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value,
DBImpl* /*db*/,
Version* version) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
*value = (version == nullptr)
? 0
@@ -1473,7 +1473,7 @@ bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/,
->compaction_options_fifo.allow_compaction) {
return false;
}
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
TablePropertiesCollection collection;
auto s = cfd_->current()->GetPropertiesOfAllTables(read_options, &collection);
diff --git a/db/log_test.cc b/db/log_test.cc
index 0bf3bf5ae..bd5aaf6d6 100644
--- a/db/log_test.cc
+++ b/db/log_test.cc
@@ -185,9 +185,10 @@ class LogTest
void Write(const std::string& msg,
const UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
if (cf_to_ts_sz != nullptr && !cf_to_ts_sz->empty()) {
- ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(*cf_to_ts_sz));
+ ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(WriteOptions(),
+ *cf_to_ts_sz));
}
- ASSERT_OK(writer_->AddRecord(Slice(msg)));
+ ASSERT_OK(writer_->AddRecord(WriteOptions(), Slice(msg)));
}
size_t WrittenBytes() const { return dest_contents().size(); }
@@ -732,8 +733,8 @@ TEST_P(LogTest, Recycle) {
std::unique_ptr<WritableFileWriter> dest_holder(new WritableFileWriter(
std::move(sink), "" /* don't care */, FileOptions()));
Writer recycle_writer(std::move(dest_holder), 123, true);
- ASSERT_OK(recycle_writer.AddRecord(Slice("foooo")));
- ASSERT_OK(recycle_writer.AddRecord(Slice("bar")));
+ ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("foooo")));
+ ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("bar")));
ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2);
ASSERT_EQ("foooo", Read());
ASSERT_EQ("bar", Read());
@@ -764,9 +765,10 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
UnorderedMap<uint32_t, size_t> ts_sz_two = {
{2, sizeof(uint64_t)},
};
- ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(ts_sz_two));
- ASSERT_OK(recycle_writer.AddRecord(Slice("foooo")));
- ASSERT_OK(recycle_writer.AddRecord(Slice("bar")));
+ ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(
+ WriteOptions(), ts_sz_two));
+ ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("foooo")));
+ ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("bar")));
ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2);
CheckRecordAndTimestampSize("foooo", ts_sz_two);
CheckRecordAndTimestampSize("bar", ts_sz_two);
@@ -853,12 +855,12 @@ class RetriableLogTest : public ::testing::TestWithParam<int> {
std::string contents() { return sink_->contents_; }
void Encode(const std::string& msg) {
- ASSERT_OK(log_writer_->AddRecord(Slice(msg)));
+ ASSERT_OK(log_writer_->AddRecord(WriteOptions(), Slice(msg)));
}
void Write(const Slice& data) {
- ASSERT_OK(writer_->Append(data));
- ASSERT_OK(writer_->Sync(true));
+ ASSERT_OK(writer_->Append(IOOptions(), data));
+ ASSERT_OK(writer_->Sync(IOOptions(), true));
}
bool TryRead(std::string* result) {
@@ -991,7 +993,9 @@ INSTANTIATE_TEST_CASE_P(bool, RetriableLogTest, ::testing::Values(0, 2));
class CompressionLogTest : public LogTest {
public:
- Status SetupTestEnv() { return writer_->AddCompressionTypeRecord(); }
+ Status SetupTestEnv() {
+ return writer_->AddCompressionTypeRecord(WriteOptions());
+ }
};
TEST_P(CompressionLogTest, Empty) {
@@ -1109,7 +1113,7 @@ TEST_P(CompressionLogTest, AlignedFragmentation) {
// beginning of the block.
while ((WrittenBytes() & (kBlockSize - 1)) >= kHeaderSize) {
char entry = 'a';
- ASSERT_OK(writer_->AddRecord(Slice(&entry, 1)));
+ ASSERT_OK(writer_->AddRecord(WriteOptions(), Slice(&entry, 1)));
num_filler_records++;
}
const std::vector<std::string> wal_entries = {
diff --git a/db/log_writer.cc b/db/log_writer.cc
index 5fc46b33f..8e0f7a4a9 100644
--- a/db/log_writer.cc
+++ b/db/log_writer.cc
@@ -38,32 +38,43 @@ Writer::Writer(std::unique_ptr<WritableFileWriter>&& dest, uint64_t log_number,
}
Writer::~Writer() {
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
if (dest_) {
- WriteBuffer().PermitUncheckedError();
+ WriteBuffer(WriteOptions()).PermitUncheckedError();
}
if (compress_) {
delete compress_;
}
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
}
-IOStatus Writer::WriteBuffer() {
+IOStatus Writer::WriteBuffer(const WriteOptions& write_options) {
if (dest_->seen_error()) {
return IOStatus::IOError("Seen error. Skip writing buffer.");
}
- return dest_->Flush();
+ IOOptions opts;
+ IOStatus s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (!s.ok()) {
+ return s;
+ }
+ return dest_->Flush(opts);
}
-IOStatus Writer::Close() {
+IOStatus Writer::Close(const WriteOptions& write_options) {
IOStatus s;
- if (dest_) {
- s = dest_->Close();
+ IOOptions opts;
+ s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok() && dest_) {
+ s = dest_->Close(opts);
dest_.reset();
}
return s;
}
-IOStatus Writer::AddRecord(const Slice& slice,
- Env::IOPriority rate_limiter_priority) {
+IOStatus Writer::AddRecord(const WriteOptions& write_options,
+ const Slice& slice) {
const char* ptr = slice.data();
size_t left = slice.size();
@@ -83,83 +94,87 @@ IOStatus Writer::AddRecord(const Slice& slice,
}
IOStatus s;
- do {
- const int64_t leftover = kBlockSize - block_offset_;
- assert(leftover >= 0);
- if (leftover < header_size) {
- // Switch to a new block
- if (leftover > 0) {
- // Fill the trailer (literal below relies on kHeaderSize and
- // kRecyclableHeaderSize being <= 11)
- assert(header_size <= 11);
- s = dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
- static_cast<size_t>(leftover)),
- 0 /* crc32c_checksum */, rate_limiter_priority);
- if (!s.ok()) {
- break;
+ IOOptions opts;
+ s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ do {
+ const int64_t leftover = kBlockSize - block_offset_;
+ assert(leftover >= 0);
+ if (leftover < header_size) {
+ // Switch to a new block
+ if (leftover > 0) {
+ // Fill the trailer (literal below relies on kHeaderSize and
+ // kRecyclableHeaderSize being <= 11)
+ assert(header_size <= 11);
+ s = dest_->Append(opts,
+ Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
+ static_cast<size_t>(leftover)),
+ 0 /* crc32c_checksum */);
+ if (!s.ok()) {
+ break;
+ }
}
+ block_offset_ = 0;
}
- block_offset_ = 0;
- }
- // Invariant: we never leave < header_size bytes in a block.
- assert(static_cast<int64_t>(kBlockSize - block_offset_) >= header_size);
-
- const size_t avail = kBlockSize - block_offset_ - header_size;
-
- // Compress the record if compression is enabled.
- // Compress() is called at least once (compress_start=true) and after the
- // previous generated compressed chunk is written out as one or more
- // physical records (left=0).
- if (compress_ && (compress_start || left == 0)) {
- compress_remaining = compress_->Compress(slice.data(), slice.size(),
- compressed_buffer_.get(), &left);
-
- if (compress_remaining < 0) {
- // Set failure status
- s = IOStatus::IOError("Unexpected WAL compression error");
- s.SetDataLoss(true);
- break;
- } else if (left == 0) {
- // Nothing left to compress
- if (!compress_start) {
+ // Invariant: we never leave < header_size bytes in a block.
+ assert(static_cast<int64_t>(kBlockSize - block_offset_) >= header_size);
+
+ const size_t avail = kBlockSize - block_offset_ - header_size;
+
+ // Compress the record if compression is enabled.
+ // Compress() is called at least once (compress_start=true) and after the
+ // previous generated compressed chunk is written out as one or more
+ // physical records (left=0).
+ if (compress_ && (compress_start || left == 0)) {
+ compress_remaining = compress_->Compress(
+ slice.data(), slice.size(), compressed_buffer_.get(), &left);
+
+ if (compress_remaining < 0) {
+ // Set failure status
+ s = IOStatus::IOError("Unexpected WAL compression error");
+ s.SetDataLoss(true);
break;
+ } else if (left == 0) {
+ // Nothing left to compress
+ if (!compress_start) {
+ break;
+ }
}
+ compress_start = false;
+ ptr = compressed_buffer_.get();
}
- compress_start = false;
- ptr = compressed_buffer_.get();
- }
-
- const size_t fragment_length = (left < avail) ? left : avail;
-
- RecordType type;
- const bool end = (left == fragment_length && compress_remaining == 0);
- if (begin && end) {
- type = recycle_log_files_ ? kRecyclableFullType : kFullType;
- } else if (begin) {
- type = recycle_log_files_ ? kRecyclableFirstType : kFirstType;
- } else if (end) {
- type = recycle_log_files_ ? kRecyclableLastType : kLastType;
- } else {
- type = recycle_log_files_ ? kRecyclableMiddleType : kMiddleType;
- }
- s = EmitPhysicalRecord(type, ptr, fragment_length, rate_limiter_priority);
- ptr += fragment_length;
- left -= fragment_length;
- begin = false;
- } while (s.ok() && (left > 0 || compress_remaining > 0));
+ const size_t fragment_length = (left < avail) ? left : avail;
+
+ RecordType type;
+ const bool end = (left == fragment_length && compress_remaining == 0);
+ if (begin && end) {
+ type = recycle_log_files_ ? kRecyclableFullType : kFullType;
+ } else if (begin) {
+ type = recycle_log_files_ ? kRecyclableFirstType : kFirstType;
+ } else if (end) {
+ type = recycle_log_files_ ? kRecyclableLastType : kLastType;
+ } else {
+ type = recycle_log_files_ ? kRecyclableMiddleType : kMiddleType;
+ }
+ s = EmitPhysicalRecord(write_options, type, ptr, fragment_length);
+ ptr += fragment_length;
+ left -= fragment_length;
+ begin = false;
+ } while (s.ok() && (left > 0 || compress_remaining > 0));
+ }
if (s.ok()) {
if (!manual_flush_) {
- s = dest_->Flush(rate_limiter_priority);
+ s = dest_->Flush(opts);
}
}
return s;
}
-IOStatus Writer::AddCompressionTypeRecord() {
+IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) {
// Should be the first record
assert(block_offset_ == 0);
@@ -171,11 +186,15 @@ IOStatus Writer::AddCompressionTypeRecord() {
CompressionTypeRecord record(compression_type_);
std::string encode;
record.EncodeTo(&encode);
- IOStatus s =
- EmitPhysicalRecord(kSetCompressionType, encode.data(), encode.size());
+ IOStatus s = EmitPhysicalRecord(write_options, kSetCompressionType,
+ encode.data(), encode.size());
if (s.ok()) {
if (!manual_flush_) {
- s = dest_->Flush();
+ IOOptions io_opts;
+ s = WritableFileWriter::PrepareIOOptions(write_options, io_opts);
+ if (s.ok()) {
+ s = dest_->Flush(io_opts);
+ }
}
// Initialize fields required for compression
const size_t max_output_buffer_len =
@@ -197,8 +216,8 @@ IOStatus Writer::AddCompressionTypeRecord() {
}
IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
- const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
- Env::IOPriority rate_limiter_priority) {
+ const WriteOptions& write_options,
+ const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz) {
std::vector<std::pair<uint32_t, size_t>> ts_sz_to_record;
for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) {
if (recorded_cf_to_ts_sz_.count(cf_id) != 0) {
@@ -219,14 +238,14 @@ IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
record.EncodeTo(&encoded);
RecordType type = recycle_log_files_ ? kRecyclableUserDefinedTimestampSizeType
: kUserDefinedTimestampSizeType;
- return EmitPhysicalRecord(type, encoded.data(), encoded.size(),
- rate_limiter_priority);
+ return EmitPhysicalRecord(write_options, type, encoded.data(),
+ encoded.size());
}
bool Writer::BufferIsEmpty() { return dest_->BufferIsEmpty(); }
-IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n,
- Env::IOPriority rate_limiter_priority) {
+IOStatus Writer::EmitPhysicalRecord(const WriteOptions& write_options,
+ RecordType t, const char* ptr, size_t n) {
assert(n <= 0xffff); // Must fit in two bytes
size_t header_size;
@@ -266,10 +285,13 @@ IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n,
EncodeFixed32(buf, crc);
// Write the header and the payload
- IOStatus s = dest_->Append(Slice(buf, header_size), 0 /* crc32c_checksum */,
- rate_limiter_priority);
+ IOOptions opts;
+ IOStatus s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ s = dest_->Append(opts, Slice(buf, header_size), 0 /* crc32c_checksum */);
+ }
if (s.ok()) {
- s = dest_->Append(Slice(ptr, n), payload_crc, rate_limiter_priority);
+ s = dest_->Append(opts, Slice(ptr, n), payload_crc);
}
block_offset_ += header_size + n;
return s;
diff --git a/db/log_writer.h b/db/log_writer.h
index 7a64a8560..1bbf72569 100644
--- a/db/log_writer.h
+++ b/db/log_writer.h
@@ -86,9 +86,8 @@ class Writer {
~Writer();
- IOStatus AddRecord(const Slice& slice,
- Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
- IOStatus AddCompressionTypeRecord();
+ IOStatus AddRecord(const WriteOptions& write_options, const Slice& slice);
+ IOStatus AddCompressionTypeRecord(const WriteOptions& write_options);
// If there are column families in `cf_to_ts_sz` not included in
// `recorded_cf_to_ts_sz_` and its user-defined timestamp size is non-zero,
@@ -96,17 +95,17 @@ class Writer {
// kRecyclableUserDefinedTimestampSizeType for these column families.
// This timestamp size record applies to all subsequent records.
IOStatus MaybeAddUserDefinedTimestampSizeRecord(
- const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
- Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
+ const WriteOptions& write_options,
+ const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz);
// Mutable accessor: hands back the raw pointer obtained from dest_.get().
WritableFileWriter* file() {
  WritableFileWriter* const writer = dest_.get();
  return writer;
}
// Read-only accessor: same pointer as the non-const overload, exposed as
// pointer-to-const.
const WritableFileWriter* file() const {
  const WritableFileWriter* const writer = dest_.get();
  return writer;
}
uint64_t get_log_number() const { return log_number_; }
- IOStatus WriteBuffer();
+ IOStatus WriteBuffer(const WriteOptions& write_options);
- IOStatus Close();
+ IOStatus Close(const WriteOptions& write_options);
bool BufferIsEmpty();
@@ -121,9 +120,8 @@ class Writer {
// record type stored in the header.
uint32_t type_crc_[kMaxRecordType + 1];
- IOStatus EmitPhysicalRecord(
- RecordType type, const char* ptr, size_t length,
- Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
+ IOStatus EmitPhysicalRecord(const WriteOptions& write_options,
+ RecordType type, const char* ptr, size_t length);
// If true, it does not flush after each write. Instead it relies on the upper
// layer to manually do the flush by calling ::WriteBuffer()
diff --git a/db/memtable.cc b/db/memtable.cc
index 0b8786bc2..56679dd75 100644
--- a/db/memtable.cc
+++ b/db/memtable.cc
@@ -597,7 +597,7 @@ void MemTable::ConstructFragmentedRangeTombstones() {
assert(!IsFragmentedRangeTombstonesConstructed(false));
// There should be no concurrent Construction
if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
auto* unfragmented_iter =
new MemTableIterator(*this, ReadOptions(), nullptr /* arena */,
true /* use_range_del_table */);
diff --git a/db/memtable_list.cc b/db/memtable_list.cc
index dfa93461b..a65d3914b 100644
--- a/db/memtable_list.cc
+++ b/db/memtable_list.cc
@@ -502,6 +502,7 @@ Status MemTableList::TryInstallMemtableFlushResults(
mu->AssertHeld();
const ReadOptions read_options(Env::IOActivity::kFlush);
+ const WriteOptions write_options(Env::IOActivity::kFlush);
// Flush was successful
// Record the status on the memtable object. Either this call or a call by a
@@ -614,10 +615,10 @@ Status MemTableList::TryInstallMemtableFlushResults(
};
if (write_edits) {
// this can release and reacquire the mutex.
- s = vset->LogAndApply(cfd, mutable_cf_options, read_options, edit_list,
- mu, db_directory, /*new_descriptor_log=*/false,
- /*column_family_options=*/nullptr,
- manifest_write_cb);
+ s = vset->LogAndApply(
+ cfd, mutable_cf_options, read_options, write_options, edit_list, mu,
+ db_directory, /*new_descriptor_log=*/false,
+ /*column_family_options=*/nullptr, manifest_write_cb);
} else {
// If write_edit is false (e.g: successful mempurge),
// then remove old memtables, wake up manifest write queue threads,
@@ -835,6 +836,7 @@ Status InstallMemtableAtomicFlushResults(
mu->AssertHeld();
const ReadOptions read_options(Env::IOActivity::kFlush);
+ const WriteOptions write_options(Env::IOActivity::kFlush);
size_t num = mems_list.size();
assert(cfds.size() == num);
@@ -913,8 +915,8 @@ Status InstallMemtableAtomicFlushResults(
}
// this can release and reacquire the mutex.
- s = vset->LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists,
- mu, db_directory);
+ s = vset->LogAndApply(cfds, mutable_cf_options_list, read_options,
+ write_options, edit_lists, mu, db_directory);
for (size_t k = 0; k != cfds.size(); ++k) {
auto* imm = (imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k);
diff --git a/db/repair.cc b/db/repair.cc
index 7585d9758..bf409e22a 100644
--- a/db/repair.cc
+++ b/db/repair.cc
@@ -146,8 +146,10 @@ class Repairer {
// Adds a column family to the VersionSet with cf_options_ and updates
// manifest.
Status AddColumnFamily(const std::string& cf_name, uint32_t cf_id) {
- // TODO: plumb Env::IOActivity;
+ // TODO: plumb Env::IOActivity, Env::IOPriority;
const ReadOptions read_options;
+ const WriteOptions write_options;
+
const auto* cf_opts = GetColumnFamilyOptions(cf_name);
if (cf_opts == nullptr) {
return Status::Corruption("Encountered unknown column family with name=" +
@@ -170,9 +172,9 @@ class Repairer {
Status status = env_->GetFileSystem()->NewDirectory(dbname_, IOOptions(),
&db_dir, nullptr);
if (status.ok()) {
- status = vset_.LogAndApply(cfd, mut_cf_opts, read_options, &edit, &mutex_,
- db_dir.get(), false /* new_descriptor_log */,
- cf_opts);
+ status = vset_.LogAndApply(cfd, mut_cf_opts, read_options, write_options,
+ &edit, &mutex_, db_dir.get(),
+ false /* new_descriptor_log */, cf_opts);
}
mutex_.Unlock();
return status;
@@ -362,9 +364,6 @@ class Repairer {
}
};
- // TODO: plumb Env::IOActivity
- const ReadOptions read_options;
-
// Open the log file
std::string logname = LogFileName(wal_dir, log);
const auto& fs = env_->GetFileSystem();
@@ -440,7 +439,7 @@ class Repairer {
FileMetaData meta;
meta.fd = FileDescriptor(next_file_number_++, 0, 0);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
ro.total_order_seek = true;
Arena arena;
@@ -463,26 +462,29 @@ class Repairer {
IOStatus io_s;
CompressionOptions default_compression;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ const ReadOptions read_options;
+ const WriteOptions write_option(Env::IO_HIGH);
TableBuilderOptions tboptions(
- *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(),
- cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(),
- kNoCompression, default_compression, cfd->GetID(), cfd->GetName(),
- -1 /* level */, false /* is_bottommost */,
- TableFileCreationReason::kRecovery, 0 /* oldest_key_time */,
- 0 /* file_creation_time */, "DB Repairer" /* db_id */, db_session_id_,
- 0 /*target_file_size*/, meta.fd.GetNumber());
+ *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), read_options,
+ write_option, cfd->internal_comparator(),
+ cfd->int_tbl_prop_collector_factories(), kNoCompression,
+ default_compression, cfd->GetID(), cfd->GetName(), -1 /* level */,
+ false /* is_bottommost */, TableFileCreationReason::kRecovery,
+ 0 /* oldest_key_time */, 0 /* file_creation_time */,
+ "DB Repairer" /* db_id */, db_session_id_, 0 /*target_file_size*/,
+ meta.fd.GetNumber());
SeqnoToTimeMapping empty_seqno_to_time_mapping;
status = BuildTable(
dbname_, /* versions */ nullptr, immutable_db_options_, tboptions,
- file_options_, read_options, table_cache_.get(), iter.get(),
+ file_options_, table_cache_.get(), iter.get(),
std::move(range_del_iters), &meta, nullptr /* blob_file_additions */,
{}, kMaxSequenceNumber, kMaxSequenceNumber, snapshot_checker,
false /* paranoid_file_checks*/, nullptr /* internal_stats */, &io_s,
nullptr /*IOTracer*/, BlobFileCreationReason::kRecovery,
empty_seqno_to_time_mapping, nullptr /* event_logger */,
- 0 /* job_id */, Env::IO_HIGH, nullptr /* table_properties */,
- write_hint);
+ 0 /* job_id */, nullptr /* table_properties */, write_hint);
ROCKS_LOG_INFO(db_options_.info_log,
"Log #%" PRIu64 ": %d ops saved to Table #%" PRIu64 " %s",
log, counter, meta.fd.GetNumber(),
@@ -529,7 +531,7 @@ class Repairer {
file_size);
std::shared_ptr<const TableProperties> props;
if (status.ok()) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
status = table_cache_->GetTableProperties(
file_options_, read_options, icmp_, t->meta, &props,
@@ -592,7 +594,7 @@ class Repairer {
}
}
if (status.ok()) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ropts;
ropts.total_order_seek = true;
InternalIterator* iter = table_cache_->NewIterator(
@@ -641,7 +643,7 @@ class Repairer {
// an SST file is a full sorted run. This probably needs the extra logic
// from compaction_job.cc around call to UpdateBoundariesForRange (to
// handle range tombstones extending beyond range of other entries).
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ropts;
std::unique_ptr<FragmentedRangeTombstoneIterator> r_iter;
status = table_cache_->GetRangeTombstoneIterator(
@@ -666,8 +668,10 @@ class Repairer {
}
Status AddTables() {
- // TODO: plumb Env::IOActivity;
+ // TODO: plumb Env::IOActivity, Env::IOPriority;
const ReadOptions read_options;
+ const WriteOptions write_options;
+
std::unordered_map<uint32_t, std::vector<const TableInfo*>> cf_id_to_tables;
SequenceNumber max_sequence = 0;
for (size_t i = 0; i < tables_.size(); i++) {
@@ -755,8 +759,8 @@ class Repairer {
nullptr);
if (s.ok()) {
s = vset_.LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
- read_options, &edit, &mutex_, db_dir.get(),
- false /* new_descriptor_log */);
+ read_options, write_options, &edit, &mutex_,
+ db_dir.get(), false /* new_descriptor_log */);
}
mutex_.Unlock();
}
diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc
index e10f02e67..6bf265c2b 100644
--- a/db/table_properties_collector_test.cc
+++ b/db/table_properties_collector_test.cc
@@ -52,10 +52,13 @@ void MakeBuilder(
std::unique_ptr<FSWritableFile> wf(new test::StringSink);
writable->reset(
new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions()));
+ const ReadOptions read_options;
+ const WriteOptions write_options;
TableBuilderOptions tboptions(
- ioptions, moptions, internal_comparator, int_tbl_prop_collector_factories,
- options.compression, options.compression_opts, kTestColumnFamilyId,
- kTestColumnFamilyName, kTestLevel);
+ ioptions, moptions, read_options, write_options, internal_comparator,
+ int_tbl_prop_collector_factories, options.compression,
+ options.compression_opts, kTestColumnFamilyId, kTestColumnFamilyName,
+ kTestLevel);
builder->reset(NewTableBuilder(tboptions, writable->get()));
}
} // namespace
@@ -280,7 +283,7 @@ void TestCustomizedTablePropertiesCollector(
builder->Add(ikey.Encode(), kv.second);
}
ASSERT_OK(builder->Finish());
- ASSERT_OK(writer->Flush());
+ ASSERT_OK(writer->Flush(IOOptions()));
// -- Step 2: Read properties
test::StringSink* fwf =
@@ -419,7 +422,7 @@ void TestInternalKeyPropertiesCollector(
}
ASSERT_OK(builder->Finish());
- ASSERT_OK(writable->Flush());
+ ASSERT_OK(writable->Flush(IOOptions()));
test::StringSink* fwf =
static_cast<test::StringSink*>(writable->writable_file());
diff --git a/db/version_set.cc b/db/version_set.cc
index 72febac90..d41f87982 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1623,7 +1623,7 @@ Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
std::stringstream ss;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
for (int level = 0; level < storage_info_.num_levels_; level++) {
for (const auto& file_meta : storage_info_.files_[level]) {
@@ -5113,7 +5113,7 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
std::string manifest_file_name =
DescriptorFileName(dbname_, manifest_file_number_);
uint64_t size = 0;
- IOStatus io_s = descriptor_log_->Close();
+ IOStatus io_s = descriptor_log_->Close(WriteOptions());
descriptor_log_.reset();
TEST_SYNC_POINT("VersionSet::Close:AfterClose");
if (io_s.ok()) {
@@ -5146,7 +5146,8 @@ Status VersionSet::Close(FSDirectory* db_dir, InstrumentedMutex* mu) {
VersionEdit edit;
assert(cfd);
const MutableCFOptions& cf_opts = *cfd->GetLatestMutableCFOptions();
- s = LogAndApply(cfd, cf_opts, ReadOptions(), &edit, mu, db_dir);
+ s = LogAndApply(cfd, cf_opts, ReadOptions(), WriteOptions(), &edit, mu,
+ db_dir);
}
closed_ = true;
@@ -5230,8 +5231,8 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
Status VersionSet::ProcessManifestWrites(
std::deque<ManifestWriter>& writers, InstrumentedMutex* mu,
FSDirectory* dir_contains_current_file, bool new_descriptor_log,
- const ColumnFamilyOptions* new_cf_options,
- const ReadOptions& read_options) {
+ const ColumnFamilyOptions* new_cf_options, const ReadOptions& read_options,
+ const WriteOptions& write_options) {
mu->AssertHeld();
assert(!writers.empty());
ManifestWriter& first_writer = writers.front();
@@ -5505,13 +5506,15 @@ Status VersionSet::ProcessManifestWrites(
FileTypeSet tmp_set = db_options_->checksum_handoff_file_types;
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(descriptor_file), descriptor_fname, opt_file_opts, clock_,
- io_tracer_, nullptr, db_options_->listeners, nullptr,
+ io_tracer_, nullptr, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */,
+ db_options_->listeners, nullptr,
tmp_set.Contains(FileType::kDescriptorFile),
tmp_set.Contains(FileType::kDescriptorFile)));
descriptor_log_.reset(
new log::Writer(std::move(file_writer), 0, false));
- s = WriteCurrentStateToManifest(curr_state, wal_additions,
- descriptor_log_.get(), io_s);
+ s = WriteCurrentStateToManifest(write_options, curr_state,
+ wal_additions, descriptor_log_.get(),
+ io_s);
} else {
manifest_io_status = io_s;
s = io_s;
@@ -5555,7 +5558,7 @@ Status VersionSet::ProcessManifestWrites(
}
++idx;
#endif /* !NDEBUG */
- io_s = descriptor_log_->AddRecord(record);
+ io_s = descriptor_log_->AddRecord(write_options, record);
if (!io_s.ok()) {
s = io_s;
manifest_io_status = io_s;
@@ -5564,7 +5567,8 @@ Status VersionSet::ProcessManifestWrites(
}
if (s.ok()) {
- io_s = SyncManifest(db_options_, descriptor_log_->file());
+ io_s =
+ SyncManifest(db_options_, write_options, descriptor_log_->file());
manifest_io_status = io_s;
TEST_SYNC_POINT_CALLBACK(
"VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s);
@@ -5582,7 +5586,8 @@ Status VersionSet::ProcessManifestWrites(
assert(manifest_io_status.ok());
}
if (s.ok() && new_descriptor_log) {
- io_s = SetCurrentFile(fs_.get(), dbname_, pending_manifest_file_number_,
+ io_s = SetCurrentFile(write_options, fs_.get(), dbname_,
+ pending_manifest_file_number_,
dir_contains_current_file);
if (!io_s.ok()) {
s = io_s;
@@ -5822,7 +5827,7 @@ void VersionSet::WakeUpWaitingManifestWriters() {
Status VersionSet::LogAndApply(
const autovector<ColumnFamilyData*>& column_family_datas,
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
- const ReadOptions& read_options,
+ const ReadOptions& read_options, const WriteOptions& write_options,
const autovector<autovector<VersionEdit*>>& edit_lists,
InstrumentedMutex* mu, FSDirectory* dir_contains_current_file,
bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options,
@@ -5900,8 +5905,8 @@ Status VersionSet::LogAndApply(
return Status::ColumnFamilyDropped();
}
return ProcessManifestWrites(writers, mu, dir_contains_current_file,
- new_descriptor_log, new_cf_options,
- read_options);
+ new_descriptor_log, new_cf_options, read_options,
+ write_options);
}
void VersionSet::LogAndApplyCFHelper(VersionEdit* edit,
@@ -6238,7 +6243,7 @@ Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
Status VersionSet::ListColumnFamiliesFromManifest(
const std::string& manifest_path, FileSystem* fs,
std::vector<std::string>* column_families) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
std::unique_ptr<SequentialFileReader> file_reader;
Status s;
@@ -6282,8 +6287,9 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
"Number of levels needs to be bigger than 1");
}
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
ImmutableDBOptions db_options(*options);
ColumnFamilyOptions cf_options(*options);
@@ -6373,8 +6379,8 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
InstrumentedMutex dummy_mutex;
InstrumentedMutexLock l(&dummy_mutex);
return versions.LogAndApply(versions.GetColumnFamilySet()->GetDefault(),
- mutable_cf_options, read_options, &ve,
- &dummy_mutex, nullptr, true);
+ mutable_cf_options, read_options, write_options,
+ &ve, &dummy_mutex, nullptr, true);
}
// Get the checksum information including the checksum and checksum function
@@ -6448,7 +6454,7 @@ Status VersionSet::DumpManifest(
Options& options, std::string& dscname, bool verbose, bool hex, bool json,
const std::vector<ColumnFamilyDescriptor>& cf_descs) {
assert(options.env);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
std::vector<std::string> column_families;
@@ -6515,6 +6521,7 @@ void VersionSet::MarkMinLogNumberToKeep(uint64_t number) {
}
Status VersionSet::WriteCurrentStateToManifest(
+ const WriteOptions& write_options,
const std::unordered_map<uint32_t, MutableCFState>& curr_state,
const VersionEdit& wal_additions, log::Writer* log, IOStatus& io_s) {
// TODO: Break up into multiple records to reduce memory usage on recovery?
@@ -6535,7 +6542,7 @@ Status VersionSet::WriteCurrentStateToManifest(
return Status::Corruption("Unable to Encode VersionEdit:" +
edit_for_db_id.DebugString(true));
}
- io_s = log->AddRecord(db_id_record);
+ io_s = log->AddRecord(write_options, db_id_record);
if (!io_s.ok()) {
return io_s;
}
@@ -6550,7 +6557,7 @@ Status VersionSet::WriteCurrentStateToManifest(
return Status::Corruption("Unable to Encode VersionEdit: " +
wal_additions.DebugString(true));
}
- io_s = log->AddRecord(record);
+ io_s = log->AddRecord(write_options, record);
if (!io_s.ok()) {
return io_s;
}
@@ -6567,7 +6574,7 @@ Status VersionSet::WriteCurrentStateToManifest(
return Status::Corruption("Unable to Encode VersionEdit: " +
wal_deletions.DebugString(true));
}
- io_s = log->AddRecord(wal_deletions_record);
+ io_s = log->AddRecord(write_options, wal_deletions_record);
if (!io_s.ok()) {
return io_s;
}
@@ -6597,7 +6604,7 @@ Status VersionSet::WriteCurrentStateToManifest(
return Status::Corruption("Unable to Encode VersionEdit:" +
edit.DebugString(true));
}
- io_s = log->AddRecord(record);
+ io_s = log->AddRecord(write_options, record);
if (!io_s.ok()) {
return io_s;
}
@@ -6679,7 +6686,7 @@ Status VersionSet::WriteCurrentStateToManifest(
return Status::Corruption("Unable to Encode VersionEdit:" +
edit.DebugString(true));
}
- io_s = log->AddRecord(record);
+ io_s = log->AddRecord(write_options, record);
if (!io_s.ok()) {
return io_s;
}
diff --git a/db/version_set.h b/db/version_set.h
index d99edfd6c..d73a53560 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -1170,14 +1170,15 @@ class VersionSet {
virtual Status Close(FSDirectory* db_dir, InstrumentedMutex* mu);
Status LogAndApplyToDefaultColumnFamily(
- const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu,
+ const ReadOptions& read_options, const WriteOptions& write_options,
+ VersionEdit* edit, InstrumentedMutex* mu,
FSDirectory* dir_contains_current_file, bool new_descriptor_log = false,
const ColumnFamilyOptions* column_family_options = nullptr) {
ColumnFamilyData* default_cf = GetColumnFamilySet()->GetDefault();
const MutableCFOptions* cf_options =
default_cf->GetLatestMutableCFOptions();
- return LogAndApply(default_cf, *cf_options, read_options, edit, mu,
- dir_contains_current_file, new_descriptor_log,
+ return LogAndApply(default_cf, *cf_options, read_options, write_options,
+ edit, mu, dir_contains_current_file, new_descriptor_log,
column_family_options);
}
@@ -1190,7 +1191,8 @@ class VersionSet {
Status LogAndApply(
ColumnFamilyData* column_family_data,
const MutableCFOptions& mutable_cf_options,
- const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu,
+ const ReadOptions& read_options, const WriteOptions& write_options,
+ VersionEdit* edit, InstrumentedMutex* mu,
FSDirectory* dir_contains_current_file, bool new_descriptor_log = false,
const ColumnFamilyOptions* column_family_options = nullptr,
const std::function<void(const Status&)>& manifest_wcb = {}) {
@@ -1202,16 +1204,17 @@ class VersionSet {
autovector<VersionEdit*> edit_list;
edit_list.emplace_back(edit);
edit_lists.emplace_back(edit_list);
- return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists,
- mu, dir_contains_current_file, new_descriptor_log,
- column_family_options, {manifest_wcb});
+ return LogAndApply(cfds, mutable_cf_options_list, read_options,
+ write_options, edit_lists, mu, dir_contains_current_file,
+ new_descriptor_log, column_family_options,
+ {manifest_wcb});
}
// The batch version. If edit_list.size() > 1, caller must ensure that
// no edit in the list is a column family add or drop
Status LogAndApply(
ColumnFamilyData* column_family_data,
const MutableCFOptions& mutable_cf_options,
- const ReadOptions& read_options,
+ const ReadOptions& read_options, const WriteOptions& write_options,
const autovector<VersionEdit*>& edit_list, InstrumentedMutex* mu,
FSDirectory* dir_contains_current_file, bool new_descriptor_log = false,
const ColumnFamilyOptions* column_family_options = nullptr,
@@ -1222,9 +1225,10 @@ class VersionSet {
mutable_cf_options_list.emplace_back(&mutable_cf_options);
autovector<autovector<VersionEdit*>> edit_lists;
edit_lists.emplace_back(edit_list);
- return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists,
- mu, dir_contains_current_file, new_descriptor_log,
- column_family_options, {manifest_wcb});
+ return LogAndApply(cfds, mutable_cf_options_list, read_options,
+ write_options, edit_lists, mu, dir_contains_current_file,
+ new_descriptor_log, column_family_options,
+ {manifest_wcb});
}
// The across-multi-cf batch version. If edit_lists contain more than
@@ -1233,7 +1237,7 @@ class VersionSet {
virtual Status LogAndApply(
const autovector<ColumnFamilyData*>& cfds,
const autovector<const MutableCFOptions*>& mutable_cf_options_list,
- const ReadOptions& read_options,
+ const ReadOptions& read_options, const WriteOptions& write_options,
const autovector<autovector<VersionEdit*>>& edit_lists,
InstrumentedMutex* mu, FSDirectory* dir_contains_current_file,
bool new_descriptor_log = false,
@@ -1547,6 +1551,7 @@ class VersionSet {
new Version(cfd, this, file_options_, mutable_cf_options, io_tracer_);
constexpr bool update_stats = false;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
version->PrepareAppend(mutable_cf_options, read_options, update_stats);
AppendVersion(cfd, version);
@@ -1595,6 +1600,7 @@ class VersionSet {
// Save current contents to *log
Status WriteCurrentStateToManifest(
+ const WriteOptions& write_options,
const std::unordered_map<uint32_t, MutableCFState>& curr_state,
const VersionEdit& wal_additions, log::Writer* log, IOStatus& io_s);
@@ -1688,7 +1694,8 @@ class VersionSet {
FSDirectory* dir_contains_current_file,
bool new_descriptor_log,
const ColumnFamilyOptions* new_cf_options,
- const ReadOptions& read_options);
+ const ReadOptions& read_options,
+ const WriteOptions& write_options);
void LogAndApplyCFHelper(VersionEdit* edit,
SequenceNumber* max_last_sequence);
@@ -1747,7 +1754,7 @@ class ReactiveVersionSet : public VersionSet {
private:
std::unique_ptr<ManifestTailer> manifest_tailer_;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options_;
using VersionSet::LogAndApply;
using VersionSet::Recover;
@@ -1756,6 +1763,7 @@ class ReactiveVersionSet : public VersionSet {
const autovector<ColumnFamilyData*>& /*cfds*/,
const autovector<const MutableCFOptions*>& /*mutable_cf_options_list*/,
const ReadOptions& /* read_options */,
+ const WriteOptions& /* write_options */,
const autovector<autovector<VersionEdit*>>& /*edit_lists*/,
InstrumentedMutex* /*mu*/, FSDirectory* /*dir_contains_current_file*/,
bool /*new_descriptor_log*/, const ColumnFamilyOptions* /*new_cf_option*/,
diff --git a/db/version_set_test.cc b/db/version_set_test.cc
index 390d355e4..b16ffd035 100644
--- a/db/version_set_test.cc
+++ b/db/version_set_test.cc
@@ -1322,11 +1322,11 @@ class VersionSetTestBase {
log_writer->reset(new log::Writer(std::move(file_writer), 0, false));
std::string record;
new_db.EncodeTo(&record);
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
for (const auto& e : new_cfs) {
record.clear();
e.EncodeTo(&record);
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
}
@@ -1342,11 +1342,11 @@ class VersionSetTestBase {
void NewDB() {
SequenceNumber last_seqno;
std::unique_ptr<log::Writer> log_writer;
- ASSERT_OK(SetIdentityFile(env_, dbname_));
+ ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
PrepareManifest(&column_families_, &last_seqno, &log_writer);
log_writer.reset();
// Make "CURRENT" file point to the new manifest file.
- Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
EXPECT_OK(versions_->Recover(column_families_, false));
@@ -1392,7 +1392,7 @@ class VersionSetTestBase {
mutex_.Lock();
Status s = versions_->LogAndApply(
versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
- read_options_, &edit, &mutex_, nullptr);
+ read_options_, write_options_, &edit, &mutex_, nullptr);
mutex_.Unlock();
return s;
}
@@ -1406,7 +1406,7 @@ class VersionSetTestBase {
mutex_.Lock();
Status s = versions_->LogAndApply(
versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
- read_options_, vedits, &mutex_, nullptr);
+ read_options_, write_options_, vedits, &mutex_, nullptr);
mutex_.Unlock();
return s;
}
@@ -1418,7 +1418,8 @@ class VersionSetTestBase {
VersionEdit dummy;
ASSERT_OK(versions_->LogAndApply(
versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
- read_options_, &dummy, &mutex_, db_directory, new_descriptor_log));
+ read_options_, write_options_, &dummy, &mutex_, db_directory,
+ new_descriptor_log));
mutex_.Unlock();
}
@@ -1436,7 +1437,7 @@ class VersionSetTestBase {
mutex_.Lock();
s = versions_->LogAndApply(/*column_family_data=*/nullptr,
MutableCFOptions(cf_options), read_options_,
- &new_cf, &mutex_,
+ write_options_, &new_cf, &mutex_,
/*db_directory=*/nullptr,
/*new_descriptor_log=*/false, &cf_options);
mutex_.Unlock();
@@ -1459,6 +1460,8 @@ class VersionSetTestBase {
ImmutableOptions immutable_options_;
MutableCFOptions mutable_cf_options_;
const ReadOptions read_options_;
+ const WriteOptions write_options_;
+
std::shared_ptr<Cache> table_cache_;
WriteController write_controller_;
WriteBufferManager write_buffer_manager_;
@@ -1483,6 +1486,7 @@ TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) {
NewDB();
const int kGroupSize = 5;
const ReadOptions read_options;
+ const WriteOptions write_options;
autovector<VersionEdit> edits;
for (int i = 0; i != kGroupSize; ++i) {
@@ -1510,8 +1514,9 @@ TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) {
});
SyncPoint::GetInstance()->EnableProcessing();
mutex_.Lock();
- Status s = versions_->LogAndApply(cfds, all_mutable_cf_options, read_options,
- edit_lists, &mutex_, nullptr);
+ Status s =
+ versions_->LogAndApply(cfds, all_mutable_cf_options, read_options,
+ write_options, edit_lists, &mutex_, nullptr);
mutex_.Unlock();
EXPECT_OK(s);
EXPECT_EQ(kGroupSize - 1, count);
@@ -1713,7 +1718,7 @@ TEST_F(VersionSetTest, ObsoleteBlobFile) {
mutex_.Lock();
Status s = versions_->LogAndApply(
versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_,
- read_options_, &edit, &mutex_, nullptr);
+ read_options_, write_options_, &edit, &mutex_, nullptr);
mutex_.Unlock();
ASSERT_OK(s);
@@ -2454,7 +2459,8 @@ class VersionSetWithTimestampTest : public VersionSetTest {
Status s;
mutex_.Lock();
s = versions_->LogAndApply(cfd_, *(cfd_->GetLatestMutableCFOptions()),
- read_options_, edits_, &mutex_, nullptr);
+ read_options_, write_options_, edits_, &mutex_,
+ nullptr);
mutex_.Unlock();
ASSERT_OK(s);
VerifyFullHistoryTsLow(*std::max_element(ts_lbs.begin(), ts_lbs.end()));
@@ -2514,7 +2520,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
edits_[i].MarkAtomicGroup(--remaining);
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
+ ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
}
void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) {
@@ -2526,7 +2532,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
edits_[i].MarkAtomicGroup(--remaining);
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
+ ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
}
void SetupCorruptedAtomicGroup(int atomic_group_size) {
@@ -2540,7 +2546,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
}
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
+ ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
}
void SetupIncorrectAtomicGroup(int atomic_group_size) {
@@ -2556,7 +2562,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
}
edits_[i].SetLastSequence(last_seqno_++);
}
- ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr));
+ ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr));
}
void SetupTestSyncPoints() {
@@ -2602,7 +2608,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase,
for (int i = 0; i < num_edits; i++) {
std::string record;
edits_[i].EncodeTo(&record);
- ASSERT_OK(log_writer_->AddRecord(record));
+ ASSERT_OK(log_writer_->AddRecord(WriteOptions(), record));
}
}
@@ -2724,7 +2730,7 @@ TEST_F(VersionSetAtomicGroupTest,
// edits.
std::string last_record;
edits_[kAtomicGroupSize - 1].EncodeTo(&last_record);
- EXPECT_OK(log_writer_->AddRecord(last_record));
+ EXPECT_OK(log_writer_->AddRecord(WriteOptions(), last_record));
InstrumentedMutex mu;
std::unordered_set<ColumnFamilyData*> cfds_changed;
mu.Lock();
@@ -2896,12 +2902,13 @@ class VersionSetTestDropOneCF : public VersionSetTestBase,
// last column family in an atomic group.
TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
const ReadOptions read_options;
+ const WriteOptions write_options;
std::vector<ColumnFamilyDescriptor> column_families;
SequenceNumber last_seqno;
std::unique_ptr<log::Writer> log_writer;
PrepareManifest(&column_families, &last_seqno, &log_writer);
- Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
EXPECT_OK(versions_->Recover(column_families, false /* read_only */));
@@ -2924,9 +2931,9 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
cfd_to_drop->Ref();
drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID());
mutex_.Lock();
- s = versions_->LogAndApply(cfd_to_drop,
- *cfd_to_drop->GetLatestMutableCFOptions(),
- read_options, &drop_cf_edit, &mutex_, nullptr);
+ s = versions_->LogAndApply(
+ cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), read_options,
+ write_options, &drop_cf_edit, &mutex_, nullptr);
mutex_.Unlock();
ASSERT_OK(s);
@@ -2976,7 +2983,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) {
SyncPoint::GetInstance()->EnableProcessing();
mutex_.Lock();
s = versions_->LogAndApply(cfds, mutable_cf_options_list, read_options,
- edit_lists, &mutex_, nullptr);
+ write_options, edit_lists, &mutex_, nullptr);
mutex_.Unlock();
ASSERT_OK(s);
ASSERT_EQ(1, called);
@@ -3010,7 +3017,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase,
log_writer->reset(new log::Writer(std::move(file_writer), 0, true));
std::string record;
ASSERT_TRUE(new_db.EncodeTo(&record));
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
// Create new column family
VersionEdit new_cf;
@@ -3020,7 +3027,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase,
new_cf.SetNextFile(2);
record.clear();
ASSERT_TRUE(new_cf.EncodeTo(&record));
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
@@ -3034,8 +3041,8 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase,
TEST_F(EmptyDefaultCfNewManifest, Recover) {
PrepareManifest(nullptr, nullptr, &log_writer_);
log_writer_.reset();
- Status s =
- SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ /* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3066,7 +3073,7 @@ class VersionSetTestEmptyDb
assert(nullptr != log_writer);
VersionEdit new_db;
if (db_options_.write_dbid_to_manifest) {
- ASSERT_OK(SetIdentityFile(env_, dbname_));
+ ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_));
DBOptions tmp_db_options;
tmp_db_options.env = env_;
std::unique_ptr<DBImpl> impl(new DBImpl(tmp_db_options, dbname_));
@@ -3085,7 +3092,7 @@ class VersionSetTestEmptyDb
log_writer->reset(new log::Writer(std::move(file_writer), 0, false));
std::string record;
new_db.EncodeTo(&record);
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
}
@@ -3099,8 +3106,8 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) {
db_options_.write_dbid_to_manifest = std::get<0>(GetParam());
PrepareManifest(nullptr, nullptr, &log_writer_);
log_writer_.reset();
- Status s =
- SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ /* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path;
@@ -3140,11 +3147,12 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) {
{
std::string record;
new_cf1.EncodeTo(&record);
- s = log_writer_->AddRecord(record);
+ s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ /* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path;
@@ -3187,11 +3195,12 @@ TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) {
new_cf.SetColumnFamily(cf_id++);
std::string record;
ASSERT_TRUE(new_cf.EncodeTo(&record));
- s = log_writer_->AddRecord(record);
+ s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ /* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path;
@@ -3234,7 +3243,7 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) {
new_cf.SetColumnFamily(cf_id++);
std::string record;
ASSERT_TRUE(new_cf.EncodeTo(&record));
- s = log_writer_->AddRecord(record);
+ s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
{
@@ -3245,11 +3254,12 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) {
tmp_edit.SetLastSequence(0);
std::string record;
ASSERT_TRUE(tmp_edit.EncodeTo(&record));
- s = log_writer_->AddRecord(record);
+ s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ /* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path;
@@ -3292,7 +3302,7 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) {
new_cf.SetColumnFamily(cf_id++);
std::string record;
ASSERT_TRUE(new_cf.EncodeTo(&record));
- s = log_writer_->AddRecord(record);
+ s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
{
@@ -3302,11 +3312,12 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) {
tmp_edit.SetLastSequence(0);
std::string record;
ASSERT_TRUE(tmp_edit.EncodeTo(&record));
- s = log_writer_->AddRecord(record);
+ s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
log_writer_.reset();
- s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr);
+ s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1,
+ /* dir_contains_current_file */ nullptr);
ASSERT_OK(s);
std::string manifest_path;
@@ -3407,7 +3418,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
{
std::string record;
ASSERT_TRUE(new_db.EncodeTo(&record));
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
const std::vector<std::string> cf_names = {
@@ -3425,7 +3436,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
new_cf.SetColumnFamily(cf_id);
std::string record;
ASSERT_TRUE(new_cf.EncodeTo(&record));
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
VersionEdit cf_files;
@@ -3433,7 +3444,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
cf_files.SetLogNumber(0);
record.clear();
ASSERT_TRUE(cf_files.EncodeTo(&record));
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
++cf_id;
}
@@ -3444,7 +3455,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
edit.SetLastSequence(seq);
std::string record;
ASSERT_TRUE(edit.EncodeTo(&record));
- s = (*log_writer)->AddRecord(record);
+ s = (*log_writer)->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
*last_seqno = seq + 1;
@@ -3485,9 +3496,12 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
std::move(file), fname, FileOptions(), env_->GetSystemClock().get()));
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(table_factory_->NewTableBuilder(
TableBuilderOptions(
- immutable_options_, mutable_cf_options_, *internal_comparator_,
+ immutable_options_, mutable_cf_options_, read_options,
+ write_options, *internal_comparator_,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(),
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
@@ -3496,7 +3510,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
InternalKey ikey(info.key, 0, ValueType::kTypeValue);
builder->Add(ikey.Encode(), "value");
ASSERT_OK(builder->Finish());
- ASSERT_OK(fwriter->Flush());
+ ASSERT_OK(fwriter->Flush(IOOptions()));
uint64_t file_size = 0;
s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr);
ASSERT_OK(s);
@@ -3528,7 +3542,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
assert(log_writer_.get() != nullptr);
std::string record;
ASSERT_TRUE(edit.EncodeTo(&record, 0 /* ts_sz */));
- Status s = log_writer_->AddRecord(record);
+ Status s = log_writer_->AddRecord(WriteOptions(), record);
ASSERT_OK(s);
}
@@ -3573,7 +3587,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
log_writer_.reset();
- Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3631,7 +3645,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, added_files, std::vector<std::pair<int, uint64_t>>());
log_writer_.reset();
- Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
std::string manifest_path;
VerifyManifest(&manifest_path);
@@ -3685,7 +3699,7 @@ TEST_F(VersionSetTestMissingFiles, NoFileMissing) {
WriteFileAdditionAndDeletionToManifest(
/*cf=*/0, std::vector<std::pair<int, FileMetaData>>(), deleted_files);
log_writer_.reset();
- Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr);
+ Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr);
ASSERT_OK(s);
std::string manifest_path;
VerifyManifest(&manifest_path);
diff --git a/db/version_util.h b/db/version_util.h
index ca2e7a377..e499b9e2e 100644
--- a/db/version_util.h
+++ b/db/version_util.h
@@ -36,15 +36,17 @@ class OfflineManifestWriter {
/*no_error_if_files_missing*/ true);
}
- Status LogAndApply(const ReadOptions& read_options, ColumnFamilyData* cfd,
+ Status LogAndApply(const ReadOptions& read_options,
+ const WriteOptions& write_options, ColumnFamilyData* cfd,
VersionEdit* edit,
FSDirectory* dir_contains_current_file) {
// Use `mutex` to imitate a locked DB mutex when calling `LogAndApply()`.
InstrumentedMutex mutex;
mutex.Lock();
- Status s = versions_.LogAndApply(
- cfd, *cfd->GetLatestMutableCFOptions(), read_options, edit, &mutex,
- dir_contains_current_file, false /* new_descriptor_log */);
+ Status s = versions_.LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
+ read_options, write_options, edit, &mutex,
+ dir_contains_current_file,
+ false /* new_descriptor_log */);
mutex.Unlock();
return s;
}
diff --git a/db/wal_manager_test.cc b/db/wal_manager_test.cc
index 3f47c2901..3be19cb3a 100644
--- a/db/wal_manager_test.cc
+++ b/db/wal_manager_test.cc
@@ -73,8 +73,8 @@ class WalManagerTest : public testing::Test {
WriteBatch batch;
ASSERT_OK(batch.Put(key, value));
WriteBatchInternal::SetSequence(&batch, seq);
- ASSERT_OK(
- current_log_writer_->AddRecord(WriteBatchInternal::Contents(&batch)));
+ ASSERT_OK(current_log_writer_->AddRecord(
+ WriteOptions(), WriteBatchInternal::Contents(&batch)));
versions_->SetLastAllocatedSequence(seq);
versions_->SetLastPublishedSequence(seq);
versions_->SetLastSequence(seq);
@@ -146,7 +146,8 @@ TEST_F(WalManagerTest, ReadFirstRecordCache) {
WriteBatch batch;
ASSERT_OK(batch.Put("foo", "bar"));
WriteBatchInternal::SetSequence(&batch, 10);
- ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
+ ASSERT_OK(
+ writer.AddRecord(WriteOptions(), WriteBatchInternal::Contents(&batch)));
// TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here.
// Waiting for lei to finish with db_test
diff --git a/db/write_batch.cc b/db/write_batch.cc
index 75f6e1eb4..09fa2c371 100644
--- a/db/write_batch.cc
+++ b/db/write_batch.cc
@@ -2064,7 +2064,7 @@ class MemTableInserter : public WriteBatch::Handler {
// key not found in memtable. Do sst get, update, add
SnapshotImpl read_from_snapshot;
read_from_snapshot.number_ = sequence_;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ropts;
// it's going to be overwritten for sure, so no point caching data block
// containing the old version
@@ -2511,7 +2511,7 @@ class MemTableInserter : public WriteBatch::Handler {
SnapshotImpl read_from_snapshot;
read_from_snapshot.number_ = sequence_;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions read_options;
read_options.snapshot = &read_from_snapshot;
diff --git a/db/write_thread.h b/db/write_thread.h
index 6e5805e37..dc64601f9 100644
--- a/db/write_thread.h
+++ b/db/write_thread.h
@@ -166,6 +166,8 @@ class WriteThread {
PreReleaseCallback* _pre_release_callback = nullptr,
PostMemTableCallback* _post_memtable_callback = nullptr)
: batch(_batch),
+ // TODO: store a copy of WriteOptions instead of its separate data
+ // members
sync(write_options.sync),
no_slowdown(write_options.no_slowdown),
disable_wal(write_options.disableWAL),
diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h
index 83e6838c7..c5e2a1c45 100644
--- a/db_stress_tool/db_stress_env_wrapper.h
+++ b/db_stress_tool/db_stress_env_wrapper.h
@@ -76,6 +76,161 @@ class DbStressRandomAccessFileWrapper : public FSRandomAccessFileOwnerWrapper {
}
};
+class DbStressWritableFileWrapper : public FSWritableFileOwnerWrapper {
+ public:
+ explicit DbStressWritableFileWrapper(std::unique_ptr<FSWritableFile>&& target)
+ : FSWritableFileOwnerWrapper(std::move(target)) {}
+
+ IOStatus Append(const Slice& data, const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Append(data, options, dbg);
+ }
+ IOStatus Append(const Slice& data, const IOOptions& options,
+ const DataVerificationInfo& verification_info,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Append(data, options, verification_info, dbg);
+ }
+ IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+ const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->PositionedAppend(data, offset, options, dbg);
+ }
+ IOStatus PositionedAppend(const Slice& data, uint64_t offset,
+ const IOOptions& options,
+ const DataVerificationInfo& verification_info,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->PositionedAppend(data, offset, options, verification_info,
+ dbg);
+ }
+
+ virtual IOStatus Truncate(uint64_t size, const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Truncate(size, options, dbg);
+ }
+
+ virtual IOStatus Close(const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Close(options, dbg);
+ }
+
+ virtual IOStatus Flush(const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Flush(options, dbg);
+ }
+
+ virtual IOStatus Sync(const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Sync(options, dbg);
+ }
+
+ virtual IOStatus Fsync(const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Fsync(options, dbg);
+ }
+
+#ifdef ROCKSDB_FALLOCATE_PRESENT
+ virtual IOStatus Allocate(uint64_t offset, uint64_t len,
+ const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->Allocate(offset, len, options, dbg);
+ }
+#endif
+
+ virtual IOStatus RangeSync(uint64_t offset, uint64_t nbytes,
+ const IOOptions& options,
+ IODebugContext* dbg) override {
+#ifndef NDEBUG
+ const ThreadStatus::OperationType thread_op =
+ ThreadStatusUtil::GetThreadOperation();
+ Env::IOActivity io_activity =
+ ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op);
+ assert(io_activity == Env::IOActivity::kUnknown ||
+ io_activity == options.io_activity);
+#endif
+ return target()->RangeSync(offset, nbytes, options, dbg);
+ }
+};
+
class DbStressFSWrapper : public FileSystemWrapper {
public:
explicit DbStressFSWrapper(const std::shared_ptr<FileSystem>& t)
@@ -95,6 +250,17 @@ class DbStressFSWrapper : public FileSystemWrapper {
return s;
}
+ IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts,
+ std::unique_ptr<FSWritableFile>* r,
+ IODebugContext* dbg) override {
+ std::unique_ptr<FSWritableFile> file;
+ IOStatus s = target()->NewWritableFile(f, file_opts, &file, dbg);
+ if (s.ok()) {
+ r->reset(new DbStressWritableFileWrapper(std::move(file)));
+ }
+ return s;
+ }
+
IOStatus DeleteFile(const std::string& f, const IOOptions& opts,
IODebugContext* dbg) override {
// We determine whether it is a manifest file by searching a string,
diff --git a/db_stress_tool/db_stress_listener.cc b/db_stress_tool/db_stress_listener.cc
index 64adca877..8b9fb2cbb 100644
--- a/db_stress_tool/db_stress_listener.cc
+++ b/db_stress_tool/db_stress_listener.cc
@@ -130,8 +130,13 @@ UniqueIdVerifier::UniqueIdVerifier(const std::string& db_name, Env* env)
}
UniqueIdVerifier::~UniqueIdVerifier() {
- IOStatus s = data_file_writer_->Close();
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
+ IOStatus s;
+ s = data_file_writer_->Close(IOOptions());
assert(s.ok());
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
}
void UniqueIdVerifier::VerifyNoWrite(const std::string& id) {
@@ -153,13 +158,14 @@ void UniqueIdVerifier::Verify(const std::string& id) {
if (id_set_.size() >= 4294967) {
return;
}
- IOStatus s = data_file_writer_->Append(Slice(id));
+ IOOptions opts;
+ IOStatus s = data_file_writer_->Append(opts, Slice(id));
if (!s.ok()) {
fprintf(stderr, "Error writing to unique id file: %s\n",
s.ToString().c_str());
assert(false);
}
- s = data_file_writer_->Flush();
+ s = data_file_writer_->Flush(opts);
if (!s.ok()) {
fprintf(stderr, "Error flushing unique id file: %s\n",
s.ToString().c_str());
diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc
index 145a96a75..ee90711b1 100644
--- a/db_stress_tool/multi_ops_txns_stress.cc
+++ b/db_stress_tool/multi_ops_txns_stress.cc
@@ -373,10 +373,15 @@ Status MultiOpsTxnsStressTest::TestGet(
ThreadState* thread, const ReadOptions& read_opts,
const std::vector<int>& /*rand_column_families*/,
const std::vector<int64_t>& /*rand_keys*/) {
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
uint32_t a = 0;
uint32_t pos = 0;
std::tie(a, pos) = ChooseExistingA(thread);
- return PointLookupTxn(thread, read_opts, a);
+ Status s = PointLookupTxn(thread, read_opts, a);
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
+ return s;
}
// Not used.
@@ -416,10 +421,15 @@ Status MultiOpsTxnsStressTest::TestIterate(
ThreadState* thread, const ReadOptions& read_opts,
const std::vector<int>& /*rand_column_families*/,
const std::vector<int64_t>& /*rand_keys*/) {
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
uint32_t c = 0;
uint32_t pos = 0;
std::tie(c, pos) = ChooseExistingC(thread);
- return RangeScanTxn(thread, read_opts, c);
+ Status s = RangeScanTxn(thread, read_opts, c);
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
+ return s;
}
// Not intended for use.
@@ -1221,7 +1231,11 @@ void MultiOpsTxnsStressTest::VerifyPkSkFast(const ReadOptions& read_options,
assert(db_ == db);
assert(db_ != nullptr);
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
const Snapshot* const snapshot = db_->GetSnapshot();
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
assert(snapshot);
ManagedSnapshot snapshot_guard(db_, snapshot);
diff --git a/env/env.cc b/env/env.cc
index 8ad828a83..7405f66e0 100644
--- a/env/env.cc
+++ b/env/env.cc
@@ -1051,9 +1051,10 @@ void Log(const std::shared_ptr<Logger>& info_log, const char* format, ...) {
}
Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname,
- bool should_sync) {
+ bool should_sync, const IOOptions* io_options) {
const auto& fs = env->GetFileSystem();
- return WriteStringToFile(fs.get(), data, fname, should_sync);
+ return WriteStringToFile(fs.get(), data, fname, should_sync,
+ io_options ? *io_options : IOOptions());
}
Status ReadFileToString(Env* env, const std::string& fname, std::string* data) {
diff --git a/env/env_test.cc b/env/env_test.cc
index 4cf3c988d..f478806f8 100644
--- a/env/env_test.cc
+++ b/env/env_test.cc
@@ -2610,7 +2610,7 @@ TEST_F(EnvTest, IsDirectory) {
FileOptions(),
SystemClock::Default().get()));
constexpr char buf[] = "test";
- s = fwriter->Append(buf);
+ s = fwriter->Append(IOOptions(), buf);
ASSERT_OK(s);
}
ASSERT_OK(Env::Default()->IsDirectory(test_file_path, &is_dir));
diff --git a/env/file_system.cc b/env/file_system.cc
index e01ec12c9..27c7207f0 100644
--- a/env/file_system.cc
+++ b/env/file_system.cc
@@ -180,19 +180,20 @@ FileOptions FileSystem::OptimizeForBlobFileRead(
}
IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
- const std::string& fname, bool should_sync) {
+ const std::string& fname, bool should_sync,
+ const IOOptions& io_options) {
std::unique_ptr<FSWritableFile> file;
EnvOptions soptions;
IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr);
if (!s.ok()) {
return s;
}
- s = file->Append(data, IOOptions(), nullptr);
+ s = file->Append(data, io_options, nullptr);
if (s.ok() && should_sync) {
- s = file->Sync(IOOptions(), nullptr);
+ s = file->Sync(io_options, nullptr);
}
if (!s.ok()) {
- fs->DeleteFile(fname, IOOptions(), nullptr);
+ fs->DeleteFile(fname, io_options, nullptr);
}
return s;
}
diff --git a/file/file_util.cc b/file/file_util.cc
index 9eee10637..d78a03491 100644
--- a/file/file_util.cc
+++ b/file/file_util.cc
@@ -26,6 +26,7 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source,
FileOptions soptions;
IOStatus io_s;
std::unique_ptr<SequentialFileReader> src_reader;
+ const IOOptions opts;
{
soptions.temperature = temperature;
@@ -37,7 +38,7 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source,
if (size == 0) {
// default argument means copy everything
- io_s = fs->GetFileSize(source, IOOptions(), &size, nullptr);
+ io_s = fs->GetFileSize(source, opts, &size, nullptr);
if (!io_s.ok()) {
return io_s;
}
@@ -60,13 +61,14 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source,
if (slice.size() == 0) {
return IOStatus::Corruption("file too small");
}
- io_s = dest_writer->Append(slice);
+
+ io_s = dest_writer->Append(opts, slice);
if (!io_s.ok()) {
return io_s;
}
size -= slice.size();
}
- return dest_writer->Sync(use_fsync);
+ return dest_writer->Sync(opts, use_fsync);
}
IOStatus CopyFile(FileSystem* fs, const std::string& source,
@@ -85,6 +87,7 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source,
return io_s;
}
+ // TODO: pass in Histograms if the destination file is sst or blob
dest_writer.reset(
new WritableFileWriter(std::move(destfile), destination, options));
}
@@ -99,19 +102,21 @@ IOStatus CreateFile(FileSystem* fs, const std::string& destination,
const EnvOptions soptions;
IOStatus io_s;
std::unique_ptr<WritableFileWriter> dest_writer;
+ const IOOptions opts;
std::unique_ptr<FSWritableFile> destfile;
io_s = fs->NewWritableFile(destination, soptions, &destfile, nullptr);
if (!io_s.ok()) {
return io_s;
}
+ // TODO: pass in Histograms if the destination file is sst or blob
dest_writer.reset(
new WritableFileWriter(std::move(destfile), destination, soptions));
- io_s = dest_writer->Append(Slice(contents));
+ io_s = dest_writer->Append(opts, Slice(contents));
if (!io_s.ok()) {
return io_s;
}
- return dest_writer->Sync(use_fsync);
+ return dest_writer->Sync(opts, use_fsync);
}
Status DeleteDBFile(const ImmutableDBOptions* db_options,
diff --git a/file/file_util.h b/file/file_util.h
index 9c95478c7..032afc19b 100644
--- a/file/file_util.h
+++ b/file/file_util.h
@@ -87,6 +87,14 @@ inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro,
return IOStatus::OK();
}
+inline IOStatus PrepareIOFromWriteOptions(const WriteOptions& wo,
+ IOOptions& opts) {
+ opts.rate_limiter_priority = wo.rate_limiter_priority;
+ opts.io_activity = wo.io_activity;
+
+ return IOStatus::OK();
+}
+
// Test method to delete the input directory and all of its contents.
// This method is destructive and is meant for use only in tests!!!
Status DestroyDir(Env* env, const std::string& dir);
diff --git a/file/filename.cc b/file/filename.cc
index fb7d25472..b34a0e113 100644
--- a/file/filename.cc
+++ b/file/filename.cc
@@ -13,8 +13,10 @@
#include <cstdio>
#include <vector>
+#include "file/file_util.h"
#include "file/writable_file_writer.h"
#include "rocksdb/env.h"
+#include "rocksdb/file_system.h"
#include "test_util/sync_point.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
@@ -384,8 +386,8 @@ bool ParseFileName(const std::string& fname, uint64_t* number,
return true;
}
-IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
- uint64_t descriptor_number,
+IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs,
+ const std::string& dbname, uint64_t descriptor_number,
FSDirectory* dir_contains_current_file) {
// Remove leading "dbname/" and add newline to manifest file name
std::string manifest = DescriptorFileName(dbname, descriptor_number);
@@ -393,21 +395,25 @@ IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
assert(contents.starts_with(dbname + "/"));
contents.remove_prefix(dbname.size() + 1);
std::string tmp = TempFileName(dbname, descriptor_number);
- IOStatus s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true);
+ IOOptions opts;
+ IOStatus s = PrepareIOFromWriteOptions(write_options, opts);
+ if (s.ok()) {
+ s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts);
+ }
TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s);
if (s.ok()) {
TEST_KILL_RANDOM_WITH_WEIGHT("SetCurrentFile:0", REDUCE_ODDS2);
- s = fs->RenameFile(tmp, CurrentFileName(dbname), IOOptions(), nullptr);
+ s = fs->RenameFile(tmp, CurrentFileName(dbname), opts, nullptr);
TEST_KILL_RANDOM_WITH_WEIGHT("SetCurrentFile:1", REDUCE_ODDS2);
TEST_SYNC_POINT_CALLBACK("SetCurrentFile:AfterRename", &s);
}
if (s.ok()) {
if (dir_contains_current_file != nullptr) {
s = dir_contains_current_file->FsyncWithDirOptions(
- IOOptions(), nullptr, DirFsyncOptions(CurrentFileName(dbname)));
+ opts, nullptr, DirFsyncOptions(CurrentFileName(dbname)));
}
} else {
- fs->DeleteFile(tmp, IOOptions(), nullptr)
+ fs->DeleteFile(tmp, opts, nullptr)
.PermitUncheckedError(); // NOTE: PermitUncheckedError is acceptable
// here as we are already handling an error
// case, and this is just a best-attempt
@@ -416,8 +422,8 @@ IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
return s;
}
-Status SetIdentityFile(Env* env, const std::string& dbname,
- const std::string& db_id) {
+Status SetIdentityFile(const WriteOptions& write_options, Env* env,
+ const std::string& dbname, const std::string& db_id) {
std::string id;
if (db_id.empty()) {
id = env->GenerateUniqueId();
@@ -428,17 +434,21 @@ Status SetIdentityFile(Env* env, const std::string& dbname,
// Reserve the filename dbname/000000.dbtmp for the temporary identity file
std::string tmp = TempFileName(dbname, 0);
std::string identify_file_name = IdentityFileName(dbname);
- Status s = WriteStringToFile(env, id, tmp, true);
+ Status s;
+ IOOptions opts;
+ s = PrepareIOFromWriteOptions(write_options, opts);
+ if (s.ok()) {
+ s = WriteStringToFile(env, id, tmp, true, &opts);
+ }
if (s.ok()) {
s = env->RenameFile(tmp, identify_file_name);
}
std::unique_ptr<FSDirectory> dir_obj;
if (s.ok()) {
- s = env->GetFileSystem()->NewDirectory(dbname, IOOptions(), &dir_obj,
- nullptr);
+ s = env->GetFileSystem()->NewDirectory(dbname, opts, &dir_obj, nullptr);
}
if (s.ok()) {
- s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr,
+ s = dir_obj->FsyncWithDirOptions(opts, nullptr,
DirFsyncOptions(identify_file_name));
}
@@ -446,7 +456,7 @@ Status SetIdentityFile(Env* env, const std::string& dbname,
// if it is not implemented. Detailed explanations can be found in
// db/db_impl/db_impl.h
if (s.ok()) {
- Status temp_s = dir_obj->Close(IOOptions(), nullptr);
+ Status temp_s = dir_obj->Close(opts, nullptr);
if (!temp_s.ok()) {
if (temp_s.IsNotSupported()) {
temp_s.PermitUncheckedError();
@@ -462,10 +472,16 @@ Status SetIdentityFile(Env* env, const std::string& dbname,
}
IOStatus SyncManifest(const ImmutableDBOptions* db_options,
+ const WriteOptions& write_options,
WritableFileWriter* file) {
TEST_KILL_RANDOM_WITH_WEIGHT("SyncManifest:0", REDUCE_ODDS2);
StopWatch sw(db_options->clock, db_options->stats, MANIFEST_FILE_SYNC_MICROS);
- return file->Sync(db_options->use_fsync);
+ IOOptions io_options;
+ IOStatus s = PrepareIOFromWriteOptions(write_options, io_options);
+ if (!s.ok()) {
+ return s;
+ }
+ return file->Sync(io_options, db_options->use_fsync);
}
Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
diff --git a/file/filename.h b/file/filename.h
index 2eb125b6a..156b7224f 100644
--- a/file/filename.h
+++ b/file/filename.h
@@ -162,16 +162,19 @@ extern bool ParseFileName(const std::string& filename, uint64_t* number,
// specified number. On its success and when dir_contains_current_file is not
// nullptr, the function will fsync the directory containing the CURRENT
// file.
-extern IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
+extern IOStatus SetCurrentFile(const WriteOptions& write_options,
+ FileSystem* fs, const std::string& dbname,
uint64_t descriptor_number,
FSDirectory* dir_contains_current_file);
// Make the IDENTITY file for the db
-extern Status SetIdentityFile(Env* env, const std::string& dbname,
+extern Status SetIdentityFile(const WriteOptions& write_options, Env* env,
+ const std::string& dbname,
const std::string& db_id = {});
// Sync manifest file `file`.
extern IOStatus SyncManifest(const ImmutableDBOptions* db_options,
+ const WriteOptions& write_options,
WritableFileWriter* file);
// Return list of file names of info logs in `file_names`.
diff --git a/file/writable_file_writer.cc b/file/writable_file_writer.cc
index 908878a5f..4fadf1d71 100644
--- a/file/writable_file_writer.cc
+++ b/file/writable_file_writer.cc
@@ -13,6 +13,7 @@
#include <mutex>
#include "db/version_edit.h"
+#include "file/file_util.h"
#include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
@@ -24,6 +25,24 @@
#include "util/rate_limiter_impl.h"
namespace ROCKSDB_NAMESPACE {
+// Picks the per-activity write histogram for a file writer.
+//
+// Only writers whose base histogram is SST_WRITE_MICROS or
+// BLOB_DB_BLOB_FILE_WRITE_MICROS have a per-activity breakdown, and only for
+// the flush, compaction and DB-open activities. Every other combination
+// yields the HISTOGRAM_ENUM_MAX sentinel.
+inline Histograms GetFileWriteHistograms(Histograms file_writer_hist,
+                                         Env::IOActivity io_activity) {
+  const bool has_activity_breakdown =
+      file_writer_hist == Histograms::SST_WRITE_MICROS ||
+      file_writer_hist == Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS;
+  if (!has_activity_breakdown) {
+    return Histograms::HISTOGRAM_ENUM_MAX;
+  }
+
+  if (io_activity == Env::IOActivity::kFlush) {
+    return Histograms::FILE_WRITE_FLUSH_MICROS;
+  }
+  if (io_activity == Env::IOActivity::kCompaction) {
+    return Histograms::FILE_WRITE_COMPACTION_MICROS;
+  }
+  if (io_activity == Env::IOActivity::kDBOpen) {
+    return Histograms::FILE_WRITE_DB_OPEN_MICROS;
+  }
+  // Any other activity has no dedicated histogram.
+  return Histograms::HISTOGRAM_ENUM_MAX;
+}
+
IOStatus WritableFileWriter::Create(const std::shared_ptr<FileSystem>& fs,
const std::string& fname,
const FileOptions& file_opts,
@@ -42,12 +61,16 @@ IOStatus WritableFileWriter::Create(const std::shared_ptr<FileSystem>& fs,
return io_s;
}
-IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
- Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::Append(const IOOptions& opts, const Slice& data,
+ uint32_t crc32c_checksum) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
+ StopWatch sw(clock_, stats_, hist_type_,
+ GetFileWriteHistograms(hist_type_, opts.io_activity));
+
+ const IOOptions io_options = FinalizeIOOptions(opts);
const char* src = data.data();
size_t left = data.size();
IOStatus s;
@@ -59,10 +82,6 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
UpdateFileChecksum(data);
{
- IOOptions io_options;
- io_options.rate_limiter_priority =
- WritableFileWriter::DecideRateLimiterPriority(
- writable_file_->GetIOPriority(), op_rate_limiter_priority);
IOSTATS_TIMER_GUARD(prepare_write_nanos);
TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
@@ -88,7 +107,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
// Flush only when buffered I/O
if (!use_direct_io() && (buf_.Capacity() - buf_.CurrentSize()) < left) {
if (buf_.CurrentSize() > 0) {
- s = Flush(op_rate_limiter_priority);
+ s = Flush(io_options);
if (!s.ok()) {
set_seen_error();
return s;
@@ -119,7 +138,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
src += appended;
if (left > 0) {
- s = Flush(op_rate_limiter_priority);
+ s = Flush(io_options);
if (!s.ok()) {
break;
}
@@ -129,7 +148,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
} else {
assert(buf_.CurrentSize() == 0);
buffered_data_crc32c_checksum_ = crc32c_checksum;
- s = WriteBufferedWithChecksum(src, left, op_rate_limiter_priority);
+ s = WriteBufferedWithChecksum(io_options, src, left);
}
} else {
// In this case, either we do not need to do the data verification or
@@ -149,7 +168,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
src += appended;
if (left > 0) {
- s = Flush(op_rate_limiter_priority);
+ s = Flush(io_options);
if (!s.ok()) {
break;
}
@@ -160,9 +179,9 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
assert(buf_.CurrentSize() == 0);
if (perform_data_verification_ && buffered_data_with_checksum_) {
buffered_data_crc32c_checksum_ = crc32c::Value(src, left);
- s = WriteBufferedWithChecksum(src, left, op_rate_limiter_priority);
+ s = WriteBufferedWithChecksum(io_options, src, left);
} else {
- s = WriteBuffered(src, left, op_rate_limiter_priority);
+ s = WriteBuffered(io_options, src, left);
}
}
}
@@ -177,11 +196,12 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
return s;
}
-IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
- Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::Pad(const IOOptions& opts,
+ const size_t pad_bytes) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
+ const IOOptions io_options = FinalizeIOOptions(opts);
assert(pad_bytes < kDefaultPageSize);
size_t left = pad_bytes;
size_t cap = buf_.Capacity() - buf_.CurrentSize();
@@ -195,7 +215,7 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
buf_.PadWith(append_bytes, 0);
left -= append_bytes;
if (left > 0) {
- IOStatus s = Flush(op_rate_limiter_priority);
+ IOStatus s = Flush(io_options);
if (!s.ok()) {
set_seen_error();
return s;
@@ -214,11 +234,12 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes,
return IOStatus::OK();
}
-IOStatus WritableFileWriter::Close() {
+IOStatus WritableFileWriter::Close(const IOOptions& opts) {
+ IOOptions io_options = FinalizeIOOptions(opts);
if (seen_error()) {
IOStatus interim;
if (writable_file_.get() != nullptr) {
- interim = writable_file_->Close(IOOptions(), nullptr);
+ interim = writable_file_->Close(io_options, nullptr);
writable_file_.reset();
}
if (interim.ok()) {
@@ -240,11 +261,9 @@ IOStatus WritableFileWriter::Close() {
}
IOStatus s;
- s = Flush(); // flush cache to OS
+ s = Flush(io_options); // flush cache to OS
IOStatus interim;
- IOOptions io_options;
- io_options.rate_limiter_priority = writable_file_->GetIOPriority();
// In direct I/O mode we write whole pages so
// we need to let the file know where data ends.
if (use_direct_io()) {
@@ -322,11 +341,13 @@ IOStatus WritableFileWriter::Close() {
// write out the cached data to the OS cache or storage if direct I/O
// enabled
-IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::Flush(const IOOptions& opts) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
+ const IOOptions io_options = FinalizeIOOptions(opts);
+
IOStatus s;
TEST_KILL_RANDOM_WITH_WEIGHT("WritableFileWriter::Flush:0", REDUCE_ODDS2);
@@ -334,18 +355,17 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
if (use_direct_io()) {
if (pending_sync_) {
if (perform_data_verification_ && buffered_data_with_checksum_) {
- s = WriteDirectWithChecksum(op_rate_limiter_priority);
+ s = WriteDirectWithChecksum(io_options);
} else {
- s = WriteDirect(op_rate_limiter_priority);
+ s = WriteDirect(io_options);
}
}
} else {
if (perform_data_verification_ && buffered_data_with_checksum_) {
- s = WriteBufferedWithChecksum(buf_.BufferStart(), buf_.CurrentSize(),
- op_rate_limiter_priority);
+ s = WriteBufferedWithChecksum(io_options, buf_.BufferStart(),
+ buf_.CurrentSize());
} else {
- s = WriteBuffered(buf_.BufferStart(), buf_.CurrentSize(),
- op_rate_limiter_priority);
+ s = WriteBuffered(io_options, buf_.BufferStart(), buf_.CurrentSize());
}
}
if (!s.ok()) {
@@ -359,10 +379,6 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
if (ShouldNotifyListeners()) {
start_ts = FileOperationInfo::StartNow();
}
- IOOptions io_options;
- io_options.rate_limiter_priority =
- WritableFileWriter::DecideRateLimiterPriority(
- writable_file_->GetIOPriority(), op_rate_limiter_priority);
s = writable_file_->Flush(io_options, nullptr);
if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::steady_clock::now();
@@ -400,7 +416,8 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
assert(offset_sync_to >= last_sync_size_);
if (offset_sync_to > 0 &&
offset_sync_to - last_sync_size_ >= bytes_per_sync_) {
- s = RangeSync(last_sync_size_, offset_sync_to - last_sync_size_);
+ s = RangeSync(io_options, last_sync_size_,
+ offset_sync_to - last_sync_size_);
if (!s.ok()) {
set_seen_error();
}
@@ -429,19 +446,25 @@ const char* WritableFileWriter::GetFileChecksumFuncName() const {
}
}
-IOStatus WritableFileWriter::Sync(bool use_fsync) {
+// Forwards to PrepareIOFromWriteOptions(wo, opts) and returns that call's
+// status unchanged.
+IOStatus WritableFileWriter::PrepareIOOptions(const WriteOptions& wo,
+ IOOptions& opts) {
+ return PrepareIOFromWriteOptions(wo, opts);
+}
+
+IOStatus WritableFileWriter::Sync(const IOOptions& opts, bool use_fsync) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
- IOStatus s = Flush();
+ IOOptions io_options = FinalizeIOOptions(opts);
+ IOStatus s = Flush(io_options);
if (!s.ok()) {
set_seen_error();
return s;
}
TEST_KILL_RANDOM("WritableFileWriter::Sync:0");
if (!use_direct_io() && pending_sync_) {
- s = SyncInternal(use_fsync);
+ s = SyncInternal(io_options, use_fsync);
if (!s.ok()) {
set_seen_error();
return s;
@@ -452,17 +475,19 @@ IOStatus WritableFileWriter::Sync(bool use_fsync) {
return IOStatus::OK();
}
-IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) {
+IOStatus WritableFileWriter::SyncWithoutFlush(const IOOptions& opts,
+ bool use_fsync) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
+ IOOptions io_options = FinalizeIOOptions(opts);
if (!writable_file_->IsSyncThreadSafe()) {
return IOStatus::NotSupported(
"Can't WritableFileWriter::SyncWithoutFlush() because "
"WritableFile::IsSyncThreadSafe() is false");
}
TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:1");
- IOStatus s = SyncInternal(use_fsync);
+ IOStatus s = SyncInternal(io_options, use_fsync);
TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:2");
if (!s.ok()) {
#ifndef NDEBUG
@@ -473,7 +498,8 @@ IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) {
return s;
}
-IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
+IOStatus WritableFileWriter::SyncInternal(const IOOptions& opts,
+ bool use_fsync) {
// Caller is supposed to check seen_error_
IOStatus s;
IOSTATS_TIMER_GUARD(fsync_nanos);
@@ -487,12 +513,10 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
start_ts = FileOperationInfo::StartNow();
}
- IOOptions io_options;
- io_options.rate_limiter_priority = writable_file_->GetIOPriority();
if (use_fsync) {
- s = writable_file_->Fsync(io_options, nullptr);
+ s = writable_file_->Fsync(opts, nullptr);
} else {
- s = writable_file_->Sync(io_options, nullptr);
+ s = writable_file_->Sync(opts, nullptr);
}
if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::steady_clock::now();
@@ -511,7 +535,8 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
return s;
}
-IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
+IOStatus WritableFileWriter::RangeSync(const IOOptions& opts, uint64_t offset,
+ uint64_t nbytes) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
@@ -522,9 +547,7 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
if (ShouldNotifyListeners()) {
start_ts = FileOperationInfo::StartNow();
}
- IOOptions io_options;
- io_options.rate_limiter_priority = writable_file_->GetIOPriority();
- IOStatus s = writable_file_->RangeSync(offset, nbytes, io_options, nullptr);
+ IOStatus s = writable_file_->RangeSync(offset, nbytes, opts, nullptr);
if (!s.ok()) {
set_seen_error();
}
@@ -541,8 +564,8 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
// This method writes to disk the specified data and makes use of the rate
// limiter if available
-IOStatus WritableFileWriter::WriteBuffered(
- const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::WriteBuffered(const IOOptions& opts,
+ const char* data, size_t size) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
@@ -553,11 +576,7 @@ IOStatus WritableFileWriter::WriteBuffered(
size_t left = size;
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
- Env::IOPriority rate_limiter_priority_used =
- WritableFileWriter::DecideRateLimiterPriority(
- writable_file_->GetIOPriority(), op_rate_limiter_priority);
- IOOptions io_options;
- io_options.rate_limiter_priority = rate_limiter_priority_used;
+ Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority;
while (left > 0) {
size_t allowed = left;
@@ -573,7 +592,7 @@ IOStatus WritableFileWriter::WriteBuffered(
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
FileOperationInfo::StartTimePoint start_ts;
- uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr);
+ uint64_t old_size = writable_file_->GetFileSize(opts, nullptr);
if (ShouldNotifyListeners()) {
start_ts = FileOperationInfo::StartNow();
old_size = next_write_offset_;
@@ -585,10 +604,10 @@ IOStatus WritableFileWriter::WriteBuffered(
if (perform_data_verification_) {
Crc32cHandoffChecksumCalculation(src, allowed, checksum_buf);
v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
- s = writable_file_->Append(Slice(src, allowed), io_options, v_info,
+ s = writable_file_->Append(Slice(src, allowed), opts, v_info,
nullptr);
} else {
- s = writable_file_->Append(Slice(src, allowed), io_options, nullptr);
+ s = writable_file_->Append(Slice(src, allowed), opts, nullptr);
}
if (!s.ok()) {
// If writable_file_->Append() failed, then the data may or may not
@@ -635,8 +654,9 @@ IOStatus WritableFileWriter::WriteBuffered(
return s;
}
-IOStatus WritableFileWriter::WriteBufferedWithChecksum(
- const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::WriteBufferedWithChecksum(const IOOptions& opts,
+ const char* data,
+ size_t size) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
@@ -648,11 +668,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
size_t left = size;
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
- Env::IOPriority rate_limiter_priority_used =
- WritableFileWriter::DecideRateLimiterPriority(
- writable_file_->GetIOPriority(), op_rate_limiter_priority);
- IOOptions io_options;
- io_options.rate_limiter_priority = rate_limiter_priority_used;
+ Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority;
// Check how much is allowed. Here, we loop until the rate limiter allows to
// write the entire buffer.
// TODO: need to be improved since it sort of defeats the purpose of the rate
@@ -673,7 +689,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
FileOperationInfo::StartTimePoint start_ts;
- uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr);
+ uint64_t old_size = writable_file_->GetFileSize(opts, nullptr);
if (ShouldNotifyListeners()) {
start_ts = FileOperationInfo::StartNow();
old_size = next_write_offset_;
@@ -685,7 +701,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_);
v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
- s = writable_file_->Append(Slice(src, left), io_options, v_info, nullptr);
+ s = writable_file_->Append(Slice(src, left), opts, v_info, nullptr);
SetPerfLevel(prev_perf_level);
}
if (ShouldNotifyListeners()) {
@@ -755,8 +771,7 @@ void WritableFileWriter::Crc32cHandoffChecksumCalculation(const char* data,
// whole number of pages to be written again on the next flush because we can
// only write on aligned
// offsets.
-IOStatus WritableFileWriter::WriteDirect(
- Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::WriteDirect(const IOOptions& opts) {
if (seen_error()) {
assert(false);
@@ -785,11 +800,7 @@ IOStatus WritableFileWriter::WriteDirect(
size_t left = buf_.CurrentSize();
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
- Env::IOPriority rate_limiter_priority_used =
- WritableFileWriter::DecideRateLimiterPriority(
- writable_file_->GetIOPriority(), op_rate_limiter_priority);
- IOOptions io_options;
- io_options.rate_limiter_priority = rate_limiter_priority_used;
+ Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority;
while (left > 0) {
// Check how much is allowed
@@ -813,10 +824,10 @@ IOStatus WritableFileWriter::WriteDirect(
Crc32cHandoffChecksumCalculation(src, size, checksum_buf);
v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
s = writable_file_->PositionedAppend(Slice(src, size), write_offset,
- io_options, v_info, nullptr);
+ opts, v_info, nullptr);
} else {
s = writable_file_->PositionedAppend(Slice(src, size), write_offset,
- io_options, nullptr);
+ opts, nullptr);
}
if (ShouldNotifyListeners()) {
@@ -859,8 +870,7 @@ IOStatus WritableFileWriter::WriteDirect(
return s;
}
-IOStatus WritableFileWriter::WriteDirectWithChecksum(
- Env::IOPriority op_rate_limiter_priority) {
+IOStatus WritableFileWriter::WriteDirectWithChecksum(const IOOptions& opts) {
if (seen_error()) {
return AssertFalseAndGetStatusForPrevError();
}
@@ -895,11 +905,7 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
DataVerificationInfo v_info;
char checksum_buf[sizeof(uint32_t)];
- Env::IOPriority rate_limiter_priority_used =
- WritableFileWriter::DecideRateLimiterPriority(
- writable_file_->GetIOPriority(), op_rate_limiter_priority);
- IOOptions io_options;
- io_options.rate_limiter_priority = rate_limiter_priority_used;
+ Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority;
// Check how much is allowed. Here, we loop until the rate limiter allows to
// write the entire buffer.
// TODO: need to be improved since it sort of defeats the purpose of the rate
@@ -925,8 +931,8 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
// direct writes must be positional
EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_);
v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
- s = writable_file_->PositionedAppend(Slice(src, left), write_offset,
- io_options, v_info, nullptr);
+ s = writable_file_->PositionedAppend(Slice(src, left), write_offset, opts,
+ v_info, nullptr);
if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::steady_clock::now();
@@ -986,4 +992,14 @@ Env::IOPriority WritableFileWriter::DecideRateLimiterPriority(
}
}
+// Returns a copy of `opts` whose rate_limiter_priority has been resolved
+// against the underlying file's own I/O priority via
+// DecideRateLimiterPriority(). If the writer no longer holds a file, the copy
+// is returned untouched.
+IOOptions WritableFileWriter::FinalizeIOOptions(const IOOptions& opts) const {
+  IOOptions finalized(opts);
+  if (writable_file_.get() == nullptr) {
+    // No file to consult for a priority; keep the caller's value.
+    return finalized;
+  }
+  finalized.rate_limiter_priority =
+      WritableFileWriter::DecideRateLimiterPriority(
+          writable_file_->GetIOPriority(), opts.rate_limiter_priority);
+  return finalized;
+}
} // namespace ROCKSDB_NAMESPACE
diff --git a/file/writable_file_writer.h b/file/writable_file_writer.h
index aac0f5949..6b71cfa64 100644
--- a/file/writable_file_writer.h
+++ b/file/writable_file_writer.h
@@ -13,6 +13,7 @@
#include "db/version_edit.h"
#include "env/file_system_tracer.h"
+#include "monitoring/thread_status_util.h"
#include "port/port.h"
#include "rocksdb/file_checksum.h"
#include "rocksdb/file_system.h"
@@ -159,6 +160,7 @@ class WritableFileWriter {
uint64_t bytes_per_sync_;
RateLimiter* rate_limiter_;
Statistics* stats_;
+ Histograms hist_type_;
std::vector<std::shared_ptr<EventListener>> listeners_;
std::unique_ptr<FileChecksumGenerator> checksum_generator_;
bool checksum_finalized_;
@@ -173,6 +175,7 @@ class WritableFileWriter {
const FileOptions& options, SystemClock* clock = nullptr,
const std::shared_ptr<IOTracer>& io_tracer = nullptr,
Statistics* stats = nullptr,
+ Histograms hist_type = Histograms::HISTOGRAM_ENUM_MAX,
const std::vector<std::shared_ptr<EventListener>>& listeners = {},
FileChecksumGenFactory* file_checksum_gen_factory = nullptr,
bool perform_data_verification = false,
@@ -191,6 +194,7 @@ class WritableFileWriter {
bytes_per_sync_(options.bytes_per_sync),
rate_limiter_(options.rate_limiter),
stats_(stats),
+ hist_type_(hist_type),
listeners_(),
checksum_generator_(nullptr),
checksum_finalized_(false),
@@ -222,35 +226,42 @@ class WritableFileWriter {
const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<WritableFileWriter>* writer,
IODebugContext* dbg);
+
+ static IOStatus PrepareIOOptions(const WriteOptions& wo, IOOptions& opts);
+
WritableFileWriter(const WritableFileWriter&) = delete;
WritableFileWriter& operator=(const WritableFileWriter&) = delete;
~WritableFileWriter() {
- auto s = Close();
+ // Best-effort close on destruction. The thread's current operation type is
+ // saved, switched to OP_UNKNOWN for the duration of Close(), and restored
+ // afterwards.
+ // NOTE(review): presumably this keeps the default-constructed IOOptions()
+ // consistent with the thread's recorded operation during Close() -- confirm.
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(
+ ThreadStatus::OperationType::OP_UNKNOWN);
+ auto s = Close(IOOptions());
s.PermitUncheckedError();
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
}
std::string file_name() const { return file_name_; }
// When this Append API is called, if the crc32c_checksum is not provided, we
// will calculate the checksum internally.
- IOStatus Append(const Slice& data, uint32_t crc32c_checksum = 0,
- Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL);
+ IOStatus Append(const IOOptions& opts, const Slice& data,
+ uint32_t crc32c_checksum = 0);
- IOStatus Pad(const size_t pad_bytes,
- Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL);
+ IOStatus Pad(const IOOptions& opts, const size_t pad_bytes);
- IOStatus Flush(Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL);
+ IOStatus Flush(const IOOptions& opts);
- IOStatus Close();
+ IOStatus Close(const IOOptions& opts);
- IOStatus Sync(bool use_fsync);
+ IOStatus Sync(const IOOptions& opts, bool use_fsync);
// Sync only the data that was already Flush()ed. Safe to call concurrently
// with Append() and Flush(). If !writable_file_->IsSyncThreadSafe(),
// returns NotSupported status.
- IOStatus SyncWithoutFlush(bool use_fsync);
+ IOStatus SyncWithoutFlush(const IOOptions& opts, bool use_fsync);
uint64_t GetFileSize() const {
return filesize_.load(std::memory_order_acquire);
@@ -307,14 +318,20 @@ class WritableFileWriter {
// Used when os buffering is OFF and we are writing
// DMA such as in Direct I/O mode
- IOStatus WriteDirect(Env::IOPriority op_rate_limiter_priority);
- IOStatus WriteDirectWithChecksum(Env::IOPriority op_rate_limiter_priority);
+ // `opts` must already have been processed by `FinalizeIOOptions()` before being passed in
+ IOStatus WriteDirect(const IOOptions& opts);
+ // `opts` must already have been processed by `FinalizeIOOptions()` before being passed in
+ IOStatus WriteDirectWithChecksum(const IOOptions& opts);
// Normal write.
- IOStatus WriteBuffered(const char* data, size_t size,
- Env::IOPriority op_rate_limiter_priority);
- IOStatus WriteBufferedWithChecksum(const char* data, size_t size,
- Env::IOPriority op_rate_limiter_priority);
- IOStatus RangeSync(uint64_t offset, uint64_t nbytes);
- IOStatus SyncInternal(bool use_fsync);
+ // `opts` must already have been processed by `FinalizeIOOptions()` before being passed in
+ IOStatus WriteBuffered(const IOOptions& opts, const char* data, size_t size);
+ // `opts` must already have been processed by `FinalizeIOOptions()` before being passed in
+ IOStatus WriteBufferedWithChecksum(const IOOptions& opts, const char* data,
+ size_t size);
+ // `opts` must already have been processed by `FinalizeIOOptions()` before being passed in
+ IOStatus RangeSync(const IOOptions& opts, uint64_t offset, uint64_t nbytes);
+ // `opts` must already have been processed by `FinalizeIOOptions()` before being passed in
+ IOStatus SyncInternal(const IOOptions& opts, bool use_fsync);
+ IOOptions FinalizeIOOptions(const IOOptions& opts) const;
};
} // namespace ROCKSDB_NAMESPACE
diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h
index 7b0220635..02cc60494 100644
--- a/include/rocksdb/env.h
+++ b/include/rocksdb/env.h
@@ -67,6 +67,7 @@ struct ThreadStatus;
class FileSystem;
class SystemClock;
struct ConfigOptions;
+struct IOOptions;
const size_t kDefaultPageSize = 4 * 1024;
@@ -1352,7 +1353,8 @@ extern void Fatal(Logger* info_log, const char* format, ...)
// A utility routine: write "data" to the named file.
extern Status WriteStringToFile(Env* env, const Slice& data,
const std::string& fname,
- bool should_sync = false);
+ bool should_sync = false,
+ const IOOptions* io_options = nullptr);
// A utility routine: read contents of named file into *data
extern Status ReadFileToString(Env* env, const std::string& fname,
diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h
index 647aad6c9..859032673 100644
--- a/include/rocksdb/file_system.h
+++ b/include/rocksdb/file_system.h
@@ -1918,7 +1918,8 @@ class FSDirectoryWrapper : public FSDirectory {
// A utility routine: write "data" to the named file.
extern IOStatus WriteStringToFile(FileSystem* fs, const Slice& data,
const std::string& fname,
- bool should_sync = false);
+ bool should_sync = false,
+ const IOOptions& io_options = IOOptions());
// A utility routine: read contents of named file into *data
extern IOStatus ReadFileToString(FileSystem* fs, const std::string& fname,
diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h
index ae5ed2c26..9146f6d7b 100644
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -1781,7 +1781,7 @@ struct WriteOptions {
// system call followed by "fdatasync()".
//
// Default: false
- bool sync;
+ bool sync = false;
// If true, writes will not first go to the write ahead log,
// and the write may get lost after a crash. The backup engine
@@ -1789,18 +1789,18 @@ struct WriteOptions {
// you disable write-ahead logs, you must create backups with
// flush_before_backup=true to avoid losing unflushed memtable data.
// Default: false
- bool disableWAL;
+ bool disableWAL = false;
// If true and if user is trying to write to column families that don't exist
// (they were dropped), ignore the write (don't return an error). If there
// are multiple writes in a WriteBatch, other writes will succeed.
// Default: false
- bool ignore_missing_column_families;
+ bool ignore_missing_column_families = false;
// If true and we need to wait or sleep for the write request, fails
// immediately with Status::Incomplete().
// Default: false
- bool no_slowdown;
+ bool no_slowdown = false;
// If true, this write request is of lower priority if compaction is
// behind. In this case, no_slowdown = true, the request will be canceled
@@ -1809,7 +1809,7 @@ struct WriteOptions {
// it introduces minimum impacts to high priority writes.
//
// Default: false
- bool low_pri;
+ bool low_pri = false;
// If true, this writebatch will maintain the last insert positions of each
// memtable as hints in concurrent write. It can improve write performance
@@ -1818,7 +1818,7 @@ struct WriteOptions {
// option will be ignored.
//
// Default: false
- bool memtable_insert_hint_per_batch;
+ bool memtable_insert_hint_per_batch = false;
// For writes associated with this option, charge the internal rate
// limiter (see `DBOptions::rate_limiter`) at the specified priority. The
@@ -1833,24 +1833,25 @@ struct WriteOptions {
// due to implementation constraints.
//
// Default: `Env::IO_TOTAL`
- Env::IOPriority rate_limiter_priority;
+ Env::IOPriority rate_limiter_priority = Env::IO_TOTAL;
// `protection_bytes_per_key` is the number of bytes used to store
// protection information for each key entry. Currently supported values are
// zero (disabled) and eight.
//
// Default: zero (disabled).
- size_t protection_bytes_per_key;
-
- WriteOptions()
- : sync(false),
- disableWAL(false),
- ignore_missing_column_families(false),
- no_slowdown(false),
- low_pri(false),
- memtable_insert_hint_per_batch(false),
- rate_limiter_priority(Env::IO_TOTAL),
- protection_bytes_per_key(0) {}
+ size_t protection_bytes_per_key = 0;
+
+ // For RocksDB internal use only
+ //
+ // Default: Env::IOActivity::kUnknown.
+ Env::IOActivity io_activity = Env::IOActivity::kUnknown;
+
+ WriteOptions() {}
+ explicit WriteOptions(Env::IOActivity _io_activity);
+ explicit WriteOptions(
+ Env::IOPriority _rate_limiter_priority,
+ Env::IOActivity _io_activity = Env::IOActivity::kUnknown);
};
// Options that control flush operations
diff --git a/include/rocksdb/sst_file_reader.h b/include/rocksdb/sst_file_reader.h
index 026ae66d0..dca5a8f03 100644
--- a/include/rocksdb/sst_file_reader.h
+++ b/include/rocksdb/sst_file_reader.h
@@ -34,6 +34,7 @@ class SstFileReader {
// Verifies whether there is corruption in this table.
Status VerifyChecksum(const ReadOptions& /*read_options*/);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
Status VerifyChecksum() { return VerifyChecksum(ReadOptions()); }
private:
@@ -42,4 +43,3 @@ class SstFileReader {
};
} // namespace ROCKSDB_NAMESPACE
-
diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h
index 9aab33712..1853e73d5 100644
--- a/include/rocksdb/statistics.h
+++ b/include/rocksdb/statistics.h
@@ -589,6 +589,14 @@ enum Histograms : uint32_t {
FILE_READ_VERIFY_DB_CHECKSUM_MICROS,
FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS,
+ // Time spent in writing SST files
+ SST_WRITE_MICROS,
+ // Time spent in writing SST table (currently only block-based table) or blob
+ // file for flush, compaction or db open
+ FILE_WRITE_FLUSH_MICROS,
+ FILE_WRITE_COMPACTION_MICROS,
+ FILE_WRITE_DB_OPEN_MICROS,
+
// The number of subcompactions actually scheduled during a compaction
NUM_SUBCOMPACTIONS_SCHEDULED,
// Value size distribution in each operation
diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h
index 3edff81aa..38e305f32 100644
--- a/java/rocksjni/portal.h
+++ b/java/rocksjni/portal.h
@@ -5716,10 +5716,17 @@ class HistogramTypeJni {
case ROCKSDB_NAMESPACE::Histograms::
FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS:
return 0x41;
+ case ROCKSDB_NAMESPACE::Histograms::SST_WRITE_MICROS:
+ return 0x42;
+ case ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_FLUSH_MICROS:
+ return 0x43;
+ case ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_COMPACTION_MICROS:
+ return 0x44;
+ case ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_DB_OPEN_MICROS:
+ return 0x45;
case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
// 0x1F for backwards compatibility on current minor version.
return 0x1F;
-
default:
// undefined/default
return 0x0;
@@ -5853,6 +5860,14 @@ class HistogramTypeJni {
case 0x41:
return ROCKSDB_NAMESPACE::Histograms::
FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS;
+ case 0x42:
+ return ROCKSDB_NAMESPACE::Histograms::SST_WRITE_MICROS;
+ case 0x43:
+ return ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_FLUSH_MICROS;
+ case 0x44:
+ return ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_COMPACTION_MICROS;
+ case 0x45:
+ return ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_DB_OPEN_MICROS;
case 0x1F:
// 0x1F for backwards compatibility on current minor version.
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;
diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/rocksdb/HistogramType.java
index 41fe241ad..aff5cad14 100644
--- a/java/src/main/java/org/rocksdb/HistogramType.java
+++ b/java/src/main/java/org/rocksdb/HistogramType.java
@@ -185,6 +185,14 @@ public enum HistogramType {
FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS((byte) 0x41),
+ SST_WRITE_MICROS((byte) 0x42),
+
+ FILE_WRITE_FLUSH_MICROS((byte) 0x43),
+
+ FILE_WRITE_COMPACTION_MICROS((byte) 0x44),
+
+ FILE_WRITE_DB_OPEN_MICROS((byte) 0x45),
+
// 0x1F for backwards compatibility on current minor version.
HISTOGRAM_ENUM_MAX((byte) 0x1F);
diff --git a/logging/env_logger.h b/logging/env_logger.h
index fc9b24550..b236dc817 100644
--- a/logging/env_logger.h
+++ b/logging/env_logger.h
@@ -75,7 +75,7 @@ class EnvLogger : public Logger {
mutex_.AssertHeld();
if (flush_pending_) {
flush_pending_ = false;
- file_.Flush().PermitUncheckedError();
+ file_.Flush(IOOptions()).PermitUncheckedError();
file_.reset_seen_error();
}
last_flush_micros_ = clock_->NowMicros();
@@ -93,7 +93,7 @@ class EnvLogger : public Logger {
Status CloseHelper() {
FileOpGuard guard(*this);
- const auto close_status = file_.Close();
+ const auto close_status = file_.Close(IOOptions());
if (close_status.ok()) {
return close_status;
@@ -162,7 +162,7 @@ class EnvLogger : public Logger {
{
FileOpGuard guard(*this);
// We will ignore any error returned by Append().
- file_.Append(Slice(base, p - base)).PermitUncheckedError();
+ file_.Append(IOOptions(), Slice(base, p - base)).PermitUncheckedError();
file_.reset_seen_error();
flush_pending_ = true;
const uint64_t now_micros = clock_->NowMicros();
diff --git a/monitoring/persistent_stats_history.cc b/monitoring/persistent_stats_history.cc
index 964fe536f..8c077c55f 100644
--- a/monitoring/persistent_stats_history.cc
+++ b/monitoring/persistent_stats_history.cc
@@ -41,6 +41,8 @@ Status DecodePersistentStatsVersionNumber(DBImpl* db, StatsVersionKeyType type,
} else if (type == StatsVersionKeyType::kCompatibleVersion) {
key = kCompatibleVersionKeyString;
}
+
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions options;
options.verify_checksums = true;
std::string result;
@@ -122,6 +124,7 @@ void PersistentStatsHistoryIterator::AdvanceIteratorByTime(uint64_t start_time,
uint64_t end_time) {
// try to find next entry in stats_history_ map
if (db_impl_ != nullptr) {
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions ro;
Iterator* iter =
db_impl_->NewIterator(ro, db_impl_->PersistentStatsColumnFamily());
diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc
index cc679ec0a..072083865 100644
--- a/monitoring/statistics.cc
+++ b/monitoring/statistics.cc
@@ -303,6 +303,10 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
"rocksdb.file.read.verify.db.checksum.micros"},
{FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS,
"rocksdb.file.read.verify.file.checksums.micros"},
+ {SST_WRITE_MICROS, "rocksdb.sst.write.micros"},
+ {FILE_WRITE_FLUSH_MICROS, "rocksdb.file.write.flush.micros"},
+ {FILE_WRITE_COMPACTION_MICROS, "rocksdb.file.write.compaction.micros"},
+ {FILE_WRITE_DB_OPEN_MICROS, "rocksdb.file.write.db.open.micros"},
{NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"},
{BYTES_PER_READ, "rocksdb.bytes.per.read"},
{BYTES_PER_WRITE, "rocksdb.bytes.per.write"},
diff --git a/options/options.cc b/options/options.cc
index d96cf4072..2e7c02503 100644
--- a/options/options.cc
+++ b/options/options.cc
@@ -703,4 +703,11 @@ ReadOptions::ReadOptions(bool _verify_checksums, bool _fill_cache)
ReadOptions::ReadOptions(Env::IOActivity _io_activity)
: io_activity(_io_activity) {}
+WriteOptions::WriteOptions(Env::IOActivity _io_activity)
+ : io_activity(_io_activity) {}
+
+WriteOptions::WriteOptions(Env::IOPriority _rate_limiter_priority,
+ Env::IOActivity _io_activity)
+ : rate_limiter_priority(_rate_limiter_priority),
+ io_activity(_io_activity) {}
} // namespace ROCKSDB_NAMESPACE
diff --git a/options/options_parser.cc b/options/options_parser.cc
index e2431016d..ec32f7644 100644
--- a/options/options_parser.cc
+++ b/options/options_parser.cc
@@ -35,7 +35,8 @@ static const std::string option_file_header =
"#\n"
"\n";
-Status PersistRocksDBOptions(const DBOptions& db_opt,
+Status PersistRocksDBOptions(const WriteOptions& write_options,
+ const DBOptions& db_opt,
const std::vector<std::string>& cf_names,
const std::vector<ColumnFamilyOptions>& cf_opts,
const std::string& file_name, FileSystem* fs) {
@@ -48,11 +49,12 @@ Status PersistRocksDBOptions(const DBOptions& db_opt,
if (db_opt.log_readahead_size > 0) {
config_options.file_readahead_size = db_opt.log_readahead_size;
}
- return PersistRocksDBOptions(config_options, db_opt, cf_names, cf_opts,
- file_name, fs);
+ return PersistRocksDBOptions(write_options, config_options, db_opt, cf_names,
+ cf_opts, file_name, fs);
}
-Status PersistRocksDBOptions(const ConfigOptions& config_options_in,
+Status PersistRocksDBOptions(const WriteOptions& write_options,
+ const ConfigOptions& config_options_in,
const DBOptions& db_opt,
const std::vector<std::string>& cf_names,
const std::vector<ColumnFamilyOptions>& cf_opts,
@@ -79,62 +81,70 @@ Status PersistRocksDBOptions(const ConfigOptions& config_options_in,
std::string options_file_content;
- s = writable->Append(
- option_file_header + "[" + opt_section_titles[kOptionSectionVersion] +
- "]\n"
- " rocksdb_version=" +
- std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR) +
- "." + std::to_string(ROCKSDB_PATCH) + "\n");
+ IOOptions opts;
+ s = WritableFileWriter::PrepareIOOptions(write_options, opts);
+ if (s.ok()) {
+ s = writable->Append(opts, option_file_header + "[" +
+ opt_section_titles[kOptionSectionVersion] +
+ "]\n"
+ " rocksdb_version=" +
+ std::to_string(ROCKSDB_MAJOR) + "." +
+ std::to_string(ROCKSDB_MINOR) + "." +
+ std::to_string(ROCKSDB_PATCH) + "\n");
+ }
if (s.ok()) {
s = writable->Append(
+ opts,
" options_file_version=" + std::to_string(ROCKSDB_OPTION_FILE_MAJOR) +
- "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n");
+ "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n");
}
if (s.ok()) {
- s = writable->Append("\n[" + opt_section_titles[kOptionSectionDBOptions] +
- "]\n ");
+ s = writable->Append(
+ opts, "\n[" + opt_section_titles[kOptionSectionDBOptions] + "]\n ");
}
if (s.ok()) {
s = GetStringFromDBOptions(config_options, db_opt, &options_file_content);
}
if (s.ok()) {
- s = writable->Append(options_file_content + "\n");
+ s = writable->Append(opts, options_file_content + "\n");
}
for (size_t i = 0; s.ok() && i < cf_opts.size(); ++i) {
// CFOptions section
- s = writable->Append("\n[" + opt_section_titles[kOptionSectionCFOptions] +
- " \"" + EscapeOptionString(cf_names[i]) + "\"]\n ");
+ s = writable->Append(
+ opts, "\n[" + opt_section_titles[kOptionSectionCFOptions] + " \"" +
+ EscapeOptionString(cf_names[i]) + "\"]\n ");
if (s.ok()) {
s = GetStringFromColumnFamilyOptions(config_options, cf_opts[i],
&options_file_content);
}
if (s.ok()) {
- s = writable->Append(options_file_content + "\n");
+ s = writable->Append(opts, options_file_content + "\n");
}
// TableOptions section
auto* tf = cf_opts[i].table_factory.get();
if (tf != nullptr) {
if (s.ok()) {
s = writable->Append(
- "[" + opt_section_titles[kOptionSectionTableOptions] + tf->Name() +
- " \"" + EscapeOptionString(cf_names[i]) + "\"]\n ");
+ opts, "[" + opt_section_titles[kOptionSectionTableOptions] +
+ tf->Name() + " \"" + EscapeOptionString(cf_names[i]) +
+ "\"]\n ");
}
if (s.ok()) {
options_file_content.clear();
s = tf->GetOptionString(config_options, &options_file_content);
}
if (s.ok()) {
- s = writable->Append(options_file_content + "\n");
+ s = writable->Append(opts, options_file_content + "\n");
}
}
}
if (s.ok()) {
- s = writable->Sync(true /* use_fsync */);
+ s = writable->Sync(opts, true /* use_fsync */);
}
if (s.ok()) {
- s = writable->Close();
+ s = writable->Close(opts);
}
TEST_SYNC_POINT("PersistRocksDBOptions:written");
if (s.ok()) {
@@ -733,4 +743,3 @@ Status RocksDBOptionsParser::VerifyTableFactory(
return Status::OK();
}
} // namespace ROCKSDB_NAMESPACE
-
diff --git a/options/options_parser.h b/options/options_parser.h
index 4268051f3..e702c9f49 100644
--- a/options/options_parser.h
+++ b/options/options_parser.h
@@ -32,11 +32,13 @@ enum OptionSection : char {
static const std::string opt_section_titles[] = {
"Version", "DBOptions", "CFOptions", "TableOptions/", "Unknown"};
-Status PersistRocksDBOptions(const DBOptions& db_opt,
+Status PersistRocksDBOptions(const WriteOptions& write_options,
+ const DBOptions& db_opt,
const std::vector<std::string>& cf_names,
const std::vector<ColumnFamilyOptions>& cf_opts,
const std::string& file_name, FileSystem* fs);
-Status PersistRocksDBOptions(const ConfigOptions& config_options,
+Status PersistRocksDBOptions(const WriteOptions& write_options,
+ const ConfigOptions& config_options,
const DBOptions& db_opt,
const std::vector<std::string>& cf_names,
const std::vector<ColumnFamilyOptions>& cf_opts,
diff --git a/options/options_test.cc b/options/options_test.cc
index 6420ebf46..8fa39fa2f 100644
--- a/options/options_test.cc
+++ b/options/options_test.cc
@@ -3672,8 +3672,8 @@ TEST_F(OptionsParserTest, Readahead) {
std::vector<std::string> cf_names = {"default", one_mb_string};
const std::string kOptionsFileName = "test-persisted-options.ini";
- ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts,
- kOptionsFileName, fs_.get()));
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), base_db_opt, cf_names,
+ base_cf_opts, kOptionsFileName, fs_.get()));
uint64_t file_size = 0;
ASSERT_OK(
@@ -3747,8 +3747,8 @@ TEST_F(OptionsParserTest, DumpAndParse) {
const std::string kOptionsFileName = "test-persisted-options.ini";
// Use default for escaped(true), unknown(false) and check (exact)
ConfigOptions config_options;
- ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts,
- kOptionsFileName, fs_.get()));
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), base_db_opt, cf_names,
+ base_cf_opts, kOptionsFileName, fs_.get()));
RocksDBOptionsParser parser;
ASSERT_OK(parser.Parse(config_options, kOptionsFileName, fs_.get()));
@@ -3808,9 +3808,9 @@ TEST_F(OptionsParserTest, DifferentDefault) {
ColumnFamilyOptions cf_univ_opts;
cf_univ_opts.OptimizeUniversalStyleCompaction();
- ASSERT_OK(PersistRocksDBOptions(DBOptions(), {"default", "universal"},
- {cf_level_opts, cf_univ_opts},
- kOptionsFileName, fs_.get()));
+ ASSERT_OK(PersistRocksDBOptions(
+ WriteOptions(), DBOptions(), {"default", "universal"},
+ {cf_level_opts, cf_univ_opts}, kOptionsFileName, fs_.get()));
RocksDBOptionsParser parser;
ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false,
@@ -3953,8 +3953,8 @@ class OptionsSanityCheckTest : public OptionsParserTest,
if (!s.ok()) {
return s;
}
- return PersistRocksDBOptions(db_opts, {"default"}, {cf_opts},
- kOptionsFileName, fs_.get());
+ return PersistRocksDBOptions(WriteOptions(), db_opts, {"default"},
+ {cf_opts}, kOptionsFileName, fs_.get());
}
Status PersistCFOptions(const ColumnFamilyOptions& cf_opts) {
diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc
index 7b8bd0275..6bd28804c 100644
--- a/table/block_based/block_based_table_builder.cc
+++ b/table/block_based/block_based_table_builder.cc
@@ -264,6 +264,7 @@ struct BlockBasedTableBuilder::Rep {
// BEGIN from MutableCFOptions
std::shared_ptr<const SliceTransform> prefix_extractor;
// END from MutableCFOptions
+ const WriteOptions write_options;
const BlockBasedTableOptions table_options;
const InternalKeyComparator& internal_comparator;
// Size in bytes for the user-defined timestamps.
@@ -439,6 +440,7 @@ struct BlockBasedTableBuilder::Rep {
WritableFileWriter* f)
: ioptions(tbo.ioptions),
prefix_extractor(tbo.moptions.prefix_extractor),
+ write_options(tbo.write_options),
table_options(table_opt),
internal_comparator(tbo.internal_comparator),
ts_sz(tbo.internal_comparator.user_comparator()->timestamp_size()),
@@ -1317,6 +1319,13 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
// checksum: uint32
Rep* r = rep_;
bool is_data_block = block_type == BlockType::kData;
+ IOOptions io_options;
+ IOStatus io_s =
+ WritableFileWriter::PrepareIOOptions(r->write_options, io_options);
+ if (!io_s.ok()) {
+ r->SetIOStatus(io_s);
+ return;
+ }
// Old, misleading name of this function: WriteRawBlock
StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS);
const uint64_t offset = r->get_offset();
@@ -1330,7 +1339,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
}
{
- IOStatus io_s = r->file->Append(block_contents);
+ io_s = r->file->Append(io_options, block_contents);
if (!io_s.ok()) {
r->SetIOStatus(io_s);
return;
@@ -1357,7 +1366,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
"BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum",
trailer.data());
{
- IOStatus io_s = r->file->Append(Slice(trailer.data(), trailer.size()));
+ io_s = r->file->Append(io_options, Slice(trailer.data(), trailer.size()));
if (!io_s.ok()) {
r->SetIOStatus(io_s);
return;
@@ -1394,7 +1403,8 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
(r->alignment -
((block_contents.size() + kBlockTrailerSize) & (r->alignment - 1))) &
(r->alignment - 1);
- IOStatus io_s = r->file->Pad(pad_bytes);
+
+ io_s = r->file->Pad(io_options, pad_bytes);
if (io_s.ok()) {
r->set_offset(r->get_offset() + pad_bytes);
} else {
@@ -1800,7 +1810,14 @@ void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle,
r->SetStatus(s);
return;
}
- IOStatus ios = r->file->Append(footer.GetSlice());
+ IOOptions io_options;
+ IOStatus ios =
+ WritableFileWriter::PrepareIOOptions(r->write_options, io_options);
+ if (!ios.ok()) {
+ r->SetIOStatus(ios);
+ return;
+ }
+ ios = r->file->Append(io_options, footer.GetSlice());
if (ios.ok()) {
r->set_offset(r->get_offset() + footer.GetSlice().size());
} else {
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index a184264df..4de9eba23 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -2922,7 +2922,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
"--------------------------------------\n";
std::unique_ptr<Block> metaindex;
std::unique_ptr<InternalIterator> metaindex_iter;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions ro;
Status s = ReadMetaIndexBlock(ro, nullptr /* prefetch_buffer */, &metaindex,
&metaindex_iter);
@@ -3027,7 +3027,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) {
out_stream << "Index Details:\n"
"--------------------------------------\n";
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
std::unique_ptr<InternalIteratorBase<IndexValue>> blockhandles_iter(
NewIndexIterator(read_options, /*need_upper_bound_check=*/false,
@@ -3078,7 +3078,7 @@ Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) {
}
Status BlockBasedTable::DumpDataBlocks(std::ostream& out_stream) {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
std::unique_ptr<InternalIteratorBase<IndexValue>> blockhandles_iter(
NewIndexIterator(read_options, /*need_upper_bound_check=*/false,
diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc
index 254546893..7255fae7e 100644
--- a/table/block_based/block_based_table_reader_test.cc
+++ b/table/block_based/block_based_table_reader_test.cc
@@ -19,6 +19,7 @@
#include "rocksdb/compression_type.h"
#include "rocksdb/db.h"
#include "rocksdb/file_system.h"
+#include "rocksdb/options.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/block_based/partitioned_index_iterator.h"
@@ -133,11 +134,13 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
compression_opts.max_dict_bytes = compression_dict_bytes;
compression_opts.max_dict_buffer_bytes = compression_dict_bytes;
IntTblPropCollectorFactories factories;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> table_builder(
options_.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, comparator, &factories,
- compression_type, compression_opts,
- 0 /* column_family_id */,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ comparator, &factories, compression_type,
+ compression_opts, 0 /* column_family_id */,
kDefaultColumnFamilyName, -1 /* level */),
writer.get()));
diff --git a/table/block_based/data_block_hash_index_test.cc b/table/block_based/data_block_hash_index_test.cc
index 2841b271d..b4ccfce44 100644
--- a/table/block_based/data_block_hash_index_test.cc
+++ b/table/block_based/data_block_hash_index_test.cc
@@ -553,9 +553,11 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2,
std::unique_ptr<TableBuilder> builder;
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
std::string column_family_name;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
builder.reset(ioptions.table_factory->NewTableBuilder(
TableBuilderOptions(
- ioptions, moptions, internal_comparator,
+ ioptions, moptions, read_options, write_options, internal_comparator,
&int_tbl_prop_collector_factories, options.compression,
CompressionOptions(),
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
@@ -567,7 +569,7 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2,
EXPECT_TRUE(builder->status().ok());
Status s = builder->Finish();
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Flush(IOOptions()));
EXPECT_TRUE(s.ok()) << s.ToString();
EXPECT_EQ(sink->contents().size(), builder->FileSize());
diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc
index 61e444e92..95a025574 100644
--- a/table/block_fetcher_test.cc
+++ b/table/block_fetcher_test.cc
@@ -77,11 +77,13 @@ class BlockFetcherTest : public testing::Test {
ColumnFamilyOptions cf_options(options_);
MutableCFOptions moptions(cf_options);
IntTblPropCollectorFactories factories;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> table_builder(table_factory_.NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, comparator, &factories,
- compression_type, CompressionOptions(),
- 0 /* column_family_id */, kDefaultColumnFamilyName,
- -1 /* level */),
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ comparator, &factories, compression_type,
+ CompressionOptions(), 0 /* column_family_id */,
+ kDefaultColumnFamilyName, -1 /* level */),
writer.get()));
// Build table.
diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc
index b0596edac..16e7f46e8 100644
--- a/table/cuckoo/cuckoo_table_builder.cc
+++ b/table/cuckoo/cuckoo_table_builder.cc
@@ -318,15 +318,16 @@ Status CuckooTableBuilder::Finish() {
unused_bucket.resize(static_cast<size_t>(bucket_size), 'a');
// Write the table.
uint32_t num_added = 0;
+ const IOOptions opts;
for (auto& bucket : buckets) {
if (bucket.vector_idx == kMaxVectorIdx) {
- io_status_ = file_->Append(Slice(unused_bucket));
+ io_status_ = file_->Append(opts, Slice(unused_bucket));
} else {
++num_added;
- io_status_ = file_->Append(GetKey(bucket.vector_idx));
+ io_status_ = file_->Append(opts, GetKey(bucket.vector_idx));
if (io_status_.ok()) {
if (value_size_ > 0) {
- io_status_ = file_->Append(GetValue(bucket.vector_idx));
+ io_status_ = file_->Append(opts, GetValue(bucket.vector_idx));
}
}
}
@@ -382,7 +383,7 @@ Status CuckooTableBuilder::Finish() {
BlockHandle property_block_handle;
property_block_handle.set_offset(offset);
property_block_handle.set_size(property_block.size());
- io_status_ = file_->Append(property_block);
+ io_status_ = file_->Append(opts, property_block);
offset += property_block.size();
if (!io_status_.ok()) {
status_ = io_status_;
@@ -395,7 +396,7 @@ Status CuckooTableBuilder::Finish() {
BlockHandle meta_index_block_handle;
meta_index_block_handle.set_offset(offset);
meta_index_block_handle.set_size(meta_index_block.size());
- io_status_ = file_->Append(meta_index_block);
+ io_status_ = file_->Append(opts, meta_index_block);
if (!io_status_.ok()) {
status_ = io_status_;
return status_;
@@ -408,7 +409,7 @@ Status CuckooTableBuilder::Finish() {
status_ = s;
return status_;
}
- io_status_ = file_->Append(footer.GetSlice());
+ io_status_ = file_->Append(opts, footer.GetSlice());
status_ = io_status_;
return status_;
}
diff --git a/table/cuckoo/cuckoo_table_builder_test.cc b/table/cuckoo/cuckoo_table_builder_test.cc
index 1a0d58c76..967e8e2db 100644
--- a/table/cuckoo/cuckoo_table_builder_test.cc
+++ b/table/cuckoo/cuckoo_table_builder_test.cc
@@ -182,7 +182,7 @@ TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) {
ASSERT_OK(builder.status());
ASSERT_EQ(0UL, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
CheckFileContents({}, {}, {}, "", 2, 2, false);
}
@@ -229,7 +229,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true);
@@ -277,7 +277,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true);
@@ -325,7 +325,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true);
@@ -374,7 +374,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) {
size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true);
@@ -420,7 +420,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true);
@@ -463,7 +463,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
size_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = "key00";
@@ -507,7 +507,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
size_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = "key00";
@@ -550,7 +550,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) {
size_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = "key00";
@@ -589,7 +589,7 @@ TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
ASSERT_OK(builder.status());
}
ASSERT_TRUE(builder.Finish().IsNotSupported());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
}
TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
@@ -619,7 +619,7 @@ TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
ASSERT_OK(builder.status());
ASSERT_TRUE(builder.Finish().IsNotSupported());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
}
} // namespace ROCKSDB_NAMESPACE
diff --git a/table/cuckoo/cuckoo_table_reader.cc b/table/cuckoo/cuckoo_table_reader.cc
index 54ae6266e..d74a0b041 100644
--- a/table/cuckoo/cuckoo_table_reader.cc
+++ b/table/cuckoo/cuckoo_table_reader.cc
@@ -59,7 +59,7 @@ CuckooTableReader::CuckooTableReader(
}
{
std::unique_ptr<TableProperties> props;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
status_ =
ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber,
diff --git a/table/cuckoo/cuckoo_table_reader_test.cc b/table/cuckoo/cuckoo_table_reader_test.cc
index d829b3630..25e2c1bca 100644
--- a/table/cuckoo/cuckoo_table_reader_test.cc
+++ b/table/cuckoo/cuckoo_table_reader_test.cc
@@ -104,7 +104,7 @@ class CuckooReaderTest : public testing::Test {
ASSERT_OK(builder.Finish());
ASSERT_EQ(num_items, builder.NumEntries());
file_size = builder.FileSize();
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
// Check reader now.
std::unique_ptr<RandomAccessFileReader> file_reader;
@@ -431,7 +431,7 @@ void WriteFile(const std::vector<std::string>& keys, const uint64_t num,
}
ASSERT_OK(builder.Finish());
ASSERT_EQ(num, builder.NumEntries());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
uint64_t file_size;
ASSERT_OK(
@@ -571,4 +571,3 @@ int main(int argc, char** argv) {
}
#endif // GFLAGS.
-
diff --git a/table/mock_table.cc b/table/mock_table.cc
index 1971c00fc..fe3bd854c 100644
--- a/table/mock_table.cc
+++ b/table/mock_table.cc
@@ -298,7 +298,7 @@ Status MockTableFactory::GetAndWriteNextID(WritableFileWriter* file,
*next_id = next_id_.fetch_add(1);
char buf[4];
EncodeFixed32(buf, *next_id);
- return file->Append(Slice(buf, 4));
+ return file->Append(IOOptions(), Slice(buf, 4));
}
Status MockTableFactory::GetIDFromFile(RandomAccessFileReader* file,
diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc
index 1e61773d6..32f53be49 100644
--- a/table/plain/plain_table_builder.cc
+++ b/table/plain/plain_table_builder.cc
@@ -39,7 +39,7 @@ IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file,
uint64_t* offset, BlockHandle* block_handle) {
block_handle->set_offset(*offset);
block_handle->set_size(block_contents.size());
- IOStatus io_s = file->Append(block_contents);
+ IOStatus io_s = file->Append(IOOptions(), block_contents);
if (io_s.ok()) {
*offset += block_contents.size();
@@ -138,6 +138,7 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
// temp buffer for metadata bytes between key and value.
char meta_bytes_buf[6];
size_t meta_bytes_buf_size = 0;
+ const IOOptions opts;
ParsedInternalKey internal_key;
if (!ParseInternalKey(key, &internal_key, false /* log_err_key */)
@@ -178,12 +179,13 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size);
assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf));
meta_bytes_buf_size = end_ptr - meta_bytes_buf;
- io_status_ = file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size));
+ io_status_ =
+ file_->Append(opts, Slice(meta_bytes_buf, meta_bytes_buf_size));
}
// Write value
if (io_status_.ok()) {
- io_status_ = file_->Append(value);
+ io_status_ = file_->Append(opts, value);
offset_ += value_size + meta_bytes_buf_size;
}
@@ -306,7 +308,7 @@ Status PlainTableBuilder::Finish() {
status_ = s;
return status_;
}
- io_status_ = file_->Append(footer.GetSlice());
+ io_status_ = file_->Append(IOOptions(), footer.GetSlice());
if (io_status_.ok()) {
offset_ += footer.GetSlice().size();
}
diff --git a/table/plain/plain_table_key_coding.cc b/table/plain/plain_table_key_coding.cc
index 0ac423191..102a16a6b 100644
--- a/table/plain/plain_table_key_coding.cc
+++ b/table/plain/plain_table_key_coding.cc
@@ -94,6 +94,8 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
Slice key_to_write = key; // Portion of internal key to write out.
uint32_t user_key_size = static_cast<uint32_t>(key.size() - 8);
+ const IOOptions opts;
+
if (encoding_type_ == kPlain) {
if (fixed_user_key_len_ == kPlainTableVariableLength) {
// Write key length
@@ -101,7 +103,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
char* ptr = EncodeVarint32(key_size_buf, user_key_size);
assert(ptr <= key_size_buf + sizeof(key_size_buf));
auto len = ptr - key_size_buf;
- IOStatus io_s = file->Append(Slice(key_size_buf, len));
+ IOStatus io_s = file->Append(opts, Slice(key_size_buf, len));
if (!io_s.ok()) {
return io_s;
}
@@ -119,7 +121,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
key_count_for_prefix_ = 1;
pre_prefix_.SetUserKey(prefix);
size_bytes_pos += EncodeSize(kFullKey, user_key_size, size_bytes);
- IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos));
+ IOStatus io_s = file->Append(opts, Slice(size_bytes, size_bytes_pos));
if (!io_s.ok()) {
return io_s;
}
@@ -137,7 +139,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
static_cast<uint32_t>(pre_prefix_.GetUserKey().size());
size_bytes_pos += EncodeSize(kKeySuffix, user_key_size - prefix_len,
size_bytes + size_bytes_pos);
- IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos));
+ IOStatus io_s = file->Append(opts, Slice(size_bytes, size_bytes_pos));
if (!io_s.ok()) {
return io_s;
}
@@ -152,7 +154,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
// in this buffer to safe one file append call, which takes 1 byte.
if (parsed_key.sequence == 0 && parsed_key.type == kTypeValue) {
IOStatus io_s =
- file->Append(Slice(key_to_write.data(), key_to_write.size() - 8));
+ file->Append(opts, Slice(key_to_write.data(), key_to_write.size() - 8));
if (!io_s.ok()) {
return io_s;
}
@@ -160,7 +162,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key,
meta_bytes_buf[*meta_bytes_buf_size] = PlainTableFactory::kValueTypeSeqId0;
*meta_bytes_buf_size += 1;
} else {
- IOStatus io_s = file->Append(key_to_write);
+ IOStatus io_s = file->Append(opts, key_to_write);
if (!io_s.ok()) {
return io_s;
}
diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc
index b917fce34..89b1853ce 100644
--- a/table/plain/plain_table_reader.cc
+++ b/table/plain/plain_table_reader.cc
@@ -126,7 +126,7 @@ Status PlainTableReader::Open(
}
std::unique_ptr<TableProperties> props;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
ioptions, read_options, &props);
@@ -300,7 +300,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props,
BlockContents index_block_contents;
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
Status s =
ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc
index 821fff5b3..3972cdfb3 100644
--- a/table/sst_file_dumper.cc
+++ b/table/sst_file_dumper.cc
@@ -58,6 +58,7 @@ SstFileDumper::SstFileDumper(const Options& options,
options_(options),
ioptions_(options_),
moptions_(ColumnFamilyOptions(options_)),
+ // TODO: plumb Env::IOActivity, Env::IOPriority
read_options_(verify_checksum, false),
internal_comparator_(BytewiseComparator()) {
read_options_.readahead_size = readahead_size;
@@ -303,14 +304,18 @@ Status SstFileDumper::ShowCompressionSize(
const ImmutableOptions imoptions(opts);
const ColumnFamilyOptions cfo(opts);
const MutableCFOptions moptions(cfo);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ const ReadOptions read_options;
+ const WriteOptions write_options;
ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator);
IntTblPropCollectorFactories block_based_table_factories;
std::string column_family_name;
int unknown_level = -1;
+
TableBuilderOptions tb_opts(
- imoptions, moptions, ikc, &block_based_table_factories, compress_type,
- compress_opt,
+ imoptions, moptions, read_options, write_options, ikc,
+ &block_based_table_factories, compress_type, compress_opt,
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
column_family_name, unknown_level);
uint64_t num_data_blocks = 0;
@@ -375,10 +380,8 @@ Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number,
RandomAccessFileReader* file,
uint64_t file_size,
FilePrefetchBuffer* prefetch_buffer) {
- // TODO: plumb Env::IOActivity
- const ReadOptions read_options;
Status s = ROCKSDB_NAMESPACE::ReadTableProperties(
- file, file_size, table_magic_number, ioptions_, read_options,
+ file, file_size, table_magic_number, ioptions_, read_options_,
&table_properties_,
/* memory_allocator= */ nullptr, prefetch_buffer);
if (!s.ok()) {
diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc
index 1ef0f98aa..2d63bad3a 100644
--- a/table/sst_file_writer.cc
+++ b/table/sst_file_writer.cc
@@ -41,7 +41,11 @@ struct SstFileWriter::Rep {
cfh(_cfh),
invalidate_page_cache(_invalidate_page_cache),
skip_filters(_skip_filters),
- db_session_id(_db_session_id) {}
+ db_session_id(_db_session_id) {
+ // TODO (hx235): pass in `WriteOptions` instead of `rate_limiter_priority`
+ // during construction
+ write_options.rate_limiter_priority = io_priority;
+ }
std::unique_ptr<WritableFileWriter> file_writer;
std::unique_ptr<TableBuilder> builder;
@@ -49,6 +53,7 @@ struct SstFileWriter::Rep {
ImmutableOptions ioptions;
MutableCFOptions mutable_cf_options;
Env::IOPriority io_priority;
+ WriteOptions write_options;
InternalKeyComparator internal_comparator;
ExternalSstFileInfo file_info;
InternalKey ikey;
@@ -343,13 +348,15 @@ Status SstFileWriter::Open(const std::string& file_path) {
// TODO: it would be better to set oldest_key_time to be used for getting the
// approximate time of ingested keys.
+ // TODO: plumb Env::IOActivity, Env::IOPriority
TableBuilderOptions table_builder_options(
- r->ioptions, r->mutable_cf_options, r->internal_comparator,
- &int_tbl_prop_collector_factories, compression_type, compression_opts,
- cf_id, r->column_family_name, unknown_level, false /* is_bottommost */,
- TableFileCreationReason::kMisc, 0 /* oldest_key_time */,
- 0 /* file_creation_time */, "SST Writer" /* db_id */, r->db_session_id,
- 0 /* target_file_size */, r->next_file_number);
+ r->ioptions, r->mutable_cf_options, ReadOptions(), r->write_options,
+ r->internal_comparator, &int_tbl_prop_collector_factories,
+ compression_type, compression_opts, cf_id, r->column_family_name,
+ unknown_level, false /* is_bottommost */, TableFileCreationReason::kMisc,
+ 0 /* oldest_key_time */, 0 /* file_creation_time */,
+ "SST Writer" /* db_id */, r->db_session_id, 0 /* target_file_size */,
+ r->next_file_number);
// External SST files used to each get a unique session id. Now for
// slightly better uniqueness probability in constructing cache keys, we
// assign fake file numbers to each file (into table properties) and keep
@@ -361,8 +368,8 @@ Status SstFileWriter::Open(const std::string& file_path) {
FileTypeSet tmp_set = r->ioptions.checksum_handoff_file_types;
r->file_writer.reset(new WritableFileWriter(
std::move(sst_file), file_path, r->env_options, r->ioptions.clock,
- nullptr /* io_tracer */, nullptr /* stats */, r->ioptions.listeners,
- r->ioptions.file_checksum_gen_factory.get(),
+ nullptr /* io_tracer */, r->ioptions.stats, Histograms::SST_WRITE_MICROS,
+ r->ioptions.listeners, r->ioptions.file_checksum_gen_factory.get(),
tmp_set.Contains(FileType::kTableFile), false));
// TODO(tec) : If table_factory is using compressed block cache, we will
@@ -430,11 +437,13 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
Status s = r->builder->Finish();
r->file_info.file_size = r->builder->FileSize();
+ IOOptions opts;
+ s = WritableFileWriter::PrepareIOOptions(r->write_options, opts);
if (s.ok()) {
- s = r->file_writer->Sync(r->ioptions.use_fsync);
+ s = r->file_writer->Sync(opts, r->ioptions.use_fsync);
r->InvalidatePageCache(true /* closing */).PermitUncheckedError();
if (s.ok()) {
- s = r->file_writer->Close();
+ s = r->file_writer->Close(opts);
}
}
if (s.ok()) {
diff --git a/table/table_builder.h b/table/table_builder.h
index d6f0e1a03..c01d03cb2 100644
--- a/table/table_builder.h
+++ b/table/table_builder.h
@@ -102,6 +102,7 @@ struct TableReaderOptions {
struct TableBuilderOptions {
TableBuilderOptions(
const ImmutableOptions& _ioptions, const MutableCFOptions& _moptions,
+ const ReadOptions& _read_options, const WriteOptions& _write_options,
const InternalKeyComparator& _internal_comparator,
const IntTblPropCollectorFactories* _int_tbl_prop_collector_factories,
CompressionType _compression_type,
@@ -115,6 +116,8 @@ struct TableBuilderOptions {
const uint64_t _target_file_size = 0, const uint64_t _cur_file_num = 0)
: ioptions(_ioptions),
moptions(_moptions),
+ read_options(_read_options),
+ write_options(_write_options),
internal_comparator(_internal_comparator),
int_tbl_prop_collector_factories(_int_tbl_prop_collector_factories),
compression_type(_compression_type),
@@ -133,6 +136,8 @@ struct TableBuilderOptions {
const ImmutableOptions& ioptions;
const MutableCFOptions& moptions;
+ const ReadOptions& read_options;
+ const WriteOptions& write_options;
const InternalKeyComparator& internal_comparator;
const IntTblPropCollectorFactories* int_tbl_prop_collector_factories;
const CompressionType compression_type;
diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc
index 60c84d7bf..9b24e3c43 100644
--- a/table/table_reader_bench.cc
+++ b/table/table_reader_bench.cc
@@ -98,11 +98,13 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
int unknown_level = -1;
+ const WriteOptions write_options;
tb = opts.table_factory->NewTableBuilder(
- TableBuilderOptions(
- ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
- CompressionType::kNoCompression, CompressionOptions(),
- 0 /* column_family_id */, kDefaultColumnFamilyName, unknown_level),
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ ikc, &int_tbl_prop_collector_factories,
+ CompressionType::kNoCompression,
+ CompressionOptions(), 0 /* column_family_id */,
+ kDefaultColumnFamilyName, unknown_level),
file_writer.get());
} else {
s = DB::Open(opts, dbname, &db);
@@ -122,7 +124,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
}
if (!through_db) {
tb->Finish();
- file_writer->Close();
+ file_writer->Close(IOOptions());
} else {
db->Flush(FlushOptions());
}
diff --git a/table/table_test.cc b/table/table_test.cc
index 298e25fbd..15b07854e 100644
--- a/table/table_test.cc
+++ b/table/table_test.cc
@@ -383,8 +383,11 @@ class TableConstructor : public Constructor {
}
std::string column_family_name;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
builder.reset(ioptions.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, internal_comparator,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ internal_comparator,
&int_tbl_prop_collector_factories,
options.compression, options.compression_opts,
kUnknownColumnFamily, column_family_name, level_),
@@ -402,7 +405,7 @@ class TableConstructor : public Constructor {
EXPECT_OK(builder->status());
}
Status s = builder->Finish();
- EXPECT_OK(file_writer_->Flush());
+ EXPECT_OK(file_writer_->Flush(IOOptions()));
EXPECT_TRUE(s.ok()) << s.ToString();
EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
@@ -1309,7 +1312,7 @@ class FileChecksumTestHelper {
EXPECT_TRUE(table_builder_->status().ok());
}
Status s = table_builder_->Finish();
- EXPECT_OK(file_writer_->Flush());
+ EXPECT_OK(file_writer_->Flush(IOOptions()));
EXPECT_OK(s);
EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize());
@@ -1317,7 +1320,7 @@ class FileChecksumTestHelper {
}
std::string GetFileChecksum() {
- EXPECT_OK(file_writer_->Close());
+ EXPECT_OK(file_writer_->Close(IOOptions()));
return table_builder_->GetFileChecksum();
}
@@ -4466,9 +4469,11 @@ TEST_P(BlockBasedTableTest, NoFileChecksum) {
FileChecksumTestHelper f(true);
f.CreateWritableFile();
std::unique_ptr<TableBuilder> builder;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
builder.reset(ioptions.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, *comparator,
- &int_tbl_prop_collector_factories,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ *comparator, &int_tbl_prop_collector_factories,
options.compression, options.compression_opts,
kUnknownColumnFamily, column_family_name, level),
f.GetFileWriter()));
@@ -4502,9 +4507,11 @@ TEST_P(BlockBasedTableTest, Crc32cFileChecksum) {
f.CreateWritableFile();
f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
std::unique_ptr<TableBuilder> builder;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
builder.reset(ioptions.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, *comparator,
- &int_tbl_prop_collector_factories,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ *comparator, &int_tbl_prop_collector_factories,
options.compression, options.compression_opts,
kUnknownColumnFamily, column_family_name, level),
f.GetFileWriter()));
@@ -4548,8 +4555,10 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) {
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
std::string column_family_name;
int unknown_level = -1;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(), kUnknownColumnFamily,
column_family_name, unknown_level),
@@ -4562,7 +4571,7 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) {
builder->Add(key, value);
}
ASSERT_OK(builder->Finish());
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Flush(IOOptions()));
test::StringSink* ss =
static_cast<test::StringSink*>(file_writer->writable_file());
@@ -4572,7 +4581,6 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) {
new RandomAccessFileReader(std::move(source), "test"));
std::unique_ptr<TableProperties> props;
- const ReadOptions read_options;
auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
kPlainTableMagicNumber, ioptions, read_options,
&props);
@@ -4602,9 +4610,10 @@ TEST_F(PlainTableTest, NoFileChecksum) {
int unknown_level = -1;
FileChecksumTestHelper f(true);
f.CreateWritableFile();
-
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(), kUnknownColumnFamily,
column_family_name, unknown_level),
@@ -4642,9 +4651,10 @@ TEST_F(PlainTableTest, Crc32cFileChecksum) {
FileChecksumTestHelper f(true);
f.CreateWritableFile();
f.SetFileChecksumGenerator(checksum_crc32c_gen1.release());
-
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(), kUnknownColumnFamily,
column_family_name, unknown_level),
@@ -5252,8 +5262,10 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
new SstFileWriterPropertiesCollectorFactory(2 /* version */,
0 /* global_seqno*/));
std::string column_family_name;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(), kUnknownColumnFamily,
column_family_name, -1),
@@ -5267,7 +5279,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
builder->Add(ik.Encode(), value);
}
ASSERT_OK(builder->Finish());
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Flush(IOOptions()));
test::RandomRWStringSink ss_rw(sink);
uint32_t version;
@@ -5282,7 +5294,6 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
new RandomAccessFileReader(std::move(source), ""));
std::unique_ptr<TableProperties> props;
- const ReadOptions read_options;
ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
kBlockBasedTableMagicNumber, ioptions,
read_options, &props));
@@ -5306,7 +5317,6 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
// Helper function to get the contents of the table InternalIterator
std::unique_ptr<TableReader> table_reader;
- const ReadOptions read_options;
std::function<InternalIterator*()> GetTableInternalIter = [&]() {
std::unique_ptr<FSRandomAccessFile> source(
new test::StringSource(ss_rw.contents(), 73342, true));
@@ -5434,8 +5444,10 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) {
InternalKeyComparator ikc(options.comparator);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
std::string column_family_name;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(), kUnknownColumnFamily,
column_family_name, -1),
@@ -5451,7 +5463,7 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) {
builder->Add(ik.Encode(), value);
}
ASSERT_OK(builder->Finish());
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Flush(IOOptions()));
std::unique_ptr<FSRandomAccessFile> source(
new test::StringSource(sink->contents(), 73342, false));
@@ -5460,7 +5472,6 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) {
// Helper function to get version, global_seqno, global_seqno_offset
std::function<void()> VerifyBlockAlignment = [&]() {
std::unique_ptr<TableProperties> props;
- const ReadOptions read_options;
ASSERT_OK(ReadTableProperties(file_reader.get(), sink->contents().size(),
kBlockBasedTableMagicNumber, ioptions,
read_options, &props));
@@ -5488,7 +5499,6 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) {
0 /* block_protection_bytes_per_key */),
std::move(file_reader), sink->contents().size(), &table_reader));
- ReadOptions read_options;
std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
read_options, moptions2.prefix_extractor.get(), /*arena=*/nullptr,
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
@@ -5526,9 +5536,10 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
InternalKeyComparator ikc(options.comparator);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
std::string column_family_name;
-
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kNoCompression,
CompressionOptions(), kUnknownColumnFamily,
column_family_name, -1),
@@ -5544,7 +5555,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
builder->Add(ik.Encode(), value);
}
ASSERT_OK(builder->Finish());
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Flush(IOOptions()));
std::unique_ptr<FSRandomAccessFile> source(
new test::StringSource(sink->contents(), 73342, true));
@@ -5556,20 +5567,19 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
uint64_t file_size = sink->contents().size();
Footer footer;
- IOOptions opts;
- ASSERT_OK(ReadFooterFromFile(opts, file, *FileSystem::Default(),
+ ASSERT_OK(ReadFooterFromFile(IOOptions(), file, *FileSystem::Default(),
nullptr /* prefetch_buffer */, file_size,
&footer, kBlockBasedTableMagicNumber));
auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
BlockContents* contents) {
- ReadOptions read_options;
- read_options.verify_checksums = false;
+ ReadOptions read_options_for_helper;
+ read_options_for_helper.verify_checksums = false;
PersistentCacheOptions cache_options;
BlockFetcher block_fetcher(
- file, nullptr /* prefetch_buffer */, footer, read_options, handle,
- contents, ioptions, false /* decompress */,
+ file, nullptr /* prefetch_buffer */, footer, read_options_for_helper,
+ handle, contents, ioptions, false /* decompress */,
false /*maybe_compressed*/, block_type,
UncompressionDict::GetEmptyDict(), cache_options);
@@ -6117,12 +6127,15 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) {
InternalKeyComparator ikc(options.comparator);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(
options.table_factory->NewTableBuilder(
- TableBuilderOptions(
- ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
- kSnappyCompression, options.compression_opts,
- kUnknownColumnFamily, "test_cf", -1 /* level */),
+ TableBuilderOptions(ioptions, moptions, read_options, write_options,
+ ikc, &int_tbl_prop_collector_factories,
+ kSnappyCompression, options.compression_opts,
+ kUnknownColumnFamily, "test_cf",
+ -1 /* level */),
file_writer.get()));
std::string key1 = "key1";
@@ -6193,8 +6206,10 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest,
InternalKeyComparator ikc(options.comparator);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kSnappyCompression,
options.compression_opts, kUnknownColumnFamily,
"test_cf", -1 /* level */),
@@ -6278,8 +6293,10 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, BasicWithCacheFull) {
InternalKeyComparator ikc(options.comparator);
IntTblPropCollectorFactories int_tbl_prop_collector_factories;
+ const ReadOptions read_options;
+ const WriteOptions write_options;
std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
- TableBuilderOptions(ioptions, moptions, ikc,
+ TableBuilderOptions(ioptions, moptions, read_options, write_options, ikc,
&int_tbl_prop_collector_factories, kSnappyCompression,
options.compression_opts, kUnknownColumnFamily,
"test_cf", -1 /* level */),
diff --git a/test_util/testutil.cc b/test_util/testutil.cc
index ce221e79b..b4939ee03 100644
--- a/test_util/testutil.cc
+++ b/test_util/testutil.cc
@@ -463,15 +463,16 @@ bool IsPrefetchSupported(const std::shared_ptr<FileSystem>& fs,
Random rnd(301);
std::string test_string = rnd.RandomString(4096);
Slice data(test_string);
- Status s = WriteStringToFile(fs.get(), data, tmp, true);
+ IOOptions opts;
+ Status s = WriteStringToFile(fs.get(), data, tmp, true, opts);
if (s.ok()) {
std::unique_ptr<FSRandomAccessFile> file;
auto io_s = fs->NewRandomAccessFile(tmp, FileOptions(), &file, nullptr);
if (io_s.ok()) {
- supported = !(file->Prefetch(0, data.size(), IOOptions(), nullptr)
- .IsNotSupported());
+ supported =
+ !(file->Prefetch(0, data.size(), opts, nullptr).IsNotSupported());
}
- s = fs->DeleteFile(tmp, IOOptions(), nullptr);
+ s = fs->DeleteFile(tmp, opts, nullptr);
}
return s.ok() && supported;
}
@@ -521,7 +522,7 @@ Status CorruptFile(Env* env, const std::string& fname, int offset,
for (int i = 0; i < bytes_to_corrupt; i++) {
contents[i + offset] ^= 0x80;
}
- s = WriteStringToFile(env, contents, fname);
+ s = WriteStringToFile(env, contents, fname, false /* should_sync */);
}
if (s.ok() && verify_checksum) {
Options options;
@@ -544,7 +545,7 @@ Status TruncateFile(Env* env, const std::string& fname, uint64_t new_length) {
s = ReadFileToString(env, fname, &contents);
if (s.ok()) {
contents.resize(static_cast<size_t>(new_length), 'b');
- s = WriteStringToFile(env, contents, fname);
+ s = WriteStringToFile(env, contents, fname, false /* should_sync */);
}
return s;
}
diff --git a/tools/db_bench_tool_test.cc b/tools/db_bench_tool_test.cc
index a30c65065..1668dfb88 100644
--- a/tools/db_bench_tool_test.cc
+++ b/tools/db_bench_tool_test.cc
@@ -130,7 +130,7 @@ namespace {} // namespace
TEST_F(DBBenchTest, OptionsFile) {
const std::string kOptionsFileName = test_path_ + "/OPTIONS_test";
Options opt = GetDefaultOptions();
- ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"},
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), DBOptions(opt), {"default"},
{ColumnFamilyOptions(opt)}, kOptionsFileName,
opt.env->GetFileSystem().get()));
@@ -149,7 +149,7 @@ TEST_F(DBBenchTest, OptionsFileUniversal) {
Options opt = GetDefaultOptions(kCompactionStyleUniversal, 1);
- ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"},
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), DBOptions(opt), {"default"},
{ColumnFamilyOptions(opt)}, kOptionsFileName,
opt.env->GetFileSystem().get()));
@@ -166,7 +166,7 @@ TEST_F(DBBenchTest, OptionsFileMultiLevelUniversal) {
Options opt = GetDefaultOptions(kCompactionStyleUniversal, 12);
- ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"},
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), DBOptions(opt), {"default"},
{ColumnFamilyOptions(opt)}, kOptionsFileName,
opt.env->GetFileSystem().get()));
diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc
index 81e946bbc..90b7886ee 100644
--- a/tools/ldb_cmd.cc
+++ b/tools/ldb_cmd.cc
@@ -4376,8 +4376,10 @@ UnsafeRemoveSstFileCommand::UnsafeRemoveSstFileCommand(
}
void UnsafeRemoveSstFileCommand::DoCommand() {
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
+ const WriteOptions write_options;
+
PrepareOptions();
OfflineManifestWriter w(options_, db_path_);
@@ -4402,7 +4404,7 @@ void UnsafeRemoveSstFileCommand::DoCommand() {
s = options_.env->GetFileSystem()->NewDirectory(db_path_, IOOptions(),
&db_dir, nullptr);
if (s.ok()) {
- s = w.LogAndApply(read_options, cfd, &edit, db_dir.get());
+ s = w.LogAndApply(read_options, write_options, cfd, &edit, db_dir.get());
}
}
diff --git a/tools/simulated_hybrid_file_system.cc b/tools/simulated_hybrid_file_system.cc
index 2b9aa0950..7d9b9dc6e 100644
--- a/tools/simulated_hybrid_file_system.cc
+++ b/tools/simulated_hybrid_file_system.cc
@@ -86,7 +86,9 @@ SimulatedHybridFileSystem::~SimulatedHybridFileSystem() {
metadata += f;
metadata += "\n";
}
- IOStatus s = WriteStringToFile(target(), metadata, metadata_file_name_, true);
+ IOOptions opts;
+ IOStatus s =
+ WriteStringToFile(target(), metadata, metadata_file_name_, true, opts);
if (!s.ok()) {
fprintf(stderr, "Error writing to file %s: %s", metadata_file_name_.c_str(),
s.ToString().c_str());
@@ -240,4 +242,3 @@ IOStatus SimulatedWritableFile::Sync(const IOOptions& options,
return target()->Sync(options, dbg);
}
} // namespace ROCKSDB_NAMESPACE
-
diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc
index 07c42b6e7..2ebdad1ee 100644
--- a/tools/sst_dump_test.cc
+++ b/tools/sst_dump_test.cc
@@ -123,10 +123,12 @@ class SSTDumpToolTest : public testing::Test {
std::string column_family_name;
int unknown_level = -1;
+ const WriteOptions write_options;
tb.reset(opts.table_factory->NewTableBuilder(
TableBuilderOptions(
- imoptions, moptions, ikc, &int_tbl_prop_collector_factories,
- CompressionType::kNoCompression, CompressionOptions(),
+ imoptions, moptions, read_options, write_options, ikc,
+ &int_tbl_prop_collector_factories, CompressionType::kNoCompression,
+ CompressionOptions(),
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
column_family_name, unknown_level),
file_writer.get()));
@@ -160,7 +162,7 @@ class SSTDumpToolTest : public testing::Test {
}
}
ASSERT_OK(tb->Finish());
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
}
protected:
@@ -417,9 +419,9 @@ TEST_F(SSTDumpToolTest, ValidSSTPath) {
std::string sst_file = MakeFilePath("rocksdb_sst_test.sst");
createSST(opts, sst_file);
std::string text_file = MakeFilePath("text_file");
- ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file));
+ ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file, false));
std::string fake_sst = MakeFilePath("fake_sst.sst");
- ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst));
+ ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst, false));
for (const auto& command_arg : {"--command=verify", "--command=identify"}) {
snprintf(usage[1], kOptLength, "%s", command_arg);
diff --git a/unreleased_history/behavior_changes/blob_file_write_micros.md b/unreleased_history/behavior_changes/blob_file_write_micros.md
new file mode 100644
index 000000000..aceb05920
--- /dev/null
+++ b/unreleased_history/behavior_changes/blob_file_write_micros.md
@@ -0,0 +1 @@
+`rocksdb.blobdb.blob.file.write.micros` expands to also measure time writing the header and footer. Therefore the COUNT may be higher and values may be smaller than before. For stacked BlobDB, it no longer measures the time of explicitly flushing the blob file.
diff --git a/unreleased_history/bug_fixes/blob_tickers.md b/unreleased_history/bug_fixes/blob_tickers.md
new file mode 100644
index 000000000..685895944
--- /dev/null
+++ b/unreleased_history/bug_fixes/blob_tickers.md
@@ -0,0 +1 @@
+Fix bugs where `rocksdb.blobdb.blob.file.synced` includes blob files that failed to get synced and `rocksdb.blobdb.blob.file.bytes.written` includes blob bytes that failed to get written.
diff --git a/unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md b/unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md
new file mode 100644
index 000000000..6e8699867
--- /dev/null
+++ b/unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md
@@ -0,0 +1 @@
+Add new statistics: `rocksdb.sst.write.micros` measures the time of each write to an SST file; `rocksdb.file.write.{flush|compaction|db.open}.micros` measure the time of each write to an SST table (currently only the block-based table format) and to a blob file for flush, compaction and db open, respectively.
diff --git a/util/file_checksum_helper.cc b/util/file_checksum_helper.cc
index 59da96fa8..3e57bc78a 100644
--- a/util/file_checksum_helper.cc
+++ b/util/file_checksum_helper.cc
@@ -98,7 +98,7 @@ Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path,
return Status::InvalidArgument("checksum_list is nullptr");
}
assert(checksum_list);
- // TODO: plumb Env::IOActivity
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const ReadOptions read_options;
checksum_list->reset();
Status s;
diff --git a/util/file_reader_writer_test.cc b/util/file_reader_writer_test.cc
index 036c030dc..dfdd7d32c 100644
--- a/util/file_reader_writer_test.cc
+++ b/util/file_reader_writer_test.cc
@@ -113,16 +113,16 @@ TEST_F(WritableFileWriterTest, RangeSync) {
for (int i = 0; i < 1000; i++) {
int skew_limit = (i < 700) ? 10 : 15;
uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100);
- s = writer->Append(Slice(large_buf.get(), num));
+ s = writer->Append(IOOptions(), Slice(large_buf.get(), num));
ASSERT_OK(s);
// Flush in a chance of 1/10.
if (r.Uniform(10) == 0) {
- s = writer->Flush();
+ s = writer->Flush(IOOptions());
ASSERT_OK(s);
}
}
- s = writer->Close();
+ s = writer->Close(IOOptions());
ASSERT_OK(s);
}
@@ -215,16 +215,16 @@ TEST_F(WritableFileWriterTest, IncrementalBuffer) {
for (int i = 0; i < 20; i++) {
uint32_t num = r.Skewed(16) * 100 + r.Uniform(100);
std::string random_string = r.RandomString(num);
- ASSERT_OK(writer->Append(Slice(random_string.c_str(), num)));
+ ASSERT_OK(writer->Append(IOOptions(), Slice(random_string.c_str(), num)));
target.append(random_string.c_str(), num);
// In some attempts, flush in a chance of 1/10.
if (!no_flush && r.Uniform(10) == 0) {
- ASSERT_OK(writer->Flush());
+ ASSERT_OK(writer->Flush(IOOptions()));
}
}
- ASSERT_OK(writer->Flush());
- ASSERT_OK(writer->Close());
+ ASSERT_OK(writer->Flush(IOOptions()));
+ ASSERT_OK(writer->Close(IOOptions()));
ASSERT_EQ(target.size(), actual.size());
ASSERT_EQ(target, actual);
}
@@ -272,27 +272,28 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksum) {
ImmutableOptions ioptions(options);
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, SystemClock::Default().get(),
- nullptr, ioptions.stats, ioptions.listeners,
- ioptions.file_checksum_gen_factory.get(), true, true));
+ nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */,
+ ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true,
+ true));
Random rnd(301);
std::string data = rnd.RandomString(1000);
uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size());
fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
-
- ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c));
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c));
+ ASSERT_OK(file_writer->Flush(IOOptions()));
Random size_r(47);
for (int i = 0; i < 2000; i++) {
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 10000));
data_crc32c = crc32c::Value(data.c_str(), data.size());
- ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c));
+ ASSERT_OK(
+ file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c));
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 97));
- ASSERT_OK(file_writer->Append(Slice(data.c_str())));
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str())));
+ ASSERT_OK(file_writer->Flush(IOOptions()));
}
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
Destroy(options);
}
@@ -314,27 +315,29 @@ TEST_F(DBWritableFileWriterTest, AppendVerifyNoChecksum) {
// So Append with checksum logic will not be triggered
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, SystemClock::Default().get(),
- nullptr, ioptions.stats, ioptions.listeners,
- ioptions.file_checksum_gen_factory.get(), true, false));
+ nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */,
+ ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true,
+ false));
Random rnd(301);
std::string data = rnd.RandomString(1000);
uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size());
fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
- ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c));
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c));
+ ASSERT_OK(file_writer->Flush(IOOptions()));
Random size_r(47);
for (int i = 0; i < 1000; i++) {
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 10000));
data_crc32c = crc32c::Value(data.c_str(), data.size());
- ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c));
+ ASSERT_OK(
+ file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c));
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 97));
- ASSERT_OK(file_writer->Append(Slice(data.c_str())));
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str())));
+ ASSERT_OK(file_writer->Flush(IOOptions()));
}
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
Destroy(options);
}
@@ -357,8 +360,9 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) {
// So Append with checksum logic will not be triggered
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, SystemClock::Default().get(),
- nullptr, ioptions.stats, ioptions.listeners,
- ioptions.file_checksum_gen_factory.get(), true, true));
+ nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */,
+ ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true,
+ true));
fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c);
Random rnd(301);
@@ -370,17 +374,18 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) {
for (int i = 0; i < 100; i++) {
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 10000));
data_crc32c = crc32c::Value(data.c_str(), data.size());
- ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c));
+ ASSERT_OK(
+ file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c));
bytes_written += static_cast<uint64_t>(data.size());
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 97));
- ASSERT_OK(file_writer->Append(Slice(data.c_str())));
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str())));
+ ASSERT_OK(file_writer->Flush(IOOptions()));
bytes_written += static_cast<uint64_t>(data.size());
}
uint64_t elapsed = fault_env_->NowMicros() - start;
double raw_rate = bytes_written * 1000000.0 / elapsed;
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
// Set the rate-limiter
FileOptions file_options1 = FileOptions();
@@ -397,19 +402,21 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) {
// So Append with checksum logic will not be triggered
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options1, SystemClock::Default().get(),
- nullptr, ioptions.stats, ioptions.listeners,
- ioptions.file_checksum_gen_factory.get(), true, true));
+ nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */,
+ ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true,
+ true));
for (int i = 0; i < 1000; i++) {
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 10000));
data_crc32c = crc32c::Value(data.c_str(), data.size());
- ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c));
+ ASSERT_OK(
+ file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c));
data = rnd.RandomString((static_cast<int>(size_r.Next()) % 97));
- ASSERT_OK(file_writer->Append(Slice(data.c_str())));
- ASSERT_OK(file_writer->Flush());
+ ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str())));
+ ASSERT_OK(file_writer->Flush(IOOptions()));
}
- ASSERT_OK(file_writer->Close());
+ ASSERT_OK(file_writer->Close(IOOptions()));
if (file_options1.rate_limiter != nullptr) {
delete file_options1.rate_limiter;
}
@@ -465,12 +472,12 @@ TEST_F(WritableFileWriterTest, AppendStatusReturn) {
std::unique_ptr<WritableFileWriter> writer(
new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions()));
- ASSERT_OK(writer->Append(std::string(2 * kMb, 'a')));
+ ASSERT_OK(writer->Append(IOOptions(), std::string(2 * kMb, 'a')));
// Next call to WritableFile::Append() should fail
FakeWF* fwf = static_cast<FakeWF*>(writer->writable_file());
fwf->SetIOError(true);
- ASSERT_NOK(writer->Append(std::string(2 * kMb, 'b')));
+ ASSERT_NOK(writer->Append(IOOptions(), std::string(2 * kMb, 'b')));
}
class ReadaheadRandomAccessFileTest
@@ -498,9 +505,9 @@ class ReadaheadRandomAccessFileTest
new test::StringSink(&control_contents_));
std::unique_ptr<WritableFileWriter> write_holder(new WritableFileWriter(
std::move(sink), "" /* don't care */, FileOptions()));
- Status s = write_holder->Append(Slice(str));
+ Status s = write_holder->Append(IOOptions(), Slice(str));
EXPECT_OK(s);
- s = write_holder->Flush();
+ s = write_holder->Flush(IOOptions());
EXPECT_OK(s);
std::unique_ptr<FSRandomAccessFile> read_holder(
new test::StringSource(control_contents_));
@@ -878,26 +885,27 @@ TEST_F(DBWritableFileWriterTest, IOErrorNotification) {
file_writer.reset(new WritableFileWriter(
std::move(writable_file_ptr), fname, file_options,
- SystemClock::Default().get(), nullptr, ioptions.stats, ioptions.listeners,
+ SystemClock::Default().get(), nullptr, ioptions.stats,
+ Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, ioptions.listeners,
ioptions.file_checksum_gen_factory.get(), true, true));
FakeWF* fwf = static_cast<FakeWF*>(file_writer->writable_file());
fwf->SetIOError(true);
- ASSERT_NOK(file_writer->Append(std::string(2 * kMb, 'a')));
+ ASSERT_NOK(file_writer->Append(IOOptions(), std::string(2 * kMb, 'a')));
fwf->CheckCounters(1, 0);
ASSERT_EQ(listener->NotifyErrorCount(), 1);
file_writer->reset_seen_error();
fwf->SetIOError(true);
- ASSERT_NOK(file_writer->Flush());
+ ASSERT_NOK(file_writer->Flush(IOOptions()));
fwf->CheckCounters(1, 1);
ASSERT_EQ(listener->NotifyErrorCount(), 2);
/* No error generation */
file_writer->reset_seen_error();
fwf->SetIOError(false);
- ASSERT_OK(file_writer->Append(std::string(2 * kMb, 'b')));
+ ASSERT_OK(file_writer->Append(IOOptions(), std::string(2 * kMb, 'b')));
ASSERT_EQ(listener->NotifyErrorCount(), 2);
fwf->CheckCounters(1, 1);
}
@@ -1006,23 +1014,29 @@ class WritableFileWriterIOPriorityTest : public testing::Test {
};
TEST_F(WritableFileWriterIOPriorityTest, Append) {
- ASSERT_OK(writer_->Append(Slice("abc")));
+ ASSERT_OK(writer_->Append(IOOptions(), Slice("abc")));
}
-TEST_F(WritableFileWriterIOPriorityTest, Pad) { ASSERT_OK(writer_->Pad(500)); }
+TEST_F(WritableFileWriterIOPriorityTest, Pad) {
+ ASSERT_OK(writer_->Pad(IOOptions(), 500));
+}
-TEST_F(WritableFileWriterIOPriorityTest, Flush) { ASSERT_OK(writer_->Flush()); }
+TEST_F(WritableFileWriterIOPriorityTest, Flush) {
+ ASSERT_OK(writer_->Flush(IOOptions()));
+}
-TEST_F(WritableFileWriterIOPriorityTest, Close) { ASSERT_OK(writer_->Close()); }
+TEST_F(WritableFileWriterIOPriorityTest, Close) {
+ ASSERT_OK(writer_->Close(IOOptions()));
+}
TEST_F(WritableFileWriterIOPriorityTest, Sync) {
- ASSERT_OK(writer_->Sync(false));
- ASSERT_OK(writer_->Sync(true));
+ ASSERT_OK(writer_->Sync(IOOptions(), false));
+ ASSERT_OK(writer_->Sync(IOOptions(), true));
}
TEST_F(WritableFileWriterIOPriorityTest, SyncWithoutFlush) {
- ASSERT_OK(writer_->SyncWithoutFlush(false));
- ASSERT_OK(writer_->SyncWithoutFlush(true));
+ ASSERT_OK(writer_->SyncWithoutFlush(IOOptions(), false));
+ ASSERT_OK(writer_->SyncWithoutFlush(IOOptions(), true));
}
TEST_F(WritableFileWriterIOPriorityTest, BasicOp) {
@@ -1037,16 +1051,16 @@ TEST_F(WritableFileWriterIOPriorityTest, BasicOp) {
for (int i = 0; i < 1000; i++) {
int skew_limit = (i < 700) ? 10 : 15;
uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100);
- s = writer->Append(Slice(large_buf.get(), num));
+ s = writer->Append(IOOptions(), Slice(large_buf.get(), num));
ASSERT_OK(s);
// Flush in a chance of 1/10.
if (r.Uniform(10) == 0) {
- s = writer->Flush();
+ s = writer->Flush(IOOptions());
ASSERT_OK(s);
}
}
- s = writer->Close();
+ s = writer->Close(IOOptions());
ASSERT_OK(s);
}
} // namespace ROCKSDB_NAMESPACE
diff --git a/util/log_write_bench.cc b/util/log_write_bench.cc
index c1637db15..25602791e 100644
--- a/util/log_write_bench.cc
+++ b/util/log_write_bench.cc
@@ -41,9 +41,9 @@ void RunBenchmark() {
std::unique_ptr<WritableFile> file;
env->NewWritableFile(file_name, &file, env_options);
std::unique_ptr<WritableFileWriter> writer;
- writer.reset(new WritableFileWriter(std::move(file), file_name, env_options,
- clock, nullptr /* stats */,
- options.listeners));
+ writer.reset(new WritableFileWriter(
+ std::move(file), file_name, env_options, clock, nullptr /* stats */,
+ Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, options.listeners));
std::string record;
record.assign(FLAGS_record_size, 'X');
diff --git a/utilities/backup/backup_engine.cc b/utilities/backup/backup_engine.cc
index 31a733731..e2f0d4a7b 100644
--- a/utilities/backup/backup_engine.cc
+++ b/utilities/backup/backup_engine.cc
@@ -2195,6 +2195,7 @@ IOStatus BackupEngineImpl::CopyOrCreateFile(
rate_limiter ? static_cast<size_t>(rate_limiter->GetSingleBurstBytes())
: kDefaultCopyFileBufferSize;
+ // TODO: pass in Histograms if the destination file is sst or blob
std::unique_ptr<WritableFileWriter> dest_writer(
new WritableFileWriter(std::move(dst_file), dst, dst_file_options));
std::unique_ptr<SequentialFileReader> src_reader;
@@ -2209,6 +2210,7 @@ IOStatus BackupEngineImpl::CopyOrCreateFile(
}
Slice data;
+ const IOOptions opts;
do {
if (stop_backup_.load(std::memory_order_acquire)) {
return status_to_io_status(Status::Incomplete("Backup stopped"));
@@ -2238,7 +2240,8 @@ IOStatus BackupEngineImpl::CopyOrCreateFile(
if (checksum_hex != nullptr) {
checksum_value = crc32c::Extend(checksum_value, data.data(), data.size());
}
- io_s = dest_writer->Append(data);
+
+ io_s = dest_writer->Append(opts, data);
if (rate_limiter != nullptr) {
if (!src.empty()) {
@@ -2275,10 +2278,10 @@ IOStatus BackupEngineImpl::CopyOrCreateFile(
}
if (io_s.ok() && sync) {
- io_s = dest_writer->Sync(false);
+ io_s = dest_writer->Sync(opts, false);
}
if (io_s.ok()) {
- io_s = dest_writer->Close();
+ io_s = dest_writer->Close(opts);
}
return io_s;
}
@@ -3352,4 +3355,3 @@ void TEST_SetDefaultRateLimitersClock(
restore_rate_limiter_clock);
}
} // namespace ROCKSDB_NAMESPACE
-
diff --git a/utilities/backup/backup_engine_test.cc b/utilities/backup/backup_engine_test.cc
index 5ed6ae895..d768fbe97 100644
--- a/utilities/backup/backup_engine_test.cc
+++ b/utilities/backup/backup_engine_test.cc
@@ -931,7 +931,7 @@ class BackupEngineTest : public testing::Test {
}
file_contents[0] = (file_contents[0] + 257) % 256;
- return WriteStringToFile(test_db_env_.get(), file_contents, fname);
+ return WriteStringToFile(test_db_env_.get(), file_contents, fname, false);
}
void AssertDirectoryFilesMatchRegex(const std::string& dir,
diff --git a/utilities/blob_db/blob_compaction_filter.cc b/utilities/blob_db/blob_compaction_filter.cc
index ddaa98c7d..97543214d 100644
--- a/utilities/blob_db/blob_compaction_filter.cc
+++ b/utilities/blob_db/blob_compaction_filter.cc
@@ -181,7 +181,9 @@ bool BlobIndexCompactionFilterBase::OpenNewBlobFileIfNeeded() const {
BlobDBImpl* const blob_db_impl = context_.blob_db_impl;
assert(blob_db_impl);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
const Status s = blob_db_impl->CreateBlobFileAndWriter(
+ WriteOptions(),
/* has_ttl */ false, ExpirationRange(), "compaction/GC", &blob_file_,
&writer_);
if (!s.ok()) {
@@ -251,8 +253,9 @@ bool BlobIndexCompactionFilterBase::WriteBlobToNewFile(
assert(writer_);
uint64_t new_key_offset = 0;
- const Status s = writer_->AddRecord(key, blob, kNoExpiration, &new_key_offset,
- new_blob_offset);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ const Status s = writer_->AddRecord(WriteOptions(), key, blob, kNoExpiration,
+ &new_key_offset, new_blob_offset);
if (!s.ok()) {
const BlobDBImpl* const blob_db_impl = context_.blob_db_impl;
@@ -302,7 +305,8 @@ bool BlobIndexCompactionFilterBase::CloseAndRegisterNewBlobFile() const {
{
WriteLock wl(&blob_db_impl->mutex_);
- s = blob_db_impl->CloseBlobFile(blob_file_);
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ s = blob_db_impl->CloseBlobFile(WriteOptions(), blob_file_);
// Note: we delay registering the new blob file until it's closed to
// prevent FIFO eviction from processing it during compaction/GC.
diff --git a/utilities/blob_db/blob_db.h b/utilities/blob_db/blob_db.h
index e2f0b7bdb..59242a645 100644
--- a/utilities/blob_db/blob_db.h
+++ b/utilities/blob_db/blob_db.h
@@ -248,7 +248,7 @@ class BlobDB : public StackableDB {
virtual BlobDBOptions GetBlobDBOptions() const = 0;
- virtual Status SyncBlobFiles() = 0;
+ virtual Status SyncBlobFiles(const WriteOptions& write_options) = 0;
virtual ~BlobDB() {}
diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc
index 2fa7ae898..6fded8441 100644
--- a/utilities/blob_db/blob_db_impl.cc
+++ b/utilities/blob_db/blob_db_impl.cc
@@ -23,6 +23,7 @@
#include "logging/logging.h"
#include "monitoring/instrumented_mutex.h"
#include "monitoring/statistics_impl.h"
+#include "monitoring/thread_status_util.h"
#include "rocksdb/convenience.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
@@ -106,6 +107,15 @@ BlobDBImpl::~BlobDBImpl() {
}
Status BlobDBImpl::Close() {
+ ThreadStatus::OperationType cur_op_type =
+ ThreadStatusUtil::GetThreadOperation();
+ ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN);
+ Status s = CloseImpl();
+ ThreadStatusUtil::SetThreadOperation(cur_op_type);
+ return s;
+}
+
+Status BlobDBImpl::CloseImpl() {
if (closed_) {
return Status::OK();
}
@@ -123,7 +133,8 @@ Status BlobDBImpl::Close() {
return s;
}
- s = SyncBlobFiles();
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ s = SyncBlobFiles(WriteOptions());
return s;
}
@@ -277,7 +288,7 @@ Status BlobDBImpl::Open(std::vector<ColumnFamilyHandle*>* handles) {
return s;
}
- UpdateLiveSSTSize();
+ UpdateLiveSSTSize(WriteOptions(Env::IOActivity::kDBOpen));
// Start background jobs.
if (!bdb_options_.disable_background_tasks) {
@@ -743,7 +754,9 @@ Status BlobDBImpl::CreateWriterLocked(const std::shared_ptr<BlobFile>& bfile) {
}
std::unique_ptr<WritableFileWriter> fwriter;
- fwriter.reset(new WritableFileWriter(std::move(wfile), fpath, file_options_));
+ fwriter.reset(new WritableFileWriter(
+ std::move(wfile), fpath, file_options_, clock_, nullptr /* io_tracer */,
+ statistics_, Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS));
uint64_t boffset = bfile->GetFileSize();
if (debug_level_ >= 2 && boffset) {
@@ -824,8 +837,9 @@ Status BlobDBImpl::CheckOrCreateWriterLocked(
}
Status BlobDBImpl::CreateBlobFileAndWriter(
- bool has_ttl, const ExpirationRange& expiration_range,
- const std::string& reason, std::shared_ptr<BlobFile>* blob_file,
+ const WriteOptions& write_options, bool has_ttl,
+ const ExpirationRange& expiration_range, const std::string& reason,
+ std::shared_ptr<BlobFile>* blob_file,
std::shared_ptr<BlobLogWriter>* writer) {
TEST_SYNC_POINT("BlobDBImpl::CreateBlobFileAndWriter");
assert(has_ttl == (expiration_range.first || expiration_range.second));
@@ -846,7 +860,7 @@ Status BlobDBImpl::CreateBlobFileAndWriter(
assert(*writer);
- s = (*writer)->WriteHeader((*blob_file)->header_);
+ s = (*writer)->WriteHeader(write_options, (*blob_file)->header_);
if (!s.ok()) {
ROCKS_LOG_ERROR(db_options_.info_log,
"Failed to write header to new blob file: %s"
@@ -861,7 +875,8 @@ Status BlobDBImpl::CreateBlobFileAndWriter(
return s;
}
-Status BlobDBImpl::SelectBlobFile(std::shared_ptr<BlobFile>* blob_file) {
+Status BlobDBImpl::SelectBlobFile(const WriteOptions& write_options,
+ std::shared_ptr<BlobFile>* blob_file) {
assert(blob_file);
{
@@ -885,6 +900,7 @@ Status BlobDBImpl::SelectBlobFile(std::shared_ptr<BlobFile>* blob_file) {
std::shared_ptr<BlobLogWriter> writer;
const Status s = CreateBlobFileAndWriter(
+ write_options,
/* has_ttl */ false, ExpirationRange(),
/* reason */ "SelectBlobFile", blob_file, &writer);
if (!s.ok()) {
@@ -897,7 +913,8 @@ Status BlobDBImpl::SelectBlobFile(std::shared_ptr<BlobFile>* blob_file) {
return s;
}
-Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration,
+Status BlobDBImpl::SelectBlobFileTTL(const WriteOptions& write_options,
+ uint64_t expiration,
std::shared_ptr<BlobFile>* blob_file) {
assert(blob_file);
assert(expiration != kNoExpiration);
@@ -930,9 +947,9 @@ Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration,
oss << "SelectBlobFileTTL range: [" << exp_low << ',' << exp_high << ')';
std::shared_ptr<BlobLogWriter> writer;
- const Status s =
- CreateBlobFileAndWriter(/* has_ttl */ true, expiration_range,
- /* reason */ oss.str(), blob_file, &writer);
+ const Status s = CreateBlobFileAndWriter(
+ write_options, /* has_ttl */ true, expiration_range,
+ /* reason */ oss.str(), blob_file, &writer);
if (!s.ok()) {
return s;
}
@@ -1055,7 +1072,7 @@ Status BlobDBImpl::PutUntil(const WriteOptions& options, const Slice& key,
return s;
}
-Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/,
+Status BlobDBImpl::PutBlobValue(const WriteOptions& write_options,
const Slice& key, const Slice& value,
uint64_t expiration, WriteBatch* batch) {
write_mutex_.AssertHeld();
@@ -1087,30 +1104,30 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/,
// Check DB size limit before selecting blob file to
// Since CheckSizeAndEvictBlobFiles() can close blob files, it needs to be
// done before calling SelectBlobFile().
- s = CheckSizeAndEvictBlobFiles(headerbuf.size() + key.size() +
- value_compressed.size());
+ s = CheckSizeAndEvictBlobFiles(
+ write_options, headerbuf.size() + key.size() + value_compressed.size());
if (!s.ok()) {
return s;
}
std::shared_ptr<BlobFile> blob_file;
if (expiration != kNoExpiration) {
- s = SelectBlobFileTTL(expiration, &blob_file);
+ s = SelectBlobFileTTL(write_options, expiration, &blob_file);
} else {
- s = SelectBlobFile(&blob_file);
+ s = SelectBlobFile(write_options, &blob_file);
}
if (s.ok()) {
assert(blob_file != nullptr);
assert(blob_file->GetCompressionType() == bdb_options_.compression);
- s = AppendBlob(blob_file, headerbuf, key, value_compressed, expiration,
- &index_entry);
+ s = AppendBlob(write_options, blob_file, headerbuf, key, value_compressed,
+ expiration, &index_entry);
}
if (s.ok()) {
if (expiration != kNoExpiration) {
WriteLock file_lock(&blob_file->mutex_);
blob_file->ExtendExpirationRange(expiration);
}
- s = CloseBlobFileIfNeeded(blob_file);
+ s = CloseBlobFileIfNeeded(write_options, blob_file);
}
if (s.ok()) {
s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key,
@@ -1249,7 +1266,7 @@ void BlobDBImpl::GetCompactionContext(BlobCompactionContext* context,
}
}
-void BlobDBImpl::UpdateLiveSSTSize() {
+void BlobDBImpl::UpdateLiveSSTSize(const WriteOptions& write_options) {
uint64_t live_sst_size = 0;
bool ok = GetIntProperty(DB::Properties::kLiveSstFilesSize, &live_sst_size);
if (ok) {
@@ -1265,7 +1282,7 @@ void BlobDBImpl::UpdateLiveSSTSize() {
{
// Trigger FIFO eviction if needed.
MutexLock l(&write_mutex_);
- Status s = CheckSizeAndEvictBlobFiles(0, true /*force*/);
+ Status s = CheckSizeAndEvictBlobFiles(write_options, 0, true /*force*/);
if (s.IsNoSpace()) {
ROCKS_LOG_WARN(db_options_.info_log,
"DB grow out-of-space after SST size updated. Current live"
@@ -1276,7 +1293,8 @@ void BlobDBImpl::UpdateLiveSSTSize() {
}
}
-Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size,
+Status BlobDBImpl::CheckSizeAndEvictBlobFiles(const WriteOptions& write_options,
+ uint64_t blob_size,
bool force_evict) {
write_mutex_.AssertHeld();
@@ -1316,7 +1334,7 @@ Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size,
}
// FIFO eviction can evict open blob files.
if (!blob_file->Immutable()) {
- Status s = CloseBlobFile(blob_file);
+ Status s = CloseBlobFile(write_options, blob_file);
if (!s.ok()) {
return s;
}
@@ -1347,7 +1365,8 @@ Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size,
return Status::OK();
}
-Status BlobDBImpl::AppendBlob(const std::shared_ptr<BlobFile>& bfile,
+Status BlobDBImpl::AppendBlob(const WriteOptions& write_options,
+ const std::shared_ptr<BlobFile>& bfile,
const std::string& headerbuf, const Slice& key,
const Slice& value, uint64_t expiration,
std::string* index_entry) {
@@ -1363,8 +1382,8 @@ Status BlobDBImpl::AppendBlob(const std::shared_ptr<BlobFile>& bfile,
}
// write the blob to the blob log.
- s = writer->EmitPhysicalRecord(headerbuf, key, value, &key_offset,
- &blob_offset);
+ s = writer->EmitPhysicalRecord(write_options, headerbuf, key, value,
+ &key_offset, &blob_offset);
}
if (!s.ok()) {
@@ -1767,7 +1786,8 @@ std::pair<bool, int64_t> BlobDBImpl::SanityCheck(bool aborted) {
return std::make_pair(true, -1);
}
-Status BlobDBImpl::CloseBlobFile(std::shared_ptr<BlobFile> bfile) {
+Status BlobDBImpl::CloseBlobFile(const WriteOptions& write_options,
+ std::shared_ptr<BlobFile> bfile) {
TEST_SYNC_POINT("BlobDBImpl::CloseBlobFile");
assert(bfile);
assert(!bfile->Immutable());
@@ -1783,7 +1803,7 @@ Status BlobDBImpl::CloseBlobFile(std::shared_ptr<BlobFile> bfile) {
const SequenceNumber sequence = GetLatestSequenceNumber();
- const Status s = bfile->WriteFooterAndCloseLocked(sequence);
+ const Status s = bfile->WriteFooterAndCloseLocked(write_options, sequence);
if (s.ok()) {
total_blob_size_ += BlobLogFooter::kSize;
@@ -1815,7 +1835,8 @@ Status BlobDBImpl::CloseBlobFile(std::shared_ptr<BlobFile> bfile) {
return s;
}
-Status BlobDBImpl::CloseBlobFileIfNeeded(std::shared_ptr<BlobFile>& bfile) {
+Status BlobDBImpl::CloseBlobFileIfNeeded(const WriteOptions& write_options,
+ std::shared_ptr<BlobFile>& bfile) {
write_mutex_.AssertHeld();
// atomic read
@@ -1831,7 +1852,7 @@ Status BlobDBImpl::CloseBlobFileIfNeeded(std::shared_ptr<BlobFile>& bfile) {
return Status::OK();
}
- return CloseBlobFile(bfile);
+ return CloseBlobFile(write_options, bfile);
}
void BlobDBImpl::ObsoleteBlobFile(std::shared_ptr<BlobFile> blob_file,
@@ -1921,7 +1942,8 @@ std::pair<bool, int64_t> BlobDBImpl::EvictExpiredFiles(bool aborted) {
}
if (!blob_file->Immutable()) {
- CloseBlobFile(blob_file).PermitUncheckedError();
+ // TODO: plumb Env::IOActivity, Env::IOPriority
+ CloseBlobFile(WriteOptions(), blob_file).PermitUncheckedError();
}
assert(blob_file->Immutable());
@@ -1933,7 +1955,7 @@ std::pair<bool, int64_t> BlobDBImpl::EvictExpiredFiles(bool aborted) {
return std::make_pair(true, -1);
}
-Status BlobDBImpl::SyncBlobFiles() {
+Status BlobDBImpl::SyncBlobFiles(const WriteOptions& write_options) {
MutexLock l(&write_mutex_);
std::vector<std::shared_ptr<BlobFile>> process_files;
@@ -1949,7 +1971,7 @@ Status BlobDBImpl::SyncBlobFiles() {
Status s;
for (auto& blob_file : process_files) {
- s = blob_file->Fsync();
+ s = blob_file->Fsync(write_options);
if (!s.ok()) {
ROCKS_LOG_ERROR(db_options_.info_log,
"Failed to sync blob file %" PRIu64 ", status: %s",
@@ -2196,7 +2218,7 @@ Status BlobDBImpl::TEST_CloseBlobFile(std::shared_ptr<BlobFile>& bfile) {
WriteLock lock(&mutex_);
WriteLock file_lock(&bfile->mutex_);
- return CloseBlobFile(bfile);
+ return CloseBlobFile(WriteOptions(), bfile);
}
void BlobDBImpl::TEST_ObsoleteBlobFile(std::shared_ptr<BlobFile>& blob_file,
diff --git a/utilities/blob_db/blob_db_impl.h b/utilities/blob_db/blob_db_impl.h
index d491108d3..365ce6c50 100644
--- a/utilities/blob_db/blob_db_impl.h
+++ b/utilities/blob_db/blob_db_impl.h
@@ -167,7 +167,7 @@ class BlobDBImpl : public BlobDB {
Status Open(std::vector<ColumnFamilyHandle*>* handles);
- Status SyncBlobFiles() override;
+ Status SyncBlobFiles(const WriteOptions& write_options) override;
// Common part of the two GetCompactionContext methods below.
// REQUIRES: read lock on mutex_
@@ -245,11 +245,13 @@ class BlobDBImpl : public BlobDB {
// to a single thread (like in the case of new files written during
// compaction/GC), the locks on write_mutex_ and the blob file's mutex_ can be
// avoided.
- Status CloseBlobFile(std::shared_ptr<BlobFile> bfile);
+ Status CloseBlobFile(const WriteOptions& write_options,
+ std::shared_ptr<BlobFile> bfile);
// Close a file if its size exceeds blob_file_size
// REQUIRES: lock held on write_mutex_.
- Status CloseBlobFileIfNeeded(std::shared_ptr<BlobFile>& bfile);
+ Status CloseBlobFileIfNeeded(const WriteOptions& write_options,
+ std::shared_ptr<BlobFile>& bfile);
// Mark file as obsolete and move the file to obsolete file list.
//
@@ -261,13 +263,15 @@ class BlobDBImpl : public BlobDB {
const Slice& value, uint64_t expiration,
WriteBatch* batch);
- Status AppendBlob(const std::shared_ptr<BlobFile>& bfile,
+ Status AppendBlob(const WriteOptions& write_options,
+ const std::shared_ptr<BlobFile>& bfile,
const std::string& headerbuf, const Slice& key,
const Slice& value, uint64_t expiration,
std::string* index_entry);
// Create a new blob file and associated writer.
- Status CreateBlobFileAndWriter(bool has_ttl,
+ Status CreateBlobFileAndWriter(const WriteOptions& write_options,
+ bool has_ttl,
const ExpirationRange& expiration_range,
const std::string& reason,
std::shared_ptr<BlobFile>* blob_file,
@@ -275,11 +279,13 @@ class BlobDBImpl : public BlobDB {
// Get the open non-TTL blob log file, or create a new one if no such file
// exists.
- Status SelectBlobFile(std::shared_ptr<BlobFile>* blob_file);
+ Status SelectBlobFile(const WriteOptions& write_options,
+ std::shared_ptr<BlobFile>* blob_file);
// Get the open TTL blob log file for a certain expiration, or create a new
// one if no such file exists.
- Status SelectBlobFileTTL(uint64_t expiration,
+ Status SelectBlobFileTTL(const WriteOptions& write_options,
+ uint64_t expiration,
std::shared_ptr<BlobFile>* blob_file);
std::shared_ptr<BlobFile> FindBlobFileLocked(uint64_t expiration) const;
@@ -363,7 +369,7 @@ class BlobDBImpl : public BlobDB {
void MarkUnreferencedBlobFilesObsolete();
void MarkUnreferencedBlobFilesObsoleteDuringOpen();
- void UpdateLiveSSTSize();
+ void UpdateLiveSSTSize(const WriteOptions& write_options);
Status GetBlobFileReader(const std::shared_ptr<BlobFile>& blob_file,
std::shared_ptr<RandomAccessFileReader>* reader);
@@ -394,9 +400,12 @@ class BlobDBImpl : public BlobDB {
// If is_fifo = true, FIFO eviction will be triggered to make room for the
// new blob. If force_evict = true, FIFO eviction will evict blob files
// even eviction will not make enough room for the new blob.
- Status CheckSizeAndEvictBlobFiles(uint64_t blob_size,
+ Status CheckSizeAndEvictBlobFiles(const WriteOptions& write_options,
+ uint64_t blob_size,
bool force_evict = false);
+ Status CloseImpl();
+
// name of the database directory
std::string dbname_;
diff --git a/utilities/blob_db/blob_db_listener.h b/utilities/blob_db/blob_db_listener.h
index c95740c50..0759b6811 100644
--- a/utilities/blob_db/blob_db_listener.h
+++ b/utilities/blob_db/blob_db_listener.h
@@ -22,18 +22,20 @@ class BlobDBListener : public EventListener {
void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*info*/) override {
assert(blob_db_impl_ != nullptr);
- blob_db_impl_->SyncBlobFiles().PermitUncheckedError();
+ blob_db_impl_->SyncBlobFiles(WriteOptions(Env::IOActivity::kFlush))
+ .PermitUncheckedError();
}
void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& /*info*/) override {
assert(blob_db_impl_ != nullptr);
- blob_db_impl_->UpdateLiveSSTSize();
+ blob_db_impl_->UpdateLiveSSTSize(WriteOptions(Env::IOActivity::kFlush));
}
void OnCompactionCompleted(DB* /*db*/,
const CompactionJobInfo& /*info*/) override {
assert(blob_db_impl_ != nullptr);
- blob_db_impl_->UpdateLiveSSTSize();
+ blob_db_impl_->UpdateLiveSSTSize(
+ WriteOptions(Env::IOActivity::kCompaction));
}
const char* Name() const override { return kClassName(); }
diff --git a/utilities/blob_db/blob_file.cc b/utilities/blob_db/blob_file.cc
index 5b31d5697..c4c0556fb 100644
--- a/utilities/blob_db/blob_file.cc
+++ b/utilities/blob_db/blob_file.cc
@@ -78,7 +78,8 @@ void BlobFile::MarkObsolete(SequenceNumber sequence) {
obsolete_.store(true);
}
-Status BlobFile::WriteFooterAndCloseLocked(SequenceNumber sequence) {
+Status BlobFile::WriteFooterAndCloseLocked(const WriteOptions& write_options,
+ SequenceNumber sequence) {
BlobLogFooter footer;
footer.blob_count = blob_count_;
if (HasTTL()) {
@@ -86,7 +87,8 @@ Status BlobFile::WriteFooterAndCloseLocked(SequenceNumber sequence) {
}
// this will close the file and reset the Writable File Pointer.
- Status s = log_writer_->AppendFooter(footer, /* checksum_method */ nullptr,
+ Status s = log_writer_->AppendFooter(write_options, footer,
+ /* checksum_method */ nullptr,
/* checksum_value */ nullptr);
if (s.ok()) {
closed_ = true;
@@ -137,10 +139,10 @@ Status BlobFile::SetFromFooterLocked(const BlobLogFooter& footer) {
return Status::OK();
}
-Status BlobFile::Fsync() {
+Status BlobFile::Fsync(const WriteOptions& write_options) {
Status s;
if (log_writer_.get()) {
- s = log_writer_->Sync();
+ s = log_writer_->Sync(write_options);
}
return s;
}
diff --git a/utilities/blob_db/blob_file.h b/utilities/blob_db/blob_file.h
index 8651c6b67..f0ec83ebe 100644
--- a/utilities/blob_db/blob_file.h
+++ b/utilities/blob_db/blob_file.h
@@ -180,7 +180,7 @@ class BlobFile {
return obsolete_sequence_;
}
- Status Fsync();
+ Status Fsync(const WriteOptions& write_options);
uint64_t GetFileSize() const {
return file_size_.load(std::memory_order_acquire);
@@ -218,7 +218,8 @@ class BlobFile {
private:
Status ReadFooter(BlobLogFooter* footer);
- Status WriteFooterAndCloseLocked(SequenceNumber sequence);
+ Status WriteFooterAndCloseLocked(const WriteOptions& write_options,
+ SequenceNumber sequence);
void CloseRandomAccessLocked();
diff --git a/utilities/cache_dump_load_impl.h b/utilities/cache_dump_load_impl.h
index b5a0783de..26839e17a 100644
--- a/utilities/cache_dump_load_impl.h
+++ b/utilities/cache_dump_load_impl.h
@@ -162,11 +162,12 @@ class ToFileCacheDumpWriter : public CacheDumpWriter {
assert(file_writer_ != nullptr);
std::string prefix;
PutFixed32(&prefix, static_cast<uint32_t>(metadata.size()));
- IOStatus io_s = file_writer_->Append(Slice(prefix));
+ const IOOptions opts;
+ IOStatus io_s = file_writer_->Append(opts, Slice(prefix));
if (!io_s.ok()) {
return io_s;
}
- io_s = file_writer_->Append(metadata);
+ io_s = file_writer_->Append(opts, metadata);
return io_s;
}
@@ -175,11 +176,12 @@ class ToFileCacheDumpWriter : public CacheDumpWriter {
assert(file_writer_ != nullptr);
std::string prefix;
PutFixed32(&prefix, static_cast<uint32_t>(data.size()));
- IOStatus io_s = file_writer_->Append(Slice(prefix));
+ const IOOptions opts;
+ IOStatus io_s = file_writer_->Append(opts, Slice(prefix));
if (!io_s.ok()) {
return io_s;
}
- io_s = file_writer_->Append(data);
+ io_s = file_writer_->Append(opts, data);
return io_s;
}
diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc
index 53bbaeb07..d19eb3592 100644
--- a/utilities/fault_injection_fs.cc
+++ b/utilities/fault_injection_fs.cc
@@ -917,9 +917,10 @@ IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync(
return io_s;
}
} else {
+ IOOptions opts;
IOStatus io_s =
WriteStringToFile(target(), file_pair.second,
- pair.first + "/" + file_pair.first, true);
+ pair.first + "/" + file_pair.first, true, opts);
if (!io_s.ok()) {
return io_s;
}
diff --git a/utilities/options/options_util_test.cc b/utilities/options/options_util_test.cc
index fd9affb0d..2d08c3dd0 100644
--- a/utilities/options/options_util_test.cc
+++ b/utilities/options/options_util_test.cc
@@ -57,8 +57,8 @@ TEST_F(OptionsUtilTest, SaveAndLoad) {
}
const std::string kFileName = "OPTIONS-123456";
- ASSERT_OK(PersistRocksDBOptions(db_opt, cf_names, cf_opts, kFileName,
- env_->GetFileSystem().get()));
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), db_opt, cf_names, cf_opts,
+ kFileName, env_->GetFileSystem().get()));
DBOptions loaded_db_opt;
std::vector<ColumnFamilyDescriptor> loaded_cf_descs;
@@ -125,8 +125,8 @@ TEST_F(OptionsUtilTest, SaveAndLoadWithCacheCheck) {
cf_names.push_back("cf_plain_table_sample");
// Saving DB in file
const std::string kFileName = "OPTIONS-LOAD_CACHE_123456";
- ASSERT_OK(PersistRocksDBOptions(db_opt, cf_names, cf_opts, kFileName,
- env_->GetFileSystem().get()));
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), db_opt, cf_names, cf_opts,
+ kFileName, env_->GetFileSystem().get()));
DBOptions loaded_db_opt;
std::vector<ColumnFamilyDescriptor> loaded_cf_descs;
@@ -758,8 +758,8 @@ TEST_F(OptionsUtilTest, WalDirInOptins) {
options.wal_dir = dbname_;
std::string options_file;
ASSERT_OK(GetLatestOptionsFileName(dbname_, options.env, &options_file));
- ASSERT_OK(PersistRocksDBOptions(options, {"default"}, {options},
- dbname_ + "/" + options_file,
+ ASSERT_OK(PersistRocksDBOptions(WriteOptions(), options, {"default"},
+ {options}, dbname_ + "/" + options_file,
options.env->GetFileSystem().get()));
ASSERT_OK(LoadLatestOptions(ignore_opts, dbname_, &db_opts, &cf_descs));
ASSERT_EQ(db_opts.wal_dir, dbname_);
@@ -779,4 +779,3 @@ int main(int argc, char** argv) {
#endif // GFLAGS
return RUN_ALL_TESTS();
}
-
diff --git a/utilities/simulator_cache/sim_cache.cc b/utilities/simulator_cache/sim_cache.cc
index ff9d52dca..6d2bf098c 100644
--- a/utilities/simulator_cache/sim_cache.cc
+++ b/utilities/simulator_cache/sim_cache.cc
@@ -73,7 +73,7 @@ class CacheActivityLogger {
oss << "LOOKUP - " << key.ToString(true) << std::endl;
MutexLock l(&mutex_);
- Status s = file_writer_->Append(oss.str());
+ Status s = file_writer_->Append(IOOptions(), oss.str());
if (!s.ok() && bg_status_.ok()) {
bg_status_ = s;
}
@@ -93,7 +93,7 @@ class CacheActivityLogger {
// line format: "ADD - <KEY> - <KEY-SIZE>"
oss << "ADD - " << key.ToString(true) << " - " << size << std::endl;
MutexLock l(&mutex_);
- Status s = file_writer_->Append(oss.str());
+ Status s = file_writer_->Append(IOOptions(), oss.str());
if (!s.ok() && bg_status_.ok()) {
bg_status_ = s;
}
@@ -126,7 +126,7 @@ class CacheActivityLogger {
}
activity_logging_enabled_.store(false);
- Status s = file_writer_->Close();
+ Status s = file_writer_->Close(IOOptions());
if (!s.ok() && bg_status_.ok()) {
bg_status_ = s;
}
diff --git a/utilities/trace/file_trace_reader_writer.cc b/utilities/trace/file_trace_reader_writer.cc
index f2ca74144..cbbada57c 100644
--- a/utilities/trace/file_trace_reader_writer.cc
+++ b/utilities/trace/file_trace_reader_writer.cc
@@ -96,7 +96,7 @@ Status FileTraceWriter::Close() {
}
Status FileTraceWriter::Write(const Slice& data) {
- return file_writer_->Append(data);
+ return file_writer_->Append(IOOptions(), data);
}
uint64_t FileTraceWriter::GetFileSize() { return file_writer_->GetFileSize(); }
diff --git a/utilities/transactions/pessimistic_transaction_db.cc b/utilities/transactions/pessimistic_transaction_db.cc
index 8009bef19..661e6bc4d 100644
--- a/utilities/transactions/pessimistic_transaction_db.cc
+++ b/utilities/transactions/pessimistic_transaction_db.cc
@@ -134,6 +134,7 @@ Status PessimisticTransactionDB::Initialize(
assert(batch_info.log_number_);
assert(recovered_trx->name_.length());
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions w_options;
w_options.sync = true;
TransactionOptions t_options;
diff --git a/utilities/transactions/write_prepared_txn.cc b/utilities/transactions/write_prepared_txn.cc
index 58126a475..ddaf077ac 100644
--- a/utilities/transactions/write_prepared_txn.cc
+++ b/utilities/transactions/write_prepared_txn.cc
@@ -306,6 +306,7 @@ Status WritePreparedTxn::RollbackInternal() {
auto cf_map_shared_ptr = wpt_db_->GetCFHandleMap();
auto cf_comp_map_shared_ptr = wpt_db_->GetCFComparatorMap();
auto read_at_seq = kMaxSequenceNumber;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions roptions;
// to prevent callback's seq from being overridden inside DBImpl::Get
roptions.snapshot = wpt_db_->GetMaxSnapshot();
diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc
index 91a81d158..1ddc17511 100644
--- a/utilities/transactions/write_prepared_txn_db.cc
+++ b/utilities/transactions/write_prepared_txn_db.cc
@@ -813,6 +813,7 @@ void WritePreparedTxnDB::AdvanceSeqByOne() {
// Inserting an empty value will i) let the max evicted entry be
// published, i.e., max == last_published, and ii) increase the last
// published to be one beyond max, i.e., max < last_published.
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions woptions;
TransactionOptions txn_options;
Transaction* txn0 = BeginTransaction(woptions, txn_options, nullptr);
diff --git a/utilities/transactions/write_unprepared_txn.cc b/utilities/transactions/write_unprepared_txn.cc
index c30cf9e1f..2f1069dbc 100644
--- a/utilities/transactions/write_unprepared_txn.cc
+++ b/utilities/transactions/write_unprepared_txn.cc
@@ -727,6 +727,7 @@ Status WriteUnpreparedTxn::RollbackInternal() {
assert(GetId() > 0);
Status s;
auto read_at_seq = kMaxSequenceNumber;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions roptions;
// to prevent callback's seq from being overridden inside DBImpl::Get
roptions.snapshot = wpt_db_->GetMaxSnapshot();
@@ -882,6 +883,7 @@ Status WriteUnpreparedTxn::RollbackToSavePointInternal() {
assert(save_points_ != nullptr && save_points_->size() > 0);
const LockTracker& tracked_keys = *save_points_->top().new_locks_;
+ // TODO: plumb Env::IOActivity, Env::IOPriority
ReadOptions roptions;
roptions.snapshot = top.snapshot_->snapshot();
SequenceNumber min_uncommitted =
diff --git a/utilities/transactions/write_unprepared_txn_db.cc b/utilities/transactions/write_unprepared_txn_db.cc
index 1d75dd449..9219ec03c 100644
--- a/utilities/transactions/write_unprepared_txn_db.cc
+++ b/utilities/transactions/write_unprepared_txn_db.cc
@@ -37,6 +37,7 @@ Status WriteUnpreparedTxnDB::RollbackRecoveredTransaction(
// MemTableInserter during recovery to actually do writes into the DB
// instead of just dropping the in-memory write batch.
//
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions w_options;
class InvalidSnapshotReadCallback : public ReadCallback {
@@ -262,6 +263,7 @@ Status WriteUnpreparedTxnDB::Initialize(
continue;
}
+ // TODO: plumb Env::IOActivity, Env::IOPriority
WriteOptions w_options;
w_options.sync = true;
TransactionOptions t_options;