summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author sdong <siying.d@fb.com> 2021-06-30 16:45:44 -0700
committer Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com> 2021-06-30 16:46:41 -0700
commit ba224b75c7362897df8c0da97f5fb04594c4664b (patch)
tree 84548a66ead96080d93a1813790670fdbbef80bb
parent 41c4b665f4c6c9ec17be7b75e1be63c13a3af1ab (diff)
Stress Test to inject write failures in reopen (#8474)
Summary: Previously, the stress test could inject metadata write failures when reopening a DB. We extend it to file appends too, in the same way. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8474 Test Plan: manually run the crash test with various settings and make sure the failures are triggered as expected. Reviewed By: zhichao-cao Differential Revision: D29503116 fbshipit-source-id: e73a446e80ccbd09301a579280e56ff949381fab
-rw-r--r-- db_stress_tool/db_stress_gflags.cc 3
-rw-r--r-- db_stress_tool/db_stress_shared_state.h 1
-rw-r--r-- db_stress_tool/db_stress_test_base.cc 35
-rw-r--r-- db_stress_tool/db_stress_tool.cc 3
-rw-r--r-- tools/db_crashtest.py 1
-rw-r--r-- utilities/fault_injection_fs.cc 28
-rw-r--r-- utilities/fault_injection_fs.h 3
7 files changed, 53 insertions, 21 deletions
diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc
index 315de67d7..eed9ee4bf 100644
--- a/db_stress_tool/db_stress_gflags.cc
+++ b/db_stress_tool/db_stress_gflags.cc
@@ -824,5 +824,8 @@ DEFINE_int32(open_metadata_write_fault_one_in, 0,
DEFINE_string(secondary_cache_uri, "",
"Full URI for creating a customized secondary cache object");
#endif // ROCKSDB_LITE
+DEFINE_int32(open_write_fault_one_in, 0,
+ "On non-zero, enables fault injection on file write "
+ "during DB reopen.");
#endif // GFLAGS
diff --git a/db_stress_tool/db_stress_shared_state.h b/db_stress_tool/db_stress_shared_state.h
index 03bc0784c..d214a42b5 100644
--- a/db_stress_tool/db_stress_shared_state.h
+++ b/db_stress_tool/db_stress_shared_state.h
@@ -31,6 +31,7 @@ DECLARE_int32(continuous_verification_interval);
DECLARE_int32(read_fault_one_in);
DECLARE_int32(write_fault_one_in);
DECLARE_int32(open_metadata_write_fault_one_in);
+DECLARE_int32(open_write_fault_one_in);
namespace ROCKSDB_NAMESPACE {
class StressTest;
diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc
index f2edfbd0d..71b6bbc91 100644
--- a/db_stress_tool/db_stress_test_base.cc
+++ b/db_stress_tool/db_stress_test_base.cc
@@ -620,7 +620,8 @@ void StressTest::OperateDb(ThreadState* thread) {
FileType::kDescriptorFile,
FileType::kCurrentFile};
fault_fs_guard->SetRandomWriteError(
- thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg, types);
+ thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg,
+ /*inject_for_all_file_types=*/false, types);
}
#endif // NDEBUG
thread->stats.Start();
@@ -2464,15 +2465,28 @@ void StressTest::Open() {
// Only ingest metadata error if it is reopening, as initial open
// failure doesn't need to be handled.
// TODO cover transaction DB is not covered in this fault test too.
- bool ingest_meta_error =
- FLAGS_open_metadata_write_fault_one_in &&
+ bool ingest_meta_error = false;
+ bool ingest_write_error = false;
+ if ((FLAGS_open_metadata_write_fault_one_in ||
+ FLAGS_open_write_fault_one_in) &&
fault_fs_guard
->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr)
- .ok();
- if (ingest_meta_error) {
- fault_fs_guard->EnableMetadataWriteErrorInjection();
- fault_fs_guard->SetRandomMetadataWriteError(
- FLAGS_open_metadata_write_fault_one_in);
+ .ok()) {
+ ingest_meta_error = FLAGS_open_metadata_write_fault_one_in;
+ ingest_write_error = FLAGS_open_write_fault_one_in;
+ if (ingest_meta_error) {
+ fault_fs_guard->EnableMetadataWriteErrorInjection();
+ fault_fs_guard->SetRandomMetadataWriteError(
+ FLAGS_open_metadata_write_fault_one_in);
+ }
+ if (ingest_write_error) {
+ fault_fs_guard->SetFilesystemDirectWritable(false);
+ fault_fs_guard->EnableWriteErrorInjection();
+ fault_fs_guard->SetRandomWriteError(
+ static_cast<uint32_t>(FLAGS_seed), FLAGS_open_write_fault_one_in,
+ IOStatus::IOError("Injected Open Error"),
+ /*inject_for_all_file_types=*/true, /*types=*/{});
+ }
}
while (true) {
#endif // NDEBUG
@@ -2506,8 +2520,10 @@ void StressTest::Open() {
}
#ifndef NDEBUG
- if (ingest_meta_error) {
+ if (ingest_meta_error || ingest_write_error) {
+ fault_fs_guard->SetFilesystemDirectWritable(true);
fault_fs_guard->DisableMetadataWriteErrorInjection();
+ fault_fs_guard->DisableWriteErrorInjection();
if (s.ok()) {
// Ingested errors might happen in background compactions. We
// wait for all compactions to finish to make sure DB is in
@@ -2523,6 +2539,7 @@ void StressTest::Open() {
// successfully open the DB with correct data if no IO error shows
// up.
ingest_meta_error = false;
+ ingest_write_error = false;
Random rand(static_cast<uint32_t>(FLAGS_seed));
if (rand.OneIn(2)) {
diff --git a/db_stress_tool/db_stress_tool.cc b/db_stress_tool/db_stress_tool.cc
index cb489eb13..b7acf7947 100644
--- a/db_stress_tool/db_stress_tool.cc
+++ b/db_stress_tool/db_stress_tool.cc
@@ -86,7 +86,8 @@ int db_stress_tool(int argc, char** argv) {
#ifndef NDEBUG
if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||
- FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in) {
+ FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in ||
+ FLAGS_open_write_fault_one_in) {
FaultInjectionTestFS* fs =
new FaultInjectionTestFS(raw_env->GetFileSystem());
fault_fs_guard.reset(fs);
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 8f9b3957f..86fddccce 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -140,6 +140,7 @@ default_params = {
"key_len_percent_dist": "1,30,69",
"read_fault_one_in": lambda: random.choice([0, 1000]),
"open_metadata_write_fault_one_in": lambda: random.choice([0, 8]),
+ "open_write_fault_one_in": lambda: random.choice([0, 16]),
"sync_fault_injection": False,
"get_property_one_in": 1000000,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc
index 00b5eaa4e..2a557303e 100644
--- a/utilities/fault_injection_fs.cc
+++ b/utilities/fault_injection_fs.cc
@@ -168,7 +168,8 @@ IOStatus TestFSWritableFile::Append(
state_.pos_ += data.size();
fs_->WritableFileAppended(state_);
}
- return IOStatus::OK();
+ IOStatus io_s = fs_->InjectWriteError(state_.filename_);
+ return io_s;
}
IOStatus TestFSWritableFile::PositionedAppend(
@@ -194,7 +195,8 @@ IOStatus TestFSWritableFile::PositionedAppend(
return IOStatus::Corruption(msg);
}
target_->PositionedAppend(data, offset, options, dbg);
- return IOStatus::OK();
+ IOStatus io_s = fs_->InjectWriteError(state_.filename_);
+ return io_s;
}
IOStatus TestFSWritableFile::Close(const IOOptions& options,
@@ -724,15 +726,19 @@ IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) {
}
bool allowed_type = false;
- uint64_t number;
- FileType cur_type = kTempFile;
- std::size_t found = file_name.find_last_of("/");
- std::string file = file_name.substr(found);
- bool ret = ParseFileName(file, &number, &cur_type);
- if (ret) {
- for (const auto& type : write_error_allowed_types_) {
- if (cur_type == type) {
- allowed_type = true;
+ if (inject_for_all_file_types_) {
+ allowed_type = true;
+ } else {
+ uint64_t number;
+ FileType cur_type = kTempFile;
+ std::size_t found = file_name.find_last_of("/");
+ std::string file = file_name.substr(found);
+ bool ret = ParseFileName(file, &number, &cur_type);
+ if (ret) {
+ for (const auto& type : write_error_allowed_types_) {
+ if (cur_type == type) {
+ allowed_type = true;
+ }
}
}
}
diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h
index 2ee9afe04..3a7135e05 100644
--- a/utilities/fault_injection_fs.h
+++ b/utilities/fault_injection_fs.h
@@ -365,6 +365,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
// want to inject. Types decides the file types we want to inject the
// error (e.g., Wal files, SST files), which is empty by default.
void SetRandomWriteError(uint32_t seed, int one_in, IOStatus error,
+ bool inject_for_all_file_types,
const std::vector<FileType>& types) {
MutexLock l(&mutex_);
Random tmp_rand(seed);
@@ -372,6 +373,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
error_ = error;
write_error_rand_ = tmp_rand;
write_error_one_in_ = one_in;
+ inject_for_all_file_types_ = inject_for_all_file_types;
write_error_allowed_types_ = types;
}
@@ -492,6 +494,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
Random write_error_rand_;
int write_error_one_in_;
int metadata_write_error_one_in_;
+ bool inject_for_all_file_types_;
std::vector<FileType> write_error_allowed_types_;
bool ingest_data_corruption_before_write_;
ChecksumType checksum_handoff_func_tpye_;