summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Yanqin Jin <yanqin@fb.com> 2022-05-13 12:29:20 -0700
committer: Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com> 2022-05-13 12:29:20 -0700
commit: f6d9730ea1be3fc05080c7147a26be34254fb44c (patch)
tree: 3099a9b14805a524bcfbbea18739c50bcf4c9abb /tools
parent: bfc6a8ee4a70323520fc778e34859c722029e725 (diff)
Fix stress test with best-efforts-recovery (#9986)
Summary: In this PR: - Since we are testing with disable_wal = true and best_efforts_recovery, we should set the column family count to 1, due to the requirements of the `ExpectedState` tracking and replaying logic. - During backup and checkpoint restore, disable best-efforts-recovery. This does not matter now because db_crashtest.py always disables the WAL when testing best-efforts-recovery. In the future, if we enable the WAL, then not setting `restore_options.best_efforts_recovery` will cause the backup db not to recover the WALs, and thus to differ from the db (which enables the WAL). - During verification of backup and checkpoint restore, print the key where an inconsistency exists between the expected state and the db. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9986 Test Plan: TEST_TMPDIR=/dev/shm/rocksdb make crash_test_with_best_efforts_recovery Reviewed By: siying Differential Revision: D36353105 Pulled By: riversand963 fbshipit-source-id: a484da161273e6216a1f7e245bac15a349693917
Diffstat (limited to 'tools')
-rw-r--r-- tools/db_crashtest.py 54
1 files changed, 11 insertions, 43 deletions
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py
index 1aed479e0..325a46871 100644
--- a/tools/db_crashtest.py
+++ b/tools/db_crashtest.py
@@ -313,10 +313,10 @@ txn_params = {
}
best_efforts_recovery_params = {
- "best_efforts_recovery": True,
- "skip_verifydb": True,
- "verify_db_one_in": 0,
- "continuous_verification_interval": 0,
+ "best_efforts_recovery": 1,
+ "atomic_flush": 0,
+ "disable_wal": 1,
+ "column_families": 1,
}
blob_params = {
@@ -502,6 +502,13 @@ def finalize_and_sanitize(src_params):
dest_params["memtable_prefix_bloom_size_ratio"] = 0
if dest_params.get("two_write_queues") == 1:
dest_params["enable_pipelined_write"] = 0
+ if dest_params.get("best_efforts_recovery") == 1:
+ dest_params["disable_wal"] = 1
+ dest_params["atomic_flush"] = 0
+ dest_params["enable_compaction_filter"] = 0
+ dest_params["sync"] = 0
+ dest_params["write_fault_one_in"] = 0
+
return dest_params
def gen_cmd_params(args):
@@ -560,42 +567,6 @@ def gen_cmd(params, unknown_params):
return cmd
-# Inject inconsistency to db directory.
-def inject_inconsistencies_to_db_dir(dir_path):
- files = os.listdir(dir_path)
- file_num_rgx = re.compile(r'(?P<number>[0-9]{6})')
- largest_fnum = 0
- for f in files:
- m = file_num_rgx.search(f)
- if m and not f.startswith('LOG'):
- largest_fnum = max(largest_fnum, int(m.group('number')))
-
- candidates = [
- f for f in files if re.search(r'[0-9]+\.sst', f)
- ]
- deleted = 0
- corrupted = 0
- for f in candidates:
- rnd = random.randint(0, 99)
- f_path = os.path.join(dir_path, f)
- if rnd < 10:
- os.unlink(f_path)
- deleted = deleted + 1
- elif 10 <= rnd and rnd < 30:
- with open(f_path, "a") as fd:
- fd.write('12345678')
- corrupted = corrupted + 1
- print('Removed %d table files' % deleted)
- print('Corrupted %d table files' % corrupted)
-
- # Add corrupted MANIFEST and SST
- for num in range(largest_fnum + 1, largest_fnum + 10):
- rnd = random.randint(0, 1)
- fname = ("MANIFEST-%06d" % num) if rnd == 0 else ("%06d.sst" % num)
- print('Write %s' % fname)
- with open(os.path.join(dir_path, fname), "w") as fd:
- fd.write("garbage")
-
def execute_cmd(cmd, timeout):
child = subprocess.Popen(cmd, stderr=subprocess.PIPE,
stdout=subprocess.PIPE)
@@ -649,9 +620,6 @@ def blackbox_crash_main(args, unknown_args):
time.sleep(1) # time to stabilize before the next run
- if args.test_best_efforts_recovery:
- inject_inconsistencies_to_db_dir(dbname)
-
time.sleep(1) # time to stabilize before the next run
# we need to clean up after ourselves -- only do this on test success