diff options
author | Yanqin Jin <yanqin@fb.com> | 2022-05-13 12:29:20 -0700 |
---|---|---|
committer | Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com> | 2022-05-13 12:29:20 -0700 |
commit | f6d9730ea1be3fc05080c7147a26be34254fb44c (patch) | |
tree | 3099a9b14805a524bcfbbea18739c50bcf4c9abb /tools | |
parent | bfc6a8ee4a70323520fc778e34859c722029e725 (diff) |
Fix stress test with best-efforts-recovery (#9986)
Summary:
This PR:
- since we are testing with disable_wal = true and best_efforts_recovery, we should set column family count to 1, due to the requirement of `ExpectedState` tracking and replaying logic.
- during backup and checkpoint restore, disable best-efforts-recovery. This does not matter now because db_crashtest.py always disables WAL when testing best-efforts-recovery. In the future, if we enable WAL, then not setting `restore_options.best_efforts_recovery` to false will cause the backup db not to recover the WALs, and thus to differ from the db (which enables WAL).
- during verification of backup and checkpoint restore, print the key where inconsistency exists between expected state and db.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9986
Test Plan: TEST_TMPDIR=/dev/shm/rocksdb make crash_test_with_best_efforts_recovery
Reviewed By: siying
Differential Revision: D36353105
Pulled By: riversand963
fbshipit-source-id: a484da161273e6216a1f7e245bac15a349693917
Diffstat (limited to 'tools')
-rw-r--r-- | tools/db_crashtest.py | 54 |
1 files changed, 11 insertions, 43 deletions
diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 1aed479e0..325a46871 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -313,10 +313,10 @@ txn_params = { } best_efforts_recovery_params = { - "best_efforts_recovery": True, - "skip_verifydb": True, - "verify_db_one_in": 0, - "continuous_verification_interval": 0, + "best_efforts_recovery": 1, + "atomic_flush": 0, + "disable_wal": 1, + "column_families": 1, } blob_params = { @@ -502,6 +502,13 @@ def finalize_and_sanitize(src_params): dest_params["memtable_prefix_bloom_size_ratio"] = 0 if dest_params.get("two_write_queues") == 1: dest_params["enable_pipelined_write"] = 0 + if dest_params.get("best_efforts_recovery") == 1: + dest_params["disable_wal"] = 1 + dest_params["atomic_flush"] = 0 + dest_params["enable_compaction_filter"] = 0 + dest_params["sync"] = 0 + dest_params["write_fault_one_in"] = 0 + return dest_params def gen_cmd_params(args): @@ -560,42 +567,6 @@ def gen_cmd(params, unknown_params): return cmd -# Inject inconsistency to db directory. 
-def inject_inconsistencies_to_db_dir(dir_path): - files = os.listdir(dir_path) - file_num_rgx = re.compile(r'(?P<number>[0-9]{6})') - largest_fnum = 0 - for f in files: - m = file_num_rgx.search(f) - if m and not f.startswith('LOG'): - largest_fnum = max(largest_fnum, int(m.group('number'))) - - candidates = [ - f for f in files if re.search(r'[0-9]+\.sst', f) - ] - deleted = 0 - corrupted = 0 - for f in candidates: - rnd = random.randint(0, 99) - f_path = os.path.join(dir_path, f) - if rnd < 10: - os.unlink(f_path) - deleted = deleted + 1 - elif 10 <= rnd and rnd < 30: - with open(f_path, "a") as fd: - fd.write('12345678') - corrupted = corrupted + 1 - print('Removed %d table files' % deleted) - print('Corrupted %d table files' % corrupted) - - # Add corrupted MANIFEST and SST - for num in range(largest_fnum + 1, largest_fnum + 10): - rnd = random.randint(0, 1) - fname = ("MANIFEST-%06d" % num) if rnd == 0 else ("%06d.sst" % num) - print('Write %s' % fname) - with open(os.path.join(dir_path, fname), "w") as fd: - fd.write("garbage") - def execute_cmd(cmd, timeout): child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) @@ -649,9 +620,6 @@ def blackbox_crash_main(args, unknown_args): time.sleep(1) # time to stabilize before the next run - if args.test_best_efforts_recovery: - inject_inconsistencies_to_db_dir(dbname) - time.sleep(1) # time to stabilize before the next run # we need to clean up after ourselves -- only do this on test success |