summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChangyu Bi <changyubi@meta.com>2024-09-03 13:06:25 -0700
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>2024-09-03 13:06:25 -0700
commitcd6f802ccb2f6f64263ba4b61134e3072d03a867 (patch)
tree1a10329532b8f7145978fa1fe13ebefa5281007e
parent92ad4a88f3199b013532b37d6598c442319355a5 (diff)
Add a new file ingestion option `link_files` (#12980)
Summary: Add option `IngestExternalFileOptions::link_files` that hard links input files and preserves original file links after ingestion, unlike `move_files` which will unlink input files after ingestion. This can be useful when being used together with `allow_db_generated_files` to ingest files from another DB. Also reverted the change to `move_files` in https://github.com/facebook/rocksdb/issues/12959 to simplify the contract so that it will always unlink input files without exception. Pull Request resolved: https://github.com/facebook/rocksdb/pull/12980 Test Plan: updated unit test `ExternSSTFileLinkFailFallbackTest.LinkFailFallBackExternalSst` to test that input files will not be unlinked. Reviewed By: pdillinger Differential Revision: D61925111 Pulled By: cbi42 fbshipit-source-id: eadaca72e1ae5288bdd195d57158466e5656fa62
-rw-r--r--db/db_impl/db_impl.cc4
-rw-r--r--db/external_sst_file_ingestion_job.cc5
-rw-r--r--db/external_sst_file_test.cc31
-rw-r--r--include/rocksdb/options.h14
-rw-r--r--unreleased_history/behavior_changes/ingest-live-file-with-move.md2
-rw-r--r--unreleased_history/behavior_changes/link-file-ingest.md1
6 files changed, 36 insertions, 21 deletions
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 392cbac41..5c885bc31 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -5837,6 +5837,10 @@ Status DBImpl::IngestExternalFiles(
"allow_db_generated_files.");
}
}
+ if (ingest_opts.move_files && ingest_opts.link_files) {
+ return Status::InvalidArgument(
+ "`move_files` and `link_files` can not both be true.");
+ }
}
// TODO (yanqin) maybe handle the case in which column_families have
diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc
index 750c9641a..7e5a97562 100644
--- a/db/external_sst_file_ingestion_job.cc
+++ b/db/external_sst_file_ingestion_job.cc
@@ -113,7 +113,7 @@ Status ExternalSstFileIngestionJob::Prepare(
const std::string path_outside_db = f.external_file_path;
const std::string path_inside_db = TableFileName(
cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId());
- if (ingestion_options_.move_files) {
+ if (ingestion_options_.move_files || ingestion_options_.link_files) {
status =
fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr);
if (status.ok()) {
@@ -626,8 +626,7 @@ void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
DeleteInternalFiles();
consumed_seqno_count_ = 0;
files_overlap_ = false;
- } else if (status.ok() && ingestion_options_.move_files &&
- !ingestion_options_.allow_db_generated_files) {
+ } else if (status.ok() && ingestion_options_.move_files) {
// The files were moved and added successfully, remove original file links
for (IngestedFileInfo& f : files_to_ingest_) {
Status s = fs_->DeleteFile(f.external_file_path, io_opts, nullptr);
diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc
index 3a15c8ef1..17793c493 100644
--- a/db/external_sst_file_test.cc
+++ b/db/external_sst_file_test.cc
@@ -66,7 +66,7 @@ class ExternalSSTFileTestBase : public DBTestBase {
class ExternSSTFileLinkFailFallbackTest
: public ExternalSSTFileTestBase,
- public ::testing::WithParamInterface<std::tuple<bool, bool>> {
+ public ::testing::WithParamInterface<std::tuple<bool, bool, bool>> {
public:
ExternSSTFileLinkFailFallbackTest() {
fs_ = std::make_shared<ExternalSSTTestFS>(env_->GetFileSystem(), true);
@@ -2210,7 +2210,8 @@ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
DestroyAndReopen(options_);
const int kNumKeys = 10000;
IngestExternalFileOptions ifo;
- ifo.move_files = true;
+ ifo.move_files = std::get<2>(GetParam());
+ ifo.link_files = !ifo.move_files;
ifo.failed_move_fall_back_to_copy = failed_move_fall_back_to_copy;
std::string file_path = sst_files_dir_ + "file1.sst";
@@ -2251,6 +2252,13 @@ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
ASSERT_EQ(0, bytes_copied);
ASSERT_EQ(file_size, bytes_moved);
ASSERT_FALSE(copyfile);
+
+ Status es = env_->FileExists(file_path);
+ if (ifo.move_files) {
+ ASSERT_TRUE(es.IsNotFound());
+ } else {
+ ASSERT_OK(es);
+ }
} else {
// Link operation fails.
ASSERT_EQ(0, bytes_moved);
@@ -2269,6 +2277,11 @@ TEST_P(ExternSSTFileLinkFailFallbackTest, LinkFailFallBackExternalSst) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
+INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest,
+ ExternSSTFileLinkFailFallbackTest,
+ testing::Combine(testing::Bool(), testing::Bool(),
+ testing::Bool()));
+
class TestIngestExternalFileListener : public EventListener {
public:
void OnExternalFileIngested(DB* /*db*/,
@@ -3719,19 +3732,13 @@ TEST_F(ExternalSSTFileWithTimestampTest, TimestampsNotPersistedBasic) {
INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest,
testing::Combine(testing::Bool(), testing::Bool()));
-INSTANTIATE_TEST_CASE_P(ExternSSTFileLinkFailFallbackTest,
- ExternSSTFileLinkFailFallbackTest,
- testing::Values(std::make_tuple(true, false),
- std::make_tuple(true, true),
- std::make_tuple(false, false)));
-
class IngestDBGeneratedFileTest
: public ExternalSSTFileTestBase,
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
public:
IngestDBGeneratedFileTest() {
ingest_opts.allow_db_generated_files = true;
- ingest_opts.move_files = std::get<0>(GetParam());
+ ingest_opts.link_files = std::get<0>(GetParam());
ingest_opts.verify_checksums_before_ingest = std::get<1>(GetParam());
ingest_opts.snapshot_consistency = false;
}
@@ -3744,10 +3751,10 @@ INSTANTIATE_TEST_CASE_P(BasicMultiConfig, IngestDBGeneratedFileTest,
testing::Combine(testing::Bool(), testing::Bool()));
TEST_P(IngestDBGeneratedFileTest, FailureCase) {
- if (encrypted_env_ && ingest_opts.move_files) {
+ if (encrypted_env_ && ingest_opts.link_files) {
// FIXME: should fail ingestion or support this combination.
ROCKSDB_GTEST_SKIP(
- "Encrypted env and move_files do not work together, as we reopen the "
+ "Encrypted env and link_files do not work together, as we reopen the "
"file after linking it which appends an extra encryption prefix.");
return;
}
@@ -3943,7 +3950,7 @@ TEST_P(IngestDBGeneratedFileTest2, NotOverlapWithDB) {
ingest_opts.allow_global_seqno = std::get<1>(GetParam());
ingest_opts.allow_blocking_flush = std::get<2>(GetParam());
ingest_opts.fail_if_not_bottommost_level = std::get<3>(GetParam());
- ingest_opts.move_files = std::get<4>(GetParam());
+ ingest_opts.link_files = std::get<4>(GetParam());
do {
SCOPED_TRACE("option_config_ = " + std::to_string(option_config_));
diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h
index e3eb0368d..fb272663b 100644
--- a/include/rocksdb/options.h
+++ b/include/rocksdb/options.h
@@ -2131,10 +2131,16 @@ struct CompactRangeOptions {
// IngestExternalFileOptions is used by IngestExternalFile()
struct IngestExternalFileOptions {
// Can be set to true to move the files instead of copying them.
- // Note that original file links will be removed after successful ingestion,
- // unless `allow_db_generated_files` is true.
+ // The input files will be unlinked after successful ingestion.
+ // The implementation depends on hard links (LinkFile) instead of traditional
+ // move (RenameFile) to maximize the chances to restore to the original
+ // state upon failure.
bool move_files = false;
- // If set to true, ingestion falls back to copy when move fails.
+ // Same as move_files except that input files will NOT be unlinked.
+ // Only one of `move_files` and `link_files` can be set at the same time.
+ bool link_files = false;
+ // If set to true, ingestion falls back to copy when hard linking fails.
+ // This applies to both `move_files` and `link_files`.
bool failed_move_fall_back_to_copy = true;
// If set to false, an ingested file keys could appear in existing snapshots
// that where created before the file was ingested.
@@ -2209,8 +2215,6 @@ struct IngestExternalFileOptions {
// Enables ingestion of files not generated by SstFileWriter. When true:
// - Allows files to be ingested when their cf_id doesn't match the CF they
// are being ingested into.
- // - Preserves original file links after successful ingestion when
- // `move_files = true`.
// REQUIREMENTS:
// - Ingested files must not overlap with existing keys.
// - `write_global_seqno` must be false.
diff --git a/unreleased_history/behavior_changes/ingest-live-file-with-move.md b/unreleased_history/behavior_changes/ingest-live-file-with-move.md
index 303c5754e..444a7a45e 100644
--- a/unreleased_history/behavior_changes/ingest-live-file-with-move.md
+++ b/unreleased_history/behavior_changes/ingest-live-file-with-move.md
@@ -1 +1 @@
-* Support ingesting db generated files using hard link, i.e. IngestExternalFileOptions::move_files and IngestExternalFileOptions::allow_db_generated_files._ \ No newline at end of file
+* Support ingesting db generated files using hard link, i.e. IngestExternalFileOptions::move_files/link_files and IngestExternalFileOptions::allow_db_generated_files. \ No newline at end of file
diff --git a/unreleased_history/behavior_changes/link-file-ingest.md b/unreleased_history/behavior_changes/link-file-ingest.md
new file mode 100644
index 000000000..e9f909eee
--- /dev/null
+++ b/unreleased_history/behavior_changes/link-file-ingest.md
@@ -0,0 +1 @@
+* Add a new file ingestion option `IngestExternalFileOptions::link_files` to hard link input files and preserve original files links after ingestion. \ No newline at end of file