summaryrefslogtreecommitdiff
path: root/table
diff options
context:
space:
mode:
authorLevi Tamasi <ltamasi@fb.com>2020-08-12 18:24:27 -0700
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>2020-08-12 18:25:48 -0700
commit9d6f48ec1d1ca52eec8e5fe03735dd6de54639e0 (patch)
tree1d06df4226b3dec457f4c290aa09aaf0140ab03d /table
parent1f9f630b2795f32c72de281d02127bbed4abdb4f (diff)
Clean up CompressBlock/CompressBlockInternal a bit (#7249)
Summary: The patch cleans up and refactors `CompressBlock` and `CompressBlockInternal` a bit. In particular, it does the following: * It renames `CompressBlockInternal` to `CompressData` and moves it to `util/compression.h`, where other general compression-related utilities are located. This will facilitate reuse in the BlobDB write path. * The signature of the method is changed so it now takes `compression_format_version` (similarly to the compression library specific methods) instead of `format_version` (which is specific to the block based table). * `GetCompressionFormatForVersion` no longer takes `compression_type` as a parameter. This parameter was only used in a (not entirely up-to-date) assertion; also, removing it eliminates the need to ensure this precondition holds at all call sites. * Does some minor cleanup in `CompressBlock`, for instance, it is now possible to pass only one of `sampled_output_fast` and `sampled_output_slow`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7249 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D23087278 Pulled By: ltamasi fbshipit-source-id: e6316e45baed8b4e7de7c1780c90501c2a3439b3
Diffstat (limited to 'table')
-rw-r--r--table/block_based/block_based_table_builder.cc110
-rw-r--r--table/format.cc28
-rw-r--r--table/format.h14
3 files changed, 47 insertions, 105 deletions
diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc
index 30fc9197e..3d827838f 100644
--- a/table/block_based/block_based_table_builder.cc
+++ b/table/block_based/block_based_table_builder.cc
@@ -105,63 +105,6 @@ bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) {
return compressed_size < raw_size - (raw_size / 8u);
}
-bool CompressBlockInternal(const Slice& raw,
- const CompressionInfo& compression_info,
- uint32_t format_version,
- std::string* compressed_output) {
- bool ret;
-
- // Will return compressed block contents if (1) the compression method is
- // supported in this platform and (2) the compression rate is "good enough".
- switch (compression_info.type()) {
- case kSnappyCompression:
- ret = Snappy_Compress(compression_info, raw.data(), raw.size(),
- compressed_output);
- break;
- case kZlibCompression:
- ret = Zlib_Compress(
- compression_info,
- GetCompressFormatForVersion(kZlibCompression, format_version),
- raw.data(), raw.size(), compressed_output);
- break;
- case kBZip2Compression:
- ret = BZip2_Compress(
- compression_info,
- GetCompressFormatForVersion(kBZip2Compression, format_version),
- raw.data(), raw.size(), compressed_output);
- break;
- case kLZ4Compression:
- ret = LZ4_Compress(
- compression_info,
- GetCompressFormatForVersion(kLZ4Compression, format_version),
- raw.data(), raw.size(), compressed_output);
- break;
- case kLZ4HCCompression:
- ret = LZ4HC_Compress(
- compression_info,
- GetCompressFormatForVersion(kLZ4HCCompression, format_version),
- raw.data(), raw.size(), compressed_output);
- break;
- case kXpressCompression:
- ret = XPRESS_Compress(raw.data(), raw.size(), compressed_output);
- break;
- case kZSTD:
- case kZSTDNotFinalCompression:
- ret = ZSTD_Compress(compression_info, raw.data(), raw.size(),
- compressed_output);
- break;
- default:
- // Do not recognize this compression type
- ret = false;
- }
-
- TEST_SYNC_POINT_CALLBACK(
- "BlockBasedTableBuilder::CompressBlockInternal:TamperWithReturnValue",
- static_cast<void*>(&ret));
-
- return ret;
-}
-
} // namespace
// format_version is the block format as defined in include/rocksdb/table.h
@@ -170,11 +113,9 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info,
bool do_sample, std::string* compressed_output,
std::string* sampled_output_fast,
std::string* sampled_output_slow) {
- *type = info.type();
-
- if (info.type() == kNoCompression && !info.SampleForCompression()) {
- return raw;
- }
+ assert(type);
+ assert(compressed_output);
+ assert(compressed_output->empty());
// If requested, we sample one in every N block with a
// fast and slow compression algorithm and report the stats.
@@ -182,10 +123,10 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info,
// enabling compression and they also get a hint about which
// compression algorithm wil be beneficial.
if (do_sample && info.SampleForCompression() &&
- Random::GetTLSInstance()->OneIn((int)info.SampleForCompression()) &&
- sampled_output_fast && sampled_output_slow) {
+ Random::GetTLSInstance()->OneIn(
+ static_cast<int>(info.SampleForCompression()))) {
// Sampling with a fast compression algorithm
- if (LZ4_Supported() || Snappy_Supported()) {
+ if (sampled_output_fast && (LZ4_Supported() || Snappy_Supported())) {
CompressionType c =
LZ4_Supported() ? kLZ4Compression : kSnappyCompression;
CompressionContext context(c);
@@ -194,33 +135,46 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info,
CompressionDict::GetEmptyDict(), c,
info.SampleForCompression());
- CompressBlockInternal(raw, info_tmp, format_version, sampled_output_fast);
+ CompressData(raw, info_tmp, GetCompressFormatForVersion(format_version),
+ sampled_output_fast);
}
// Sampling with a slow but high-compression algorithm
- if (ZSTD_Supported() || Zlib_Supported()) {
+ if (sampled_output_slow && (ZSTD_Supported() || Zlib_Supported())) {
CompressionType c = ZSTD_Supported() ? kZSTD : kZlibCompression;
CompressionContext context(c);
CompressionOptions options;
CompressionInfo info_tmp(options, context,
CompressionDict::GetEmptyDict(), c,
info.SampleForCompression());
- CompressBlockInternal(raw, info_tmp, format_version, sampled_output_slow);
+
+ CompressData(raw, info_tmp, GetCompressFormatForVersion(format_version),
+ sampled_output_slow);
}
}
- // Actually compress the data
- if (*type != kNoCompression) {
- if (CompressBlockInternal(raw, info, format_version, compressed_output) &&
- GoodCompressionRatio(compressed_output->size(), raw.size())) {
- return *compressed_output;
- }
+ if (info.type() == kNoCompression) {
+ *type = kNoCompression;
+ return raw;
}
- // Compression method is not supported, or not good
- // compression ratio, so just fall back to uncompressed form.
- *type = kNoCompression;
- return raw;
+ // Actually compress the data; if the compression method is not supported,
+ // or the compression fails etc., just fall back to uncompressed
+ if (!CompressData(raw, info, GetCompressFormatForVersion(format_version),
+ compressed_output)) {
+ *type = kNoCompression;
+ return raw;
+ }
+
+ // Check the compression ratio; if it's not good enough, just fall back to
+ // uncompressed
+ if (!GoodCompressionRatio(compressed_output->size(), raw.size())) {
+ *type = kNoCompression;
+ return raw;
+ }
+
+ *type = info.type();
+ return *compressed_output;
}
// kBlockBasedTableMagicNumber was picked by running
diff --git a/table/format.cc b/table/format.cc
index 0dfa6e254..af74222ba 100644
--- a/table/format.cc
+++ b/table/format.cc
@@ -371,10 +371,9 @@ Status UncompressBlockContentsForCompressionType(
break;
}
case kZlibCompression:
- ubuf = Zlib_Uncompress(
- uncompression_info, data, n, &decompress_size,
- GetCompressFormatForVersion(kZlibCompression, format_version),
- allocator);
+ ubuf = Zlib_Uncompress(uncompression_info, data, n, &decompress_size,
+ GetCompressFormatForVersion(format_version),
+ allocator);
if (!ubuf) {
static char zlib_corrupt_msg[] =
"Zlib not supported or corrupted Zlib compressed block contents";
@@ -383,10 +382,9 @@ Status UncompressBlockContentsForCompressionType(
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kBZip2Compression:
- ubuf = BZip2_Uncompress(
- data, n, &decompress_size,
- GetCompressFormatForVersion(kBZip2Compression, format_version),
- allocator);
+ ubuf = BZip2_Uncompress(data, n, &decompress_size,
+ GetCompressFormatForVersion(format_version),
+ allocator);
if (!ubuf) {
static char bzip2_corrupt_msg[] =
"Bzip2 not supported or corrupted Bzip2 compressed block contents";
@@ -395,10 +393,9 @@ Status UncompressBlockContentsForCompressionType(
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kLZ4Compression:
- ubuf = LZ4_Uncompress(
- uncompression_info, data, n, &decompress_size,
- GetCompressFormatForVersion(kLZ4Compression, format_version),
- allocator);
+ ubuf = LZ4_Uncompress(uncompression_info, data, n, &decompress_size,
+ GetCompressFormatForVersion(format_version),
+ allocator);
if (!ubuf) {
static char lz4_corrupt_msg[] =
"LZ4 not supported or corrupted LZ4 compressed block contents";
@@ -407,10 +404,9 @@ Status UncompressBlockContentsForCompressionType(
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kLZ4HCCompression:
- ubuf = LZ4_Uncompress(
- uncompression_info, data, n, &decompress_size,
- GetCompressFormatForVersion(kLZ4HCCompression, format_version),
- allocator);
+ ubuf = LZ4_Uncompress(uncompression_info, data, n, &decompress_size,
+ GetCompressFormatForVersion(format_version),
+ allocator);
if (!ubuf) {
static char lz4hc_corrupt_msg[] =
"LZ4HC not supported or corrupted LZ4HC compressed block contents";
diff --git a/table/format.h b/table/format.h
index 5b6e6a925..e40a5ceae 100644
--- a/table/format.h
+++ b/table/format.h
@@ -110,19 +110,11 @@ struct IndexValue {
std::string ToString(bool hex, bool have_first_key) const;
};
-inline uint32_t GetCompressFormatForVersion(CompressionType compression_type,
- uint32_t version) {
-#ifdef NDEBUG
- (void)compression_type;
-#endif
- // snappy is not versioned
- assert(compression_type != kSnappyCompression &&
- compression_type != kXpressCompression &&
- compression_type != kNoCompression);
- // As of version 2, we encode compressed block with
+inline uint32_t GetCompressFormatForVersion(uint32_t format_version) {
+ // As of format_version 2, we encode compressed block with
// compress_format_version == 2. Before that, the version is 1.
// DO NOT CHANGE THIS FUNCTION, it affects disk format
- return version >= 2 ? 2 : 1;
+ return format_version >= 2 ? 2 : 1;
}
inline bool BlockBasedTableSupportedVersion(uint32_t version) {