From 9d6f48ec1d1ca52eec8e5fe03735dd6de54639e0 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Wed, 12 Aug 2020 18:24:27 -0700 Subject: Clean up CompressBlock/CompressBlockInternal a bit (#7249) Summary: The patch cleans up and refactors `CompressBlock` and `CompressBlockInternal` a bit. In particular, it does the following: * It renames `CompressBlockInternal` to `CompressData` and moves it to `util/compression.h`, where other general compression-related utilities are located. This will facilitate reuse in the BlobDB write path. * The signature of the method is changed so it now takes `compression_format_version` (similarly to the compression library specific methods) instead of `format_version` (which is specific to the block based table). * `GetCompressionFormatForVersion` no longer takes `compression_type` as a parameter. This parameter was only used in a (not entirely up-to-date) assertion; also, removing it eliminates the need to ensure this precondition holds at all call sites. * Does some minor cleanup in `CompressBlock`, for instance, it is now possible to pass only one of `sampled_output_fast` and `sampled_output_slow`. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7249 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D23087278 Pulled By: ltamasi fbshipit-source-id: e6316e45baed8b4e7de7c1780c90501c2a3439b3 --- table/block_based/block_based_table_builder.cc | 110 +++++++------------------ table/format.cc | 28 +++---- table/format.h | 14 +--- 3 files changed, 47 insertions(+), 105 deletions(-) (limited to 'table') diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 30fc9197e..3d827838f 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -105,63 +105,6 @@ bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) { return compressed_size < raw_size - (raw_size / 8u); } -bool CompressBlockInternal(const Slice& raw, - const CompressionInfo& compression_info, - uint32_t format_version, - std::string* compressed_output) { - bool ret; - - // Will return compressed block contents if (1) the compression method is - // supported in this platform and (2) the compression rate is "good enough". - switch (compression_info.type()) { - case kSnappyCompression: - ret = Snappy_Compress(compression_info, raw.data(), raw.size(), - compressed_output); - break; - case kZlibCompression: - ret = Zlib_Compress( - compression_info, - GetCompressFormatForVersion(kZlibCompression, format_version), - raw.data(), raw.size(), compressed_output); - break; - case kBZip2Compression: - ret = BZip2_Compress( - compression_info, - GetCompressFormatForVersion(kBZip2Compression, format_version), - raw.data(), raw.size(), compressed_output); - break; - case kLZ4Compression: - ret = LZ4_Compress( - compression_info, - GetCompressFormatForVersion(kLZ4Compression, format_version), - raw.data(), raw.size(), compressed_output); - break; - case kLZ4HCCompression: - ret = LZ4HC_Compress( - compression_info, - GetCompressFormatForVersion(kLZ4HCCompression, format_version), - raw.data(), raw.size(), compressed_output); - break; - case kXpressCompression: - ret = XPRESS_Compress(raw.data(), raw.size(), compressed_output); - break; - case kZSTD: - case kZSTDNotFinalCompression: - ret = ZSTD_Compress(compression_info, raw.data(), raw.size(), - compressed_output); - break; - default: - // Do not recognize this compression type - ret = false; - } - - TEST_SYNC_POINT_CALLBACK( - "BlockBasedTableBuilder::CompressBlockInternal:TamperWithReturnValue", - static_cast(&ret)); - - return ret; -} - } // namespace // format_version is the block format as defined in include/rocksdb/table.h @@ -170,11 +113,9 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info, bool do_sample, std::string* compressed_output, std::string* sampled_output_fast, std::string* sampled_output_slow) { - *type = info.type(); - - if (info.type() == kNoCompression && !info.SampleForCompression()) { - return raw; - } + assert(type); + assert(compressed_output); + assert(compressed_output->empty()); // If requested, we sample one in every N block with a // fast and slow compression algorithm and report the stats. @@ -182,10 +123,10 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info, // enabling compression and they also get a hint about which // compression algorithm wil be beneficial. if (do_sample && info.SampleForCompression() && - Random::GetTLSInstance()->OneIn((int)info.SampleForCompression()) && - sampled_output_fast && sampled_output_slow) { + Random::GetTLSInstance()->OneIn( + static_cast(info.SampleForCompression()))) { // Sampling with a fast compression algorithm - if (LZ4_Supported() || Snappy_Supported()) { + if (sampled_output_fast && (LZ4_Supported() || Snappy_Supported())) { CompressionType c = LZ4_Supported() ? kLZ4Compression : kSnappyCompression; CompressionContext context(c); @@ -194,33 +135,46 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info, CompressionDict::GetEmptyDict(), c, info.SampleForCompression()); - CompressBlockInternal(raw, info_tmp, format_version, sampled_output_fast); + CompressData(raw, info_tmp, GetCompressFormatForVersion(format_version), + sampled_output_fast); } // Sampling with a slow but high-compression algorithm - if (ZSTD_Supported() || Zlib_Supported()) { + if (sampled_output_slow && (ZSTD_Supported() || Zlib_Supported())) { CompressionType c = ZSTD_Supported() ? kZSTD : kZlibCompression; CompressionContext context(c); CompressionOptions options; CompressionInfo info_tmp(options, context, CompressionDict::GetEmptyDict(), c, info.SampleForCompression()); - CompressBlockInternal(raw, info_tmp, format_version, sampled_output_slow); + + CompressData(raw, info_tmp, GetCompressFormatForVersion(format_version), + sampled_output_slow); } } - // Actually compress the data - if (*type != kNoCompression) { - if (CompressBlockInternal(raw, info, format_version, compressed_output) && - GoodCompressionRatio(compressed_output->size(), raw.size())) { - return *compressed_output; - } + if (info.type() == kNoCompression) { + *type = kNoCompression; + return raw; } - // Compression method is not supported, or not good - // compression ratio, so just fall back to uncompressed form. - *type = kNoCompression; - return raw; + // Actually compress the data; if the compression method is not supported, + // or the compression fails etc., just fall back to uncompressed + if (!CompressData(raw, info, GetCompressFormatForVersion(format_version), + compressed_output)) { + *type = kNoCompression; + return raw; + } + + // Check the compression ratio; if it's not good enough, just fall back to + // uncompressed + if (!GoodCompressionRatio(compressed_output->size(), raw.size())) { + *type = kNoCompression; + return raw; + } + + *type = info.type(); + return *compressed_output; } // kBlockBasedTableMagicNumber was picked by running diff --git a/table/format.cc b/table/format.cc index 0dfa6e254..af74222ba 100644 --- a/table/format.cc +++ b/table/format.cc @@ -371,10 +371,9 @@ Status UncompressBlockContentsForCompressionType( break; } case kZlibCompression: - ubuf = Zlib_Uncompress( - uncompression_info, data, n, &decompress_size, - GetCompressFormatForVersion(kZlibCompression, format_version), - allocator); + ubuf = Zlib_Uncompress(uncompression_info, data, n, &decompress_size, + GetCompressFormatForVersion(format_version), + allocator); if (!ubuf) { static char zlib_corrupt_msg[] = "Zlib not supported or corrupted Zlib compressed block contents"; @@ -383,10 +382,9 @@ Status UncompressBlockContentsForCompressionType( *contents = BlockContents(std::move(ubuf), decompress_size); break; case kBZip2Compression: - ubuf = BZip2_Uncompress( - data, n, &decompress_size, - GetCompressFormatForVersion(kBZip2Compression, format_version), - allocator); + ubuf = BZip2_Uncompress(data, n, &decompress_size, + GetCompressFormatForVersion(format_version), + allocator); if (!ubuf) { static char bzip2_corrupt_msg[] = "Bzip2 not supported or corrupted Bzip2 compressed block contents"; @@ -395,10 +393,9 @@ Status UncompressBlockContentsForCompressionType( *contents = BlockContents(std::move(ubuf), decompress_size); break; case kLZ4Compression: - ubuf = LZ4_Uncompress( - uncompression_info, data, n, &decompress_size, - GetCompressFormatForVersion(kLZ4Compression, format_version), - allocator); + ubuf = LZ4_Uncompress(uncompression_info, data, n, &decompress_size, + GetCompressFormatForVersion(format_version), + allocator); if (!ubuf) { static char lz4_corrupt_msg[] = "LZ4 not supported or corrupted LZ4 compressed block contents"; @@ -407,10 +404,9 @@ Status UncompressBlockContentsForCompressionType( *contents = BlockContents(std::move(ubuf), decompress_size); break; case kLZ4HCCompression: - ubuf = LZ4_Uncompress( - uncompression_info, data, n, &decompress_size, - GetCompressFormatForVersion(kLZ4HCCompression, format_version), - allocator); + ubuf = LZ4_Uncompress(uncompression_info, data, n, &decompress_size, + GetCompressFormatForVersion(format_version), + allocator); if (!ubuf) { static char lz4hc_corrupt_msg[] = "LZ4HC not supported or corrupted LZ4HC compressed block contents"; diff --git a/table/format.h b/table/format.h index 5b6e6a925..e40a5ceae 100644 --- a/table/format.h +++ b/table/format.h @@ -110,19 +110,11 @@ struct IndexValue { std::string ToString(bool hex, bool have_first_key) const; }; -inline uint32_t GetCompressFormatForVersion(CompressionType compression_type, - uint32_t version) { -#ifdef NDEBUG - (void)compression_type; -#endif - // snappy is not versioned - assert(compression_type != kSnappyCompression && - compression_type != kXpressCompression && - compression_type != kNoCompression); - // As of version 2, we encode compressed block with +inline uint32_t GetCompressFormatForVersion(uint32_t format_version) { + // As of format_version 2, we encode compressed block with // compress_format_version == 2. Before that, the version is 1. // DO NOT CHANGE THIS FUNCTION, it affects disk format - return version >= 2 ? 2 : 1; + return format_version >= 2 ? 2 : 1; } inline bool BlockBasedTableSupportedVersion(uint32_t version) { -- cgit v1.2.3-70-g09d2