author     anand76 <anand76@devvm4702.ftw0.facebook.com>    2022-06-16 12:12:43 -0700
committer  Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>    2022-06-16 12:12:43 -0700
commit     a6691d0f65aec6ce6ff64e8e911d7cebe00621b0 (patch)
tree       d37523b498404534a24a4bef0ec0d1da783ade14
parent     4d31d3c2ed392657a2c28fe7362f00bced7f19c9 (diff)
Update stats to help users estimate MultiGet async IO impact (#10182)
Summary: Add a couple of stats to help users estimate the impact of potential MultiGet perf improvements:

1. NUM_LEVEL_READ_PER_MULTIGET - A histogram stat for the number of levels that required MultiGet to read from a file
2. MULTIGET_COROUTINE_COUNT - A ticker stat counting the number of times the coroutine version of MultiGetFromSST was used

The NUM_DATA_BLOCKS_READ_PER_LEVEL stat is obsoleted, as it doesn't provide useful information for MultiGet optimization.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10182

Reviewed By: akankshamahajan15

Differential Revision: D37213296

Pulled By: anand1976

fbshipit-source-id: 5d2b7708017c0e278578ae4bffac3926f6530efb
-rw-r--r--  HISTORY.md                                                      1
-rw-r--r--  db/db_basic_test.cc                                            22
-rw-r--r--  db/version_set.cc                                              33
-rw-r--r--  db/version_set.h                                                2
-rw-r--r--  db/version_set_sync_and_async.h                                 5
-rw-r--r--  include/rocksdb/statistics.h                                    5
-rw-r--r--  monitoring/statistics.cc                                        4
-rw-r--r--  table/block_based/block_based_table_reader.cc                   6
-rw-r--r--  table/block_based/block_based_table_reader_sync_and_async.h    3
-rw-r--r--  table/get_context.h                                             1
10 files changed, 49 insertions, 33 deletions
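
For reference, both new stats are read through the existing public Statistics API. Below is a minimal sketch of querying them after a MultiGet batch; the helper function name and the printing are illustrative assumptions, while the enum values, getTickerCount, histogramData, and the HistogramData fields are the ones added or already present in include/rocksdb/statistics.h.

#include <cstdint>
#include <iostream>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/statistics.h"

// Hypothetical helper: report the two stats introduced by this change.
void ReportMultiGetAsyncIOStats(
    const std::shared_ptr<rocksdb::Statistics>& stats) {
  // Histogram of how many levels needed at least one SST read per MultiGet.
  rocksdb::HistogramData levels_per_multiget;
  stats->histogramData(rocksdb::NUM_LEVEL_READ_PER_MULTIGET,
                       &levels_per_multiget);

  // Ticker counting uses of the coroutine version of MultiGetFromSST.
  uint64_t coroutine_count =
      stats->getTickerCount(rocksdb::MULTIGET_COROUTINE_COUNT);

  std::cout << "levels read per MultiGet: max=" << levels_per_multiget.max
            << " min=" << levels_per_multiget.min
            << " avg=" << levels_per_multiget.average << "\n"
            << "coroutine MultiGetFromSST count: " << coroutine_count << "\n";
}

A batch in which every key is served from a single level should report a histogram max of 1, which matches the expectation asserted in the updated MultiGetStats test below.
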
diff --git a/HISTORY.md b/HISTORY.md
index 7e9532340..4b80bc66b 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -25,6 +25,7 @@
* And Add C API's for Transaction, SstFileWriter, Compaction as mentioned [here](https://github.com/facebook/rocksdb/wiki/User-defined-Timestamp-(Experimental))
* The contract for implementations of Comparator::IsSameLengthImmediateSuccessor has been updated to work around a design bug in `auto_prefix_mode`.
* The API documentation for `auto_prefix_mode` now notes some corner cases in which it returns different results than `total_order_seek`, due to design bugs that are not easily fixed. Users using built-in comparators and keys at least the size of a fixed prefix length are not affected.
+* Obsoleted the NUM_DATA_BLOCKS_READ_PER_LEVEL stat and introduced the NUM_LEVEL_READ_PER_MULTIGET and MULTIGET_COROUTINE_COUNT stats
### New Features
* Add FileSystem::ReadAsync API in io_tracing
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index 840f332dc..55258434e 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -2221,6 +2221,7 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL0) {
// No async IO in this case since we don't do parallel lookup in L0
ASSERT_EQ(multiget_io_batch_size.count, 0);
ASSERT_EQ(multiget_io_batch_size.max, 0);
+ ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 0);
}
TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
@@ -2257,6 +2258,7 @@ TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
// A batch of 3 async IOs is expected, one for each overlapping file in L1
ASSERT_EQ(multiget_io_batch_size.count, 1);
ASSERT_EQ(multiget_io_batch_size.max, 3);
+ ASSERT_EQ(statistics()->getTickerCount(MULTIGET_COROUTINE_COUNT), 3);
}
TEST_F(DBMultiGetAsyncIOTest, LastKeyInFile) {
@@ -2407,27 +2409,37 @@ TEST_F(DBBasicTest, MultiGetStats) {
values.data(), s.data(), false);
ASSERT_EQ(values.size(), kMultiGetBatchSize);
- HistogramData hist_data_blocks;
+ HistogramData hist_level;
HistogramData hist_index_and_filter_blocks;
HistogramData hist_sst;
- options.statistics->histogramData(NUM_DATA_BLOCKS_READ_PER_LEVEL,
- &hist_data_blocks);
+ options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level);
options.statistics->histogramData(NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
&hist_index_and_filter_blocks);
options.statistics->histogramData(NUM_SST_READ_PER_LEVEL, &hist_sst);
// Maximum number of blocks read from a file system in a level.
- ASSERT_EQ(hist_data_blocks.max, 32);
+ ASSERT_EQ(hist_level.max, 1);
ASSERT_GT(hist_index_and_filter_blocks.max, 0);
// Maximum number of sst files read from file system in a level.
ASSERT_EQ(hist_sst.max, 2);
// Minimun number of blocks read in a level.
- ASSERT_EQ(hist_data_blocks.min, 4);
+ ASSERT_EQ(hist_level.min, 1);
ASSERT_GT(hist_index_and_filter_blocks.min, 0);
// Minimun number of sst files read in a level.
ASSERT_EQ(hist_sst.min, 1);
+
+ for (PinnableSlice& value : values) {
+ value.Reset();
+ }
+ for (Status& status : s) {
+ status = Status::OK();
+ }
+ db_->MultiGet(read_opts, handles_[1], kMultiGetBatchSize, &keys[950],
+ values.data(), s.data(), false);
+ options.statistics->histogramData(NUM_LEVEL_READ_PER_MULTIGET, &hist_level);
+ ASSERT_EQ(hist_level.max, 2);
}
// Test class for batched MultiGet with prefix extractor
diff --git a/db/version_set.cc b/db/version_set.cc
index 729d535cf..3b6157fda 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -2252,8 +2252,8 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
Status s;
uint64_t num_index_read = 0;
uint64_t num_filter_read = 0;
- uint64_t num_data_read = 0;
uint64_t num_sst_read = 0;
+ uint64_t num_level_read = 0;
MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
// blob_file => [[blob_idx, it], ...]
@@ -2275,7 +2275,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
fp.GetHitFileLevel(), fp.IsHitFileLastInLevel(), f,
blob_rqs, num_filter_read, num_index_read,
- num_data_read, num_sst_read);
+ num_sst_read);
if (fp.GetHitFileLevel() == 0) {
dump_stats_for_l0_file = true;
}
@@ -2290,13 +2290,14 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
read_options, fp.CurrentFileRange(), fp.GetHitFileLevel(),
fp.IsHitFileLastInLevel(), f, blob_rqs, num_filter_read,
- num_index_read, num_data_read, num_sst_read));
+ num_index_read, num_sst_read));
if (fp.KeyMaySpanNextFile()) {
break;
}
f = fp.GetNextFileInLevel();
}
if (mget_tasks.size() > 0) {
+ RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT, mget_tasks.size());
// Collect all results so far
std::vector<Status> statuses = folly::coro::blockingWait(
folly::coro::collectAllRange(std::move(mget_tasks))
@@ -2328,18 +2329,18 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
(prev_level != 0 && prev_level != (int)fp.GetHitFileLevel())) {
// Dump the stats if the search has moved to the next level and
// reset for next level.
- if (num_sst_read || (num_filter_read + num_index_read)) {
+ if (num_filter_read + num_index_read) {
RecordInHistogram(db_statistics_,
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
num_index_read + num_filter_read);
- RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
- num_data_read);
+ }
+ if (num_sst_read) {
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL,
num_sst_read);
+ num_level_read++;
}
num_filter_read = 0;
num_index_read = 0;
- num_data_read = 0;
num_sst_read = 0;
}
prev_level = fp.GetHitFileLevel();
@@ -2347,11 +2348,19 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
}
// Dump stats for most recent level
- RecordInHistogram(db_statistics_, NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
- num_index_read + num_filter_read);
- RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
- num_data_read);
- RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
+ if (num_filter_read + num_index_read) {
+ RecordInHistogram(db_statistics_,
+ NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+ num_index_read + num_filter_read);
+ }
+ if (num_sst_read) {
+ RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
+ num_level_read++;
+ }
+ if (num_level_read) {
+ RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET,
+ num_level_read);
+ }
if (s.ok() && !blob_rqs.empty()) {
MultiGetBlob(read_options, keys_with_blobs_range, blob_rqs);
diff --git a/db/version_set.h b/db/version_set.h
index 72398f162..4bfb4bf74 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -958,7 +958,7 @@ class Version {
int hit_file_level, bool is_hit_file_last_in_level, FdWithKeyRange* f,
std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
uint64_t& num_filter_read, uint64_t& num_index_read,
- uint64_t& num_data_read, uint64_t& num_sst_read);
+ uint64_t& num_sst_read);
ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs
Logger* info_log_;
diff --git a/db/version_set_sync_and_async.h b/db/version_set_sync_and_async.h
index 9dd50d21b..229c59a6e 100644
--- a/db/version_set_sync_and_async.h
+++ b/db/version_set_sync_and_async.h
@@ -15,8 +15,7 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
(const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
bool is_hit_file_last_in_level, FdWithKeyRange* f,
std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
- uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_data_read,
- uint64_t& num_sst_read) {
+ uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_sst_read) {
bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
get_perf_context()->per_level_perf_context_enabled;
@@ -63,12 +62,10 @@ DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
batch_size++;
num_index_read += get_context.get_context_stats_.num_index_read;
num_filter_read += get_context.get_context_stats_.num_filter_read;
- num_data_read += get_context.get_context_stats_.num_data_read;
num_sst_read += get_context.get_context_stats_.num_sst_read;
// Reset these stats since they're specific to a level
get_context.get_context_stats_.num_index_read = 0;
get_context.get_context_stats_.num_filter_read = 0;
- get_context.get_context_stats_.num_data_read = 0;
get_context.get_context_stats_.num_sst_read = 0;
// report the counters before returning
diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h
index 6f9ca1da7..b89a0ab68 100644
--- a/include/rocksdb/statistics.h
+++ b/include/rocksdb/statistics.h
@@ -432,6 +432,7 @@ enum Tickers : uint32_t {
NON_LAST_LEVEL_READ_COUNT,
BLOCK_CHECKSUM_COMPUTE_COUNT,
+ MULTIGET_COROUTINE_COUNT,
TICKER_ENUM_MAX
};
@@ -529,6 +530,7 @@ enum Histograms : uint32_t {
// Num of index and filter blocks read from file system per level.
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
// Num of data blocks read from file system per level.
+ // Obsolete
NUM_DATA_BLOCKS_READ_PER_LEVEL,
// Num of sst files read from file system per level.
NUM_SST_READ_PER_LEVEL,
@@ -546,6 +548,9 @@ enum Histograms : uint32_t {
// Number of IOs issued in parallel in a MultiGet batch
MULTIGET_IO_BATCH_SIZE,
+ // Number of levels requiring IO for MultiGet
+ NUM_LEVEL_READ_PER_MULTIGET,
+
HISTOGRAM_ENUM_MAX,
};
diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc
index 8afffa5f6..3f9155d1f 100644
--- a/monitoring/statistics.cc
+++ b/monitoring/statistics.cc
@@ -226,7 +226,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
{NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
{NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
- {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}};
+ {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"},
+ {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}};
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{DB_GET, "rocksdb.db.get.micros"},
@@ -287,6 +288,7 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{POLL_WAIT_MICROS, "rocksdb.poll.wait.micros"},
{PREFETCHED_BYTES_DISCARDED, "rocksdb.prefetched.bytes.discarded"},
{MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
+ {NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
};
std::shared_ptr<Statistics> CreateDBStatistics() {
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index 31b75cf5e..7074df313 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -1611,9 +1611,6 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
case BlockType::kDeprecatedFilter:
++get_context->get_context_stats_.num_filter_read;
break;
- case BlockType::kData:
- ++get_context->get_context_stats_.num_data_read;
- break;
default:
break;
}
@@ -1771,9 +1768,6 @@ Status BlockBasedTable::RetrieveBlock(
case BlockType::kDeprecatedFilter:
++(get_context->get_context_stats_.num_filter_read);
break;
- case BlockType::kData:
- ++(get_context->get_context_stats_.num_data_read);
- break;
default:
break;
}
diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h
index ab42220c3..9eeea40da 100644
--- a/table/block_based/block_based_table_reader_sync_and_async.h
+++ b/table/block_based/block_based_table_reader_sync_and_async.h
@@ -177,9 +177,6 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
size_t& req_idx = req_idx_for_block[valid_batch_idx];
size_t& req_offset = req_offset_for_block[valid_batch_idx];
valid_batch_idx++;
- if (mget_iter->get_context) {
- ++(mget_iter->get_context->get_context_stats_.num_data_read);
- }
FSReadRequest& req = read_reqs[req_idx];
Status s = req.status;
if (s.ok()) {
diff --git a/table/get_context.h b/table/get_context.h
index 8120cfcbb..636af6e61 100644
--- a/table/get_context.h
+++ b/table/get_context.h
@@ -53,7 +53,6 @@ struct GetContextStats {
// MultiGet stats.
uint64_t num_filter_read = 0;
uint64_t num_index_read = 0;
- uint64_t num_data_read = 0;
uint64_t num_sst_read = 0;
};