author     Richard Westhaver <ellis@rwest.io>  2024-05-13 00:00:06 +0000
committer  Richard Westhaver <ellis@rwest.io>  2024-05-13 00:00:06 +0000
commit     46aa698931386d924b011ab283ed9c06d9979edb (patch)
tree       96acbd95f0cad5d088a1c91702231ab525fee92f
parent     265259448e454817d199a968a8c590c3ca1da947 (diff)
parent     daaaf85fffb1c981aa93ca418b380ea2ea91aac3 (diff)
merge upstream (HEAD, default)
-rw-r--r--  CHANGELOG.md                                                     41
-rw-r--r--  Cargo.toml                                                        9
-rw-r--r--  README.md                                                        39
-rw-r--r--  librocksdb-sys/Cargo.toml                                         5
-rw-r--r--  librocksdb-sys/build.rs                                           2
-rw-r--r--  src/backup.rs                                                     4
-rw-r--r--  src/db.rs                                                        14
-rw-r--r--  src/db_options.rs                                               222
-rw-r--r--  src/lib.rs                                                       11
-rw-r--r--  src/perf.rs                                                      23
-rw-r--r--  src/prop_name.rs                                                  5
-rw-r--r--  src/statistics.rs                                               806
-rw-r--r--  src/transactions/optimistic_transaction_db.rs                    51
-rw-r--r--  src/transactions/transaction_db.rs                                2
-rw-r--r--  tests/fail/checkpoint_outlive_db.stderr                           1
-rw-r--r--  tests/fail/iterator_outlive_db.stderr                             1
-rw-r--r--  tests/fail/open_with_multiple_refs_as_single_threaded.stderr    16
-rw-r--r--  tests/fail/snapshot_outlive_db.stderr                             1
-rw-r--r--  tests/fail/snapshot_outlive_transaction.stderr                    1
-rw-r--r--  tests/fail/snapshot_outlive_transaction_db.stderr                 1
-rw-r--r--  tests/fail/transaction_outlive_transaction_db.stderr              1
-rw-r--r--  tests/test_backup.rs                                              7
-rw-r--r--  tests/test_db.rs                                                 29
-rw-r--r--  tests/test_optimistic_transaction_db.rs                          28
24 files changed, 1231 insertions, 89 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0d91541..9921d56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,45 @@
# Changelog
## [Unreleased]
-
+* Document that `default` column family doesn't inherit open options of db (
+ 0xdeafbeef)
+
+## 0.22.0 (2024-02-13)
+
+* Free memory on writebatch index and avoid unnecessary clones (jkurian)
+* Update snappy to 1.1.10 (timsueberkrueb)
+* Prefer rocksdb_free to free for RocksDB memory (niklasf)
+* Expose flush_cfs_opt to flush multiple column families (lizhanhui)
+* Update to RocksDB 8.3.2 (niklasf)
+* Remove temporary boxed keys in batched_multi_get (axnsan12)
+* Convert properties to `&PropName` which can be converted at no cost to `&CStr` and `&str` (mina86)
* Bump MSRV to 1.63.0 (mina86)
-* Convert properties to `&PropName` which can be converted at no cost to `&CStr`
- and `&str` (mina86)
+* Add allow_ingest_behind ffi call for DB Options (siyuan0322)
+* Remove wrong outlive requirements for cache in docs (zheland)
+* Feat: support `column_family_metadata` and `column_family_metadata_cf` (ovr)
+* Update RocksDB to 8.5.3 (niklasf)
+* Expose ReadTier publicly (tinct-martini)
+* Update RocksDB to 8.6.7 (aleksuss)
+* Feat: expose `set_optimize_filters_for_memory` (zaidoon1)
+* Feat: expose compression option parallel_threads (zaidoon1)
+* Fix: add raw iterator validation before calling next method (aleksuss)
+* Fix typo in documentation (jazarine)
+* Feat: Expose `set_wal_compression_type` (ovr)
+* Update RocksDB to 8.8.1 (zaidoon1)
+* Feat: Expose `compact_on_deletion_collector_factory` (zaidoon1)
+* Fix bug in DBWALIterator that would return updates before the given sequence (schmidek)
+* Feat: Expose wait_for_compact (zaidoon1)
+* Feat: Expose `set_auto_readahead_size` (niklasf)
+* Update RocksDB to 8.9.1 (zaidoon1)
+* Feat: Expose `set_periodic_compaction_seconds` (zaidoon1)
+* Update hash commit of the rocksdb submodule to corresponding v8.9.1 (aleksuss)
+* Make CompactOptions Send and Sync (GodTamIt)
+* Update to RocksDB 8.10.0 (zaidoon1)
+* Add WriteBufferManager support (benoitmeriaux)
+* Update doc and parameter name for `optimize_for_point_lookup` (XiangpengHao)
+* Expose rocksdb cumulative statistics and histograms (AhmedSoliman)
+* Make FlushOptions Send and Sync (jansegre)
+* Export memory usage builder and MemoryUsage structs to users (AhmedSoliman)
## 0.21.0 (2023-05-09)
diff --git a/Cargo.toml b/Cargo.toml
index 2a5d733..bbbb2eb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
[package]
name = "rocksdb"
description = "Rust wrapper for Facebook's RocksDB embeddable database"
-version = "0.21.0"
+version = "0.22.0"
edition = "2018"
-rust-version = "1.66.0"
+rust-version = "1.70.0"
authors = ["Tyler Neely <t@jujit.su>", "David Greenberg <dsg123456789@gmail.com>"]
repository = "https://github.com/rust-rocksdb/rust-rocksdb"
license = "Apache-2.0"
@@ -25,6 +25,7 @@ default = ["snappy", "lz4", "zstd", "zlib", "bzip2"]
jemalloc = ["librocksdb-sys/jemalloc"]
io-uring = ["librocksdb-sys/io-uring"]
valgrind = []
+mt_static = ["librocksdb-sys/mt_static"]
snappy = ["librocksdb-sys/snappy"]
lz4 = ["librocksdb-sys/lz4"]
zstd = ["librocksdb-sys/zstd"]
@@ -36,11 +37,11 @@ serde1 = ["serde"]
[dependencies]
libc = "0.2"
-librocksdb-sys = { path = "librocksdb-sys", version = "0.15.0" }
+librocksdb-sys = { path = "librocksdb-sys", version = "0.17.0" }
serde = { version = "1", features = [ "derive" ], optional = true }
[dev-dependencies]
-trybuild = "1.0"
+trybuild = "<=1.0.89" # trybuild 1.0.90 needs MSRV 1.70
tempfile = "3.1"
pretty_assertions = "1.0"
bincode = "1.3"
diff --git a/README.md b/README.md
index f4ef392..87ab17f 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,11 @@
-rust-rocksdb
-============
-![RocksDB build](https://github.com/rust-rocksdb/rust-rocksdb/workflows/RocksDB%20build/badge.svg?branch=master)
+# rust-rocksdb
+
+[![RocksDB build](https://github.com/rust-rocksdb/rust-rocksdb/actions/workflows/rust.yml/badge.svg?branch=master)](https://github.com/rust-rocksdb/rust-rocksdb/actions/workflows/rust.yml)
[![crates.io](https://img.shields.io/crates/v/rocksdb.svg)](https://crates.io/crates/rocksdb)
[![documentation](https://docs.rs/rocksdb/badge.svg)](https://docs.rs/rocksdb)
[![license](https://img.shields.io/crates/l/rocksdb.svg)](https://github.com/rust-rocksdb/rust-rocksdb/blob/master/LICENSE)
-[![Gitter chat](https://badges.gitter.im/rust-rocksdb/gitter.png)](https://gitter.im/rust-rocksdb/lobby)
-![rust 1.66.0 required](https://img.shields.io/badge/rust-1.66.0-blue.svg?label=MSRV)
-
+[![Gitter chat](https://badges.gitter.im/rust-rocksdb/gitter.svg)](https://gitter.im/rust-rocksdb/lobby)
+![rust 1.70.0 required](https://img.shields.io/badge/rust-1.70.0-blue.svg?label=MSRV)
![GitHub commits (since latest release)](https://img.shields.io/github/commits-since/rust-rocksdb/rust-rocksdb/latest.svg)
@@ -16,7 +15,7 @@ rust-rocksdb
## Contributing
-Feedback and pull requests welcome! If a particular feature of RocksDB is
+Feedback and pull requests welcome! If a particular feature of RocksDB is
important to you, please let me know by opening an issue, and I'll
prioritize it.
@@ -26,18 +25,21 @@ This binding is statically linked with a specific version of RocksDB. If you
want to build it yourself, make sure you've also cloned the RocksDB and
compression submodules:
- git submodule update --init --recursive
+```shell
+git submodule update --init --recursive
+```
## Compression Support
-By default, support for the [Snappy](https://github.com/google/snappy),
+
+By default, support for [Snappy](https://github.com/google/snappy),
[LZ4](https://github.com/lz4/lz4), [Zstd](https://github.com/facebook/zstd),
[Zlib](https://zlib.net), and [Bzip2](http://www.bzip.org) compression
-is enabled through crate features. If support for all of these compression
+is enabled through crate features. If support for all of these compression
algorithms is not needed, default features can be disabled and specific
compression algorithms can be enabled. For example, to enable only LZ4
compression support, make these changes to your Cargo.toml:
-```
+```toml
[dependencies.rocksdb]
default-features = false
features = ["lz4"]
@@ -45,9 +47,16 @@ features = ["lz4"]
## Multithreaded ColumnFamily alternation
-The underlying RocksDB does allow column families to be created and dropped
-from multiple threads concurrently. But this crate doesn't allow it by default
+RocksDB allows column families to be created and dropped
+from multiple threads concurrently, but this crate doesn't allow it by default
for compatibility. If you need to modify column families concurrently, enable
-crate feature called `multi-threaded-cf`, which makes this binding's
-data structures to use RwLock by default. Alternatively, you can directly create
+the crate feature `multi-threaded-cf`, which makes this binding's
+data structures use `RwLock` by default. Alternatively, you can directly create
`DBWithThreadMode<MultiThreaded>` without enabling the crate feature.
+
+## Switch between /MT or /MD run time library (Only for Windows)
+
+The feature `mt_static` requests that the library be built with the [/MT](https://learn.microsoft.com/en-us/cpp/build/reference/md-mt-ld-use-run-time-library?view=msvc-170)
+flag, which makes the library use the static version of the run-time library.
+*This can be useful when there is a conflict in the dependency tree between different
+run-time versions.*
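To illustrate the `DBWithThreadMode<MultiThreaded>` path described in the README section above, here is a minimal sketch (the database path and column-family name are placeholders, and the exact signatures should be checked against the crate version you use): it opens a database in multi-threaded column-family mode and creates a column family from a spawned thread.

```rust
use rocksdb::{DBWithThreadMode, MultiThreaded, Options};
use std::{sync::Arc, thread};

fn main() {
    let mut opts = Options::default();
    opts.create_if_missing(true);

    // MultiThreaded mode guards the column-family map with a RwLock,
    // so column families can be created/dropped from several threads.
    let db: Arc<DBWithThreadMode<MultiThreaded>> =
        Arc::new(DBWithThreadMode::open(&opts, "_mt_cf_example").unwrap());

    let worker = {
        let db = Arc::clone(&db);
        thread::spawn(move || {
            // In MultiThreaded mode, create_cf only needs &self.
            db.create_cf("logs", &Options::default()).unwrap();
        })
    };
    worker.join().unwrap();

    let cf = db.cf_handle("logs").expect("column family was just created");
    db.put_cf(&cf, b"key", b"value").unwrap();
}
```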
diff --git a/librocksdb-sys/Cargo.toml b/librocksdb-sys/Cargo.toml
index 33394bf..5bb6807 100644
--- a/librocksdb-sys/Cargo.toml
+++ b/librocksdb-sys/Cargo.toml
@@ -1,8 +1,8 @@
[package]
name = "librocksdb-sys"
-version = "0.15.0+8.9.1"
+version = "0.17.0+9.0.0"
edition = "2018"
-rust-version = "1.66.0"
+rust-version = "1.70.0"
authors = ["Karl Hobley <karlhobley10@gmail.com>", "Arkadiy Paronyan <arkadiy@ethcore.io>"]
license = "MIT/Apache-2.0/BSD-3-Clause"
description = "Native bindings to librocksdb"
@@ -16,6 +16,7 @@ links = "rocksdb"
default = [ "static" ]
jemalloc = ["tikv-jemalloc-sys"]
static = ["libz-sys?/static", "bzip2-sys?/static"]
+mt_static = []
io-uring = ["pkg-config"]
snappy = []
lz4 = ["lz4-sys"]
diff --git a/librocksdb-sys/build.rs b/librocksdb-sys/build.rs
index 9c8b423..6b4a86d 100644
--- a/librocksdb-sys/build.rs
+++ b/librocksdb-sys/build.rs
@@ -34,7 +34,7 @@ fn main() {
println!("cargo:rustc-link-lib=dylib=stdc++");
}
println!("cargo:rustc-link-lib=dylib=rocksdb");
- println!("cargo:rustc-link-lib=dylib=snappy");
+ println!("cargo:rustc-link-lib=dylib=zstd");
// Allow dependent crates to locate the sources and output directory of
// this crate. Notably, this allows a dependent crate to locate the RocksDB
// sources and built archive artifacts provided by this crate.
diff --git a/src/backup.rs b/src/backup.rs
index 17d0796..2ad37f9 100644
--- a/src/backup.rs
+++ b/src/backup.rs
@@ -48,6 +48,10 @@ pub struct RestoreOptions {
inner: *mut ffi::rocksdb_restore_options_t,
}
+// BackupEngine is a simple pointer wrapper, so it's safe to send to another thread
+// since the underlying RocksDB backup engine is thread-safe.
+unsafe impl Send for BackupEngine {}
+
impl BackupEngine {
/// Open a backup engine with the specified options and RocksDB Env.
pub fn open(opts: &BackupEngineOptions, env: &Env) -> Result<Self, Error> {
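The new `unsafe impl Send for BackupEngine` above makes patterns like the following possible. This is a rough sketch, assuming the existing `BackupEngineOptions`/`Env` constructors and using placeholder paths, in which the engine (together with the DB handle) is moved into a background thread to take a backup.

```rust
use rocksdb::backup::{BackupEngine, BackupEngineOptions};
use rocksdb::{Env, Options, DB};
use std::thread;

fn main() {
    let mut opts = Options::default();
    opts.create_if_missing(true);
    let db = DB::open(&opts, "_backup_send_example_db").unwrap();
    db.put(b"key", b"value").unwrap();

    let backup_opts = BackupEngineOptions::new("_backup_send_example_backups").unwrap();
    let env = Env::new().unwrap();
    // BackupEngine is a plain pointer wrapper, so moving it to another
    // thread (now that it is Send) is fine.
    let mut engine = BackupEngine::open(&backup_opts, &env).unwrap();

    let worker = thread::spawn(move || {
        // Flush memtables first so the backup contains the latest writes.
        engine.create_new_backup_flush(&db, true).unwrap();
    });
    worker.join().unwrap();
}
```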
diff --git a/src/db.rs b/src/db.rs
index 554fd79..da4c378 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -408,6 +408,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
/// Opens a database with the given database options, a Time to Live compaction filter, and
/// column family descriptors.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_descriptors_with_ttl<P, I>(
opts: &Options,
path: P,
@@ -454,6 +456,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
}
/// Opens a database for read only with the given database options and column family names.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_for_read_only<P, I, N>(
opts: &Options,
path: P,
@@ -480,6 +484,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
}
/// Opens a database for read only with the given database options and column family names.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_with_opts_for_read_only<P, I, N>(
db_opts: &Options,
path: P,
@@ -507,6 +513,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
/// Opens a database for read only with the given database options and
/// column family descriptors.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_descriptors_read_only<P, I>(
opts: &Options,
path: P,
@@ -528,6 +536,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
}
/// Opens the database as a secondary with the given database options and column family names.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_as_secondary<P, I, N>(
opts: &Options,
primary_path: P,
@@ -555,6 +565,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
/// Opens the database as a secondary with the given database options and
/// column family descriptors.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_descriptors_as_secondary<P, I>(
opts: &Options,
path: P,
@@ -576,6 +588,8 @@ impl<T: ThreadMode> DBWithThreadMode<T> {
}
/// Opens a database with the given database options and column family descriptors.
+ /// *NOTE*: `default` column family is opened with `Options::default()`.
+ /// If you want to open `default` cf with different options, set them explicitly in `cfs`.
pub fn open_cf_descriptors<P, I>(opts: &Options, path: P, cfs: I) -> Result<Self, Error>
where
P: AsRef<Path>,
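The note repeated in the doc comments above (that the `default` column family does not inherit the DB open options) can be addressed by listing `default` explicitly among the descriptors. A small sketch with a placeholder path and an arbitrary write-buffer size:

```rust
use rocksdb::{ColumnFamilyDescriptor, Options, DB};

fn main() {
    let mut db_opts = Options::default();
    db_opts.create_if_missing(true);
    db_opts.create_missing_column_families(true);

    // The `default` CF is otherwise opened with Options::default(),
    // so give it its own options explicitly when that is not enough.
    let mut default_cf_opts = Options::default();
    default_cf_opts.set_write_buffer_size(64 * 1024 * 1024);

    let cfs = vec![
        ColumnFamilyDescriptor::new("default", default_cf_opts),
        ColumnFamilyDescriptor::new("extra", Options::default()),
    ];

    let db = DB::open_cf_descriptors(&db_opts, "_default_cf_example", cfs).unwrap();
    db.put(b"key", b"value").unwrap();
}
```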
diff --git a/src/db_options.rs b/src/db_options.rs
index 2db5bcb..2ee25b3 100644
--- a/src/db_options.rs
+++ b/src/db_options.rs
@@ -20,6 +20,7 @@ use std::sync::Arc;
use libc::{self, c_char, c_double, c_int, c_uchar, c_uint, c_void, size_t};
+use crate::statistics::{Histogram, HistogramData, StatsLevel};
use crate::{
compaction_filter::{self, CompactionFilterCallback, CompactionFilterFn},
compaction_filter_factory::{self, CompactionFilterFactory},
@@ -32,9 +33,110 @@ use crate::{
self, full_merge_callback, partial_merge_callback, MergeFn, MergeOperatorCallback,
},
slice_transform::SliceTransform,
+ statistics::Ticker,
ColumnFamilyDescriptor, Error, SnapshotWithThreadMode,
};
+pub(crate) struct WriteBufferManagerWrapper {
+ pub(crate) inner: NonNull<ffi::rocksdb_write_buffer_manager_t>,
+}
+
+impl Drop for WriteBufferManagerWrapper {
+ fn drop(&mut self) {
+ unsafe {
+ ffi::rocksdb_write_buffer_manager_destroy(self.inner.as_ptr());
+ }
+ }
+}
+
+#[derive(Clone)]
+pub struct WriteBufferManager(pub(crate) Arc<WriteBufferManagerWrapper>);
+
+impl WriteBufferManager {
+ /// <https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager>
+ /// Write buffer manager helps users control the total memory used by memtables across multiple column families and/or DB instances.
+ /// Users can enable this control in two ways:
+ ///
+ /// 1- Limit the total memtable usage across multiple column families and DBs under a threshold.
+ /// 2- Cost the memtable memory usage to block cache so that memory of RocksDB can be capped by the single limit.
+ /// The usage of a write buffer manager is similar to rate_limiter and sst_file_manager.
+ /// Users can create one write buffer manager object and pass it to all the options of column families or DBs whose memtable size they want to be controlled by this object.
+ ///
+ /// A memory limit is given when creating the write buffer manager object. RocksDB will try to limit the total memory to under this limit.
+ ///
+ /// If mutable memtable size exceeds about 90% of the limit,
+ /// a flush will be triggered on one column family of the DB you are inserting to.
+ ///
+ /// If the total memory is over the limit, a more aggressive flush may also be triggered, but only if the mutable memtable size also exceeds 50% of the limit.
+ /// Both checks are needed because if more than half of the memory is already being flushed, triggering more flushes may not help.
+ ///
+ /// The total memory is counted as total memory allocated in the arena, even if some of that may not yet be used by memtable.
+ ///
+ /// buffer_size: the memory limit in bytes.
+ /// allow_stall: If set to true, it will enable stalling of all writers when memory usage exceeds buffer_size (soft limit),
+ /// waiting for flushes to complete and memory usage to drop.
+ pub fn new_write_buffer_manager(buffer_size: size_t, allow_stall: bool) -> Self {
+ let inner = NonNull::new(unsafe {
+ ffi::rocksdb_write_buffer_manager_create(buffer_size, allow_stall)
+ })
+ .unwrap();
+ WriteBufferManager(Arc::new(WriteBufferManagerWrapper { inner }))
+ }
+
+ /// Users can set up RocksDB to cost memory used by memtables to block cache.
+ /// This can happen no matter whether you enable memtable memory limit or not.
+ /// This option is added to manage memory (memtables + block cache) under a single limit.
+ ///
+ /// buffer_size: the memory limit in bytes.
+ /// allow_stall: If set to true, it will enable stalling of all writers when memory usage exceeds buffer_size (soft limit),
+ /// waiting for flushes to complete and memory usage to drop.
+ /// cache: the block cache instance
+ pub fn new_write_buffer_manager_with_cache(
+ buffer_size: size_t,
+ allow_stall: bool,
+ cache: Cache,
+ ) -> Self {
+ let inner = NonNull::new(unsafe {
+ ffi::rocksdb_write_buffer_manager_create_with_cache(
+ buffer_size,
+ cache.0.inner.as_ptr(),
+ allow_stall,
+ )
+ })
+ .unwrap();
+ WriteBufferManager(Arc::new(WriteBufferManagerWrapper { inner }))
+ }
+
+ /// Returns the WriteBufferManager memory usage in bytes.
+ pub fn get_usage(&self) -> usize {
+ unsafe { ffi::rocksdb_write_buffer_manager_memory_usage(self.0.inner.as_ptr()) }
+ }
+
+ /// Returns the current buffer size in bytes.
+ pub fn get_buffer_size(&self) -> usize {
+ unsafe { ffi::rocksdb_write_buffer_manager_buffer_size(self.0.inner.as_ptr()) }
+ }
+
+ /// Set the buffer size in bytes.
+ pub fn set_buffer_size(&self, new_size: usize) {
+ unsafe {
+ ffi::rocksdb_write_buffer_manager_set_buffer_size(self.0.inner.as_ptr(), new_size);
+ }
+ }
+
+ /// Returns whether the WriteBufferManager is enabled.
+ pub fn enabled(&self) -> bool {
+ unsafe { ffi::rocksdb_write_buffer_manager_enabled(self.0.inner.as_ptr()) }
+ }
+
+ /// Set the allow_stall flag.
+ pub fn set_allow_stall(&self, allow_stall: bool) {
+ unsafe {
+ ffi::rocksdb_write_buffer_manager_set_allow_stall(self.0.inner.as_ptr(), allow_stall);
+ }
+ }
+}
+
pub(crate) struct CacheWrapper {
pub(crate) inner: NonNull<ffi::rocksdb_cache_t>,
}
@@ -109,6 +211,7 @@ pub(crate) struct OptionsMustOutliveDB {
env: Option<Env>,
row_cache: Option<Cache>,
block_based: Option<BlockBasedOptionsMustOutliveDB>,
+ write_buffer_manager: Option<WriteBufferManager>,
}
impl OptionsMustOutliveDB {
@@ -120,6 +223,10 @@ impl OptionsMustOutliveDB {
.block_based
.as_ref()
.map(BlockBasedOptionsMustOutliveDB::clone),
+ write_buffer_manager: self
+ .write_buffer_manager
+ .as_ref()
+ .map(WriteBufferManager::clone),
}
}
}
@@ -277,23 +384,27 @@ pub struct IngestExternalFileOptions {
// rocksdb internally does not rely on thread-local information for its user-exposed types.
unsafe impl Send for Options {}
unsafe impl Send for WriteOptions {}
+unsafe impl Send for FlushOptions {}
unsafe impl Send for BlockBasedOptions {}
unsafe impl Send for CuckooTableOptions {}
unsafe impl Send for ReadOptions {}
unsafe impl Send for IngestExternalFileOptions {}
unsafe impl Send for CacheWrapper {}
unsafe impl Send for CompactOptions {}
+unsafe impl Send for WriteBufferManagerWrapper {}
// Sync is similarly safe for many types because they do not expose interior mutability, and their
// use within the rocksdb library is generally behind a const reference
unsafe impl Sync for Options {}
unsafe impl Sync for WriteOptions {}
+unsafe impl Sync for FlushOptions {}
unsafe impl Sync for BlockBasedOptions {}
unsafe impl Sync for CuckooTableOptions {}
unsafe impl Sync for ReadOptions {}
unsafe impl Sync for IngestExternalFileOptions {}
unsafe impl Sync for CacheWrapper {}
unsafe impl Sync for CompactOptions {}
+unsafe impl Sync for WriteBufferManagerWrapper {}
impl Drop for Options {
fn drop(&mut self) {
@@ -1474,9 +1585,12 @@ impl Options {
}
}
- pub fn optimize_for_point_lookup(&mut self, cache_size: u64) {
+ // Use this if you don't need to keep the data sorted, i.e. you'll never use
+ // an iterator, only Put() and Get() API calls
+ //
+ pub fn optimize_for_point_lookup(&mut self, block_cache_size_mb: u64) {
unsafe {
- ffi::rocksdb_options_optimize_for_point_lookup(self.inner, cache_size);
+ ffi::rocksdb_options_optimize_for_point_lookup(self.inner, block_cache_size_mb);
}
}
@@ -2664,6 +2778,30 @@ impl Options {
}
}
+ /// StatsLevel can be used to reduce statistics overhead by skipping certain
+ /// types of stats in the stats collection process.
+ pub fn set_statistics_level(&self, level: StatsLevel) {
+ unsafe { ffi::rocksdb_options_set_statistics_level(self.inner, level as c_int) }
+ }
+
+ /// Returns the value of cumulative db counters if stat collection is enabled.
+ pub fn get_ticker_count(&self, ticker: Ticker) -> u64 {
+ unsafe { ffi::rocksdb_options_statistics_get_ticker_count(self.inner, ticker as u32) }
+ }
+
+ /// Gets Histogram data from collected db stats. Requires stats to be enabled.
+ pub fn get_histogram_data(&self, histogram: Histogram) -> HistogramData {
+ unsafe {
+ let data = HistogramData::default();
+ ffi::rocksdb_options_statistics_get_histogram_data(
+ self.inner,
+ histogram as u32,
+ data.inner,
+ );
+ data
+ }
+ }
+
/// If not zero, dump `rocksdb.stats` to LOG every `stats_dump_period_sec`.
///
/// Default: `600` (10 mins)
@@ -2711,17 +2849,6 @@ impl Options {
}
}
- /// Specifies the file access pattern once a compaction is started.
- ///
- /// It will be applied to all input files of a compaction.
- ///
- /// Default: Normal
- pub fn set_access_hint_on_compaction_start(&mut self, pattern: AccessHint) {
- unsafe {
- ffi::rocksdb_options_set_access_hint_on_compaction_start(self.inner, pattern as c_int);
- }
- }
-
/// Enable/disable adaptive mutex, which spins in the user space before resorting to kernel.
///
/// This could reduce context switch when the mutex is not
@@ -3229,6 +3356,24 @@ impl Options {
);
}
}
+
+ /// <https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager>
+ /// Write buffer manager helps users control the total memory used by memtables across multiple column families and/or DB instances.
+ /// Users can enable this control in two ways:
+ ///
+ /// 1- Limit the total memtable usage across multiple column families and DBs under a threshold.
+ /// 2- Cost the memtable memory usage to block cache so that memory of RocksDB can be capped by the single limit.
+ /// The usage of a write buffer manager is similar to rate_limiter and sst_file_manager.
+ /// Users can create one write buffer manager object and pass it to all the options of column families or DBs whose memtable size they want to be controlled by this object.
+ pub fn set_write_buffer_manager(&mut self, write_buffer_manager: &WriteBufferManager) {
+ unsafe {
+ ffi::rocksdb_options_set_write_buffer_manager(
+ self.inner,
+ write_buffer_manager.0.inner.as_ptr(),
+ );
+ }
+ self.outlive.write_buffer_manager = Some(write_buffer_manager.clone());
+ }
}
impl Default for Options {
@@ -3589,9 +3734,16 @@ impl ReadOptions {
}
}
- /// Automatically trim readahead size when iterating with an upper bound.
+ /// If auto_readahead_size is set to true, it will auto tune the readahead_size
+ /// during scans internally.
+ /// For this feature to be enabled, iterate_upper_bound must also be specified.
///
- /// Default: `false`
+ /// NOTE: - Recommended for forward scans only.
+ ///       - If a backward scan occurs, this option will be
+ ///         disabled internally and won't be enabled again if a forward scan
+ ///         is issued again.
+ ///
+ /// Default: true
pub fn set_auto_readahead_size(&mut self, v: bool) {
unsafe {
ffi::rocksdb_readoptions_set_auto_readahead_size(self.inner, c_uchar::from(v));
@@ -3761,20 +3913,15 @@ pub enum ChecksumType {
}
/// Used in [`PlainTableFactoryOptions`].
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub enum KeyEncodingType {
/// Always write full keys.
+ #[default]
Plain = 0,
/// Find opportunities to write the same prefix for multiple rows.
Prefix = 1,
}
-impl Default for KeyEncodingType {
- fn default() -> Self {
- KeyEncodingType::Plain
- }
-}
-
/// Used with DBOptions::set_plain_table_factory.
/// See official [wiki](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) for more
/// information.
@@ -3828,17 +3975,6 @@ pub enum DBRecoveryMode {
SkipAnyCorruptedRecord = ffi::rocksdb_skip_any_corrupted_records_recovery as isize,
}
-/// File access pattern once a compaction has started
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
-#[repr(i32)]
-pub enum AccessHint {
- None = 0,
- Normal,
- Sequential,
- WillNeed,
-}
-
pub struct FifoCompactOptions {
pub(crate) inner: *mut ffi::rocksdb_fifo_compaction_options_t,
}
@@ -4159,7 +4295,8 @@ impl Drop for DBPath {
#[cfg(test)]
mod tests {
- use crate::{MemtableFactory, Options};
+ use crate::db_options::WriteBufferManager;
+ use crate::{Cache, MemtableFactory, Options};
#[test]
fn test_enable_statistics() {
@@ -4194,4 +4331,21 @@ mod tests {
let opts = Options::default();
assert!(opts.get_statistics().is_none());
}
+
+ #[test]
+ fn test_set_write_buffer_manager() {
+ let mut opts = Options::default();
+ let lrucache = Cache::new_lru_cache(100);
+ let write_buffer_manager =
+ WriteBufferManager::new_write_buffer_manager_with_cache(100, false, lrucache);
+ assert_eq!(write_buffer_manager.get_buffer_size(), 100);
+ assert_eq!(write_buffer_manager.get_usage(), 0);
+ assert!(write_buffer_manager.enabled());
+
+ opts.set_write_buffer_manager(&write_buffer_manager);
+ drop(opts);
+
+ // WriteBufferManager outlives options
+ assert!(write_buffer_manager.enabled());
+ }
}
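A short sketch of how the new statistics hooks added above fit together, using `Ticker` variants from the new `statistics` module (the DB path is a placeholder, and the exact set of accessors should be checked against the release): statistics must be enabled on the `Options` before tickers start counting, and the same `Options` value is later queried for cumulative counters.

```rust
use rocksdb::statistics::{StatsLevel, Ticker};
use rocksdb::{Options, DB};

fn main() {
    let mut opts = Options::default();
    opts.create_if_missing(true);
    // Counters only tick once statistics are enabled on the options.
    opts.enable_statistics();
    opts.set_statistics_level(StatsLevel::ExceptDetailedTimers);

    let db = DB::open(&opts, "_stats_example_db").unwrap();
    db.put(b"key", b"value").unwrap();
    let _ = db.get(b"key").unwrap();

    // Cumulative counters since the DB was opened; histograms are read
    // the same way via Options::get_histogram_data.
    let written = opts.get_ticker_count(Ticker::NumberKeysWritten);
    let read = opts.get_ticker_count(Ticker::NumberKeysRead);
    println!("keys written: {written}, keys read: {read}");
}
```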
diff --git a/src/lib.rs b/src/lib.rs
index 98c7751..f86033f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -94,6 +94,7 @@ pub mod properties;
mod slice_transform;
mod snapshot;
mod sst_file_writer;
+pub mod statistics;
mod transactions;
mod write_batch;
@@ -117,7 +118,7 @@ pub use crate::{
DBRecoveryMode, DataBlockIndexType, FifoCompactOptions, FlushOptions,
IngestExternalFileOptions, KeyEncodingType, LogLevel, MemtableFactory, Options,
PlainTableFactoryOptions, ReadOptions, ReadTier, UniversalCompactOptions,
- UniversalCompactionStopStyle, WaitForCompactOptions, WriteOptions,
+ UniversalCompactionStopStyle, WaitForCompactOptions, WriteBufferManager, WriteOptions,
},
db_pinnable_slice::DBPinnableSlice,
env::Env,
@@ -233,11 +234,11 @@ mod test {
use super::{
column_family::UnboundColumnFamily,
- db_options::CacheWrapper,
+ db_options::{CacheWrapper, WriteBufferManagerWrapper},
env::{Env, EnvWrapper},
BlockBasedOptions, BoundColumnFamily, Cache, ColumnFamily, ColumnFamilyDescriptor,
DBIterator, DBRawIterator, IngestExternalFileOptions, Options, PlainTableFactoryOptions,
- ReadOptions, Snapshot, SstFileWriter, WriteBatch, WriteOptions, DB,
+ ReadOptions, Snapshot, SstFileWriter, WriteBatch, WriteBufferManager, WriteOptions, DB,
};
#[test]
@@ -275,6 +276,8 @@ mod test {
is_send::<TransactionDBOptions>();
is_send::<OptimisticTransactionOptions>();
is_send::<TransactionOptions>();
+ is_send::<WriteBufferManager>();
+ is_send::<WriteBufferManagerWrapper>();
}
#[test]
@@ -305,5 +308,7 @@ mod test {
is_sync::<TransactionDBOptions>();
is_sync::<OptimisticTransactionOptions>();
is_sync::<TransactionOptions>();
+ is_sync::<WriteBufferManager>();
+ is_sync::<WriteBufferManagerWrapper>();
}
}
diff --git a/src/perf.rs b/src/perf.rs
index 12644b3..eb37113 100644
--- a/src/perf.rs
+++ b/src/perf.rs
@@ -14,7 +14,8 @@
use libc::{c_int, c_uchar, c_void};
-use crate::{db::DBInner, ffi, ffi_util::from_cstr, Cache, Error, DB};
+use crate::{db::DBInner, ffi, ffi_util::from_cstr, Cache, Error};
+use crate::{DBCommon, ThreadMode, DB};
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(i32)]
@@ -179,7 +180,7 @@ pub struct MemoryUsageStats {
}
/// Wrap over memory_usage_t. Hold current memory usage of the specified DB instances and caches
-struct MemoryUsage {
+pub struct MemoryUsage {
inner: *mut ffi::rocksdb_memory_usage_t,
}
@@ -193,28 +194,28 @@ impl Drop for MemoryUsage {
impl MemoryUsage {
/// Approximate memory usage of all the mem-tables
- fn approximate_mem_table_total(&self) -> u64 {
+ pub fn approximate_mem_table_total(&self) -> u64 {
unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_total(self.inner) }
}
/// Approximate memory usage of un-flushed mem-tables
- fn approximate_mem_table_unflushed(&self) -> u64 {
+ pub fn approximate_mem_table_unflushed(&self) -> u64 {
unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_unflushed(self.inner) }
}
/// Approximate memory usage of all the table readers
- fn approximate_mem_table_readers_total(&self) -> u64 {
+ pub fn approximate_mem_table_readers_total(&self) -> u64 {
unsafe { ffi::rocksdb_approximate_memory_usage_get_mem_table_readers_total(self.inner) }
}
/// Approximate memory usage by cache
- fn approximate_cache_total(&self) -> u64 {
+ pub fn approximate_cache_total(&self) -> u64 {
unsafe { ffi::rocksdb_approximate_memory_usage_get_cache_total(self.inner) }
}
}
/// Builder for MemoryUsage
-struct MemoryUsageBuilder {
+pub struct MemoryUsageBuilder {
inner: *mut ffi::rocksdb_memory_consumers_t,
}
@@ -228,7 +229,7 @@ impl Drop for MemoryUsageBuilder {
impl MemoryUsageBuilder {
/// Create new instance
- fn new() -> Result<Self, Error> {
+ pub fn new() -> Result<Self, Error> {
let mc = unsafe { ffi::rocksdb_memory_consumers_create() };
if mc.is_null() {
Err(Error::new(
@@ -240,21 +241,21 @@ impl MemoryUsageBuilder {
}
/// Add a DB instance to collect memory usage from it and add up in total stats
- fn add_db(&mut self, db: &DB) {
+ pub fn add_db<T: ThreadMode, D: DBInner>(&mut self, db: &DBCommon<T, D>) {
unsafe {
ffi::rocksdb_memory_consumers_add_db(self.inner, db.inner.inner());
}
}
/// Add a cache to collect memory usage from it and add up in total stats
- fn add_cache(&mut self, cache: &Cache) {
+ pub fn add_cache(&mut self, cache: &Cache) {
unsafe {
ffi::rocksdb_memory_consumers_add_cache(self.inner, cache.0.inner.as_ptr());
}
}
/// Build up MemoryUsage
- fn build(&self) -> Result<MemoryUsage, Error> {
+ pub fn build(&self) -> Result<MemoryUsage, Error> {
unsafe {
let mu = ffi_try!(ffi::rocksdb_approximate_memory_usage_create(self.inner));
Ok(MemoryUsage { inner: mu })
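Since `MemoryUsageBuilder` and `MemoryUsage` are now public, something like the following becomes possible. This is a sketch with a placeholder path; the import assumes the builder is re-exported at the crate root as the changelog entry suggests (adjust the path if your version exposes it under a module instead).

```rust
use rocksdb::{Cache, MemoryUsageBuilder, Options, DB};

fn main() {
    let mut opts = Options::default();
    opts.create_if_missing(true);
    let db = DB::open(&opts, "_memory_usage_example_db").unwrap();
    let cache = Cache::new_lru_cache(8 * 1024 * 1024);

    // Register the consumers whose memory should be accounted for.
    let mut builder = MemoryUsageBuilder::new().unwrap();
    builder.add_db(&db);
    builder.add_cache(&cache);

    let usage = builder.build().unwrap();
    println!(
        "memtables total: {} B, unflushed: {} B, table readers: {} B, cache: {} B",
        usage.approximate_mem_table_total(),
        usage.approximate_mem_table_unflushed(),
        usage.approximate_mem_table_readers_total(),
        usage.approximate_cache_total(),
    );
}
```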
diff --git a/src/prop_name.rs b/src/prop_name.rs
index 0c8f717..8f1c046 100644
--- a/src/prop_name.rs
+++ b/src/prop_name.rs
@@ -17,12 +17,9 @@ impl PropName {
/// Panics if the `value` isn’t terminated by a nul byte or contains
/// interior nul bytes.
pub(crate) const fn new_unwrap(value: &str) -> &Self {
- let bytes = if let Some((&0, bytes)) = value.as_bytes().split_last() {
- bytes
- } else {
+ let Some((&0, bytes)) = value.as_bytes().split_last() else {
panic!("input was not nul-terminated");
};
-
let mut idx = 0;
while idx < bytes.len() {
assert!(bytes[idx] != 0, "input contained interior nul byte");
diff --git a/src/statistics.rs b/src/statistics.rs
new file mode 100644
index 0000000..d79024e
--- /dev/null
+++ b/src/statistics.rs
@@ -0,0 +1,806 @@
+use crate::ffi;
+
+#[derive(Debug, Clone)]
+pub struct NameParseError;
+impl core::fmt::Display for NameParseError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "unrecognized name")
+ }
+}
+
+impl std::error::Error for NameParseError {}
+
+// Helper macro to generate iterable enums that translate into static strings mapped from the
+// C++ side.
+macro_rules! iterable_named_enum {
+ (
+ $(#[$m:meta])*
+ $type_vis:vis enum $typename:ident {
+ $(
+ $(#[$variant_meta:meta])*
+ $variant:ident($variant_str:literal) $(= $value:expr)?,
+ )+
+ }
+ ) => {
+ // Main Type
+ #[allow(clippy::all)]
+ $(#[$m])*
+ $type_vis enum $typename {
+ $(
+ $(#[$variant_meta])*
+ $variant$( = $value)?,
+ )+
+ }
+
+ #[automatically_derived]
+ impl $typename {
+ #[doc = "The corresponding rocksdb string identifier for this variant"]
+ pub const fn name(&self) -> &'static str {
+ match self {
+ $(
+ $typename::$variant => $variant_str,
+ )+
+ }
+ }
+ pub fn iter() -> ::core::slice::Iter<'static, $typename> {
+ static VARIANTS: &'static [$typename] = &[
+ $(
+ $typename::$variant,
+ )+
+ ];
+ VARIANTS.iter()
+ }
+ }
+
+
+ #[automatically_derived]
+ impl ::core::str::FromStr for $typename {
+ type Err = NameParseError;
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ match s {
+ $(
+ $variant_str => Ok($typename::$variant),
+ )+
+ _ => Err(NameParseError),
+ }
+ }
+ }
+
+ #[automatically_derived]
+ impl ::core::fmt::Display for $typename {
+ fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
+ self.name().fmt(f)
+ }
+ }
+ };
+}
+
+/// StatsLevel can be used to reduce statistics overhead by skipping certain
+/// types of stats in the stats collection process.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[repr(u8)]
+pub enum StatsLevel {
+ /// Disable all metrics
+ DisableAll = 0,
+ /// Disable timer stats, and skip histogram stats
+ ExceptHistogramOrTimers = 2,
+ /// Skip timer stats
+ ExceptTimers,
+ /// Collect all stats except time inside mutex lock AND time spent on
+ /// compression.
+ ExceptDetailedTimers,
+ /// Collect all stats except the counters that require getting the time inside the
+ /// mutex lock.
+ ExceptTimeForMutex,
+ /// Collect all stats, including measuring duration of mutex operations.
+ /// If getting the time is expensive on the platform, it can
+ /// reduce scalability with more threads, especially for writes.
+ All,
+}
+
+// Keep in-sync with rocksdb/include/rocksdb/statistics.h
+iterable_named_enum! {
+ #[derive(Debug, Copy, Clone, PartialEq, Eq)]
+ #[repr(u32)]
+ pub enum Ticker {
+ /// total block cache misses
+ /// REQUIRES: BlockCacheMiss == BlockCacheIndexMiss +
+ /// BlockCacheFilterMiss +
+ /// BlockCacheDataMiss;
+ BlockCacheMiss("rocksdb.block.cache.miss") = 0,
+ /// total block cache hit
+ /// REQUIRES: BlockCacheHit == BlockCacheIndexHit +
+ /// BlockCacheFilterHit +
+ /// BlockCacheDataHit;
+ BlockCacheHit("rocksdb.block.cache.hit"),
+ /// # of blocks added to block cache.
+ BlockCacheAdd("rocksdb.block.cache.add"),
+ /// # of failures when adding blocks to block cache.
+ BlockCacheAddFailures("rocksdb.block.cache.add.failures"),
+ /// # of times cache miss when accessing index block from block cache.
+ BlockCacheIndexMiss("rocksdb.block.cache.index.miss"),
+ /// # of times cache hit when accessing index block from block cache.
+ BlockCacheIndexHit("rocksdb.block.cache.index.hit"),
+ /// # of index blocks added to block cache.
+ BlockCacheIndexAdd("rocksdb.block.cache.index.add"),
+ /// # of bytes of index blocks inserted into cache
+ BlockCacheIndexBytesInsert("rocksdb.block.cache.index.bytes.insert"),
+ /// # of times cache miss when accessing filter block from block cache.
+ BlockCacheFilterMiss("rocksdb.block.cache.filter.miss"),
+ /// # of times cache hit when accessing filter block from block cache.
+ BlockCacheFilterHit("rocksdb.block.cache.filter.hit"),
+ /// # of filter blocks added to block cache.
+ BlockCacheFilterAdd("rocksdb.block.cache.filter.add"),
+ /// # of bytes of bloom filter blocks inserted into cache
+ BlockCacheFilterBytesInsert("rocksdb.block.cache.filter.bytes.insert"),
+ /// # of times cache miss when accessing data block from block cache.
+ BlockCacheDataMiss("rocksdb.block.cache.data.miss"),
+ /// # of times cache hit when accessing data block from block cache.
+ BlockCacheDataHit("rocksdb.block.cache.data.hit"),
+ /// # of data blocks added to block cache.
+ BlockCacheDataAdd("rocksdb.block.cache.data.add"),
+ /// # of bytes of data blocks inserted into cache
+ BlockCacheDataBytesInsert("rocksdb.block.cache.data.bytes.insert"),
+ /// # of bytes read from cache.
+ BlockCacheBytesRead("rocksdb.block.cache.bytes.read"),
+ /// # of bytes written into cache.
+ BlockCacheBytesWrite("rocksdb.block.cache.bytes.write"),
+
+ BlockCacheCompressionDictMiss("rocksdb.block.cache.compression.dict.miss"),
+ BlockCacheCompressionDictHit("rocksdb.block.cache.compression.dict.hit"),
+ BlockCacheCompressionDictAdd("rocksdb.block.cache.compression.dict.add"),
+ BlockCacheCompressionDictBytesInsert("rocksdb.block.cache.compression.dict.bytes.insert"),
+
+ /// # of blocks redundantly inserted into block cache.
+ /// REQUIRES: BlockCacheAddRedundant <= BlockCacheAdd
+ BlockCacheAddRedundant("rocksdb.block.cache.add.redundant"),
+ /// # of index blocks redundantly inserted into block cache.
+ /// REQUIRES: BlockCacheIndexAddRedundant <= BlockCacheIndexAdd
+ BlockCacheIndexAddRedundant("rocksdb.block.cache.index.add.redundant"),
+ /// # of filter blocks redundantly inserted into block cache.
+ /// REQUIRES: BlockCacheFilterAddRedundant <= BlockCacheFilterAdd
+ BlockCacheFilterAddRedundant("rocksdb.block.cache.filter.add.redundant"),
+ /// # of data blocks redundantly inserted into block cache.
+ /// REQUIRES: BlockCacheDataAddRedundant <= BlockCacheDataAdd
+ BlockCacheDataAddRedundant("rocksdb.block.cache.data.add.redundant"),
+ /// # of dict blocks redundantly inserted into block cache.
+ /// REQUIRES: BlockCacheCompressionDictAddRedundant
+ /// <= BlockCacheCompressionDictAdd
+ BlockCacheCompressionDictAddRedundant("rocksdb.block.cache.compression.dict.add.redundant"),
+
+ /// Secondary cache statistics
+ SecondaryCacheHits("rocksdb.secondary.cache.hits"),
+
+ /// Fine grained secondary cache stats
+ SecondaryCacheFilterHits("rocksdb.secondary.cache.filter.hits"),
+ SecondaryCacheIndexHits("rocksdb.secondary.cache.index.hits"),
+ SecondaryCacheDataHits("rocksdb.secondary.cache.data.hits"),
+
+ /// Compressed secondary cache related stats
+ CompressedSecondaryCacheDummyHits("rocksdb.compressed.secondary.cache.dummy.hits"),
+ CompressedSecondaryCacheHits("rocksdb.compressed.secondary.cache.hits"),
+ CompressedSecondaryCachePromotions("rocksdb.compressed.secondary.cache.promotions"),
+ CompressedSecondaryCachePromotionSkips("rocksdb.compressed.secondary.cache.promotion.skips"),
+
+ /// # of times bloom filter has avoided file reads, i.e., negatives.
+ BloomFilterUseful("rocksdb.bloom.filter.useful"),
+ /// # of times bloom FullFilter has not avoided the reads.
+ BloomFilterFullPositive("rocksdb.bloom.filter.full.positive"),
+ /// # of times bloom FullFilter has not avoided the reads and data actually
+ /// exist.
+ BloomFilterFullTruePositive("rocksdb.bloom.filter.full.true.positive"),
+ /// Prefix filter stats when used for point lookups (Get / MultiGet).
+ /// (For prefix filter stats on iterators, see *_LEVEL_Seek_*.)
+ /// Checked: filter was queried
+ BloomFilterPrefixChecked("rocksdb.bloom.filter.prefix.checked"),
+ /// Useful: filter returned false so prevented accessing data+index blocks
+ BloomFilterPrefixUseful("rocksdb.bloom.filter.prefix.useful"),
+ /// True positive: found a key matching the point query. When another key
+ /// with the same prefix matches, it is considered a false positive by
+ /// these statistics even though the filter returned a true positive.
+ BloomFilterPrefixTruePositive("rocksdb.bloom.filter.prefix.true.positive"),
+
+ /// # persistent cache hit
+ PersistentCacheHit("rocksdb.persistent.cache.hit"),
+ /// # persistent cache miss
+ PersistentCacheMiss("rocksdb.persistent.cache.miss"),
+
+ /// # total simulation block cache hits
+ SimBlockCacheHit("rocksdb.sim.block.cache.hit"),
+ /// # total simulation block cache misses
+ SimBlockCacheMiss("rocksdb.sim.block.cache.miss"),
+
+ /// # of memtable hits.
+ MemtableHit("rocksdb.memtable.hit"),
+ /// # of memtable misses.
+ MemtableMiss("rocksdb.memtable.miss"),
+
+ /// # of Get() queries served by L0
+ GetHitL0("rocksdb.l0.hit"),
+ /// # of Get() queries served by L1
+ GetHitL1("rocksdb.l1.hit"),
+ /// # of Get() queries served by L2 and up
+ GetHitL2AndUp("rocksdb.l2andup.hit"),
+
+ ///
+ /// CompactionKeyDrop* count the reasons for key drops during compaction.
+ /// There are currently 4 reasons.
+ ///
+ /// key was written with a newer value.
+ /// Also includes keys dropped for range del.
+ CompactionKeyDropNewerEntry("rocksdb.compaction.key.drop.new"),
+ /// The key is obsolete.
+ CompactionKeyDropObsolete("rocksdb.compaction.key.drop.obsolete"),
+ /// key was covered by a range tombstone.
+ CompactionKeyDropRangeDel("rocksdb.compaction.key.drop.range_del"),
+ /// user compaction function has dropped the key.
+ CompactionKeyDropUser("rocksdb.compaction.key.drop.user"),
+ /// all keys in range were deleted.
+ CompactionRangeDelDropObsolete("rocksdb.compaction.range_del.drop.obsolete"),
+ /// Deletions obsoleted before bottom level due to file gap optimization.
+ CompactionOptimizedDelDropObsolete("rocksdb.compaction.optimized.del.drop.obsolete"),
+ /// If a compaction was canceled in sfm to prevent ENOSPC
+ CompactionCancelled("rocksdb.compaction.cancelled"),
+
+ /// Number of keys written to the database via the Put and Write calls
+ NumberKeysWritten("rocksdb.number.keys.written"),
+ /// Number of keys read
+ NumberKeysRead("rocksdb.number.keys.read"),
+ /// Number of keys updated, if in-place update is enabled
+ NumberKeysUpdated("rocksdb.number.keys.updated"),
+ /// The number of uncompressed bytes issued by DB::Put(), DB::Delete(),
+ /// DB::Merge(), and DB::Write().
+ BytesWritten("rocksdb.bytes.written"),
+ /// The number of uncompressed bytes read from DB::Get(). It could be
+ /// either from memtables, cache, or table files.
+ /// For the number of logical bytes read from DB::MultiGet(),
+ /// please use NumberMultigetBytesRead.
+ BytesRead("rocksdb.bytes.read"),
+ /// The number of calls to seek/next/prev
+ NumberDbSeek("rocksdb.number.db.seek"),
+ NumberDbNext("rocksdb.number.db.next"),
+ NumberDbPrev("rocksdb.number.db.prev"),
+ /// The number of calls to seek/next/prev that returned data
+ NumberDbSeekFound("rocksdb.number.db.seek.found"),
+ NumberDbNextFound("rocksdb.number.db.next.found"),
+ NumberDbPrevFound("rocksdb.number.db.prev.found"),
+ /// The number of uncompressed bytes read from an iterator.
+ /// Includes size of key and value.
+ IterBytesRead("rocksdb.db.iter.bytes.read"),
+ /// Number of internal keys skipped by Iterator
+ NumberIterSkip("rocksdb.number.iter.skip"),
+ /// Number of times we had to reseek inside an iteration to skip
+ /// over large number of keys with same userkey.
+ NumberOfReseeksInIteration("rocksdb.number.reseeks.iteration"),
+
+ /// number of iterators created
+ NoIteratorCreated("rocksdb.num.iterator.created"),
+ /// number of iterators deleted
+ NoIteratorDeleted("rocksdb.num.iterator.deleted"),
+
+ NoFileOpens("rocksdb.no.file.opens"),
+ NoFileErrors("rocksdb.no.file.errors"),
+ /// Writer has to wait for compaction or flush to finish.
+ StallMicros("rocksdb.stall.micros"),
+ /// The wait time for db mutex.
+ /// Disabled by default. To enable it set stats level to kAll
+ DbMutexWaitMicros("rocksdb.db.mutex.wait.micros"),
+
+ /// Number of MultiGet calls, keys read, and bytes read
+ NumberMultigetCalls("rocksdb.number.multiget.get"),
+ NumberMultigetKeysRead("rocksdb.number.multiget.keys.read"),
+ NumberMultigetBytesRead("rocksdb.number.multiget.bytes.read"),
+ /// Number of keys actually found in MultiGet calls (vs number requested by
+ /// caller)
+ /// NumberMultigetKeysRead gives the number requested by the caller
+ NumberMultigetKeysFound("rocksdb.number.multiget.keys.found"),
+
+ NumberMergeFailures("rocksdb.number.merge.failures"),
+
+ /// Record the number of calls to GetUpdatesSince. Useful to keep track of
+ /// transaction log iterator refreshes
+ GetUpdatesSinceCalls("rocksdb.getupdatessince.calls"),
+ /// Number of times WAL sync is done
+ WalFileSynced("rocksdb.wal.synced"),
+ /// Number of bytes written to WAL
+ WalFileBytes("rocksdb.wal.bytes"),
+
+ /// Writes can be processed by requesting thread or by the thread at the
+ /// head of the writers queue.
+ WriteDoneBySelf("rocksdb.write.self"),
+ /// Equivalent to writes done for others
+ WriteDoneByOther("rocksdb.write.other"),
+ /// Number of Write calls that request WAL
+ WriteWithWal("rocksdb.write.wal"),
+ /// Bytes read during compaction
+ CompactReadBytes("rocksdb.compact.read.bytes"),
+ /// Bytes written during compaction
+ CompactWriteBytes("rocksdb.compact.write.bytes"),
+ /// Bytes written during flush
+ FlushWriteBytes("rocksdb.flush.write.bytes"),
+
+
+ /// Compaction read and write statistics broken down by CompactionReason
+ CompactReadBytesMarked("rocksdb.compact.read.marked.bytes"),
+ CompactReadBytesPeriodic("rocksdb.compact.read.periodic.bytes"),
+ CompactReadBytesTtl("rocksdb.compact.read.ttl.bytes"),
+ CompactWriteBytesMarked("rocksdb.compact.write.marked.bytes"),
+ CompactWriteBytesPeriodic("rocksdb.compact.write.periodic.bytes"),
+ CompactWriteBytesTtl("rocksdb.compact.write.ttl.bytes"),
+
+ /// Number of table properties loaded directly from file, without creating
+ /// table reader object.
+ NumberDirectLoadTableProperties("rocksdb.number.direct.load.table.properties"),
+ NumberSuperversionAcquires("rocksdb.number.superversion_acquires"),
+ NumberSuperversionReleases("rocksdb.number.superversion_releases"),
+ NumberSuperversionCleanups("rocksdb.number.superversion_cleanups"),
+
+ /// # of compressions/decompressions executed
+ NumberBlockCompressed("rocksdb.number.block.compressed"),
+ NumberBlockDecompressed("rocksdb.number.block.decompressed"),
+
+ /// Number of input bytes (uncompressed) to compression for SST blocks that
+ /// are stored compressed.
+ BytesCompressedFrom("rocksdb.bytes.compressed.from"),
+ /// Number of output bytes (compressed) from compression for SST blocks that
+ /// are stored compressed.
+ BytesCompressedTo("rocksdb.bytes.compressed.to"),
+ /// Number of uncompressed bytes for SST blocks that are stored uncompressed
+ /// because compression type is kNoCompression, or some error case caused
+ /// compression not to run or produce an output. Index blocks are only counted
+ /// if enable_index_compression is true.
+ BytesCompressionBypassed("rocksdb.bytes.compression_bypassed"),
+ /// Number of input bytes (uncompressed) to compression for SST blocks that
+ /// are stored uncompressed because the compression result was rejected,
+ /// either because the ratio was not acceptable (see
+ /// CompressionOptions::max_compressed_bytes_per_kb) or found invalid by the
+ /// `verify_compression` option.
+ BytesCompressionRejected("rocksdb.bytes.compression.rejected"),
+
+ /// Like BytesCompressionBypassed but counting number of blocks
+ NumberBlockCompressionBypassed("rocksdb.number.block_compression_bypassed"),
+ /// Like BytesCompressionRejected but counting number of blocks
+ NumberBlockCompressionRejected("rocksdb.number.block_compression_rejected"),
+
+ /// Number of input bytes (compressed) to decompression in reading compressed
+ /// SST blocks from storage.
+ BytesDecompressedFrom("rocksdb.bytes.decompressed.from"),
+ /// Number of output bytes (uncompressed) from decompression in reading
+ /// compressed SST blocks from storage.
+ BytesDecompressedTo("rocksdb.bytes.decompressed.to"),
+
+ /// Tickers that record cumulative time.
+ MergeOperationTotalTime("rocksdb.merge.operation.time.nanos"),
+ FilterOperationTotalTime("rocksdb.filter.operation.time.nanos"),
+ CompactionCpuTotalTime("rocksdb.compaction.total.time.cpu_micros"),
+
+ /// Row cache.
+ RowCacheHit("rocksdb.row.cache.hit"),
+ RowCacheMiss("rocksdb.row.cache.miss"),
+
+ /// Read amplification statistics.
+ /// Read amplification can be calculated using this formula:
+ /// (ReadAmpTotalReadBytes / ReadAmpEstimateUsefulBytes)
+ ///
+ /// REQUIRES: ReadOptions::read_amp_bytes_per_bit to be enabled
+ /// Estimate of total bytes actually used.
+ ReadAmpEstimateUsefulBytes("rocksdb.read.amp.estimate.useful.bytes"),
+ /// Total size of loaded data blocks.
+ ReadAmpTotalReadBytes("rocksdb.read.amp.total.read.bytes"),
+
+
+ /// Number of refill intervals where rate limiter's bytes are fully consumed.
+ NumberRateLimiterDrains("rocksdb.number.rate_limiter.drains"),
+
+ /// BlobDB specific stats
+ /// # of Put/PutTtl/PutUntil to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbNumPut("rocksdb.blobdb.num.put"),
+ /// # of Write to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbNumWrite("rocksdb.blobdb.num.write"),
+ /// # of Get to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbNumGet("rocksdb.blobdb.num.get"),
+ /// # of MultiGet to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbNumMultiget("rocksdb.blobdb.num.multiget"),
+ /// # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. Only
+ /// applicable to legacy BlobDB.
+ BlobDbNumSeek("rocksdb.blobdb.num.seek"),
+ /// # of Next to BlobDB iterator. Only applicable to legacy BlobDB.
+ BlobDbNumNext("rocksdb.blobdb.num.next"),
+ /// # of Prev to BlobDB iterator. Only applicable to legacy BlobDB.
+ BlobDbNumPrev("rocksdb.blobdb.num.prev"),
+ /// # of keys written to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbNumKeysWritten("rocksdb.blobdb.num.keys.written"),
+ /// # of keys read from BlobDB. Only applicable to legacy BlobDB.
+ BlobDbNumKeysRead("rocksdb.blobdb.num.keys.read"),
+ /// # of bytes (key + value) written to BlobDB. Only applicable to legacy
+ /// BlobDB.
+ BlobDbBytesWritten("rocksdb.blobdb.bytes.written"),
+ /// # of bytes (keys + value) read from BlobDB. Only applicable to legacy
+ /// BlobDB.
+ BlobDbBytesRead("rocksdb.blobdb.bytes.read"),
+ /// # of keys written by BlobDB as non-Ttl inlined value. Only applicable to
+ /// legacy BlobDB.
+ BlobDbWriteInlined("rocksdb.blobdb.write.inlined"),
+ /// # of keys written by BlobDB as Ttl inlined value. Only applicable to legacy
+ /// BlobDB.
+ BlobDbWriteInlinedTtl("rocksdb.blobdb.write.inlined.ttl"),
+ /// # of keys written by BlobDB as non-Ttl blob value. Only applicable to
+ /// legacy BlobDB.
+ BlobDbWriteBlob("rocksdb.blobdb.write.blob"),
+ /// # of keys written by BlobDB as Ttl blob value. Only applicable to legacy
+ /// BlobDB.
+ BlobDbWriteBlobTtl("rocksdb.blobdb.write.blob.ttl"),
+ /// # of bytes written to blob file.
+ BlobDbBlobFileBytesWritten("rocksdb.blobdb.blob.file.bytes.written"),
+ /// # of bytes read from blob file.
+ BlobDbBlobFileBytesRead("rocksdb.blobdb.blob.file.bytes.read"),
+ /// # of times a blob file is synced.
+ BlobDbBlobFileSynced("rocksdb.blobdb.blob.file.synced"),
+ /// # of blob index evicted from base DB by BlobDB compaction filter because
+ /// of expiration. Only applicable to legacy BlobDB.
+ BlobDbBlobIndexExpiredCount("rocksdb.blobdb.blob.index.expired.count"),
+ /// size of blob index evicted from base DB by BlobDB compaction filter
+ /// because of expiration. Only applicable to legacy BlobDB.
+ BlobDbBlobIndexExpiredSize("rocksdb.blobdb.blob.index.expired.size"),
+ /// # of blob index evicted from base DB by BlobDB compaction filter because
+ /// of corresponding file deleted. Only applicable to legacy BlobDB.
+ BlobDbBlobIndexEvictedCount("rocksdb.blobdb.blob.index.evicted.count"),
+ /// size of blob index evicted from base DB by BlobDB compaction filter
+ /// because of corresponding file deleted. Only applicable to legacy BlobDB.
+ BlobDbBlobIndexEvictedSize("rocksdb.blobdb.blob.index.evicted.size"),
+ /// # of blob files that were obsoleted by garbage collection. Only applicable
+ /// to legacy BlobDB.
+ BlobDbGcNumFiles("rocksdb.blobdb.gc.num.files"),
+ /// # of blob files generated by garbage collection. Only applicable to legacy
+ /// BlobDB.
+ BlobDbGcNumNewFiles("rocksdb.blobdb.gc.num.new.files"),
+ /// # of BlobDB garbage collection failures. Only applicable to legacy BlobDB.
+ BlobDbGcFailures("rocksdb.blobdb.gc.failures"),
+ /// # of keys relocated to new blob file by garbage collection.
+ BlobDbGcNumKeysRelocated("rocksdb.blobdb.gc.num.keys.relocated"),
+ /// # of bytes relocated to new blob file by garbage collection.
+ BlobDbGcBytesRelocated("rocksdb.blobdb.gc.bytes.relocated"),
+ /// # of blob files evicted because BlobDB is full. Only applicable to
+ /// legacy BlobDB.
+ BlobDbFifoNumFilesEvicted("rocksdb.blobdb.fifo.num.files.evicted"),
+ /// # of keys in the blob files evicted because BlobDB is full. Only
+ /// applicable to legacy BlobDB.
+ BlobDbFifoNumKeysEvicted("rocksdb.blobdb.fifo.num.keys.evicted"),
+ /// # of bytes in the blob files evicted because BlobDB is full. Only
+ /// applicable to legacy BlobDB.
+ BlobDbFifoBytesEvicted("rocksdb.blobdb.fifo.bytes.evicted"),
+
+ /// Integrated BlobDB specific stats
+ /// # of times cache miss when accessing blob from blob cache.
+ BlobDbCacheMiss("rocksdb.blobdb.cache.miss"),
+ /// # of times cache hit when accessing blob from blob cache.
+ BlobDbCacheHit("rocksdb.blobdb.cache.hit"),
+ /// # of data blocks added to blob cache.
+ BlobDbCacheAdd("rocksdb.blobdb.cache.add"),
+ /// # of failures when adding blobs to blob cache.
+ BlobDbCacheAddFailures("rocksdb.blobdb.cache.add.failures"),
+ /// # of bytes read from blob cache.
+ BlobDbCacheBytesRead("rocksdb.blobdb.cache.bytes.read"),
+ /// # of bytes written into blob cache.
+ BlobDbCacheBytesWrite("rocksdb.blobdb.cache.bytes.write"),
+
+ /// These counters indicate a performance issue in WritePrepared transactions.
+ /// We should not see them ticking much.
+ /// # of times prepare_mutex_ is acquired in the fast path.
+ TxnPrepareMutexOverhead("rocksdb.txn.overhead.mutex.prepare"),
+ /// # of times old_commit_map_mutex_ is acquired in the fast path.
+ TxnOldCommitMapMutexOverhead("rocksdb.txn.overhead.mutex.old.commit.map"),
+ /// # of times we checked a batch for duplicate keys.
+ TxnDuplicateKeyOverhead("rocksdb.txn.overhead.duplicate.key"),
+ /// # of times snapshot_mutex_ is acquired in the fast path.
+ TxnSnapshotMutexOverhead("rocksdb.txn.overhead.mutex.snapshot"),
+ /// # of times ::Get returned TryAgain due to expired snapshot seq
+ TxnGetTryAgain("rocksdb.txn.get.tryagain"),
+
+ /// # of files marked as trash by sst file manager and will be deleted
+ /// later by background thread.
+ FilesMarkedTrash("rocksdb.files.marked.trash"),
+ /// # of trash files deleted by the background thread from the trash queue.
+ FilesDeletedFromTrashQueue("rocksdb.files.marked.trash.deleted"),
+ /// # of files deleted immediately by sst file manager through delete
+ /// scheduler.
+ FilesDeletedImmediately("rocksdb.files.deleted.immediately"),
+
+ /// The counters for error handler. Note that bg_io_error is a subset of
+ /// bg_error and bg_retryable_io_error is a subset of bg_io_error.
+ ErrorHandlerBgErrorCount("rocksdb.error.handler.bg.error.count"),
+ ErrorHandlerBgIoErrorCount("rocksdb.error.handler.bg.io.error.count"),
+ ErrorHandlerBgRetryableIoErrorCount("rocksdb.error.handler.bg.retryable.io.error.count"),
+ ErrorHandlerAutoResumeCount("rocksdb.error.handler.autoresume.count"),
+ ErrorHandlerAutoResumeRetryTotalCount("rocksdb.error.handler.autoresume.retry.total.count"),
+ ErrorHandlerAutoResumeSuccessCount("rocksdb.error.handler.autoresume.success.count"),
+
+ /// Statistics for memtable garbage collection:
+ /// Raw bytes of data (payload) present on memtable at flush time.
+ MemtablePayloadBytesAtFlush("rocksdb.memtable.payload.bytes.at.flush"),
+ /// Outdated bytes of data present on memtable at flush time.
+ MemtableGarbageBytesAtFlush("rocksdb.memtable.garbage.bytes.at.flush"),
+
+ /// Bytes read by `VerifyChecksum()` and `VerifyFileChecksums()` APIs.
+ VerifyChecksumReadBytes("rocksdb.verify_checksum.read.bytes"),
+
+ /// Bytes read/written while creating backups
+ BackupReadBytes("rocksdb.backup.read.bytes"),
+ BackupWriteBytes("rocksdb.backup.write.bytes"),
+
+ /// Remote compaction read/write statistics
+ RemoteCompactReadBytes("rocksdb.remote.compact.read.bytes"),
+ RemoteCompactWriteBytes("rocksdb.remote.compact.write.bytes"),
+
+ /// Tiered storage related statistics
+ HotFileReadBytes("rocksdb.hot.file.read.bytes"),
+ WarmFileReadBytes("rocksdb.warm.file.read.bytes"),
+ ColdFileReadBytes("rocksdb.cold.file.read.bytes"),
+ HotFileReadCount("rocksdb.hot.file.read.count"),
+ WarmFileReadCount("rocksdb.warm.file.read.count"),
+ ColdFileReadCount("rocksdb.cold.file.read.count"),
+
+ /// Last level and non-last level read statistics
+ LastLevelReadBytes("rocksdb.last.level.read.bytes"),
+ LastLevelReadCount("rocksdb.last.level.read.count"),
+ NonLastLevelReadBytes("rocksdb.non.last.level.read.bytes"),
+ NonLastLevelReadCount("rocksdb.non.last.level.read.count"),
+
+ /// Statistics on iterator Seek() (and variants) for each sorted run. I.e. a
+ /// single user Seek() can result in many sorted run Seek()s.
+ /// The stats are split between last level and non-last level.
+        /// Filtered: a filter such as a prefix Bloom filter indicated the Seek()
+        /// would not find anything relevant, so a likely access to data+index
+        /// blocks was avoided.
+ LastLevelSeekFiltered("rocksdb.last.level.seek.filtered"),
+ /// Filter match: a filter such as prefix Bloom filter was queried but did
+ /// not filter out the seek.
+ LastLevelSeekFilterMatch("rocksdb.last.level.seek.filter.match"),
+ /// At least one data block was accessed for a Seek() (or variant) on a
+ /// sorted run.
+ LastLevelSeekData("rocksdb.last.level.seek.data"),
+ /// At least one value() was accessed for the seek (suggesting it was useful),
+ /// and no filter such as prefix Bloom was queried.
+ LastLevelSeekDataUsefulNoFilter("rocksdb.last.level.seek.data.useful.no.filter"),
+ /// At least one value() was accessed for the seek (suggesting it was useful),
+ /// after querying a filter such as prefix Bloom.
+ LastLevelSeekDataUsefulFilterMatch("rocksdb.last.level.seek.data.useful.filter.match"),
+ /// The same set of stats, but for non-last level seeks.
+ NonLastLevelSeekFiltered("rocksdb.non.last.level.seek.filtered"),
+ NonLastLevelSeekFilterMatch("rocksdb.non.last.level.seek.filter.match"),
+ NonLastLevelSeekData("rocksdb.non.last.level.seek.data"),
+ NonLastLevelSeekDataUsefulNoFilter("rocksdb.non.last.level.seek.data.useful.no.filter"),
+ NonLastLevelSeekDataUsefulFilterMatch("rocksdb.non.last.level.seek.data.useful.filter.match"),
+
+ /// Number of block checksum verifications
+ BlockChecksumComputeCount("rocksdb.block.checksum.compute.count"),
+ /// Number of times RocksDB detected a corruption while verifying a block
+ /// checksum. RocksDB does not remember corruptions that happened during user
+ /// reads so the same block corruption may be detected multiple times.
+ BlockChecksumMismatchCount("rocksdb.block.checksum.mismatch.count"),
+
+ MultigetCoroutineCount("rocksdb.multiget.coroutine.count"),
+
+ /// Time spent in the ReadAsync file system call
+ ReadAsyncMicros("rocksdb.read.async.micros"),
+ /// Number of errors returned to the async read callback
+ AsyncReadErrorCount("rocksdb.async.read.error.count"),
+
+        /// Number of lookups into the prefetched tail (see
+        /// `TableOpenPrefetchTailReadBytes`) that could not find their data
+        /// during table open.
+        TableOpenPrefetchTailMiss("rocksdb.table.open.prefetch.tail.miss"),
+        /// Number of lookups into the prefetched tail (see
+        /// `TableOpenPrefetchTailReadBytes`) that found their data during
+        /// table open.
+        TableOpenPrefetchTailHit("rocksdb.table.open.prefetch.tail.hit"),
+
+ /// Statistics on the filtering by user-defined timestamps
+ /// # of times timestamps are checked on accessing the table
+ TimestampFilterTableChecked("rocksdb.timestamp.filter.table.checked"),
+ /// # of times timestamps can successfully help skip the table access
+ TimestampFilterTableFiltered("rocksdb.timestamp.filter.table.filtered"),
+
+ /// Number of times readahead is trimmed during scans when
+ /// ReadOptions.auto_readahead_size is set.
+ ReadAheadTrimmed("rocksdb.readahead.trimmed"),
+
+ /// Number of Fifo compactions that drop files based on different reasons
+ FifoMaxSizeCompactions("rocksdb.fifo.max.size.compactions"),
+ FifoTtlCompactions("rocksdb.fifo.ttl.compactions"),
+
+ /// Number of bytes prefetched during user initiated scan
+ PrefetchBytes("rocksdb.prefetch.bytes"),
+
+ /// Number of prefetched bytes that were actually useful
+ PrefetchBytesUseful("rocksdb.prefetch.bytes.useful"),
+
+ /// Number of FS reads avoided due to scan prefetching
+ PrefetchHits("rocksdb.prefetch.hits"),
+ }
+}
+
+iterable_named_enum! {
+ #[derive(Debug, Copy, Clone, PartialEq, Eq)]
+ #[repr(u32)]
+ pub enum Histogram {
+ DbGet("rocksdb.db.get.micros") = 0,
+ DbWrite("rocksdb.db.write.micros"),
+ CompactionTime("rocksdb.compaction.times.micros"),
+ CompactionCpuTime("rocksdb.compaction.times.cpu_micros"),
+ SubcompactionSetupTime("rocksdb.subcompaction.setup.times.micros"),
+ TableSyncMicros("rocksdb.table.sync.micros"),
+ CompactionOutfileSyncMicros("rocksdb.compaction.outfile.sync.micros"),
+ WalFileSyncMicros("rocksdb.wal.file.sync.micros"),
+ ManifestFileSyncMicros("rocksdb.manifest.file.sync.micros"),
+ /// Time spent in IO during table open
+ TableOpenIoMicros("rocksdb.table.open.io.micros"),
+ DbMultiget("rocksdb.db.multiget.micros"),
+ ReadBlockCompactionMicros("rocksdb.read.block.compaction.micros"),
+ ReadBlockGetMicros("rocksdb.read.block.get.micros"),
+ WriteRawBlockMicros("rocksdb.write.raw.block.micros"),
+ NumFilesInSingleCompaction("rocksdb.numfiles.in.singlecompaction"),
+ DbSeek("rocksdb.db.seek.micros"),
+ WriteStall("rocksdb.db.write.stall"),
+ /// Time spent in reading block-based or plain SST table
+ SstReadMicros("rocksdb.sst.read.micros"),
+ /// Time spent in reading SST table (currently only block-based table) or blob
+ /// file corresponding to `Env::IOActivity`
+ FileReadFlushMicros("rocksdb.file.read.flush.micros"),
+ FileReadCompactionMicros("rocksdb.file.read.compaction.micros"),
+ FileReadDbOpenMicros("rocksdb.file.read.db.open.micros"),
+        /// The following `FILE_READ_*` histograms require a stats level greater
+        /// than `StatsLevel::kExceptDetailedTimers`.
+ FileReadGetMicros("rocksdb.file.read.get.micros"),
+ FileReadMultigetMicros("rocksdb.file.read.multiget.micros"),
+ FileReadDbIteratorMicros("rocksdb.file.read.db.iterator.micros"),
+ FileReadVerifyDbChecksumMicros("rocksdb.file.read.verify.db.checksum.micros"),
+ FileReadVerifyFileChecksumsMicros("rocksdb.file.read.verify.file.checksums.micros"),
+
+        /// Time spent in writing SST files
+        SstWriteMicros("rocksdb.sst.write.micros"),
+        /// Time spent in writing SST table (currently only block-based table) or blob
+        /// file for flush, compaction or db open
+ FileWriteFlushMicros("rocksdb.file.write.flush.micros"),
+ FileWriteCompactionMicros("rocksdb.file.write.compaction.micros"),
+ FileWriteDbOpenMicros("rocksdb.file.write.db.open.micros"),
+
+ /// The number of subcompactions actually scheduled during a compaction
+ NumSubcompactionsScheduled("rocksdb.num.subcompactions.scheduled"),
+ /// Value size distribution in each operation
+ BytesPerRead("rocksdb.bytes.per.read"),
+ BytesPerWrite("rocksdb.bytes.per.write"),
+ BytesPerMultiget("rocksdb.bytes.per.multiget"),
+
+ CompressionTimesNanos("rocksdb.compression.times.nanos"),
+ DecompressionTimesNanos("rocksdb.decompression.times.nanos"),
+ /// Number of merge operands passed to the merge operator in user read
+ /// requests.
+ ReadNumMergeOperands("rocksdb.read.num.merge_operands"),
+ /// BlobDB specific stats
+ /// Size of keys written to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbKeySize("rocksdb.blobdb.key.size"),
+ /// Size of values written to BlobDB. Only applicable to legacy BlobDB.
+ BlobDbValueSize("rocksdb.blobdb.value.size"),
+ /// BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy
+ /// BlobDB.
+ BlobDbWriteMicros("rocksdb.blobdb.write.micros"),
+ /// BlobDB Get latency. Only applicable to legacy BlobDB.
+ BlobDbGetMicros("rocksdb.blobdb.get.micros"),
+ /// BlobDB MultiGet latency. Only applicable to legacy BlobDB.
+ BlobDbMultigetMicros("rocksdb.blobdb.multiget.micros"),
+ /// BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. Only applicable to
+ /// legacy BlobDB.
+ BlobDbSeekMicros("rocksdb.blobdb.seek.micros"),
+ /// BlobDB Next latency. Only applicable to legacy BlobDB.
+ BlobDbNextMicros("rocksdb.blobdb.next.micros"),
+ /// BlobDB Prev latency. Only applicable to legacy BlobDB.
+ BlobDbPrevMicros("rocksdb.blobdb.prev.micros"),
+ /// Blob file write latency.
+ BlobDbBlobFileWriteMicros("rocksdb.blobdb.blob.file.write.micros"),
+ /// Blob file read latency.
+ BlobDbBlobFileReadMicros("rocksdb.blobdb.blob.file.read.micros"),
+ /// Blob file sync latency.
+ BlobDbBlobFileSyncMicros("rocksdb.blobdb.blob.file.sync.micros"),
+ /// BlobDB compression time.
+ BlobDbCompressionMicros("rocksdb.blobdb.compression.micros"),
+ /// BlobDB decompression time.
+ BlobDbDecompressionMicros("rocksdb.blobdb.decompression.micros"),
+ /// Time spent flushing memtable to disk
+ FlushTime("rocksdb.db.flush.micros"),
+ SstBatchSize("rocksdb.sst.batch.size"),
+ /// MultiGet stats logged per level
+ /// Num of index and filter blocks read from file system per level.
+ NumIndexAndFilterBlocksReadPerLevel("rocksdb.num.index.and.filter.blocks.read.per.level"),
+ /// Num of sst files read from file system per level.
+ NumSstReadPerLevel("rocksdb.num.sst.read.per.level"),
+ /// Error handler statistics
+ ErrorHandlerAutoresumeRetryCount("rocksdb.error.handler.autoresume.retry.count"),
+ /// Stats related to asynchronous read requests.
+ AsyncReadBytes("rocksdb.async.read.bytes"),
+ PollWaitMicros("rocksdb.poll.wait.micros"),
+ /// Number of prefetched bytes discarded by RocksDB.
+ PrefetchedBytesDiscarded("rocksdb.prefetched.bytes.discarded"),
+ /// Number of IOs issued in parallel in a MultiGet batch
+ MultigetIoBatchSize("rocksdb.multiget.io.batch.size"),
+ /// Number of levels requiring IO for MultiGet
+ NumLevelReadPerMultiget("rocksdb.num.level.read.per.multiget"),
+ /// Wait time for aborting async read in FilePrefetchBuffer destructor
+ AsyncPrefetchAbortMicros("rocksdb.async.prefetch.abort.micros"),
+ /// Number of bytes read for RocksDB's prefetching contents (as opposed to file
+ /// system's prefetch) from the end of SST table during block based table open
+ TableOpenPrefetchTailReadBytes("rocksdb.table.open.prefetch.tail.read.bytes"),
+ }
+}
+
+pub struct HistogramData {
+ pub(crate) inner: *mut ffi::rocksdb_statistics_histogram_data_t,
+}
+
+impl HistogramData {
+ pub fn new() -> HistogramData {
+ HistogramData::default()
+ }
+ pub fn median(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_median(self.inner) }
+ }
+ pub fn average(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_average(self.inner) }
+ }
+ pub fn p95(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_p95(self.inner) }
+ }
+ pub fn p99(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_p99(self.inner) }
+ }
+ pub fn max(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_max(self.inner) }
+ }
+ pub fn min(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_min(self.inner) }
+ }
+ pub fn sum(&self) -> u64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_sum(self.inner) }
+ }
+ pub fn count(&self) -> u64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_count(self.inner) }
+ }
+ pub fn std_dev(&self) -> f64 {
+ unsafe { ffi::rocksdb_statistics_histogram_data_get_std_dev(self.inner) }
+ }
+}
+
+impl Default for HistogramData {
+ fn default() -> Self {
+ let histogram_data_inner = unsafe { ffi::rocksdb_statistics_histogram_data_create() };
+ assert!(
+ !histogram_data_inner.is_null(),
+ "Could not create RocksDB histogram data"
+ );
+
+ Self {
+ inner: histogram_data_inner,
+ }
+ }
+}
+
+impl Drop for HistogramData {
+ fn drop(&mut self) {
+ unsafe {
+ ffi::rocksdb_statistics_histogram_data_destroy(self.inner);
+ }
+ }
+}
+
+#[test]
+fn sanity_checks() {
+ let want = "rocksdb.async.read.bytes";
+ assert_eq!(want, Histogram::AsyncReadBytes.name());
+
+ let want = "rocksdb.block.cache.index.miss";
+ assert_eq!(want, Ticker::BlockCacheIndexMiss.to_string());
+
+ // assert enum lengths
+ assert_eq!(Ticker::iter().count(), 211 /* TICKER_ENUM_MAX */);
+ assert_eq!(Histogram::iter().count(), 62 /* HISTOGRAM_ENUM_MAX */);
+}
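A minimal usage sketch of the statistics surface added above (not part of the patch), mirroring the `get_statistics_test` introduced later in this commit. It assumes only the `Options` methods that test already exercises (`enable_statistics`, `set_statistics_level`, `get_ticker_count`, `get_histogram_data`); the helper name `stats_snapshot` is hypothetical:

    use rocksdb::statistics::{Histogram, StatsLevel, Ticker};
    use rocksdb::{Options, DB};

    fn stats_snapshot(path: &str) -> Result<(), rocksdb::Error> {
        let mut opts = Options::default();
        opts.create_if_missing(true);
        // Statistics are off by default; enable them and pick a level.
        opts.enable_statistics();
        opts.set_statistics_level(StatsLevel::All);

        let db = DB::open(&opts, path)?;
        db.put(b"key", b"value")?;
        db.flush()?;

        // Cumulative counter, read back through the same Options value.
        let written = opts.get_ticker_count(Ticker::BytesWritten);
        println!("bytes written: {written}");

        // Latency distribution for writes, via the HistogramData accessors above.
        let h = opts.get_histogram_data(Histogram::DbWrite);
        println!("db.write p99: {} us over {} samples", h.p99(), h.count());
        Ok(())
    }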
diff --git a/src/transactions/optimistic_transaction_db.rs b/src/transactions/optimistic_transaction_db.rs
index 5642594..89a061c 100644
--- a/src/transactions/optimistic_transaction_db.rs
+++ b/src/transactions/optimistic_transaction_db.rs
@@ -15,12 +15,15 @@
use std::{collections::BTreeMap, ffi::CString, fs, iter, marker::PhantomData, path::Path, ptr};
-use libc::{c_char, c_int};
+use libc::{c_char, c_int, size_t};
use crate::{
- db::DBCommon, db::DBInner, ffi, ffi_util::to_cpath, write_batch::WriteBatchWithTransaction,
- ColumnFamilyDescriptor, Error, OptimisticTransactionOptions, Options, ThreadMode, Transaction,
- WriteOptions, DEFAULT_COLUMN_FAMILY_NAME,
+ db::{DBCommon, DBInner},
+ ffi,
+ ffi_util::to_cpath,
+ write_batch::WriteBatchWithTransaction,
+ AsColumnFamilyRef, ColumnFamilyDescriptor, Error, OptimisticTransactionOptions, Options,
+ ThreadMode, Transaction, WriteOptions, DEFAULT_COLUMN_FAMILY_NAME,
};
/// A type alias to RocksDB Optimistic Transaction DB.
@@ -42,7 +45,7 @@ use crate::{
/// {
/// let db: OptimisticTransactionDB = OptimisticTransactionDB::open_default(path).unwrap();
/// db.put(b"my key", b"my value").unwrap();
-///
+///
/// // create transaction
/// let txn = db.transaction();
/// txn.put(b"key2", b"value2");
@@ -97,6 +100,9 @@ impl<T: ThreadMode> OptimisticTransactionDB<T> {
/// Opens a database with the given database options and column family names.
///
/// Column families opened using this function will be created with default `Options`.
+    /// *NOTE*: the `default` column family will be opened with `Options::default()`.
+ /// If you want to open `default` column family with custom options, use `open_cf_descriptors` and
+ /// provide a `ColumnFamilyDescriptor` with the desired options.
pub fn open_cf<P, I, N>(opts: &Options, path: P, cfs: I) -> Result<Self, Error>
where
P: AsRef<Path>,
@@ -290,4 +296,39 @@ impl<T: ThreadMode> OptimisticTransactionDB<T> {
wo.disable_wal(true);
self.write_opt(batch, &wo)
}
+
+    /// Removes the database entries in the range `["from", "to")` using the given write options.
+ pub fn delete_range_cf_opt<K: AsRef<[u8]>>(
+ &self,
+ cf: &impl AsColumnFamilyRef,
+ from: K,
+ to: K,
+ writeopts: &WriteOptions,
+ ) -> Result<(), Error> {
+ let from = from.as_ref();
+ let to = to.as_ref();
+
+ unsafe {
+ ffi_try!(ffi::rocksdb_delete_range_cf(
+ self.inner.inner(),
+ writeopts.inner,
+ cf.inner(),
+ from.as_ptr() as *const c_char,
+ from.len() as size_t,
+ to.as_ptr() as *const c_char,
+ to.len() as size_t,
+ ));
+ Ok(())
+ }
+ }
+
+    /// Removes the database entries in the range `["from", "to")` using the default write options.
+ pub fn delete_range_cf<K: AsRef<[u8]>>(
+ &self,
+ cf: &impl AsColumnFamilyRef,
+ from: K,
+ to: K,
+ ) -> Result<(), Error> {
+ self.delete_range_cf_opt(cf, from, to, &WriteOptions::default())
+ }
}
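The `open_cf` note above points at `open_cf_descriptors` when the `default` column family needs non-default options. A short sketch of that pattern (not part of the patch; it assumes `OptimisticTransactionDB::open_cf_descriptors` and `ColumnFamilyDescriptor::new`, the API the note itself refers to; the helper name is hypothetical):

    use rocksdb::{
        ColumnFamilyDescriptor, OptimisticTransactionDB, Options, SingleThreaded,
        DEFAULT_COLUMN_FAMILY_NAME,
    };

    fn open_with_tuned_default_cf(
        path: &str,
    ) -> Result<OptimisticTransactionDB<SingleThreaded>, rocksdb::Error> {
        let mut db_opts = Options::default();
        db_opts.create_if_missing(true);
        db_opts.create_missing_column_families(true);

        // `open_cf` would open the `default` column family with plain
        // Options::default(); a descriptor lets it carry its own options.
        let mut default_cf_opts = Options::default();
        default_cf_opts.set_write_buffer_size(64 << 20);

        let cfs = vec![
            ColumnFamilyDescriptor::new(DEFAULT_COLUMN_FAMILY_NAME, default_cf_opts),
            ColumnFamilyDescriptor::new("cf1", Options::default()),
        ];
        OptimisticTransactionDB::open_cf_descriptors(&db_opts, path, cfs)
    }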
diff --git a/src/transactions/transaction_db.rs b/src/transactions/transaction_db.rs
index c4f81d8..365ac2b 100644
--- a/src/transactions/transaction_db.rs
+++ b/src/transactions/transaction_db.rs
@@ -300,8 +300,8 @@ impl<T: ThreadMode> TransactionDB<T> {
let mut cnt = 0;
let ptr = ffi::rocksdb_transactiondb_get_prepared_transactions(db, &mut cnt);
let mut vec = vec![std::ptr::null_mut(); cnt];
- std::ptr::copy_nonoverlapping(ptr, vec.as_mut_ptr(), cnt);
if !ptr.is_null() {
+ std::ptr::copy_nonoverlapping(ptr, vec.as_mut_ptr(), cnt);
ffi::rocksdb_free(ptr as *mut c_void);
}
vec
diff --git a/tests/fail/checkpoint_outlive_db.stderr b/tests/fail/checkpoint_outlive_db.stderr
index a8ff0cc..6d16fa8 100644
--- a/tests/fail/checkpoint_outlive_db.stderr
+++ b/tests/fail/checkpoint_outlive_db.stderr
@@ -4,6 +4,7 @@ error[E0597]: `db` does not live long enough
4 | let _checkpoint = {
| ----------- borrow later stored here
5 | let db = DB::open_default("foo").unwrap();
+ | -- binding `db` declared here
6 | Checkpoint::new(&db)
| ^^^ borrowed value does not live long enough
7 | };
diff --git a/tests/fail/iterator_outlive_db.stderr b/tests/fail/iterator_outlive_db.stderr
index 9fbaf15..3eb3085 100644
--- a/tests/fail/iterator_outlive_db.stderr
+++ b/tests/fail/iterator_outlive_db.stderr
@@ -4,6 +4,7 @@ error[E0597]: `db` does not live long enough
4 | let _iter = {
| ----- borrow later stored here
5 | let db = DB::open_default("foo").unwrap();
+ | -- binding `db` declared here
6 | db.iterator(IteratorMode::Start)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ borrowed value does not live long enough
7 | };
diff --git a/tests/fail/open_with_multiple_refs_as_single_threaded.stderr b/tests/fail/open_with_multiple_refs_as_single_threaded.stderr
index 0f17e9d..984ef88 100644
--- a/tests/fail/open_with_multiple_refs_as_single_threaded.stderr
+++ b/tests/fail/open_with_multiple_refs_as_single_threaded.stderr
@@ -1,17 +1,21 @@
error[E0596]: cannot borrow `*db_ref1` as mutable, as it is behind a `&` reference
--> tests/fail/open_with_multiple_refs_as_single_threaded.rs:8:5
|
-5 | let db_ref1 = &db;
- | --- help: consider changing this to be a mutable reference: `&mut db`
-...
8 | db_ref1.create_cf("cf1", &opts).unwrap();
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `db_ref1` is a `&` reference, so the data it refers to cannot be borrowed as mutable
+ |
+help: consider changing this to be a mutable reference
+ |
+5 | let db_ref1 = &mut db;
+ | ~~~~~~~
error[E0596]: cannot borrow `*db_ref2` as mutable, as it is behind a `&` reference
--> tests/fail/open_with_multiple_refs_as_single_threaded.rs:9:5
|
-6 | let db_ref2 = &db;
- | --- help: consider changing this to be a mutable reference: `&mut db`
-...
9 | db_ref2.create_cf("cf2", &opts).unwrap();
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `db_ref2` is a `&` reference, so the data it refers to cannot be borrowed as mutable
+ |
+help: consider changing this to be a mutable reference
+ |
+6 | let db_ref2 = &mut db;
+ | ~~~~~~~
diff --git a/tests/fail/snapshot_outlive_db.stderr b/tests/fail/snapshot_outlive_db.stderr
index e0ae012..05fbc56 100644
--- a/tests/fail/snapshot_outlive_db.stderr
+++ b/tests/fail/snapshot_outlive_db.stderr
@@ -4,6 +4,7 @@ error[E0597]: `db` does not live long enough
4 | let _snapshot = {
| --------- borrow later stored here
5 | let db = DB::open_default("foo").unwrap();
+ | -- binding `db` declared here
6 | db.snapshot()
| ^^^^^^^^^^^^^ borrowed value does not live long enough
7 | };
diff --git a/tests/fail/snapshot_outlive_transaction.stderr b/tests/fail/snapshot_outlive_transaction.stderr
index 653c1a8..9f5305b 100644
--- a/tests/fail/snapshot_outlive_transaction.stderr
+++ b/tests/fail/snapshot_outlive_transaction.stderr
@@ -4,6 +4,7 @@ error[E0597]: `txn` does not live long enough
5 | let _snapshot = {
| --------- borrow later stored here
6 | let txn = db.transaction();
+ | --- binding `txn` declared here
7 | txn.snapshot()
| ^^^^^^^^^^^^^^ borrowed value does not live long enough
8 | };
diff --git a/tests/fail/snapshot_outlive_transaction_db.stderr b/tests/fail/snapshot_outlive_transaction_db.stderr
index d07341f..a9721b6 100644
--- a/tests/fail/snapshot_outlive_transaction_db.stderr
+++ b/tests/fail/snapshot_outlive_transaction_db.stderr
@@ -4,6 +4,7 @@ error[E0597]: `db` does not live long enough
4 | let _snapshot = {
| --------- borrow later stored here
5 | let db = TransactionDB::<SingleThreaded>::open_default("foo").unwrap();
+ | -- binding `db` declared here
6 | db.snapshot()
| ^^^^^^^^^^^^^ borrowed value does not live long enough
7 | };
diff --git a/tests/fail/transaction_outlive_transaction_db.stderr b/tests/fail/transaction_outlive_transaction_db.stderr
index 45237a3..8a8d343 100644
--- a/tests/fail/transaction_outlive_transaction_db.stderr
+++ b/tests/fail/transaction_outlive_transaction_db.stderr
@@ -4,6 +4,7 @@ error[E0597]: `db` does not live long enough
4 | let _txn = {
| ---- borrow later stored here
5 | let db = TransactionDB::<SingleThreaded>::open_default("foo").unwrap();
+ | -- binding `db` declared here
6 | db.transaction()
| ^^^^^^^^^^^^^^^^ borrowed value does not live long enough
7 | };
diff --git a/tests/test_backup.rs b/tests/test_backup.rs
index b1dff2e..5109e52 100644
--- a/tests/test_backup.rs
+++ b/tests/test_backup.rs
@@ -107,3 +107,10 @@ fn restore_from_backup() {
}
}
}
+
+fn assert_send_generic<T: Send>() {}
+
+#[test]
+fn assert_send() {
+ assert_send_generic::<BackupEngine>();
+}
diff --git a/tests/test_db.rs b/tests/test_db.rs
index e7eec0c..62a5847 100644
--- a/tests/test_db.rs
+++ b/tests/test_db.rs
@@ -19,6 +19,7 @@ use std::{mem, sync::Arc, thread, time::Duration};
use pretty_assertions::assert_eq;
+use rocksdb::statistics::{Histogram, StatsLevel, Ticker};
use rocksdb::{
perf::get_memory_usage_stats, BlockBasedOptions, BottommostLevelCompaction, Cache,
ColumnFamilyDescriptor, CompactOptions, CuckooTableOptions, DBAccess, DBCompactionStyle,
@@ -377,6 +378,34 @@ fn set_option_cf_test() {
}
#[test]
+fn get_statistics_test() {
+ let path = DBPath::new("_rust_rocksdb_get_statisticstest");
+ {
+ let mut opts = Options::default();
+ opts.create_if_missing(true);
+ opts.create_missing_column_families(true);
+ opts.enable_statistics();
+ opts.set_statistics_level(StatsLevel::All);
+ let db = DB::open_cf(&opts, &path, vec!["cf1"]).unwrap();
+ let cf = db.cf_handle("cf1").unwrap();
+
+ let initial_bytes_written = opts.get_ticker_count(Ticker::BytesWritten);
+ db.put_cf(&cf, b"key1", b"value").unwrap();
+ db.put_cf(&cf, b"key2", b"value").unwrap();
+ db.put_cf(&cf, b"key3", b"value").unwrap();
+ db.flush_cf(&cf).unwrap();
+
+ assert!(opts.get_ticker_count(Ticker::BytesWritten) > 0);
+        // We should see some counters increase
+ assert!(opts.get_ticker_count(Ticker::BytesWritten) > initial_bytes_written);
+
+ let histogram_data = opts.get_histogram_data(Histogram::DbWrite);
+ assert!(histogram_data.count() > 0);
+ assert!(histogram_data.max().is_normal());
+ }
+}
+
+#[test]
fn set_column_family_metadata_test() {
let path = DBPath::new("_set_column_family_metadata_test");
{
diff --git a/tests/test_optimistic_transaction_db.rs b/tests/test_optimistic_transaction_db.rs
index 61c86de..036ab92 100644
--- a/tests/test_optimistic_transaction_db.rs
+++ b/tests/test_optimistic_transaction_db.rs
@@ -581,3 +581,31 @@ fn transaction_snapshot() {
assert_eq!(snapshot.get(b"k3").unwrap().unwrap(), b"v3");
}
}
+
+#[test]
+fn delete_range_test() {
+ let path = DBPath::new("_rust_rocksdb_optimistic_transaction_db_delete_range_test");
+ {
+ let mut opts = Options::default();
+ opts.create_if_missing(true);
+ opts.create_missing_column_families(true);
+
+ let cfs = vec!["cf1"];
+ let db: OptimisticTransactionDB =
+ OptimisticTransactionDB::open_cf(&opts, &path, cfs).unwrap();
+
+ let cf1 = db.cf_handle("cf1").unwrap();
+ db.put_cf(&cf1, b"k1", b"v1").unwrap();
+ db.put_cf(&cf1, b"k2", b"v2").unwrap();
+ db.put_cf(&cf1, b"k3", b"v3").unwrap();
+ db.put_cf(&cf1, b"k4", b"v4").unwrap();
+ db.put_cf(&cf1, b"k5", b"v5").unwrap();
+
+ db.delete_range_cf(&cf1, b"k2", b"k4").unwrap();
+ assert_eq!(db.get_cf(&cf1, b"k1").unwrap().unwrap(), b"v1");
+ assert_eq!(db.get_cf(&cf1, b"k4").unwrap().unwrap(), b"v4");
+ assert_eq!(db.get_cf(&cf1, b"k5").unwrap().unwrap(), b"v5");
+ assert!(db.get_cf(&cf1, b"k2").unwrap().is_none());
+ assert!(db.get_cf(&cf1, b"k3").unwrap().is_none());
+ }
+}
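The `_opt` variant added in this commit also accepts explicit `WriteOptions`. A small sketch (not part of the patch, hypothetical helper mirroring the test setup above) that skips the WAL for the range delete:

    use rocksdb::{OptimisticTransactionDB, WriteOptions};

    fn delete_range_no_wal(
        db: &OptimisticTransactionDB,
        from: &[u8],
        to: &[u8],
    ) -> Result<(), rocksdb::Error> {
        let cf = db.cf_handle("cf1").expect("cf1 opened as in the test above");
        // Same semantics as delete_range_cf, but with caller-supplied write
        // options; here the WAL is skipped for this range delete.
        let mut wo = WriteOptions::default();
        wo.disable_wal(true);
        db.delete_range_cf_opt(&cf, from, to, &wo)
    }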