path: root/src/db_options.rs
author    Richard Westhaver <ellis@rwest.io>  2024-05-13 00:00:06 +0000
committer Richard Westhaver <ellis@rwest.io>  2024-05-13 00:00:06 +0000
commit    46aa698931386d924b011ab283ed9c06d9979edb (patch)
tree      96acbd95f0cad5d088a1c91702231ab525fee92f /src/db_options.rs
parent    265259448e454817d199a968a8c590c3ca1da947 (diff)
parent    daaaf85fffb1c981aa93ca418b380ea2ea91aac3 (diff)
merge upstream (HEAD, default)
Diffstat (limited to 'src/db_options.rs')
-rw-r--r--  src/db_options.rs  222
1 file changed, 188 insertions(+), 34 deletions(-)
diff --git a/src/db_options.rs b/src/db_options.rs
index 2db5bcb..2ee25b3 100644
--- a/src/db_options.rs
+++ b/src/db_options.rs
@@ -20,6 +20,7 @@ use std::sync::Arc;
use libc::{self, c_char, c_double, c_int, c_uchar, c_uint, c_void, size_t};
+use crate::statistics::{Histogram, HistogramData, StatsLevel};
use crate::{
compaction_filter::{self, CompactionFilterCallback, CompactionFilterFn},
compaction_filter_factory::{self, CompactionFilterFactory},
@@ -32,9 +33,110 @@ use crate::{
self, full_merge_callback, partial_merge_callback, MergeFn, MergeOperatorCallback,
},
slice_transform::SliceTransform,
+ statistics::Ticker,
ColumnFamilyDescriptor, Error, SnapshotWithThreadMode,
};
+pub(crate) struct WriteBufferManagerWrapper {
+ pub(crate) inner: NonNull<ffi::rocksdb_write_buffer_manager_t>,
+}
+
+impl Drop for WriteBufferManagerWrapper {
+ fn drop(&mut self) {
+ unsafe {
+ ffi::rocksdb_write_buffer_manager_destroy(self.inner.as_ptr());
+ }
+ }
+}
+
+#[derive(Clone)]
+pub struct WriteBufferManager(pub(crate) Arc<WriteBufferManagerWrapper>);
+
+impl WriteBufferManager {
+ /// <https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager>
+ /// Write buffer manager helps users control the total memory used by memtables across multiple column families and/or DB instances.
+ /// Users can enable this control in two ways:
+ ///
+ /// 1- Limit the total memtable usage across multiple column families and DBs under a threshold.
+ /// 2- Cost the memtable memory usage to block cache so that memory of RocksDB can be capped by the single limit.
+ /// The usage of a write buffer manager is similar to rate_limiter and sst_file_manager.
+ /// Users can create one write buffer manager object and pass it to all the options of column families or DBs whose memtable size they want to be controlled by this object.
+ ///
+ /// A memory limit is given when creating the write buffer manager object. RocksDB will try to limit the total memory to under this limit.
+ ///
+ /// If the mutable memtable size exceeds about 90% of the limit, a flush will be
+ /// triggered on one column family of the DB you are inserting to.
+ /// If the total memory is over the limit, a more aggressive flush may also be triggered,
+ /// but only if the mutable memtable size also exceeds 50% of the limit.
+ /// Both checks are needed because if more than half of the memory is already being flushed,
+ /// triggering more flushes may not help.
+ ///
+ /// The total memory is counted as the total memory allocated in the arena, even if some of
+ /// that may not yet be used by a memtable.
+ ///
+ /// buffer_size: the memory limit in bytes.
+ /// allow_stall: if set to true, all writers are stalled when memory usage exceeds buffer_size (a soft limit),
+ /// waiting for flushes to complete and memory usage to drop back down.
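+ ///
+ /// A minimal construction sketch (crate-root import path assumed; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::WriteBufferManager;
+ ///
+ /// // 64 MiB soft limit for all memtables governed by this manager; no write stalling.
+ /// let wbm = WriteBufferManager::new_write_buffer_manager(64 * 1024 * 1024, false);
+ /// assert_eq!(wbm.get_buffer_size(), 64 * 1024 * 1024);
+ /// assert_eq!(wbm.get_usage(), 0);
+ /// ```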
+ pub fn new_write_buffer_manager(buffer_size: size_t, allow_stall: bool) -> Self {
+ let inner = NonNull::new(unsafe {
+ ffi::rocksdb_write_buffer_manager_create(buffer_size, allow_stall)
+ })
+ .unwrap();
+ WriteBufferManager(Arc::new(WriteBufferManagerWrapper { inner }))
+ }
+
+ /// Users can set up RocksDB to charge the memory used by memtables to the block cache.
+ /// This works whether or not the memtable memory limit is enabled, and exists so that
+ /// memory (memtables + block cache) can be managed under a single limit.
+ ///
+ /// buffer_size: the memory limit in bytes.
+ /// allow_stall: if set to true, all writers are stalled when memory usage exceeds buffer_size (a soft limit),
+ /// waiting for flushes to complete and memory usage to drop back down.
+ /// cache: the block cache instance
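+ ///
+ /// A minimal sketch of charging memtable memory to a block cache (import paths assumed; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::{Cache, WriteBufferManager};
+ ///
+ /// // Memtables and the block cache now share the cache's memory budget.
+ /// let cache = Cache::new_lru_cache(128 * 1024 * 1024);
+ /// let wbm = WriteBufferManager::new_write_buffer_manager_with_cache(64 * 1024 * 1024, false, cache);
+ /// assert!(wbm.enabled());
+ /// ```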
+ pub fn new_write_buffer_manager_with_cache(
+ buffer_size: size_t,
+ allow_stall: bool,
+ cache: Cache,
+ ) -> Self {
+ let inner = NonNull::new(unsafe {
+ ffi::rocksdb_write_buffer_manager_create_with_cache(
+ buffer_size,
+ cache.0.inner.as_ptr(),
+ allow_stall,
+ )
+ })
+ .unwrap();
+ WriteBufferManager(Arc::new(WriteBufferManagerWrapper { inner }))
+ }
+
+ /// Returns the WriteBufferManager memory usage in bytes.
+ pub fn get_usage(&self) -> usize {
+ unsafe { ffi::rocksdb_write_buffer_manager_memory_usage(self.0.inner.as_ptr()) }
+ }
+
+ /// Returns the current buffer size in bytes.
+ pub fn get_buffer_size(&self) -> usize {
+ unsafe { ffi::rocksdb_write_buffer_manager_buffer_size(self.0.inner.as_ptr()) }
+ }
+
+ /// Set the buffer size in bytes.
+ pub fn set_buffer_size(&self, new_size: usize) {
+ unsafe {
+ ffi::rocksdb_write_buffer_manager_set_buffer_size(self.0.inner.as_ptr(), new_size);
+ }
+ }
+
+ /// Returns whether the WriteBufferManager is enabled.
+ pub fn enabled(&self) -> bool {
+ unsafe { ffi::rocksdb_write_buffer_manager_enabled(self.0.inner.as_ptr()) }
+ }
+
+ /// Set the allow_stall flag.
+ pub fn set_allow_stall(&self, allow_stall: bool) {
+ unsafe {
+ ffi::rocksdb_write_buffer_manager_set_allow_stall(self.0.inner.as_ptr(), allow_stall);
+ }
+ }
+}
+
pub(crate) struct CacheWrapper {
pub(crate) inner: NonNull<ffi::rocksdb_cache_t>,
}
@@ -109,6 +211,7 @@ pub(crate) struct OptionsMustOutliveDB {
env: Option<Env>,
row_cache: Option<Cache>,
block_based: Option<BlockBasedOptionsMustOutliveDB>,
+ write_buffer_manager: Option<WriteBufferManager>,
}
impl OptionsMustOutliveDB {
@@ -120,6 +223,10 @@ impl OptionsMustOutliveDB {
.block_based
.as_ref()
.map(BlockBasedOptionsMustOutliveDB::clone),
+ write_buffer_manager: self
+ .write_buffer_manager
+ .as_ref()
+ .map(WriteBufferManager::clone),
}
}
}
@@ -277,23 +384,27 @@ pub struct IngestExternalFileOptions {
// rocksdb internally does not rely on thread-local information for its user-exposed types.
unsafe impl Send for Options {}
unsafe impl Send for WriteOptions {}
+unsafe impl Send for FlushOptions {}
unsafe impl Send for BlockBasedOptions {}
unsafe impl Send for CuckooTableOptions {}
unsafe impl Send for ReadOptions {}
unsafe impl Send for IngestExternalFileOptions {}
unsafe impl Send for CacheWrapper {}
unsafe impl Send for CompactOptions {}
+unsafe impl Send for WriteBufferManagerWrapper {}
// Sync is similarly safe for many types because they do not expose interior mutability, and their
// use within the rocksdb library is generally behind a const reference
unsafe impl Sync for Options {}
unsafe impl Sync for WriteOptions {}
+unsafe impl Sync for FlushOptions {}
unsafe impl Sync for BlockBasedOptions {}
unsafe impl Sync for CuckooTableOptions {}
unsafe impl Sync for ReadOptions {}
unsafe impl Sync for IngestExternalFileOptions {}
unsafe impl Sync for CacheWrapper {}
unsafe impl Sync for CompactOptions {}
+unsafe impl Sync for WriteBufferManagerWrapper {}
impl Drop for Options {
fn drop(&mut self) {
@@ -1474,9 +1585,12 @@ impl Options {
}
}
- pub fn optimize_for_point_lookup(&mut self, cache_size: u64) {
+ /// Use this if you don't need to keep the data sorted, i.e. you'll never use
+ /// an iterator, only Put() and Get() API calls.
+ ///
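+ /// A minimal sketch of the intended usage (DB API names assumed from this crate; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::{Options, DB};
+ ///
+ /// let mut opts = Options::default();
+ /// opts.create_if_missing(true);
+ /// // Block cache for point lookups, sized in megabytes.
+ /// opts.optimize_for_point_lookup(64);
+ /// let db = DB::open(&opts, "_point_lookup_db").unwrap();
+ /// db.put(b"key", b"value").unwrap();
+ /// assert!(db.get(b"key").unwrap().is_some());
+ /// ```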
+ pub fn optimize_for_point_lookup(&mut self, block_cache_size_mb: u64) {
unsafe {
- ffi::rocksdb_options_optimize_for_point_lookup(self.inner, cache_size);
+ ffi::rocksdb_options_optimize_for_point_lookup(self.inner, block_cache_size_mb);
}
}
@@ -2664,6 +2778,30 @@ impl Options {
}
}
+ /// StatsLevel can be used to reduce statistics overhead by skipping certain
+ /// types of stats in the stats collection process.
+ pub fn set_statistics_level(&self, level: StatsLevel) {
+ unsafe { ffi::rocksdb_options_set_statistics_level(self.inner, level as c_int) }
+ }
+
+ /// Returns the value of a cumulative DB counter if statistics collection is enabled.
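+ ///
+ /// A minimal sketch (ticker and level variant names assumed from this crate's `statistics` module; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::Options;
+ /// use rocksdb::statistics::{StatsLevel, Ticker};
+ ///
+ /// let mut opts = Options::default();
+ /// opts.enable_statistics();
+ /// opts.set_statistics_level(StatsLevel::All);
+ /// // ... open a DB with `opts` and perform some reads/writes ...
+ /// let cache_misses = opts.get_ticker_count(Ticker::BlockCacheMiss);
+ /// println!("block cache misses: {cache_misses}");
+ /// ```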
+ pub fn get_ticker_count(&self, ticker: Ticker) -> u64 {
+ unsafe { ffi::rocksdb_options_statistics_get_ticker_count(self.inner, ticker as u32) }
+ }
+
+ /// Gets histogram data from collected DB stats. Requires statistics to be enabled.
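+ ///
+ /// A minimal sketch (histogram variant and accessor names assumed; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::statistics::Histogram;
+ ///
+ /// // `opts` must have statistics enabled and belong to an opened, active DB.
+ /// let data = opts.get_histogram_data(Histogram::DbGet);
+ /// println!("median get latency (us): {}", data.median());
+ /// ```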
+ pub fn get_histogram_data(&self, histogram: Histogram) -> HistogramData {
+ unsafe {
+ let data = HistogramData::default();
+ ffi::rocksdb_options_statistics_get_histogram_data(
+ self.inner,
+ histogram as u32,
+ data.inner,
+ );
+ data
+ }
+ }
+
/// If not zero, dump `rocksdb.stats` to LOG every `stats_dump_period_sec`.
///
/// Default: `600` (10 mins)
@@ -2711,17 +2849,6 @@ impl Options {
}
}
- /// Specifies the file access pattern once a compaction is started.
- ///
- /// It will be applied to all input files of a compaction.
- ///
- /// Default: Normal
- pub fn set_access_hint_on_compaction_start(&mut self, pattern: AccessHint) {
- unsafe {
- ffi::rocksdb_options_set_access_hint_on_compaction_start(self.inner, pattern as c_int);
- }
- }
-
/// Enable/disable adaptive mutex, which spins in the user space before resorting to kernel.
///
/// This could reduce context switch when the mutex is not
@@ -3229,6 +3356,24 @@ impl Options {
);
}
}
+
+ /// <https://github.com/facebook/rocksdb/wiki/Write-Buffer-Manager>
+ /// Write buffer manager helps users control the total memory used by memtables across multiple column families and/or DB instances.
+ /// Users can enable this control in two ways:
+ ///
+ /// 1- Limit the total memtable usage across multiple column families and DBs under a threshold.
+ /// 2- Cost the memtable memory usage to block cache so that memory of RocksDB can be capped by the single limit.
+ /// The usage of a write buffer manager is similar to rate_limiter and sst_file_manager.
+ /// Users can create one write buffer manager object and pass it to all the options of column families or DBs whose memtable size they want to be controlled by this object.
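+ ///
+ /// A minimal sketch of sharing one manager across several `Options` (import path assumed; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::{Options, WriteBufferManager};
+ ///
+ /// // Cap total memtable memory for every DB/CF opened with these options at 64 MiB.
+ /// let wbm = WriteBufferManager::new_write_buffer_manager(64 * 1024 * 1024, false);
+ /// let mut opts_a = Options::default();
+ /// let mut opts_b = Options::default();
+ /// opts_a.set_write_buffer_manager(&wbm);
+ /// opts_b.set_write_buffer_manager(&wbm);
+ /// ```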
+ pub fn set_write_buffer_manager(&mut self, write_buffer_manager: &WriteBufferManager) {
+ unsafe {
+ ffi::rocksdb_options_set_write_buffer_manager(
+ self.inner,
+ write_buffer_manager.0.inner.as_ptr(),
+ );
+ }
+ self.outlive.write_buffer_manager = Some(write_buffer_manager.clone());
+ }
}
impl Default for Options {
@@ -3589,9 +3734,16 @@ impl ReadOptions {
}
}
- /// Automatically trim readahead size when iterating with an upper bound.
+ /// If auto_readahead_size is set to true, RocksDB will auto-tune the readahead size
+ /// during scans internally.
+ /// For this feature to take effect, iterate_upper_bound must also be specified.
///
- /// Default: `false`
+ /// NOTE: - Recommended for forward scans only.
+ /// - If a backward scan occurs, this option will be
+ /// disabled internally and won't be re-enabled even if a forward
+ /// scan is issued again afterwards.
+ ///
+ /// Default: `true`
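+ ///
+ /// A minimal sketch (iterator API names assumed from this crate; not run as a doctest):
+ ///
+ /// ```ignore
+ /// use rocksdb::ReadOptions;
+ ///
+ /// let mut ro = ReadOptions::default();
+ /// // auto_readahead_size only takes effect when an upper bound is set.
+ /// ro.set_iterate_upper_bound(b"zzz".to_vec());
+ /// ro.set_auto_readahead_size(true);
+ /// // Use `ro` for a forward scan, e.g. db.iterator_opt(IteratorMode::Start, ro).
+ /// ```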
pub fn set_auto_readahead_size(&mut self, v: bool) {
unsafe {
ffi::rocksdb_readoptions_set_auto_readahead_size(self.inner, c_uchar::from(v));
@@ -3761,20 +3913,15 @@ pub enum ChecksumType {
}
/// Used in [`PlainTableFactoryOptions`].
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub enum KeyEncodingType {
/// Always write full keys.
+ #[default]
Plain = 0,
/// Find opportunities to write the same prefix for multiple rows.
Prefix = 1,
}
-impl Default for KeyEncodingType {
- fn default() -> Self {
- KeyEncodingType::Plain
- }
-}
-
/// Used with DBOptions::set_plain_table_factory.
/// See official [wiki](https://github.com/facebook/rocksdb/wiki/PlainTable-Format) for more
/// information.
@@ -3828,17 +3975,6 @@ pub enum DBRecoveryMode {
SkipAnyCorruptedRecord = ffi::rocksdb_skip_any_corrupted_records_recovery as isize,
}
-/// File access pattern once a compaction has started
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
-#[repr(i32)]
-pub enum AccessHint {
- None = 0,
- Normal,
- Sequential,
- WillNeed,
-}
-
pub struct FifoCompactOptions {
pub(crate) inner: *mut ffi::rocksdb_fifo_compaction_options_t,
}
@@ -4159,7 +4295,8 @@ impl Drop for DBPath {
#[cfg(test)]
mod tests {
- use crate::{MemtableFactory, Options};
+ use crate::db_options::WriteBufferManager;
+ use crate::{Cache, MemtableFactory, Options};
#[test]
fn test_enable_statistics() {
@@ -4194,4 +4331,21 @@ mod tests {
let opts = Options::default();
assert!(opts.get_statistics().is_none());
}
+
+ #[test]
+ fn test_set_write_buffer_manager() {
+ let mut opts = Options::default();
+ let lrucache = Cache::new_lru_cache(100);
+ let write_buffer_manager =
+ WriteBufferManager::new_write_buffer_manager_with_cache(100, false, lrucache);
+ assert_eq!(write_buffer_manager.get_buffer_size(), 100);
+ assert_eq!(write_buffer_manager.get_usage(), 0);
+ assert!(write_buffer_manager.enabled());
+
+ opts.set_write_buffer_manager(&write_buffer_manager);
+ drop(opts);
+
+ // WriteBufferManager outlives options
+ assert!(write_buffer_manager.enabled());
+ }
}