From 14eefe8f3dd1c7f28a8faf6ed5a8b15afddacd45 Mon Sep 17 00:00:00 2001 From: Philippe McLean Date: Fri, 20 Feb 2026 12:44:17 -0800 Subject: [PATCH] feat(metrics): export per-level SST file counts to Prometheus Adds rocksdb_num_files_at_level{db, level} gauge, polled every 5s via the existing stats exporter loop. Covers levels 0-6 (RocksDB default). Useful for monitoring L0 file accumulation during initial sync. --- src/new_index/db.rs | 92 +++++++++++++++++++++---------------- src/new_index/db_metrics.rs | 5 ++ 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/src/new_index/db.rs b/src/new_index/db.rs index fb1766508..595aa39aa 100644 --- a/src/new_index/db.rs +++ b/src/new_index/db.rs @@ -13,6 +13,8 @@ use crate::util::{bincode, spawn_thread, Bytes}; static DB_VERSION: u32 = 2; +const ROCKSDB_NUM_LEVELS: u32 = 7; + #[derive(Debug, Eq, PartialEq)] pub struct DBRow { pub key: Vec, @@ -137,11 +139,11 @@ impl DB { self.db.set_options(&opts).unwrap(); } - pub fn raw_iterator(&self) -> rocksdb::DBRawIterator { + pub fn raw_iterator(&self) -> rocksdb::DBRawIterator<'_> { self.db.raw_iterator() } - pub fn iter_scan(&self, prefix: &[u8]) -> ScanIterator { + pub fn iter_scan(&self, prefix: &[u8]) -> ScanIterator<'_> { ScanIterator { prefix: prefix.to_vec(), iter: self.db.prefix_iterator(prefix), @@ -149,7 +151,7 @@ impl DB { } } - pub fn iter_scan_from(&self, prefix: &[u8], start_at: &[u8]) -> ScanIterator { + pub fn iter_scan_from(&self, prefix: &[u8], start_at: &[u8]) -> ScanIterator<'_> { let iter = self.db.iterator(rocksdb::IteratorMode::From( start_at, rocksdb::Direction::Forward, @@ -161,7 +163,7 @@ impl DB { } } - pub fn iter_scan_reverse(&self, prefix: &[u8], prefix_max: &[u8]) -> ReverseScanIterator { + pub fn iter_scan_reverse(&self, prefix: &[u8], prefix_max: &[u8]) -> ReverseScanIterator<'_> { let mut iter = self.db.raw_iterator(); iter.seek_for_prev(prefix_max); @@ -257,48 +259,58 @@ impl DB { let db_arc = Arc::clone(&self.db); let label = 
db_name.to_string(); - let update_gauge = move |gauge: &GaugeVec, property: &str| { + let update_gauge = move |gauge: &GaugeVec, property: &str, extra_labels: &[&str]| { if let Ok(Some(value)) = db_arc.property_value(property) { - if let Ok(v) = value.parse::<f64>() { - gauge.with_label_values(&[&label]).set(v); + match value.parse::<f64>() { + Ok(v) => { + let mut label_values = vec![label.as_str()]; + label_values.extend_from_slice(extra_labels); + gauge.with_label_values(&label_values).set(v); + } + Err(e) => warn!("failed to parse RocksDB property '{}': {}", property, e), } } }; spawn_thread("db_stats_exporter", move || loop { - update_gauge(&db_metrics.num_immutable_mem_table, "rocksdb.num-immutable-mem-table"); - update_gauge(&db_metrics.mem_table_flush_pending, "rocksdb.mem-table-flush-pending"); - update_gauge(&db_metrics.compaction_pending, "rocksdb.compaction-pending"); - update_gauge(&db_metrics.background_errors, "rocksdb.background-errors"); - update_gauge(&db_metrics.cur_size_active_mem_table, "rocksdb.cur-size-active-mem-table"); - update_gauge(&db_metrics.cur_size_all_mem_tables, "rocksdb.cur-size-all-mem-tables"); - update_gauge(&db_metrics.size_all_mem_tables, "rocksdb.size-all-mem-tables"); - update_gauge(&db_metrics.num_entries_active_mem_table, "rocksdb.num-entries-active-mem-table"); - update_gauge(&db_metrics.num_entries_imm_mem_tables, "rocksdb.num-entries-imm-mem-tables"); - update_gauge(&db_metrics.num_deletes_active_mem_table, "rocksdb.num-deletes-active-mem-table"); - update_gauge(&db_metrics.num_deletes_imm_mem_tables, "rocksdb.num-deletes-imm-mem-tables"); - update_gauge(&db_metrics.estimate_num_keys, "rocksdb.estimate-num-keys"); - update_gauge(&db_metrics.estimate_table_readers_mem, "rocksdb.estimate-table-readers-mem"); - update_gauge(&db_metrics.is_file_deletions_enabled, "rocksdb.is-file-deletions-enabled"); - update_gauge(&db_metrics.num_snapshots, "rocksdb.num-snapshots"); - update_gauge(&db_metrics.oldest_snapshot_time, 
"rocksdb.oldest-snapshot-time"); - update_gauge(&db_metrics.num_live_versions, "rocksdb.num-live-versions"); - update_gauge(&db_metrics.current_super_version_number, "rocksdb.current-super-version-number"); - update_gauge(&db_metrics.estimate_live_data_size, "rocksdb.estimate-live-data-size"); - update_gauge(&db_metrics.min_log_number_to_keep, "rocksdb.min-log-number-to-keep"); - update_gauge(&db_metrics.min_obsolete_sst_number_to_keep, "rocksdb.min-obsolete-sst-number-to-keep"); - update_gauge(&db_metrics.total_sst_files_size, "rocksdb.total-sst-files-size"); - update_gauge(&db_metrics.live_sst_files_size, "rocksdb.live-sst-files-size"); - update_gauge(&db_metrics.base_level, "rocksdb.base-level"); - update_gauge(&db_metrics.estimate_pending_compaction_bytes, "rocksdb.estimate-pending-compaction-bytes"); - update_gauge(&db_metrics.num_running_compactions, "rocksdb.num-running-compactions"); - update_gauge(&db_metrics.num_running_flushes, "rocksdb.num-running-flushes"); - update_gauge(&db_metrics.actual_delayed_write_rate, "rocksdb.actual-delayed-write-rate"); - update_gauge(&db_metrics.is_write_stopped, "rocksdb.is-write-stopped"); - update_gauge(&db_metrics.estimate_oldest_key_time, "rocksdb.estimate-oldest-key-time"); - update_gauge(&db_metrics.block_cache_capacity, "rocksdb.block-cache-capacity"); - update_gauge(&db_metrics.block_cache_usage, "rocksdb.block-cache-usage"); - update_gauge(&db_metrics.block_cache_pinned_usage, "rocksdb.block-cache-pinned-usage"); + update_gauge(&db_metrics.num_immutable_mem_table, "rocksdb.num-immutable-mem-table", &[]); + update_gauge(&db_metrics.mem_table_flush_pending, "rocksdb.mem-table-flush-pending", &[]); + update_gauge(&db_metrics.compaction_pending, "rocksdb.compaction-pending", &[]); + update_gauge(&db_metrics.background_errors, "rocksdb.background-errors", &[]); + update_gauge(&db_metrics.cur_size_active_mem_table, "rocksdb.cur-size-active-mem-table", &[]); + update_gauge(&db_metrics.cur_size_all_mem_tables, 
"rocksdb.cur-size-all-mem-tables", &[]); + update_gauge(&db_metrics.size_all_mem_tables, "rocksdb.size-all-mem-tables", &[]); + update_gauge(&db_metrics.num_entries_active_mem_table, "rocksdb.num-entries-active-mem-table", &[]); + update_gauge(&db_metrics.num_entries_imm_mem_tables, "rocksdb.num-entries-imm-mem-tables", &[]); + update_gauge(&db_metrics.num_deletes_active_mem_table, "rocksdb.num-deletes-active-mem-table", &[]); + update_gauge(&db_metrics.num_deletes_imm_mem_tables, "rocksdb.num-deletes-imm-mem-tables", &[]); + update_gauge(&db_metrics.estimate_num_keys, "rocksdb.estimate-num-keys", &[]); + update_gauge(&db_metrics.estimate_table_readers_mem, "rocksdb.estimate-table-readers-mem", &[]); + update_gauge(&db_metrics.is_file_deletions_enabled, "rocksdb.is-file-deletions-enabled", &[]); + update_gauge(&db_metrics.num_snapshots, "rocksdb.num-snapshots", &[]); + update_gauge(&db_metrics.oldest_snapshot_time, "rocksdb.oldest-snapshot-time", &[]); + update_gauge(&db_metrics.num_live_versions, "rocksdb.num-live-versions", &[]); + update_gauge(&db_metrics.current_super_version_number, "rocksdb.current-super-version-number", &[]); + update_gauge(&db_metrics.estimate_live_data_size, "rocksdb.estimate-live-data-size", &[]); + update_gauge(&db_metrics.min_log_number_to_keep, "rocksdb.min-log-number-to-keep", &[]); + update_gauge(&db_metrics.min_obsolete_sst_number_to_keep, "rocksdb.min-obsolete-sst-number-to-keep", &[]); + update_gauge(&db_metrics.total_sst_files_size, "rocksdb.total-sst-files-size", &[]); + update_gauge(&db_metrics.live_sst_files_size, "rocksdb.live-sst-files-size", &[]); + update_gauge(&db_metrics.base_level, "rocksdb.base-level", &[]); + update_gauge(&db_metrics.estimate_pending_compaction_bytes, "rocksdb.estimate-pending-compaction-bytes", &[]); + update_gauge(&db_metrics.num_running_compactions, "rocksdb.num-running-compactions", &[]); + update_gauge(&db_metrics.num_running_flushes, "rocksdb.num-running-flushes", &[]); + 
update_gauge(&db_metrics.actual_delayed_write_rate, "rocksdb.actual-delayed-write-rate", &[]); + update_gauge(&db_metrics.is_write_stopped, "rocksdb.is-write-stopped", &[]); + update_gauge(&db_metrics.estimate_oldest_key_time, "rocksdb.estimate-oldest-key-time", &[]); + update_gauge(&db_metrics.block_cache_capacity, "rocksdb.block-cache-capacity", &[]); + update_gauge(&db_metrics.block_cache_usage, "rocksdb.block-cache-usage", &[]); + update_gauge(&db_metrics.block_cache_pinned_usage, "rocksdb.block-cache-pinned-usage", &[]); + for level in 0..ROCKSDB_NUM_LEVELS { + let level_str = level.to_string(); + let prop = format!("rocksdb.num-files-at-level{}", level); + update_gauge(&db_metrics.num_files_at_level, &prop, &[&level_str]); + } thread::sleep(Duration::from_secs(5)); }); } diff --git a/src/new_index/db_metrics.rs b/src/new_index/db_metrics.rs index e8df0db43..5e19439dc 100644 --- a/src/new_index/db_metrics.rs +++ b/src/new_index/db_metrics.rs @@ -49,6 +49,7 @@ pub struct RocksDbMetrics { // Level metrics pub base_level: GaugeVec, + pub num_files_at_level: GaugeVec, // Write metrics pub actual_delayed_write_rate: GaugeVec, @@ -204,6 +205,10 @@ impl RocksDbMetrics { format!("rocksdb_base_level"), "Base level for compaction." ), labels), + num_files_at_level: metrics.gauge_vec(MetricOpts::new( + "rocksdb_num_files_at_level", + "Number of SST files at each compaction level." + ), &["db", "level"]), // Write metrics actual_delayed_write_rate: metrics.gauge_vec(MetricOpts::new(