From da3acaaacb8b9f45895efc41f688f1d9a764082e Mon Sep 17 00:00:00 2001 From: fromost Date: Sun, 26 Oct 2025 01:31:57 +0800 Subject: [PATCH] Optimize rocksdb --- Cargo.lock | 17 +++++++++++++++++ Cargo.toml | 1 + src/cli/sync.rs | 37 ++++++++++++++++--------------------- src/constants.rs | 9 ++------- src/helpers/db.rs | 8 +++++--- src/helpers/mod.rs | 19 +++++++++++++++++++ 6 files changed, 60 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7b5ab22..9931d1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1054,6 +1054,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "html5ever" version = "0.35.0" @@ -1785,6 +1791,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "num_threads" version = "0.1.7" @@ -2741,6 +2757,7 @@ dependencies = [ "image", "lazy_static", "log", + "num_cpus", "rat-cursor", "ratatui", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index 5e958da..4630d25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ serde_json = "1.0.145" image = "0.25.8" colored = "3.0.0" log = "0.4.28" +num_cpus = "1.17.0" [dependencies.rocksdb] version = "0.24.0" diff --git a/src/cli/sync.rs b/src/cli/sync.rs index 1ac27ee..1945368 100644 --- a/src/cli/sync.rs +++ b/src/cli/sync.rs @@ -6,6 +6,7 @@ use colored::Colorize; use crate::config::types::ApplicationConfig; use crate::constants::{DB_CF_OPTIONS, DB_OPTIONS}; use crate::crawler::{dlsite, DLSiteCrawler}; +use 
crate::helpers; use crate::helpers::db::RocksDB; use crate::models::DLSiteManiax; @@ -63,29 +64,23 @@ impl SyncDLSiteCommand { async fn sync_works(app_conf: &ApplicationConfig, db: &RocksDB) -> Result<()> { let crawler = DLSiteCrawler::new(); let mut rj_nums: Vec<String> = Vec::new(); - for path_str in app_conf.path_config.dlsite_paths.iter() { - let path = Path::new(path_str); - if !path.exists() { - return Err(eyre!("{} {}", path_str.blue(), "does not exist".red())); + let paths = app_conf.path_config.dlsite_paths.iter() + .map(|path| Path::new(path).to_path_buf()) + .collect::<Vec<_>>(); + let dirs = helpers::get_all_folders(&paths).await?; + for dir_path in dirs.iter() { + if !dir_path.is_dir() { + println!("{dir_path:?} is not a directory"); + continue; } - let dir_paths = path.read_dir()? - .filter_map(Result::ok) - .map(|e| e.path()) - .collect::<Vec<_>>(); - for dir_path in dir_paths.iter() { - if !dir_path.is_dir() { - println!("{dir_path:?} is not a directory"); - continue; - } - let dir_name = dir_path - .file_name().unwrap() - .to_str().unwrap(); - if !dlsite::is_valid_rj_number(dir_name) { - println!("{} {}", dir_path.to_str().unwrap().blue(), "is not a valid rj number, please add it manually".red()); - continue; - } - rj_nums.push(dir_name.to_string()); + let dir_name = dir_path + .file_name().unwrap() + .to_str().unwrap(); + if !dlsite::is_valid_rj_number(dir_name) { + println!("{} {}", dir_path.to_str().unwrap().blue(), "is not a valid rj number, please add it manually".red()); + continue; } + rj_nums.push(dir_name.to_string()); } let maniaxes = crawler.get_game_infos(rj_nums).await?; db.set_values(&maniaxes)?; diff --git a/src/constants.rs b/src/constants.rs index d0006cb..0c754d2 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -14,7 +14,7 @@ lazy_static! 
{ pub static ref APP_DB_DATA_DIR: PathBuf = APP_DATA_DIR.clone().join("db"); pub static ref DB_OPTIONS: rocksdb::Options = get_db_options(); - pub static ref DB_CF_OPTIONS: rocksdb::Options = get_cf_options(); + pub static ref DB_CF_OPTIONS: rocksdb::Options = rocksdb::Options::default(); } lazy_static! { @@ -26,12 +26,7 @@ fn get_db_options() -> rocksdb::Options { opts.create_missing_column_families(true); opts.create_if_missing(true); - - opts -} - -fn get_cf_options() -> rocksdb::Options { - let opts = rocksdb::Options::default(); + opts.increase_parallelism(num_cpus::get() as i32); opts } \ No newline at end of file diff --git a/src/helpers/db.rs b/src/helpers/db.rs index f957e46..9a2a0b6 100644 --- a/src/helpers/db.rs +++ b/src/helpers/db.rs @@ -1,5 +1,5 @@ use crate::constants::{APP_DB_DATA_DIR, DB_COLUMNS}; -use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options}; +use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options, ReadOptions}; use serde::{Serialize}; use serde::de::DeserializeOwned; use crate::models::RocksColumn; @@ -63,8 +63,10 @@ impl RocksDB { where TColumn: RocksColumn + DeserializeOwned { let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); - let values = self.db.iterator_cf(&cf, IteratorMode::Start) - .filter_map(|res| res.ok()) + let mut options = ReadOptions::default(); + options.set_async_io(true); + let values = self.db.iterator_cf_opt(&cf, options, IteratorMode::Start) + .filter_map(Result::ok) .map(|(k, v)| ( serde_json::from_slice::(&k).unwrap(), diff --git a/src/helpers/mod.rs b/src/helpers/mod.rs index 559e9d2..72ef5d1 100644 --- a/src/helpers/mod.rs +++ b/src/helpers/mod.rs @@ -1,5 +1,8 @@ pub mod db; +use std::path::PathBuf; +use color_eyre::eyre::eyre; +use color_eyre::owo_colors::OwoColorize; use tokio::fs; use crate::constants::{APP_CONFIG_DIR, APP_DATA_DIR, APP_DB_DATA_DIR}; use crate::crawler::DLSITE_IMG_FOLDER; @@ -19,4 +22,20 @@ pub async fn 
initialize_folders() -> color_eyre::Result<()> { fs::create_dir_all(APP_DB_DATA_DIR.as_path()).await?; } Ok(()) +} + +pub async fn get_all_folders(paths: &Vec<PathBuf>) -> color_eyre::Result<Vec<PathBuf>> { + let mut folders: Vec<PathBuf> = Vec::new(); + for path in paths { + let path = path.as_path(); + if !path.exists() { + return Err(eyre!("{:?} {}", path.blue(), "does not exist".red())); + } + + let mut dirs = fs::read_dir(path).await?; + while let Some(dir) = dirs.next_entry().await? { + folders.push(dir.path()); + } + } + Ok(folders) } \ No newline at end of file