From ab7f6fe206b756476ccf2c98ec1f6d800d313a16 Mon Sep 17 00:00:00 2001 From: fromost Date: Sun, 26 Oct 2025 18:25:30 +0800 Subject: [PATCH] Add Reference id --- Cargo.toml | 5 +++ src/cli/sync.rs | 22 +++++----- src/constants.rs | 6 +++ src/crawler/dlsite.rs | 69 ++++++++++++++++++++++---------- src/helpers/db.rs | 54 ++++++++++++++++--------- src/models/game.rs | 93 ++++++++++++++++++++++++++++++++++++++----- src/models/mod.rs | 9 +++++ 7 files changed, 200 insertions(+), 58 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4630d25..b67deaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,11 @@ authors = ["fromost"] license = "MIT" edition = "2024" +[profile.dev] +debug = true +incremental = true +lto = "fat" + [dependencies] color-eyre = "0.6.3" futures = "0.3.28" diff --git a/src/cli/sync.rs b/src/cli/sync.rs index 1945368..38074b9 100644 --- a/src/cli/sync.rs +++ b/src/cli/sync.rs @@ -1,14 +1,14 @@ use std::path::Path; use clap::{Args, Command, Parser, Subcommand}; -use color_eyre::eyre::eyre; use color_eyre::eyre::Result; use colored::Colorize; +use tokio::time::Instant; +use crate::models; use crate::config::types::ApplicationConfig; use crate::constants::{DB_CF_OPTIONS, DB_OPTIONS}; use crate::crawler::{dlsite, DLSiteCrawler}; use crate::helpers; use crate::helpers::db::RocksDB; -use crate::models::DLSiteManiax; #[derive(Parser, Debug)] pub(super) struct SyncCommand { @@ -50,10 +50,12 @@ impl SyncSubCommand { impl SyncDLSiteCommand { pub async fn handle(&self) -> color_eyre::Result<()> { + let now = Instant::now(); let app_conf = ApplicationConfig::get_config()?; - let db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?; + let mut db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?; Self::sync_genres(&app_conf).await?; - Self::sync_works(&app_conf, &db).await?; + Self::sync_works(&app_conf, &mut db).await?; + println!("{} Done in {:.2?}", "Syncing".green(), now.elapsed()); Ok(()) } @@ -61,14 +63,14 @@ impl SyncDLSiteCommand { Ok(()) } - async fn sync_works(app_conf: &ApplicationConfig, db: &RocksDB) -> Result<()> { + async fn sync_works(app_conf: &ApplicationConfig, db: &mut RocksDB) -> Result<()> { let crawler = DLSiteCrawler::new(); let mut rj_nums: Vec = Vec::new(); - let paths = app_conf.path_config.dlsite_paths.iter() + let config_paths = app_conf.path_config.dlsite_paths.iter() .map(|path| Path::new(path).to_path_buf()) .collect::>(); - let dirs = helpers::get_all_folders(&paths).await?; - for dir_path in dirs.iter() { + let dir_paths = helpers::get_all_folders(&config_paths).await?; + for dir_path in dir_paths.iter() { if !dir_path.is_dir() { println!("{dir_path:?} is not a directory"); continue; @@ -82,7 +84,9 @@ impl SyncDLSiteCommand { } rj_nums.push(dir_name.to_string()); } - let maniaxes = crawler.get_game_infos(rj_nums).await?; + let maniaxes: Vec = crawler.get_game_infos(rj_nums).await?.into_iter() + .map(|x| x.into()) + .collect::>(); db.set_values(&maniaxes)?; Ok(()) } diff --git a/src/constants.rs b/src/constants.rs index 0c754d2..9b5e8bd 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -28,5 +28,11 @@ fn get_db_options() -> rocksdb::Options { opts.create_if_missing(true); opts.increase_parallelism(num_cpus::get() as i32); + opts +} + +pub(crate) fn get_db_read_options() -> rocksdb::ReadOptions { + let mut opts = rocksdb::ReadOptions::default(); + opts.set_async_io(true); opts } \ No newline at end of file diff --git a/src/crawler/dlsite.rs b/src/crawler/dlsite.rs index 43718a8..dbf2da8 100644 --- a/src/crawler/dlsite.rs +++ b/src/crawler/dlsite.rs @@ -1,14 +1,18 @@ use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use color_eyre::eyre::eyre; +use color_eyre::owo_colors::OwoColorize; use reqwest::Url; -use color_eyre::Result; +use color_eyre::{Report, Result}; use colored::Colorize; +use futures::stream::FuturesUnordered; +use futures::StreamExt; use lazy_static::lazy_static; use scraper::{Html, Selector}; +use serde::{Deserialize, Serialize}; +use tokio::time::Instant; use crate::constants::{APP_DATA_DIR}; use crate::crawler::Crawler; -use crate::models::DLSiteManiax; //TODO: override locale with user one const DLSITE_URL: &str = "https://www.dlsite.com/"; @@ -24,6 +28,20 @@ pub struct DLSiteCrawler { crawler: Crawler, } +#[derive(Deserialize, Serialize, Debug, Clone)] +pub(crate) struct DLSiteManiax { + #[serde(rename = "work_name")] + pub(crate) title: String, + #[serde(rename = "work_image")] + pub(crate) work_image_url: String, + #[serde(rename = "dl_count")] + pub(crate) sells_count: u32, + #[serde(skip)] + pub(crate) genre_ids: Vec, + #[serde(skip)] + pub(crate) rj_num: String, +} + impl DLSiteCrawler { pub fn new() -> Self { Self { @@ -41,10 +59,35 @@ impl DLSiteCrawler { eyre!("Invalid numbers: {}", invalid_nums.join(", ")) ); } + let query = &format!("product_id={}", rj_nums.join(",")); let (maniax_result, _) = self.crawler .get_json::>(DLSITE_PRODUCT_API_ENDPOINT, Some(query)) .await?; + + Self::verify_all_works_exists(&maniax_result, rj_nums); + + let mut tasks = FuturesUnordered::new(); + for (rj_num, mut info) in maniax_result { + tasks.push(async { + let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}"); + let (_, html_result) = tokio::join!(self.save_main_image(&info, &rj_num), self.crawler.get_html(&html_path)); + let (html, _) = html_result?; + let genres = self.get_genres(&html).await?; + info.genre_ids = genres; + info.rj_num = rj_num; + Ok::(info) + }) + } + let mut maniax_infos = Vec::new(); + while let Some(result) = tasks.next().await { + maniax_infos.push(result?); + } + + Ok(maniax_infos) + } + + fn verify_all_works_exists(maniax_result: &HashMap, rj_nums: Vec) { let keys = maniax_result.keys() .map(|k| k.to_string()) .collect::>(); @@ -56,19 +99,6 @@ impl DLSiteCrawler { if !nums_diff.is_empty() { println!("Restricted/Removed Works: {}", nums_diff.join(", ").red()); } - - let mut maniax_infos = Vec::new(); - for (rj_num, mut info) in maniax_result { - self.save_main_image(&info, &rj_num).await?; - - let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}"); - let (html, _) = self.crawler.get_html(&html_path).await?; - let genres = self.get_genres(&html)?; - info.genre_ids = genres; - info.id = rj_num; - maniax_infos.push(info); - } - Ok(maniax_infos) } async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> { @@ -85,17 +115,16 @@ impl DLSiteCrawler { Ok(()) } - fn get_genres(&self, html: &Html) -> Result> { + async fn get_genres(&self, html: &Html) -> Result> { let selector = Result::unwrap( Selector::parse( "#work_outline > tbody:nth-child(1)" ) ); let result = html.select(&selector).next().unwrap(); - let genre_row = result.child_elements() - .filter(|e| - e.child_elements().any(|e| e.inner_html() == "ジャンル") // TODO: will not work with english - ).next().unwrap(); + let genre_rows = result.child_elements().collect::>(); + let genre_len = genre_rows.iter().count(); + let genre_row = genre_rows.iter().skip(genre_len - 2).next().unwrap(); let data = genre_row .child_elements().skip(1).next().unwrap() .child_elements().next().unwrap(); diff --git a/src/helpers/db.rs b/src/helpers/db.rs index 9a2a0b6..a21c21d 100644 --- a/src/helpers/db.rs +++ b/src/helpers/db.rs @@ -2,14 +2,15 @@ use crate::constants::{APP_DB_DATA_DIR, DB_COLUMNS}; use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options, ReadOptions}; use serde::{Serialize}; use serde::de::DeserializeOwned; -use crate::models::RocksColumn; +use crate::models::{RocksColumn, RocksReference, RocksReferences}; +use color_eyre::Result; pub struct RocksDB { db: OptimisticTransactionDB, } impl RocksDB { - pub fn new(db_opts: Options, cf_opts: Options) -> color_eyre::Result { + pub fn new(db_opts: Options, cf_opts: Options) -> Result { let cfs = DB_COLUMNS.iter() .map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), cf_opts.clone())) .collect::>(); @@ -24,18 +25,18 @@ impl RocksDB { Ok(rocks) } - pub fn get_value(&self, id: TColumn::Id) -> color_eyre::Result> - where TColumn: RocksColumn, TValue: DeserializeOwned + pub fn get_value(&self, id: &TColumn::Id) -> Result> + where TColumn: RocksColumn + DeserializeOwned { let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); - let query_res = self.db.get_cf(&cf, serde_json::to_string(&id)?)?; + let query_res = self.db.get_cf(&cf, serde_json::to_string(id)?)?; if query_res.is_none() { return Ok(None); } Ok(Some(serde_json::from_slice(&query_res.unwrap())?)) } - pub fn set_value(&self, value: &TColumn) -> color_eyre::Result<()> + pub fn set_value(&self, value: &TColumn) -> Result<()> where TColumn: RocksColumn + Serialize { let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); @@ -43,7 +44,7 @@ impl RocksDB { Ok(()) } - pub fn get_values(&self, ids: &[TColumn::Id]) -> color_eyre::Result> + pub fn get_values(&self, ids: &[TColumn::Id]) -> Result> where TColumn: RocksColumn + DeserializeOwned { let transaction = self.db.transaction(); @@ -59,26 +60,41 @@ impl RocksDB { Ok(values) } - pub fn get_all_values(&self) -> color_eyre::Result> + pub fn get_reference_value(&self, id: &TReference::Id) -> Result> + where TReference: RocksColumn + DeserializeOwned, + TColumn: RocksColumn + RocksReference + { + let reference = self.get_value::(id)?; + if reference.is_none() { + return Ok(None); + } + Ok(Some(reference.unwrap())) + } + + pub fn get_reference_values(&self, ids: &[TReference::Id]) -> Result> + where TReference: RocksColumn + DeserializeOwned, + TColumn: RocksColumn + RocksReferences + { + self.get_values::(ids) + } + + pub fn get_all_values(&self) -> Result> where TColumn: RocksColumn + DeserializeOwned { let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); - let mut options = ReadOptions::default(); - options.set_async_io(true); - let values = self.db.iterator_cf_opt(&cf, options, IteratorMode::Start) + let values = self.db.iterator_cf_opt(&cf, crate::constants::get_db_read_options(), IteratorMode::Start) .filter_map(Result::ok) - .map(|(k, v)| - ( - serde_json::from_slice::(&k).unwrap(), - serde_json::from_slice::(&v).unwrap() - ) - ) + .map(|(k, v)| { + let id = serde_json::from_slice::(&k).unwrap(); + let mut value = serde_json::from_slice::(&v).unwrap(); + value.set_id(id); + value + }) .collect::>(); Ok(values) } - - pub fn set_values(&self, values: &[TColumn]) -> color_eyre::Result<()> + pub fn set_values(&mut self, values: &[TColumn]) -> Result<()> where TColumn: RocksColumn + Serialize { let transaction = self.db.transaction(); diff --git a/src/models/game.rs b/src/models/game.rs index 2bc2ec0..65bd85e 100644 --- a/src/models/game.rs +++ b/src/models/game.rs @@ -1,6 +1,6 @@ use ratatui::widgets::ListState; use serde::{Deserialize, Serialize}; -use crate::models::RocksColumn; +use crate::models::{RocksColumn, RocksReference, RocksReferences}; pub(crate) struct GameList { games: Vec, @@ -9,26 +9,99 @@ pub(crate) struct GameList { #[derive(Clone, Debug, Serialize, Deserialize)] pub(crate) struct DLSiteManiax { - #[serde(rename = "work_name")] - pub title: String, - #[serde(rename = "work_image")] - pub work_image_url: String, - #[serde(rename = "dl_count")] - pub sells_count: u32, #[serde(skip)] + pub rj_num: String, pub genre_ids: Vec, - #[serde(skip)] - pub id: String, + pub name: String, + pub sells_count: u32 +} + +impl From for DLSiteManiax { + fn from(value: crate::crawler::DLSiteManiax) -> Self { + Self { + rj_num: value.rj_num, + genre_ids: value.genre_ids, + name: value.title, + sells_count: value.sells_count + } + } } impl RocksColumn for DLSiteManiax { type Id = String; fn get_id(&self) -> Self::Id { - self.id.clone() + self.rj_num.clone() + } + + fn set_id(&mut self, id: Self::Id) { + self.rj_num = id; } fn get_column_name() -> String { String::from("dl_games") } +} + +impl RocksReferences for DLSiteManiax { + fn get_reference_ids(&self) -> Vec<::Id> { + self.genre_ids.clone() + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) struct DLSiteGenre { + #[serde(skip)] + pub id: u16, + pub category_id: u16, + pub translations: Vec +} + +impl RocksColumn for DLSiteGenre { + type Id = u16; + + fn get_id(&self) -> Self::Id { + self.id.clone() + } + + fn set_id(&mut self, id: Self::Id) { + self.id = id; + } + + fn get_column_name() -> String { + String::from("dl_genres") + } +} + +impl RocksReference for DLSiteGenre { + fn get_reference_id(&self) -> ::Id { + self.category_id.clone() + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) struct DLSiteCategory { + #[serde(skip)] + pub id: u16, + pub translations: Vec +} + +impl RocksColumn for DLSiteCategory { + type Id = u16; + fn get_id(&self) -> Self::Id { + self.id.clone() + } + + fn set_id(&mut self, id: Self::Id) { + self.id = id; + } + + fn get_column_name() -> String { + String::from("dl_translations") + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) enum DLSiteTranslation { + EN(String), JP(String) } \ No newline at end of file diff --git a/src/models/mod.rs b/src/models/mod.rs index d30cb2a..3480e02 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -7,5 +7,14 @@ pub(crate) use game::*; pub trait RocksColumn { type Id: Serialize + DeserializeOwned; fn get_id(&self) -> Self::Id; + fn set_id(&mut self, id: Self::Id); fn get_column_name() -> String; +} + +pub trait RocksReference where T: RocksColumn { + fn get_reference_id(&self) -> T::Id; +} + +pub trait RocksReferences where T: RocksColumn { + fn get_reference_ids(&self) -> Vec; } \ No newline at end of file