Add Reference id

This commit is contained in:
2025-10-26 18:25:30 +08:00
parent 11dad7daac
commit ab7f6fe206
7 changed files with 200 additions and 58 deletions

View File

@@ -6,6 +6,11 @@ authors = ["fromost"]
license = "MIT" license = "MIT"
edition = "2024" edition = "2024"
[profile.dev]
debug = true
incremental = true
lto = "fat"
[dependencies] [dependencies]
color-eyre = "0.6.3" color-eyre = "0.6.3"
futures = "0.3.28" futures = "0.3.28"

View File

@@ -1,14 +1,14 @@
use std::path::Path; use std::path::Path;
use clap::{Args, Command, Parser, Subcommand}; use clap::{Args, Command, Parser, Subcommand};
use color_eyre::eyre::eyre;
use color_eyre::eyre::Result; use color_eyre::eyre::Result;
use colored::Colorize; use colored::Colorize;
use tokio::time::Instant;
use crate::models;
use crate::config::types::ApplicationConfig; use crate::config::types::ApplicationConfig;
use crate::constants::{DB_CF_OPTIONS, DB_OPTIONS}; use crate::constants::{DB_CF_OPTIONS, DB_OPTIONS};
use crate::crawler::{dlsite, DLSiteCrawler}; use crate::crawler::{dlsite, DLSiteCrawler};
use crate::helpers; use crate::helpers;
use crate::helpers::db::RocksDB; use crate::helpers::db::RocksDB;
use crate::models::DLSiteManiax;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
pub(super) struct SyncCommand { pub(super) struct SyncCommand {
@@ -50,10 +50,12 @@ impl SyncSubCommand {
impl SyncDLSiteCommand { impl SyncDLSiteCommand {
pub async fn handle(&self) -> color_eyre::Result<()> { pub async fn handle(&self) -> color_eyre::Result<()> {
let now = Instant::now();
let app_conf = ApplicationConfig::get_config()?; let app_conf = ApplicationConfig::get_config()?;
let db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?; let mut db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?;
Self::sync_genres(&app_conf).await?; Self::sync_genres(&app_conf).await?;
Self::sync_works(&app_conf, &db).await?; Self::sync_works(&app_conf, &mut db).await?;
println!("{} Done in {:.2?}", "Syncing".green(), now.elapsed());
Ok(()) Ok(())
} }
@@ -61,14 +63,14 @@ impl SyncDLSiteCommand {
Ok(()) Ok(())
} }
async fn sync_works(app_conf: &ApplicationConfig, db: &RocksDB) -> Result<()> { async fn sync_works(app_conf: &ApplicationConfig, db: &mut RocksDB) -> Result<()> {
let crawler = DLSiteCrawler::new(); let crawler = DLSiteCrawler::new();
let mut rj_nums: Vec<String> = Vec::new(); let mut rj_nums: Vec<String> = Vec::new();
let paths = app_conf.path_config.dlsite_paths.iter() let config_paths = app_conf.path_config.dlsite_paths.iter()
.map(|path| Path::new(path).to_path_buf()) .map(|path| Path::new(path).to_path_buf())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let dirs = helpers::get_all_folders(&paths).await?; let dir_paths = helpers::get_all_folders(&config_paths).await?;
for dir_path in dirs.iter() { for dir_path in dir_paths.iter() {
if !dir_path.is_dir() { if !dir_path.is_dir() {
println!("{dir_path:?} is not a directory"); println!("{dir_path:?} is not a directory");
continue; continue;
@@ -82,7 +84,9 @@ impl SyncDLSiteCommand {
} }
rj_nums.push(dir_name.to_string()); rj_nums.push(dir_name.to_string());
} }
let maniaxes = crawler.get_game_infos(rj_nums).await?; let maniaxes: Vec<models::DLSiteManiax> = crawler.get_game_infos(rj_nums).await?.into_iter()
.map(|x| x.into())
.collect::<Vec<_>>();
db.set_values(&maniaxes)?; db.set_values(&maniaxes)?;
Ok(()) Ok(())
} }

View File

@@ -28,5 +28,11 @@ fn get_db_options() -> rocksdb::Options {
opts.create_if_missing(true); opts.create_if_missing(true);
opts.increase_parallelism(num_cpus::get() as i32); opts.increase_parallelism(num_cpus::get() as i32);
opts
}
pub(crate) fn get_db_read_options() -> rocksdb::ReadOptions {
let mut opts = rocksdb::ReadOptions::default();
opts.set_async_io(true);
opts opts
} }

View File

@@ -1,14 +1,18 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::path::PathBuf; use std::path::PathBuf;
use color_eyre::eyre::eyre; use color_eyre::eyre::eyre;
use color_eyre::owo_colors::OwoColorize;
use reqwest::Url; use reqwest::Url;
use color_eyre::Result; use color_eyre::{Report, Result};
use colored::Colorize; use colored::Colorize;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use crate::constants::{APP_DATA_DIR}; use crate::constants::{APP_DATA_DIR};
use crate::crawler::Crawler; use crate::crawler::Crawler;
use crate::models::DLSiteManiax;
//TODO: override locale with user one //TODO: override locale with user one
const DLSITE_URL: &str = "https://www.dlsite.com/"; const DLSITE_URL: &str = "https://www.dlsite.com/";
@@ -24,6 +28,20 @@ pub struct DLSiteCrawler {
crawler: Crawler, crawler: Crawler,
} }
/// Deserialized row of the DLSite product-info API response.
/// The `serde(rename = …)` attributes map the API's JSON field names
/// onto the local field names.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub(crate) struct DLSiteManiax {
    #[serde(rename = "work_name")]
    pub(crate) title: String,
    #[serde(rename = "work_image")]
    pub(crate) work_image_url: String,
    #[serde(rename = "dl_count")]
    pub(crate) sells_count: u32,
    // Not part of the API payload (serde-skipped); presumably filled in
    // later from the scraped work page — TODO confirm against the caller.
    #[serde(skip)]
    pub(crate) genre_ids: Vec<u16>,
    // Not part of the API payload (serde-skipped); presumably set from the
    // RJ number used in the request — TODO confirm against the caller.
    #[serde(skip)]
    pub(crate) rj_num: String,
}
impl DLSiteCrawler { impl DLSiteCrawler {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
@@ -41,10 +59,35 @@ impl DLSiteCrawler {
eyre!("Invalid numbers: {}", invalid_nums.join(", ")) eyre!("Invalid numbers: {}", invalid_nums.join(", "))
); );
} }
let query = &format!("product_id={}", rj_nums.join(",")); let query = &format!("product_id={}", rj_nums.join(","));
let (maniax_result, _) = self.crawler let (maniax_result, _) = self.crawler
.get_json::<HashMap<String, DLSiteManiax>>(DLSITE_PRODUCT_API_ENDPOINT, Some(query)) .get_json::<HashMap<String, DLSiteManiax>>(DLSITE_PRODUCT_API_ENDPOINT, Some(query))
.await?; .await?;
Self::verify_all_works_exists(&maniax_result, rj_nums);
let mut tasks = FuturesUnordered::new();
for (rj_num, mut info) in maniax_result {
tasks.push(async {
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
let (_, html_result) = tokio::join!(self.save_main_image(&info, &rj_num), self.crawler.get_html(&html_path));
let (html, _) = html_result?;
let genres = self.get_genres(&html).await?;
info.genre_ids = genres;
info.rj_num = rj_num;
Ok::<DLSiteManiax, Report>(info)
})
}
let mut maniax_infos = Vec::new();
while let Some(result) = tasks.next().await {
maniax_infos.push(result?);
}
Ok(maniax_infos)
}
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) {
let keys = maniax_result.keys() let keys = maniax_result.keys()
.map(|k| k.to_string()) .map(|k| k.to_string())
.collect::<Vec<String>>(); .collect::<Vec<String>>();
@@ -56,19 +99,6 @@ impl DLSiteCrawler {
if !nums_diff.is_empty() { if !nums_diff.is_empty() {
println!("Restricted/Removed Works: {}", nums_diff.join(", ").red()); println!("Restricted/Removed Works: {}", nums_diff.join(", ").red());
} }
let mut maniax_infos = Vec::new();
for (rj_num, mut info) in maniax_result {
self.save_main_image(&info, &rj_num).await?;
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
let (html, _) = self.crawler.get_html(&html_path).await?;
let genres = self.get_genres(&html)?;
info.genre_ids = genres;
info.id = rj_num;
maniax_infos.push(info);
}
Ok(maniax_infos)
} }
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> { async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
@@ -85,17 +115,16 @@ impl DLSiteCrawler {
Ok(()) Ok(())
} }
fn get_genres(&self, html: &Html) -> Result<Vec<u16>> { async fn get_genres(&self, html: &Html) -> Result<Vec<u16>> {
let selector = Result::unwrap( let selector = Result::unwrap(
Selector::parse( Selector::parse(
"#work_outline > tbody:nth-child(1)" "#work_outline > tbody:nth-child(1)"
) )
); );
let result = html.select(&selector).next().unwrap(); let result = html.select(&selector).next().unwrap();
let genre_row = result.child_elements() let genre_rows = result.child_elements().collect::<Vec<_>>();
.filter(|e| let genre_len = genre_rows.iter().count();
e.child_elements().any(|e| e.inner_html() == "ジャンル") // TODO: will not work with english let genre_row = genre_rows.iter().skip(genre_len - 2).next().unwrap();
).next().unwrap();
let data = genre_row let data = genre_row
.child_elements().skip(1).next().unwrap() .child_elements().skip(1).next().unwrap()
.child_elements().next().unwrap(); .child_elements().next().unwrap();

View File

@@ -2,14 +2,15 @@ use crate::constants::{APP_DB_DATA_DIR, DB_COLUMNS};
use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options, ReadOptions}; use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options, ReadOptions};
use serde::{Serialize}; use serde::{Serialize};
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
use crate::models::RocksColumn; use crate::models::{RocksColumn, RocksReference, RocksReferences};
use color_eyre::Result;
pub struct RocksDB { pub struct RocksDB {
db: OptimisticTransactionDB, db: OptimisticTransactionDB,
} }
impl RocksDB { impl RocksDB {
pub fn new(db_opts: Options, cf_opts: Options) -> color_eyre::Result<Self> { pub fn new(db_opts: Options, cf_opts: Options) -> Result<Self> {
let cfs = DB_COLUMNS.iter() let cfs = DB_COLUMNS.iter()
.map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), cf_opts.clone())) .map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), cf_opts.clone()))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@@ -24,18 +25,18 @@ impl RocksDB {
Ok(rocks) Ok(rocks)
} }
pub fn get_value<TValue, TColumn>(&self, id: TColumn::Id) -> color_eyre::Result<Option<TValue>> pub fn get_value<TColumn>(&self, id: &TColumn::Id) -> Result<Option<TColumn>>
where TColumn: RocksColumn, TValue: DeserializeOwned where TColumn: RocksColumn + DeserializeOwned
{ {
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
let query_res = self.db.get_cf(&cf, serde_json::to_string(&id)?)?; let query_res = self.db.get_cf(&cf, serde_json::to_string(id)?)?;
if query_res.is_none() { if query_res.is_none() {
return Ok(None); return Ok(None);
} }
Ok(Some(serde_json::from_slice(&query_res.unwrap())?)) Ok(Some(serde_json::from_slice(&query_res.unwrap())?))
} }
pub fn set_value<TColumn>(&self, value: &TColumn) -> color_eyre::Result<()> pub fn set_value<TColumn>(&self, value: &TColumn) -> Result<()>
where TColumn: RocksColumn + Serialize where TColumn: RocksColumn + Serialize
{ {
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
@@ -43,7 +44,7 @@ impl RocksDB {
Ok(()) Ok(())
} }
pub fn get_values<TColumn>(&self, ids: &[TColumn::Id]) -> color_eyre::Result<Vec<TColumn>> pub fn get_values<TColumn>(&self, ids: &[TColumn::Id]) -> Result<Vec<TColumn>>
where TColumn: RocksColumn + DeserializeOwned where TColumn: RocksColumn + DeserializeOwned
{ {
let transaction = self.db.transaction(); let transaction = self.db.transaction();
@@ -59,26 +60,41 @@ impl RocksDB {
Ok(values) Ok(values)
} }
pub fn get_all_values<TColumn>(&self) -> color_eyre::Result<Vec<(TColumn::Id, TColumn)>> pub fn get_reference_value<TReference, TColumn>(&self, id: &TReference::Id) -> Result<Option<TReference>>
where TReference: RocksColumn + DeserializeOwned,
TColumn: RocksColumn + RocksReference<TReference>
{
let reference = self.get_value::<TReference>(id)?;
if reference.is_none() {
return Ok(None);
}
Ok(Some(reference.unwrap()))
}
pub fn get_reference_values<TReference, TColumn>(&self, ids: &[TReference::Id]) -> Result<Vec<TReference>>
where TReference: RocksColumn + DeserializeOwned,
TColumn: RocksColumn + RocksReferences<TReference>
{
self.get_values::<TReference>(ids)
}
pub fn get_all_values<TColumn>(&self) -> Result<Vec<TColumn>>
where TColumn: RocksColumn + DeserializeOwned where TColumn: RocksColumn + DeserializeOwned
{ {
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap(); let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
let mut options = ReadOptions::default(); let values = self.db.iterator_cf_opt(&cf, crate::constants::get_db_read_options(), IteratorMode::Start)
options.set_async_io(true);
let values = self.db.iterator_cf_opt(&cf, options, IteratorMode::Start)
.filter_map(Result::ok) .filter_map(Result::ok)
.map(|(k, v)| .map(|(k, v)| {
( let id = serde_json::from_slice::<TColumn::Id>(&k).unwrap();
serde_json::from_slice::<TColumn::Id>(&k).unwrap(), let mut value = serde_json::from_slice::<TColumn>(&v).unwrap();
serde_json::from_slice::<TColumn>(&v).unwrap() value.set_id(id);
) value
) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
Ok(values) Ok(values)
} }
pub fn set_values<TColumn>(&mut self, values: &[TColumn]) -> Result<()>
pub fn set_values<TColumn>(&self, values: &[TColumn]) -> color_eyre::Result<()>
where TColumn: RocksColumn + Serialize where TColumn: RocksColumn + Serialize
{ {
let transaction = self.db.transaction(); let transaction = self.db.transaction();

View File

@@ -1,6 +1,6 @@
use ratatui::widgets::ListState; use ratatui::widgets::ListState;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::models::RocksColumn; use crate::models::{RocksColumn, RocksReference, RocksReferences};
pub(crate) struct GameList<T> { pub(crate) struct GameList<T> {
games: Vec<T>, games: Vec<T>,
@@ -9,26 +9,99 @@ pub(crate) struct GameList<T> {
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct DLSiteManiax { pub(crate) struct DLSiteManiax {
#[serde(rename = "work_name")]
pub title: String,
#[serde(rename = "work_image")]
pub work_image_url: String,
#[serde(rename = "dl_count")]
pub sells_count: u32,
#[serde(skip)] #[serde(skip)]
pub rj_num: String,
pub genre_ids: Vec<u16>, pub genre_ids: Vec<u16>,
#[serde(skip)] pub name: String,
pub id: String, pub sells_count: u32
}
impl From<crate::crawler::dlsite::DLSiteManiax> for DLSiteManiax {
fn from(value: crate::crawler::DLSiteManiax) -> Self {
Self {
rj_num: value.rj_num,
genre_ids: value.genre_ids,
name: value.title,
sells_count: value.sells_count
}
}
} }
impl RocksColumn for DLSiteManiax { impl RocksColumn for DLSiteManiax {
type Id = String; type Id = String;
fn get_id(&self) -> Self::Id { fn get_id(&self) -> Self::Id {
self.id.clone() self.rj_num.clone()
}
fn set_id(&mut self, id: Self::Id) {
self.rj_num = id;
} }
fn get_column_name() -> String { fn get_column_name() -> String {
String::from("dl_games") String::from("dl_games")
} }
}
/// Exposes this work's genre ids as reference keys into the genre column.
impl RocksReferences<DLSiteGenre> for DLSiteManiax {
    fn get_reference_ids(&self) -> Vec<<DLSiteGenre as RocksColumn>::Id> {
        // Copy the id list out; callers own the returned Vec.
        self.genre_ids.to_vec()
    }
}
/// A DLSite genre tag, persisted as its own column row.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct DLSiteGenre {
    // Row key; serde-skipped because it is restored from the DB key via
    // `set_id` rather than stored in the serialized value.
    #[serde(skip)]
    pub id: u16,
    // Id of the owning `DLSiteCategory` row (see `RocksReference` impl).
    pub category_id: u16,
    pub translations: Vec<DLSiteTranslation>
}
/// Keyed storage for genres: `u16` ids in the `dl_genres` column family.
impl RocksColumn for DLSiteGenre {
    type Id = u16;
    fn get_id(&self) -> Self::Id {
        // `u16` is `Copy`; the previous `.clone()` was redundant
        // (clippy::clone_on_copy).
        self.id
    }
    fn set_id(&mut self, id: Self::Id) {
        self.id = id;
    }
    fn get_column_name() -> String {
        String::from("dl_genres")
    }
}
/// Each genre references exactly one category row.
impl RocksReference<DLSiteCategory> for DLSiteGenre {
    fn get_reference_id(&self) -> <DLSiteCategory as RocksColumn>::Id {
        // `u16` is `Copy`; the previous `.clone()` was redundant
        // (clippy::clone_on_copy).
        self.category_id
    }
}
/// A DLSite genre category (the grouping a `DLSiteGenre` belongs to).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct DLSiteCategory {
    // Row key; serde-skipped because it is restored from the DB key via
    // `set_id` rather than stored in the serialized value.
    #[serde(skip)]
    pub id: u16,
    pub translations: Vec<DLSiteTranslation>
}
/// Keyed storage for categories, addressed by `u16` id.
impl RocksColumn for DLSiteCategory {
    type Id = u16;
    fn get_id(&self) -> Self::Id {
        // `u16` is `Copy`; the previous `.clone()` was redundant
        // (clippy::clone_on_copy).
        self.id
    }
    fn set_id(&mut self, id: Self::Id) {
        self.id = id;
    }
    fn get_column_name() -> String {
        // NOTE(review): "dl_translations" looks like a copy-paste slip for a
        // category table (cf. "dl_games"/"dl_genres" elsewhere). Left as-is
        // because renaming changes which column family is opened — confirm
        // against DB_COLUMNS before changing.
        String::from("dl_translations")
    }
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) enum DLSiteTranslation {
EN(String), JP(String)
} }

View File

@@ -7,5 +7,14 @@ pub(crate) use game::*;
pub trait RocksColumn { pub trait RocksColumn {
type Id: Serialize + DeserializeOwned; type Id: Serialize + DeserializeOwned;
fn get_id(&self) -> Self::Id; fn get_id(&self) -> Self::Id;
fn set_id(&mut self, id: Self::Id);
fn get_column_name() -> String; fn get_column_name() -> String;
}
/// Declares that a column type references exactly one row of column `T`.
pub trait RocksReference<T> where T: RocksColumn {
    // Returns the id of the single referenced row in `T`'s column.
    fn get_reference_id(&self) -> T::Id;
}
pub trait RocksReferences<T> where T: RocksColumn {
fn get_reference_ids(&self) -> Vec<T::Id>;
} }