Add Reference id

This commit is contained in:
2025-10-26 18:25:30 +08:00
parent 11dad7daac
commit ab7f6fe206
7 changed files with 200 additions and 58 deletions

View File

@@ -1,14 +1,14 @@
use std::path::Path;
use clap::{Args, Command, Parser, Subcommand};
use color_eyre::eyre::eyre;
use color_eyre::eyre::Result;
use colored::Colorize;
use tokio::time::Instant;
use crate::models;
use crate::config::types::ApplicationConfig;
use crate::constants::{DB_CF_OPTIONS, DB_OPTIONS};
use crate::crawler::{dlsite, DLSiteCrawler};
use crate::helpers;
use crate::helpers::db::RocksDB;
use crate::models::DLSiteManiax;
#[derive(Parser, Debug)]
pub(super) struct SyncCommand {
@@ -50,10 +50,12 @@ impl SyncSubCommand {
impl SyncDLSiteCommand {
/// Runs the DLsite sync: loads the application config, opens the database,
/// syncs genres and then works, and prints the total elapsed time.
///
/// # Errors
/// Propagates any error from loading the config, opening the database, or
/// either sync step.
pub async fn handle(&self) -> color_eyre::Result<()> {
let now = Instant::now();
let app_conf = ApplicationConfig::get_config()?;
let db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?;
let mut db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?;
Self::sync_genres(&app_conf).await?;
Self::sync_works(&app_conf, &db).await?;
Self::sync_works(&app_conf, &mut db).await?;
println!("{} Done in {:.2?}", "Syncing".green(), now.elapsed());
Ok(())
}
@@ -61,14 +63,14 @@ impl SyncDLSiteCommand {
Ok(())
}
async fn sync_works(app_conf: &ApplicationConfig, db: &RocksDB) -> Result<()> {
async fn sync_works(app_conf: &ApplicationConfig, db: &mut RocksDB) -> Result<()> {
let crawler = DLSiteCrawler::new();
let mut rj_nums: Vec<String> = Vec::new();
let paths = app_conf.path_config.dlsite_paths.iter()
let config_paths = app_conf.path_config.dlsite_paths.iter()
.map(|path| Path::new(path).to_path_buf())
.collect::<Vec<_>>();
let dirs = helpers::get_all_folders(&paths).await?;
for dir_path in dirs.iter() {
let dir_paths = helpers::get_all_folders(&config_paths).await?;
for dir_path in dir_paths.iter() {
if !dir_path.is_dir() {
println!("{dir_path:?} is not a directory");
continue;
@@ -82,7 +84,9 @@ impl SyncDLSiteCommand {
}
rj_nums.push(dir_name.to_string());
}
let maniaxes = crawler.get_game_infos(rj_nums).await?;
let maniaxes: Vec<models::DLSiteManiax> = crawler.get_game_infos(rj_nums).await?.into_iter()
.map(|x| x.into())
.collect::<Vec<_>>();
db.set_values(&maniaxes)?;
Ok(())
}

View File

@@ -28,5 +28,11 @@ fn get_db_options() -> rocksdb::Options {
opts.create_if_missing(true);
opts.increase_parallelism(num_cpus::get() as i32);
opts
}
/// Builds RocksDB read options: the library defaults with async I/O enabled.
pub(crate) fn get_db_read_options() -> rocksdb::ReadOptions {
    let mut read_options = rocksdb::ReadOptions::default();
    read_options.set_async_io(true);
    read_options
}

View File

@@ -1,14 +1,18 @@
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use color_eyre::eyre::eyre;
use color_eyre::owo_colors::OwoColorize;
use reqwest::Url;
use color_eyre::Result;
use color_eyre::{Report, Result};
use colored::Colorize;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use lazy_static::lazy_static;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use tokio::time::Instant;
use crate::constants::{APP_DATA_DIR};
use crate::crawler::Crawler;
use crate::models::DLSiteManiax;
// TODO: override the locale with the user's own
const DLSITE_URL: &str = "https://www.dlsite.com/";
@@ -24,6 +28,20 @@ pub struct DLSiteCrawler {
crawler: Crawler,
}
/// Product info for a single DLsite work, deserialized from the product API
/// response (the response is read as a map keyed by RJ number).
#[derive(Deserialize, Serialize, Debug, Clone)]
pub(crate) struct DLSiteManiax {
// Read from the `work_name` field of the API payload.
#[serde(rename = "work_name")]
pub(crate) title: String,
// Read from the `work_image` field of the API payload.
#[serde(rename = "work_image")]
pub(crate) work_image_url: String,
// Read from the `dl_count` field of the API payload.
#[serde(rename = "dl_count")]
pub(crate) sells_count: u32,
// Not part of the API payload; filled in from the scraped work page after deserialization.
#[serde(skip)]
pub(crate) genre_ids: Vec<u16>,
// Not part of the API payload; set from the response map key after deserialization.
#[serde(skip)]
pub(crate) rj_num: String,
}
impl DLSiteCrawler {
pub fn new() -> Self {
Self {
@@ -41,10 +59,35 @@ impl DLSiteCrawler {
eyre!("Invalid numbers: {}", invalid_nums.join(", "))
);
}
let query = &format!("product_id={}", rj_nums.join(","));
let (maniax_result, _) = self.crawler
.get_json::<HashMap<String, DLSiteManiax>>(DLSITE_PRODUCT_API_ENDPOINT, Some(query))
.await?;
Self::verify_all_works_exists(&maniax_result, rj_nums);
let mut tasks = FuturesUnordered::new();
for (rj_num, mut info) in maniax_result {
tasks.push(async {
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
let (_, html_result) = tokio::join!(self.save_main_image(&info, &rj_num), self.crawler.get_html(&html_path));
let (html, _) = html_result?;
let genres = self.get_genres(&html).await?;
info.genre_ids = genres;
info.rj_num = rj_num;
Ok::<DLSiteManiax, Report>(info)
})
}
let mut maniax_infos = Vec::new();
while let Some(result) = tasks.next().await {
maniax_infos.push(result?);
}
Ok(maniax_infos)
}
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) {
let keys = maniax_result.keys()
.map(|k| k.to_string())
.collect::<Vec<String>>();
@@ -56,19 +99,6 @@ impl DLSiteCrawler {
if !nums_diff.is_empty() {
println!("Restricted/Removed Works: {}", nums_diff.join(", ").red());
}
let mut maniax_infos = Vec::new();
for (rj_num, mut info) in maniax_result {
self.save_main_image(&info, &rj_num).await?;
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
let (html, _) = self.crawler.get_html(&html_path).await?;
let genres = self.get_genres(&html)?;
info.genre_ids = genres;
info.id = rj_num;
maniax_infos.push(info);
}
Ok(maniax_infos)
}
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
@@ -85,17 +115,16 @@ impl DLSiteCrawler {
Ok(())
}
fn get_genres(&self, html: &Html) -> Result<Vec<u16>> {
async fn get_genres(&self, html: &Html) -> Result<Vec<u16>> {
let selector = Result::unwrap(
Selector::parse(
"#work_outline > tbody:nth-child(1)"
)
);
let result = html.select(&selector).next().unwrap();
let genre_row = result.child_elements()
.filter(|e|
e.child_elements().any(|e| e.inner_html() == "ジャンル") // TODO: will not work with english
).next().unwrap();
let genre_rows = result.child_elements().collect::<Vec<_>>();
let genre_len = genre_rows.iter().count();
let genre_row = genre_rows.iter().skip(genre_len - 2).next().unwrap();
let data = genre_row
.child_elements().skip(1).next().unwrap()
.child_elements().next().unwrap();

View File

@@ -2,14 +2,15 @@ use crate::constants::{APP_DB_DATA_DIR, DB_COLUMNS};
use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options, ReadOptions};
use serde::{Serialize};
use serde::de::DeserializeOwned;
use crate::models::RocksColumn;
use crate::models::{RocksColumn, RocksReference, RocksReferences};
use color_eyre::Result;
/// Wrapper around a RocksDB `OptimisticTransactionDB` handle.
pub struct RocksDB {
// Underlying database handle; all reads and writes go through it.
db: OptimisticTransactionDB,
}
impl RocksDB {
pub fn new(db_opts: Options, cf_opts: Options) -> color_eyre::Result<Self> {
pub fn new(db_opts: Options, cf_opts: Options) -> Result<Self> {
let cfs = DB_COLUMNS.iter()
.map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), cf_opts.clone()))
.collect::<Vec<_>>();
@@ -24,18 +25,18 @@ impl RocksDB {
Ok(rocks)
}
/// Looks up a single value by id in the column family named by
/// `TColumn::get_column_name()`.
///
/// The id is JSON-serialized to form the key, and a stored value is
/// JSON-deserialized before being returned. Returns `Ok(None)` when nothing
/// is stored under the key.
///
/// # Errors
/// Fails if the id cannot be serialized, the read fails, or the stored bytes
/// cannot be deserialized.
///
/// # Panics
/// Panics if the column family handle cannot be found.
pub fn get_value<TValue, TColumn>(&self, id: TColumn::Id) -> color_eyre::Result<Option<TValue>>
where TColumn: RocksColumn, TValue: DeserializeOwned
pub fn get_value<TColumn>(&self, id: &TColumn::Id) -> Result<Option<TColumn>>
where TColumn: RocksColumn + DeserializeOwned
{
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
let query_res = self.db.get_cf(&cf, serde_json::to_string(&id)?)?;
let query_res = self.db.get_cf(&cf, serde_json::to_string(id)?)?;
if query_res.is_none() {
return Ok(None);
}
Ok(Some(serde_json::from_slice(&query_res.unwrap())?))
}
pub fn set_value<TColumn>(&self, value: &TColumn) -> color_eyre::Result<()>
pub fn set_value<TColumn>(&self, value: &TColumn) -> Result<()>
where TColumn: RocksColumn + Serialize
{
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
@@ -43,7 +44,7 @@ impl RocksDB {
Ok(())
}
pub fn get_values<TColumn>(&self, ids: &[TColumn::Id]) -> color_eyre::Result<Vec<TColumn>>
pub fn get_values<TColumn>(&self, ids: &[TColumn::Id]) -> Result<Vec<TColumn>>
where TColumn: RocksColumn + DeserializeOwned
{
let transaction = self.db.transaction();
@@ -59,26 +60,41 @@ impl RocksDB {
Ok(values)
}
pub fn get_all_values<TColumn>(&self) -> color_eyre::Result<Vec<(TColumn::Id, TColumn)>>
/// Fetches the `TReference` value stored under `id`.
///
/// `TColumn` takes no part in the lookup itself; its bound only requires, at
/// compile time, that the column declares a single reference to
/// `TReference` via `RocksReference`. The read is a plain `get_value` on the
/// reference's own column family.
///
/// Returns `Ok(None)` when nothing is stored under `id`.
///
/// # Errors
/// Propagates any error from `get_value`.
pub fn get_reference_value<TReference, TColumn>(&self, id: &TReference::Id) -> Result<Option<TReference>>
where
    TReference: RocksColumn + DeserializeOwned,
    TColumn: RocksColumn + RocksReference<TReference>,
{
    // `get_value` already yields `Result<Option<TReference>>`, so the result
    // is returned as-is instead of being unwrapped and wrapped again.
    self.get_value::<TReference>(id)
}
/// Fetches the `TReference` values stored under `ids`.
///
/// `TColumn` takes no part in the lookup itself; its bound only requires, at
/// compile time, that the column declares a list of references to
/// `TReference` via `RocksReferences`. The read is delegated to `get_values`.
pub fn get_reference_values<TReference, TColumn>(&self, ids: &[TReference::Id]) -> Result<Vec<TReference>>
where TReference: RocksColumn + DeserializeOwned,
TColumn: RocksColumn + RocksReferences<TReference>
{
self.get_values::<TReference>(ids)
}
/// Reads every entry of the column family named by
/// `TColumn::get_column_name()`, iterating from the start with async I/O
/// enabled on the read options.
///
/// Iterator entries that come back as `Err` are silently dropped by
/// `filter_map(Result::ok)`. Keys and values are JSON-deserialized.
///
/// # Panics
/// Panics if the column family handle cannot be found, or if any key or
/// value fails to deserialize.
pub fn get_all_values<TColumn>(&self) -> Result<Vec<TColumn>>
where TColumn: RocksColumn + DeserializeOwned
{
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
let mut options = ReadOptions::default();
options.set_async_io(true);
let values = self.db.iterator_cf_opt(&cf, options, IteratorMode::Start)
let values = self.db.iterator_cf_opt(&cf, crate::constants::get_db_read_options(), IteratorMode::Start)
.filter_map(Result::ok)
.map(|(k, v)|
(
serde_json::from_slice::<TColumn::Id>(&k).unwrap(),
serde_json::from_slice::<TColumn>(&v).unwrap()
)
)
.map(|(k, v)| {
let id = serde_json::from_slice::<TColumn::Id>(&k).unwrap();
let mut value = serde_json::from_slice::<TColumn>(&v).unwrap();
value.set_id(id);
value
})
.collect::<Vec<_>>();
Ok(values)
}
pub fn set_values<TColumn>(&self, values: &[TColumn]) -> color_eyre::Result<()>
pub fn set_values<TColumn>(&mut self, values: &[TColumn]) -> Result<()>
where TColumn: RocksColumn + Serialize
{
let transaction = self.db.transaction();

View File

@@ -1,6 +1,6 @@
use ratatui::widgets::ListState;
use serde::{Deserialize, Serialize};
use crate::models::RocksColumn;
use crate::models::{RocksColumn, RocksReference, RocksReferences};
pub(crate) struct GameList<T> {
games: Vec<T>,
@@ -9,26 +9,99 @@ pub(crate) struct GameList<T> {
/// DLsite work record; its `RocksColumn` impl stores it in the `dl_games`
/// column family.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct DLSiteManiax {
#[serde(rename = "work_name")]
pub title: String,
#[serde(rename = "work_image")]
pub work_image_url: String,
#[serde(rename = "dl_count")]
pub sells_count: u32,
#[serde(skip)]
pub rj_num: String,
pub genre_ids: Vec<u16>,
#[serde(skip)]
pub id: String,
pub name: String,
pub sells_count: u32
}
/// Converts the crawler's record into this module's `DLSiteManiax`.
///
/// `title` becomes `name`; `rj_num`, `genre_ids` and `sells_count` are moved
/// across unchanged. `work_image_url` is not copied by this conversion.
impl From<crate::crawler::dlsite::DLSiteManiax> for DLSiteManiax {
    // The parameter type uses the same path as the impl header so both name
    // the source type in one consistent way.
    fn from(value: crate::crawler::dlsite::DLSiteManiax) -> Self {
        Self {
            rj_num: value.rj_num,
            genre_ids: value.genre_ids,
            name: value.title,
            sells_count: value.sells_count,
        }
    }
}
/// Stores works in the `dl_games` column family, keyed by a `String` id.
impl RocksColumn for DLSiteManiax {
type Id = String;
fn get_id(&self) -> Self::Id {
self.id.clone()
self.rj_num.clone()
}
fn set_id(&mut self, id: Self::Id) {
self.rj_num = id;
}
fn get_column_name() -> String {
String::from("dl_games")
}
}
/// A work refers to its genres through the ids held in `genre_ids`.
impl RocksReferences<DLSiteGenre> for DLSiteManiax {
    /// Returns an owned copy of the genre ids attached to this work.
    fn get_reference_ids(&self) -> Vec<<DLSiteGenre as RocksColumn>::Id> {
        self.genre_ids.to_vec()
    }
}
/// A DLsite genre; its `RocksColumn` impl stores it in the `dl_genres`
/// column family.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct DLSiteGenre {
// Excluded from the serialized value; exposed through `get_id` / `set_id`.
#[serde(skip)]
pub id: u16,
// Id of the `DLSiteCategory` this genre refers to (see its `RocksReference` impl).
pub category_id: u16,
// Genre name translations, one entry per `DLSiteTranslation` variant provided.
pub translations: Vec<DLSiteTranslation>
}
/// Stores genres in the `dl_genres` column family, keyed by their `u16` id.
impl RocksColumn for DLSiteGenre {
    type Id = u16;

    /// Returns this genre's id.
    fn get_id(&self) -> Self::Id {
        // `u16` is `Copy`, so the value is returned directly without `clone()`.
        self.id
    }

    /// Overwrites this genre's id.
    fn set_id(&mut self, id: Self::Id) {
        self.id = id;
    }

    /// Name of the column family that holds genres.
    fn get_column_name() -> String {
        String::from("dl_genres")
    }
}
/// A genre refers to exactly one category through `category_id`.
impl RocksReference<DLSiteCategory> for DLSiteGenre {
    /// Returns the id of the category this genre refers to.
    fn get_reference_id(&self) -> <DLSiteCategory as RocksColumn>::Id {
        // `u16` is `Copy`, so the value is returned directly without `clone()`.
        self.category_id
    }
}
/// A DLsite genre category, referenced from `DLSiteGenre::category_id`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct DLSiteCategory {
// Excluded from the serialized value; exposed through `get_id` / `set_id`.
#[serde(skip)]
pub id: u16,
// Category name translations, one entry per `DLSiteTranslation` variant provided.
pub translations: Vec<DLSiteTranslation>
}
/// Stores categories keyed by their `u16` id.
impl RocksColumn for DLSiteCategory {
    type Id = u16;

    /// Returns this category's id.
    fn get_id(&self) -> Self::Id {
        // `u16` is `Copy`, so the value is returned directly without `clone()`.
        self.id
    }

    /// Overwrites this category's id.
    fn set_id(&mut self, id: Self::Id) {
        self.id = id;
    }

    /// Name of the column family that holds categories.
    fn get_column_name() -> String {
        // NOTE(review): categories are stored under "dl_translations", not a
        // category-specific name — confirm against `DB_COLUMNS` that this is intended.
        String::from("dl_translations")
    }
}
/// A translated string tagged with its language variant.
// NOTE(review): `EN` / `JP` presumably mean English / Japanese — confirm.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) enum DLSiteTranslation {
EN(String), JP(String)
}

View File

@@ -7,5 +7,14 @@ pub(crate) use game::*;
/// A type stored in its own RocksDB column family.
pub trait RocksColumn {
/// Key type; it must be serializable and deserializable so it can be
/// written to and read back from the database.
type Id: Serialize + DeserializeOwned;
/// Returns the id of this value.
fn get_id(&self) -> Self::Id;
/// Overwrites the id of this value.
fn set_id(&mut self, id: Self::Id);
/// Name of the column family this type is stored in.
fn get_column_name() -> String;
}
/// Implemented by a type that refers to exactly one `T` by id.
pub trait RocksReference<T> where T: RocksColumn {
/// Returns the id of the referenced `T`.
fn get_reference_id(&self) -> T::Id;
}
/// Implemented by a type that refers to a list of `T` values by id.
pub trait RocksReferences<T> where T: RocksColumn {
/// Returns the ids of the referenced `T` values.
fn get_reference_ids(&self) -> Vec<T::Id>;
}