Add Reference id
This commit is contained in:
@@ -6,6 +6,11 @@ authors = ["fromost"]
|
||||
license = "MIT"
|
||||
edition = "2024"
|
||||
|
||||
[profile.dev]
|
||||
debug = true
|
||||
incremental = true
|
||||
lto = "fat"
|
||||
|
||||
[dependencies]
|
||||
color-eyre = "0.6.3"
|
||||
futures = "0.3.28"
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
use std::path::Path;
|
||||
use clap::{Args, Command, Parser, Subcommand};
|
||||
use color_eyre::eyre::eyre;
|
||||
use color_eyre::eyre::Result;
|
||||
use colored::Colorize;
|
||||
use tokio::time::Instant;
|
||||
use crate::models;
|
||||
use crate::config::types::ApplicationConfig;
|
||||
use crate::constants::{DB_CF_OPTIONS, DB_OPTIONS};
|
||||
use crate::crawler::{dlsite, DLSiteCrawler};
|
||||
use crate::helpers;
|
||||
use crate::helpers::db::RocksDB;
|
||||
use crate::models::DLSiteManiax;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub(super) struct SyncCommand {
|
||||
@@ -50,10 +50,12 @@ impl SyncSubCommand {
|
||||
|
||||
impl SyncDLSiteCommand {
|
||||
pub async fn handle(&self) -> color_eyre::Result<()> {
|
||||
let now = Instant::now();
|
||||
let app_conf = ApplicationConfig::get_config()?;
|
||||
let db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?;
|
||||
let mut db = RocksDB::new(DB_OPTIONS.clone(), DB_CF_OPTIONS.clone())?;
|
||||
Self::sync_genres(&app_conf).await?;
|
||||
Self::sync_works(&app_conf, &db).await?;
|
||||
Self::sync_works(&app_conf, &mut db).await?;
|
||||
println!("{} Done in {:.2?}", "Syncing".green(), now.elapsed());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -61,14 +63,14 @@ impl SyncDLSiteCommand {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn sync_works(app_conf: &ApplicationConfig, db: &RocksDB) -> Result<()> {
|
||||
async fn sync_works(app_conf: &ApplicationConfig, db: &mut RocksDB) -> Result<()> {
|
||||
let crawler = DLSiteCrawler::new();
|
||||
let mut rj_nums: Vec<String> = Vec::new();
|
||||
let paths = app_conf.path_config.dlsite_paths.iter()
|
||||
let config_paths = app_conf.path_config.dlsite_paths.iter()
|
||||
.map(|path| Path::new(path).to_path_buf())
|
||||
.collect::<Vec<_>>();
|
||||
let dirs = helpers::get_all_folders(&paths).await?;
|
||||
for dir_path in dirs.iter() {
|
||||
let dir_paths = helpers::get_all_folders(&config_paths).await?;
|
||||
for dir_path in dir_paths.iter() {
|
||||
if !dir_path.is_dir() {
|
||||
println!("{dir_path:?} is not a directory");
|
||||
continue;
|
||||
@@ -82,7 +84,9 @@ impl SyncDLSiteCommand {
|
||||
}
|
||||
rj_nums.push(dir_name.to_string());
|
||||
}
|
||||
let maniaxes = crawler.get_game_infos(rj_nums).await?;
|
||||
let maniaxes: Vec<models::DLSiteManiax> = crawler.get_game_infos(rj_nums).await?.into_iter()
|
||||
.map(|x| x.into())
|
||||
.collect::<Vec<_>>();
|
||||
db.set_values(&maniaxes)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -30,3 +30,9 @@ fn get_db_options() -> rocksdb::Options {
|
||||
|
||||
opts
|
||||
}
|
||||
|
||||
/// Builds the `rocksdb::ReadOptions` used for reads: the defaults with
/// asynchronous I/O switched on.
pub(crate) fn get_db_read_options() -> rocksdb::ReadOptions {
    let mut read_options = rocksdb::ReadOptions::default();
    read_options.set_async_io(true);
    read_options
}
|
||||
@@ -1,14 +1,18 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::PathBuf;
|
||||
use color_eyre::eyre::eyre;
|
||||
use color_eyre::owo_colors::OwoColorize;
|
||||
use reqwest::Url;
|
||||
use color_eyre::Result;
|
||||
use color_eyre::{Report, Result};
|
||||
use colored::Colorize;
|
||||
use futures::stream::FuturesUnordered;
|
||||
use futures::StreamExt;
|
||||
use lazy_static::lazy_static;
|
||||
use scraper::{Html, Selector};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::time::Instant;
|
||||
use crate::constants::{APP_DATA_DIR};
|
||||
use crate::crawler::Crawler;
|
||||
use crate::models::DLSiteManiax;
|
||||
|
||||
// TODO: override the locale with the user's configured one
|
||||
const DLSITE_URL: &str = "https://www.dlsite.com/";
|
||||
@@ -24,6 +28,20 @@ pub struct DLSiteCrawler {
|
||||
crawler: Crawler,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, Clone)]
|
||||
pub(crate) struct DLSiteManiax {
|
||||
#[serde(rename = "work_name")]
|
||||
pub(crate) title: String,
|
||||
#[serde(rename = "work_image")]
|
||||
pub(crate) work_image_url: String,
|
||||
#[serde(rename = "dl_count")]
|
||||
pub(crate) sells_count: u32,
|
||||
#[serde(skip)]
|
||||
pub(crate) genre_ids: Vec<u16>,
|
||||
#[serde(skip)]
|
||||
pub(crate) rj_num: String,
|
||||
}
|
||||
|
||||
impl DLSiteCrawler {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
@@ -41,10 +59,35 @@ impl DLSiteCrawler {
|
||||
eyre!("Invalid numbers: {}", invalid_nums.join(", "))
|
||||
);
|
||||
}
|
||||
|
||||
let query = &format!("product_id={}", rj_nums.join(","));
|
||||
let (maniax_result, _) = self.crawler
|
||||
.get_json::<HashMap<String, DLSiteManiax>>(DLSITE_PRODUCT_API_ENDPOINT, Some(query))
|
||||
.await?;
|
||||
|
||||
Self::verify_all_works_exists(&maniax_result, rj_nums);
|
||||
|
||||
let mut tasks = FuturesUnordered::new();
|
||||
for (rj_num, mut info) in maniax_result {
|
||||
tasks.push(async {
|
||||
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
|
||||
let (_, html_result) = tokio::join!(self.save_main_image(&info, &rj_num), self.crawler.get_html(&html_path));
|
||||
let (html, _) = html_result?;
|
||||
let genres = self.get_genres(&html).await?;
|
||||
info.genre_ids = genres;
|
||||
info.rj_num = rj_num;
|
||||
Ok::<DLSiteManiax, Report>(info)
|
||||
})
|
||||
}
|
||||
let mut maniax_infos = Vec::new();
|
||||
while let Some(result) = tasks.next().await {
|
||||
maniax_infos.push(result?);
|
||||
}
|
||||
|
||||
Ok(maniax_infos)
|
||||
}
|
||||
|
||||
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) {
|
||||
let keys = maniax_result.keys()
|
||||
.map(|k| k.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
@@ -56,19 +99,6 @@ impl DLSiteCrawler {
|
||||
if !nums_diff.is_empty() {
|
||||
println!("Restricted/Removed Works: {}", nums_diff.join(", ").red());
|
||||
}
|
||||
|
||||
let mut maniax_infos = Vec::new();
|
||||
for (rj_num, mut info) in maniax_result {
|
||||
self.save_main_image(&info, &rj_num).await?;
|
||||
|
||||
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
|
||||
let (html, _) = self.crawler.get_html(&html_path).await?;
|
||||
let genres = self.get_genres(&html)?;
|
||||
info.genre_ids = genres;
|
||||
info.id = rj_num;
|
||||
maniax_infos.push(info);
|
||||
}
|
||||
Ok(maniax_infos)
|
||||
}
|
||||
|
||||
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
|
||||
@@ -85,17 +115,16 @@ impl DLSiteCrawler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_genres(&self, html: &Html) -> Result<Vec<u16>> {
|
||||
async fn get_genres(&self, html: &Html) -> Result<Vec<u16>> {
|
||||
let selector = Result::unwrap(
|
||||
Selector::parse(
|
||||
"#work_outline > tbody:nth-child(1)"
|
||||
)
|
||||
);
|
||||
let result = html.select(&selector).next().unwrap();
|
||||
let genre_row = result.child_elements()
|
||||
.filter(|e|
|
||||
e.child_elements().any(|e| e.inner_html() == "ジャンル") // TODO: will not work with english
|
||||
).next().unwrap();
|
||||
let genre_rows = result.child_elements().collect::<Vec<_>>();
|
||||
let genre_len = genre_rows.iter().count();
|
||||
let genre_row = genre_rows.iter().skip(genre_len - 2).next().unwrap();
|
||||
let data = genre_row
|
||||
.child_elements().skip(1).next().unwrap()
|
||||
.child_elements().next().unwrap();
|
||||
|
||||
@@ -2,14 +2,15 @@ use crate::constants::{APP_DB_DATA_DIR, DB_COLUMNS};
|
||||
use rocksdb::{ColumnFamilyDescriptor, IteratorMode, OptimisticTransactionDB, Options, ReadOptions};
|
||||
use serde::{Serialize};
|
||||
use serde::de::DeserializeOwned;
|
||||
use crate::models::RocksColumn;
|
||||
use crate::models::{RocksColumn, RocksReference, RocksReferences};
|
||||
use color_eyre::Result;
|
||||
|
||||
pub struct RocksDB {
|
||||
db: OptimisticTransactionDB,
|
||||
}
|
||||
|
||||
impl RocksDB {
|
||||
pub fn new(db_opts: Options, cf_opts: Options) -> color_eyre::Result<Self> {
|
||||
pub fn new(db_opts: Options, cf_opts: Options) -> Result<Self> {
|
||||
let cfs = DB_COLUMNS.iter()
|
||||
.map(|cf| ColumnFamilyDescriptor::new(cf.to_string(), cf_opts.clone()))
|
||||
.collect::<Vec<_>>();
|
||||
@@ -24,18 +25,18 @@ impl RocksDB {
|
||||
Ok(rocks)
|
||||
}
|
||||
|
||||
pub fn get_value<TValue, TColumn>(&self, id: TColumn::Id) -> color_eyre::Result<Option<TValue>>
|
||||
where TColumn: RocksColumn, TValue: DeserializeOwned
|
||||
pub fn get_value<TColumn>(&self, id: &TColumn::Id) -> Result<Option<TColumn>>
|
||||
where TColumn: RocksColumn + DeserializeOwned
|
||||
{
|
||||
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
|
||||
let query_res = self.db.get_cf(&cf, serde_json::to_string(&id)?)?;
|
||||
let query_res = self.db.get_cf(&cf, serde_json::to_string(id)?)?;
|
||||
if query_res.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(Some(serde_json::from_slice(&query_res.unwrap())?))
|
||||
}
|
||||
|
||||
pub fn set_value<TColumn>(&self, value: &TColumn) -> color_eyre::Result<()>
|
||||
pub fn set_value<TColumn>(&self, value: &TColumn) -> Result<()>
|
||||
where TColumn: RocksColumn + Serialize
|
||||
{
|
||||
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
|
||||
@@ -43,7 +44,7 @@ impl RocksDB {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_values<TColumn>(&self, ids: &[TColumn::Id]) -> color_eyre::Result<Vec<TColumn>>
|
||||
pub fn get_values<TColumn>(&self, ids: &[TColumn::Id]) -> Result<Vec<TColumn>>
|
||||
where TColumn: RocksColumn + DeserializeOwned
|
||||
{
|
||||
let transaction = self.db.transaction();
|
||||
@@ -59,26 +60,41 @@ impl RocksDB {
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
pub fn get_all_values<TColumn>(&self) -> color_eyre::Result<Vec<(TColumn::Id, TColumn)>>
|
||||
pub fn get_reference_value<TReference, TColumn>(&self, id: &TReference::Id) -> Result<Option<TReference>>
|
||||
where TReference: RocksColumn + DeserializeOwned,
|
||||
TColumn: RocksColumn + RocksReference<TReference>
|
||||
{
|
||||
let reference = self.get_value::<TReference>(id)?;
|
||||
if reference.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(Some(reference.unwrap()))
|
||||
}
|
||||
|
||||
pub fn get_reference_values<TReference, TColumn>(&self, ids: &[TReference::Id]) -> Result<Vec<TReference>>
|
||||
where TReference: RocksColumn + DeserializeOwned,
|
||||
TColumn: RocksColumn + RocksReferences<TReference>
|
||||
{
|
||||
self.get_values::<TReference>(ids)
|
||||
}
|
||||
|
||||
pub fn get_all_values<TColumn>(&self) -> Result<Vec<TColumn>>
|
||||
where TColumn: RocksColumn + DeserializeOwned
|
||||
{
|
||||
let cf = self.db.cf_handle(TColumn::get_column_name().as_str()).unwrap();
|
||||
let mut options = ReadOptions::default();
|
||||
options.set_async_io(true);
|
||||
let values = self.db.iterator_cf_opt(&cf, options, IteratorMode::Start)
|
||||
let values = self.db.iterator_cf_opt(&cf, crate::constants::get_db_read_options(), IteratorMode::Start)
|
||||
.filter_map(Result::ok)
|
||||
.map(|(k, v)|
|
||||
(
|
||||
serde_json::from_slice::<TColumn::Id>(&k).unwrap(),
|
||||
serde_json::from_slice::<TColumn>(&v).unwrap()
|
||||
)
|
||||
)
|
||||
.map(|(k, v)| {
|
||||
let id = serde_json::from_slice::<TColumn::Id>(&k).unwrap();
|
||||
let mut value = serde_json::from_slice::<TColumn>(&v).unwrap();
|
||||
value.set_id(id);
|
||||
value
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
|
||||
pub fn set_values<TColumn>(&self, values: &[TColumn]) -> color_eyre::Result<()>
|
||||
pub fn set_values<TColumn>(&mut self, values: &[TColumn]) -> Result<()>
|
||||
where TColumn: RocksColumn + Serialize
|
||||
{
|
||||
let transaction = self.db.transaction();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use ratatui::widgets::ListState;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::models::RocksColumn;
|
||||
use crate::models::{RocksColumn, RocksReference, RocksReferences};
|
||||
|
||||
pub(crate) struct GameList<T> {
|
||||
games: Vec<T>,
|
||||
@@ -9,26 +9,99 @@ pub(crate) struct GameList<T> {
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DLSiteManiax {
|
||||
#[serde(rename = "work_name")]
|
||||
pub title: String,
|
||||
#[serde(rename = "work_image")]
|
||||
pub work_image_url: String,
|
||||
#[serde(rename = "dl_count")]
|
||||
pub sells_count: u32,
|
||||
#[serde(skip)]
|
||||
pub rj_num: String,
|
||||
pub genre_ids: Vec<u16>,
|
||||
#[serde(skip)]
|
||||
pub id: String,
|
||||
pub name: String,
|
||||
pub sells_count: u32
|
||||
}
|
||||
|
||||
/// Converts a crawled work into the model that is persisted.
///
/// The crawler's `title` becomes `name`; `rj_num`, `genre_ids` and
/// `sells_count` are moved over unchanged. `work_image_url` is not carried over.
impl From<crate::crawler::dlsite::DLSiteManiax> for DLSiteManiax {
    // The parameter uses the same path as the impl header so both always name
    // the same type and do not depend on a separate re-export being present.
    fn from(value: crate::crawler::dlsite::DLSiteManiax) -> Self {
        Self {
            rj_num: value.rj_num,
            genre_ids: value.genre_ids,
            name: value.title,
            sells_count: value.sells_count,
        }
    }
}
|
||||
|
||||
impl RocksColumn for DLSiteManiax {
|
||||
type Id = String;
|
||||
|
||||
fn get_id(&self) -> Self::Id {
|
||||
self.id.clone()
|
||||
self.rj_num.clone()
|
||||
}
|
||||
|
||||
fn set_id(&mut self, id: Self::Id) {
|
||||
self.rj_num = id;
|
||||
}
|
||||
|
||||
fn get_column_name() -> String {
|
||||
String::from("dl_games")
|
||||
}
|
||||
}
|
||||
|
||||
impl RocksReferences<DLSiteGenre> for DLSiteManiax {
|
||||
fn get_reference_ids(&self) -> Vec<<DLSiteGenre as RocksColumn>::Id> {
|
||||
self.genre_ids.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DLSiteGenre {
|
||||
#[serde(skip)]
|
||||
pub id: u16,
|
||||
pub category_id: u16,
|
||||
pub translations: Vec<DLSiteTranslation>
|
||||
}
|
||||
|
||||
/// Storage mapping for genres: keyed by `u16` id in the `dl_genres` column.
impl RocksColumn for DLSiteGenre {
    type Id = u16;

    /// Returns the genre id.
    fn get_id(&self) -> Self::Id {
        // `u16` is `Copy`; no explicit clone is needed.
        self.id
    }

    /// Overwrites the genre id.
    fn set_id(&mut self, id: Self::Id) {
        self.id = id;
    }

    /// Name of the column this type is stored in.
    fn get_column_name() -> String {
        String::from("dl_genres")
    }
}
|
||||
|
||||
impl RocksReference<DLSiteCategory> for DLSiteGenre {
|
||||
fn get_reference_id(&self) -> <DLSiteCategory as RocksColumn>::Id {
|
||||
self.category_id.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DLSiteCategory {
|
||||
#[serde(skip)]
|
||||
pub id: u16,
|
||||
pub translations: Vec<DLSiteTranslation>
|
||||
}
|
||||
|
||||
/// Storage mapping for categories: keyed by `u16` id.
impl RocksColumn for DLSiteCategory {
    type Id = u16;

    /// Returns the category id.
    fn get_id(&self) -> Self::Id {
        // `u16` is `Copy`; no explicit clone is needed.
        self.id
    }

    /// Overwrites the category id.
    fn set_id(&mut self, id: Self::Id) {
        self.id = id;
    }

    /// Name of the column this type is stored in.
    fn get_column_name() -> String {
        // NOTE(review): categories are stored under "dl_translations" — confirm
        // this is the intended column name and not a copy/paste leftover.
        String::from("dl_translations")
    }
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) enum DLSiteTranslation {
|
||||
EN(String), JP(String)
|
||||
}
|
||||
@@ -7,5 +7,14 @@ pub(crate) use game::*;
|
||||
pub trait RocksColumn {
|
||||
type Id: Serialize + DeserializeOwned;
|
||||
fn get_id(&self) -> Self::Id;
|
||||
fn set_id(&mut self, id: Self::Id);
|
||||
fn get_column_name() -> String;
|
||||
}
|
||||
|
||||
pub trait RocksReference<T> where T: RocksColumn {
|
||||
fn get_reference_id(&self) -> T::Id;
|
||||
}
|
||||
|
||||
pub trait RocksReferences<T> where T: RocksColumn {
|
||||
fn get_reference_ids(&self) -> Vec<T::Id>;
|
||||
}
|
||||
Reference in New Issue
Block a user