diff --git a/src/cli.rs b/src/cli.rs index 98dc1c3..297153a 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,6 +1,5 @@ use crate::app; use crate::config::types::ApplicationConfig; -use crate::constants::APP_CONIFG_FILE_PATH; use clap::{command, Args, Command, Parser, Subcommand}; use color_eyre::Result; use ratatui::crossterm; @@ -168,7 +167,7 @@ impl SyncDLSiteCommand { impl FolderAddCommand { pub async fn handle(&self) -> Result<()> { - let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?; + let mut config = ApplicationConfig::get_config()?; let path = PathBuf::from(&self.path); let abs_path = path.canonicalize()?; if !abs_path.is_dir() { diff --git a/src/config/mod.rs b/src/config/mod.rs index e15d764..cda74c7 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -15,13 +15,13 @@ impl ApplicationConfig { } } - pub fn from_file(path: &PathBuf) -> Result<Self> { + fn from_file(path: &PathBuf) -> Result<Self> { let reader = std::fs::File::open(path)?; let result = serde_json::from_reader(reader)?; Ok(result) } - pub fn new() -> Self { + fn new() -> Self { let conf = Self { basic_config: BasicConfig { db_path: APP_DATA_DIR diff --git a/src/crawler/dlsite.rs b/src/crawler/dlsite.rs index 8adf557..bccdb39 100644 --- a/src/crawler/dlsite.rs +++ b/src/crawler/dlsite.rs @@ -4,12 +4,14 @@ use color_eyre::eyre::eyre; use reqwest::Url; use color_eyre::Result; use lazy_static::lazy_static; +use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; use crate::constants::APP_DATA_DIR; use crate::crawler::Crawler; const DLSITE_URL: &str = "https://www.dlsite.com/"; const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax"; +const DLSITE_MANIAX_PATH: &str = "/maniax/work/=/product_id/"; lazy_static!
{ pub static ref DLSITE_IMG_FOLDER: PathBuf = APP_DATA_DIR.clone().join("dlsite").join("img"); } @@ -25,7 +27,9 @@ pub struct DLSiteManiax { #[serde(rename = "work_image")] work_image_url: String, #[serde(rename = "dl_count")] - pub sells_count: u32 + pub sells_count: u32, + #[serde(skip)] + pub genre_ids: Vec<u32> } impl DLSiteCrawler { @@ -51,18 +55,25 @@ impl DLSiteCrawler { pub async fn get_game_info(&self, rj_num: &str) -> Result<DLSiteManiax> { if !Self::is_valid_number(rj_num) { - return Err(eyre!("Invalid number: {}", rj_num)); + return Err(eyre!("Invalid number: {rj_num}")); } let mut api_url = self.crawler.base_url.clone(); api_url.set_path(DLSITE_API_ENDPOINT); - api_url.set_query(Some(&format!("product_id={}", rj_num))); - let res = self.crawler.client.get(api_url).send().await?; - let maniax_result = match res.json::<HashMap<String, DLSiteManiax>>().await { + api_url.set_query(Some(&format!("product_id={rj_num}"))); + let api_res = self.crawler.client.get(api_url).send().await?; + let maniax_result = match api_res.json::<HashMap<String, DLSiteManiax>>().await { Ok(maniax_result) => maniax_result, - Err(_) => return Err(eyre!("Maniax {} is restricted/removed", rj_num)), + Err(_) => return Err(eyre!("Maniax {rj_num} is restricted/removed")), }; - let maniax_info = maniax_result.iter().next().unwrap().1.clone(); + let mut maniax_info = maniax_result.iter().next().unwrap().1.clone(); + self.save_main_image(&maniax_info, rj_num).await?; + + let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}"); + let (html, _) = self.crawler.get_html(&html_path).await?; + let genres = self.get_genres(&html)?; + maniax_info.genre_ids = genres; + Ok(maniax_info) } @@ -72,7 +83,35 @@ impl DLSiteCrawler { let img_res = self.crawler.client.get(url).send().await?; let img_bytes = img_res.bytes().await?; let img = image::load_from_memory(&img_bytes)?; - img.save(DLSITE_IMG_FOLDER.clone().join(format!("{}.jpg", rj_num)).as_path())?; + img.save(DLSITE_IMG_FOLDER.clone().join(format!("{rj_num}.jpg")).as_path())?; Ok(()) } + + fn get_genres(&self, html: &Html)
-> Result<Vec<u32>> { + let selector = Result::unwrap( + Selector::parse( + "#work_outline > tbody:nth-child(1)" + ) + ); + let result = html.select(&selector).next().unwrap(); + let genre_row = result.child_elements() + .filter(|e| + e.child_elements().any(|e| e.inner_html() == "ジャンル") + ).next().unwrap(); + let data = genre_row + .child_elements().skip(1).next().unwrap() + .child_elements().next().unwrap(); + let genre_urls = data.child_elements() + .map(|e| e.attr("href").unwrap()) + .map(|s| Url::parse(s).unwrap()) + .collect::<Vec<_>>(); + let genre_ids = genre_urls.iter() + .map(|x| { + x.path_segments().unwrap() + .skip(4).next().unwrap() + .parse::<u32>().unwrap() + }) + .collect::<Vec<_>>(); + Ok(genre_ids) + } } \ No newline at end of file