diff --git a/.gitignore b/.gitignore index 97cc94f..bbd291d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ target .idea -games.db -*.env -diesel.toml -migrations/.* \ No newline at end of file +*.env \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 96f827f..b387dc1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "aligned-vec" version = "0.6.4" @@ -167,6 +176,24 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + [[package]] name = "bit_field" version = "0.10.3" @@ -221,6 +248,16 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "cassowary" version = "0.3.0" @@ -248,6 +285,15 @@ dependencies = [ "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-expr" version = "0.15.8" @@ -264,6 +310,17 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.5.48" @@ -343,6 +400,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "colored" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "compact_str" version = "0.8.1" @@ -495,18 +561,8 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core 0.20.11", - "darling_macro 0.20.11", -] - -[[package]] -name = "darling" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" -dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "darling_core", + "darling_macro", ] [[package]] @@ -523,38 +579,13 @@ dependencies = [ "syn", ] -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn", -] - [[package]] name = "darling_macro" version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core 0.20.11", - "quote", - "syn", -] - -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", + "darling_core", "quote", "syn", ] @@ -589,41 +620,6 @@ dependencies = [ "syn", ] -[[package]] -name = "diesel" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8496eeb328dce26ee9d9b73275d396d9bddb433fa30106cf6056dd8c3c2764c" -dependencies = [ - "diesel_derives", - "downcast-rs", - "libsqlite3-sys", - "sqlite-wasm-rs", - "time", -] - -[[package]] -name = "diesel_derives" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09af0e983035368439f1383011cd87c46f41da81d0f21dc3727e2857d5a43c8e" -dependencies = [ - "diesel_table_macro_syntax", - "dsl_auto_type", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "diesel_table_macro_syntax" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c" -dependencies = [ - "syn", -] - [[package]] name = "directories" version = "6.0.0" @@ -665,26 +661,6 @@ dependencies = [ "litrs", ] -[[package]] -name = "downcast-rs" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" - -[[package]] -name = "dsl_auto_type" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd122633e4bef06db27737f21d3738fb89c8f6d5360d6d9d7635dda142a7757e" -dependencies = [ - "darling 0.21.3", - "either", - "heck", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "dtoa" version = "1.0.10" @@ -1025,6 +1001,12 @@ version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "h2" version = "0.4.12" @@ -1384,7 +1366,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a" dependencies = [ - "darling 0.20.11", + "darling", "indoc", "proc-macro2", "quote", @@ -1507,6 +1489,16 @@ dependencies = [ "cc", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.0", +] + [[package]] name = "libredox" version = "0.1.10" @@ -1518,11 +1510,27 @@ dependencies = [ ] [[package]] -name = "libsqlite3-sys" -version = "0.35.0" +name = "librocksdb-sys" +version = "0.17.3+10.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" +checksum = "cef2a00ee60fe526157c9023edab23943fae1ce2ab6f4abb2a807c1746835de9" dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "libc", + "libz-sys", + "lz4-sys", + "zstd-sys", +] + +[[package]] +name = "libz-sys" +version = "1.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" +dependencies = [ + "cc", "pkg-config", "vcpkg", ] @@ -1584,6 +1592,16 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "mac" version = "0.1.1" @@ -2216,6 +2234,35 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "reqwest" version = "0.12.23" @@ -2284,12 +2331,28 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbc52377db80e3fec3a2c748ca603b8b6cacdd34ff89ff4b742a635361d4b4a7" +[[package]] +name = "rocksdb" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddb7af00d2b17dbd07d82c0063e25411959748ff03e8d4f96134c2ff41fce34f" +dependencies = [ + "libc", + "librocksdb-sys", +] + [[package]] name = "rustc-demangle" version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustix" version = "0.38.44" @@ -2594,21 +2657,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "sqlite-wasm-rs" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aead1c279716985b981b7940ef9b652d3f93d70a7296853c633b7ce8fa8088a" -dependencies = [ - "js-sys", - "once_cell", - "thiserror 2.0.17", - "tokio", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -2686,8 +2734,8 @@ version = "0.1.0" dependencies = [ "clap", "color-eyre", + "colored", "crossterm 0.29.0", - "diesel", "directories", "futures", "image", @@ -2696,6 +2744,7 @@ dependencies = [ "ratatui", "reqwest", "robotstxt", + "rocksdb", "scraper", "serde", "serde_json", @@ -2868,14 +2917,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", - "itoa", "libc", "num-conv", "num_threads", "powerfmt", "serde", "time-core", - "time-macros", ] [[package]] @@ -2884,16 +2931,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" -[[package]] -name = "time-macros" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" -dependencies = [ - "num-conv", - "time-core", -] - [[package]] name = "tinystr" version = "0.8.1" @@ -3690,6 +3727,16 @@ dependencies = [ "syn", ] +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "zune-core" version = "0.4.12" diff --git a/Cargo.toml b/Cargo.toml index 5dae21b..5ab6e31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ scraper = "0.24.0" rat-cursor = "1.2.1" serde_json = "1.0.145" image = "0.25.8" +colored = "3.0.0" +rocksdb = "0.24.0" [dependencies.serde] version = "1.0.228" @@ -45,7 +47,3 @@ features = ["blocking", "json"] [dependencies.tokio] version = "1.47.1" features = ["full"] - -[dependencies.diesel] -version = "2.3.2" -features = ["sqlite"] diff --git a/migrations/2025-10-06-111516-0000_dl_games/down.sql b/migrations/2025-10-06-111516-0000_dl_games/down.sql deleted file mode 100644 index 5ae38d5..0000000 --- a/migrations/2025-10-06-111516-0000_dl_games/down.sql +++ /dev/null @@ -1,2 +0,0 @@ --- This file should undo anything in `up.sql` -DROP TABLE dl_games; \ No newline at end of file diff --git a/migrations/2025-10-06-111516-0000_dl_games/up.sql b/migrations/2025-10-06-111516-0000_dl_games/up.sql deleted file mode 100644 index 9159bbf..0000000 --- a/migrations/2025-10-06-111516-0000_dl_games/up.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Your SQL goes here -CREATE TABLE dl_games ( - serial_number CHARACTER(10) NOT NULL PRIMARY KEY -) \ No newline at end of file diff --git a/src/app.rs b/src/app.rs index 5bb88c5..8b749fa 100644 --- a/src/app.rs +++ b/src/app.rs @@ -6,7 +6,6 @@ use crate::widgets::views::View; use color_eyre::Result; use crossterm::event::Event as CrosstermEvent; use crossterm::event::{Event, KeyEvent}; -use diesel::{Connection, SqliteConnection}; use rat_cursor::HasScreenCursor; use ratatui::{DefaultTerminal, Frame}; use std::any::Any; @@ -16,7 +15,6 @@ use crate::crawler::DLSITE_IMG_FOLDER; pub(crate) struct App { events: EventHandler, - db_connection: SqliteConnection, state: AppState, } @@ -27,24 +25,16 @@ struct AppState { impl App { pub async fn create() -> Result { let config = ApplicationConfig::get_config()?; - let db_conn = Self::establish_db_connection(&config); let state = AppState { view: Some(Box::new(MainView::new())), }; let app = Self { events: EventHandler::new(Duration::from_millis(config.basic_config.tick_rate)), - db_connection: db_conn, state, }; Ok(app) } - fn establish_db_connection(application_config: &ApplicationConfig) -> SqliteConnection { - let database_url = application_config.clone().basic_config.db_path; - SqliteConnection::establish(&database_url) - .unwrap_or_else(|_| panic!("Error connecting to {}", database_url)) - } - pub async fn run(mut self, terminal: &mut DefaultTerminal) -> Result<()> { loop { terminal.draw(|frame| self.draw(frame))?; diff --git a/src/cli.rs b/src/cli.rs index 297153a..79f4c13 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -3,9 +3,11 @@ use crate::config::types::ApplicationConfig; use clap::{command, Args, Command, Parser, Subcommand}; use color_eyre::Result; use ratatui::crossterm; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use color_eyre::eyre::eyre; +use colored::Colorize; use crate::crawler::DLSiteCrawler; +use crate::crawler::dlsite; // region Folder Command #[derive(Parser, Debug)] @@ -159,8 +161,45 @@ impl SyncSubCommand { impl SyncDLSiteCommand { pub async fn handle(&self) -> Result<()> { + let app_conf = ApplicationConfig::get_config()?; + Self::sync_genres(&app_conf).await?; + Self::sync_works(&app_conf).await?; + Ok(()) + } + + async fn sync_genres(app_conf: &ApplicationConfig) -> Result<()> { + Ok(()) + } + + async fn sync_works(app_conf: &ApplicationConfig) -> Result<()> { let crawler = DLSiteCrawler::new(); - crawler.get_game_info("RJ163319").await?; + let mut rj_nums: Vec = Vec::new(); + for path_str in app_conf.path_config.dlsite_paths.iter() { + let path = Path::new(path_str); + if !path.exists() { + return Err(eyre!("{} {}", path_str.blue(), "does not exist".red())); + } + let dir_paths = path.read_dir()? + .filter_map(Result::ok) + .map(|e| e.path()) + .collect::>(); + for dir_path in dir_paths.iter() { + if !dir_path.is_dir() { + println!("{dir_path:?} is not a directory"); + continue; + } + let dir_name = dir_path + .file_name().unwrap() + .to_str().unwrap(); + if !dlsite::is_valid_rj_number(dir_name) { + println!("{} {}", dir_path.to_str().unwrap().blue(), "is not a valid rj number, please add it manually".red()); + continue; + } + rj_nums.push(dir_name.to_string()); + } + } + let maniaxes = crawler.get_game_infos(rj_nums).await?; + //TODO: save into db/probably change to use jsonb Ok(()) } } diff --git a/src/crawler/dlsite.rs b/src/crawler/dlsite.rs index bccdb39..92fa1a4 100644 --- a/src/crawler/dlsite.rs +++ b/src/crawler/dlsite.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::path::PathBuf; use color_eyre::eyre::eyre; use reqwest::Url; @@ -6,11 +6,13 @@ use color_eyre::Result; use lazy_static::lazy_static; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; -use crate::constants::APP_DATA_DIR; +use crate::constants::{APP_DATA_DIR}; use crate::crawler::Crawler; +//TODO: override locale with user one const DLSITE_URL: &str = "https://www.dlsite.com/"; -const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax"; +const DLSITE_PRODUCT_API_ENDPOINT: &str = "/maniax/product/info/ajax"; +const DLSITE_FILTER_OPTIONS_ENDPOINT: &str = "/maniax/fs/=/api_access/1/locale/ja_JP"; const DLSITE_MANIAX_PATH: &str = "/maniax/work/=/product_id/"; lazy_static! { pub static ref DLSITE_IMG_FOLDER: PathBuf = APP_DATA_DIR.clone().join("dlsite").join("img"); @@ -23,7 +25,8 @@ pub struct DLSiteCrawler { #[derive(Clone, Debug, Serialize, Deserialize)] pub struct DLSiteManiax { - pub work_name: String, + #[serde(rename = "work_name")] + pub title: String, #[serde(rename = "work_image")] work_image_url: String, #[serde(rename = "dl_count")] @@ -39,51 +42,56 @@ impl DLSiteCrawler { } } - fn is_valid_number(rj_num: &str) -> bool { - let len = rj_num.len(); - if len != 8 && len != 10 { - return false; + pub async fn get_game_infos(&self, rj_nums: Vec) -> Result> { + let invalid_nums = rj_nums.iter() + .filter(|&n| !is_valid_rj_number(n)) + .map(|n| n.to_string()) + .collect::>(); + if !invalid_nums.is_empty() { + return Err( + eyre!("Invalid numbers: {}", invalid_nums.join(", ")) + ); } - if !rj_num.starts_with("RJ") { - return false; + let query = &format!("product_id={}", rj_nums.join(",")); + let (maniax_result, _) = self.crawler + .get_json::>(DLSITE_PRODUCT_API_ENDPOINT, Some(query)) + .await?; + let keys = maniax_result.keys() + .map(|k| k.to_string()) + .collect::>(); + let keys_hash: HashSet = HashSet::from_iter(keys); + let nums_hash: HashSet = HashSet::from_iter(rj_nums); + let nums_diff = nums_hash.difference(&keys_hash) + .map(|n| n.to_string()) + .collect::>(); + if !nums_diff.is_empty() { + return Err(eyre!("Restricted/Removed Works: {}", nums_diff.join(", "))); } - if !rj_num.chars().skip(2).all(|c| c.is_numeric()) { - return false; + + let mut maniax_infos = Vec::new(); + for (rj_num, mut info) in maniax_result { + self.save_main_image(&info, &rj_num).await?; + + let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}"); + let (html, _) = self.crawler.get_html(&html_path).await?; + let genres = self.get_genres(&html)?; + info.genre_ids = genres; + maniax_infos.push(info); } - true - } - - pub async fn get_game_info(&self, rj_num: &str) -> Result { - if !Self::is_valid_number(rj_num) { - return Err(eyre!("Invalid number: {rj_num}")); - } - let mut api_url = self.crawler.base_url.clone(); - api_url.set_path(DLSITE_API_ENDPOINT); - api_url.set_query(Some(&format!("product_id={rj_num}"))); - let api_res = self.crawler.client.get(api_url).send().await?; - let maniax_result = match api_res.json::>().await { - Ok(maniax_result) => maniax_result, - Err(_) => return Err(eyre!("Maniax {rj_num} is restricted/removed")), - }; - let mut maniax_info = maniax_result.iter().next().unwrap().1.clone(); - - self.save_main_image(&maniax_info, rj_num).await?; - - let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}"); - let (html, _) = self.crawler.get_html(&html_path).await?; - let genres = self.get_genres(&html)?; - maniax_info.genre_ids = genres; - - Ok(maniax_info) + Ok(maniax_infos) } async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> { + let img_file_name = format!("{rj_num}.jpg"); + let img_save_path = DLSITE_IMG_FOLDER.clone().join(img_file_name); + if img_save_path.exists() { + return Ok(()); + } + let url_string = format!("https:{}", info.work_image_url); let url = Url::parse(&url_string)?; - let img_res = self.crawler.client.get(url).send().await?; - let img_bytes = img_res.bytes().await?; - let img = image::load_from_memory(&img_bytes)?; - img.save(DLSITE_IMG_FOLDER.clone().join(format!("{rj_num}.jpg")).as_path())?; + let (img, _) = self.crawler.get_img(&url).await?; + img.save(img_save_path)?; Ok(()) } @@ -96,7 +104,7 @@ impl DLSiteCrawler { let result = html.select(&selector).next().unwrap(); let genre_row = result.child_elements() .filter(|e| - e.child_elements().any(|e| e.inner_html() == "ジャンル") + e.child_elements().any(|e| e.inner_html() == "ジャンル") // TODO: will not work with english ).next().unwrap(); let data = genre_row .child_elements().skip(1).next().unwrap() @@ -114,4 +122,18 @@ impl DLSiteCrawler { .collect::>(); Ok(genre_ids) } +} + +pub fn is_valid_rj_number(rj_num: &str) -> bool { + let len = rj_num.len(); + if len != 8 && len != 10 { + return false; + } + if !rj_num.starts_with("RJ") { + return false; + } + if !rj_num.chars().skip(2).all(|c| c.is_numeric()) { + return false; + } + true } \ No newline at end of file diff --git a/src/crawler/mod.rs b/src/crawler/mod.rs index 5de643f..9e3465e 100644 --- a/src/crawler/mod.rs +++ b/src/crawler/mod.rs @@ -1,18 +1,20 @@ -mod dlsite; +pub mod dlsite; pub use dlsite::*; use color_eyre::eyre::eyre; use crate::constants::APP_CACHE_PATH; use color_eyre::Result; +use image::DynamicImage; use reqwest::{Client, StatusCode, Url}; use robotstxt::DefaultMatcher; use scraper::Html; +use serde::de::DeserializeOwned; #[derive(Clone)] struct Crawler { id: String, pub(crate) base_url: Url, - pub(crate) client: Client, + client: Client, robots_txt: Option, } @@ -32,7 +34,7 @@ impl Crawler { let is_access_allowed = matcher.one_agent_allowed_by_robots( &self.get_robots_txt().await?, "reqwest", - self.base_url.as_str(), + url.as_str(), ); if !is_access_allowed { return Err(eyre!("Crawler cannot access site {}", self.base_url.as_str())); @@ -74,4 +76,35 @@ impl Crawler { let html_text = &res.text().await?; Ok((Html::parse_document(html_text), status)) } + + pub async fn get_json(&self, path: &str, query: Option<&str>) -> Result<(T, StatusCode)> + where T : DeserializeOwned { + let mut url = self.base_url.clone(); + url.set_path(path); + url.set_query(query); + self.check_access(&url).await?; + let res = self.client.get(url).send().await?; + let status = res.status(); + let json = res.json().await?; + Ok((json, status)) + } + + pub async fn get_img(&self, url: &Url) -> Result<(DynamicImage, StatusCode)> { + self.check_access(url).await?; + let res = self.client.get(url.clone()).send().await?; + let status = res.status(); + let bytes = res.bytes().await?; + let img = image::load_from_memory(&bytes)?; + Ok((img, status)) + } + + pub async fn get_bytes(&self, path: &str) -> Result<(Vec, StatusCode)> { + let mut url = self.base_url.clone(); + url.set_path(path); + self.check_access(&url).await?; + let res = self.client.get(url).send().await?; + let status = res.status(); + let bytes = res.bytes().await?; + Ok((bytes.to_vec(), status)) + } } diff --git a/src/main.rs b/src/main.rs index a252ffb..8054051 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,6 @@ mod constants; mod crawler; mod event; mod helpers; -mod schema; mod models; mod widgets; diff --git a/src/models/game.rs b/src/models/game.rs index 42353cd..0f3a168 100644 --- a/src/models/game.rs +++ b/src/models/game.rs @@ -1,14 +1,6 @@ -use diesel::{Queryable, Selectable}; use ratatui::widgets::ListState; pub(crate) struct GameList { games: Vec, state: ListState, } - -#[derive(Queryable, Selectable)] -#[diesel(table_name = crate::schema::dl_games)] -#[diesel(check_for_backend(diesel::sqlite::Sqlite))] -pub(crate) struct DLSiteGame { - serial_number: String, -} diff --git a/src/models/mod.rs b/src/models/mod.rs index f7ee1cd..f8f629a 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -1 +1,2 @@ -pub mod game; +mod game; +pub use game::*; \ No newline at end of file diff --git a/src/schema.rs b/src/schema.rs deleted file mode 100644 index 8144187..0000000 --- a/src/schema.rs +++ /dev/null @@ -1,7 +0,0 @@ -// @generated automatically by Diesel CLI. - -diesel::table! { - dl_games (serial_number) { - serial_number -> Text, - } -}