diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml index ce9475e..6c85eaf 100755 --- a/crawler/Cargo.toml +++ b/crawler/Cargo.toml @@ -5,7 +5,7 @@ edition = "2024" [dependencies] image = "0.25.9" -reqwest = { version = "0.12.25", features = ["json"] } +reqwest = { version = "0.12.25", features = ["json", "rustls-tls"] } scraper = "0.25.0" robotstxt = "0.3.0" diff --git a/crawler/src/dlsite.rs b/crawler/src/dlsite.rs index a16e372..8c3c435 100755 --- a/crawler/src/dlsite.rs +++ b/crawler/src/dlsite.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet}; use std::path::{PathBuf}; use color_eyre::eyre::eyre; use color_eyre::owo_colors::OwoColorize; -use reqwest::{Url}; +use reqwest::{StatusCode, Url}; use color_eyre::{Report, Result}; use futures::stream::FuturesUnordered; use itertools::Itertools; @@ -42,7 +42,7 @@ impl DLSiteCrawler { Ok(crawler) } - pub async fn get_game_infos(&self, rj_nums: Vec, locale: &LanguageTag) -> Result>>> + pub async fn get_game_infos(&self, rj_nums: Vec, locale: &LanguageTag) -> Result, Report>>>> { let invalid_nums = rj_nums.iter() .filter(|&n| !is_valid_rj_number(n)) @@ -77,11 +77,14 @@ impl DLSiteCrawler { self.save_main_image(&info, &rj_num), self.crawler.get_html(&html_path, Some(&query)) ); - let (html, _) = html_result?; - let genres = self.get_work_genres(&html, locale.try_into()?).await?; - info.genre_ids = genres; + let (html, status) = html_result?; + if StatusCode::NOT_FOUND == status { + println!("{} is no longer available", rj_num); + return Ok(None); + } + info.genre_ids = self.get_work_genres(&html, locale.try_into()?).await?; info.rj_num = rj_num; - Ok::(info) + Ok::, Report>(Some(info)) }) } Ok(tasks) @@ -118,33 +121,29 @@ impl DLSiteCrawler { async fn get_work_genres(&self, html: &Html, primary_language: PrimaryLanguage) -> Result> { let selector = Result::unwrap( Selector::parse( - "#work_outline > tbody:nth-child(1)" + ".main_genre" ) ); - let genre_str = match primary_language { - PrimaryLanguage::EN => "Genre", - PrimaryLanguage::JP => "ジャンル" + let Some(result) = html.select(&selector).next() else { + return Err(eyre!("Genre is empty")); }; - - let result = html.select(&selector).next().unwrap(); - let genre_rows = result.child_elements().collect::>(); - let genre_row = genre_rows.iter() - .find(|v| v.first_element_child().unwrap().text().next().unwrap() == genre_str) - .unwrap(); - let data = genre_row - .child_elements().skip(1).next().unwrap() - .child_elements().next().unwrap(); - let genre_urls = data.child_elements() - .map(|e| e.attr("href").unwrap()) - .map(|s| Url::parse(s).unwrap()) - .collect::>(); - let genre_ids = genre_urls.iter() - .map(|x| { - x.path_segments().unwrap() - .skip(4).next().unwrap() - .parse::().unwrap() - }) - .collect::>(); + let mut genre_ids = Vec::new(); + for elem in result.child_elements() { + let Some(genre_href) = elem.attr("href") else { + return Err(eyre!("Genre url is empty")); + }; + let genre_url = Url::parse(genre_href)?; + let Some(path_segments) = genre_url.path_segments() else { + return Err(eyre!("Genre url has no segment: {}", genre_href)); + }; + let Some(genre_id) = genre_url.path_segments().unwrap() + .into_iter() + .skip(4) + .next() else { + return Err(eyre!("Invalid url: {}", genre_href)); + }; + genre_ids.push(genre_id.parse::()?); + } Ok(genre_ids) } } diff --git a/ui/src/cli/sync.rs b/ui/src/cli/sync.rs index bda4418..805c6d3 100755 --- a/ui/src/cli/sync.rs +++ b/ui/src/cli/sync.rs @@ -6,10 +6,9 @@ use crossterm::style::{style, Stylize}; use futures::StreamExt; use indicatif::{ProgressBar, ProgressStyle}; use itertools::Itertools; -use tokio::sync::Mutex; use tokio::time::Instant; use crawler::DLSiteCrawler; -use db::{RocksDBFactory}; +use db::RocksDBFactory; use models::config::ApplicationConfig; use models::dlsite::{DLSiteCategory, DLSiteGenre, DLSiteManiax, DLSiteTranslation}; use crate::helpers; @@ -48,7 +47,10 @@ impl DLSiteSyncCommand { pub async fn handle(&self) -> Result<()> { let now = Instant::now(); let app_conf = ApplicationConfig::get_config()?; - let db_factory = RocksDBFactory::default(); + let mut db_factory = RocksDBFactory::default(); + db_factory.register::(); + db_factory.register::(); + db_factory.register::(); let crawler = DLSiteCrawler::new()?; if self.do_sync_genre { let genre_now = Instant::now(); @@ -125,9 +127,10 @@ impl DLSiteSyncCommand { let progress = ProgressBar::new(game_infos.len() as u64) .with_style(ProgressStyle::default_bar()); - let shared_progress = Mutex::new(progress); while let Some(info) = game_infos.next().await { - let maniax = info?; + let Some(maniax) = info? else { + continue; + }; let existing_maniax = existing_game_infos.iter() .find(|v| v.rj_num == maniax.rj_num); if let Some(existing_maniax) = existing_maniax { @@ -145,7 +148,6 @@ impl DLSiteSyncCommand { value.folder_path = maniax_folder; modified_maniaxes.push(value); } - let progress = shared_progress.lock().await; progress.inc(1); } db.set_values(&modified_maniaxes)?; diff --git a/ui/src/widgets/views/main_view.rs b/ui/src/widgets/views/main_view.rs index de44cf2..73a9568 100755 --- a/ui/src/widgets/views/main_view.rs +++ b/ui/src/widgets/views/main_view.rs @@ -39,7 +39,20 @@ enum Status { impl MainView { pub fn new(mut db_factory: RocksDBFactory) -> color_eyre::Result { let db = db_factory.get_current_context()?; - let games = db.get_all_values::()?; + let mut games = db.get_all_values::()?; + games.sort_by(|a, b| { + let left = a.rj_num + .chars().skip(2) + .collect::() + .parse::() + .unwrap(); + let right = b.rj_num + .chars().skip(2) + .collect::() + .parse::() + .unwrap(); + left.cmp(&right) + }); let dl_game_list = GameList::new(games)?; let view = Self { state: MainViewState {