Improve genre parsing
Fix game list sorting
This commit is contained in:
@@ -5,7 +5,7 @@ edition = "2024"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
image = "0.25.9"
|
image = "0.25.9"
|
||||||
reqwest = { version = "0.12.25", features = ["json"] }
|
reqwest = { version = "0.12.25", features = ["json", "rustls-tls"] }
|
||||||
scraper = "0.25.0"
|
scraper = "0.25.0"
|
||||||
robotstxt = "0.3.0"
|
robotstxt = "0.3.0"
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet};
|
|||||||
use std::path::{PathBuf};
|
use std::path::{PathBuf};
|
||||||
use color_eyre::eyre::eyre;
|
use color_eyre::eyre::eyre;
|
||||||
use color_eyre::owo_colors::OwoColorize;
|
use color_eyre::owo_colors::OwoColorize;
|
||||||
use reqwest::{Url};
|
use reqwest::{StatusCode, Url};
|
||||||
use color_eyre::{Report, Result};
|
use color_eyre::{Report, Result};
|
||||||
use futures::stream::FuturesUnordered;
|
use futures::stream::FuturesUnordered;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
@@ -42,7 +42,7 @@ impl DLSiteCrawler {
|
|||||||
Ok(crawler)
|
Ok(crawler)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_game_infos(&self, rj_nums: Vec<String>, locale: &LanguageTag) -> Result<FuturesUnordered<impl Future<Output=Result<DLSiteManiax, Report>>>>
|
pub async fn get_game_infos(&self, rj_nums: Vec<String>, locale: &LanguageTag) -> Result<FuturesUnordered<impl Future<Output=Result<Option<DLSiteManiax>, Report>>>>
|
||||||
{
|
{
|
||||||
let invalid_nums = rj_nums.iter()
|
let invalid_nums = rj_nums.iter()
|
||||||
.filter(|&n| !is_valid_rj_number(n))
|
.filter(|&n| !is_valid_rj_number(n))
|
||||||
@@ -77,11 +77,14 @@ impl DLSiteCrawler {
|
|||||||
self.save_main_image(&info, &rj_num),
|
self.save_main_image(&info, &rj_num),
|
||||||
self.crawler.get_html(&html_path, Some(&query))
|
self.crawler.get_html(&html_path, Some(&query))
|
||||||
);
|
);
|
||||||
let (html, _) = html_result?;
|
let (html, status) = html_result?;
|
||||||
let genres = self.get_work_genres(&html, locale.try_into()?).await?;
|
if StatusCode::NOT_FOUND == status {
|
||||||
info.genre_ids = genres;
|
println!("{} is no longer available", rj_num);
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
info.genre_ids = self.get_work_genres(&html, locale.try_into()?).await?;
|
||||||
info.rj_num = rj_num;
|
info.rj_num = rj_num;
|
||||||
Ok::<DLSiteManiax, Report>(info)
|
Ok::<Option<DLSiteManiax>, Report>(Some(info))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
@@ -118,33 +121,29 @@ impl DLSiteCrawler {
|
|||||||
async fn get_work_genres(&self, html: &Html, primary_language: PrimaryLanguage) -> Result<Vec<u16>> {
|
async fn get_work_genres(&self, html: &Html, primary_language: PrimaryLanguage) -> Result<Vec<u16>> {
|
||||||
let selector = Result::unwrap(
|
let selector = Result::unwrap(
|
||||||
Selector::parse(
|
Selector::parse(
|
||||||
"#work_outline > tbody:nth-child(1)"
|
".main_genre"
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
let genre_str = match primary_language {
|
let Some(result) = html.select(&selector).next() else {
|
||||||
PrimaryLanguage::EN => "Genre",
|
return Err(eyre!("Genre is empty"));
|
||||||
PrimaryLanguage::JP => "ジャンル"
|
|
||||||
};
|
};
|
||||||
|
let mut genre_ids = Vec::new();
|
||||||
let result = html.select(&selector).next().unwrap();
|
for elem in result.child_elements() {
|
||||||
let genre_rows = result.child_elements().collect::<Vec<_>>();
|
let Some(genre_href) = elem.attr("href") else {
|
||||||
let genre_row = genre_rows.iter()
|
return Err(eyre!("Genre url is empty"));
|
||||||
.find(|v| v.first_element_child().unwrap().text().next().unwrap() == genre_str)
|
};
|
||||||
.unwrap();
|
let genre_url = Url::parse(genre_href)?;
|
||||||
let data = genre_row
|
let Some(path_segments) = genre_url.path_segments() else {
|
||||||
.child_elements().skip(1).next().unwrap()
|
return Err(eyre!("Genre url has no segment: {}", genre_href));
|
||||||
.child_elements().next().unwrap();
|
};
|
||||||
let genre_urls = data.child_elements()
|
let Some(genre_id) = genre_url.path_segments().unwrap()
|
||||||
.map(|e| e.attr("href").unwrap())
|
.into_iter()
|
||||||
.map(|s| Url::parse(s).unwrap())
|
.skip(4)
|
||||||
.collect::<Vec<_>>();
|
.next() else {
|
||||||
let genre_ids = genre_urls.iter()
|
return Err(eyre!("Invalid url: {}", genre_href));
|
||||||
.map(|x| {
|
};
|
||||||
x.path_segments().unwrap()
|
genre_ids.push(genre_id.parse::<u16>()?);
|
||||||
.skip(4).next().unwrap()
|
}
|
||||||
.parse::<u16>().unwrap()
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
Ok(genre_ids)
|
Ok(genre_ids)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,10 +6,9 @@ use crossterm::style::{style, Stylize};
|
|||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use tokio::sync::Mutex;
|
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use crawler::DLSiteCrawler;
|
use crawler::DLSiteCrawler;
|
||||||
use db::{RocksDBFactory};
|
use db::RocksDBFactory;
|
||||||
use models::config::ApplicationConfig;
|
use models::config::ApplicationConfig;
|
||||||
use models::dlsite::{DLSiteCategory, DLSiteGenre, DLSiteManiax, DLSiteTranslation};
|
use models::dlsite::{DLSiteCategory, DLSiteGenre, DLSiteManiax, DLSiteTranslation};
|
||||||
use crate::helpers;
|
use crate::helpers;
|
||||||
@@ -48,7 +47,10 @@ impl DLSiteSyncCommand {
|
|||||||
pub async fn handle(&self) -> Result<()> {
|
pub async fn handle(&self) -> Result<()> {
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
let app_conf = ApplicationConfig::get_config()?;
|
let app_conf = ApplicationConfig::get_config()?;
|
||||||
let db_factory = RocksDBFactory::default();
|
let mut db_factory = RocksDBFactory::default();
|
||||||
|
db_factory.register::<DLSiteManiax>();
|
||||||
|
db_factory.register::<DLSiteGenre>();
|
||||||
|
db_factory.register::<DLSiteCategory>();
|
||||||
let crawler = DLSiteCrawler::new()?;
|
let crawler = DLSiteCrawler::new()?;
|
||||||
if self.do_sync_genre {
|
if self.do_sync_genre {
|
||||||
let genre_now = Instant::now();
|
let genre_now = Instant::now();
|
||||||
@@ -125,9 +127,10 @@ impl DLSiteSyncCommand {
|
|||||||
|
|
||||||
let progress = ProgressBar::new(game_infos.len() as u64)
|
let progress = ProgressBar::new(game_infos.len() as u64)
|
||||||
.with_style(ProgressStyle::default_bar());
|
.with_style(ProgressStyle::default_bar());
|
||||||
let shared_progress = Mutex::new(progress);
|
|
||||||
while let Some(info) = game_infos.next().await {
|
while let Some(info) = game_infos.next().await {
|
||||||
let maniax = info?;
|
let Some(maniax) = info? else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
let existing_maniax = existing_game_infos.iter()
|
let existing_maniax = existing_game_infos.iter()
|
||||||
.find(|v| v.rj_num == maniax.rj_num);
|
.find(|v| v.rj_num == maniax.rj_num);
|
||||||
if let Some(existing_maniax) = existing_maniax {
|
if let Some(existing_maniax) = existing_maniax {
|
||||||
@@ -145,7 +148,6 @@ impl DLSiteSyncCommand {
|
|||||||
value.folder_path = maniax_folder;
|
value.folder_path = maniax_folder;
|
||||||
modified_maniaxes.push(value);
|
modified_maniaxes.push(value);
|
||||||
}
|
}
|
||||||
let progress = shared_progress.lock().await;
|
|
||||||
progress.inc(1);
|
progress.inc(1);
|
||||||
}
|
}
|
||||||
db.set_values(&modified_maniaxes)?;
|
db.set_values(&modified_maniaxes)?;
|
||||||
|
|||||||
@@ -39,7 +39,20 @@ enum Status {
|
|||||||
impl MainView {
|
impl MainView {
|
||||||
pub fn new(mut db_factory: RocksDBFactory) -> color_eyre::Result<Self> {
|
pub fn new(mut db_factory: RocksDBFactory) -> color_eyre::Result<Self> {
|
||||||
let db = db_factory.get_current_context()?;
|
let db = db_factory.get_current_context()?;
|
||||||
let games = db.get_all_values::<DLSiteManiax>()?;
|
let mut games = db.get_all_values::<DLSiteManiax>()?;
|
||||||
|
games.sort_by(|a, b| {
|
||||||
|
let left = a.rj_num
|
||||||
|
.chars().skip(2)
|
||||||
|
.collect::<String>()
|
||||||
|
.parse::<u32>()
|
||||||
|
.unwrap();
|
||||||
|
let right = b.rj_num
|
||||||
|
.chars().skip(2)
|
||||||
|
.collect::<String>()
|
||||||
|
.parse::<u32>()
|
||||||
|
.unwrap();
|
||||||
|
left.cmp(&right)
|
||||||
|
});
|
||||||
let dl_game_list = GameList::new(games)?;
|
let dl_game_list = GameList::new(games)?;
|
||||||
let view = Self {
|
let view = Self {
|
||||||
state: MainViewState {
|
state: MainViewState {
|
||||||
|
|||||||
Reference in New Issue
Block a user