lots of things

changed DB storage format from JSON to MessagePack
removed the TUI
fixed the progress bar
added logging
This commit is contained in:
William
2026-01-08 22:15:45 +08:00
parent 743261ce4f
commit eb9dcd15e8
23 changed files with 100 additions and 896 deletions

View File

@@ -12,10 +12,11 @@ robotstxt = "0.3.0"
models = { path = "../models" }
tokio.workspace = true
serde.workspace = true
color-eyre.workspace = true
lazy_static.workspace = true
serde.workspace = true
serde_json.workspace = true
log.workspace = true
futures = "0.3.31"
itertools = "0.14.0"

View File

@@ -16,7 +16,6 @@ use models::dlsite::{matches_primary_language, PrimaryLanguage, JP_LOCALE};
use super::Crawler;
use models::dlsite::crawler::*;
//TODO: override locale with user one
const DLSITE_URL: &str = "https://www.dlsite.com/";
const DLSITE_PRODUCT_API_ENDPOINT: &str = "/maniax/product/info/ajax";
const DLSITE_FS_ENDPOINT: &str = "/maniax/fs/=/api_access/1/";
@@ -42,7 +41,14 @@ impl DLSiteCrawler {
Ok(crawler)
}
pub async fn get_game_infos(&self, rj_nums: Vec<String>, locale: &LanguageTag) -> Result<FuturesUnordered<impl Future<Output=Result<Option<DLSiteManiax>, Report>>>>
pub async fn get_game_infos(&self, rj_nums: Vec<String>, locale: &LanguageTag)
-> Result<
FuturesUnordered<
impl Future<
Output = Result<(DLSiteManiax, bool)>
>
>
>
{
let invalid_nums = rj_nums.iter()
.filter(|&n| !is_valid_rj_number(n))
@@ -65,8 +71,9 @@ impl DLSiteCrawler {
// try to catch '[]' empty result from the api
let value_downcast_result: Result<HashMap<String, DLSiteManiax>, _> = serde_json::from_value(value);
let maniax_result = value_downcast_result.unwrap_or(HashMap::new());
Self::verify_all_works_exists(&maniax_result, rj_nums);
if let Err(e) = Self::verify_all_works_exists(&maniax_result, rj_nums) {
println!("{}", e);
}
let tasks = FuturesUnordered::new();
for (rj_num, mut info) in maniax_result {
@@ -78,19 +85,18 @@ impl DLSiteCrawler {
self.crawler.get_html(&html_path, Some(&query))
);
let (html, status) = html_result?;
info.rj_num = rj_num;
if StatusCode::NOT_FOUND == status {
println!("{} is no longer available", rj_num);
return Ok(None);
return Ok((info, false));
}
info.genre_ids = self.get_work_genres(&html, locale.try_into()?).await?;
info.rj_num = rj_num;
Ok::<Option<DLSiteManiax>, Report>(Some(info))
Ok::<(DLSiteManiax, bool), Report>((info, true))
})
}
Ok(tasks)
}
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) {
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) -> Result<()> {
let keys = maniax_result.keys()
.map(|k| k.to_string())
.collect::<Vec<String>>();
@@ -100,8 +106,9 @@ impl DLSiteCrawler {
.map(|n| n.to_string())
.collect::<Vec<String>>();
if !nums_diff.is_empty() {
println!("Restricted/Removed Works: {}", nums_diff.join(", ").red());
return Err(eyre!("Restricted/Removed Works: {}", nums_diff.join(", ").red()));
}
Ok(())
}
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
@@ -133,9 +140,6 @@ impl DLSiteCrawler {
return Err(eyre!("Genre url is empty"));
};
let genre_url = Url::parse(genre_href)?;
let Some(path_segments) = genre_url.path_segments() else {
return Err(eyre!("Genre url has no segment: {}", genre_href));
};
let Some(genre_id) = genre_url.path_segments().unwrap()
.into_iter()
.skip(4)