lots of things
Changed the DB storage method from JSON to MessagePack; removed the TUI; fixed the progress bar; added logging.
This commit is contained in:
@@ -12,10 +12,11 @@ robotstxt = "0.3.0"
|
||||
models = { path = "../models" }
|
||||
|
||||
tokio.workspace = true
|
||||
serde.workspace = true
|
||||
color-eyre.workspace = true
|
||||
lazy_static.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
log.workspace = true
|
||||
|
||||
futures = "0.3.31"
|
||||
itertools = "0.14.0"
|
||||
|
||||
@@ -16,7 +16,6 @@ use models::dlsite::{matches_primary_language, PrimaryLanguage, JP_LOCALE};
|
||||
use super::Crawler;
|
||||
use models::dlsite::crawler::*;
|
||||
|
||||
//TODO: override locale with user one
|
||||
const DLSITE_URL: &str = "https://www.dlsite.com/";
|
||||
const DLSITE_PRODUCT_API_ENDPOINT: &str = "/maniax/product/info/ajax";
|
||||
const DLSITE_FS_ENDPOINT: &str = "/maniax/fs/=/api_access/1/";
|
||||
@@ -42,7 +41,14 @@ impl DLSiteCrawler {
|
||||
Ok(crawler)
|
||||
}
|
||||
|
||||
pub async fn get_game_infos(&self, rj_nums: Vec<String>, locale: &LanguageTag) -> Result<FuturesUnordered<impl Future<Output=Result<Option<DLSiteManiax>, Report>>>>
|
||||
pub async fn get_game_infos(&self, rj_nums: Vec<String>, locale: &LanguageTag)
|
||||
-> Result<
|
||||
FuturesUnordered<
|
||||
impl Future<
|
||||
Output = Result<(DLSiteManiax, bool)>
|
||||
>
|
||||
>
|
||||
>
|
||||
{
|
||||
let invalid_nums = rj_nums.iter()
|
||||
.filter(|&n| !is_valid_rj_number(n))
|
||||
@@ -65,8 +71,9 @@ impl DLSiteCrawler {
|
||||
// try to catch '[]' empty result from the api
|
||||
let value_downcast_result: Result<HashMap<String, DLSiteManiax>, _> = serde_json::from_value(value);
|
||||
let maniax_result = value_downcast_result.unwrap_or(HashMap::new());
|
||||
|
||||
Self::verify_all_works_exists(&maniax_result, rj_nums);
|
||||
if let Err(e) = Self::verify_all_works_exists(&maniax_result, rj_nums) {
|
||||
println!("{}", e);
|
||||
}
|
||||
|
||||
let tasks = FuturesUnordered::new();
|
||||
for (rj_num, mut info) in maniax_result {
|
||||
@@ -78,19 +85,18 @@ impl DLSiteCrawler {
|
||||
self.crawler.get_html(&html_path, Some(&query))
|
||||
);
|
||||
let (html, status) = html_result?;
|
||||
info.rj_num = rj_num;
|
||||
if StatusCode::NOT_FOUND == status {
|
||||
println!("{} is no longer available", rj_num);
|
||||
return Ok(None);
|
||||
return Ok((info, false));
|
||||
}
|
||||
info.genre_ids = self.get_work_genres(&html, locale.try_into()?).await?;
|
||||
info.rj_num = rj_num;
|
||||
Ok::<Option<DLSiteManiax>, Report>(Some(info))
|
||||
Ok::<(DLSiteManiax, bool), Report>((info, true))
|
||||
})
|
||||
}
|
||||
Ok(tasks)
|
||||
}
|
||||
|
||||
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) {
|
||||
fn verify_all_works_exists(maniax_result: &HashMap<String, DLSiteManiax>, rj_nums: Vec<String>) -> Result<()> {
|
||||
let keys = maniax_result.keys()
|
||||
.map(|k| k.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
@@ -100,8 +106,9 @@ impl DLSiteCrawler {
|
||||
.map(|n| n.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
if !nums_diff.is_empty() {
|
||||
println!("Restricted/Removed Works: {}", nums_diff.join(", ").red());
|
||||
return Err(eyre!("Restricted/Removed Works: {}", nums_diff.join(", ").red()));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
|
||||
@@ -133,9 +140,6 @@ impl DLSiteCrawler {
|
||||
return Err(eyre!("Genre url is empty"));
|
||||
};
|
||||
let genre_url = Url::parse(genre_href)?;
|
||||
let Some(path_segments) = genre_url.path_segments() else {
|
||||
return Err(eyre!("Genre url has no segment: {}", genre_href));
|
||||
};
|
||||
let Some(genre_id) = genre_url.path_segments().unwrap()
|
||||
.into_iter()
|
||||
.skip(4)
|
||||
|
||||
Reference in New Issue
Block a user