Add dlsite cli add folder

This commit is contained in:
2025-10-10 23:57:29 +08:00
parent 473ef8452b
commit 4a0fa965c6
11 changed files with 443 additions and 50 deletions

10
src/crawler/dlsite.rs Normal file
View File

@@ -0,0 +1,10 @@
use crate::crawler::Crawler;
/// Wrapper around the shared [`Crawler`].
///
/// Judging by its name this is meant for DLsite-specific crawling; no such
/// behaviour is implemented yet.
#[derive(Clone)]
pub(crate) struct DLSiteCrawler {
    /// The underlying crawler this type holds.
    crawler: Crawler,
}

// No methods yet; placeholder for future functionality.
impl DLSiteCrawler {}

View File

@@ -1,6 +1,9 @@
mod dlsite;
use reqwest::{Client, Url};
use robotstxt::{DefaultMatcher};
use color_eyre::Result;
use scraper::Html;
use crate::constants::{APP_CACHE_PATH};
#[derive(Clone)]
@@ -20,8 +23,10 @@ impl Crawler {
base_url,
};
let mut matcher = DefaultMatcher::default();
let access_allowed = matcher.one_agent_allowed_by_robots(&crawler.robots_txt, "reqwest", crawler.base_url.as_str());
assert_eq!(true, access_allowed);
let is_access_allowed = matcher.one_agent_allowed_by_robots(&crawler.robots_txt, "reqwest", crawler.base_url.as_str());
if !is_access_allowed {
panic!("Crawler cannot access site {}", crawler.base_url.as_str());
}
crawler
}
@@ -43,4 +48,11 @@ impl Crawler {
Ok(tokio::fs::read_to_string(&local_robots_path).await?)
}
}
/// Fetches `path` on the crawler's base URL and parses the response body as an HTML document.
///
/// The request URL is a copy of `self.base_url` with its path component replaced by `path`.
///
/// # Errors
///
/// Returns an error if the request cannot be sent, if the server answers with a
/// 4xx/5xx status, or if the response body cannot be read as text.
pub async fn get_html(&self, path: &str) -> Result<Html> {
    let mut url = self.base_url.clone();
    url.set_path(path);

    let response = self
        .client
        .get(url)
        .send()
        .await?
        // Surface HTTP error statuses instead of parsing an error page as if it were content.
        .error_for_status()?;

    let html_text = response.text().await?;
    Ok(Html::parse_document(&html_text))
}
}