Add a dlsite crawler module (new folder) to the CLI
This commit is contained in:
10
src/crawler/dlsite.rs
Normal file
10
src/crawler/dlsite.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
use crate::crawler::Crawler;

/// Crawler specialised for DLSite, wrapping the generic [`Crawler`].
///
/// NOTE(review): no DLSite-specific behavior exists yet — this is
/// scaffolding; site-specific fetch/parse methods are presumably meant
/// to be added to the `impl` block below.
#[derive(Clone)]
pub(crate) struct DLSiteCrawler {
    // Generic HTTP crawler used to fetch pages from the DLSite host.
    crawler: Crawler,
}

impl DLSiteCrawler {
    // Intentionally empty for now — DLSite-specific methods go here.
}
|
||||
@@ -1,6 +1,9 @@
|
||||
mod dlsite;
|
||||
|
||||
use reqwest::{Client, Url};
|
||||
use robotstxt::{DefaultMatcher};
|
||||
use color_eyre::Result;
|
||||
use scraper::Html;
|
||||
use crate::constants::{APP_CACHE_PATH};
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -20,8 +23,10 @@ impl Crawler {
|
||||
base_url,
|
||||
};
|
||||
let mut matcher = DefaultMatcher::default();
|
||||
let access_allowed = matcher.one_agent_allowed_by_robots(&crawler.robots_txt, "reqwest", crawler.base_url.as_str());
|
||||
assert_eq!(true, access_allowed);
|
||||
let is_access_allowed = matcher.one_agent_allowed_by_robots(&crawler.robots_txt, "reqwest", crawler.base_url.as_str());
|
||||
if !is_access_allowed {
|
||||
panic!("Crawler cannot access site {}", crawler.base_url.as_str());
|
||||
}
|
||||
crawler
|
||||
}
|
||||
|
||||
@@ -43,4 +48,11 @@ impl Crawler {
|
||||
Ok(tokio::fs::read_to_string(&local_robots_path).await?)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_html(&self, path: &str) -> Result<Html> {
|
||||
let mut url = self.base_url.clone();
|
||||
url.set_path(path);
|
||||
let html_text = &self.client.get(url).send().await?.text().await?;
|
||||
Ok(Html::parse_document(html_text))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user