store robots.txt in the cache directory instead of the data directory
@@ -1,9 +1,6 @@
-use std::fs;
-use reqwest::{Client, Url};
-use robotstxt::DefaultMatcher;
-use robotstxt::matcher::{LongestMatchRobotsMatchStrategy, RobotsMatcher};
-use crate::constants::APP_DATA_DIR;
-use crate::crawler;
+use reqwest::{Client, ClientBuilder, Url};
+use robotstxt::{DefaultMatcher, RobotsParseHandler};
+use crate::constants::{APP_CACHE_PATH, APP_DATA_DIR};
 
 pub(crate) struct Crawler {
     id: String,
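
The imports now pull DefaultMatcher from the crate root. For context, a minimal sketch of how a fetched robots.txt body is typically consulted with this matcher; the user agent string and URL below are placeholders and do not appear in this commit:

    use robotstxt::DefaultMatcher;

    fn is_allowed(robots_body: &str, url: &str) -> bool {
        let mut matcher = DefaultMatcher::default();
        // "MyCrawler" is a placeholder user agent, not the one this project uses.
        matcher.one_agent_allowed_by_robots(robots_body, "MyCrawler", url)
    }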
@@ -27,7 +24,8 @@ impl Crawler {
     }
 
     async fn get_robots_txt(id: &str, base_url: &Url) -> String {
-        let local_robots_path = APP_DATA_DIR.clone().join(id).join("robots.txt");
+        let local_robots_path = APP_CACHE_PATH.clone()
+            .join(id).join("robots.txt");
         if !local_robots_path.exists() {
             let mut robots_url = base_url.clone();
             robots_url.set_path("/robots.txt");
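
A sketch of how get_robots_txt might continue past the lines shown in the hunk: a cache miss fetches /robots.txt and persists it, a cache hit reads the stored copy back. Everything after the set_path call is an assumption, as is the standalone app_cache_path helper standing in for the crate's APP_CACHE_PATH constant:

    use std::fs;
    use std::path::PathBuf;
    use reqwest::Url;

    // Hypothetical stand-in for the crate's APP_CACHE_PATH constant.
    fn app_cache_path() -> PathBuf {
        std::env::temp_dir().join("app-cache")
    }

    async fn get_robots_txt(id: &str, base_url: &Url) -> String {
        let local_robots_path = app_cache_path().join(id).join("robots.txt");
        if !local_robots_path.exists() {
            // Cache miss: fetch /robots.txt from the site root, then persist it.
            let mut robots_url = base_url.clone();
            robots_url.set_path("/robots.txt");
            let body = match reqwest::get(robots_url).await {
                Ok(resp) => resp.text().await.unwrap_or_default(),
                Err(_) => String::new(), // treat a failed fetch as an empty robots.txt
            };
            if let Some(parent) = local_robots_path.parent() {
                let _ = fs::create_dir_all(parent);
            }
            let _ = fs::write(&local_robots_path, &body);
            body
        } else {
            // Cache hit: read the previously stored copy.
            fs::read_to_string(&local_robots_path).unwrap_or_default()
        }
    }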