Change robots.txt storage to use the cache directory (APP_CACHE_PATH instead of APP_DATA_DIR)

This commit is contained in:
fromost
2025-10-09 17:19:04 +08:00
parent 2556781b45
commit 6f8a1eada1
3 changed files with 9 additions and 9 deletions

View File

@@ -1,9 +1,6 @@
use std::fs;
use reqwest::{Client, Url};
use robotstxt::DefaultMatcher;
use robotstxt::matcher::{LongestMatchRobotsMatchStrategy, RobotsMatcher};
use crate::constants::APP_DATA_DIR;
use crate::crawler;
use reqwest::{Client, ClientBuilder, Url};
use robotstxt::{DefaultMatcher, RobotsParseHandler};
use crate::constants::{APP_CACHE_PATH, APP_DATA_DIR};
pub(crate) struct Crawler {
id: String,
@@ -27,7 +24,8 @@ impl Crawler {
}
async fn get_robots_txt(id: &str, base_url: &Url) -> String {
let local_robots_path = APP_DATA_DIR.clone().join(id).join("robots.txt");
let local_robots_path = APP_CACHE_PATH.clone()
.join(id).join("robots.txt");
if !local_robots_path.exists() {
let mut robots_url = base_url.clone();
robots_url.set_path("/robots.txt");