From 6f8a1eada15360f9a84f5875d2a28e57e600bb99 Mon Sep 17 00:00:00 2001
From: fromost
Date: Thu, 9 Oct 2025 17:19:04 +0800
Subject: [PATCH] store robots.txt in the cache directory

---
 src/config/mod.rs  |  4 ++--
 src/constants.rs   |  2 ++
 src/crawler/mod.rs | 12 +++++-------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/config/mod.rs b/src/config/mod.rs
index aa3d599..59810f0 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -1,7 +1,7 @@
 use std::path::{PathBuf};
 use ini::Ini;
 use crate::config::types::{ApplicationConfig, BasicConfig};
-use crate::constants::{APP_CONFIG_DIR, APP_CONIFG_FILE_PATH};
+use crate::constants::{APP_CONFIG_DIR, APP_CONIFG_FILE_PATH, APP_DATA_DIR};
 
 pub mod types;
 
@@ -21,7 +21,7 @@ impl ApplicationConfig {
     pub fn new() -> Self {
         let conf = Self {
             basic_config: BasicConfig {
-                db_path: APP_CONFIG_DIR.clone().join("games.db").to_str().unwrap().to_string(),
+                db_path: APP_DATA_DIR.clone().join("games.db").to_str().unwrap().to_string(),
                 tick_rate: 250
             }
         };
diff --git a/src/constants.rs b/src/constants.rs
index 2dd2da1..7eb462a 100644
--- a/src/constants.rs
+++ b/src/constants.rs
@@ -9,6 +9,8 @@ lazy_static!(
         .join(APP_DIR_NAME);
     pub static ref APP_DATA_DIR: PathBuf = BASE_DIRS.data_dir().to_path_buf()
         .join(APP_DIR_NAME);
+    pub static ref APP_CACHE_PATH: PathBuf = BASE_DIRS.cache_dir().to_path_buf()
+        .join(APP_DIR_NAME);
     pub static ref APP_CONIFG_FILE_PATH: PathBuf = APP_CONFIG_DIR.clone()
         .join("config.ini");
 );
\ No newline at end of file
diff --git a/src/crawler/mod.rs b/src/crawler/mod.rs
index ec994a8..aa38899 100644
--- a/src/crawler/mod.rs
+++ b/src/crawler/mod.rs
@@ -1,9 +1,6 @@
-use std::fs;
-use reqwest::{Client, Url};
-use robotstxt::DefaultMatcher;
-use robotstxt::matcher::{LongestMatchRobotsMatchStrategy, RobotsMatcher};
-use crate::constants::APP_DATA_DIR;
-use crate::crawler;
+use reqwest::{Client, ClientBuilder, Url};
+use robotstxt::{DefaultMatcher, RobotsParseHandler};
+use crate::constants::{APP_CACHE_PATH, APP_DATA_DIR};
 
 pub(crate) struct Crawler {
     id: String,
@@ -27,7 +24,8 @@ impl Crawler {
     }
 
     async fn get_robots_txt(id: &str, base_url: &Url) -> String {
-        let local_robots_path = APP_DATA_DIR.clone().join(id).join("robots.txt");
+        let local_robots_path = APP_CACHE_PATH.clone()
+            .join(id).join("robots.txt");
         if !local_robots_path.exists() {
             let mut robots_url = base_url.clone();
             robots_url.set_path("/robots.txt");
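
Below is a minimal, self-contained sketch of the cache-backed lookup that get_robots_txt now points at, assuming reqwest and tokio as dependencies. Only the path change at the top of the function is visible in the hunk above; the fetch-and-store half, the cache_root parameter standing in for APP_CACHE_PATH, and the Result return type are illustrative assumptions, not the project's actual implementation.

// Sketch only: cache robots.txt per crawler id under the cache directory,
// mirroring APP_CACHE_PATH.join(id).join("robots.txt") from the patch.
use std::fs;
use std::path::{Path, PathBuf};
use reqwest::{Client, Url};

async fn get_robots_txt(
    client: &Client,
    cache_root: &Path, // stands in for APP_CACHE_PATH (assumption)
    id: &str,
    base_url: &Url,
) -> Result<String, Box<dyn std::error::Error>> {
    let local_robots_path: PathBuf = cache_root.join(id).join("robots.txt");

    if !local_robots_path.exists() {
        // Cache miss: fetch /robots.txt from the site root...
        let mut robots_url = base_url.clone();
        robots_url.set_path("/robots.txt");
        let body = client.get(robots_url).send().await?.text().await?;

        // ...and persist it so later runs skip the network round-trip.
        if let Some(parent) = local_robots_path.parent() {
            fs::create_dir_all(parent)?;
        }
        fs::write(&local_robots_path, &body)?;
        return Ok(body);
    }

    // Cache hit: serve the copy stored on disk.
    Ok(fs::read_to_string(&local_robots_path)?)
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical cache root and crawler id, used only for this example.
    let client = Client::new();
    let cache_root = std::env::temp_dir().join("crawler-cache");
    let base_url = Url::parse("https://example.com")?;

    let robots = get_robots_txt(&client, &cache_root, "example", &base_url).await?;
    println!("{} bytes of robots.txt", robots.len());
    Ok(())
}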