Add basic dlsite crawler
Reformat application config
This commit is contained in:
788
Cargo.lock
generated
788
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -9,14 +9,13 @@ edition = "2024"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
color-eyre = "0.6.3"
|
color-eyre = "0.6.3"
|
||||||
futures = "0.3.28"
|
futures = "0.3.28"
|
||||||
tokio-util = "0.7.9"
|
|
||||||
tokio-utils = "0.1.2"
|
|
||||||
directories = "6.0.0"
|
directories = "6.0.0"
|
||||||
lazy_static = "1.5.0"
|
lazy_static = "1.5.0"
|
||||||
robotstxt = "0.3.0"
|
robotstxt = "0.3.0"
|
||||||
scraper = "0.24.0"
|
scraper = "0.24.0"
|
||||||
rat-cursor = "1.2.1"
|
rat-cursor = "1.2.1"
|
||||||
serde_json = "1.0.145"
|
serde_json = "1.0.145"
|
||||||
|
image = "0.25.8"
|
||||||
|
|
||||||
[dependencies.serde]
|
[dependencies.serde]
|
||||||
version = "1.0.228"
|
version = "1.0.228"
|
||||||
@@ -41,7 +40,7 @@ features = ["derive", "cargo"]
|
|||||||
|
|
||||||
[dependencies.reqwest]
|
[dependencies.reqwest]
|
||||||
version = "0.12.23"
|
version = "0.12.23"
|
||||||
features = ["blocking"]
|
features = ["blocking", "json"]
|
||||||
|
|
||||||
[dependencies.tokio]
|
[dependencies.tokio]
|
||||||
version = "1.47.1"
|
version = "1.47.1"
|
||||||
@@ -50,11 +49,3 @@ features = ["full"]
|
|||||||
[dependencies.diesel]
|
[dependencies.diesel]
|
||||||
version = "2.3.2"
|
version = "2.3.2"
|
||||||
features = ["sqlite"]
|
features = ["sqlite"]
|
||||||
|
|
||||||
[dependencies.libsqlite3-sys]
|
|
||||||
version = "0.35.0"
|
|
||||||
features = ["bundled"]
|
|
||||||
|
|
||||||
[dependencies.uuid]
|
|
||||||
version = "1.18.1"
|
|
||||||
features = ["v4"]
|
|
||||||
|
|||||||
54
src/app.rs
54
src/app.rs
@@ -1,5 +1,5 @@
|
|||||||
use crate::config::types::ApplicationConfig;
|
use crate::config::types::ApplicationConfig;
|
||||||
use crate::constants::{APP_CONFIG_DIR, APP_CONIFG_FILE_PATH, APP_DATA_DIR};
|
use crate::constants::{APP_CONFIG_DIR, APP_DATA_DIR};
|
||||||
use crate::event::{AppEvent, EventHandler};
|
use crate::event::{AppEvent, EventHandler};
|
||||||
use crate::widgets::views::MainView;
|
use crate::widgets::views::MainView;
|
||||||
use crate::widgets::views::View;
|
use crate::widgets::views::View;
|
||||||
@@ -11,11 +11,12 @@ use rat_cursor::HasScreenCursor;
|
|||||||
use ratatui::{DefaultTerminal, Frame};
|
use ratatui::{DefaultTerminal, Frame};
|
||||||
use std::any::Any;
|
use std::any::Any;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
use tokio::fs;
|
||||||
|
use crate::crawler::DLSITE_IMG_FOLDER;
|
||||||
|
|
||||||
pub(crate) struct App {
|
pub(crate) struct App {
|
||||||
events: EventHandler,
|
events: EventHandler,
|
||||||
db_connection: SqliteConnection,
|
db_connection: SqliteConnection,
|
||||||
app_config: ApplicationConfig,
|
|
||||||
state: AppState,
|
state: AppState,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -24,36 +25,22 @@ struct AppState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl App {
|
impl App {
|
||||||
pub async fn create() -> Self {
|
pub async fn create() -> Result<Self> {
|
||||||
let app_conf = if APP_CONIFG_FILE_PATH.exists() {
|
let config = ApplicationConfig::get_config()?;
|
||||||
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH).unwrap()
|
let db_conn = Self::establish_db_connection(&config);
|
||||||
} else {
|
|
||||||
ApplicationConfig::new()
|
|
||||||
};
|
|
||||||
Self::initialize_folders();
|
|
||||||
let db_conn = Self::establish_db_connection(app_conf.clone());
|
|
||||||
let state = AppState {
|
let state = AppState {
|
||||||
view: Some(Box::new(MainView::new(&app_conf))),
|
view: Some(Box::new(MainView::new())),
|
||||||
};
|
};
|
||||||
Self {
|
let app = Self {
|
||||||
events: EventHandler::new(Duration::from_millis(app_conf.basic_config.tick_rate)),
|
events: EventHandler::new(Duration::from_millis(config.basic_config.tick_rate)),
|
||||||
db_connection: db_conn,
|
db_connection: db_conn,
|
||||||
app_config: app_conf,
|
|
||||||
state,
|
state,
|
||||||
}
|
};
|
||||||
|
Ok(app)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn initialize_folders() {
|
fn establish_db_connection(application_config: &ApplicationConfig) -> SqliteConnection {
|
||||||
if !APP_CONFIG_DIR.exists() {
|
let database_url = application_config.clone().basic_config.db_path;
|
||||||
std::fs::create_dir_all(APP_CONFIG_DIR.as_path()).unwrap();
|
|
||||||
}
|
|
||||||
if !APP_DATA_DIR.exists() {
|
|
||||||
std::fs::create_dir_all(APP_DATA_DIR.as_path()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn establish_db_connection(application_config: ApplicationConfig) -> SqliteConnection {
|
|
||||||
let database_url = application_config.basic_config.db_path;
|
|
||||||
SqliteConnection::establish(&database_url)
|
SqliteConnection::establish(&database_url)
|
||||||
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
|
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
|
||||||
}
|
}
|
||||||
@@ -105,7 +92,7 @@ impl App {
|
|||||||
if let Some(view) = self.state.view.as_mut() {
|
if let Some(view) = self.state.view.as_mut() {
|
||||||
if let Some(main_view) = view.downcast_mut::<MainView>() {
|
if let Some(main_view) = view.downcast_mut::<MainView>() {
|
||||||
frame.render_stateful_widget(
|
frame.render_stateful_widget(
|
||||||
MainView::new(&self.app_config),
|
MainView::new(),
|
||||||
frame.area(),
|
frame.area(),
|
||||||
&mut main_view.state,
|
&mut main_view.state,
|
||||||
);
|
);
|
||||||
@@ -116,3 +103,16 @@ impl App {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub async fn initialize_folders() -> Result<()> {
|
||||||
|
if !APP_CONFIG_DIR.exists() {
|
||||||
|
fs::create_dir_all(APP_CONFIG_DIR.as_path()).await?;
|
||||||
|
}
|
||||||
|
if !APP_DATA_DIR.exists() {
|
||||||
|
fs::create_dir_all(APP_DATA_DIR.as_path()).await?;
|
||||||
|
}
|
||||||
|
if !DLSITE_IMG_FOLDER.exists() {
|
||||||
|
fs::create_dir_all(DLSITE_IMG_FOLDER.as_path()).await?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|||||||
63
src/cli.rs
63
src/cli.rs
@@ -6,7 +6,9 @@ use color_eyre::Result;
|
|||||||
use ratatui::crossterm;
|
use ratatui::crossterm;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use color_eyre::eyre::eyre;
|
use color_eyre::eyre::eyre;
|
||||||
|
use crate::crawler::DLSiteCrawler;
|
||||||
|
|
||||||
|
// region Folder Command
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
struct FolderAddCommand {
|
struct FolderAddCommand {
|
||||||
path: String,
|
path: String,
|
||||||
@@ -22,10 +24,29 @@ struct FolderCommand {
|
|||||||
#[command(subcommand)]
|
#[command(subcommand)]
|
||||||
subcommand: FolderSubCommand,
|
subcommand: FolderSubCommand,
|
||||||
}
|
}
|
||||||
|
// endregion
|
||||||
|
|
||||||
|
// region Sync
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
struct SyncCommand {
|
||||||
|
#[command(subcommand)]
|
||||||
|
subcommand: SyncSubCommand,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
enum SyncSubCommand {
|
||||||
|
DLSite(SyncDLSiteCommand)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
struct SyncDLSiteCommand;
|
||||||
|
|
||||||
|
// endregion
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
enum CliSubCommand {
|
enum CliSubCommand {
|
||||||
Folder(FolderCommand),
|
Folder(FolderCommand),
|
||||||
|
Sync(SyncCommand),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
@@ -39,15 +60,19 @@ impl Subcommand for Cli {
|
|||||||
fn augment_subcommands(cmd: Command) -> Command {
|
fn augment_subcommands(cmd: Command) -> Command {
|
||||||
cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
|
cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
|
||||||
.subcommand_required(true)
|
.subcommand_required(true)
|
||||||
|
.subcommand(SyncCommand::augment_args(Command::new("sync")))
|
||||||
|
.subcommand_required(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn augment_subcommands_for_update(cmd: Command) -> Command {
|
fn augment_subcommands_for_update(cmd: Command) -> Command {
|
||||||
cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
|
cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
|
||||||
.subcommand_required(true)
|
.subcommand_required(true)
|
||||||
|
.subcommand(SyncCommand::augment_args(Command::new("sync")))
|
||||||
|
.subcommand_required(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn has_subcommand(name: &str) -> bool {
|
fn has_subcommand(name: &str) -> bool {
|
||||||
matches!(name, "folder")
|
matches!(name, "folder" | "sync")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,8 +92,25 @@ impl Subcommand for FolderCommand {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Subcommand for SyncCommand {
|
||||||
|
fn augment_subcommands(cmd: Command) -> Command {
|
||||||
|
cmd.subcommand(SyncDLSiteCommand::augment_args(Command::new("dlsite")))
|
||||||
|
.subcommand_required(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn augment_subcommands_for_update(cmd: Command) -> Command {
|
||||||
|
cmd.subcommand(SyncDLSiteCommand::augment_args(Command::new("dlsite")))
|
||||||
|
.subcommand_required(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn has_subcommand(name: &str) -> bool {
|
||||||
|
matches!(name, "dlsite")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Cli {
|
impl Cli {
|
||||||
pub async fn run(&self) -> Result<()> {
|
pub async fn run(&self) -> Result<()> {
|
||||||
|
app::initialize_folders().await?;
|
||||||
if self.subcommand.is_none() {
|
if self.subcommand.is_none() {
|
||||||
return self.start_tui().await;
|
return self.start_tui().await;
|
||||||
}
|
}
|
||||||
@@ -82,7 +124,7 @@ impl Cli {
|
|||||||
crossterm::terminal::enable_raw_mode()?;
|
crossterm::terminal::enable_raw_mode()?;
|
||||||
|
|
||||||
let mut terminal = ratatui::init();
|
let mut terminal = ratatui::init();
|
||||||
let app = app::App::create().await;
|
let app = app::App::create().await?;
|
||||||
let result = app.run(&mut terminal).await;
|
let result = app.run(&mut terminal).await;
|
||||||
ratatui::restore();
|
ratatui::restore();
|
||||||
|
|
||||||
@@ -95,6 +137,7 @@ impl CliSubCommand {
|
|||||||
pub async fn handle(&self) -> Result<()> {
|
pub async fn handle(&self) -> Result<()> {
|
||||||
match self {
|
match self {
|
||||||
CliSubCommand::Folder(cmd) => cmd.subcommand.handle().await,
|
CliSubCommand::Folder(cmd) => cmd.subcommand.handle().await,
|
||||||
|
CliSubCommand::Sync(cmd) => cmd.subcommand.handle().await,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -107,6 +150,22 @@ impl FolderSubCommand {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl SyncSubCommand {
|
||||||
|
pub async fn handle(&self) -> Result<()> {
|
||||||
|
match self {
|
||||||
|
Self::DLSite(cmd) => cmd.handle().await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncDLSiteCommand {
|
||||||
|
pub async fn handle(&self) -> Result<()> {
|
||||||
|
let crawler = DLSiteCrawler::new();
|
||||||
|
crawler.get_game_info("RJ163319").await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl FolderAddCommand {
|
impl FolderAddCommand {
|
||||||
pub async fn handle(&self) -> Result<()> {
|
pub async fn handle(&self) -> Result<()> {
|
||||||
let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?;
|
let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?;
|
||||||
|
|||||||
@@ -7,6 +7,14 @@ use serde_json;
|
|||||||
pub mod types;
|
pub mod types;
|
||||||
|
|
||||||
impl ApplicationConfig {
|
impl ApplicationConfig {
|
||||||
|
pub fn get_config() -> Result<Self> {
|
||||||
|
if APP_CONIFG_FILE_PATH.exists() {
|
||||||
|
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH)
|
||||||
|
} else {
|
||||||
|
Ok(ApplicationConfig::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn from_file(path: &PathBuf) -> Result<Self> {
|
pub fn from_file(path: &PathBuf) -> Result<Self> {
|
||||||
let reader = std::fs::File::open(path)?;
|
let reader = std::fs::File::open(path)?;
|
||||||
let result = serde_json::from_reader(reader)?;
|
let result = serde_json::from_reader(reader)?;
|
||||||
|
|||||||
@@ -1,18 +1,18 @@
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
pub(crate) struct ApplicationConfig {
|
pub struct ApplicationConfig {
|
||||||
pub(crate) basic_config: BasicConfig,
|
pub basic_config: BasicConfig,
|
||||||
pub(crate) path_config: PathConfig,
|
pub path_config: PathConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
pub(crate) struct BasicConfig {
|
pub(crate) struct BasicConfig {
|
||||||
pub(crate) db_path: String,
|
pub db_path: String,
|
||||||
pub(crate) tick_rate: u64,
|
pub tick_rate: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
pub(crate) struct PathConfig {
|
pub struct PathConfig {
|
||||||
pub(crate) dlsite_paths: Vec<String>,
|
pub dlsite_paths: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use directories::BaseDirs;
|
use directories::BaseDirs;
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use crate::config::types::ApplicationConfig;
|
||||||
|
|
||||||
const APP_DIR_NAME: &str = "sus_manager";
|
const APP_DIR_NAME: &str = "sus_manager";
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
|
|||||||
@@ -1,8 +1,78 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use color_eyre::eyre::eyre;
|
||||||
|
use reqwest::Url;
|
||||||
|
use color_eyre::Result;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use crate::constants::APP_DATA_DIR;
|
||||||
use crate::crawler::Crawler;
|
use crate::crawler::Crawler;
|
||||||
|
|
||||||
|
const DLSITE_URL: &str = "https://www.dlsite.com/";
|
||||||
|
const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax";
|
||||||
|
lazy_static! {
|
||||||
|
pub static ref DLSITE_IMG_FOLDER: PathBuf = APP_DATA_DIR.clone().join("dlsite").join("img");
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct DLSiteCrawler {
|
pub struct DLSiteCrawler {
|
||||||
crawler: Crawler,
|
crawler: Crawler,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DLSiteCrawler {}
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct DLSiteManiax {
|
||||||
|
pub work_name: String,
|
||||||
|
#[serde(rename = "work_image")]
|
||||||
|
work_image_url: String,
|
||||||
|
#[serde(rename = "dl_count")]
|
||||||
|
pub sells_count: u32
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DLSiteCrawler {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
crawler: Crawler::new("DLSite", Url::parse(DLSITE_URL).unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `rj_num` is `RJ` followed by exactly 6 or 8 ASCII digits.
///
/// Only ASCII digits are accepted. The earlier check combined the byte length
/// with `char::is_numeric`, which let multi-byte Unicode numerics (for example
/// full-width digits) pass as long as the byte count happened to be 8 or 10.
fn is_valid_number(rj_num: &str) -> bool {
    match rj_num.strip_prefix("RJ") {
        // All bytes are ASCII digits, so the byte length equals the digit count.
        Some(digits) => {
            matches!(digits.len(), 6 | 8) && digits.bytes().all(|b| b.is_ascii_digit())
        }
        None => false,
    }
}
|
||||||
|
|
||||||
|
pub async fn get_game_info(&self, rj_num: &str) -> Result<DLSiteManiax> {
|
||||||
|
if !Self::is_valid_number(rj_num) {
|
||||||
|
return Err(eyre!("Invalid number: {}", rj_num));
|
||||||
|
}
|
||||||
|
let mut api_url = self.crawler.base_url.clone();
|
||||||
|
api_url.set_path(DLSITE_API_ENDPOINT);
|
||||||
|
api_url.set_query(Some(&format!("product_id={}", rj_num)));
|
||||||
|
let res = self.crawler.client.get(api_url).send().await?;
|
||||||
|
let maniax_result = match res.json::<HashMap<String, DLSiteManiax>>().await {
|
||||||
|
Ok(maniax_result) => maniax_result,
|
||||||
|
Err(_) => return Err(eyre!("Maniax {} is restricted/removed", rj_num)),
|
||||||
|
};
|
||||||
|
let maniax_info = maniax_result.iter().next().unwrap().1.clone();
|
||||||
|
self.save_main_image(&maniax_info, rj_num).await?;
|
||||||
|
Ok(maniax_info)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
|
||||||
|
let url_string = format!("https:{}", info.work_image_url);
|
||||||
|
let url = Url::parse(&url_string)?;
|
||||||
|
let img_res = self.crawler.client.get(url).send().await?;
|
||||||
|
let img_bytes = img_res.bytes().await?;
|
||||||
|
let img = image::load_from_memory(&img_bytes)?;
|
||||||
|
img.save(DLSITE_IMG_FOLDER.clone().join(format!("{}.jpg", rj_num)).as_path())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,48 +1,58 @@
|
|||||||
mod dlsite;
|
mod dlsite;
|
||||||
|
|
||||||
|
pub use dlsite::*;
|
||||||
|
use color_eyre::eyre::eyre;
|
||||||
use crate::constants::APP_CACHE_PATH;
|
use crate::constants::APP_CACHE_PATH;
|
||||||
use color_eyre::Result;
|
use color_eyre::Result;
|
||||||
use reqwest::{Client, Url};
|
use reqwest::{Client, StatusCode, Url};
|
||||||
use robotstxt::DefaultMatcher;
|
use robotstxt::DefaultMatcher;
|
||||||
use scraper::Html;
|
use scraper::Html;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct Crawler {
|
struct Crawler {
|
||||||
id: String,
|
id: String,
|
||||||
base_url: Url,
|
pub(crate) base_url: Url,
|
||||||
client: Client,
|
pub(crate) client: Client,
|
||||||
robots_txt: String,
|
robots_txt: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Crawler {
|
impl Crawler {
|
||||||
pub async fn new(id: &str, base_url: Url) -> Self {
|
pub fn new(id: &str, base_url: Url) -> Self {
|
||||||
let crawler = Self {
|
let crawler = Self {
|
||||||
id: id.to_string(),
|
id: id.to_string(),
|
||||||
client: Client::new(),
|
client: Client::new(),
|
||||||
robots_txt: Self::get_robots_txt(id, &base_url).await.unwrap(),
|
robots_txt: None,
|
||||||
base_url,
|
base_url,
|
||||||
};
|
};
|
||||||
let mut matcher = DefaultMatcher::default();
|
|
||||||
let is_access_allowed = matcher.one_agent_allowed_by_robots(
|
|
||||||
&crawler.robots_txt,
|
|
||||||
"reqwest",
|
|
||||||
crawler.base_url.as_str(),
|
|
||||||
);
|
|
||||||
if !is_access_allowed {
|
|
||||||
panic!("Crawler cannot access site {}", crawler.base_url.as_str());
|
|
||||||
}
|
|
||||||
crawler
|
crawler
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_robots_txt(id: &str, base_url: &Url) -> Result<String> {
|
async fn check_access(&self, url: &Url) -> Result<()> {
|
||||||
let local_robots_path = APP_CACHE_PATH.clone().join(id).join("robots.txt");
|
let mut matcher = DefaultMatcher::default();
|
||||||
|
let is_access_allowed = matcher.one_agent_allowed_by_robots(
|
||||||
|
&self.get_robots_txt().await?,
|
||||||
|
"reqwest",
|
||||||
|
self.base_url.as_str(),
|
||||||
|
);
|
||||||
|
if !is_access_allowed {
|
||||||
|
return Err(eyre!("Crawler cannot access site {}", self.base_url.as_str()));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_robots_txt(&self) -> Result<String> {
|
||||||
|
if let Some(txt) = &self.robots_txt {
|
||||||
|
return Ok(txt.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
let local_robots_path = APP_CACHE_PATH.clone().join(&self.id).join("robots.txt");
|
||||||
if !local_robots_path.exists() {
|
if !local_robots_path.exists() {
|
||||||
let mut robots_url = base_url.clone();
|
let mut robots_url = self.base_url.clone();
|
||||||
robots_url.set_path("/robots.txt");
|
robots_url.set_path("/robots.txt");
|
||||||
let response = reqwest::get(robots_url).await.expect(
|
let response = reqwest::get(robots_url).await.expect(
|
||||||
format!(
|
format!(
|
||||||
"Failed to get robots.txt in `{}/robots.txt`",
|
"Failed to get robots.txt in `{}/robots.txt`",
|
||||||
base_url.as_str()
|
self.base_url.as_str()
|
||||||
)
|
)
|
||||||
.as_str(),
|
.as_str(),
|
||||||
);
|
);
|
||||||
@@ -55,10 +65,13 @@ impl Crawler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_html(&self, path: &str) -> Result<Html> {
|
pub async fn get_html(&self, path: &str) -> Result<(Html, StatusCode)> {
|
||||||
let mut url = self.base_url.clone();
|
let mut url = self.base_url.clone();
|
||||||
|
self.check_access(&url).await?;
|
||||||
url.set_path(path);
|
url.set_path(path);
|
||||||
let html_text = &self.client.get(url).send().await?.text().await?;
|
let res = self.client.get(url).send().await?;
|
||||||
Ok(Html::parse_document(html_text))
|
let status = res.status();
|
||||||
|
let html_text = &res.text().await?;
|
||||||
|
Ok((Html::parse_document(html_text), status))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ use ratatui::widgets::{Block, Borders, Paragraph, StatefulWidget};
|
|||||||
use std::any::Any;
|
use std::any::Any;
|
||||||
|
|
||||||
pub struct MainView {
|
pub struct MainView {
|
||||||
app_config: ApplicationConfig,
|
|
||||||
pub state: MainViewState,
|
pub state: MainViewState,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -29,20 +28,19 @@ enum Status {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MainView {
|
impl MainView {
|
||||||
pub fn new(app_conf: &ApplicationConfig) -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
state: MainViewState {
|
state: MainViewState {
|
||||||
popup: None,
|
popup: None,
|
||||||
status: Status::Running,
|
status: Status::Running,
|
||||||
},
|
}
|
||||||
app_config: app_conf.clone(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn quit(&mut self) -> color_eyre::Result<()> {
|
fn quit(&mut self) -> color_eyre::Result<()> {
|
||||||
if self.state.popup.is_none() {
|
if self.state.popup.is_none() {
|
||||||
self.state.status = Status::Exiting;
|
self.state.status = Status::Exiting;
|
||||||
self.app_config.save()?;
|
ApplicationConfig::get_config()?.save()?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -73,9 +71,11 @@ impl View for MainView {
|
|||||||
let Some(value) = popup.get_folder_value() &&
|
let Some(value) = popup.get_folder_value() &&
|
||||||
key.code.is_enter()
|
key.code.is_enter()
|
||||||
{
|
{
|
||||||
self.app_config.path_config.dlsite_paths.push(value);
|
let mut config = ApplicationConfig::get_config()?;
|
||||||
|
config.path_config.dlsite_paths.push(value);
|
||||||
|
|
||||||
popup.textarea.reset_value()?;
|
popup.textarea.reset_value()?;
|
||||||
self.app_config.save()?;
|
config.save()?;
|
||||||
}
|
}
|
||||||
if !matches!(self.state.status, Status::Popup) && matches!(key.kind, KeyEventKind::Press) {
|
if !matches!(self.state.status, Status::Popup) && matches!(key.kind, KeyEventKind::Press) {
|
||||||
match key.code {
|
match key.code {
|
||||||
|
|||||||
Reference in New Issue
Block a user