Add basic dlsite crawler

Reformat application config
This commit is contained in:
2025-10-18 01:32:00 +08:00
parent fea4e8d35e
commit 27bee0cfde
10 changed files with 922 additions and 166 deletions

788
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,14 +9,13 @@ edition = "2024"
[dependencies] [dependencies]
color-eyre = "0.6.3" color-eyre = "0.6.3"
futures = "0.3.28" futures = "0.3.28"
tokio-util = "0.7.9"
tokio-utils = "0.1.2"
directories = "6.0.0" directories = "6.0.0"
lazy_static = "1.5.0" lazy_static = "1.5.0"
robotstxt = "0.3.0" robotstxt = "0.3.0"
scraper = "0.24.0" scraper = "0.24.0"
rat-cursor = "1.2.1" rat-cursor = "1.2.1"
serde_json = "1.0.145" serde_json = "1.0.145"
image = "0.25.8"
[dependencies.serde] [dependencies.serde]
version = "1.0.228" version = "1.0.228"
@@ -41,7 +40,7 @@ features = ["derive", "cargo"]
[dependencies.reqwest] [dependencies.reqwest]
version = "0.12.23" version = "0.12.23"
features = ["blocking"] features = ["blocking", "json"]
[dependencies.tokio] [dependencies.tokio]
version = "1.47.1" version = "1.47.1"
@@ -50,11 +49,3 @@ features = ["full"]
[dependencies.diesel] [dependencies.diesel]
version = "2.3.2" version = "2.3.2"
features = ["sqlite"] features = ["sqlite"]
[dependencies.libsqlite3-sys]
version = "0.35.0"
features = ["bundled"]
[dependencies.uuid]
version = "1.18.1"
features = ["v4"]

View File

@@ -1,5 +1,5 @@
use crate::config::types::ApplicationConfig; use crate::config::types::ApplicationConfig;
use crate::constants::{APP_CONFIG_DIR, APP_CONIFG_FILE_PATH, APP_DATA_DIR}; use crate::constants::{APP_CONFIG_DIR, APP_DATA_DIR};
use crate::event::{AppEvent, EventHandler}; use crate::event::{AppEvent, EventHandler};
use crate::widgets::views::MainView; use crate::widgets::views::MainView;
use crate::widgets::views::View; use crate::widgets::views::View;
@@ -11,11 +11,12 @@ use rat_cursor::HasScreenCursor;
use ratatui::{DefaultTerminal, Frame}; use ratatui::{DefaultTerminal, Frame};
use std::any::Any; use std::any::Any;
use std::time::Duration; use std::time::Duration;
use tokio::fs;
use crate::crawler::DLSITE_IMG_FOLDER;
pub(crate) struct App { pub(crate) struct App {
events: EventHandler, events: EventHandler,
db_connection: SqliteConnection, db_connection: SqliteConnection,
app_config: ApplicationConfig,
state: AppState, state: AppState,
} }
@@ -24,36 +25,22 @@ struct AppState {
} }
impl App { impl App {
pub async fn create() -> Self { pub async fn create() -> Result<Self> {
let app_conf = if APP_CONIFG_FILE_PATH.exists() { let config = ApplicationConfig::get_config()?;
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH).unwrap() let db_conn = Self::establish_db_connection(&config);
} else {
ApplicationConfig::new()
};
Self::initialize_folders();
let db_conn = Self::establish_db_connection(app_conf.clone());
let state = AppState { let state = AppState {
view: Some(Box::new(MainView::new(&app_conf))), view: Some(Box::new(MainView::new())),
}; };
Self { let app = Self {
events: EventHandler::new(Duration::from_millis(app_conf.basic_config.tick_rate)), events: EventHandler::new(Duration::from_millis(config.basic_config.tick_rate)),
db_connection: db_conn, db_connection: db_conn,
app_config: app_conf,
state, state,
} };
Ok(app)
} }
fn initialize_folders() { fn establish_db_connection(application_config: &ApplicationConfig) -> SqliteConnection {
if !APP_CONFIG_DIR.exists() { let database_url = application_config.clone().basic_config.db_path;
std::fs::create_dir_all(APP_CONFIG_DIR.as_path()).unwrap();
}
if !APP_DATA_DIR.exists() {
std::fs::create_dir_all(APP_DATA_DIR.as_path()).unwrap();
}
}
fn establish_db_connection(application_config: ApplicationConfig) -> SqliteConnection {
let database_url = application_config.basic_config.db_path;
SqliteConnection::establish(&database_url) SqliteConnection::establish(&database_url)
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url)) .unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
} }
@@ -105,7 +92,7 @@ impl App {
if let Some(view) = self.state.view.as_mut() { if let Some(view) = self.state.view.as_mut() {
if let Some(main_view) = view.downcast_mut::<MainView>() { if let Some(main_view) = view.downcast_mut::<MainView>() {
frame.render_stateful_widget( frame.render_stateful_widget(
MainView::new(&self.app_config), MainView::new(),
frame.area(), frame.area(),
&mut main_view.state, &mut main_view.state,
); );
@@ -116,3 +103,16 @@ impl App {
} }
} }
} }
/// Creates the directories the application writes to: the config directory,
/// the data directory and the DLSite image folder.
///
/// `create_dir_all` creates missing parents and succeeds when the directory
/// is already present, so no `exists()` pre-check is performed. Skipping the
/// pre-check keeps blocking filesystem calls out of this async function and
/// removes the window between "check" and "create".
///
/// # Errors
/// Returns the first I/O error raised while creating a directory (for
/// example when a path is occupied by a regular file or is not writable).
pub async fn initialize_folders() -> Result<()> {
    fs::create_dir_all(APP_CONFIG_DIR.as_path()).await?;
    fs::create_dir_all(APP_DATA_DIR.as_path()).await?;
    fs::create_dir_all(DLSITE_IMG_FOLDER.as_path()).await?;
    Ok(())
}

View File

@@ -6,7 +6,9 @@ use color_eyre::Result;
use ratatui::crossterm; use ratatui::crossterm;
use std::path::PathBuf; use std::path::PathBuf;
use color_eyre::eyre::eyre; use color_eyre::eyre::eyre;
use crate::crawler::DLSiteCrawler;
// region Folder Command
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
struct FolderAddCommand { struct FolderAddCommand {
path: String, path: String,
@@ -22,10 +24,29 @@ struct FolderCommand {
#[command(subcommand)] #[command(subcommand)]
subcommand: FolderSubCommand, subcommand: FolderSubCommand,
} }
// endregion
// region Sync
// Arguments of the `sync` command; it only carries the nested subcommand
// that selects what to synchronise.
#[derive(Parser, Debug)]
struct SyncCommand {
// Selected sync target, dispatched through `SyncSubCommand::handle`.
#[command(subcommand)]
subcommand: SyncSubCommand,
}
// Sync targets available under `sync`; DLSite is the only one defined here.
#[derive(Parser, Debug)]
enum SyncSubCommand {
// Synchronise data from DLSite.
DLSite(SyncDLSiteCommand)
}
// DLSite sync command; a unit struct, so it takes no arguments of its own.
#[derive(Parser, Debug)]
struct SyncDLSiteCommand;
// endregion
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
enum CliSubCommand { enum CliSubCommand {
Folder(FolderCommand), Folder(FolderCommand),
Sync(SyncCommand),
} }
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
@@ -39,15 +60,19 @@ impl Subcommand for Cli {
fn augment_subcommands(cmd: Command) -> Command { fn augment_subcommands(cmd: Command) -> Command {
cmd.subcommand(FolderCommand::augment_args(Command::new("folder"))) cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
.subcommand_required(true) .subcommand_required(true)
.subcommand(SyncCommand::augment_args(Command::new("sync")))
.subcommand_required(true)
} }
fn augment_subcommands_for_update(cmd: Command) -> Command { fn augment_subcommands_for_update(cmd: Command) -> Command {
cmd.subcommand(FolderCommand::augment_args(Command::new("folder"))) cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
.subcommand_required(true) .subcommand_required(true)
.subcommand(SyncCommand::augment_args(Command::new("sync")))
.subcommand_required(true)
} }
fn has_subcommand(name: &str) -> bool { fn has_subcommand(name: &str) -> bool {
matches!(name, "folder") matches!(name, "folder" | "sync")
} }
} }
@@ -67,8 +92,25 @@ impl Subcommand for FolderCommand {
} }
} }
// Manual clap `Subcommand` wiring for `sync`: registers `dlsite` as its
// single subcommand and marks a subcommand as required.
impl Subcommand for SyncCommand {
    fn augment_subcommands(cmd: Command) -> Command {
        let dlsite = SyncDLSiteCommand::augment_args(Command::new("dlsite"));
        cmd.subcommand(dlsite).subcommand_required(true)
    }

    fn augment_subcommands_for_update(cmd: Command) -> Command {
        // Update mode registers exactly the same subcommand set.
        <Self as Subcommand>::augment_subcommands(cmd)
    }

    fn has_subcommand(name: &str) -> bool {
        name == "dlsite"
    }
}
impl Cli { impl Cli {
pub async fn run(&self) -> Result<()> { pub async fn run(&self) -> Result<()> {
app::initialize_folders().await?;
if self.subcommand.is_none() { if self.subcommand.is_none() {
return self.start_tui().await; return self.start_tui().await;
} }
@@ -82,7 +124,7 @@ impl Cli {
crossterm::terminal::enable_raw_mode()?; crossterm::terminal::enable_raw_mode()?;
let mut terminal = ratatui::init(); let mut terminal = ratatui::init();
let app = app::App::create().await; let app = app::App::create().await?;
let result = app.run(&mut terminal).await; let result = app.run(&mut terminal).await;
ratatui::restore(); ratatui::restore();
@@ -95,6 +137,7 @@ impl CliSubCommand {
pub async fn handle(&self) -> Result<()> { pub async fn handle(&self) -> Result<()> {
match self { match self {
CliSubCommand::Folder(cmd) => cmd.subcommand.handle().await, CliSubCommand::Folder(cmd) => cmd.subcommand.handle().await,
CliSubCommand::Sync(cmd) => cmd.subcommand.handle().await,
} }
} }
} }
@@ -107,6 +150,22 @@ impl FolderSubCommand {
} }
} }
impl SyncSubCommand {
    /// Runs the handler of the selected sync target and returns its result.
    pub async fn handle(&self) -> Result<()> {
        // The enum has a single variant, so this pattern is irrefutable.
        let Self::DLSite(dlsite) = self;
        dlsite.handle().await
    }
}
impl SyncDLSiteCommand {
    /// Requests the product info of the fixed work id `RJ163319` through a
    /// fresh [`DLSiteCrawler`]; the returned info is discarded and only the
    /// success or failure of the request is reported.
    pub async fn handle(&self) -> Result<()> {
        let dlsite = DLSiteCrawler::new();
        let _info = dlsite.get_game_info("RJ163319").await?;
        Ok(())
    }
}
impl FolderAddCommand { impl FolderAddCommand {
pub async fn handle(&self) -> Result<()> { pub async fn handle(&self) -> Result<()> {
let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?; let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?;

View File

@@ -7,6 +7,14 @@ use serde_json;
pub mod types; pub mod types;
impl ApplicationConfig { impl ApplicationConfig {
/// Returns the application configuration.
///
/// Falls back to `ApplicationConfig::new()` when the config file does not
/// exist; otherwise the file is loaded with `from_file`, whose error is
/// passed through unchanged.
pub fn get_config() -> Result<Self> {
    if !APP_CONIFG_FILE_PATH.exists() {
        return Ok(ApplicationConfig::new());
    }
    ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH)
}
pub fn from_file(path: &PathBuf) -> Result<Self> { pub fn from_file(path: &PathBuf) -> Result<Self> {
let reader = std::fs::File::open(path)?; let reader = std::fs::File::open(path)?;
let result = serde_json::from_reader(reader)?; let result = serde_json::from_reader(reader)?;

View File

@@ -1,18 +1,18 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct ApplicationConfig { pub struct ApplicationConfig {
pub(crate) basic_config: BasicConfig, pub basic_config: BasicConfig,
pub(crate) path_config: PathConfig, pub path_config: PathConfig,
} }
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct BasicConfig { pub(crate) struct BasicConfig {
pub(crate) db_path: String, pub db_path: String,
pub(crate) tick_rate: u64, pub tick_rate: u64,
} }
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct PathConfig { pub struct PathConfig {
pub(crate) dlsite_paths: Vec<String>, pub dlsite_paths: Vec<String>,
} }

View File

@@ -1,6 +1,7 @@
use directories::BaseDirs; use directories::BaseDirs;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use std::path::PathBuf; use std::path::PathBuf;
use crate::config::types::ApplicationConfig;
const APP_DIR_NAME: &str = "sus_manager"; const APP_DIR_NAME: &str = "sus_manager";
lazy_static! { lazy_static! {

View File

@@ -1,8 +1,78 @@
use std::collections::HashMap;
use std::path::PathBuf;
use color_eyre::eyre::eyre;
use reqwest::Url;
use color_eyre::Result;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use crate::constants::APP_DATA_DIR;
use crate::crawler::Crawler; use crate::crawler::Crawler;
const DLSITE_URL: &str = "https://www.dlsite.com/";
const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax";
lazy_static! {
pub static ref DLSITE_IMG_FOLDER: PathBuf = APP_DATA_DIR.clone().join("dlsite").join("img");
}
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct DLSiteCrawler { pub struct DLSiteCrawler {
crawler: Crawler, crawler: Crawler,
} }
impl DLSiteCrawler {} #[derive(Clone, Debug, Serialize, Deserialize)]
/// Fields deserialized from one entry of the DLSite product-info response
/// (see `DLSiteCrawler::get_game_info`).
pub struct DLSiteManiax {
/// Value of the JSON key `work_name`.
pub work_name: String,
/// Value of the JSON key `work_image`. `save_main_image` prepends `https:`
/// to it, so it is presumably a scheme-relative URL — TODO confirm.
#[serde(rename = "work_image")]
work_image_url: String,
/// Value of the JSON key `dl_count`; presumably the number of copies
/// sold/downloaded — verify against the API.
#[serde(rename = "dl_count")]
pub sells_count: u32
}
impl DLSiteCrawler {
/// Builds a DLSite crawler whose inner `Crawler` has the id `"DLSite"` and
/// is rooted at `DLSITE_URL`.
///
/// # Panics
/// Panics if `DLSITE_URL` is not a valid URL.
pub fn new() -> Self {
    let base_url = Url::parse(DLSITE_URL).unwrap();
    let crawler = Crawler::new("DLSite", base_url);
    Self { crawler }
}
/// Returns `true` when `rj_num` is a well-formed work id: the prefix `RJ`
/// followed by exactly 6 or exactly 8 ASCII digits (8 or 10 characters in
/// total).
///
/// Only ASCII digits are accepted. `char::is_numeric` would also accept
/// other Unicode numerals (e.g. Arabic-Indic or full-width digits), which,
/// combined with a byte-length check, let malformed ids through.
fn is_valid_number(rj_num: &str) -> bool {
    let Some(digits) = rj_num.strip_prefix("RJ") else {
        return false;
    };
    // Every accepted byte is ASCII, so byte length equals digit count.
    matches!(digits.len(), 6 | 8) && digits.bytes().all(|b| b.is_ascii_digit())
}
/// Fetches the product info of `rj_num` from the DLSite ajax endpoint and
/// stores the work's main image via `save_main_image`.
///
/// The response is decoded as a map of `DLSiteManiax` entries and the first
/// entry is returned.
///
/// # Errors
/// * `rj_num` fails `is_valid_number`.
/// * The HTTP request fails.
/// * The body cannot be decoded as the expected map (reported as
///   "restricted/removed").
/// * The decoded map is empty.
/// * Downloading or saving the main image fails.
pub async fn get_game_info(&self, rj_num: &str) -> Result<DLSiteManiax> {
    if !Self::is_valid_number(rj_num) {
        return Err(eyre!("Invalid number: {}", rj_num));
    }

    let mut api_url = self.crawler.base_url.clone();
    api_url.set_path(DLSITE_API_ENDPOINT);
    api_url.set_query(Some(&format!("product_id={}", rj_num)));

    let res = self.crawler.client.get(api_url).send().await?;
    let maniax_result = res
        .json::<HashMap<String, DLSiteManiax>>()
        .await
        .map_err(|_| eyre!("Maniax {} is restricted/removed", rj_num))?;

    // An empty map is a valid decode result; report it instead of panicking.
    // Taking the value by move also avoids cloning the entry.
    let maniax_info = maniax_result
        .into_values()
        .next()
        .ok_or_else(|| eyre!("No product info returned for {}", rj_num))?;

    self.save_main_image(&maniax_info, rj_num).await?;
    Ok(maniax_info)
}
/// Downloads the work's main image and writes it to
/// `DLSITE_IMG_FOLDER/<rj_num>.jpg`.
///
/// The image URL is built by prefixing `info.work_image_url` with `https:`.
/// The downloaded bytes are decoded with `image::load_from_memory` and then
/// saved to the target path.
///
/// # Errors
/// Fails when the URL cannot be parsed, the download fails, the bytes are
/// not a decodable image, or the file cannot be written.
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
    let image_url = Url::parse(&format!("https:{}", info.work_image_url))?;
    let raw = self.crawler.client.get(image_url).send().await?.bytes().await?;
    let decoded = image::load_from_memory(&raw)?;
    let target = DLSITE_IMG_FOLDER.join(format!("{}.jpg", rj_num));
    decoded.save(target.as_path())?;
    Ok(())
}
}

View File

@@ -1,48 +1,58 @@
mod dlsite; mod dlsite;
pub use dlsite::*;
use color_eyre::eyre::eyre;
use crate::constants::APP_CACHE_PATH; use crate::constants::APP_CACHE_PATH;
use color_eyre::Result; use color_eyre::Result;
use reqwest::{Client, Url}; use reqwest::{Client, StatusCode, Url};
use robotstxt::DefaultMatcher; use robotstxt::DefaultMatcher;
use scraper::Html; use scraper::Html;
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct Crawler { struct Crawler {
id: String, id: String,
base_url: Url, pub(crate) base_url: Url,
client: Client, pub(crate) client: Client,
robots_txt: String, robots_txt: Option<String>,
} }
impl Crawler { impl Crawler {
pub async fn new(id: &str, base_url: Url) -> Self { pub fn new(id: &str, base_url: Url) -> Self {
let crawler = Self { let crawler = Self {
id: id.to_string(), id: id.to_string(),
client: Client::new(), client: Client::new(),
robots_txt: Self::get_robots_txt(id, &base_url).await.unwrap(), robots_txt: None,
base_url, base_url,
}; };
let mut matcher = DefaultMatcher::default();
let is_access_allowed = matcher.one_agent_allowed_by_robots(
&crawler.robots_txt,
"reqwest",
crawler.base_url.as_str(),
);
if !is_access_allowed {
panic!("Crawler cannot access site {}", crawler.base_url.as_str());
}
crawler crawler
} }
async fn get_robots_txt(id: &str, base_url: &Url) -> Result<String> { async fn check_access(&self, url: &Url) -> Result<()> {
let local_robots_path = APP_CACHE_PATH.clone().join(id).join("robots.txt"); let mut matcher = DefaultMatcher::default();
let is_access_allowed = matcher.one_agent_allowed_by_robots(
&self.get_robots_txt().await?,
"reqwest",
self.base_url.as_str(),
);
if !is_access_allowed {
return Err(eyre!("Crawler cannot access site {}", self.base_url.as_str()));
}
Ok(())
}
async fn get_robots_txt(&self) -> Result<String> {
if let Some(txt) = &self.robots_txt {
return Ok(txt.clone());
}
let local_robots_path = APP_CACHE_PATH.clone().join(&self.id).join("robots.txt");
if !local_robots_path.exists() { if !local_robots_path.exists() {
let mut robots_url = base_url.clone(); let mut robots_url = self.base_url.clone();
robots_url.set_path("/robots.txt"); robots_url.set_path("/robots.txt");
let response = reqwest::get(robots_url).await.expect( let response = reqwest::get(robots_url).await.expect(
format!( format!(
"Failed to get robots.txt in `{}/robots.txt`", "Failed to get robots.txt in `{}/robots.txt`",
base_url.as_str() self.base_url.as_str()
) )
.as_str(), .as_str(),
); );
@@ -55,10 +65,13 @@ impl Crawler {
} }
} }
pub async fn get_html(&self, path: &str) -> Result<Html> { pub async fn get_html(&self, path: &str) -> Result<(Html, StatusCode)> {
let mut url = self.base_url.clone(); let mut url = self.base_url.clone();
self.check_access(&url).await?;
url.set_path(path); url.set_path(path);
let html_text = &self.client.get(url).send().await?.text().await?; let res = self.client.get(url).send().await?;
Ok(Html::parse_document(html_text)) let status = res.status();
let html_text = &res.text().await?;
Ok((Html::parse_document(html_text), status))
} }
} }

View File

@@ -11,7 +11,6 @@ use ratatui::widgets::{Block, Borders, Paragraph, StatefulWidget};
use std::any::Any; use std::any::Any;
pub struct MainView { pub struct MainView {
app_config: ApplicationConfig,
pub state: MainViewState, pub state: MainViewState,
} }
@@ -29,20 +28,19 @@ enum Status {
} }
impl MainView { impl MainView {
pub fn new(app_conf: &ApplicationConfig) -> Self { pub fn new() -> Self {
Self { Self {
state: MainViewState { state: MainViewState {
popup: None, popup: None,
status: Status::Running, status: Status::Running,
}, }
app_config: app_conf.clone(),
} }
} }
fn quit(&mut self) -> color_eyre::Result<()> { fn quit(&mut self) -> color_eyre::Result<()> {
if self.state.popup.is_none() { if self.state.popup.is_none() {
self.state.status = Status::Exiting; self.state.status = Status::Exiting;
self.app_config.save()?; ApplicationConfig::get_config()?.save()?;
} }
Ok(()) Ok(())
} }
@@ -73,9 +71,11 @@ impl View for MainView {
let Some(value) = popup.get_folder_value() && let Some(value) = popup.get_folder_value() &&
key.code.is_enter() key.code.is_enter()
{ {
self.app_config.path_config.dlsite_paths.push(value); let mut config = ApplicationConfig::get_config()?;
config.path_config.dlsite_paths.push(value);
popup.textarea.reset_value()?; popup.textarea.reset_value()?;
self.app_config.save()?; config.save()?;
} }
if !matches!(self.state.status, Status::Popup) && matches!(key.kind, KeyEventKind::Press) { if !matches!(self.state.status, Status::Popup) && matches!(key.kind, KeyEventKind::Press) {
match key.code { match key.code {