Add basic dlsite crawler

Reformat application config
This commit is contained in:
2025-10-18 01:32:00 +08:00
parent fea4e8d35e
commit 27bee0cfde
10 changed files with 922 additions and 166 deletions

View File

@@ -1,5 +1,5 @@
use crate::config::types::ApplicationConfig;
use crate::constants::{APP_CONFIG_DIR, APP_CONIFG_FILE_PATH, APP_DATA_DIR};
use crate::constants::{APP_CONFIG_DIR, APP_DATA_DIR};
use crate::event::{AppEvent, EventHandler};
use crate::widgets::views::MainView;
use crate::widgets::views::View;
@@ -11,11 +11,12 @@ use rat_cursor::HasScreenCursor;
use ratatui::{DefaultTerminal, Frame};
use std::any::Any;
use std::time::Duration;
use tokio::fs;
use crate::crawler::DLSITE_IMG_FOLDER;
pub(crate) struct App {
events: EventHandler,
db_connection: SqliteConnection,
app_config: ApplicationConfig,
state: AppState,
}
@@ -24,36 +25,22 @@ struct AppState {
}
impl App {
pub async fn create() -> Self {
let app_conf = if APP_CONIFG_FILE_PATH.exists() {
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH).unwrap()
} else {
ApplicationConfig::new()
};
Self::initialize_folders();
let db_conn = Self::establish_db_connection(app_conf.clone());
pub async fn create() -> Result<Self> {
let config = ApplicationConfig::get_config()?;
let db_conn = Self::establish_db_connection(&config);
let state = AppState {
view: Some(Box::new(MainView::new(&app_conf))),
view: Some(Box::new(MainView::new())),
};
Self {
events: EventHandler::new(Duration::from_millis(app_conf.basic_config.tick_rate)),
let app = Self {
events: EventHandler::new(Duration::from_millis(config.basic_config.tick_rate)),
db_connection: db_conn,
app_config: app_conf,
state,
}
};
Ok(app)
}
fn initialize_folders() {
if !APP_CONFIG_DIR.exists() {
std::fs::create_dir_all(APP_CONFIG_DIR.as_path()).unwrap();
}
if !APP_DATA_DIR.exists() {
std::fs::create_dir_all(APP_DATA_DIR.as_path()).unwrap();
}
}
fn establish_db_connection(application_config: ApplicationConfig) -> SqliteConnection {
let database_url = application_config.basic_config.db_path;
fn establish_db_connection(application_config: &ApplicationConfig) -> SqliteConnection {
let database_url = application_config.clone().basic_config.db_path;
SqliteConnection::establish(&database_url)
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
}
@@ -105,7 +92,7 @@ impl App {
if let Some(view) = self.state.view.as_mut() {
if let Some(main_view) = view.downcast_mut::<MainView>() {
frame.render_stateful_widget(
MainView::new(&self.app_config),
MainView::new(),
frame.area(),
&mut main_view.state,
);
@@ -116,3 +103,16 @@ impl App {
}
}
}
/// Creates the application's config, data and DLsite image directories.
///
/// No separate `exists()` pre-check is made: `create_dir_all` already succeeds
/// when the directory is present, and dropping the check avoids a blocking
/// filesystem call inside an async function as well as a check-then-create race.
///
/// # Errors
/// Returns the first I/O error raised while creating a directory, including
/// the case where one of the paths exists but is not a directory.
pub async fn initialize_folders() -> Result<()> {
    for dir in [
        APP_CONFIG_DIR.as_path(),
        APP_DATA_DIR.as_path(),
        DLSITE_IMG_FOLDER.as_path(),
    ] {
        fs::create_dir_all(dir).await?;
    }
    Ok(())
}

View File

@@ -6,7 +6,9 @@ use color_eyre::Result;
use ratatui::crossterm;
use std::path::PathBuf;
use color_eyre::eyre::eyre;
use crate::crawler::DLSiteCrawler;
// region Folder Command
#[derive(Parser, Debug)]
struct FolderAddCommand {
path: String,
@@ -22,10 +24,29 @@ struct FolderCommand {
#[command(subcommand)]
subcommand: FolderSubCommand,
}
// endregion
// region Sync
// Arguments of the `sync` command: a wrapper whose only field is the nested
// subcommand. Plain `//` comments are used here on purpose, since clap's
// derive picks up `///` doc comments as help text.
#[derive(Parser, Debug)]
struct SyncCommand {
// The selected sync target; `CliSubCommand::handle` forwards to its `handle`.
#[command(subcommand)]
subcommand: SyncSubCommand,
}
// Targets that `sync` can operate on; DLsite is currently the only variant.
#[derive(Parser, Debug)]
enum SyncSubCommand {
// Wraps the argument struct of the DLsite sync command.
DLSite(SyncDLSiteCommand)
}
// Unit struct: the DLsite sync command declares no arguments of its own.
#[derive(Parser, Debug)]
struct SyncDLSiteCommand;
// endregion
// Top-level subcommands of the CLI; each variant wraps the argument struct of
// one command and is dispatched in `CliSubCommand::handle`.
#[derive(Parser, Debug)]
enum CliSubCommand {
// Folder management command.
Folder(FolderCommand),
// Synchronisation command.
Sync(SyncCommand),
}
#[derive(Parser, Debug)]
@@ -39,15 +60,19 @@ impl Subcommand for Cli {
// Registers the `folder` and `sync` subcommands on `cmd` and marks a
// subcommand as required.
fn augment_subcommands(cmd: Command) -> Command {
    let folder = FolderCommand::augment_args(Command::new("folder"));
    let sync = SyncCommand::augment_args(Command::new("sync"));
    cmd.subcommand(folder)
        .subcommand_required(true)
        .subcommand(sync)
        .subcommand_required(true)
}
// Update-mode counterpart of `augment_subcommands`: registers the same
// `folder` and `sync` subcommands and marks a subcommand as required.
fn augment_subcommands_for_update(cmd: Command) -> Command {
    let folder = FolderCommand::augment_args(Command::new("folder"));
    let sync = SyncCommand::augment_args(Command::new("sync"));
    cmd.subcommand(folder)
        .subcommand_required(true)
        .subcommand(sync)
        .subcommand_required(true)
}
fn has_subcommand(name: &str) -> bool {
matches!(name, "folder")
matches!(name, "folder" | "sync")
}
}
@@ -67,8 +92,25 @@ impl Subcommand for FolderCommand {
}
}
// Hand-written clap `Subcommand` wiring for `sync`: it exposes exactly one
// nested subcommand, `dlsite`, and requires that one be given.
impl Subcommand for SyncCommand {
    fn augment_subcommands(cmd: Command) -> Command {
        let dlsite = SyncDLSiteCommand::augment_args(Command::new("dlsite"));
        cmd.subcommand(dlsite).subcommand_required(true)
    }

    fn augment_subcommands_for_update(cmd: Command) -> Command {
        let dlsite = SyncDLSiteCommand::augment_args(Command::new("dlsite"));
        cmd.subcommand(dlsite).subcommand_required(true)
    }

    fn has_subcommand(name: &str) -> bool {
        name == "dlsite"
    }
}
impl Cli {
pub async fn run(&self) -> Result<()> {
app::initialize_folders().await?;
if self.subcommand.is_none() {
return self.start_tui().await;
}
@@ -82,7 +124,7 @@ impl Cli {
crossterm::terminal::enable_raw_mode()?;
let mut terminal = ratatui::init();
let app = app::App::create().await;
let app = app::App::create().await?;
let result = app.run(&mut terminal).await;
ratatui::restore();
@@ -95,6 +137,7 @@ impl CliSubCommand {
pub async fn handle(&self) -> Result<()> {
match self {
CliSubCommand::Folder(cmd) => cmd.subcommand.handle().await,
CliSubCommand::Sync(cmd) => cmd.subcommand.handle().await,
}
}
}
@@ -107,6 +150,22 @@ impl FolderSubCommand {
}
}
impl SyncSubCommand {
    /// Runs the selected sync target by delegating to its `handle` method.
    pub async fn handle(&self) -> Result<()> {
        // The enum has a single variant, so this pattern is irrefutable; adding
        // a variant turns this line into a compile error that forces an update.
        let Self::DLSite(dlsite) = self;
        dlsite.handle().await
    }
}
impl SyncDLSiteCommand {
    /// Builds a `DLSiteCrawler` and fetches the info of one work, propagating
    /// any error; the fetched info itself is discarded.
    // NOTE(review): the product id "RJ163319" is hard-coded and the result is
    // unused. This looks like a placeholder for a real sync; confirm.
    pub async fn handle(&self) -> Result<()> {
        DLSiteCrawler::new().get_game_info("RJ163319").await?;
        Ok(())
    }
}
impl FolderAddCommand {
pub async fn handle(&self) -> Result<()> {
let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?;

View File

@@ -7,6 +7,14 @@ use serde_json;
pub mod types;
impl ApplicationConfig {
pub fn get_config() -> Result<Self> {
if APP_CONIFG_FILE_PATH.exists() {
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH)
} else {
Ok(ApplicationConfig::new())
}
}
pub fn from_file(path: &PathBuf) -> Result<Self> {
let reader = std::fs::File::open(path)?;
let result = serde_json::from_reader(reader)?;

View File

@@ -1,18 +1,18 @@
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct ApplicationConfig {
pub(crate) basic_config: BasicConfig,
pub(crate) path_config: PathConfig,
pub struct ApplicationConfig {
pub basic_config: BasicConfig,
pub path_config: PathConfig,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct BasicConfig {
pub(crate) db_path: String,
pub(crate) tick_rate: u64,
pub db_path: String,
pub tick_rate: u64,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct PathConfig {
pub(crate) dlsite_paths: Vec<String>,
pub struct PathConfig {
pub dlsite_paths: Vec<String>,
}

View File

@@ -1,6 +1,7 @@
use directories::BaseDirs;
use lazy_static::lazy_static;
use std::path::PathBuf;
use crate::config::types::ApplicationConfig;
const APP_DIR_NAME: &str = "sus_manager";
lazy_static! {

View File

@@ -1,8 +1,78 @@
use std::collections::HashMap;
use std::path::PathBuf;
use color_eyre::eyre::eyre;
use reqwest::Url;
use color_eyre::Result;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use crate::constants::APP_DATA_DIR;
use crate::crawler::Crawler;
const DLSITE_URL: &str = "https://www.dlsite.com/";
const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax";
lazy_static! {
    /// Directory for saved DLsite work images: `<APP_DATA_DIR>/dlsite/img`.
    pub static ref DLSITE_IMG_FOLDER: PathBuf = {
        let mut folder = APP_DATA_DIR.to_path_buf();
        folder.push("dlsite");
        folder.push("img");
        folder
    };
}
#[derive(Clone)]
pub(crate) struct DLSiteCrawler {
pub struct DLSiteCrawler {
crawler: Crawler,
}
impl DLSiteCrawler {}
/// One product entry as deserialized from the DLsite product-info response.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DLSiteManiax {
/// Value of the `work_name` field.
pub work_name: String,
/// Value of the serialized `work_image` field. `save_main_image` prefixes it
/// with `https:` before parsing it as a URL.
#[serde(rename = "work_image")]
work_image_url: String,
/// Value of the serialized `dl_count` field.
#[serde(rename = "dl_count")]
pub sells_count: u32
}
impl DLSiteCrawler {
/// Creates a crawler with the id `"DLSite"` rooted at `DLSITE_URL`.
///
/// # Panics
/// Panics if `DLSITE_URL` cannot be parsed as a URL.
pub fn new() -> Self {
    let base_url = Url::parse(DLSITE_URL).unwrap();
    let crawler = Crawler::new("DLSite", base_url);
    Self { crawler }
}
/// Returns `true` when `rj_num` is `RJ` followed by exactly 6 or 8 ASCII digits.
///
/// Only ASCII digits are accepted. `char::is_numeric` also matches non-ASCII
/// numerics (full-width digits, superscripts, ...), and because `str::len`
/// counts bytes, such multi-byte characters could previously satisfy the
/// length check with fewer characters than intended.
fn is_valid_number(rj_num: &str) -> bool {
    match rj_num.strip_prefix("RJ") {
        Some(digits) => {
            matches!(digits.len(), 6 | 8) && digits.bytes().all(|b| b.is_ascii_digit())
        }
        None => false,
    }
}
/// Fetches the product info for `rj_num` from the DLsite info endpoint and
/// saves the work's main image before returning the info.
///
/// # Errors
/// Returns an error when:
/// - `rj_num` fails `is_valid_number`,
/// - the HTTP request fails,
/// - the response body cannot be decoded as a map of product entries
///   (reported as "restricted/removed"),
/// - the decoded map is empty,
/// - downloading or saving the main image fails.
pub async fn get_game_info(&self, rj_num: &str) -> Result<DLSiteManiax> {
    if !Self::is_valid_number(rj_num) {
        return Err(eyre!("Invalid number: {}", rj_num));
    }

    let mut api_url = self.crawler.base_url.clone();
    api_url.set_path(DLSITE_API_ENDPOINT);
    api_url.set_query(Some(&format!("product_id={}", rj_num)));

    let res = self.crawler.client.get(api_url).send().await?;
    let maniax_result = res
        .json::<HashMap<String, DLSiteManiax>>()
        .await
        .map_err(|_| eyre!("Maniax {} is restricted/removed", rj_num))?;

    // An empty map is a valid decode, so it must be an error and not an
    // `unwrap` panic. Taking the value by move also avoids cloning it.
    let maniax_info = maniax_result
        .into_values()
        .next()
        .ok_or_else(|| eyre!("Maniax {} returned no product info", rj_num))?;

    self.save_main_image(&maniax_info, rj_num).await?;
    Ok(maniax_info)
}
/// Downloads the work's main image, decodes it, and saves it as
/// `<DLSITE_IMG_FOLDER>/<rj_num>.jpg`.
///
/// # Errors
/// Propagates URL parsing, HTTP, image decoding and image saving errors.
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
    // The stored image URL has no scheme, so `https:` is prepended.
    let image_url = Url::parse(&format!("https:{}", info.work_image_url))?;
    let response = self.crawler.client.get(image_url).send().await?;
    let raw = response.bytes().await?;
    let decoded = image::load_from_memory(&raw)?;
    let target = DLSITE_IMG_FOLDER.join(format!("{}.jpg", rj_num));
    decoded.save(&target)?;
    Ok(())
}
}

View File

@@ -1,48 +1,58 @@
mod dlsite;
pub use dlsite::*;
use color_eyre::eyre::eyre;
use crate::constants::APP_CACHE_PATH;
use color_eyre::Result;
use reqwest::{Client, Url};
use reqwest::{Client, StatusCode, Url};
use robotstxt::DefaultMatcher;
use scraper::Html;
#[derive(Clone)]
pub(crate) struct Crawler {
struct Crawler {
id: String,
base_url: Url,
client: Client,
robots_txt: String,
pub(crate) base_url: Url,
pub(crate) client: Client,
robots_txt: Option<String>,
}
impl Crawler {
pub async fn new(id: &str, base_url: Url) -> Self {
pub fn new(id: &str, base_url: Url) -> Self {
let crawler = Self {
id: id.to_string(),
client: Client::new(),
robots_txt: Self::get_robots_txt(id, &base_url).await.unwrap(),
robots_txt: None,
base_url,
};
let mut matcher = DefaultMatcher::default();
let is_access_allowed = matcher.one_agent_allowed_by_robots(
&crawler.robots_txt,
"reqwest",
crawler.base_url.as_str(),
);
if !is_access_allowed {
panic!("Crawler cannot access site {}", crawler.base_url.as_str());
}
crawler
}
async fn get_robots_txt(id: &str, base_url: &Url) -> Result<String> {
let local_robots_path = APP_CACHE_PATH.clone().join(id).join("robots.txt");
/// Checks `url` against the site's robots.txt for the `reqwest` user agent.
///
/// The check is made against the `url` argument. Previously the parameter was
/// ignored and the base URL was always tested, so path-specific rules could
/// never apply.
///
/// # Errors
/// Returns an error when robots.txt cannot be obtained or when it disallows
/// access to `url`.
async fn check_access(&self, url: &Url) -> Result<()> {
    // Fetch first so the matcher is not kept alive across the await point.
    let robots_txt = self.get_robots_txt().await?;
    let mut matcher = DefaultMatcher::default();
    if !matcher.one_agent_allowed_by_robots(&robots_txt, "reqwest", url.as_str()) {
        return Err(eyre!("Crawler cannot access site {}", url.as_str()));
    }
    Ok(())
}
async fn get_robots_txt(&self) -> Result<String> {
if let Some(txt) = &self.robots_txt {
return Ok(txt.clone());
}
let local_robots_path = APP_CACHE_PATH.clone().join(&self.id).join("robots.txt");
if !local_robots_path.exists() {
let mut robots_url = base_url.clone();
let mut robots_url = self.base_url.clone();
robots_url.set_path("/robots.txt");
let response = reqwest::get(robots_url).await.expect(
format!(
"Failed to get robots.txt in `{}/robots.txt`",
base_url.as_str()
self.base_url.as_str()
)
.as_str(),
);
@@ -55,10 +65,13 @@ impl Crawler {
}
}
pub async fn get_html(&self, path: &str) -> Result<Html> {
pub async fn get_html(&self, path: &str) -> Result<(Html, StatusCode)> {
let mut url = self.base_url.clone();
self.check_access(&url).await?;
url.set_path(path);
let html_text = &self.client.get(url).send().await?.text().await?;
Ok(Html::parse_document(html_text))
let res = self.client.get(url).send().await?;
let status = res.status();
let html_text = &res.text().await?;
Ok((Html::parse_document(html_text), status))
}
}

View File

@@ -11,7 +11,6 @@ use ratatui::widgets::{Block, Borders, Paragraph, StatefulWidget};
use std::any::Any;
pub struct MainView {
app_config: ApplicationConfig,
pub state: MainViewState,
}
@@ -29,20 +28,19 @@ enum Status {
}
impl MainView {
pub fn new(app_conf: &ApplicationConfig) -> Self {
pub fn new() -> Self {
Self {
state: MainViewState {
popup: None,
status: Status::Running,
},
app_config: app_conf.clone(),
}
}
}
fn quit(&mut self) -> color_eyre::Result<()> {
if self.state.popup.is_none() {
self.state.status = Status::Exiting;
self.app_config.save()?;
ApplicationConfig::get_config()?.save()?;
}
Ok(())
}
@@ -73,9 +71,11 @@ impl View for MainView {
let Some(value) = popup.get_folder_value() &&
key.code.is_enter()
{
self.app_config.path_config.dlsite_paths.push(value);
let mut config = ApplicationConfig::get_config()?;
config.path_config.dlsite_paths.push(value);
popup.textarea.reset_value()?;
self.app_config.save()?;
config.save()?;
}
if !matches!(self.state.status, Status::Popup) && matches!(key.kind, KeyEventKind::Press) {
match key.code {