Add basic dlsite crawler
Reformat application config
This commit is contained in:
788
Cargo.lock
generated
788
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -9,14 +9,13 @@ edition = "2024"
|
||||
[dependencies]
|
||||
color-eyre = "0.6.3"
|
||||
futures = "0.3.28"
|
||||
tokio-util = "0.7.9"
|
||||
tokio-utils = "0.1.2"
|
||||
directories = "6.0.0"
|
||||
lazy_static = "1.5.0"
|
||||
robotstxt = "0.3.0"
|
||||
scraper = "0.24.0"
|
||||
rat-cursor = "1.2.1"
|
||||
serde_json = "1.0.145"
|
||||
image = "0.25.8"
|
||||
|
||||
[dependencies.serde]
|
||||
version = "1.0.228"
|
||||
@@ -41,7 +40,7 @@ features = ["derive", "cargo"]
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.12.23"
|
||||
features = ["blocking"]
|
||||
features = ["blocking", "json"]
|
||||
|
||||
[dependencies.tokio]
|
||||
version = "1.47.1"
|
||||
@@ -50,11 +49,3 @@ features = ["full"]
|
||||
[dependencies.diesel]
|
||||
version = "2.3.2"
|
||||
features = ["sqlite"]
|
||||
|
||||
[dependencies.libsqlite3-sys]
|
||||
version = "0.35.0"
|
||||
features = ["bundled"]
|
||||
|
||||
[dependencies.uuid]
|
||||
version = "1.18.1"
|
||||
features = ["v4"]
|
||||
|
||||
54
src/app.rs
54
src/app.rs
@@ -1,5 +1,5 @@
|
||||
use crate::config::types::ApplicationConfig;
|
||||
use crate::constants::{APP_CONFIG_DIR, APP_CONIFG_FILE_PATH, APP_DATA_DIR};
|
||||
use crate::constants::{APP_CONFIG_DIR, APP_DATA_DIR};
|
||||
use crate::event::{AppEvent, EventHandler};
|
||||
use crate::widgets::views::MainView;
|
||||
use crate::widgets::views::View;
|
||||
@@ -11,11 +11,12 @@ use rat_cursor::HasScreenCursor;
|
||||
use ratatui::{DefaultTerminal, Frame};
|
||||
use std::any::Any;
|
||||
use std::time::Duration;
|
||||
use tokio::fs;
|
||||
use crate::crawler::DLSITE_IMG_FOLDER;
|
||||
|
||||
pub(crate) struct App {
|
||||
events: EventHandler,
|
||||
db_connection: SqliteConnection,
|
||||
app_config: ApplicationConfig,
|
||||
state: AppState,
|
||||
}
|
||||
|
||||
@@ -24,36 +25,22 @@ struct AppState {
|
||||
}
|
||||
|
||||
impl App {
|
||||
pub async fn create() -> Self {
|
||||
let app_conf = if APP_CONIFG_FILE_PATH.exists() {
|
||||
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH).unwrap()
|
||||
} else {
|
||||
ApplicationConfig::new()
|
||||
};
|
||||
Self::initialize_folders();
|
||||
let db_conn = Self::establish_db_connection(app_conf.clone());
|
||||
pub async fn create() -> Result<Self> {
|
||||
let config = ApplicationConfig::get_config()?;
|
||||
let db_conn = Self::establish_db_connection(&config);
|
||||
let state = AppState {
|
||||
view: Some(Box::new(MainView::new(&app_conf))),
|
||||
view: Some(Box::new(MainView::new())),
|
||||
};
|
||||
Self {
|
||||
events: EventHandler::new(Duration::from_millis(app_conf.basic_config.tick_rate)),
|
||||
let app = Self {
|
||||
events: EventHandler::new(Duration::from_millis(config.basic_config.tick_rate)),
|
||||
db_connection: db_conn,
|
||||
app_config: app_conf,
|
||||
state,
|
||||
}
|
||||
};
|
||||
Ok(app)
|
||||
}
|
||||
|
||||
fn initialize_folders() {
|
||||
if !APP_CONFIG_DIR.exists() {
|
||||
std::fs::create_dir_all(APP_CONFIG_DIR.as_path()).unwrap();
|
||||
}
|
||||
if !APP_DATA_DIR.exists() {
|
||||
std::fs::create_dir_all(APP_DATA_DIR.as_path()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn establish_db_connection(application_config: ApplicationConfig) -> SqliteConnection {
|
||||
let database_url = application_config.basic_config.db_path;
|
||||
fn establish_db_connection(application_config: &ApplicationConfig) -> SqliteConnection {
|
||||
let database_url = application_config.clone().basic_config.db_path;
|
||||
SqliteConnection::establish(&database_url)
|
||||
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
|
||||
}
|
||||
@@ -105,7 +92,7 @@ impl App {
|
||||
if let Some(view) = self.state.view.as_mut() {
|
||||
if let Some(main_view) = view.downcast_mut::<MainView>() {
|
||||
frame.render_stateful_widget(
|
||||
MainView::new(&self.app_config),
|
||||
MainView::new(),
|
||||
frame.area(),
|
||||
&mut main_view.state,
|
||||
);
|
||||
@@ -116,3 +103,16 @@ impl App {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn initialize_folders() -> Result<()> {
|
||||
if !APP_CONFIG_DIR.exists() {
|
||||
fs::create_dir_all(APP_CONFIG_DIR.as_path()).await?;
|
||||
}
|
||||
if !APP_DATA_DIR.exists() {
|
||||
fs::create_dir_all(APP_DATA_DIR.as_path()).await?;
|
||||
}
|
||||
if !DLSITE_IMG_FOLDER.exists() {
|
||||
fs::create_dir_all(DLSITE_IMG_FOLDER.as_path()).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
63
src/cli.rs
63
src/cli.rs
@@ -6,7 +6,9 @@ use color_eyre::Result;
|
||||
use ratatui::crossterm;
|
||||
use std::path::PathBuf;
|
||||
use color_eyre::eyre::eyre;
|
||||
use crate::crawler::DLSiteCrawler;
|
||||
|
||||
// region Folder Command
|
||||
#[derive(Parser, Debug)]
|
||||
struct FolderAddCommand {
|
||||
path: String,
|
||||
@@ -22,10 +24,29 @@ struct FolderCommand {
|
||||
#[command(subcommand)]
|
||||
subcommand: FolderSubCommand,
|
||||
}
|
||||
// endregion
|
||||
|
||||
// region Sync
|
||||
#[derive(Parser, Debug)]
|
||||
struct SyncCommand {
|
||||
#[command(subcommand)]
|
||||
subcommand: SyncSubCommand,
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
enum SyncSubCommand {
|
||||
DLSite(SyncDLSiteCommand)
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct SyncDLSiteCommand;
|
||||
|
||||
// endregion
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
enum CliSubCommand {
|
||||
Folder(FolderCommand),
|
||||
Sync(SyncCommand),
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
@@ -39,15 +60,19 @@ impl Subcommand for Cli {
|
||||
fn augment_subcommands(cmd: Command) -> Command {
|
||||
cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
|
||||
.subcommand_required(true)
|
||||
.subcommand(SyncCommand::augment_args(Command::new("sync")))
|
||||
.subcommand_required(true)
|
||||
}
|
||||
|
||||
fn augment_subcommands_for_update(cmd: Command) -> Command {
|
||||
cmd.subcommand(FolderCommand::augment_args(Command::new("folder")))
|
||||
.subcommand_required(true)
|
||||
.subcommand(SyncCommand::augment_args(Command::new("sync")))
|
||||
.subcommand_required(true)
|
||||
}
|
||||
|
||||
fn has_subcommand(name: &str) -> bool {
|
||||
matches!(name, "folder")
|
||||
matches!(name, "folder" | "sync")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,8 +92,25 @@ impl Subcommand for FolderCommand {
|
||||
}
|
||||
}
|
||||
|
||||
impl Subcommand for SyncCommand {
|
||||
fn augment_subcommands(cmd: Command) -> Command {
|
||||
cmd.subcommand(SyncDLSiteCommand::augment_args(Command::new("dlsite")))
|
||||
.subcommand_required(true)
|
||||
}
|
||||
|
||||
fn augment_subcommands_for_update(cmd: Command) -> Command {
|
||||
cmd.subcommand(SyncDLSiteCommand::augment_args(Command::new("dlsite")))
|
||||
.subcommand_required(true)
|
||||
}
|
||||
|
||||
fn has_subcommand(name: &str) -> bool {
|
||||
matches!(name, "dlsite")
|
||||
}
|
||||
}
|
||||
|
||||
impl Cli {
|
||||
pub async fn run(&self) -> Result<()> {
|
||||
app::initialize_folders().await?;
|
||||
if self.subcommand.is_none() {
|
||||
return self.start_tui().await;
|
||||
}
|
||||
@@ -82,7 +124,7 @@ impl Cli {
|
||||
crossterm::terminal::enable_raw_mode()?;
|
||||
|
||||
let mut terminal = ratatui::init();
|
||||
let app = app::App::create().await;
|
||||
let app = app::App::create().await?;
|
||||
let result = app.run(&mut terminal).await;
|
||||
ratatui::restore();
|
||||
|
||||
@@ -95,6 +137,7 @@ impl CliSubCommand {
|
||||
pub async fn handle(&self) -> Result<()> {
|
||||
match self {
|
||||
CliSubCommand::Folder(cmd) => cmd.subcommand.handle().await,
|
||||
CliSubCommand::Sync(cmd) => cmd.subcommand.handle().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -107,6 +150,22 @@ impl FolderSubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
impl SyncSubCommand {
|
||||
pub async fn handle(&self) -> Result<()> {
|
||||
match self {
|
||||
Self::DLSite(cmd) => cmd.handle().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SyncDLSiteCommand {
|
||||
pub async fn handle(&self) -> Result<()> {
|
||||
let crawler = DLSiteCrawler::new();
|
||||
crawler.get_game_info("RJ163319").await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl FolderAddCommand {
|
||||
pub async fn handle(&self) -> Result<()> {
|
||||
let mut config = ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH.to_path_buf())?;
|
||||
|
||||
@@ -7,6 +7,14 @@ use serde_json;
|
||||
pub mod types;
|
||||
|
||||
impl ApplicationConfig {
|
||||
pub fn get_config() -> Result<Self> {
|
||||
if APP_CONIFG_FILE_PATH.exists() {
|
||||
ApplicationConfig::from_file(&APP_CONIFG_FILE_PATH)
|
||||
} else {
|
||||
Ok(ApplicationConfig::new())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_file(path: &PathBuf) -> Result<Self> {
|
||||
let reader = std::fs::File::open(path)?;
|
||||
let result = serde_json::from_reader(reader)?;
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct ApplicationConfig {
|
||||
pub(crate) basic_config: BasicConfig,
|
||||
pub(crate) path_config: PathConfig,
|
||||
pub struct ApplicationConfig {
|
||||
pub basic_config: BasicConfig,
|
||||
pub path_config: PathConfig,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct BasicConfig {
|
||||
pub(crate) db_path: String,
|
||||
pub(crate) tick_rate: u64,
|
||||
pub db_path: String,
|
||||
pub tick_rate: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct PathConfig {
|
||||
pub(crate) dlsite_paths: Vec<String>,
|
||||
pub struct PathConfig {
|
||||
pub dlsite_paths: Vec<String>,
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use directories::BaseDirs;
|
||||
use lazy_static::lazy_static;
|
||||
use std::path::PathBuf;
|
||||
use crate::config::types::ApplicationConfig;
|
||||
|
||||
const APP_DIR_NAME: &str = "sus_manager";
|
||||
lazy_static! {
|
||||
|
||||
@@ -1,8 +1,78 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use color_eyre::eyre::eyre;
|
||||
use reqwest::Url;
|
||||
use color_eyre::Result;
|
||||
use lazy_static::lazy_static;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::constants::APP_DATA_DIR;
|
||||
use crate::crawler::Crawler;
|
||||
|
||||
const DLSITE_URL: &str = "https://www.dlsite.com/";
|
||||
const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax";
|
||||
lazy_static! {
    /// Folder `dlsite/img` under the application data directory; the crawler
    /// saves downloaded product images here.
    // `join` borrows its receiver and returns a new `PathBuf`, so the base
    // path does not need to be cloned first.
    pub static ref DLSITE_IMG_FOLDER: PathBuf = APP_DATA_DIR.join("dlsite").join("img");
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct DLSiteCrawler {
|
||||
pub struct DLSiteCrawler {
|
||||
crawler: Crawler,
|
||||
}
|
||||
|
||||
impl DLSiteCrawler {}
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct DLSiteManiax {
|
||||
pub work_name: String,
|
||||
#[serde(rename = "work_image")]
|
||||
work_image_url: String,
|
||||
#[serde(rename = "dl_count")]
|
||||
pub sells_count: u32
|
||||
}
|
||||
|
||||
impl DLSiteCrawler {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
crawler: Crawler::new("DLSite", Url::parse(DLSITE_URL).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` when `rj_num` is `RJ` followed by exactly six or eight
/// ASCII digits (total length 8 or 10).
///
/// Only ASCII digits are accepted. `char::is_numeric` would also accept
/// other Unicode numerics, and because the length is measured in bytes a
/// shorter run of multi-byte digits could pass the length check. The id is
/// later interpolated into a query string and a file name, so it is kept
/// strictly ASCII.
fn is_valid_number(rj_num: &str) -> bool {
    let Some(digits) = rj_num.strip_prefix("RJ") else {
        return false;
    };
    matches!(digits.len(), 6 | 8) && digits.bytes().all(|b| b.is_ascii_digit())
}
|
||||
|
||||
pub async fn get_game_info(&self, rj_num: &str) -> Result<DLSiteManiax> {
|
||||
if !Self::is_valid_number(rj_num) {
|
||||
return Err(eyre!("Invalid number: {}", rj_num));
|
||||
}
|
||||
let mut api_url = self.crawler.base_url.clone();
|
||||
api_url.set_path(DLSITE_API_ENDPOINT);
|
||||
api_url.set_query(Some(&format!("product_id={}", rj_num)));
|
||||
let res = self.crawler.client.get(api_url).send().await?;
|
||||
let maniax_result = match res.json::<HashMap<String, DLSiteManiax>>().await {
|
||||
Ok(maniax_result) => maniax_result,
|
||||
Err(_) => return Err(eyre!("Maniax {} is restricted/removed", rj_num)),
|
||||
};
|
||||
let maniax_info = maniax_result.iter().next().unwrap().1.clone();
|
||||
self.save_main_image(&maniax_info, rj_num).await?;
|
||||
Ok(maniax_info)
|
||||
}
|
||||
|
||||
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
|
||||
let url_string = format!("https:{}", info.work_image_url);
|
||||
let url = Url::parse(&url_string)?;
|
||||
let img_res = self.crawler.client.get(url).send().await?;
|
||||
let img_bytes = img_res.bytes().await?;
|
||||
let img = image::load_from_memory(&img_bytes)?;
|
||||
img.save(DLSITE_IMG_FOLDER.clone().join(format!("{}.jpg", rj_num)).as_path())?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,48 +1,58 @@
|
||||
mod dlsite;
|
||||
|
||||
pub use dlsite::*;
|
||||
use color_eyre::eyre::eyre;
|
||||
use crate::constants::APP_CACHE_PATH;
|
||||
use color_eyre::Result;
|
||||
use reqwest::{Client, Url};
|
||||
use reqwest::{Client, StatusCode, Url};
|
||||
use robotstxt::DefaultMatcher;
|
||||
use scraper::Html;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct Crawler {
|
||||
struct Crawler {
|
||||
id: String,
|
||||
base_url: Url,
|
||||
client: Client,
|
||||
robots_txt: String,
|
||||
pub(crate) base_url: Url,
|
||||
pub(crate) client: Client,
|
||||
robots_txt: Option<String>,
|
||||
}
|
||||
|
||||
impl Crawler {
|
||||
pub async fn new(id: &str, base_url: Url) -> Self {
|
||||
pub fn new(id: &str, base_url: Url) -> Self {
|
||||
let crawler = Self {
|
||||
id: id.to_string(),
|
||||
client: Client::new(),
|
||||
robots_txt: Self::get_robots_txt(id, &base_url).await.unwrap(),
|
||||
robots_txt: None,
|
||||
base_url,
|
||||
};
|
||||
let mut matcher = DefaultMatcher::default();
|
||||
let is_access_allowed = matcher.one_agent_allowed_by_robots(
|
||||
&crawler.robots_txt,
|
||||
"reqwest",
|
||||
crawler.base_url.as_str(),
|
||||
);
|
||||
if !is_access_allowed {
|
||||
panic!("Crawler cannot access site {}", crawler.base_url.as_str());
|
||||
}
|
||||
crawler
|
||||
}
|
||||
|
||||
async fn get_robots_txt(id: &str, base_url: &Url) -> Result<String> {
|
||||
let local_robots_path = APP_CACHE_PATH.clone().join(id).join("robots.txt");
|
||||
async fn check_access(&self, url: &Url) -> Result<()> {
|
||||
let mut matcher = DefaultMatcher::default();
|
||||
let is_access_allowed = matcher.one_agent_allowed_by_robots(
|
||||
&self.get_robots_txt().await?,
|
||||
"reqwest",
|
||||
self.base_url.as_str(),
|
||||
);
|
||||
if !is_access_allowed {
|
||||
return Err(eyre!("Crawler cannot access site {}", self.base_url.as_str()));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_robots_txt(&self) -> Result<String> {
|
||||
if let Some(txt) = &self.robots_txt {
|
||||
return Ok(txt.clone());
|
||||
}
|
||||
|
||||
let local_robots_path = APP_CACHE_PATH.clone().join(&self.id).join("robots.txt");
|
||||
if !local_robots_path.exists() {
|
||||
let mut robots_url = base_url.clone();
|
||||
let mut robots_url = self.base_url.clone();
|
||||
robots_url.set_path("/robots.txt");
|
||||
let response = reqwest::get(robots_url).await.expect(
|
||||
format!(
|
||||
"Failed to get robots.txt in `{}/robots.txt`",
|
||||
base_url.as_str()
|
||||
self.base_url.as_str()
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
@@ -55,10 +65,13 @@ impl Crawler {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_html(&self, path: &str) -> Result<Html> {
|
||||
pub async fn get_html(&self, path: &str) -> Result<(Html, StatusCode)> {
|
||||
let mut url = self.base_url.clone();
|
||||
self.check_access(&url).await?;
|
||||
url.set_path(path);
|
||||
let html_text = &self.client.get(url).send().await?.text().await?;
|
||||
Ok(Html::parse_document(html_text))
|
||||
let res = self.client.get(url).send().await?;
|
||||
let status = res.status();
|
||||
let html_text = &res.text().await?;
|
||||
Ok((Html::parse_document(html_text), status))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,7 +11,6 @@ use ratatui::widgets::{Block, Borders, Paragraph, StatefulWidget};
|
||||
use std::any::Any;
|
||||
|
||||
pub struct MainView {
|
||||
app_config: ApplicationConfig,
|
||||
pub state: MainViewState,
|
||||
}
|
||||
|
||||
@@ -29,20 +28,19 @@ enum Status {
|
||||
}
|
||||
|
||||
impl MainView {
|
||||
pub fn new(app_conf: &ApplicationConfig) -> Self {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
state: MainViewState {
|
||||
popup: None,
|
||||
status: Status::Running,
|
||||
},
|
||||
app_config: app_conf.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn quit(&mut self) -> color_eyre::Result<()> {
|
||||
if self.state.popup.is_none() {
|
||||
self.state.status = Status::Exiting;
|
||||
self.app_config.save()?;
|
||||
ApplicationConfig::get_config()?.save()?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -73,9 +71,11 @@ impl View for MainView {
|
||||
let Some(value) = popup.get_folder_value() &&
|
||||
key.code.is_enter()
|
||||
{
|
||||
self.app_config.path_config.dlsite_paths.push(value);
|
||||
let mut config = ApplicationConfig::get_config()?;
|
||||
config.path_config.dlsite_paths.push(value);
|
||||
|
||||
popup.textarea.reset_value()?;
|
||||
self.app_config.save()?;
|
||||
config.save()?;
|
||||
}
|
||||
if !matches!(self.state.status, Status::Popup) && matches!(key.kind, KeyEventKind::Press) {
|
||||
match key.code {
|
||||
|
||||
Reference in New Issue
Block a user