Remove diesel

This commit is contained in:
2025-10-19 23:25:56 +08:00
parent eacf897f8c
commit 5c466d37e9
13 changed files with 318 additions and 213 deletions

3
.gitignore vendored
View File

@@ -1,6 +1,3 @@
target
.idea
games.db
*.env
diesel.toml
migrations/.*

297
Cargo.lock generated
View File

@@ -17,6 +17,15 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "aligned-vec"
version = "0.6.4"
@@ -167,6 +176,24 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bindgen"
version = "0.72.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"itertools 0.13.0",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
]
[[package]]
name = "bit_field"
version = "0.10.3"
@@ -221,6 +248,16 @@ version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "bzip2-sys"
version = "0.1.13+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "cassowary"
version = "0.3.0"
@@ -248,6 +285,15 @@ dependencies = [
"shlex",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-expr"
version = "0.15.8"
@@ -264,6 +310,17 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "clap"
version = "4.5.48"
@@ -343,6 +400,15 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "colored"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "compact_str"
version = "0.8.1"
@@ -495,18 +561,8 @@ version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
dependencies = [
"darling_core 0.20.11",
"darling_macro 0.20.11",
]
[[package]]
name = "darling"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
dependencies = [
"darling_core 0.21.3",
"darling_macro 0.21.3",
"darling_core",
"darling_macro",
]
[[package]]
@@ -523,38 +579,13 @@ dependencies = [
"syn",
]
[[package]]
name = "darling_core"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.20.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
dependencies = [
"darling_core 0.20.11",
"quote",
"syn",
]
[[package]]
name = "darling_macro"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
dependencies = [
"darling_core 0.21.3",
"darling_core",
"quote",
"syn",
]
@@ -589,41 +620,6 @@ dependencies = [
"syn",
]
[[package]]
name = "diesel"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8496eeb328dce26ee9d9b73275d396d9bddb433fa30106cf6056dd8c3c2764c"
dependencies = [
"diesel_derives",
"downcast-rs",
"libsqlite3-sys",
"sqlite-wasm-rs",
"time",
]
[[package]]
name = "diesel_derives"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09af0e983035368439f1383011cd87c46f41da81d0f21dc3727e2857d5a43c8e"
dependencies = [
"diesel_table_macro_syntax",
"dsl_auto_type",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "diesel_table_macro_syntax"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c"
dependencies = [
"syn",
]
[[package]]
name = "directories"
version = "6.0.0"
@@ -665,26 +661,6 @@ dependencies = [
"litrs",
]
[[package]]
name = "downcast-rs"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc"
[[package]]
name = "dsl_auto_type"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd122633e4bef06db27737f21d3738fb89c8f6d5360d6d9d7635dda142a7757e"
dependencies = [
"darling 0.21.3",
"either",
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "dtoa"
version = "1.0.10"
@@ -1025,6 +1001,12 @@ version = "0.32.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "h2"
version = "0.4.12"
@@ -1384,7 +1366,7 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a"
dependencies = [
"darling 0.20.11",
"darling",
"indoc",
"proc-macro2",
"quote",
@@ -1507,6 +1489,16 @@ dependencies = [
"cc",
]
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if",
"windows-link 0.2.0",
]
[[package]]
name = "libredox"
version = "0.1.10"
@@ -1518,11 +1510,27 @@ dependencies = [
]
[[package]]
name = "libsqlite3-sys"
version = "0.35.0"
name = "librocksdb-sys"
version = "0.17.3+10.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f"
checksum = "cef2a00ee60fe526157c9023edab23943fae1ce2ab6f4abb2a807c1746835de9"
dependencies = [
"bindgen",
"bzip2-sys",
"cc",
"libc",
"libz-sys",
"lz4-sys",
"zstd-sys",
]
[[package]]
name = "libz-sys"
version = "1.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
@@ -1584,6 +1592,16 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "lz4-sys"
version = "1.11.1+lz4-1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "mac"
version = "0.1.1"
@@ -2216,6 +2234,35 @@ dependencies = [
"thiserror 2.0.17",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "reqwest"
version = "0.12.23"
@@ -2284,12 +2331,28 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbc52377db80e3fec3a2c748ca603b8b6cacdd34ff89ff4b742a635361d4b4a7"
[[package]]
name = "rocksdb"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddb7af00d2b17dbd07d82c0063e25411959748ff03e8d4f96134c2ff41fce34f"
dependencies = [
"libc",
"librocksdb-sys",
]
[[package]]
name = "rustc-demangle"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustix"
version = "0.38.44"
@@ -2594,21 +2657,6 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "sqlite-wasm-rs"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aead1c279716985b981b7940ef9b652d3f93d70a7296853c633b7ce8fa8088a"
dependencies = [
"js-sys",
"once_cell",
"thiserror 2.0.17",
"tokio",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -2686,8 +2734,8 @@ version = "0.1.0"
dependencies = [
"clap",
"color-eyre",
"colored",
"crossterm 0.29.0",
"diesel",
"directories",
"futures",
"image",
@@ -2696,6 +2744,7 @@ dependencies = [
"ratatui",
"reqwest",
"robotstxt",
"rocksdb",
"scraper",
"serde",
"serde_json",
@@ -2868,14 +2917,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
dependencies = [
"deranged",
"itoa",
"libc",
"num-conv",
"num_threads",
"powerfmt",
"serde",
"time-core",
"time-macros",
]
[[package]]
@@ -2884,16 +2931,6 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"
[[package]]
name = "time-macros"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
dependencies = [
"num-conv",
"time-core",
]
[[package]]
name = "tinystr"
version = "0.8.1"
@@ -3690,6 +3727,16 @@ dependencies = [
"syn",
]
[[package]]
name = "zstd-sys"
version = "2.0.16+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
dependencies = [
"cc",
"pkg-config",
]
[[package]]
name = "zune-core"
version = "0.4.12"

View File

@@ -16,6 +16,8 @@ scraper = "0.24.0"
rat-cursor = "1.2.1"
serde_json = "1.0.145"
image = "0.25.8"
colored = "3.0.0"
rocksdb = "0.24.0"
[dependencies.serde]
version = "1.0.228"
@@ -45,7 +47,3 @@ features = ["blocking", "json"]
[dependencies.tokio]
version = "1.47.1"
features = ["full"]
[dependencies.diesel]
version = "2.3.2"
features = ["sqlite"]

View File

@@ -1,2 +0,0 @@
-- This file should undo anything in `up.sql`
DROP TABLE dl_games;

View File

@@ -1,4 +0,0 @@
-- Your SQL goes here
CREATE TABLE dl_games (
serial_number CHARACTER(10) NOT NULL PRIMARY KEY
)

View File

@@ -6,7 +6,6 @@ use crate::widgets::views::View;
use color_eyre::Result;
use crossterm::event::Event as CrosstermEvent;
use crossterm::event::{Event, KeyEvent};
use diesel::{Connection, SqliteConnection};
use rat_cursor::HasScreenCursor;
use ratatui::{DefaultTerminal, Frame};
use std::any::Any;
@@ -16,7 +15,6 @@ use crate::crawler::DLSITE_IMG_FOLDER;
pub(crate) struct App {
events: EventHandler,
db_connection: SqliteConnection,
state: AppState,
}
@@ -27,24 +25,16 @@ struct AppState {
impl App {
pub async fn create() -> Result<Self> {
let config = ApplicationConfig::get_config()?;
let db_conn = Self::establish_db_connection(&config);
let state = AppState {
view: Some(Box::new(MainView::new())),
};
let app = Self {
events: EventHandler::new(Duration::from_millis(config.basic_config.tick_rate)),
db_connection: db_conn,
state,
};
Ok(app)
}
fn establish_db_connection(application_config: &ApplicationConfig) -> SqliteConnection {
let database_url = application_config.clone().basic_config.db_path;
SqliteConnection::establish(&database_url)
.unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
}
pub async fn run(mut self, terminal: &mut DefaultTerminal) -> Result<()> {
loop {
terminal.draw(|frame| self.draw(frame))?;

View File

@@ -3,9 +3,11 @@ use crate::config::types::ApplicationConfig;
use clap::{command, Args, Command, Parser, Subcommand};
use color_eyre::Result;
use ratatui::crossterm;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use color_eyre::eyre::eyre;
use colored::Colorize;
use crate::crawler::DLSiteCrawler;
use crate::crawler::dlsite;
// region Folder Command
#[derive(Parser, Debug)]
@@ -159,8 +161,45 @@ impl SyncSubCommand {
impl SyncDLSiteCommand {
/// Entry point of the DLsite sync command.
///
/// Loads the application configuration, then runs the genre sync followed by
/// the work sync.
///
/// # Errors
/// Returns the first error produced by loading the configuration or by
/// either sync step; the later step is not run if an earlier one fails.
pub async fn handle(&self) -> Result<()> {
let app_conf = ApplicationConfig::get_config()?;
// Both steps borrow the same configuration; genres run first, then works.
Self::sync_genres(&app_conf).await?;
Self::sync_works(&app_conf).await?;
Ok(())
}
/// Placeholder for the genre sync step.
///
/// Currently performs no work: `app_conf` is accepted but unused and the
/// function always returns `Ok(())`.
async fn sync_genres(app_conf: &ApplicationConfig) -> Result<()> {
Ok(())
}
async fn sync_works(app_conf: &ApplicationConfig) -> Result<()> {
let crawler = DLSiteCrawler::new();
crawler.get_game_info("RJ163319").await?;
let mut rj_nums: Vec<String> = Vec::new();
for path_str in app_conf.path_config.dlsite_paths.iter() {
let path = Path::new(path_str);
if !path.exists() {
return Err(eyre!("{} {}", path_str.blue(), "does not exist".red()));
}
let dir_paths = path.read_dir()?
.filter_map(Result::ok)
.map(|e| e.path())
.collect::<Vec<_>>();
for dir_path in dir_paths.iter() {
if !dir_path.is_dir() {
println!("{dir_path:?} is not a directory");
continue;
}
let dir_name = dir_path
.file_name().unwrap()
.to_str().unwrap();
if !dlsite::is_valid_rj_number(dir_name) {
println!("{} {}", dir_path.to_str().unwrap().blue(), "is not a valid rj number, please add it manually".red());
continue;
}
rj_nums.push(dir_name.to_string());
}
}
let maniaxes = crawler.get_game_infos(rj_nums).await?;
//TODO: save into db/probably change to use jsonb
Ok(())
}
}

View File

@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;
use color_eyre::eyre::eyre;
use reqwest::Url;
@@ -6,11 +6,13 @@ use color_eyre::Result;
use lazy_static::lazy_static;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use crate::constants::APP_DATA_DIR;
use crate::constants::{APP_DATA_DIR};
use crate::crawler::Crawler;
//TODO: override locale with user one
const DLSITE_URL: &str = "https://www.dlsite.com/";
const DLSITE_API_ENDPOINT: &str = "/maniax/product/info/ajax";
const DLSITE_PRODUCT_API_ENDPOINT: &str = "/maniax/product/info/ajax";
const DLSITE_FILTER_OPTIONS_ENDPOINT: &str = "/maniax/fs/=/api_access/1/locale/ja_JP";
const DLSITE_MANIAX_PATH: &str = "/maniax/work/=/product_id/";
lazy_static! {
pub static ref DLSITE_IMG_FOLDER: PathBuf = APP_DATA_DIR.clone().join("dlsite").join("img");
@@ -23,7 +25,8 @@ pub struct DLSiteCrawler {
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DLSiteManiax {
pub work_name: String,
#[serde(rename = "work_name")]
pub title: String,
#[serde(rename = "work_image")]
work_image_url: String,
#[serde(rename = "dl_count")]
@@ -39,51 +42,56 @@ impl DLSiteCrawler {
}
}
fn is_valid_number(rj_num: &str) -> bool {
let len = rj_num.len();
if len != 8 && len != 10 {
return false;
pub async fn get_game_infos(&self, rj_nums: Vec<String>) -> Result<Vec<DLSiteManiax>> {
let invalid_nums = rj_nums.iter()
.filter(|&n| !is_valid_rj_number(n))
.map(|n| n.to_string())
.collect::<Vec<String>>();
if !invalid_nums.is_empty() {
return Err(
eyre!("Invalid numbers: {}", invalid_nums.join(", "))
);
}
if !rj_num.starts_with("RJ") {
return false;
let query = &format!("product_id={}", rj_nums.join(","));
let (maniax_result, _) = self.crawler
.get_json::<HashMap<String, DLSiteManiax>>(DLSITE_PRODUCT_API_ENDPOINT, Some(query))
.await?;
let keys = maniax_result.keys()
.map(|k| k.to_string())
.collect::<Vec<String>>();
let keys_hash: HashSet<String> = HashSet::from_iter(keys);
let nums_hash: HashSet<String> = HashSet::from_iter(rj_nums);
let nums_diff = nums_hash.difference(&keys_hash)
.map(|n| n.to_string())
.collect::<Vec<String>>();
if !nums_diff.is_empty() {
return Err(eyre!("Restricted/Removed Works: {}", nums_diff.join(", ")));
}
if !rj_num.chars().skip(2).all(|c| c.is_numeric()) {
return false;
let mut maniax_infos = Vec::new();
for (rj_num, mut info) in maniax_result {
self.save_main_image(&info, &rj_num).await?;
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
let (html, _) = self.crawler.get_html(&html_path).await?;
let genres = self.get_genres(&html)?;
info.genre_ids = genres;
maniax_infos.push(info);
}
true
}
pub async fn get_game_info(&self, rj_num: &str) -> Result<DLSiteManiax> {
if !Self::is_valid_number(rj_num) {
return Err(eyre!("Invalid number: {rj_num}"));
}
let mut api_url = self.crawler.base_url.clone();
api_url.set_path(DLSITE_API_ENDPOINT);
api_url.set_query(Some(&format!("product_id={rj_num}")));
let api_res = self.crawler.client.get(api_url).send().await?;
let maniax_result = match api_res.json::<HashMap<String, DLSiteManiax>>().await {
Ok(maniax_result) => maniax_result,
Err(_) => return Err(eyre!("Maniax {rj_num} is restricted/removed")),
};
let mut maniax_info = maniax_result.iter().next().unwrap().1.clone();
self.save_main_image(&maniax_info, rj_num).await?;
let html_path = format!("{DLSITE_MANIAX_PATH}{rj_num}");
let (html, _) = self.crawler.get_html(&html_path).await?;
let genres = self.get_genres(&html)?;
maniax_info.genre_ids = genres;
Ok(maniax_info)
Ok(maniax_infos)
}
async fn save_main_image(&self, info: &DLSiteManiax, rj_num: &str) -> Result<()> {
let img_file_name = format!("{rj_num}.jpg");
let img_save_path = DLSITE_IMG_FOLDER.clone().join(img_file_name);
if img_save_path.exists() {
return Ok(());
}
let url_string = format!("https:{}", info.work_image_url);
let url = Url::parse(&url_string)?;
let img_res = self.crawler.client.get(url).send().await?;
let img_bytes = img_res.bytes().await?;
let img = image::load_from_memory(&img_bytes)?;
img.save(DLSITE_IMG_FOLDER.clone().join(format!("{rj_num}.jpg")).as_path())?;
let (img, _) = self.crawler.get_img(&url).await?;
img.save(img_save_path)?;
Ok(())
}
@@ -96,7 +104,7 @@ impl DLSiteCrawler {
let result = html.select(&selector).next().unwrap();
let genre_row = result.child_elements()
.filter(|e|
e.child_elements().any(|e| e.inner_html() == "ジャンル")
e.child_elements().any(|e| e.inner_html() == "ジャンル") // TODO: will not work with english
).next().unwrap();
let data = genre_row
.child_elements().skip(1).next().unwrap()
@@ -115,3 +123,17 @@ impl DLSiteCrawler {
Ok(genre_ids)
}
}
/// Returns `true` when `rj_num` is a well-formed RJ number.
///
/// A well-formed RJ number is the literal prefix `RJ` followed by exactly
/// 6 or 8 ASCII digits (8 or 10 characters in total), e.g. `RJ163319` or
/// `RJ01234567`.
///
/// Only ASCII digits are accepted: Unicode numerics such as full-width
/// digits are rejected, so the byte length always equals the digit count.
pub fn is_valid_rj_number(rj_num: &str) -> bool {
    rj_num.strip_prefix("RJ").map_or(false, |digits| {
        matches!(digits.len(), 6 | 8) && digits.bytes().all(|b| b.is_ascii_digit())
    })
}

View File

@@ -1,18 +1,20 @@
mod dlsite;
pub mod dlsite;
pub use dlsite::*;
use color_eyre::eyre::eyre;
use crate::constants::APP_CACHE_PATH;
use color_eyre::Result;
use image::DynamicImage;
use reqwest::{Client, StatusCode, Url};
use robotstxt::DefaultMatcher;
use scraper::Html;
use serde::de::DeserializeOwned;
#[derive(Clone)]
struct Crawler {
id: String,
pub(crate) base_url: Url,
pub(crate) client: Client,
client: Client,
robots_txt: Option<String>,
}
@@ -32,7 +34,7 @@ impl Crawler {
let is_access_allowed = matcher.one_agent_allowed_by_robots(
&self.get_robots_txt().await?,
"reqwest",
self.base_url.as_str(),
url.as_str(),
);
if !is_access_allowed {
return Err(eyre!("Crawler cannot access site {}", self.base_url.as_str()));
@@ -74,4 +76,35 @@ impl Crawler {
let html_text = &res.text().await?;
Ok((Html::parse_document(html_text), status))
}
/// Requests `path` (plus the optional `query` string) on the crawler's base
/// URL and deserializes the JSON response body into `T`.
///
/// The request is only sent once `check_access` has succeeded for the final
/// URL. The HTTP status is returned alongside the decoded value; it is not
/// inspected here.
///
/// # Errors
/// Fails if the access check, the request, or JSON decoding fails.
pub async fn get_json<T>(&self, path: &str, query: Option<&str>) -> Result<(T, StatusCode)>
where
    T: DeserializeOwned,
{
    let target = {
        let mut base = self.base_url.clone();
        base.set_path(path);
        base.set_query(query);
        base
    };
    self.check_access(&target).await?;

    let response = self.client.get(target).send().await?;
    let status = response.status();
    let decoded: T = response.json().await?;
    Ok((decoded, status))
}
/// Downloads the resource at `url` and decodes it as an image.
///
/// The request is only sent once `check_access` has succeeded for `url`.
/// The HTTP status is returned alongside the decoded image; it is not
/// inspected here.
///
/// # Errors
/// Fails if the access check, the request, reading the body, or image
/// decoding fails.
pub async fn get_img(&self, url: &Url) -> Result<(DynamicImage, StatusCode)> {
    self.check_access(url).await?;

    let response = self.client.get(url.clone()).send().await?;
    let status = response.status();
    let raw = response.bytes().await?;
    Ok((image::load_from_memory(&raw)?, status))
}
/// Requests `path` on the crawler's base URL and returns the raw response
/// body as an owned byte vector.
///
/// The request is only sent once `check_access` has succeeded for the final
/// URL. The HTTP status is returned alongside the bytes; it is not
/// inspected here.
///
/// # Errors
/// Fails if the access check, the request, or reading the body fails.
pub async fn get_bytes(&self, path: &str) -> Result<(Vec<u8>, StatusCode)> {
    let mut target = self.base_url.clone();
    target.set_path(path);
    self.check_access(&target).await?;

    let response = self.client.get(target).send().await?;
    let status = response.status();
    let payload = response.bytes().await?.to_vec();
    Ok((payload, status))
}
}

View File

@@ -5,7 +5,6 @@ mod constants;
mod crawler;
mod event;
mod helpers;
mod schema;
mod models;
mod widgets;

View File

@@ -1,14 +1,6 @@
use diesel::{Queryable, Selectable};
use ratatui::widgets::ListState;
/// A collection of games of type `T` stored together with a ratatui
/// [`ListState`].
// NOTE(review): presumably `state` tracks the selection when `games` is
// rendered as a ratatui list — confirm against the widget code.
pub(crate) struct GameList<T> {
/// The games held by this list.
games: Vec<T>,
/// ratatui list state kept alongside `games`.
state: ListState,
}
#[derive(Queryable, Selectable)]
#[diesel(table_name = crate::schema::dl_games)]
#[diesel(check_for_backend(diesel::sqlite::Sqlite))]
pub(crate) struct DLSiteGame {
serial_number: String,
}

View File

@@ -1 +1,2 @@
pub mod game;
mod game;
pub use game::*;

View File

@@ -1,7 +0,0 @@
// @generated automatically by Diesel CLI.
diesel::table! {
dl_games (serial_number) {
serial_number -> Text,
}
}