use GhostClient for ex
This commit is contained in:
parent
feaa9e7c47
commit
251fe56475
|
@ -104,8 +104,8 @@ impl Collector for EHCollector {
|
||||||
|
|
||||||
// Since paged returns at least one page, we can safely get it.
|
// Since paged returns at least one page, we can safely get it.
|
||||||
let title = match_first_group(&TITLE_RE, &gallery_pages[0])
|
let title = match_first_group(&TITLE_RE, &gallery_pages[0])
|
||||||
.unwrap_or("No Title")
|
.map(|s| s.to_string())
|
||||||
.to_string();
|
.unwrap_or_else(|| format!("e-hentai-{album_id}"));
|
||||||
|
|
||||||
let mut image_page_links = Vec::new();
|
let mut image_page_links = Vec::new();
|
||||||
for gallery_page in gallery_pages.iter() {
|
for gallery_page in gallery_pages.iter() {
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use again::RetryPolicy;
|
use again::RetryPolicy;
|
||||||
|
use ipnet::Ipv6Net;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::header;
|
use reqwest::header::{self, HeaderMap};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
config,
|
config,
|
||||||
http_proxy::ProxiedClient,
|
http_client::{GhostClient, GhostClientBuilder},
|
||||||
stream::AsyncStream,
|
stream::AsyncStream,
|
||||||
util::match_first_group,
|
util::{get_bytes, get_string, match_first_group},
|
||||||
util::{get_bytes, get_string},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
|
@ -32,8 +32,8 @@ const TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct EXCollector {
|
pub struct EXCollector {
|
||||||
proxy_client: ProxiedClient,
|
ghost_client: GhostClient,
|
||||||
client: reqwest::Client,
|
raw_client: reqwest::Client,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
|
@ -43,40 +43,49 @@ pub struct ExConfig {
|
||||||
pub igneous: String,
|
pub igneous: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EXCollector {
|
impl ExConfig {
|
||||||
pub fn new(config: &ExConfig, proxy_client: ProxiedClient) -> anyhow::Result<Self> {
|
fn build_header(&self) -> HeaderMap {
|
||||||
let cookie_value = format!(
|
let cookie_value = format!(
|
||||||
"ipb_pass_hash={};ipb_member_id={};igneous={};nw=1",
|
"ipb_pass_hash={};ipb_member_id={};igneous={};nw=1",
|
||||||
config.ipb_pass_hash, config.ipb_member_id, config.igneous
|
self.ipb_pass_hash, self.ipb_member_id, self.igneous
|
||||||
);
|
);
|
||||||
|
|
||||||
// set headers with exhentai cookies
|
// set headers with exhentai cookies
|
||||||
let mut request_headers = header::HeaderMap::new();
|
let mut request_headers = header::HeaderMap::new();
|
||||||
request_headers.insert(
|
request_headers.insert(
|
||||||
header::COOKIE,
|
header::COOKIE,
|
||||||
header::HeaderValue::from_str(&cookie_value)?,
|
header::HeaderValue::from_str(&cookie_value)
|
||||||
|
.expect("invalid ExConfig settings, unable to build header map"),
|
||||||
);
|
);
|
||||||
|
request_headers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EXCollector {
|
||||||
|
pub fn new(config: &ExConfig, prefix: Option<Ipv6Net>) -> anyhow::Result<Self> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
client: {
|
ghost_client: GhostClientBuilder::default()
|
||||||
reqwest::Client::builder()
|
.with_default_headers(config.build_header())
|
||||||
.default_headers(request_headers.clone())
|
.with_cf_resolve(&["exhentai.org"])
|
||||||
.timeout(TIMEOUT)
|
.build(prefix),
|
||||||
.build()
|
raw_client: reqwest::Client::builder().timeout(TIMEOUT).build().unwrap(),
|
||||||
.expect("build reqwest client failed")
|
|
||||||
},
|
|
||||||
proxy_client: proxy_client.with_default_headers(request_headers),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_from_config() -> anyhow::Result<Self> {
|
pub fn new_from_config() -> anyhow::Result<Self> {
|
||||||
let config: ExConfig = config::parse(CONFIG_KEY)?
|
let config: ExConfig = config::parse(CONFIG_KEY)?
|
||||||
.ok_or_else(|| anyhow::anyhow!("exhentai config(key: exhentai) not found"))?;
|
.ok_or_else(|| anyhow::anyhow!("exhentai config(key: exhentai) not found"))?;
|
||||||
let proxy_client = ProxiedClient::new_from_config();
|
Ok(Self {
|
||||||
Self::new(&config, proxy_client)
|
ghost_client: GhostClientBuilder::default()
|
||||||
|
.with_default_headers(config.build_header())
|
||||||
|
.with_cf_resolve(&["exhentai.org"])
|
||||||
|
.build_from_config()?,
|
||||||
|
raw_client: reqwest::Client::builder().timeout(TIMEOUT).build().unwrap(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_client(&self) -> reqwest::Client {
|
pub fn get_client(&self) -> reqwest::Client {
|
||||||
self.client.clone()
|
self.raw_client.clone()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,7 +96,7 @@ impl Collector for EXCollector {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn name() -> &'static str {
|
fn name() -> &'static str {
|
||||||
"exhentai"
|
"e-hentai"
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch(
|
async fn fetch(
|
||||||
|
@ -109,7 +118,7 @@ impl Collector for EXCollector {
|
||||||
tracing::info!("[exhentai] process {url}");
|
tracing::info!("[exhentai] process {url}");
|
||||||
|
|
||||||
let mut paged = Paged::new(0, EXPageIndicator { base: url.clone() });
|
let mut paged = Paged::new(0, EXPageIndicator { base: url.clone() });
|
||||||
let gallery_pages = paged.pages(&self.proxy_client).await.map_err(|e| {
|
let gallery_pages = paged.pages(&self.ghost_client).await.map_err(|e| {
|
||||||
tracing::error!("[exhentai] load page failed: {e:?}");
|
tracing::error!("[exhentai] load page failed: {e:?}");
|
||||||
e
|
e
|
||||||
})?;
|
})?;
|
||||||
|
@ -117,8 +126,8 @@ impl Collector for EXCollector {
|
||||||
|
|
||||||
// Since paged returns at least one page, we can safely get it.
|
// Since paged returns at least one page, we can safely get it.
|
||||||
let title = match_first_group(&TITLE_RE, &gallery_pages[0])
|
let title = match_first_group(&TITLE_RE, &gallery_pages[0])
|
||||||
.unwrap_or("No Title")
|
.map(|s| s.to_string())
|
||||||
.to_string();
|
.unwrap_or_else(|| format!("exhentai-{album_id}"));
|
||||||
|
|
||||||
let mut image_page_links = Vec::new();
|
let mut image_page_links = Vec::new();
|
||||||
for gallery_page in gallery_pages.iter() {
|
for gallery_page in gallery_pages.iter() {
|
||||||
|
@ -144,8 +153,8 @@ impl Collector for EXCollector {
|
||||||
tags: None,
|
tags: None,
|
||||||
},
|
},
|
||||||
EXImageStream {
|
EXImageStream {
|
||||||
client: self.client.clone(),
|
raw_client: self.raw_client.clone(),
|
||||||
proxy_client: self.proxy_client.clone(),
|
ghost_client: self.ghost_client.clone(),
|
||||||
image_page_links: image_page_links.into_iter(),
|
image_page_links: image_page_links.into_iter(),
|
||||||
},
|
},
|
||||||
))
|
))
|
||||||
|
@ -154,24 +163,24 @@ impl Collector for EXCollector {
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct EXImageStream {
|
pub struct EXImageStream {
|
||||||
client: reqwest::Client,
|
raw_client: reqwest::Client,
|
||||||
proxy_client: ProxiedClient,
|
ghost_client: GhostClient,
|
||||||
image_page_links: std::vec::IntoIter<String>,
|
image_page_links: std::vec::IntoIter<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EXImageStream {
|
impl EXImageStream {
|
||||||
async fn load_image(
|
async fn load_image(
|
||||||
proxy_client: ProxiedClient,
|
ghost_client: GhostClient,
|
||||||
client: reqwest::Client,
|
raw_client: reqwest::Client,
|
||||||
link: String,
|
link: String,
|
||||||
) -> anyhow::Result<(ImageMeta, ImageData)> {
|
) -> anyhow::Result<(ImageMeta, ImageData)> {
|
||||||
let content = RETRY_POLICY
|
let content = RETRY_POLICY
|
||||||
.retry(|| async { get_string(&proxy_client, &link).await })
|
.retry(|| async { get_string(&ghost_client, &link).await })
|
||||||
.await?;
|
.await?;
|
||||||
let img_url = match_first_group(&IMG_RE, &content)
|
let img_url = match_first_group(&IMG_RE, &content)
|
||||||
.ok_or_else(|| anyhow::anyhow!("unable to find image in page"))?;
|
.ok_or_else(|| anyhow::anyhow!("unable to find image in page"))?;
|
||||||
let image_data = RETRY_POLICY
|
let image_data = RETRY_POLICY
|
||||||
.retry(|| async { get_bytes(&client, img_url).await })
|
.retry(|| async { get_bytes(&raw_client, img_url).await })
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
tracing::trace!(
|
tracing::trace!(
|
||||||
|
@ -194,9 +203,9 @@ impl AsyncStream for EXImageStream {
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Future> {
|
fn next(&mut self) -> Option<Self::Future> {
|
||||||
let link = self.image_page_links.next()?;
|
let link = self.image_page_links.next()?;
|
||||||
let client = self.client.clone();
|
let ghost_client = self.ghost_client.clone();
|
||||||
let proxy_client = self.proxy_client.clone();
|
let raw_client = self.raw_client.clone();
|
||||||
Some(async move { Self::load_image(proxy_client, client, link).await })
|
Some(async move { Self::load_image(ghost_client, raw_client, link).await })
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -238,7 +247,7 @@ mod tests {
|
||||||
igneous: "balabala".to_string(),
|
igneous: "balabala".to_string(),
|
||||||
};
|
};
|
||||||
println!("config {config:#?}");
|
println!("config {config:#?}");
|
||||||
let collector = EXCollector::new(&config, ProxiedClient::default()).unwrap();
|
let collector = EXCollector::new(&config, None).unwrap();
|
||||||
let (album, mut image_stream) = collector
|
let (album, mut image_stream) = collector
|
||||||
.fetch("/g/2129939/01a6e086b9".to_string())
|
.fetch("/g/2129939/01a6e086b9".to_string())
|
||||||
.await
|
.await
|
||||||
|
@ -261,7 +270,7 @@ mod tests {
|
||||||
igneous: "balabala".to_string(),
|
igneous: "balabala".to_string(),
|
||||||
};
|
};
|
||||||
println!("config {config:#?}");
|
println!("config {config:#?}");
|
||||||
let collector = EXCollector::new(&config, ProxiedClient::default()).unwrap();
|
let collector = EXCollector::new(&config, None).unwrap();
|
||||||
let output = collector.fetch("/g/2129939/00000".to_string()).await;
|
let output = collector.fetch("/g/2129939/00000".to_string()).await;
|
||||||
assert!(output.is_err());
|
assert!(output.is_err());
|
||||||
println!("output err {output:?}");
|
println!("output err {output:?}");
|
||||||
|
|
Loading…
Reference in New Issue