Compare commits
2 Commits
c7866a1702
...
8b54eeac81
| Author | SHA1 | Date | |
|---|---|---|---|
| | 8b54eeac81 | | |
| | 41373bf937 | | |
@@ -16,8 +16,8 @@ ntex-files = "2.0.0"
|
|||||||
serde = "1.0.228"
|
serde = "1.0.228"
|
||||||
serde_json = "1.0.145"
|
serde_json = "1.0.145"
|
||||||
tokio = { version = "1.49", features = ["full"] }
|
tokio = { version = "1.49", features = ["full"] }
|
||||||
wreq = { version = "5.3.0", features = ["full", "cookies", "multipart"] }
|
wreq = { version = "6.0.0-rc.26", features = ["cookies", "multipart", "json"] }
|
||||||
wreq-util = "2"
|
wreq-util = "3.0.0-rc.10"
|
||||||
percent-encoding = "2.3.2"
|
percent-encoding = "2.3.2"
|
||||||
capitalize = "0.3.4"
|
capitalize = "0.3.4"
|
||||||
url = "2.5.7"
|
url = "2.5.7"
|
||||||
|
|||||||
@@ -346,6 +346,7 @@ impl SpankbangProvider {
|
|||||||
|
|
||||||
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
|
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
|
||||||
let document = Html::parse_document(&html);
|
let document = Html::parse_document(&html);
|
||||||
|
let video_list_selector = Selector::parse(r#"[data-testid="video-list"]"#).unwrap();
|
||||||
let card_selector = Selector::parse(r#"[data-testid="video-item"]"#).unwrap();
|
let card_selector = Selector::parse(r#"[data-testid="video-item"]"#).unwrap();
|
||||||
let video_link_selector = Selector::parse(r#"a[href*="/video/"]"#).unwrap();
|
let video_link_selector = Selector::parse(r#"a[href*="/video/"]"#).unwrap();
|
||||||
let title_selector = Selector::parse(r#"a[title]"#).unwrap();
|
let title_selector = Selector::parse(r#"a[title]"#).unwrap();
|
||||||
@@ -359,7 +360,14 @@ impl SpankbangProvider {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let mut items = Vec::new();
|
let mut items = Vec::new();
|
||||||
for card in document.select(&card_selector) {
|
let roots = document.select(&video_list_selector).collect::<Vec<_>>();
|
||||||
|
let cards = if let Some(root) = roots.last() {
|
||||||
|
root.select(&card_selector).collect::<Vec<_>>()
|
||||||
|
} else {
|
||||||
|
document.select(&card_selector).collect::<Vec<_>>()
|
||||||
|
};
|
||||||
|
|
||||||
|
for card in cards {
|
||||||
if let Some(item) = self.parse_card(
|
if let Some(item) = self.parse_card(
|
||||||
card,
|
card,
|
||||||
&video_link_selector,
|
&video_link_selector,
|
||||||
@@ -688,4 +696,41 @@ mod tests {
|
|||||||
assert_eq!(items[0].id, "2");
|
assert_eq!(items[0].id, "2");
|
||||||
assert_eq!(items[0].title, "Free video");
|
assert_eq!(items[0].title, "Free video");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn prefers_primary_video_list_over_header_dropdown_cards() {
|
||||||
|
let provider = SpankbangProvider::new();
|
||||||
|
let html = r#"
|
||||||
|
<div data-testid="video-list">
|
||||||
|
<div data-testid="video-item" data-id="111">
|
||||||
|
<a href="/wrong/video/header-card">
|
||||||
|
<picture><img src="https://example.com/wrong.jpg" alt="Wrong header card" /></picture>
|
||||||
|
<div data-testid="video-item-length">5m</div>
|
||||||
|
</a>
|
||||||
|
<div data-testid="video-info-with-badge">
|
||||||
|
<span data-testid="views"><span>1K</span></span>
|
||||||
|
<p><a href="/wrong/video/header-card" title="Wrong header card"><span>Wrong header card</span></a></p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div data-testid="video-list">
|
||||||
|
<div data-testid="video-item" data-id="222">
|
||||||
|
<a href="/right/video/adriana+chechik">
|
||||||
|
<picture><img src="https://example.com/right.jpg" alt="Right result" /></picture>
|
||||||
|
<div data-testid="video-item-length">17m</div>
|
||||||
|
</a>
|
||||||
|
<div data-testid="video-info-with-badge">
|
||||||
|
<span data-testid="views"><span>35K</span></span>
|
||||||
|
<span data-testid="rates"><span>96%</span></span>
|
||||||
|
<p><a href="/right/video/adriana+chechik" title="Right result"><span>Right result</span></a></p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let items = provider.get_video_items_from_html(html.to_string());
|
||||||
|
assert_eq!(items.len(), 1);
|
||||||
|
assert_eq!(items[0].id, "222");
|
||||||
|
assert_eq!(items[0].title, "Right result");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,13 @@
|
|||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use std::env;
|
use std::env;
|
||||||
|
use std::fmt;
|
||||||
|
use std::sync::Arc;
|
||||||
use wreq::Client;
|
use wreq::Client;
|
||||||
use wreq::Proxy;
|
use wreq::Proxy;
|
||||||
use wreq::Response;
|
use wreq::Response;
|
||||||
use wreq::Version;
|
use wreq::Version;
|
||||||
use wreq::header::HeaderValue;
|
use wreq::cookie::Jar;
|
||||||
|
use wreq::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||||
use wreq::multipart::Form;
|
use wreq::multipart::Form;
|
||||||
use wreq::redirect::Policy;
|
use wreq::redirect::Policy;
|
||||||
use wreq_util::Emulation;
|
use wreq_util::Emulation;
|
||||||
@@ -16,28 +19,56 @@ use crate::util::proxy;
|
|||||||
// A Send + Sync error type for all async paths
|
// A Send + Sync error type for all async paths
|
||||||
type AnyErr = Box<dyn std::error::Error + Send + Sync + 'static>;
|
type AnyErr = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||||
|
|
||||||
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
|
#[derive(serde::Serialize, serde::Deserialize, Clone)]
|
||||||
pub struct Requester {
|
pub struct Requester {
|
||||||
#[serde(skip)]
|
#[serde(skip)]
|
||||||
client: Client,
|
client: Client,
|
||||||
|
#[serde(skip)]
|
||||||
|
cookie_jar: Arc<Jar>,
|
||||||
proxy: bool,
|
proxy: bool,
|
||||||
flaresolverr_session: Option<String>,
|
flaresolverr_session: Option<String>,
|
||||||
|
user_agent: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for Requester {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.debug_struct("Requester")
|
||||||
|
.field("proxy", &self.proxy)
|
||||||
|
.field("flaresolverr_session", &self.flaresolverr_session)
|
||||||
|
.field("user_agent", &self.user_agent)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Requester {
|
impl Requester {
|
||||||
pub fn new() -> Self {
|
fn build_client(cookie_jar: Arc<Jar>, user_agent: Option<&str>) -> Client {
|
||||||
let client = Client::builder()
|
let mut builder = Client::builder()
|
||||||
.cert_verification(false)
|
.cert_verification(false)
|
||||||
.emulation(Emulation::Firefox136)
|
.emulation(Emulation::Firefox136)
|
||||||
.cookie_store(true)
|
.cookie_provider(cookie_jar)
|
||||||
.redirect(Policy::default())
|
.redirect(Policy::default());
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client");
|
if let Some(user_agent) = user_agent {
|
||||||
|
let mut headers = HeaderMap::new();
|
||||||
|
if let Ok(value) = HeaderValue::from_str(user_agent) {
|
||||||
|
headers.insert(USER_AGENT, value);
|
||||||
|
builder = builder.default_headers(headers);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.build().expect("Failed to create HTTP client")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let cookie_jar = Arc::new(Jar::default());
|
||||||
|
let client = Self::build_client(cookie_jar.clone(), None);
|
||||||
|
|
||||||
let requester = Requester {
|
let requester = Requester {
|
||||||
client,
|
client,
|
||||||
|
cookie_jar,
|
||||||
proxy: false,
|
proxy: false,
|
||||||
flaresolverr_session: None,
|
flaresolverr_session: None,
|
||||||
|
user_agent: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
proxy::init_all_proxies_background(requester.clone());
|
proxy::init_all_proxies_background(requester.clone());
|
||||||
@@ -53,12 +84,7 @@ impl Requester {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn get_raw(&mut self, url: &str) -> Result<Response, wreq::Error> {
|
pub async fn get_raw(&mut self, url: &str) -> Result<Response, wreq::Error> {
|
||||||
let client = Client::builder()
|
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
|
||||||
.cert_verification(false)
|
|
||||||
.emulation(Emulation::Firefox136)
|
|
||||||
.cookie_store(true)
|
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client");
|
|
||||||
|
|
||||||
let mut request = client.get(url).version(Version::HTTP_11);
|
let mut request = client.get(url).version(Version::HTTP_11);
|
||||||
|
|
||||||
@@ -77,12 +103,7 @@ impl Requester {
|
|||||||
url: &str,
|
url: &str,
|
||||||
headers: Vec<(String, String)>,
|
headers: Vec<(String, String)>,
|
||||||
) -> Result<Response, wreq::Error> {
|
) -> Result<Response, wreq::Error> {
|
||||||
let client = Client::builder()
|
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
|
||||||
.cert_verification(false)
|
|
||||||
.emulation(Emulation::Firefox136)
|
|
||||||
.cookie_store(true)
|
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client");
|
|
||||||
|
|
||||||
let mut request = client.get(url).version(Version::HTTP_11);
|
let mut request = client.get(url).version(Version::HTTP_11);
|
||||||
|
|
||||||
@@ -250,31 +271,18 @@ impl Requester {
|
|||||||
// Rebuild client and apply UA/cookies from FlareSolverr
|
// Rebuild client and apply UA/cookies from FlareSolverr
|
||||||
let cookie_origin = url.split('/').take(3).collect::<Vec<&str>>().join("/");
|
let cookie_origin = url.split('/').take(3).collect::<Vec<&str>>().join("/");
|
||||||
|
|
||||||
self.client = Client::builder()
|
|
||||||
.cert_verification(false)
|
|
||||||
.emulation(Emulation::Firefox136)
|
|
||||||
.cookie_store(true)
|
|
||||||
.redirect(Policy::default())
|
|
||||||
.build()
|
|
||||||
.expect("Failed to create HTTP client");
|
|
||||||
|
|
||||||
let useragent = res.solution.userAgent;
|
let useragent = res.solution.userAgent;
|
||||||
self.client
|
self.user_agent = Some(useragent);
|
||||||
.update()
|
|
||||||
.headers(|headers| {
|
|
||||||
headers.insert("User-Agent", HeaderValue::from_str(&useragent).unwrap());
|
|
||||||
})
|
|
||||||
.apply()
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
if let Ok(origin) = url::Url::parse(&cookie_origin) {
|
if url::Url::parse(&cookie_origin).is_ok() {
|
||||||
for cookie in res.solution.cookies {
|
for cookie in res.solution.cookies {
|
||||||
let header =
|
self.cookie_jar
|
||||||
HeaderValue::from_str(&format!("{}={}", cookie.name, cookie.value)).unwrap();
|
.add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin);
|
||||||
self.client.set_cookie(&origin, header);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
|
||||||
|
|
||||||
// Retry the original URL with the updated client & (optional) proxy
|
// Retry the original URL with the updated client & (optional) proxy
|
||||||
let mut request = self.client.get(url).version(Version::HTTP_11);
|
let mut request = self.client.get(url).version(Version::HTTP_11);
|
||||||
for (key, value) in headers.iter() {
|
for (key, value) in headers.iter() {
|
||||||
|
|||||||
Reference in New Issue
Block a user