archivebate fix

This commit is contained in:
Simon
2026-04-22 10:18:15 +00:00
parent 47631b8a70
commit a47a69962f
4 changed files with 474 additions and 37 deletions

View File

@@ -1,7 +1,8 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
@@ -18,7 +19,7 @@ use regex::Regex;
use scraper::{Html, Selector};
use serde::Deserialize;
use serde_json::Value;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration as StdDuration;
@@ -119,6 +120,12 @@ struct LivewireInitialData {
server_memo_json: String,
}
/// Result of resolving a Mixdrop embed page: the media URL that was extracted
/// from the page together with the embed (iframe) URL that was fetched.
#[derive(Debug, Clone)]
struct ResolvedMixdropMedia {
/// Media URL extracted from the embed page HTML.
media_url: String,
/// The iframe/embed URL that was requested to obtain `media_url`.
embed_url: String,
}
impl ArchivebateProvider {
pub fn new() -> Self {
let provider = Self {
@@ -526,6 +533,28 @@ impl ArchivebateProvider {
.and_then(|captures| captures.name("id").map(|value| value.as_str().to_string()))
}
/// Returns `true` only when `url` parses, uses the `https` scheme, points at
/// `archivebate.com` or `www.archivebate.com`, and has a path starting with
/// `/watch/`. Every other input (including unparsable text) yields `false`.
fn is_allowed_detail_watch_url(url: &str) -> bool {
    match url::Url::parse(url) {
        Ok(parsed) => {
            let trusted_host = matches!(
                parsed.host_str(),
                Some("archivebate.com" | "www.archivebate.com")
            );
            parsed.scheme() == "https" && trusted_host && parsed.path().starts_with("/watch/")
        }
        Err(_) => false,
    }
}
/// Builds the proxy URL for a detail page.
///
/// Returns an empty string when `detail_url` is empty or is not accepted by
/// `is_allowed_detail_watch_url`; otherwise hands the scheme-stripped URL to
/// `build_proxy_url` for this channel.
fn proxied_video(options: &ServerOptions, detail_url: &str) -> String {
    let proxyable = !detail_url.is_empty() && Self::is_allowed_detail_watch_url(detail_url);
    if proxyable {
        build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url))
    } else {
        String::new()
    }
}
fn parse_duration(text: &str) -> u32 {
let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else {
return 0;
@@ -827,6 +856,81 @@ impl ArchivebateProvider {
host.contains("mixdrop") || host.contains("m1xdrop")
}
/// Reads the `value` attribute of the first `input[name='fid'][value]` element
/// in `html`.
///
/// The value is trimmed; `None` is returned when no such element exists or the
/// trimmed value is empty.
fn download_fid_from_detail_html(html: &str) -> Option<String> {
    let fid_selector = Selector::parse("input[name='fid'][value]").ok()?;
    let document = Html::parse_document(html);
    let fid_input = document.select(&fid_selector).next()?;
    let fid = fid_input.value().attr("value")?.trim();
    if fid.is_empty() {
        None
    } else {
        Some(fid.to_owned())
    }
}
/// Rewrites a Mixdrop `/e/<id>` or `/f/<id>` URL into its `/e/<id>` embed form.
///
/// Returns `None` when the URL does not parse, the host contains neither
/// `mixdrop` nor `m1xdrop` (case-insensitive), the first non-empty path segment
/// is not `e`/`f`, or the media id segment is missing or blank. The scheme and
/// host of the input are kept as-is.
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
    let parsed = url::Url::parse(url).ok()?;
    let host = parsed.host_str()?;

    let lowered_host = host.to_ascii_lowercase();
    let looks_like_mixdrop =
        lowered_host.contains("mixdrop") || lowered_host.contains("m1xdrop");
    if !looks_like_mixdrop {
        return None;
    }

    // Empty segments (from doubled or trailing slashes) are ignored.
    let mut parts = parsed.path_segments()?.filter(|part| !part.is_empty());
    let kind = parts.next()?;
    if !(kind.eq_ignore_ascii_case("e") || kind.eq_ignore_ascii_case("f")) {
        return None;
    }

    let media_id = parts.next().map(str::trim).filter(|id| !id.is_empty())?;
    let scheme = parsed.scheme();
    Some(format!("{scheme}://{host}/e/{media_id}"))
}
/// Returns `format` with an `http_headers` entry set from `headers`.
///
/// Header names and values are trimmed; pairs with an empty name or value are
/// dropped. The entry is written by serialising the format to JSON, inserting
/// the `http_headers` key and deserialising again. If no usable header remains
/// or any serialisation step fails, the original `format` is returned unchanged.
fn video_format_with_headers(
    format: VideoFormat,
    headers: Vec<(String, String)>,
) -> VideoFormat {
    let mut cleaned: HashMap<String, String> = HashMap::new();
    for (name, content) in headers {
        let (name, content) = (name.trim(), content.trim());
        if !name.is_empty() && !content.is_empty() {
            cleaned.insert(name.to_string(), content.to_string());
        }
    }
    if cleaned.is_empty() {
        return format;
    }

    let Ok(mut json) = serde_json::to_value(&format) else {
        return format;
    };
    // Only a JSON object can carry the extra key.
    let Value::Object(fields) = &mut json else {
        return format;
    };
    let Ok(headers_json) = serde_json::to_value(cleaned) else {
        return format;
    };
    fields.insert("http_headers".to_string(), headers_json);

    serde_json::from_value::<VideoFormat>(json).unwrap_or(format)
}
fn first_video_source_from_html(html: &str) -> Option<String> {
let document = Html::parse_document(html);
let source_selector = Selector::parse("video source[src]").ok()?;
@@ -861,23 +965,37 @@ impl ArchivebateProvider {
iframe_url: &str,
referer: &str,
options: &ServerOptions,
) -> Option<String> {
) -> Option<ResolvedMixdropMedia> {
let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
let iframe_html = requester
.get_with_headers(
let response = requester
.get_raw_with_headers_timeout(
iframe_url,
self.html_headers(referer),
Some(wreq::Version::HTTP_11),
Some(StdDuration::from_secs(6)),
)
.await
.ok()?;
Self::extract_mixdrop_media_url(&iframe_html)
if !response.status().is_success() {
return None;
}
let iframe_html = response.text().await.ok()?;
let media_url = Self::extract_mixdrop_media_url(&iframe_html)?;
Some(ResolvedMixdropMedia {
media_url,
embed_url: iframe_url.to_string(),
})
}
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
let page_url = item.url.clone();
let format_url = Self::proxied_video(options, &page_url);
if format_url.is_empty() {
return item;
}
let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string());
let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video");
let detail_html = match requester
if let Ok(detail_html) = requester
.get_with_headers(
&page_url,
self.html_headers(&format!("{}/", self.url)),
@@ -885,40 +1003,27 @@ impl ArchivebateProvider {
)
.await
{
Ok(value) => value,
Err(error) => {
report_provider_error_background(
CHANNEL_ID,
"enrich_video.fetch_detail",
&format!("url={page_url}; error={error}"),
let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html)
.map(|value| self.absolute_url(&value))
.filter(|value| Self::is_mixdrop_host(value));
if mixdrop_embed_url.is_none() {
mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html)
.map(|value| self.absolute_url(&value))
.and_then(|value| Self::mixdrop_embed_url_from_download_url(&value));
}
if let Some(embed_url) = mixdrop_embed_url {
format = Self::video_format_with_headers(
format,
vec![
("Referer".to_string(), embed_url),
("User-Agent".to_string(), FIREFOX_UA.to_string()),
],
);
return item;
}
};
let mut media_url = Self::first_video_source_from_html(&detail_html)
.map(|value| self.absolute_url(&value));
if media_url.is_none() {
let iframe_url = Self::first_iframe_source_from_html(&detail_html)
.map(|value| self.absolute_url(&value));
if let Some(iframe_url) = iframe_url {
if Self::is_mixdrop_host(&iframe_url) {
if let Some(resolved) = self
.resolve_mixdrop_media_from_iframe(&iframe_url, &page_url, options)
.await
{
media_url = Some(resolved);
}
}
}
}
let Some(media_url) = media_url else {
return item;
};
let format = VideoFormat::new(media_url, "source".to_string(), "mp4".to_string());
let mut enriched = item;
enriched.formats = Some(vec![format]);
enriched

321
src/proxies/archivebate.rs Normal file
View File

@@ -0,0 +1,321 @@
use std::time::Duration as StdDuration;
use ntex::web;
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use wreq::Version;
use crate::util::requester::Requester;
/// `User-Agent` header value used by `html_headers` for outgoing requests.
const FIREFOX_UA: &str =
"Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
/// Field-less proxy type for Archivebate; its `Proxy` implementation turns a
/// detail-page request into a media URL.
#[derive(Debug, Clone)]
pub struct ArchivebateProxy {}
impl ArchivebateProxy {
/// Creates a new `ArchivebateProxy`; the type has no fields to initialise.
pub fn new() -> Self {
Self {}
}
/// Turns a proxy endpoint into a full detail-page URL.
///
/// Surrounding whitespace and leading slashes are removed. An endpoint without
/// an `http://`/`https://` prefix gets `https://` prepended. The result is
/// returned only if `is_allowed_detail_url` accepts it; otherwise `None`.
fn normalize_detail_request(endpoint: &str) -> Option<String> {
    let cleaned = endpoint.trim().trim_start_matches('/');
    if cleaned.is_empty() {
        return None;
    }

    let already_absolute = cleaned.starts_with("http://") || cleaned.starts_with("https://");
    let candidate = if already_absolute {
        cleaned.to_owned()
    } else {
        format!("https://{cleaned}")
    };

    if Self::is_allowed_detail_url(&candidate) {
        Some(candidate)
    } else {
        None
    }
}
/// Returns `true` only when `url` parses, uses the `https` scheme, points at
/// `archivebate.com` or `www.archivebate.com`, and has a path starting with
/// `/watch/`. Every other input (including unparsable text) yields `false`.
fn is_allowed_detail_url(url: &str) -> bool {
    match Url::parse(url) {
        Ok(parsed) => {
            let trusted_host = matches!(
                parsed.host_str(),
                Some("archivebate.com" | "www.archivebate.com")
            );
            parsed.scheme() == "https" && trusted_host && parsed.path().starts_with("/watch/")
        }
        Err(_) => false,
    }
}
/// Parses `url` and returns its host in ASCII lowercase, or `None` when the
/// URL does not parse or has no host.
fn host_from_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let host = parsed.host_str()?;
    Some(host.to_ascii_lowercase())
}
/// Reports whether the host of `url` contains `mixdrop` or `m1xdrop`.
/// URLs without a resolvable host are never considered Mixdrop URLs.
fn is_mixdrop_host(url: &str) -> bool {
    match Self::host_from_url(url) {
        Some(host) => ["mixdrop", "m1xdrop"]
            .iter()
            .any(|marker| host.contains(*marker)),
        None => false,
    }
}
/// Builds the request headers used when fetching HTML pages: `Referer` (from
/// the argument), `User-Agent`, `Accept` and `Accept-Language`, in that order.
fn html_headers(referer: &str) -> Vec<(String, String)> {
    const ACCEPT: &str =
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

    let pairs: [(&str, &str); 4] = [
        ("Referer", referer),
        ("User-Agent", FIREFOX_UA),
        ("Accept", ACCEPT),
        ("Accept-Language", "en-US,en;q=0.9"),
    ];
    pairs
        .iter()
        .map(|&(name, content)| (name.to_string(), content.to_string()))
        .collect()
}
/// Returns the `src` attribute of the first `iframe[src]` element in `html`,
/// exactly as written in the markup, or `None` when there is no such element.
fn first_iframe_source_from_html(html: &str) -> Option<String> {
    let iframe_selector = Selector::parse("iframe[src]").ok()?;
    let document = Html::parse_document(html);
    let first_iframe = document.select(&iframe_selector).next()?;
    let source = first_iframe.value().attr("src")?;
    Some(source.to_owned())
}
/// Reads the `value` attribute of the first `input[name='fid'][value]` element
/// in `html`.
///
/// The value is trimmed; `None` is returned when no such element exists or the
/// trimmed value is empty.
fn download_fid_from_detail_html(html: &str) -> Option<String> {
    let fid_selector = Selector::parse("input[name='fid'][value]").ok()?;
    let document = Html::parse_document(html);
    let fid_input = document.select(&fid_selector).next()?;
    let fid = fid_input.value().attr("value")?.trim();
    if fid.is_empty() {
        None
    } else {
        Some(fid.to_owned())
    }
}
/// Rewrites a Mixdrop `/e/<id>` or `/f/<id>` URL into its `/e/<id>` embed form.
///
/// Returns `None` when the URL does not parse, the host contains neither
/// `mixdrop` nor `m1xdrop` (case-insensitive), the first non-empty path segment
/// is not `e`/`f`, or the media id segment is missing or blank. The scheme and
/// host of the input are kept as-is.
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
    let parsed = Url::parse(url).ok()?;
    let host = parsed.host_str()?;

    let lowered_host = host.to_ascii_lowercase();
    let looks_like_mixdrop =
        lowered_host.contains("mixdrop") || lowered_host.contains("m1xdrop");
    if !looks_like_mixdrop {
        return None;
    }

    // Empty segments (from doubled or trailing slashes) are ignored.
    let mut parts = parsed.path_segments()?.filter(|part| !part.is_empty());
    let kind = parts.next()?;
    if !(kind.eq_ignore_ascii_case("e") || kind.eq_ignore_ascii_case("f")) {
        return None;
    }

    let media_id = parts.next().map(str::trim).filter(|id| !id.is_empty())?;
    let scheme = parsed.scheme();
    Some(format!("{scheme}://{host}/e/{media_id}"))
}
/// Trims `value` and, if it is protocol-relative (starts with `//`), prefixes
/// it with `https:`. Any other input is returned trimmed but otherwise as-is.
fn normalize_possible_protocol_relative(value: &str) -> String {
    let candidate = value.trim();
    match candidate.strip_prefix("//") {
        Some(rest) => format!("https://{rest}"),
        None => candidate.to_owned(),
    }
}
/// Extracts the `MDCore.wurl = "..."` media URL from a Mixdrop embed page.
///
/// The assignment is first searched in the raw `html`; if it is not there, the
/// packed `eval(...)` script is unpacked and searched instead. Protocol-relative
/// results are normalised to `https:`. Returns `None` when neither form yields
/// a URL.
fn extract_mixdrop_media_url(html: &str) -> Option<String> {
    let wurl_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
    let find_wurl = |haystack: &str| -> Option<String> {
        let captured = wurl_regex.captures(haystack)?.get(1)?;
        Some(Self::normalize_possible_protocol_relative(captured.as_str()))
    };

    if let Some(direct) = find_wurl(html) {
        return Some(direct);
    }

    let unpacked = Self::parse_mixin_packed_eval(html)?;
    find_wurl(&unpacked)
}
/// Unpacks a `eval(function(p,a,c,k,e,d){...}('payload',radix,count,'tokens'.split('|')...` script.
///
/// The payload is unescaped, then every word in it that equals the base-`radix`
/// encoding of a token index is replaced by that token. Empty tokens leave the
/// word untouched.
///
/// Returns `None` when no packed script is found, when the numeric fields do
/// not parse, or when `radix` is outside `2..=36`.
fn parse_mixin_packed_eval(html: &str) -> Option<String> {
    let eval_regex = Regex::new(
        r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
    )
    .ok()?;
    let captures = eval_regex.captures(html)?;
    let payload_raw = captures.name("payload")?.as_str();
    let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
    let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
    if !(2..=36).contains(&radix) {
        return None;
    }
    let payload = Self::unescape_js_single_quoted(payload_raw);
    let tokens: Vec<&str> = captures.name("tokens")?.as_str().split('|').collect();

    // `count` is read from the remote page, so it must not drive the amount of
    // work on its own: only indices that actually have a token are relevant.
    let usable = count.min(tokens.len());
    let dictionary: std::collections::HashMap<String, &str> = tokens
        .iter()
        .take(usable)
        .enumerate()
        .filter(|(_, token)| !token.is_empty())
        .map(|(index, token)| (Self::to_radix(index, radix), *token))
        .collect();

    // One pass over the payload: each word is looked up once, so substituted
    // text is never substituted again and tokens are inserted literally
    // (a closure replacer performs no `$` capture expansion).
    let word_regex = Regex::new(r"\b\w+\b").ok()?;
    let unpacked = word_regex.replace_all(&payload, |found: &regex::Captures| {
        let word = &found[0];
        dictionary.get(word).copied().unwrap_or(word).to_string()
    });
    Some(unpacked.into_owned())
}
/// Resolves backslash escapes in the contents of a JavaScript string literal.
///
/// `\n`, `\r` and `\t` become the corresponding control characters; any other
/// escaped character (including `\\`, `\'` and `\"`) is emitted as that
/// character itself. A lone backslash at the very end is dropped.
fn unescape_js_single_quoted(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut pending = value.chars();
    loop {
        match pending.next() {
            None => break,
            Some('\\') => match pending.next() {
                Some('n') => decoded.push('\n'),
                Some('r') => decoded.push('\r'),
                Some('t') => decoded.push('\t'),
                Some(literal) => decoded.push(literal),
                // Trailing backslash with nothing to escape: stop here.
                None => break,
            },
            Some(plain) => decoded.push(plain),
        }
    }
    decoded
}
/// Formats `value` in the given `radix` using digits `0-9` then `a-z`.
///
/// Zero is always rendered as `"0"`. Callers are expected to pass a radix in
/// `2..=36`; the digit table has 36 entries.
fn to_radix(mut value: usize, radix: u32) -> String {
    const DIGITS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";

    if value == 0 {
        return String::from("0");
    }

    let base = radix as usize;
    // Digits come out least-significant first and are reversed at the end.
    let mut least_significant_first = String::new();
    while value != 0 {
        least_significant_first.push(char::from(DIGITS[value % base]));
        value /= base;
    }
    least_significant_first.chars().rev().collect()
}
/// Makes `value` absolute.
///
/// `http://`/`https://` URLs are returned unchanged, protocol-relative URLs
/// (`//host/...`) get an `https:` prefix, and everything else is treated as a
/// path on `https://archivebate.com/` with leading slashes removed.
fn absolute_url(value: &str) -> String {
    let has_scheme = ["http://", "https://"]
        .iter()
        .any(|scheme| value.starts_with(*scheme));

    if has_scheme {
        value.to_owned()
    } else if let Some(rest) = value.strip_prefix("//") {
        format!("https://{rest}")
    } else {
        let path = value.trim_start_matches('/');
        format!("https://archivebate.com/{path}")
    }
}
/// Fetches a Mixdrop embed page and extracts the media URL from it.
///
/// The request uses `detail_url` as the referer and an 8 second timeout.
/// Returns `None` when the request fails, the response status is not a
/// success, the body cannot be read, or no media URL is found in the page.
async fn resolve_mixdrop_media_from_embed(
    detail_url: &str,
    embed_url: &str,
    requester: &mut Requester,
) -> Option<String> {
    let request_headers = Self::html_headers(detail_url);
    let fetch_timeout = StdDuration::from_secs(8);

    let response = requester
        .get_raw_with_headers_timeout(embed_url, request_headers, Some(fetch_timeout))
        .await
        .ok()?;

    if response.status().is_success() {
        let embed_html = response.text().await.ok()?;
        Self::extract_mixdrop_media_url(&embed_html)
    } else {
        None
    }
}
}
impl crate::proxies::Proxy for ArchivebateProxy {
    /// Resolves a proxied detail-page request to a direct media URL.
    ///
    /// `url` is validated and normalised first. The detail page is then
    /// fetched and two sources are tried in order: the first iframe (when it
    /// points at a Mixdrop host), then the embed URL derived from the page's
    /// `fid` input. An empty string is returned when validation, fetching or
    /// both resolution attempts fail.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let Some(detail_url) = Self::normalize_detail_request(&url) else {
            return String::new();
        };

        let mut requester = requester.get_ref().clone();
        let fetched = requester
            .get_with_headers(
                &detail_url,
                Self::html_headers("https://archivebate.com/"),
                Some(Version::HTTP_11),
            )
            .await;
        // A failed request and an empty body are treated the same way.
        let detail_html = match fetched {
            Ok(body) if !body.is_empty() => body,
            _ => return String::new(),
        };

        // First choice: the embedded player iframe, if it is hosted on Mixdrop.
        let iframe_embed = Self::first_iframe_source_from_html(&detail_html)
            .map(|source| Self::absolute_url(&source))
            .filter(|candidate| Self::is_mixdrop_host(candidate));
        if let Some(embed_url) = iframe_embed {
            let resolved =
                Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester)
                    .await;
            if let Some(media_url) = resolved {
                return media_url;
            }
        }

        // Fallback: derive the embed URL from the download `fid` input.
        let fid_embed = Self::download_fid_from_detail_html(&detail_html)
            .map(|fid| Self::absolute_url(&fid))
            .and_then(|download_url| Self::mixdrop_embed_url_from_download_url(&download_url));
        if let Some(embed_url) = fid_embed {
            let resolved =
                Self::resolve_mixdrop_media_from_embed(&detail_url, &embed_url, &mut requester)
                    .await;
            if let Some(media_url) = resolved {
                return media_url;
            }
        }

        String::new()
    }
}
// Unit tests for the helpers of `ArchivebateProxy` that need no network access.
#[cfg(test)]
mod tests {
use super::ArchivebateProxy;
// A scheme-less endpoint on the allowed host is accepted and gets `https://` prepended.
#[test]
fn normalizes_detail_request() {
let detail = ArchivebateProxy::normalize_detail_request("archivebate.com/watch/123456");
assert_eq!(detail.as_deref(), Some("https://archivebate.com/watch/123456"));
}
// Paths that do not start with `/watch/` are rejected even on the allowed host.
#[test]
fn rejects_non_watch_paths() {
assert!(ArchivebateProxy::normalize_detail_request("archivebate.com/profile/test").is_none());
}
// The media URL is only present inside the packed `eval(...)` script, so this
// exercises the unpacking path and the protocol-relative -> https normalisation.
#[test]
fn extracts_mixdrop_wurl_from_packed_eval() {
let html = r#"
<script>
eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//o230m5y6z.3.4/5/6.7?8=9&a=b";',12,12,'|MDCore|wurl|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
</script>
"#;
let extracted = ArchivebateProxy::extract_mixdrop_media_url(html)
.expect("expected extracted media url");
assert_eq!(
extracted,
"https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168"
);
}
}

View File

@@ -1,3 +1,4 @@
use crate::proxies::archivebate::ArchivebateProxy;
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::heavyfetish::HeavyfetishProxy;
use crate::proxies::hqporner::HqpornerProxy;
@@ -11,6 +12,7 @@ use crate::proxies::spankbang::SpankbangProxy;
use crate::proxies::vjav::VjavProxy;
use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
pub mod archivebate;
pub mod doodstream;
pub mod hanimecdn;
pub mod heavyfetish;
@@ -30,6 +32,7 @@ pub mod vjav;
#[derive(Debug, Clone)]
pub enum AnyProxy {
Archivebate(ArchivebateProxy),
Doodstream(DoodstreamProxy),
Sxyprn(SxyprnProxy),
Javtiful(javtiful::JavtifulProxy),
@@ -50,6 +53,7 @@ pub trait Proxy {
impl Proxy for AnyProxy {
async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
match self {
AnyProxy::Archivebate(p) => p.get_video_url(url, requester).await,
AnyProxy::Doodstream(p) => p.get_video_url(url, requester).await,
AnyProxy::Sxyprn(p) => p.get_video_url(url, requester).await,
AnyProxy::Javtiful(p) => p.get_video_url(url, requester).await,

View File

@@ -1,5 +1,6 @@
use ntex::web::{self, HttpRequest};
use crate::proxies::archivebate::ArchivebateProxy;
use crate::proxies::doodstream::DoodstreamProxy;
use crate::proxies::heavyfetish::HeavyfetishProxy;
use crate::proxies::hqporner::HqpornerProxy;
@@ -16,6 +17,11 @@ use crate::util::requester::Requester;
pub fn config(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("/archivebate/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
)
.service(
web::resource("/doodstream/{endpoint}*")
.route(web::post().to(proxy2redirect))
.route(web::get().to(proxy2redirect)),
@@ -124,6 +130,7 @@ async fn proxy2redirect(
fn get_proxy(proxy: &str) -> Option<AnyProxy> {
match proxy {
"archivebate" => Some(AnyProxy::Archivebate(ArchivebateProxy::new())),
"doodstream" => Some(AnyProxy::Doodstream(DoodstreamProxy::new())),
"sxyprn" => Some(AnyProxy::Sxyprn(SxyprnProxy::new())),
"javtiful" => Some(AnyProxy::Javtiful(JavtifulProxy::new())),