archivebate fix
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
|
||||
requester_or_default, strip_url_scheme,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
@@ -18,7 +19,7 @@ use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::thread;
|
||||
use std::time::Duration as StdDuration;
|
||||
@@ -119,6 +120,12 @@ struct LivewireInitialData {
|
||||
server_memo_json: String,
|
||||
}
|
||||
|
||||
/// Outcome of resolving a Mixdrop embed page into a playable media URL.
#[derive(Clone, Debug)]
struct ResolvedMixdropMedia {
    /// Media URL extracted from the embed page's HTML.
    media_url: String,
    /// URL of the embed (iframe) page the media URL was resolved from.
    embed_url: String,
}
|
||||
|
||||
impl ArchivebateProvider {
|
||||
pub fn new() -> Self {
|
||||
let provider = Self {
|
||||
@@ -526,6 +533,28 @@ impl ArchivebateProvider {
|
||||
.and_then(|captures| captures.name("id").map(|value| value.as_str().to_string()))
|
||||
}
|
||||
|
||||
fn is_allowed_detail_watch_url(url: &str) -> bool {
|
||||
let Some(parsed) = url::Url::parse(url).ok() else {
|
||||
return false;
|
||||
};
|
||||
if parsed.scheme() != "https" {
|
||||
return false;
|
||||
}
|
||||
let Some(host) = parsed.host_str() else {
|
||||
return false;
|
||||
};
|
||||
(host == "archivebate.com" || host == "www.archivebate.com")
|
||||
&& parsed.path().starts_with("/watch/")
|
||||
}
|
||||
|
||||
fn proxied_video(options: &ServerOptions, detail_url: &str) -> String {
|
||||
if detail_url.is_empty() || !Self::is_allowed_detail_watch_url(detail_url) {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url))
|
||||
}
|
||||
|
||||
fn parse_duration(text: &str) -> u32 {
|
||||
let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else {
|
||||
return 0;
|
||||
@@ -827,6 +856,81 @@ impl ArchivebateProvider {
|
||||
host.contains("mixdrop") || host.contains("m1xdrop")
|
||||
}
|
||||
|
||||
fn download_fid_from_detail_html(html: &str) -> Option<String> {
|
||||
let document = Html::parse_document(html);
|
||||
let selector = Selector::parse("input[name='fid'][value]").ok()?;
|
||||
document
|
||||
.select(&selector)
|
||||
.next()
|
||||
.and_then(|node| node.value().attr("value"))
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
}
|
||||
|
||||
fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
|
||||
let parsed = url::Url::parse(url).ok()?;
|
||||
let host = parsed.host_str()?;
|
||||
let host_lc = host.to_ascii_lowercase();
|
||||
if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty());
|
||||
let kind = segments.next()?.to_ascii_lowercase();
|
||||
if kind != "e" && kind != "f" {
|
||||
return None;
|
||||
}
|
||||
let media_id = segments.next()?.trim();
|
||||
if media_id.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(format!("{}://{host}/e/{media_id}", parsed.scheme()))
|
||||
}
|
||||
|
||||
fn video_format_with_headers(
|
||||
format: VideoFormat,
|
||||
headers: Vec<(String, String)>,
|
||||
) -> VideoFormat {
|
||||
if headers.is_empty() {
|
||||
return format;
|
||||
}
|
||||
|
||||
let header_map: HashMap<String, String> = headers
|
||||
.into_iter()
|
||||
.filter_map(|(key, value)| {
|
||||
let key = key.trim().to_string();
|
||||
let value = value.trim().to_string();
|
||||
if key.is_empty() || value.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some((key, value))
|
||||
})
|
||||
.collect();
|
||||
|
||||
if header_map.is_empty() {
|
||||
return format;
|
||||
}
|
||||
|
||||
let mut value = match serde_json::to_value(&format) {
|
||||
Ok(value) => value,
|
||||
Err(_) => return format,
|
||||
};
|
||||
|
||||
if let Value::Object(object) = &mut value {
|
||||
let Ok(headers_value) = serde_json::to_value(header_map) else {
|
||||
return format;
|
||||
};
|
||||
object.insert("http_headers".to_string(), headers_value);
|
||||
if let Ok(updated) = serde_json::from_value::<VideoFormat>(value) {
|
||||
return updated;
|
||||
}
|
||||
}
|
||||
|
||||
format
|
||||
}
|
||||
|
||||
fn first_video_source_from_html(html: &str) -> Option<String> {
|
||||
let document = Html::parse_document(html);
|
||||
let source_selector = Selector::parse("video source[src]").ok()?;
|
||||
@@ -861,23 +965,37 @@ impl ArchivebateProvider {
|
||||
iframe_url: &str,
|
||||
referer: &str,
|
||||
options: &ServerOptions,
|
||||
) -> Option<String> {
|
||||
) -> Option<ResolvedMixdropMedia> {
|
||||
let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
|
||||
let iframe_html = requester
|
||||
.get_with_headers(
|
||||
let response = requester
|
||||
.get_raw_with_headers_timeout(
|
||||
iframe_url,
|
||||
self.html_headers(referer),
|
||||
Some(wreq::Version::HTTP_11),
|
||||
Some(StdDuration::from_secs(6)),
|
||||
)
|
||||
.await
|
||||
.ok()?;
|
||||
Self::extract_mixdrop_media_url(&iframe_html)
|
||||
if !response.status().is_success() {
|
||||
return None;
|
||||
}
|
||||
let iframe_html = response.text().await.ok()?;
|
||||
let media_url = Self::extract_mixdrop_media_url(&iframe_html)?;
|
||||
Some(ResolvedMixdropMedia {
|
||||
media_url,
|
||||
embed_url: iframe_url.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
||||
let page_url = item.url.clone();
|
||||
let format_url = Self::proxied_video(options, &page_url);
|
||||
if format_url.is_empty() {
|
||||
return item;
|
||||
}
|
||||
|
||||
let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string());
|
||||
let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video");
|
||||
let detail_html = match requester
|
||||
if let Ok(detail_html) = requester
|
||||
.get_with_headers(
|
||||
&page_url,
|
||||
self.html_headers(&format!("{}/", self.url)),
|
||||
@@ -885,40 +1003,27 @@ impl ArchivebateProvider {
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(value) => value,
|
||||
Err(error) => {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"enrich_video.fetch_detail",
|
||||
&format!("url={page_url}; error={error}"),
|
||||
);
|
||||
return item;
|
||||
let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html)
|
||||
.map(|value| self.absolute_url(&value))
|
||||
.filter(|value| Self::is_mixdrop_host(value));
|
||||
|
||||
if mixdrop_embed_url.is_none() {
|
||||
mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html)
|
||||
.map(|value| self.absolute_url(&value))
|
||||
.and_then(|value| Self::mixdrop_embed_url_from_download_url(&value));
|
||||
}
|
||||
};
|
||||
|
||||
let mut media_url = Self::first_video_source_from_html(&detail_html)
|
||||
.map(|value| self.absolute_url(&value));
|
||||
|
||||
if media_url.is_none() {
|
||||
let iframe_url = Self::first_iframe_source_from_html(&detail_html)
|
||||
.map(|value| self.absolute_url(&value));
|
||||
if let Some(iframe_url) = iframe_url {
|
||||
if Self::is_mixdrop_host(&iframe_url) {
|
||||
if let Some(resolved) = self
|
||||
.resolve_mixdrop_media_from_iframe(&iframe_url, &page_url, options)
|
||||
.await
|
||||
{
|
||||
media_url = Some(resolved);
|
||||
}
|
||||
}
|
||||
if let Some(embed_url) = mixdrop_embed_url {
|
||||
format = Self::video_format_with_headers(
|
||||
format,
|
||||
vec![
|
||||
("Referer".to_string(), embed_url),
|
||||
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let Some(media_url) = media_url else {
|
||||
return item;
|
||||
};
|
||||
|
||||
let format = VideoFormat::new(media_url, "source".to_string(), "mp4".to_string());
|
||||
let mut enriched = item;
|
||||
enriched.formats = Some(vec![format]);
|
||||
enriched
|
||||
|
||||
Reference in New Issue
Block a user