use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{ Provider, build_proxy_url, report_provider_error, report_provider_error_background, requester_or_default, strip_url_scheme, }; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use chrono::{Duration as ChronoDuration, Utc}; use error_chain::error_chain; use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode}; use regex::Regex; use scraper::{Html, Selector}; use serde::Deserialize; use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::sync::{Arc, RwLock}; use std::thread; use std::time::Duration as StdDuration; use tokio::time::timeout; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "live-cams", tags: &["archive", "cams", "recordings"], }; error_chain! 
{
    foreign_links {
        Io(std::io::Error);
        Json(serde_json::Error);
    }
    errors {
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}

const BASE_URL: &str = "https://archivebate.com";
const CHANNEL_ID: &str = "archivebate";
const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0";
const HTML_ACCEPT: &str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";

// (name, base64-encoded route id) pairs for the site's /platform/<id> routes.
const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[
    ("chaturbate", "Y2hhdHVyYmF0ZQ=="),
    ("stripchat", "c3RyaXBjaGF0"),
    ("camsoda", "Y2Ftc29kYQ=="),
    ("cam4", "Y2FtNA=="),
    ("bongacams", "Ym9uZ2FjYW1z"),
    ("onlyfans", "b25seWZhbnM="),
    ("twitch", "dHdpdGNo"),
    ("youtube", "eW91dHViZQ=="),
    ("instagram", "aW5zdGFncmFt"),
    ("tiktok", "dGlrdG9r"),
];

// (name, base64-encoded route id) pairs for the site's /gender/<id> routes.
const GENDER_SHORTCUTS: &[(&str, &str)] = &[
    ("female", "ZmVtYWxl"),
    ("couple", "Y291cGxl"),
    ("male", "bWFsZQ=="),
    ("trans", "dHJhbnM="),
];

/// Provider for archivebate.com listings.
///
/// NOTE(review): the generic parameters below were stripped from this copy of
/// the source (`Arc>>`); restored as `Arc<RwLock<Vec<FilterOption>>>` based on
/// the `read()/write()` + `Vec<FilterOption>` usage throughout the impl.
#[derive(Debug, Clone)]
pub struct ArchivebateProvider {
    // Base site URL (BASE_URL); kept as a field so tests/mirrors could vary it.
    url: String,
    // Static shortcut filters (genders + platforms), built once in `new`.
    filters: Arc<RwLock<Vec<FilterOption>>>,
    // Uploader shortcuts discovered lazily from listings.
    uploaders: Arc<RwLock<Vec<FilterOption>>>,
}

/// The concrete listing a request resolves to.
#[derive(Debug, Clone)]
enum Target {
    Home,
    Platform { id: String, title: String },
    Gender { id: String, title: String },
    Profile {
        username: String,
        platform: Option<String>,
        gender: Option<String>,
    },
}

/// Shape of `/api/v1/search` responses (only the fields we read).
#[derive(Debug, Deserialize, Clone)]
struct SearchResponse {
    #[serde(default)]
    data: Vec<SearchProfile>,
}

#[derive(Debug, Deserialize, Clone)]
struct SearchProfile {
    #[serde(default)]
    username: String,
    #[serde(default)]
    platform: Option<String>,
    #[serde(default)]
    gender: Option<String>,
}

/// Livewire bootstrap data scraped from a page's `wire:initial-data` attribute.
#[derive(Debug, Clone)]
struct LivewireInitialData {
    component: String,
    method: String,
    fingerprint_json: String,
    server_memo_json: String,
}

/// A resolved mixdrop stream: the direct media URL plus the embed page it
/// came from (used as Referer).
#[derive(Debug, Clone)]
struct ResolvedMixdropMedia {
    media_url: String,
    embed_url: String,
}

impl ArchivebateProvider {
    /// Builds the provider and kicks off a background refresh of uploader
    /// shortcuts from the home listing.
    pub fn new() -> Self {
        let provider = Self {
            url: BASE_URL.to_string(),
            filters: Arc::new(RwLock::new(Self::build_default_filters())),
            uploaders: Arc::new(RwLock::new(vec![FilterOption {
                id: "all".to_string(),
                title: "All".to_string(),
}])), }; provider.spawn_initial_load(); provider } fn spawn_initial_load(&self) { let provider = self.clone(); thread::spawn(move || { let runtime = match tokio::runtime::Builder::new_current_thread() .enable_all() .build() { Ok(runtime) => runtime, Err(error) => { report_provider_error_background( CHANNEL_ID, "spawn_initial_load.runtime_build", &error.to_string(), ); return; } }; runtime.block_on(async move { let options = ServerOptions { featured: None, category: None, sites: None, filter: None, language: None, public_url_base: None, requester: None, network: None, stars: None, categories: None, duration: None, sort: Some("new".to_string()), sexuality: None, }; if let Err(error) = provider.refresh_uploaders_from_home(&options).await { report_provider_error_background( CHANNEL_ID, "spawn_initial_load.refresh_uploaders", &error.to_string(), ); } }); }); } fn build_default_filters() -> Vec { let mut filters = vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }]; for (title, id) in GENDER_SHORTCUTS { filters.push(FilterOption { id: format!("gender:{id}"), title: format!("Gender: {}", Self::title_case(title)), }); } for (title, id) in PLATFORM_SHORTCUTS { filters.push(FilterOption { id: format!("platform:{id}"), title: format!("Platform: {}", Self::title_case(title)), }); } filters } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { let filters = self .filters .read() .map(|value| value.clone()) .unwrap_or_default(); let uploaders = self .uploaders .read() .map(|value| value.clone()) .unwrap_or_default(); Channel { id: CHANNEL_ID.to_string(), name: "ArchiveBate".to_string(), description: "ArchiveBate cam recording feeds with platform/gender/profile shortcuts and yt-dlp compatible watch URLs.".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), status: "work in progress".to_string(), categories: filters.iter().skip(1).map(|value| value.title.clone()).collect(), 
options: vec![ ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Browse ArchiveBate newest uploads.".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![FilterOption { id: "new".to_string(), title: "Latest".to_string(), }], multiSelect: false, }, ChannelOption { id: "filter".to_string(), title: "Shortcuts".to_string(), description: "Direct platform and gender archive routes.".to_string(), systemImage: "tag.fill".to_string(), colorName: "green".to_string(), options: filters, multiSelect: false, }, ChannelOption { id: "sites".to_string(), title: "Uploaders".to_string(), description: "Profile shortcuts discovered from latest listings.".to_string(), systemImage: "person.crop.square".to_string(), colorName: "purple".to_string(), options: uploaders, multiSelect: false, }, ], nsfw: true, cacheDuration: Some(900), } } fn selector(value: &str) -> Result { Selector::parse(value) .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) } fn regex(value: &str) -> Result { Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` parse failed: {error}"))) } fn decode_text(text: &str) -> String { decode(text.as_bytes()) .to_string() .unwrap_or_else(|_| text.to_string()) .replace('\u{a0}', " ") .trim() .to_string() } fn collapse_whitespace(text: &str) -> String { text.split_whitespace().collect::>().join(" ") } fn absolute_url(&self, value: &str) -> String { if value.starts_with("http://") || value.starts_with("https://") { return value.to_string(); } if value.starts_with("//") { return format!("https:{value}"); } format!( "{}/{}", self.url.trim_end_matches('/'), value.trim_start_matches('/') ) } fn normalize_title(value: &str) -> String { let mut normalized = String::new(); let mut last_space = true; for character in value.chars().flat_map(|character| character.to_lowercase()) { if character.is_alphanumeric() { normalized.push(character); last_space = false; } else 
if !last_space { normalized.push(' '); last_space = true; } } normalized.trim().to_string() } fn title_case(value: &str) -> String { let mut chars = value.chars(); let Some(first) = chars.next() else { return String::new(); }; let mut out = first.to_uppercase().to_string(); out.push_str(chars.as_str()); out } fn html_headers(&self, referer: &str) -> Vec<(String, String)> { vec![ ("Referer".to_string(), referer.to_string()), ("User-Agent".to_string(), FIREFOX_UA.to_string()), ("Accept".to_string(), HTML_ACCEPT.to_string()), ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), ] } fn search_headers(&self, referer: &str) -> Vec<(String, String)> { vec![ ("Referer".to_string(), referer.to_string()), ("User-Agent".to_string(), FIREFOX_UA.to_string()), ( "Accept".to_string(), "application/json, text/javascript, */*; q=0.01".to_string(), ), ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()), ] } fn build_target_url(&self, target: &Target, page: u16) -> String { let page = page.max(1); match target { Target::Home => { if page <= 1 { format!("{}/", self.url) } else { format!("{}/?page={page}", self.url) } } Target::Platform { id, .. } => { if page <= 1 { format!("{}/platform/{id}", self.url) } else { format!("{}/platform/{id}?page={page}", self.url) } } Target::Gender { id, .. } => { if page <= 1 { format!("{}/gender/{id}", self.url) } else { format!("{}/gender/{id}?page={page}", self.url) } } Target::Profile { username, .. 
} => { if page <= 1 { format!("{}/profile/{username}", self.url) } else { format!("{}/profile/{username}?page={page}", self.url) } } } } fn canonical_uploader_id(username: &str) -> String { format!( "{CHANNEL_ID}:{}", utf8_percent_encode(username, NON_ALPHANUMERIC) ) } fn username_from_uploader_id(value: &str) -> Option { let suffix = match value.split_once(':') { Some((channel, suffix)) if channel.eq_ignore_ascii_case(CHANNEL_ID) => suffix, _ => return None, }; percent_decode_str(suffix) .decode_utf8() .ok() .map(|value| value.into_owned()) .and_then(|value| (!value.trim().is_empty()).then_some(value)) } fn target_from_shortcut(&self, value: &str) -> Option { let trimmed = value.trim(); if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("all") { return None; } if let Some(username) = Self::username_from_uploader_id(trimmed) { return Some(Target::Profile { username, platform: None, gender: None, }); } if let Some(username) = trimmed.strip_prefix("profile:") { let username = username.trim(); if !username.is_empty() { return Some(Target::Profile { username: username.to_string(), platform: None, gender: None, }); } } if let Some(id) = trimmed.strip_prefix("platform:") { if let Some((name, _)) = PLATFORM_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { return Some(Target::Platform { id: id.to_string(), title: Self::title_case(name), }); } } if let Some(id) = trimmed.strip_prefix("gender:") { if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { return Some(Target::Gender { id: id.to_string(), title: Self::title_case(name), }); } } let normalized = Self::normalize_title(trimmed); if let Some((name, id)) = PLATFORM_SHORTCUTS .iter() .find(|(name, _)| Self::normalize_title(name) == normalized) { return Some(Target::Platform { id: (*id).to_string(), title: Self::title_case(name), }); } if let Some((name, id)) = GENDER_SHORTCUTS .iter() .find(|(name, _)| Self::normalize_title(name) == normalized) { return Some(Target::Gender { 
id: (*id).to_string(),
                title: Self::title_case(name),
            });
        }
        // Last resort: match against the discovered uploader options.
        if let Some(option) = self.find_uploader_option(trimmed) {
            if let Some(username) = option.id.strip_prefix("profile:") {
                return Some(Target::Profile {
                    username: username.to_string(),
                    platform: None,
                    gender: None,
                });
            }
        }
        None
    }

    /// Resolves the browse target from request options: `sites` (uploader
    /// shortcut) takes precedence over `filter`; anything unrecognized falls
    /// back to the home listing.
    fn resolve_option_target(&self, options: &ServerOptions) -> Target {
        if let Some(value) = options.sites.as_deref() {
            if let Some(target) = self.target_from_shortcut(value) {
                return target;
            }
        }
        if let Some(value) = options.filter.as_deref() {
            if let Some(target) = self.target_from_shortcut(value) {
                return target;
            }
        }
        Target::Home
    }

    /// Case-insensitive lookup of a cached uploader option, matched by exact
    /// id or by normalized title.
    fn find_uploader_option(&self, value: &str) -> Option<FilterOption> {
        let normalized = Self::normalize_title(value);
        self.uploaders
            .read()
            .ok()?
            .iter()
            .find(|option| {
                option.id.eq_ignore_ascii_case(value)
                    || Self::normalize_title(&option.title) == normalized
            })
            .cloned()
    }

    /// Adds a `profile:<username>` option to the uploader list unless an
    /// equivalent id already exists (case-insensitive). Silently drops the
    /// update if the lock is poisoned.
    fn upsert_uploader_option(&self, username: &str) {
        let trimmed = username.trim();
        if trimmed.is_empty() {
            return;
        }
        let option = FilterOption {
            id: format!("profile:{trimmed}"),
            title: trimmed.to_string(),
        };
        let Ok(mut uploaders) = self.uploaders.write() else {
            return;
        };
        if uploaders
            .iter()
            .any(|value| value.id.eq_ignore_ascii_case(option.id.as_str()))
        {
            return;
        }
        uploaders.push(option);
    }

    /// Extracts the numeric id from a `/watch/<id>` URL.
    ///
    /// Fix: the capture group's `<id>` name was stripped in this copy of the
    /// source (`(?P[0-9]+)`), which is invalid regex syntax — `Regex::new`
    /// failed and this helper always returned `None`. Restored the name to
    /// match the `.name("id")` read below.
    fn parse_watch_id(url: &str) -> Option<String> {
        let regex = Regex::new(r"/watch/(?P<id>[0-9]+)").ok()?;
        regex
            .captures(url)
            .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string()))
    }

    /// Only https URLs on (www.)archivebate.com under `/watch/` are allowed
    /// to be proxied — keeps the proxy from becoming an open relay.
    fn is_allowed_detail_watch_url(url: &str) -> bool {
        let Some(parsed) = url::Url::parse(url).ok() else {
            return false;
        };
        if parsed.scheme() != "https" {
            return false;
        }
        let Some(host) = parsed.host_str() else {
            return false;
        };
        (host == "archivebate.com" || host == "www.archivebate.com")
            && parsed.path().starts_with("/watch/")
    }

    /// Builds the proxied playback URL for an allowed watch page; an empty
    /// string signals "no playable format" to callers.
    fn proxied_video(options: &ServerOptions, detail_url: &str) -> String {
        if detail_url.is_empty() || !Self::is_allowed_detail_watch_url(detail_url) {
            return String::new();
        }
build_proxy_url(options, CHANNEL_ID, &strip_url_scheme(detail_url))
    }

    /// Parses the first `M:SS` / `H:MM:SS` token out of `text` into seconds;
    /// returns 0 when nothing matches or parsing fails.
    fn parse_duration(text: &str) -> u32 {
        let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else {
            return 0;
        };
        let Some(captures) = regex.captures(text) else {
            return 0;
        };
        let Some(value) = captures.get(1) else {
            return 0;
        };
        parse_time_to_seconds(value.as_str()).unwrap_or(0) as u32
    }

    /// Parses an abbreviated view counter such as "1.2K views".
    // NOTE(review): return type restored as Option<u64> after type-parameter
    // stripping — confirm against parse_abbreviated_number / VideoItem.views.
    fn parse_views(value: &str) -> Option<u64> {
        let lowered = value.to_lowercase();
        let cleaned = lowered.replace("views", "").trim().to_string();
        parse_abbreviated_number(&cleaned)
    }

    /// Converts a relative timestamp ("3 days ago", "just now") into unix
    /// epoch seconds. Months/years are approximated as 30/365 days.
    fn parse_uploaded_at(value: &str) -> Option<u64> {
        let text = value.trim().to_lowercase();
        if text.is_empty() {
            return None;
        }
        if text == "just now" {
            return Some(Utc::now().timestamp() as u64);
        }
        let regex =
            Regex::new(r"^([0-9]+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$").ok()?;
        let captures = regex.captures(&text)?;
        // chrono::Duration constructors take i64, hence the restored turbofish.
        let amount = captures.get(1)?.as_str().parse::<i64>().ok()?;
        let unit = captures.get(2)?.as_str();
        let now = Utc::now();
        let parsed = match unit {
            "second" => now - ChronoDuration::seconds(amount),
            "minute" => now - ChronoDuration::minutes(amount),
            "hour" => now - ChronoDuration::hours(amount),
            "day" => now - ChronoDuration::days(amount),
            "week" => now - ChronoDuration::weeks(amount),
            "month" => now - ChronoDuration::days(amount * 30),
            "year" => now - ChronoDuration::days(amount * 365),
            _ => return None,
        };
        Some(parsed.timestamp() as u64)
    }

    /// Tags inherited by every item of a listing, derived from the target kind.
    fn extra_tags_for_target(target: &Target) -> Vec<String> {
        match target {
            Target::Platform { title, .. } => vec![title.clone()],
            Target::Gender { title, .. } => vec![title.clone()],
            Target::Profile {
                platform,
                gender,
                ..
} => { let mut tags = Vec::new(); if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { tags.push(platform.trim().to_string()); } if let Some(gender) = gender.as_ref().filter(|value| !value.trim().is_empty()) { tags.push(gender.trim().to_string()); } tags } Target::Home => Vec::new(), } } fn parse_video_items(&self, html: &str, target: &Target, limit: usize) -> Result> { let document = Html::parse_fragment(html); let item_selector = Self::selector("section.video_item")?; let watch_selector = Self::selector("a[href*='/watch/']")?; let uploader_selector = Self::selector("div.info a[href*='/profile/']")?; let info_selector = Self::selector("div.info p")?; let duration_selector = Self::selector("div.duration span")?; let video_selector = Self::selector("video[poster]")?; let source_selector = Self::selector("video source[src]")?; let base_tags = Self::extra_tags_for_target(target); let mut items = Vec::new(); for element in document.select(&item_selector) { let Some(watch_link) = element.select(&watch_selector).next() else { continue; }; let Some(watch_href) = watch_link.value().attr("href") else { continue; }; let watch_url = self.absolute_url(watch_href); let Some(video_id) = Self::parse_watch_id(&watch_url) else { continue; }; let duration_text = element .select(&duration_selector) .next() .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) .unwrap_or_default(); let duration = Self::parse_duration(&duration_text); let thumb = element .select(&video_selector) .next() .and_then(|value| value.value().attr("poster")) .map(|value| self.absolute_url(value)) .unwrap_or_default(); if thumb.is_empty() { continue; } let preview = element .select(&source_selector) .next() .and_then(|value| value.value().attr("src")) .map(|value| self.absolute_url(value)); let uploader_element = element.select(&uploader_selector).next(); let uploader_name = uploader_element .as_ref() .map(|value| 
Self::decode_text(&value.text().collect::())) .filter(|value| !value.is_empty()); let uploader_url = uploader_element .as_ref() .and_then(|value| value.value().attr("href")) .map(|value| self.absolute_url(value)); let uploader_id = uploader_name .as_ref() .map(|value| Self::canonical_uploader_id(value)); let info_text = element .select(&info_selector) .next() .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) .unwrap_or_default(); let info_parts = info_text .split('ยท') .map(|value| value.trim()) .filter(|value| !value.is_empty()) .collect::>(); let uploaded_at = info_parts.first().and_then(|value| Self::parse_uploaded_at(value)); let platform = info_parts.get(1).map(|value| value.trim().to_string()); let views = info_parts.get(2).and_then(|value| Self::parse_views(value)); let title = match (uploader_name.as_deref(), platform.as_deref()) { (Some(uploader), Some(platform)) => format!("{uploader} on {platform}"), (Some(uploader), None) => format!("{uploader} on ArchiveBate"), _ => format!("ArchiveBate video {video_id}"), }; let mut tags = base_tags.clone(); if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { tags.push(platform.clone()); } let mut unique = HashSet::new(); let mut deduped = Vec::new(); for tag in tags { let normalized = Self::normalize_title(&tag); if normalized.is_empty() || unique.contains(&normalized) { continue; } unique.insert(normalized); deduped.push(tag); } let mut item = VideoItem::new( video_id, title, watch_url, CHANNEL_ID.to_string(), thumb, duration, ); item.uploader = uploader_name.clone(); item.uploaderUrl = uploader_url; item.uploaderId = uploader_id; item.uploadedAt = uploaded_at; item.views = views; item.tags = (!deduped.is_empty()).then_some(deduped); item.preview = preview; if let Some(uploader) = uploader_name { self.upsert_uploader_option(&uploader); } items.push(item); if items.len() >= limit { break; } } Ok(items) } fn parse_mixin_packed_eval(html: &str) -> 
Option<String> {
        // Matches Dean Edwards p,a,c,k,e,d packer output:
        // eval(function(p,a,c,k,e,d){...}('<payload>', radix, count, '<tokens>'.split('|') ...
        // NOTE(review): the named-group names were stripped in this copy of the
        // source (`(?P.*?)`); restored to match the `.name("payload")` etc.
        // reads below — without names the pattern fails to compile.
        let eval_regex = Regex::new(
            r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
        )
        .ok()?;
        let captures = eval_regex.captures(html)?;
        let payload_raw = captures.name("payload")?.as_str();
        let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
        let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
        if !(2..=36).contains(&radix) {
            return None;
        }
        let payload = Self::unescape_js_single_quoted(payload_raw);
        let tokens_raw = captures.name("tokens")?.as_str();
        let tokens = tokens_raw.split('|').collect::<Vec<_>>();
        let mut unpacked = payload;
        // Substitute keys back-to-front so multi-digit keys are handled before
        // their single-digit prefixes.
        for index in (0..count).rev() {
            let Some(token) = tokens.get(index) else {
                continue;
            };
            if token.is_empty() {
                continue;
            }
            let key = Self::to_radix(index, radix);
            let pattern = format!(r"\b{}\b", regex::escape(&key));
            let re = Regex::new(&pattern).ok()?;
            unpacked = re.replace_all(&unpacked, *token).into_owned();
        }
        Some(unpacked)
    }

    /// Unescapes a JavaScript single-quoted string body (`\\`, `\'`, `\"`,
    /// `\n`, `\r`, `\t`; any other escape passes the character through).
    fn unescape_js_single_quoted(value: &str) -> String {
        let mut output = String::with_capacity(value.len());
        let mut chars = value.chars();
        while let Some(character) = chars.next() {
            if character != '\\' {
                output.push(character);
                continue;
            }
            let Some(next) = chars.next() else {
                break;
            };
            match next {
                '\\' => output.push('\\'),
                '\'' => output.push('\''),
                '"' => output.push('"'),
                'n' => output.push('\n'),
                'r' => output.push('\r'),
                't' => output.push('\t'),
                _ => output.push(next),
            }
        }
        output
    }

    /// Renders `value` in base `radix` (2..=36) with the packer's lowercase
    /// 0-9a-z alphabet.
    fn to_radix(mut value: usize, radix: u32) -> String {
        if value == 0 {
            return "0".to_string();
        }
        let alphabet = b"0123456789abcdefghijklmnopqrstuvwxyz";
        let mut out = Vec::new();
        while value > 0 {
            let digit = value % radix as usize;
            out.push(alphabet[digit] as char);
            value /= radix as usize;
        }
        out.iter().rev().collect()
    }

    /// Finds the `MDCore.wurl` media URL in a mixdrop embed page — first
    /// directly, then after unpacking the packed eval payload.
    fn extract_mixdrop_media_url(html: &str) -> Option<String> {
        let direct_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
        if let Some(url) = direct_regex
            .captures(html)
            .and_then(|captures| captures.get(1).map(|value|
value.as_str().to_string()))
        {
            return Some(Self::normalize_possible_protocol_relative(&url));
        }
        // Fall back to unpacking the p,a,c,k,e,d eval payload and re-scanning.
        let unpacked = Self::parse_mixin_packed_eval(html)?;
        let unpacked_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
        unpacked_regex
            .captures(&unpacked)
            .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
            .map(|value| Self::normalize_possible_protocol_relative(&value))
    }

    /// Upgrades protocol-relative URLs (`//host/...`) to https.
    fn normalize_possible_protocol_relative(value: &str) -> String {
        let trimmed = value.trim();
        if trimmed.starts_with("//") {
            format!("https:{trimmed}")
        } else {
            trimmed.to_string()
        }
    }

    /// Lower-cased host component of `url`, if it parses.
    fn host_from_url(url: &str) -> Option<String> {
        let parsed = url::Url::parse(url).ok()?;
        parsed.host_str().map(|value| value.to_ascii_lowercase())
    }

    /// True when the URL's host looks like a mixdrop mirror (incl. "m1xdrop").
    fn is_mixdrop_host(url: &str) -> bool {
        let Some(host) = Self::host_from_url(url) else {
            return false;
        };
        host.contains("mixdrop") || host.contains("m1xdrop")
    }

    /// Reads the `fid` hidden-input value from a detail page, if present and
    /// non-empty.
    fn download_fid_from_detail_html(html: &str) -> Option<String> {
        let document = Html::parse_document(html);
        let selector = Selector::parse("input[name='fid'][value]").ok()?;
        document
            .select(&selector)
            .next()
            .and_then(|node| node.value().attr("value"))
            .map(str::trim)
            .filter(|value| !value.is_empty())
            .map(ToOwned::to_owned)
    }

    /// Normalizes a mixdrop `/e/<id>` or `/f/<id>` URL to its `/e/` embed form.
    fn mixdrop_embed_url_from_download_url(url: &str) -> Option<String> {
        let parsed = url::Url::parse(url).ok()?;
        let host = parsed.host_str()?;
        let host_lc = host.to_ascii_lowercase();
        if !host_lc.contains("mixdrop") && !host_lc.contains("m1xdrop") {
            return None;
        }
        let mut segments = parsed.path_segments()?.filter(|segment| !segment.is_empty());
        let kind = segments.next()?.to_ascii_lowercase();
        if kind != "e" && kind != "f" {
            return None;
        }
        let media_id = segments.next()?.trim();
        if media_id.is_empty() {
            return None;
        }
        Some(format!("{}://{host}/e/{media_id}", parsed.scheme()))
    }

    /// Injects an `http_headers` object into `format` by round-tripping it
    /// through serde_json; any failure returns the original format untouched.
    fn video_format_with_headers(
        format: VideoFormat,
        headers: Vec<(String, String)>,
    ) -> VideoFormat {
        if headers.is_empty() {
            return format;
        }
        // Restored stripped type parameters: HashMap<String, String>.
        let header_map: HashMap<String, String> = headers
            .into_iter()
            .filter_map(|(key,
value)| { let key = key.trim().to_string(); let value = value.trim().to_string(); if key.is_empty() || value.is_empty() { return None; } Some((key, value)) }) .collect(); if header_map.is_empty() { return format; } let mut value = match serde_json::to_value(&format) { Ok(value) => value, Err(_) => return format, }; if let Value::Object(object) = &mut value { let Ok(headers_value) = serde_json::to_value(header_map) else { return format; }; object.insert("http_headers".to_string(), headers_value); if let Ok(updated) = serde_json::from_value::(value) { return updated; } } format } fn first_video_source_from_html(html: &str) -> Option { let document = Html::parse_document(html); let source_selector = Selector::parse("video source[src]").ok()?; let video_src_selector = Selector::parse("video[src]").ok()?; if let Some(value) = document .select(&source_selector) .next() .and_then(|node| node.value().attr("src")) { return Some(value.to_string()); } document .select(&video_src_selector) .next() .and_then(|node| node.value().attr("src")) .map(|value| value.to_string()) } fn first_iframe_source_from_html(html: &str) -> Option { let document = Html::parse_document(html); let iframe_selector = Selector::parse("iframe[src]").ok()?; document .select(&iframe_selector) .next() .and_then(|node| node.value().attr("src")) .map(|value| value.to_string()) } async fn resolve_mixdrop_media_from_iframe( &self, iframe_url: &str, referer: &str, options: &ServerOptions, ) -> Option { let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media"); let response = requester .get_raw_with_headers_timeout( iframe_url, self.html_headers(referer), Some(StdDuration::from_secs(6)), ) .await .ok()?; if !response.status().is_success() { return None; } let iframe_html = response.text().await.ok()?; let media_url = Self::extract_mixdrop_media_url(&iframe_html)?; Some(ResolvedMixdropMedia { media_url, embed_url: iframe_url.to_string(), }) } async fn enrich_video(&self, item: 
VideoItem, options: &ServerOptions) -> VideoItem {
        let page_url = item.url.clone();
        let format_url = Self::proxied_video(options, &page_url);
        if format_url.is_empty() {
            return item;
        }
        let mut format = VideoFormat::new(format_url, "source".to_string(), "mp4".to_string());
        let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video");
        if let Ok(detail_html) = requester
            .get_with_headers(
                &page_url,
                self.html_headers(&format!("{}/", self.url)),
                Some(wreq::Version::HTTP_11),
            )
            .await
        {
            // Prefer a mixdrop iframe on the page; otherwise fall back to the
            // download-form `fid` value.
            // NOTE(review): the fallback assumes the `fid` value resolves to a
            // mixdrop URL after absolutizing — confirm against a live page.
            let mut mixdrop_embed_url = Self::first_iframe_source_from_html(&detail_html)
                .map(|value| self.absolute_url(&value))
                .filter(|value| Self::is_mixdrop_host(value));
            if mixdrop_embed_url.is_none() {
                mixdrop_embed_url = Self::download_fid_from_detail_html(&detail_html)
                    .map(|value| self.absolute_url(&value))
                    .and_then(|value| Self::mixdrop_embed_url_from_download_url(&value));
            }
            if let Some(embed_url) = mixdrop_embed_url {
                format = Self::video_format_with_headers(
                    format,
                    vec![
                        ("Referer".to_string(), embed_url),
                        ("User-Agent".to_string(), FIREFOX_UA.to_string()),
                    ],
                );
            }
        }
        let mut enriched = item;
        enriched.formats = Some(vec![format]);
        enriched
    }

    /// Scrapes the CSRF token from a page's meta tag.
    ///
    /// NOTE(review): the original regex literal (and the end of this function
    /// plus the next function's signature) was destroyed in this copy of the
    /// source — everything angle-bracketed was stripped. Reconstructed against
    /// the standard Laravel `<meta name="csrf-token" content="...">` tag;
    /// confirm against version control before relying on it.
    fn extract_csrf_token(html: &str) -> Option<String> {
        let regex = Regex::new(r#"<meta\s+name="csrf-token"\s+content="([^"]+)""#).ok()?;
        regex
            .captures(html)
            .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
    }

    /// HTML-entity-decodes a `wire:initial-data` attribute value; `None` when
    /// the decoded text is empty.
    /// NOTE(review): signature reconstructed (see note above); the body and the
    /// `raw` parameter name are as found in the source.
    fn decode_livewire_json_attr(raw: &str) -> Option<String> {
        let decoded = Self::decode_text(raw);
        (!decoded.trim().is_empty()).then_some(decoded)
    }

    /// Maps a Livewire component name to the load method we invoke on it.
    fn livewire_method(component: &str) -> Option<&'static str> {
        match component {
            "home-videos" => Some("loadVideos"),
            "filter.platform" => Some("load_platform_videos"),
            "profile.model-videos" => Some("load_profile_videos"),
            _ => None,
        }
    }

    /// Extracts the balanced JSON object that follows `"key":` in `source`,
    /// tracking string/escape state so braces inside values don't miscount.
    fn extract_json_object(source: &str, key: &str) -> Option<String> {
        let needle = format!("\"{key}\":");
        let start = source.find(&needle)?
+ needle.len(); let bytes = source.as_bytes(); let mut index = start; while index < bytes.len() && bytes[index].is_ascii_whitespace() { index += 1; } if index >= bytes.len() || bytes[index] != b'{' { return None; } let mut depth = 0usize; let mut in_string = false; let mut escaped = false; for end in index..bytes.len() { let byte = bytes[end]; if in_string { if escaped { escaped = false; } else if byte == b'\\' { escaped = true; } else if byte == b'"' { in_string = false; } continue; } match byte { b'"' => in_string = true, b'{' => depth += 1, b'}' => { depth = depth.saturating_sub(1); if depth == 0 { return Some(source[index..=end].to_string()); } } _ => {} } } None } fn extract_livewire_initial(html: &str) -> Result> { let regex = Self::regex(r#"wire:initial-data="([^"]+)""#)?; for captures in regex.captures_iter(html) { let Some(raw) = captures.get(1).map(|value| value.as_str()) else { continue; }; let Some(decoded) = Self::decode_livewire_json_attr(raw) else { continue; }; let Some(fingerprint_json) = Self::extract_json_object(&decoded, "fingerprint") else { continue; }; let Some(server_memo_json) = Self::extract_json_object(&decoded, "serverMemo") else { continue; }; let component_regex = Self::regex(r#""name":"([^"]+)""#)?; let Some(component) = component_regex .captures(&fingerprint_json) .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) else { continue; }; let Some(method) = Self::livewire_method(&component) else { continue; }; return Ok(Some(LivewireInitialData { component, method: method.to_string(), fingerprint_json, server_memo_json, })); } Ok(None) } async fn fetch_livewire_html( &self, target_url: &str, options: &ServerOptions, ) -> Result { let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_livewire_html.page"); let page_html = requester .get_with_headers(target_url, self.html_headers(&format!("{}/", self.url)), Some(wreq::Version::HTTP_11)) .await .map_err(|error| Error::from(format!("fetch page failed 
for {target_url}: {error}")))?; let csrf = Self::extract_csrf_token(&page_html) .ok_or_else(|| Error::from(format!("missing csrf token on {target_url}")))?; let Some(initial) = Self::extract_livewire_initial(&page_html)? else { return Err(Error::from(format!( "missing supported livewire initial-data on {target_url}" ))); }; let update_id = format!("ab{}", Utc::now().timestamp_micros()); let payload = format!( "{{\"fingerprint\":{},\"serverMemo\":{},\"updates\":[{{\"type\":\"callMethod\",\"payload\":{{\"id\":\"{}\",\"method\":\"{}\",\"params\":[]}}}}]}}", initial.fingerprint_json, initial.server_memo_json, update_id, initial.method ); let livewire_url = format!("{}/livewire/message/{}", self.url, initial.component); let referer = target_url.to_string(); let user_agent = FIREFOX_UA.to_string(); let accept = "application/json, text/plain, */*".to_string(); let response = requester .post( &livewire_url, &payload, vec![ ("Content-Type", "application/json"), ("X-Requested-With", "XMLHttpRequest"), ("X-CSRF-TOKEN", csrf.as_str()), ("Referer", referer.as_str()), ("User-Agent", user_agent.as_str()), ("Accept", accept.as_str()), ], ) .await .map_err(|error| { Error::from(format!( "livewire request failed for {target_url} component={}: {error}", initial.component )) })?; let response_text = response .text() .await .map_err(|error| Error::from(format!("livewire response text failed: {error}")))?; let parsed: Value = serde_json::from_str(&response_text)?; let effects_html = parsed .get("effects") .and_then(|value| value.get("html")) .and_then(Value::as_str) .unwrap_or("") .to_string(); crate::flow_debug!( "archivebate livewire component={} target={} effects_html_len={} body_preview={}", initial.component, crate::util::flow_debug::preview(target_url, 120), effects_html.len(), crate::util::flow_debug::preview(&response_text, 120) ); if !effects_html.trim().is_empty() { return Ok(effects_html); } Ok(page_html) } async fn fetch_items_for_target( &self, cache: VideoCache, target: 
&Target, page: u16, per_page: usize, options: &ServerOptions, ) -> Result> { let target_url = self.build_target_url(target, page); let cache_key = format!("{CHANNEL_ID}:{target_url}"); if let Some((time, items)) = cache.get(&cache_key) { if time.elapsed().unwrap_or_default().as_secs() < 60 * 10 { return Ok(items.iter().take(per_page).cloned().collect()); } } let html = self.fetch_livewire_html(&target_url, options).await?; let items = self.parse_video_items(&html, target, per_page)?; crate::flow_debug!( "archivebate parsed target={} page={} items={} html_preview={}", crate::util::flow_debug::preview(&target_url, 120), page, items.len(), crate::util::flow_debug::preview(&html, 120) ); if !items.is_empty() { cache.insert(cache_key, items.clone()); } Ok(items) } async fn refresh_uploaders_from_home(&self, options: &ServerOptions) -> Result<()> { let items = self .fetch_items_for_target( VideoCache::new(), &Target::Home, 1, 120, options, ) .await?; for item in items { if let Some(uploader) = item.uploader { self.upsert_uploader_option(&uploader); } } Ok(()) } async fn search_profiles( &self, query: &str, page: u16, options: &ServerOptions, ) -> Result> { let query_string = { let mut serializer = url::form_urlencoded::Serializer::new(String::new()); serializer.append_pair("query", query); serializer.append_pair("page", &page.max(1).to_string()); serializer.finish() }; let search_url = format!("{}/api/v1/search?{query_string}", self.url); let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_search_target"); let response = requester .get_raw_with_headers(&search_url, self.search_headers(&format!("{}/", self.url))) .await .map_err(|error| Error::from(format!("search request failed: {error}")))?; if !response.status().is_success() { return Err(Error::from(format!( "search request returned status {}", response.status() ))); } let body = response .text() .await .map_err(|error| Error::from(format!("search response text failed: {error}")))?; let parsed: 
SearchResponse = serde_json::from_str(&body)?; Ok(parsed.data) } async fn get_default( &self, cache: VideoCache, page: u16, per_page: usize, options: ServerOptions, ) -> Result> { let target = self.resolve_option_target(&options); self.fetch_items_for_target(cache, &target, page, per_page, &options) .await } async fn query( &self, cache: VideoCache, page: u16, per_page: usize, query: &str, options: ServerOptions, ) -> Result> { if let Some(target) = self.target_from_shortcut(query) { return self .fetch_items_for_target(cache, &target, page, per_page, &options) .await; } let mut candidates = self.search_profiles(query, page, &options).await?; if candidates.is_empty() { return Ok(vec![]); } let normalized_query = Self::normalize_title(query); candidates.sort_by_key(|entry| { if Self::normalize_title(&entry.username) == normalized_query { 0 } else { 1 } }); for candidate in candidates.into_iter().take(8) { let username = candidate.username.trim(); if username.is_empty() { continue; } let target = Target::Profile { username: username.to_string(), platform: candidate.platform.clone(), gender: candidate.gender.clone(), }; let items = self .fetch_items_for_target(cache.clone(), &target, 1, per_page, &options) .await?; if !items.is_empty() { return Ok(items); } } Ok(vec![]) } } #[async_trait] impl Provider for ArchivebateProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, _sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { let _ = pool; let page = page.parse::().unwrap_or(1); let per_page = per_page.parse::().unwrap_or(36).clamp(1, 120); let result = match query { Some(query) if !query.trim().is_empty() => { self.query(cache, page, per_page, &query, options.clone()).await } _ => self.get_default(cache, page, per_page, options.clone()).await, }; match result { Ok(videos) => { if videos.is_empty() { return videos; } stream::iter(videos.into_iter().map(|video| { let provider = self.clone(); let options = 
options.clone(); async move { let timeout_result = timeout( StdDuration::from_secs(8), provider.enrich_video(video.clone(), &options), ) .await; match timeout_result { Ok(enriched) => enriched, Err(_) => video, } } })) .buffer_unordered(4) .collect::>() .await } Err(error) => { report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; vec![] } } } fn get_channel(&self, clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::ArchivebateProvider; #[test] fn extracts_mixdrop_wurl_from_packed_eval() { let html = r#" "#; let actual = ArchivebateProvider::extract_mixdrop_media_url(html) .expect("expected mixdrop media url"); assert_eq!( actual, "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168" ); } }