diff --git a/build.rs b/build.rs index 04cd09c..707fc04 100644 --- a/build.rs +++ b/build.rs @@ -11,6 +11,11 @@ struct ProviderDef { } const PROVIDERS: &[ProviderDef] = &[ + ProviderDef { + id: "pornxp", + module: "pornxp", + ty: "PornxpProvider", + }, ProviderDef { id: "all", module: "all", @@ -261,6 +266,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "chaturbate", ty: "ChaturbateProvider", }, + ProviderDef { + id: "archivebate", + module: "archivebate", + ty: "ArchivebateProvider", + }, ]; fn main() { @@ -360,7 +370,7 @@ fn main() { let selection = match selected.as_deref() { Some(selected_id) => format!( - "pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = Some(\"{selected_id}\");" + "pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = Some(\"{selected_id}\");", ), None => "pub const COMPILE_TIME_SELECTED_PROVIDER: Option<&str> = None;".to_string(), }; diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 827c90c..74616fc 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -7,6 +7,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | Provider | Group | `/api/uploaders` | Uses local `/proxy` | Notes | | --- | --- | --- | --- | --- | | `all` | `meta-search` | no | no | Aggregates all compiled providers. | +| `archivebate` | `live-cams` | no | no | Livewire-backed cam archive listings with platform/gender/profile shortcuts. | | `beeg` | `mainstream-tube` | no | no | Basic mainstream tube pattern. | | `chaturbate` | `live-cams` | no | no | Live cam channel. | | `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. | diff --git a/prompts/new-channel.md b/prompts/new-channel.md index f8afba6..5f10b2d 100644 --- a/prompts/new-channel.md +++ b/prompts/new-channel.md @@ -1,4 +1,4 @@ -Implement a new Hottub provider for `` at ``. +Implement a new Hottub provider for `archivebate1` at `https://archivebate1.com`. You are working inside the Hottub Rust server. 
Your job is to add a functioning provider module that can survive handoff to another model with minimal guesswork. Do not stop at code generation. Carry the work through code, validation, and documentation updates. @@ -70,6 +70,7 @@ Implementation requirements: - If uploader support is implemented, use a namespaced `uploaderId` such as `:` so `/api/uploaders` can route directly. - If the query matches a known tag/uploader shortcut, use the direct archive URL instead of generic search. - If the site exposes real media URLs or HLS manifests, populate `formats`. +- If the video page URL can be directly downloaded by yt-dlp, set `video.url` to the page URL and do not populate `formats`, as yt-dlp will extract formats dynamically. - If direct playback needs a referer/cookie transform, use a local `/proxy/...` route built with `build_proxy_url(&options, "...", target)`. - Keep the first version small and reliable. Add extra filters only after the default feed, search, and pagination are working. @@ -85,7 +86,7 @@ Validation requirements: - page 2 - at least one tag/uploader shortcut if implemented 6. Verify thumbnails load. -7. Verify `yt-dlp` can resolve `video.url` or one of `formats[*].url`. +7. Verify `yt-dlp` can resolve `video.url` (if formats are not populated) or one of `formats[*].url` (if formats are populated). 8. If a proxy route exists, verify it directly with `curl -I` or equivalent. Testing commands to run: @@ -115,6 +116,7 @@ Important Hottub-specific rules: - Do not create a brand-new requester in normal provider fetches unless you have a strong reason. - Do not assume page URLs are playable media URLs. - Do not expose status filters that you did not implement in `get_videos`. +- Do not populate `formats` if the page URL is yt-dlp compatible; instead, set `video.url` to the page URL. - Do not finish without checking at least one returned media URL with `yt-dlp`. - Do not claim pagination works unless page 2 was verified. 
diff --git a/src/providers/archivebate.rs b/src/providers/archivebate.rs new file mode 100644 index 0000000..6525fea --- /dev/null +++ b/src/providers/archivebate.rs @@ -0,0 +1,1067 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoItem}; +use async_trait::async_trait; +use chrono::{Duration as ChronoDuration, Utc}; +use error_chain::error_chain; +use htmlentity::entity::{ICodedDataTrait, decode}; +use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode}; +use regex::Regex; +use scraper::{Html, Selector}; +use serde::Deserialize; +use serde_json::Value; +use std::collections::HashSet; +use std::sync::{Arc, RwLock}; +use std::thread; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "live-cams", + tags: &["archive", "cams", "recordings"], + }; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + Json(serde_json::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +const BASE_URL: &str = "https://archivebate.com"; +const CHANNEL_ID: &str = "archivebate"; +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + +const PLATFORM_SHORTCUTS: &[(&str, &str)] = &[ + ("chaturbate", "Y2hhdHVyYmF0ZQ=="), + ("stripchat", "c3RyaXBjaGF0"), + ("camsoda", "Y2Ftc29kYQ=="), + ("cam4", "Y2FtNA=="), + ("bongacams", "Ym9uZ2FjYW1z"), + ("onlyfans", "b25seWZhbnM="), + ("twitch", "dHdpdGNo"), + ("youtube", "eW91dHViZQ=="), + ("instagram", "aW5zdGFncmFt"), + ("tiktok", "dGlrdG9r"), +]; + +const GENDER_SHORTCUTS: &[(&str, &str)] = &[ + ("female", "ZmVtYWxl"), + ("couple", "Y291cGxl"), + ("male", "bWFsZQ=="), + ("trans", "dHJhbnM="), +]; + +#[derive(Debug, Clone)] +pub struct ArchivebateProvider { + url: String, + filters: Arc>>, + uploaders: Arc>>, +} + +#[derive(Debug, Clone)] +enum Target { + Home, + Platform { + id: String, + title: String, + }, + Gender { + id: String, + title: String, + }, + Profile { + username: String, + platform: Option, + gender: Option, + }, +} + +#[derive(Debug, Deserialize, Clone)] +struct SearchResponse { + #[serde(default)] + data: Vec, +} + +#[derive(Debug, Deserialize, Clone)] +struct SearchProfile { + #[serde(default)] + username: String, + #[serde(default)] + platform: Option, + #[serde(default)] + gender: Option, +} + +#[derive(Debug, Clone)] +struct LivewireInitialData { + component: String, + method: String, + fingerprint_json: String, + server_memo_json: String, +} + +impl ArchivebateProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + filters: Arc::new(RwLock::new(Self::build_default_filters())), + uploaders: 
Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let provider = self.clone(); + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + let options = ServerOptions { + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: None, + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sort: Some("new".to_string()), + sexuality: None, + }; + if let Err(error) = provider.refresh_uploaders_from_home(&options).await { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.refresh_uploaders", + &error.to_string(), + ); + } + }); + }); + } + + fn build_default_filters() -> Vec { + let mut filters = vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }]; + + for (title, id) in GENDER_SHORTCUTS { + filters.push(FilterOption { + id: format!("gender:{id}"), + title: format!("Gender: {}", Self::title_case(title)), + }); + } + + for (title, id) in PLATFORM_SHORTCUTS { + filters.push(FilterOption { + id: format!("platform:{id}"), + title: format!("Platform: {}", Self::title_case(title)), + }); + } + + filters + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let filters = self + .filters + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let uploaders = self + .uploaders + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "ArchiveBate".to_string(), + description: "ArchiveBate cam recording feeds with platform/gender/profile shortcuts and 
yt-dlp compatible watch URLs.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=archivebate.com".to_string(), + status: "work in progress".to_string(), + categories: filters.iter().skip(1).map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse ArchiveBate newest uploads.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }], + multiSelect: false, + }, + ChannelOption { + id: "filter".to_string(), + title: "Shortcuts".to_string(), + description: "Direct platform and gender archive routes.".to_string(), + systemImage: "tag.fill".to_string(), + colorName: "green".to_string(), + options: filters, + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Uploaders".to_string(), + description: "Profile shortcuts discovered from latest listings.".to_string(), + systemImage: "person.crop.square".to_string(), + colorName: "purple".to_string(), + options: uploaders, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(900), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` parse failed: {error}"))) + } + + fn decode_text(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + .replace('\u{a0}', " ") + .trim() + .to_string() + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn absolute_url(&self, value: &str) -> String { + if value.starts_with("http://") || value.starts_with("https://") { + return value.to_string(); + } + format!( + "{}/{}", 
+ self.url.trim_end_matches('/'), + value.trim_start_matches('/') + ) + } + + fn normalize_title(value: &str) -> String { + let mut normalized = String::new(); + let mut last_space = true; + for character in value.chars().flat_map(|character| character.to_lowercase()) { + if character.is_alphanumeric() { + normalized.push(character); + last_space = false; + } else if !last_space { + normalized.push(' '); + last_space = true; + } + } + normalized.trim().to_string() + } + + fn title_case(value: &str) -> String { + let mut chars = value.chars(); + let Some(first) = chars.next() else { + return String::new(); + }; + let mut out = first.to_uppercase().to_string(); + out.push_str(chars.as_str()); + out + } + + fn html_headers(&self, referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] + } + + fn search_headers(&self, referer: &str) -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), referer.to_string()), + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ( + "Accept".to_string(), + "application/json, text/javascript, */*; q=0.01".to_string(), + ), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ("X-Requested-With".to_string(), "XMLHttpRequest".to_string()), + ] + } + + fn build_target_url(&self, target: &Target, page: u16) -> String { + let page = page.max(1); + match target { + Target::Home => { + if page <= 1 { + format!("{}/", self.url) + } else { + format!("{}/?page={page}", self.url) + } + } + Target::Platform { id, .. } => { + if page <= 1 { + format!("{}/platform/{id}", self.url) + } else { + format!("{}/platform/{id}?page={page}", self.url) + } + } + Target::Gender { id, .. 
} => { + if page <= 1 { + format!("{}/gender/{id}", self.url) + } else { + format!("{}/gender/{id}?page={page}", self.url) + } + } + Target::Profile { username, .. } => { + if page <= 1 { + format!("{}/profile/{username}", self.url) + } else { + format!("{}/profile/{username}?page={page}", self.url) + } + } + } + } + + fn canonical_uploader_id(username: &str) -> String { + format!( + "{CHANNEL_ID}:{}", + utf8_percent_encode(username, NON_ALPHANUMERIC) + ) + } + + fn username_from_uploader_id(value: &str) -> Option { + let suffix = match value.split_once(':') { + Some((channel, suffix)) if channel.eq_ignore_ascii_case(CHANNEL_ID) => suffix, + _ => return None, + }; + percent_decode_str(suffix) + .decode_utf8() + .ok() + .map(|value| value.into_owned()) + .and_then(|value| (!value.trim().is_empty()).then_some(value)) + } + + fn target_from_shortcut(&self, value: &str) -> Option { + let trimmed = value.trim(); + if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("all") { + return None; + } + + if let Some(username) = Self::username_from_uploader_id(trimmed) { + return Some(Target::Profile { + username, + platform: None, + gender: None, + }); + } + + if let Some(username) = trimmed.strip_prefix("profile:") { + let username = username.trim(); + if !username.is_empty() { + return Some(Target::Profile { + username: username.to_string(), + platform: None, + gender: None, + }); + } + } + + if let Some(id) = trimmed.strip_prefix("platform:") { + if let Some((name, _)) = PLATFORM_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { + return Some(Target::Platform { + id: id.to_string(), + title: Self::title_case(name), + }); + } + } + + if let Some(id) = trimmed.strip_prefix("gender:") { + if let Some((name, _)) = GENDER_SHORTCUTS.iter().find(|(_, known_id)| *known_id == id) { + return Some(Target::Gender { + id: id.to_string(), + title: Self::title_case(name), + }); + } + } + + let normalized = Self::normalize_title(trimmed); + if let Some((name, id)) = 
PLATFORM_SHORTCUTS + .iter() + .find(|(name, _)| Self::normalize_title(name) == normalized) + { + return Some(Target::Platform { + id: (*id).to_string(), + title: Self::title_case(name), + }); + } + + if let Some((name, id)) = GENDER_SHORTCUTS + .iter() + .find(|(name, _)| Self::normalize_title(name) == normalized) + { + return Some(Target::Gender { + id: (*id).to_string(), + title: Self::title_case(name), + }); + } + + if let Some(option) = self.find_uploader_option(trimmed) { + if let Some(username) = option.id.strip_prefix("profile:") { + return Some(Target::Profile { + username: username.to_string(), + platform: None, + gender: None, + }); + } + } + + None + } + + fn resolve_option_target(&self, options: &ServerOptions) -> Target { + if let Some(value) = options.sites.as_deref() { + if let Some(target) = self.target_from_shortcut(value) { + return target; + } + } + if let Some(value) = options.filter.as_deref() { + if let Some(target) = self.target_from_shortcut(value) { + return target; + } + } + Target::Home + } + + fn find_uploader_option(&self, value: &str) -> Option { + let normalized = Self::normalize_title(value); + self.uploaders + .read() + .ok()? 
+ .iter() + .find(|option| { + option.id.eq_ignore_ascii_case(value) + || Self::normalize_title(&option.title) == normalized + }) + .cloned() + } + + fn upsert_uploader_option(&self, username: &str) { + let trimmed = username.trim(); + if trimmed.is_empty() { + return; + } + let option = FilterOption { + id: format!("profile:{trimmed}"), + title: trimmed.to_string(), + }; + let Ok(mut uploaders) = self.uploaders.write() else { + return; + }; + if uploaders + .iter() + .any(|value| value.id.eq_ignore_ascii_case(option.id.as_str())) + { + return; + } + uploaders.push(option); + } + + fn parse_watch_id(url: &str) -> Option { + let regex = Regex::new(r"/watch/(?P[0-9]+)").ok()?; + regex + .captures(url) + .and_then(|captures| captures.name("id").map(|value| value.as_str().to_string())) + } + + fn parse_duration(text: &str) -> u32 { + let Ok(regex) = Self::regex(r"([0-9]{1,2}:[0-9]{2}(?::[0-9]{2})?)") else { + return 0; + }; + let Some(captures) = regex.captures(text) else { + return 0; + }; + let Some(value) = captures.get(1) else { + return 0; + }; + parse_time_to_seconds(value.as_str()).unwrap_or(0) as u32 + } + + fn parse_views(value: &str) -> Option { + let lowered = value.to_lowercase(); + let cleaned = lowered.replace("views", "").trim().to_string(); + parse_abbreviated_number(&cleaned) + } + + fn parse_uploaded_at(value: &str) -> Option { + let text = value.trim().to_lowercase(); + if text.is_empty() { + return None; + } + if text == "just now" { + return Some(Utc::now().timestamp() as u64); + } + + let regex = + Regex::new(r"^([0-9]+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$").ok()?; + let captures = regex.captures(&text)?; + let amount = captures.get(1)?.as_str().parse::().ok()?; + let unit = captures.get(2)?.as_str(); + let now = Utc::now(); + let parsed = match unit { + "second" => now - ChronoDuration::seconds(amount), + "minute" => now - ChronoDuration::minutes(amount), + "hour" => now - ChronoDuration::hours(amount), + "day" => now - 
ChronoDuration::days(amount), + "week" => now - ChronoDuration::weeks(amount), + "month" => now - ChronoDuration::days(amount * 30), + "year" => now - ChronoDuration::days(amount * 365), + _ => return None, + }; + Some(parsed.timestamp() as u64) + } + + fn extra_tags_for_target(target: &Target) -> Vec { + match target { + Target::Platform { title, .. } => vec![title.clone()], + Target::Gender { title, .. } => vec![title.clone()], + Target::Profile { platform, gender, .. } => { + let mut tags = Vec::new(); + if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { + tags.push(platform.trim().to_string()); + } + if let Some(gender) = gender.as_ref().filter(|value| !value.trim().is_empty()) { + tags.push(gender.trim().to_string()); + } + tags + } + Target::Home => Vec::new(), + } + } + + fn parse_video_items(&self, html: &str, target: &Target, limit: usize) -> Result> { + let document = Html::parse_fragment(html); + let item_selector = Self::selector("section.video_item")?; + let watch_selector = Self::selector("a[href*='/watch/']")?; + let uploader_selector = Self::selector("div.info a[href*='/profile/']")?; + let info_selector = Self::selector("div.info p")?; + let duration_selector = Self::selector("div.duration span")?; + let video_selector = Self::selector("video[poster]")?; + let source_selector = Self::selector("video source[src]")?; + + let base_tags = Self::extra_tags_for_target(target); + let mut items = Vec::new(); + + for element in document.select(&item_selector) { + let Some(watch_link) = element.select(&watch_selector).next() else { + continue; + }; + let Some(watch_href) = watch_link.value().attr("href") else { + continue; + }; + let watch_url = self.absolute_url(watch_href); + let Some(video_id) = Self::parse_watch_id(&watch_url) else { + continue; + }; + + let duration_text = element + .select(&duration_selector) + .next() + .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::()))) + 
.unwrap_or_default(); + let duration = Self::parse_duration(&duration_text); + + let thumb = element + .select(&video_selector) + .next() + .and_then(|value| value.value().attr("poster")) + .map(|value| self.absolute_url(value)) + .unwrap_or_default(); + if thumb.is_empty() { + continue; + } + + let preview = element + .select(&source_selector) + .next() + .and_then(|value| value.value().attr("src")) + .map(|value| self.absolute_url(value)); + + let uploader_element = element.select(&uploader_selector).next(); + let uploader_name = uploader_element + .as_ref() + .map(|value| Self::decode_text(&value.text().collect::<String>())) + .filter(|value| !value.is_empty()); + let uploader_url = uploader_element + .as_ref() + .and_then(|value| value.value().attr("href")) + .map(|value| self.absolute_url(value)); + let uploader_id = uploader_name + .as_ref() + .map(|value| Self::canonical_uploader_id(value)); + + let info_text = element + .select(&info_selector) + .next() + .map(|value| Self::decode_text(&Self::collapse_whitespace(&value.text().collect::<String>()))) + .unwrap_or_default(); + let info_parts = info_text + .split('·') + .map(|value| value.trim()) + .filter(|value| !value.is_empty()) + .collect::<Vec<_>>(); + + let uploaded_at = info_parts.first().and_then(|value| Self::parse_uploaded_at(value)); + let platform = info_parts.get(1).map(|value| value.trim().to_string()); + let views = info_parts.get(2).and_then(|value| Self::parse_views(value)); + + let title = match (uploader_name.as_deref(), platform.as_deref()) { + (Some(uploader), Some(platform)) => format!("{uploader} on {platform}"), + (Some(uploader), None) => format!("{uploader} on ArchiveBate"), + _ => format!("ArchiveBate video {video_id}"), + }; + + let mut tags = base_tags.clone(); + if let Some(platform) = platform.as_ref().filter(|value| !value.trim().is_empty()) { + tags.push(platform.clone()); + } + let mut unique = HashSet::new(); + let mut deduped = Vec::new(); + for tag in tags { + let normalized =
Self::normalize_title(&tag); + if normalized.is_empty() || unique.contains(&normalized) { + continue; + } + unique.insert(normalized); + deduped.push(tag); + } + + let mut item = VideoItem::new( + video_id, + title, + watch_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + item.uploader = uploader_name.clone(); + item.uploaderUrl = uploader_url; + item.uploaderId = uploader_id; + item.uploadedAt = uploaded_at; + item.views = views; + item.tags = (!deduped.is_empty()).then_some(deduped); + item.preview = preview; + + if let Some(uploader) = uploader_name { + self.upsert_uploader_option(&uploader); + } + + items.push(item); + if items.len() >= limit { + break; + } + } + + Ok(items) + } + + fn extract_csrf_token(html: &str) -> Option { + let regex = Regex::new(r#" Option { + let decoded = Self::decode_text(raw); + (!decoded.trim().is_empty()).then_some(decoded) + } + + fn livewire_method(component: &str) -> Option<&'static str> { + match component { + "home-videos" => Some("loadVideos"), + "filter.platform" => Some("load_platform_videos"), + "profile.model-videos" => Some("load_profile_videos"), + _ => None, + } + } + + fn extract_json_object(source: &str, key: &str) -> Option { + let needle = format!("\"{key}\":"); + let start = source.find(&needle)? 
+ needle.len(); + let bytes = source.as_bytes(); + let mut index = start; + while index < bytes.len() && bytes[index].is_ascii_whitespace() { + index += 1; + } + if index >= bytes.len() || bytes[index] != b'{' { + return None; + } + + let mut depth = 0usize; + let mut in_string = false; + let mut escaped = false; + for end in index..bytes.len() { + let byte = bytes[end]; + if in_string { + if escaped { + escaped = false; + } else if byte == b'\\' { + escaped = true; + } else if byte == b'"' { + in_string = false; + } + continue; + } + + match byte { + b'"' => in_string = true, + b'{' => depth += 1, + b'}' => { + depth = depth.saturating_sub(1); + if depth == 0 { + return Some(source[index..=end].to_string()); + } + } + _ => {} + } + } + + None + } + + fn extract_livewire_initial(html: &str) -> Result> { + let regex = Self::regex(r#"wire:initial-data="([^"]+)""#)?; + for captures in regex.captures_iter(html) { + let Some(raw) = captures.get(1).map(|value| value.as_str()) else { + continue; + }; + let Some(decoded) = Self::decode_livewire_json_attr(raw) else { + continue; + }; + let Some(fingerprint_json) = Self::extract_json_object(&decoded, "fingerprint") else { + continue; + }; + let Some(server_memo_json) = Self::extract_json_object(&decoded, "serverMemo") else { + continue; + }; + let component_regex = Self::regex(r#""name":"([^"]+)""#)?; + let Some(component) = component_regex + .captures(&fingerprint_json) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + else { + continue; + }; + let Some(method) = Self::livewire_method(&component) else { + continue; + }; + return Ok(Some(LivewireInitialData { + component, + method: method.to_string(), + fingerprint_json, + server_memo_json, + })); + } + Ok(None) + } + + async fn fetch_livewire_html( + &self, + target_url: &str, + options: &ServerOptions, + ) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_livewire_html.page"); + let page_html = requester + 
.get_with_headers(target_url, self.html_headers(&format!("{}/", self.url)), Some(wreq::Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("fetch page failed for {target_url}: {error}")))?; + + let csrf = Self::extract_csrf_token(&page_html) + .ok_or_else(|| Error::from(format!("missing csrf token on {target_url}")))?; + let Some(initial) = Self::extract_livewire_initial(&page_html)? else { + return Err(Error::from(format!( + "missing supported livewire initial-data on {target_url}" + ))); + }; + + let update_id = format!("ab{}", Utc::now().timestamp_micros()); + let payload = format!( + "{{\"fingerprint\":{},\"serverMemo\":{},\"updates\":[{{\"type\":\"callMethod\",\"payload\":{{\"id\":\"{}\",\"method\":\"{}\",\"params\":[]}}}}]}}", + initial.fingerprint_json, initial.server_memo_json, update_id, initial.method + ); + let livewire_url = format!("{}/livewire/message/{}", self.url, initial.component); + let referer = target_url.to_string(); + let user_agent = FIREFOX_UA.to_string(); + let accept = "application/json, text/plain, */*".to_string(); + let response = requester + .post( + &livewire_url, + &payload, + vec![ + ("Content-Type", "application/json"), + ("X-Requested-With", "XMLHttpRequest"), + ("X-CSRF-TOKEN", csrf.as_str()), + ("Referer", referer.as_str()), + ("User-Agent", user_agent.as_str()), + ("Accept", accept.as_str()), + ], + ) + .await + .map_err(|error| { + Error::from(format!( + "livewire request failed for {target_url} component={}: {error}", + initial.component + )) + })?; + + let response_text = response + .text() + .await + .map_err(|error| Error::from(format!("livewire response text failed: {error}")))?; + let parsed: Value = serde_json::from_str(&response_text)?; + let effects_html = parsed + .get("effects") + .and_then(|value| value.get("html")) + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + crate::flow_debug!( + "archivebate livewire component={} target={} effects_html_len={} body_preview={}", + 
initial.component, + crate::util::flow_debug::preview(target_url, 120), + effects_html.len(), + crate::util::flow_debug::preview(&response_text, 120) + ); + if !effects_html.trim().is_empty() { + return Ok(effects_html); + } + + Ok(page_html) + } + + async fn fetch_items_for_target( + &self, + cache: VideoCache, + target: &Target, + page: u16, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let target_url = self.build_target_url(target, page); + let cache_key = format!("{CHANNEL_ID}:{target_url}"); + if let Some((time, items)) = cache.get(&cache_key) { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 10 { + return Ok(items.iter().take(per_page).cloned().collect()); + } + } + + let html = self.fetch_livewire_html(&target_url, options).await?; + let items = self.parse_video_items(&html, target, per_page)?; + crate::flow_debug!( + "archivebate parsed target={} page={} items={} html_preview={}", + crate::util::flow_debug::preview(&target_url, 120), + page, + items.len(), + crate::util::flow_debug::preview(&html, 120) + ); + if !items.is_empty() { + cache.insert(cache_key, items.clone()); + } + Ok(items) + } + + async fn refresh_uploaders_from_home(&self, options: &ServerOptions) -> Result<()> { + let items = self + .fetch_items_for_target( + VideoCache::new(), + &Target::Home, + 1, + 120, + options, + ) + .await?; + for item in items { + if let Some(uploader) = item.uploader { + self.upsert_uploader_option(&uploader); + } + } + Ok(()) + } + + async fn search_profiles( + &self, + query: &str, + page: u16, + options: &ServerOptions, + ) -> Result> { + let query_string = { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + serializer.append_pair("query", query); + serializer.append_pair("page", &page.max(1).to_string()); + serializer.finish() + }; + let search_url = format!("{}/api/v1/search?{query_string}", self.url); + + let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_search_target"); + let 
response = requester + .get_raw_with_headers(&search_url, self.search_headers(&format!("{}/", self.url))) + .await + .map_err(|error| Error::from(format!("search request failed: {error}")))?; + if !response.status().is_success() { + return Err(Error::from(format!( + "search request returned status {}", + response.status() + ))); + } + let body = response + .text() + .await + .map_err(|error| Error::from(format!("search response text failed: {error}")))?; + let parsed: SearchResponse = serde_json::from_str(&body)?; + Ok(parsed.data) + } + + async fn get_default( + &self, + cache: VideoCache, + page: u16, + per_page: usize, + options: ServerOptions, + ) -> Result> { + let target = self.resolve_option_target(&options); + self.fetch_items_for_target(cache, &target, page, per_page, &options) + .await + } + + async fn query( + &self, + cache: VideoCache, + page: u16, + per_page: usize, + query: &str, + options: ServerOptions, + ) -> Result> { + if let Some(target) = self.target_from_shortcut(query) { + return self + .fetch_items_for_target(cache, &target, page, per_page, &options) + .await; + } + + let mut candidates = self.search_profiles(query, page, &options).await?; + if candidates.is_empty() { + return Ok(vec![]); + } + + let normalized_query = Self::normalize_title(query); + candidates.sort_by_key(|entry| { + if Self::normalize_title(&entry.username) == normalized_query { + 0 + } else { + 1 + } + }); + + for candidate in candidates.into_iter().take(8) { + let username = candidate.username.trim(); + if username.is_empty() { + continue; + } + let target = Target::Profile { + username: username.to_string(), + platform: candidate.platform.clone(), + gender: candidate.gender.clone(), + }; + let items = self + .fetch_items_for_target(cache.clone(), &target, 1, per_page, &options) + .await?; + if !items.is_empty() { + return Ok(items); + } + } + + Ok(vec![]) + } +} + +#[async_trait] +impl Provider for ArchivebateProvider { + async fn get_videos( + &self, + cache: 
VideoCache, + pool: DbPool, + _sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let page = page.parse::().unwrap_or(1); + let per_page = per_page.parse::().unwrap_or(36).clamp(1, 120); + + let result = match query { + Some(query) if !query.trim().is_empty() => { + self.query(cache, page, per_page, &query, options).await + } + _ => self.get_default(cache, page, per_page, options).await, + }; + + match result { + Ok(videos) => videos, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} diff --git a/src/providers/pornxp.rs b/src/providers/pornxp.rs index 1e08719..2ec15cb 100644 --- a/src/providers/pornxp.rs +++ b/src/providers/pornxp.rs @@ -8,8 +8,6 @@ use crate::videos::{ServerOptions, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; -// use std::sync::{Arc, RwLock}; -// use std::thread; use std::vec; error_chain! { @@ -19,6 +17,12 @@ error_chain! { } } +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["tube", "mainstream"], + }; + #[derive(Debug, Clone)] pub struct PornxpProvider { url: String, diff --git a/src/util/requester.rs b/src/util/requester.rs index 34a08b6..474fcee 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -174,6 +174,7 @@ impl Requester { #[cfg(any( not(hottub_single_provider), + hottub_provider = "archivebate", hottub_provider = "hypnotube", hottub_provider = "vjav", ))] @@ -381,6 +382,7 @@ impl Requester { #[cfg(any( not(hottub_single_provider), + hottub_provider = "archivebate", hottub_provider = "hypnotube", hottub_provider = "freeuseporn", hottub_provider = "vjav",