use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{Provider, report_provider_error_background, requester_or_default}; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::discord::send_discord_error_report; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use futures::stream::{FuturesUnordered, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use std::collections::HashSet; use std::fmt::Write; use std::vec; use url::form_urlencoded::Serializer; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); } } #[derive(Debug, Clone)] pub struct ViralxxxpornProvider { url: String, } #[derive(Debug, Clone)] struct RawVideo { id: String, title: String, detail_url: String, thumb: String, duration: u32, views: Option, rating: Option, quality: Option, } impl ViralxxxpornProvider { pub fn new() -> Self { Self { url: "https://viralxxxporn.com".to_string(), } } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { Channel { id: "viralxxxporn".to_string(), name: "ViralXXXPorn".to_string(), description: "Trending free porn and onlyfans leaks".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=viralxxxporn.com".to_string(), status: "active".to_string(), categories: vec![], options: vec![ ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Browse different video feeds".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "latest".to_string(), title: "Latest".to_string(), }, FilterOption { id: "top-rated".to_string(), title: "Top Rated".to_string(), }, FilterOption { id: "most-viewed".to_string(), title: "Most Viewed".to_string(), }, FilterOption { id: "premium".to_string(), title: "Premium".to_string(), }, ], multiSelect: false, }, ChannelOption { id: "filter".to_string(), title: "Period".to_string(), description: "Time range for top-rated feed".to_string(), systemImage: "clock".to_string(), colorName: "green".to_string(), options: vec![ FilterOption { id: "all-time".to_string(), title: "All Time".to_string(), }, FilterOption { id: "month".to_string(), title: "This Month".to_string(), }, FilterOption { id: "week".to_string(), title: "This Week".to_string(), }, FilterOption { id: "today".to_string(), title: "Today".to_string(), }, ], multiSelect: false, }, ], nsfw: true, cacheDuration: Some(300), } } fn normalize_ws(s: &str) -> String { s.split_whitespace().collect::>().join(" ") } fn decode_html(s: &str) -> String { decode(s.as_bytes()) .to_string() .unwrap_or_else(|_| s.to_string()) } fn absolute_url(&self, raw: &str) -> String { if raw.starts_with("http://") || raw.starts_with("https://") { return raw.to_string(); } if raw.starts_with("//") { return format!("https:{raw}"); } if raw.starts_with('/') { return format!("{}{}", self.url, raw); } format!("{}/{}", self.url, raw.trim_start_matches('/')) } fn slugify_query(query: &str) -> String { let mut out = String::new(); let mut prev_dash = false; for c in query.to_ascii_lowercase().chars() { if c.is_ascii_alphanumeric() { out.push(c); prev_dash = false; } else if !prev_dash { out.push('-'); prev_dash = true; } } out.trim_matches('-').to_string() } fn encode_query_value(value: &str) -> String { let mut serializer = Serializer::new(String::new()); serializer.append_pair("q", value); let encoded = serializer.finish(); encoded.strip_prefix("q=").unwrap_or(&encoded).to_string() } fn with_page(url: &str, page: u8) -> Vec { if page <= 1 { return vec![url.to_string()]; } let base = url.trim_end_matches('/'); let mut candidates = vec![ format!("{base}/page/{page}/"), format!("{base}/{page}/"), format!("{base}/?page={page}"), ]; if url.contains('?') { candidates.push(format!("{url}&page={page}")); } candidates } fn unique_urls(urls: Vec) -> Vec { let mut seen = HashSet::new(); let mut out = vec![]; for u in urls { if seen.insert(u.clone()) { out.push(u); } } out } fn build_listing_urls(&self, page: u8, query: &str, options: &ServerOptions) -> Vec { let sort = options .sort .clone() .unwrap_or_else(|| "latest".to_string()) .to_ascii_lowercase(); let filter = options .filter .clone() .unwrap_or_else(|| "all-time".to_string()) .to_ascii_lowercase(); let query = query.trim(); let slug = Self::slugify_query(query); let encoded = Self::encode_query_value(query); let mut urls = vec![]; if !query.is_empty() { urls.extend(Self::with_page(&format!("{}/tags/{slug}/", self.url), page)); urls.extend(Self::with_page( &format!("{}/search/{}/", self.url, slug), page, )); urls.extend(Self::with_page( &format!("{}/?s={}", self.url, encoded), page, )); urls.extend(Self::with_page( &format!("{}/search?q={}", self.url, encoded), page, )); return Self::unique_urls(urls); } match sort.as_str() { "top-rated" => { urls.extend(Self::with_page(&format!("{}/top-rated/", self.url), page)); urls.extend(Self::with_page( &format!("{}/top-rated/?period={}", self.url, filter), page, )); urls.extend(Self::with_page( &format!("{}/top-rated/{}/", self.url, filter), page, )); } "most-viewed" => { urls.extend(Self::with_page( &format!("{}/most-popular/", self.url), page, )); urls.extend(Self::with_page(&format!("{}/most-viewed/", self.url), page)); } "premium" => { urls.extend(Self::with_page(&format!("{}/premium/", self.url), page)); } _ => { urls.extend(Self::with_page( &format!("{}/latest-updates/", self.url), page, )); urls.extend(Self::with_page(&self.url, page)); } } Self::unique_urls(urls) } fn strip_tags(text: &str) -> String { let Ok(tag_re) = Regex::new(r"(?is)<[^>]+>") else { return text.to_string(); }; tag_re.replace_all(text, " ").to_string() } fn clean_media_url(raw: &str) -> String { let mut out = raw .trim_matches(|c: char| c == '"' || c == '\'' || c == '\\' || c.is_whitespace()) .to_string(); out = out .replace("\\u0026", "&") .replace("\\u002F", "/") .replace("\\/", "/") .replace("&", "&"); out = out .trim_end_matches(|c: char| matches!(c, ',' | ';' | ')' | ']' | '}')) .to_string(); if out.starts_with("//") { return format!("https:{out}"); } out } fn is_media_url(url: &str) -> bool { let lower = url.to_ascii_lowercase(); (lower.starts_with("http://") || lower.starts_with("https://")) && (lower.contains(".mp4") || lower.contains(".m3u8")) } fn extract_duration_seconds(text: &str) -> Option { let Ok(duration_re) = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b") else { return None; }; duration_re .captures(text) .and_then(|c| c.get(1)) .and_then(|m| parse_time_to_seconds(m.as_str())) .map(|v| v as u32) } fn extract_quality(text: &str) -> Option { let Ok(q_re) = Regex::new(r"(?i)\b((?:\d{3,4})p|hd|4k)\b") else { return None; }; q_re.captures(text) .and_then(|c| c.get(1)) .map(|m| m.as_str().to_ascii_lowercase()) } fn extract_rating(text: &str) -> Option { let Ok(r_re) = Regex::new(r"(?i)\b(\d{1,3})%") else { return None; }; r_re.captures(text) .and_then(|c| c.get(1)) .and_then(|m| m.as_str().parse::().ok()) } fn extract_views(text: &str) -> Option { let Ok(v_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s+\d{1,3}%") else { return None; }; let raw = v_re .captures(text) .and_then(|c| c.get(1)) .map(|m| m.as_str().to_string())?; parse_abbreviated_number(&raw) } fn parse_listing_items(&self, html: &str) -> Vec { if html.trim().is_empty() { return vec![]; } let Ok(link_re) = Regex::new( r#"(?is)]+href="(?P/video/(?P\d+)/[^"]+)"[^>]*>(?P.*?)"#, ) else { return vec![]; }; let Ok(title_attr_re) = Regex::new(r#"(?is)\btitle="([^"]+)""#) else { return vec![]; }; let Ok(thumb_re) = Regex::new(r#"(?is)(?:src|data-src|data-original|poster)="([^"]+)""#) else { return vec![]; }; let mut out = vec![]; let mut seen = HashSet::new(); for caps in link_re.captures_iter(html) { let Some(id) = caps.name("id").map(|m| m.as_str().to_string()) else { continue; }; if !seen.insert(id.clone()) { continue; } let href = caps.name("href").map(|m| m.as_str()).unwrap_or_default(); let body = caps.name("body").map(|m| m.as_str()).unwrap_or_default(); let m0 = match caps.get(0) { Some(v) => v, None => continue, }; let seg_start = m0.start().saturating_sub(600); let seg_end = (m0.end() + 1800).min(html.len()); let segment = html.get(seg_start..seg_end).unwrap_or(body); let title_from_attr = title_attr_re .captures(m0.as_str()) .and_then(|c| c.get(1)) .map(|m| m.as_str().to_string()) .unwrap_or_default(); let title_from_body = Self::strip_tags(body); let title = Self::normalize_ws(&Self::decode_html(if !title_from_attr.is_empty() { &title_from_attr } else { &title_from_body })); if title.is_empty() { continue; } let thumb = thumb_re .captures(segment) .and_then(|c| c.get(1)) .map(|m| self.absolute_url(m.as_str())) .unwrap_or_default(); let duration = Self::extract_duration_seconds(segment).unwrap_or(0); let rating = Self::extract_rating(segment); let views = Self::extract_views(segment); let quality = Self::extract_quality(segment); out.push(RawVideo { id, title, detail_url: self.absolute_url(href), thumb, duration, views, rating, quality, }); } out } fn extract_media_urls(&self, html: &str) -> Vec { let patterns = [ r#"https?:\\?/\\?/[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#, r#"https?://[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#, r#"(?is)]+src=["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, r#"(?is)["']file["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, r#"(?is)["']src["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, ]; let mut urls = vec![]; let mut seen = HashSet::new(); for pattern in patterns { let Ok(re) = Regex::new(pattern) else { continue; }; for caps in re.captures_iter(html) { let raw = caps .get(1) .map(|m| m.as_str()) .unwrap_or_else(|| caps.get(0).map(|m| m.as_str()).unwrap_or("")); if raw.is_empty() { continue; } let url = Self::clean_media_url(raw); if !Self::is_media_url(&url) { continue; } if seen.insert(url.clone()) { urls.push(url); } } } urls } fn extract_tags_from_detail(html: &str) -> Vec { let Ok(tag_re) = Regex::new(r#"(?is)href="/tags?/([^"?#/]+)/""#) else { return vec![]; }; let mut out = vec![]; let mut seen = HashSet::new(); for caps in tag_re.captures_iter(html) { let Some(raw) = caps.get(1).map(|m| m.as_str()) else { continue; }; let tag = raw.replace('-', " ").to_ascii_lowercase(); if !tag.is_empty() && seen.insert(tag.clone()) { out.push(tag); } } out } fn quality_from_url(url: &str, fallback: Option<&String>) -> String { let Ok(q_re) = Regex::new(r"(?i)(\d{3,4})p") else { return fallback .cloned() .unwrap_or_else(|| "1080p".to_string()) .replace('p', ""); }; if let Some(q) = q_re.captures(url).and_then(|c| c.get(1)) { return q.as_str().to_string(); } if url.to_ascii_lowercase().contains(".m3u8") { return "hls".to_string(); } fallback .cloned() .unwrap_or_else(|| "1080p".to_string()) .replace('p', "") } async fn fetch_detailed_item( &self, raw: RawVideo, mut requester: crate::util::requester::Requester, ) -> Option { let detail_html = match requester.get(&raw.detail_url, None).await { Ok(text) => text, Err(e) => { report_provider_error_background( "viralxxxporn", "detail.request", &format!("url={}; error={e}", raw.detail_url), ); return None; } }; let media_urls = self.extract_media_urls(&detail_html); if media_urls.is_empty() { report_provider_error_background( "viralxxxporn", "detail.media", &format!("no_media_url_found; url={}", raw.detail_url), ); return None; } let mut formats = vec![]; for media in &media_urls { let format_kind = if media.to_ascii_lowercase().contains(".m3u8") { "m3u8".to_string() } else { "mp4".to_string() }; let quality = Self::quality_from_url(media, raw.quality.as_ref()); formats.push(VideoFormat::new(media.clone(), quality, format_kind)); } let url = media_urls .iter() .find(|u| u.to_ascii_lowercase().contains(".mp4")) .cloned() .unwrap_or_else(|| media_urls.first().cloned().unwrap_or_default()); if url.is_empty() { return None; } let mut tags = Self::extract_tags_from_detail(&detail_html); if tags.is_empty() { tags = vec![]; } let mut item = VideoItem::new( raw.id, raw.title, url, "viralxxxporn".to_string(), raw.thumb, raw.duration, ) .formats(formats) .preview( media_urls .first() .cloned() .unwrap_or_else(|| raw.detail_url.clone()), ); if let Some(views) = raw.views { item = item.views(views); } if let Some(rating) = raw.rating { item = item.rating(rating); } if !tags.is_empty() { item = item.tags(tags); } Some(item) } async fn collect_video_items( &self, html: String, requester: crate::util::requester::Requester, ) -> Vec { let raw_items = self.parse_listing_items(&html); if raw_items.is_empty() { return vec![]; } let mut in_flight = FuturesUnordered::new(); let mut out = vec![]; let mut iter = raw_items.into_iter(); const MAX_IN_FLIGHT: usize = 6; loop { while in_flight.len() < MAX_IN_FLIGHT { let Some(raw) = iter.next() else { break; }; in_flight.push(self.fetch_detailed_item(raw, requester.clone())); } let Some(result) = in_flight.next().await else { break; }; if let Some(item) = result { out.push(item); } } out } async fn fetch( &self, cache: VideoCache, page: u8, query: &str, options: ServerOptions, ) -> Result> { let mut requester = requester_or_default(&options, "viralxxxporn", "fetch"); let urls = self.build_listing_urls(page, query, &options); let mut stale_items: Vec = vec![]; for url in urls { if let Some((time, items)) = cache.get(&url) { if time.elapsed().unwrap_or_default().as_secs() < 300 { return Ok(items.clone()); } if stale_items.is_empty() && !items.is_empty() { stale_items = items.clone(); } } let html = match requester.get(&url, None).await { Ok(text) => text, Err(e) => { report_provider_error_background( "viralxxxporn", "listing.request", &format!("url={url}; error={e}"), ); continue; } }; let items = self.collect_video_items(html, requester.clone()).await; if !items.is_empty() { cache.remove(&url); cache.insert(url, items.clone()); return Ok(items); } } Ok(stale_items) } } #[async_trait] impl Provider for ViralxxxpornProvider { async fn get_videos( &self, cache: VideoCache, _pool: DbPool, _sort: String, query: Option, page: String, _per_page: String, options: ServerOptions, ) -> Vec { let page = page.parse::().unwrap_or(1); let query = query.unwrap_or_default(); match self.fetch(cache, page, &query, options).await { Ok(v) => v, Err(e) => { let mut chain = String::new(); for (idx, cause) in e.iter().enumerate() { let _ = writeln!(chain, "{}. {}", idx + 1, cause); } send_discord_error_report( e.to_string(), Some(chain), Some("Viralxxxporn Provider"), Some("Failed to fetch videos"), file!(), line!(), module_path!(), ) .await; vec![] } } } fn get_channel(&self, clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::ViralxxxpornProvider; #[test] fn parses_listing_card() { let provider = ViralxxxpornProvider::new(); let html = r#" 1080p12:344.2K 95% "#; let items = provider.parse_listing_items(html); assert_eq!(items.len(), 1); assert_eq!(items[0].id, "12345"); assert_eq!(items[0].title, "Test Title"); assert_eq!(items[0].duration, 754); assert_eq!(items[0].views, Some(4200)); assert_eq!(items[0].rating, Some(95.0)); } #[test] fn extracts_media_urls() { let provider = ViralxxxpornProvider::new(); let html = r#" "#; let urls = provider.extract_media_urls(html); assert_eq!(urls.len(), 2); assert!(urls.iter().any(|u| u.contains("video_720p.mp4"))); assert!(urls.iter().any(|u| u.contains("master.m3u8"))); } }