diff --git a/src/providers/viralxxxporn.rs b/src/providers/viralxxxporn.rs index 3bfec50..291fc5f 100644 --- a/src/providers/viralxxxporn.rs +++ b/src/providers/viralxxxporn.rs @@ -1,21 +1,16 @@ use crate::DbPool; use crate::api::ClientVersion; -use crate::providers::{Provider, report_provider_error_background, requester_or_default}; +use crate::providers::{Provider, report_provider_error, requester_or_default}; use crate::status::*; use crate::util::cache::VideoCache; -use crate::util::discord::send_discord_error_report; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use crate::videos::{ServerOptions, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; -use futures::stream::{FuturesUnordered, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use std::collections::HashSet; -use std::fmt::Write; -use std::vec; -use url::form_urlencoded::Serializer; error_chain! { foreign_links { @@ -29,18 +24,6 @@ pub struct ViralxxxpornProvider { url: String, } -#[derive(Debug, Clone)] -struct RawVideo { - id: String, - title: String, - detail_url: String, - thumb: String, - duration: u32, - views: Option, - rating: Option, - quality: Option, -} - impl ViralxxxpornProvider { pub fn new() -> Self { Self { @@ -51,209 +34,241 @@ impl ViralxxxpornProvider { fn build_channel(&self, _clientversion: ClientVersion) -> Channel { Channel { id: "viralxxxporn".to_string(), - name: "ViralXXXPorn".to_string(), - description: "Trending free porn and onlyfans leaks".to_string(), + name: "Viralxxxporn".to_string(), + description: "Latest viral porn videos.".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=viralxxxporn.com".to_string(), status: "active".to_string(), categories: vec![], - options: vec![ - ChannelOption { - id: "sort".to_string(), - title: "Sort".to_string(), - description: "Browse different video feeds".to_string(), - systemImage: "list.number".to_string(), - colorName: "blue".to_string(), - options: vec![ - FilterOption { - id: "latest".to_string(), - title: "Latest".to_string(), - }, - FilterOption { - id: "top-rated".to_string(), - title: "Top Rated".to_string(), - }, - FilterOption { - id: "most-viewed".to_string(), - title: "Most Viewed".to_string(), - }, - FilterOption { - id: "premium".to_string(), - title: "Premium".to_string(), - }, - ], - multiSelect: false, - }, - ChannelOption { - id: "filter".to_string(), - title: "Period".to_string(), - description: "Time range for top-rated feed".to_string(), - systemImage: "clock".to_string(), - colorName: "green".to_string(), - options: vec![ - FilterOption { - id: "all-time".to_string(), - title: "All Time".to_string(), - }, - FilterOption { - id: "month".to_string(), - title: "This Month".to_string(), - }, - FilterOption { - id: "week".to_string(), - title: "This Week".to_string(), - }, - FilterOption { - id: "today".to_string(), - title: "Today".to_string(), - }, - ], - multiSelect: false, - }, - ], + options: vec![], nsfw: true, - cacheDuration: Some(300), + cacheDuration: Some(1800), } } - fn normalize_ws(s: &str) -> String { - s.split_whitespace().collect::>().join(" ") + fn build_latest_url(&self, page: u32) -> String { + format!( + "{}/latest-updates/?mode=async&function=get_block&block_id=list_videos_latest_videos_list&sort_by=post_date&from={page}", + self.url + ) } - fn decode_html(s: &str) -> String { - decode(s.as_bytes()) + fn build_latest_headers(&self) -> Vec<(String, String)> { + vec![( + "Referer".to_string(), + format!("{}/latest-updates/", self.url), + )] + } + + fn build_search_path_query(query: &str, separator: &str) -> String { + query.split_whitespace().collect::>().join(separator) + } + + fn build_search_url(&self, query: &str, page: u32) -> String { + let query_param = Self::build_search_path_query(query, "+"); + let path_query = Self::build_search_path_query(query, "-"); + format!( + "{}/search/{path_query}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={query_param}&from_videos={page}", + self.url + ) + } + + fn build_search_headers(&self, query: &str) -> Vec<(String, String)> { + let path_query = Self::build_search_path_query(query, "-"); + vec![( + "Referer".to_string(), + format!("{}/search/{path_query}/", self.url), + )] + } + + async fn get( + &self, + cache: VideoCache, + page: u32, + options: ServerOptions, + ) -> Result> { + let video_url = self.build_latest_url(page); + let old_items = match cache.get(&video_url) { + Some((time, items)) => { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { + return Ok(items.clone()); + } + items.clone() + } + None => vec![], + }; + + let mut requester = requester_or_default( + &options, + "viralxxxporn", + "viralxxxporn.get.missing_requester", + ); + let text = match requester + .get_with_headers(&video_url, self.build_latest_headers(), None) + .await + { + Ok(text) => text, + Err(e) => { + report_provider_error( + "viralxxxporn", + "get.request", + &format!("url={video_url}; error={e}"), + ) + .await; + return Ok(old_items); + } + }; + + if text.trim().is_empty() { + report_provider_error( + "viralxxxporn", + "get.empty_response", + &format!("url={video_url}"), + ) + .await; + return Ok(old_items); + } + + let video_items = self.get_video_items_from_html(text); + if !video_items.is_empty() { + cache.remove(&video_url); + cache.insert(video_url.clone(), video_items.clone()); + return Ok(video_items); + } + + Ok(old_items) + } + + async fn query( + &self, + cache: VideoCache, + page: u32, + query: &str, + options: ServerOptions, + ) -> Result> { + let video_url = self.build_search_url(query, page); + let old_items = match cache.get(&video_url) { + Some((time, items)) => { + if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { + return Ok(items.clone()); + } + items.clone() + } + None => vec![], + }; + + let mut requester = requester_or_default( + &options, + "viralxxxporn", + "viralxxxporn.query.missing_requester", + ); + let text = match requester + .get_with_headers(&video_url, self.build_search_headers(query), None) + .await + { + Ok(text) => text, + Err(e) => { + report_provider_error( + "viralxxxporn", + "query.request", + &format!("url={video_url}; error={e}"), + ) + .await; + return Ok(old_items); + } + }; + + if text.trim().is_empty() { + report_provider_error( + "viralxxxporn", + "query.empty_response", + &format!("url={video_url}"), + ) + .await; + return Ok(old_items); + } + + let video_items = self.get_video_items_from_html(text); + if !video_items.is_empty() { + cache.remove(&video_url); + cache.insert(video_url.clone(), video_items.clone()); + return Ok(video_items); + } + + Ok(old_items) + } + + fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> { + text.split(start).nth(1)?.split(end).next() + } + + fn normalize_ws(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn decode_html(text: &str) -> String { + decode(text.as_bytes()) .to_string() - .unwrap_or_else(|_| s.to_string()) + .unwrap_or_else(|_| text.to_string()) } - fn absolute_url(&self, raw: &str) -> String { - if raw.starts_with("http://") || raw.starts_with("https://") { - return raw.to_string(); - } - if raw.starts_with("//") { - return format!("https:{raw}"); - } - if raw.starts_with('/') { - return format!("{}{}", self.url, raw); - } - format!("{}/{}", self.url, raw.trim_start_matches('/')) + fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option { + attrs.iter().find_map(|attr| { + Self::extract_between(segment, attr, "\"") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToString::to_string) + }) } - fn slugify_query(query: &str) -> String { - let mut out = String::new(); - let mut prev_dash = false; - for c in query.to_ascii_lowercase().chars() { - if c.is_ascii_alphanumeric() { - out.push(c); - prev_dash = false; - } else if !prev_dash { - out.push('-'); - prev_dash = true; - } + fn extract_thumb_url(&self, segment: &str) -> String { + let thumb_raw = Self::first_non_empty_attr( + segment, + &[ + "data-original=\"", + "data-webp=\"", + "data-src=\"", + "poster=\"", + "src=\"", + ], + ) + .unwrap_or_default(); + + if thumb_raw.starts_with("data:image/") { + return String::new(); } - out.trim_matches('-').to_string() + + self.normalize_url(&thumb_raw) } - fn encode_query_value(value: &str) -> String { - let mut serializer = Serializer::new(String::new()); - serializer.append_pair("q", value); - let encoded = serializer.finish(); - encoded.strip_prefix("q=").unwrap_or(&encoded).to_string() + fn normalize_url(&self, url: &str) -> String { + if url.starts_with("http://") || url.starts_with("https://") { + return url.to_string(); + } + if url.starts_with("//") { + return format!("https:{url}"); + } + if url.starts_with('/') { + return format!("{}{}", self.url, url); + } + format!("{}/{}", self.url, url.trim_start_matches("./")) } - fn with_page(url: &str, page: u8) -> Vec { - if page <= 1 { - return vec![url.to_string()]; - } - let base = url.trim_end_matches('/'); - let mut candidates = vec![ - format!("{base}/page/{page}/"), - format!("{base}/{page}/"), - format!("{base}/?page={page}"), - ]; - if url.contains('?') { - candidates.push(format!("{url}&page={page}")); - } - candidates - } + fn extract_id_from_url(url: &str) -> String { + let parts = url + .trim_end_matches('/') + .split('/') + .filter(|part| !part.is_empty()) + .collect::>(); - fn unique_urls(urls: Vec) -> Vec { - let mut seen = HashSet::new(); - let mut out = vec![]; - for u in urls { - if seen.insert(u.clone()) { - out.push(u); - } - } - out - } - - fn build_listing_urls(&self, page: u8, query: &str, options: &ServerOptions) -> Vec { - let sort = options - .sort - .clone() - .unwrap_or_else(|| "latest".to_string()) - .to_ascii_lowercase(); - let filter = options - .filter - .clone() - .unwrap_or_else(|| "all-time".to_string()) - .to_ascii_lowercase(); - let query = query.trim(); - let slug = Self::slugify_query(query); - let encoded = Self::encode_query_value(query); - - let mut urls = vec![]; - - if !query.is_empty() { - urls.extend(Self::with_page(&format!("{}/tags/{slug}/", self.url), page)); - urls.extend(Self::with_page( - &format!("{}/search/{}/", self.url, slug), - page, - )); - urls.extend(Self::with_page( - &format!("{}/?s={}", self.url, encoded), - page, - )); - urls.extend(Self::with_page( - &format!("{}/search?q={}", self.url, encoded), - page, - )); - return Self::unique_urls(urls); - } - - match sort.as_str() { - "top-rated" => { - urls.extend(Self::with_page(&format!("{}/top-rated/", self.url), page)); - urls.extend(Self::with_page( - &format!("{}/top-rated/?period={}", self.url, filter), - page, - )); - urls.extend(Self::with_page( - &format!("{}/top-rated/{}/", self.url, filter), - page, - )); - } - "most-viewed" => { - urls.extend(Self::with_page( - &format!("{}/most-popular/", self.url), - page, - )); - urls.extend(Self::with_page(&format!("{}/most-viewed/", self.url), page)); - } - "premium" => { - urls.extend(Self::with_page(&format!("{}/premium/", self.url), page)); - } - _ => { - urls.extend(Self::with_page( - &format!("{}/latest-updates/", self.url), - page, - )); - urls.extend(Self::with_page(&self.url, page)); - } - } - - Self::unique_urls(urls) + parts + .windows(2) + .find_map(|window| match window { + ["video", id] | ["videos", id] => Some((*id).to_string()), + _ => None, + }) + .or_else(|| parts.last().map(|id| (*id).to_string())) + .unwrap_or_default() } fn strip_tags(text: &str) -> String { @@ -263,381 +278,221 @@ impl ViralxxxpornProvider { tag_re.replace_all(text, " ").to_string() } - fn clean_media_url(raw: &str) -> String { - let mut out = raw - .trim_matches(|c: char| c == '"' || c == '\'' || c == '\\' || c.is_whitespace()) - .to_string(); - out = out - .replace("\\u0026", "&") - .replace("\\u002F", "/") - .replace("\\/", "/") - .replace("&", "&"); - out = out - .trim_end_matches(|c: char| matches!(c, ',' | ';' | ')' | ']' | '}')) - .to_string(); - if out.starts_with("//") { - return format!("https:{out}"); - } - out - } - - fn is_media_url(url: &str) -> bool { - let lower = url.to_ascii_lowercase(); - (lower.starts_with("http://") || lower.starts_with("https://")) - && (lower.contains(".mp4") || lower.contains(".m3u8")) - } - fn extract_duration_seconds(text: &str) -> Option { - let Ok(duration_re) = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b") else { - return None; - }; - duration_re - .captures(text) - .and_then(|c| c.get(1)) + let colon_duration = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b") + .ok() + .and_then(|re| re.captures(text)) + .and_then(|caps| caps.get(1)) .and_then(|m| parse_time_to_seconds(m.as_str())) - .map(|v| v as u32) - } + .map(|seconds| seconds as u32); + if colon_duration.is_some() { + return colon_duration; + } - fn extract_quality(text: &str) -> Option { - let Ok(q_re) = Regex::new(r"(?i)\b((?:\d{3,4})p|hd|4k)\b") else { - return None; - }; - q_re.captures(text) - .and_then(|c| c.get(1)) - .map(|m| m.as_str().to_ascii_lowercase()) - } + let minute = Regex::new(r"(?i)\b(\d{1,3})\s*(?:min|mins|minute|minutes)\b") + .ok() + .and_then(|re| re.captures(text)) + .and_then(|caps| caps.get(1)) + .and_then(|m| m.as_str().parse::().ok()); + let second = Regex::new(r"(?i)\b(\d{1,3})\s*(?:sec|secs|second|seconds)\b") + .ok() + .and_then(|re| re.captures(text)) + .and_then(|caps| caps.get(1)) + .and_then(|m| m.as_str().parse::().ok()); - fn extract_rating(text: &str) -> Option { - let Ok(r_re) = Regex::new(r"(?i)\b(\d{1,3})%") else { - return None; - }; - r_re.captures(text) - .and_then(|c| c.get(1)) - .and_then(|m| m.as_str().parse::().ok()) + match (minute, second) { + (Some(min), Some(sec)) => Some(min * 60 + sec), + (Some(min), None) => Some(min * 60), + (None, Some(sec)) => Some(sec), + (None, None) => None, + } } fn extract_views(text: &str) -> Option { - let Ok(v_re) = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s+\d{1,3}%") else { - return None; - }; - let raw = v_re - .captures(text) - .and_then(|c| c.get(1)) - .map(|m| m.as_str().to_string())?; - parse_abbreviated_number(&raw) - } - - fn parse_listing_items(&self, html: &str) -> Vec { - if html.trim().is_empty() { - return vec![]; + let with_label = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s*views?\b") + .ok() + .and_then(|re| re.captures(text)) + .and_then(|caps| caps.get(1)) + .and_then(|m| parse_abbreviated_number(m.as_str().trim())); + if with_label.is_some() { + return with_label; } + Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb])\b") + .ok() + .and_then(|re| re.captures(text)) + .and_then(|caps| caps.get(1)) + .and_then(|m| parse_abbreviated_number(m.as_str().trim())) + } + + fn parse_anchor_items(&self, html: &str) -> Vec { let Ok(link_re) = Regex::new( - r#"(?is)]+href="(?P/video/(?P\d+)/[^"]+)"[^>]*>(?P.*?)"#, + r#"(?is)]+href="(?P(?:https?://[^"]+)?/video/(?P\d+)/[^"]+)"[^>]*>(?P.*?)"#, ) else { return vec![]; }; let Ok(title_attr_re) = Regex::new(r#"(?is)\btitle="([^"]+)""#) else { return vec![]; }; - let Ok(thumb_re) = Regex::new(r#"(?is)(?:src|data-src|data-original|poster)="([^"]+)""#) - else { - return vec![]; - }; - - let mut out = vec![]; + let mut items = Vec::new(); let mut seen = HashSet::new(); - for caps in link_re.captures_iter(html) { - let Some(id) = caps.name("id").map(|m| m.as_str().to_string()) else { + for captures in link_re.captures_iter(html) { + let Some(id) = captures.name("id").map(|m| m.as_str().to_string()) else { continue; }; if !seen.insert(id.clone()) { continue; } - let href = caps.name("href").map(|m| m.as_str()).unwrap_or_default(); - let body = caps.name("body").map(|m| m.as_str()).unwrap_or_default(); - let m0 = match caps.get(0) { - Some(v) => v, - None => continue, + let href = captures + .name("href") + .map(|m| self.normalize_url(m.as_str())) + .unwrap_or_default(); + let body = captures + .name("body") + .map(|m| m.as_str()) + .unwrap_or_default(); + let Some(full_match) = captures.get(0) else { + continue; }; - let seg_start = m0.start().saturating_sub(600); - let seg_end = (m0.end() + 1800).min(html.len()); + + let seg_start = full_match.start().saturating_sub(600); + let seg_end = (full_match.end() + 1800).min(html.len()); let segment = html.get(seg_start..seg_end).unwrap_or(body); let title_from_attr = title_attr_re - .captures(m0.as_str()) - .and_then(|c| c.get(1)) + .captures(full_match.as_str()) + .and_then(|caps| caps.get(1)) .map(|m| m.as_str().to_string()) .unwrap_or_default(); let title_from_body = Self::strip_tags(body); - let title = Self::normalize_ws(&Self::decode_html(if !title_from_attr.is_empty() { - &title_from_attr + let title_source = if !title_from_attr.is_empty() { + title_from_attr } else { - &title_from_body - })); + title_from_body + }; + let title = Self::normalize_ws(&Self::decode_html(&title_source)); if title.is_empty() { continue; } - let thumb = thumb_re - .captures(segment) - .and_then(|c| c.get(1)) - .map(|m| self.absolute_url(m.as_str())) - .unwrap_or_default(); + let thumb = self.extract_thumb_url(segment); - let duration = Self::extract_duration_seconds(segment).unwrap_or(0); - let rating = Self::extract_rating(segment); - let views = Self::extract_views(segment); - let quality = Self::extract_quality(segment); + let text_segment = Self::normalize_ws(&Self::decode_html(&Self::strip_tags(segment))); + let duration = Self::extract_duration_seconds(segment) + .or_else(|| Self::extract_duration_seconds(&text_segment)) + .unwrap_or(0); + let views = Self::extract_views(segment) + .or_else(|| Self::extract_views(&text_segment)) + .unwrap_or(0); - out.push(RawVideo { - id, - title, - detail_url: self.absolute_url(href), - thumb, - duration, - views, - rating, - quality, - }); + let mut item = + VideoItem::new(id, title, href, "viralxxxporn".to_string(), thumb, duration); + if views > 0 { + item = item.views(views); + } + items.push(item); } - out + items } - fn extract_media_urls(&self, html: &str) -> Vec { - let patterns = [ - r#"https?:\\?/\\?/[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#, - r#"https?://[^"' <>\s]+?\.(?:mp4|m3u8)[^"' <>\s]*"#, - r#"(?is)]+src=["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, - r#"(?is)["']file["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, - r#"(?is)["']src["']\s*:\s*["']([^"']+\.(?:mp4|m3u8)[^"']*)["']"#, + fn get_video_items_from_html(&self, html: String) -> Vec { + if html.trim().is_empty() { + return vec![]; + } + + let anchor_items = self.parse_anchor_items(&html); + if !anchor_items.is_empty() { + return anchor_items; + } + + let mut items = Vec::new(); + let content = html + .split("
", + "
", + "
Vec { - let Ok(tag_re) = Regex::new(r#"(?is)href="/tags?/([^"?#/]+)/""#) else { - return vec![]; - }; - let mut out = vec![]; - let mut seen = HashSet::new(); - for caps in tag_re.captures_iter(html) { - let Some(raw) = caps.get(1).map(|m| m.as_str()) else { - continue; - }; - let tag = raw.replace('-', " ").to_ascii_lowercase(); - if !tag.is_empty() && seen.insert(tag.clone()) { - out.push(tag); - } - } - out - } - - fn quality_from_url(url: &str, fallback: Option<&String>) -> String { - let Ok(q_re) = Regex::new(r"(?i)(\d{3,4})p") else { - return fallback - .cloned() - .unwrap_or_else(|| "1080p".to_string()) - .replace('p', ""); - }; - if let Some(q) = q_re.captures(url).and_then(|c| c.get(1)) { - return q.as_str().to_string(); - } - if url.to_ascii_lowercase().contains(".m3u8") { - return "hls".to_string(); - } - fallback - .cloned() - .unwrap_or_else(|| "1080p".to_string()) - .replace('p', "") - } - - async fn fetch_detailed_item( - &self, - raw: RawVideo, - mut requester: crate::util::requester::Requester, - ) -> Option { - let detail_html = match requester.get(&raw.detail_url, None).await { - Ok(text) => text, - Err(e) => { - report_provider_error_background( - "viralxxxporn", - "detail.request", - &format!("url={}; error={e}", raw.detail_url), - ); - return None; - } - }; - - let media_urls = self.extract_media_urls(&detail_html); - if media_urls.is_empty() { - report_provider_error_background( - "viralxxxporn", - "detail.media", - &format!("no_media_url_found; url={}", raw.detail_url), - ); - return None; - } - - let mut formats = vec![]; - for media in &media_urls { - let format_kind = if media.to_ascii_lowercase().contains(".m3u8") { - "m3u8".to_string() - } else { - "mp4".to_string() - }; - let quality = Self::quality_from_url(media, raw.quality.as_ref()); - formats.push(VideoFormat::new(media.clone(), quality, format_kind)); - } - - let url = media_urls - .iter() - .find(|u| u.to_ascii_lowercase().contains(".mp4")) - .cloned() - .unwrap_or_else(|| media_urls.first().cloned().unwrap_or_default()); - if url.is_empty() { - return None; - } - - let mut tags = Self::extract_tags_from_detail(&detail_html); - if tags.is_empty() { - tags = vec![]; - } - - let mut item = VideoItem::new( - raw.id, - raw.title, - url, - "viralxxxporn".to_string(), - raw.thumb, - raw.duration, - ) - .formats(formats) - .preview( - media_urls - .first() - .cloned() - .unwrap_or_else(|| raw.detail_url.clone()), - ); - - if let Some(views) = raw.views { - item = item.views(views); - } - if let Some(rating) = raw.rating { - item = item.rating(rating); - } - if !tags.is_empty() { - item = item.tags(tags); - } - - Some(item) - } - - async fn collect_video_items( - &self, - html: String, - requester: crate::util::requester::Requester, - ) -> Vec { - let raw_items = self.parse_listing_items(&html); - if raw_items.is_empty() { - return vec![]; - } - - let mut in_flight = FuturesUnordered::new(); - let mut out = vec![]; - let mut iter = raw_items.into_iter(); - const MAX_IN_FLIGHT: usize = 6; - - loop { - while in_flight.len() < MAX_IN_FLIGHT { - let Some(raw) = iter.next() else { - break; }; - in_flight.push(self.fetch_detailed_item(raw, requester.clone())); - } - - let Some(result) = in_flight.next().await else { - break; - }; - if let Some(item) = result { - out.push(item); - } - } - - out - } - - async fn fetch( - &self, - cache: VideoCache, - page: u8, - query: &str, - options: ServerOptions, - ) -> Result> { - let mut requester = requester_or_default(&options, "viralxxxporn", "fetch"); - let urls = self.build_listing_urls(page, query, &options); - let mut stale_items: Vec = vec![]; - - for url in urls { - if let Some((time, items)) = cache.get(&url) { - if time.elapsed().unwrap_or_default().as_secs() < 300 { - return Ok(items.clone()); - } - if stale_items.is_empty() && !items.is_empty() { - stale_items = items.clone(); - } - } - - let html = match requester.get(&url, None).await { - Ok(text) => text, - Err(e) => { - report_provider_error_background( - "viralxxxporn", - "listing.request", - &format!("url={url}; error={e}"), - ); + let video_url = self.normalize_url(&video_url_raw); + let id = Self::extract_id_from_url(&video_url); + if id.is_empty() { continue; } - }; - let items = self.collect_video_items(html, requester.clone()).await; + let title_raw = Self::first_non_empty_attr(segment, &["\" title=\"", "alt=\""]) + .or_else(|| { + Self::extract_between(segment, "", "<") + .map(ToString::to_string) + }) + .unwrap_or_default(); + let title = decode(title_raw.as_bytes()) + .to_string() + .unwrap_or(title_raw) + .trim() + .to_string(); + if title.is_empty() { + continue; + } + + let thumb = self.extract_thumb_url(segment); + + let raw_duration = Self::extract_between(segment, "
", "<") + .or_else(|| Self::extract_between(segment, "
", "<")) + .or_else(|| Self::extract_between(segment, "class=\"duration\">", "<")) + .or_else(|| Self::extract_between(segment, "class=\"time\">", "<")) + .unwrap_or_default() + .trim() + .to_string(); + let duration = parse_time_to_seconds(&raw_duration) + .map(|v| v as u32) + .or_else(|| Self::extract_duration_seconds(&raw_duration)) + .unwrap_or(0); + + let views = Self::extract_between(segment, "
", "<") + .or_else(|| Self::extract_between(segment, "class=\"views\">", "<")) + .and_then(|value| parse_abbreviated_number(value.trim())) + .or_else(|| Self::extract_views(segment)) + .unwrap_or(0); + + let mut item = VideoItem::new( + id, + title, + video_url, + "viralxxxporn".to_string(), + thumb, + duration, + ); + if views > 0 { + item = item.views(views); + } + items.push(item); + } + if !items.is_empty() { - cache.remove(&url); - cache.insert(url, items.clone()); - return Ok(items); + return items; } } - Ok(stale_items) + vec![] } } @@ -646,31 +501,30 @@ impl Provider for ViralxxxpornProvider { async fn get_videos( &self, cache: VideoCache, - _pool: DbPool, - _sort: String, + pool: DbPool, + sort: String, query: Option, page: String, - _per_page: String, + per_page: String, options: ServerOptions, ) -> Vec { - let page = page.parse::().unwrap_or(1); - let query = query.unwrap_or_default(); + let _ = pool; + let _ = sort; + let _ = per_page; + let page = page.parse::().unwrap_or(1); - match self.fetch(cache, page, &query, options).await { - Ok(v) => v, + let videos = match query { + Some(q) if !q.trim().is_empty() => self.query(cache, page, &q, options).await, + _ => self.get(cache, page, options).await, + }; + + match videos { + Ok(videos) => videos, Err(e) => { - let mut chain = String::new(); - for (idx, cause) in e.iter().enumerate() { - let _ = writeln!(chain, "{}. {}", idx + 1, cause); - } - send_discord_error_report( - e.to_string(), - Some(chain), - Some("Viralxxxporn Provider"), - Some("Failed to fetch videos"), - file!(), - line!(), - module_path!(), + report_provider_error( + "viralxxxporn", + "get_videos", + &format!("page={page}; error={e}"), ) .await; vec![] @@ -688,35 +542,110 @@ mod tests { use super::ViralxxxpornProvider; #[test] - fn parses_listing_card() { + fn builds_latest_url_with_expected_endpoint() { let provider = ViralxxxpornProvider::new(); - let html = r#" - - - 1080p12:344.2K 95% - - "#; - let items = provider.parse_listing_items(html); - assert_eq!(items.len(), 1); - assert_eq!(items[0].id, "12345"); - assert_eq!(items[0].title, "Test Title"); - assert_eq!(items[0].duration, 754); - assert_eq!(items[0].views, Some(4200)); - assert_eq!(items[0].rating, Some(95.0)); + assert_eq!( + provider.build_latest_url(3), + "https://viralxxxporn.com/latest-updates/?mode=async&function=get_block&block_id=list_videos_latest_videos_list&sort_by=post_date&from=3" + ); } #[test] - fn extracts_media_urls() { + fn builds_search_url_and_referer_with_requested_encoding() { + let provider = ViralxxxpornProvider::new(); + assert_eq!( + provider.build_search_url("adriana chechik", 4), + "https://viralxxxporn.com/search/adriana-chechik/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q=adriana+chechik&from_videos=4" + ); + assert_eq!( + provider.build_search_headers("adriana chechik"), + vec![( + "Referer".to_string(), + "https://viralxxxporn.com/search/adriana-chechik/".to_string() + )] + ); + } + + #[test] + fn parses_common_kvs_item_markup() { let provider = ViralxxxpornProvider::new(); let html = r#" - +
+ + + +
12:34
+
1.2M
+
+ "#; - let urls = provider.extract_media_urls(html); - assert_eq!(urls.len(), 2); - assert!(urls.iter().any(|u| u.contains("video_720p.mp4"))); - assert!(urls.iter().any(|u| u.contains("master.m3u8"))); + + let items = provider.get_video_items_from_html(html.to_string()); + assert_eq!(items.len(), 1); + assert_eq!(items[0].id, "336186"); + assert_eq!(items[0].title, "Sample & Title"); + assert_eq!( + items[0].url, + "https://viralxxxporn.com/videos/336186/sample-video/" + ); + assert_eq!(items[0].thumb, "https://cdn.example/thumb.jpg"); + assert_eq!(items[0].duration, 754); + assert_eq!(items[0].views, Some(1_200_000)); + } + + #[test] + fn parses_anchor_only_async_markup() { + let provider = ViralxxxpornProvider::new(); + let html = r#" + + "#; + + let items = provider.get_video_items_from_html(html.to_string()); + assert_eq!(items.len(), 1); + assert_eq!(items[0].id, "336186"); + assert_eq!( + items[0].url, + "https://viralxxxporn.com/video/336186/jax-slayher-teases-her-gorgeous-ebony-ass-in-steamy-video/" + ); + assert_eq!(items[0].thumb, "https://cdn.example.com/thumb.jpg"); + assert_eq!(items[0].duration, 780); + assert_eq!(items[0].views, Some(29_000)); + } + + #[test] + fn prefers_real_thumb_url_over_base64_placeholder() { + let provider = ViralxxxpornProvider::new(); + let html = r#" +
+
+ + Adriana Chechik Kazumi Tease Wet Threesome Fuckfest Video Leaked +
25:15
+
+
+
    +
  • 9.9K Views
  • +
+
+
+
+ "#; + + let items = provider.get_video_items_from_html(html.to_string()); + assert_eq!(items.len(), 1); + assert_eq!( + items[0].thumb, + "https://imgcdn.viralxxxporn.com/contents/videos_screenshots/229000/229322/800x450/2.jpg" + ); + assert_eq!(items[0].views, Some(9_900)); } }