Files
hottub/src/providers/viralxxxporn.rs
2026-03-18 12:13:28 +00:00

658 lines
22 KiB
Rust

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{Provider, report_provider_error, requester_or_default};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use std::collections::HashSet;
/// Static channel metadata for this provider: the group it is listed under
/// and the tags describing its content.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
crate::providers::ProviderChannelMetadata {
group_id: "mainstream-tube",
tags: &["tube", "viral", "mixed"],
};
// Declares this module's error types through `error_chain!`; the single-parameter
// `Result<T>` used by the methods below comes from this invocation.
// `foreign_links` lets I/O and `wreq` HTTP errors be wrapped as variants.
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
}
/// Provider that lists and searches videos on viralxxxporn.com.
#[derive(Debug, Clone)]
pub struct ViralxxxpornProvider {
/// Base site URL without a trailing slash; request URLs and relative links
/// are resolved against it.
url: String,
}
impl ViralxxxpornProvider {
/// Creates a provider pointed at `https://viralxxxporn.com`.
pub fn new() -> Self {
    let url = String::from("https://viralxxxporn.com");
    Self { url }
}
/// Builds the channel descriptor advertised for this provider.
///
/// The client version is accepted for interface parity but does not influence
/// the result.
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let id = String::from("viralxxxporn");
    let name = String::from("Viralxxxporn");
    let description = String::from("Latest viral porn videos.");
    let favicon =
        String::from("https://www.google.com/s2/favicons?sz=64&domain=viralxxxporn.com");
    let status = String::from("active");

    Channel {
        id,
        name,
        description,
        premium: false,
        favicon,
        status,
        // No categories or user-selectable options are exposed.
        categories: Vec::new(),
        options: Vec::new(),
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
/// Returns the async "latest updates" block URL for the given page.
///
/// `page` is written verbatim into the `from` query parameter.
fn build_latest_url(&self, page: u32) -> String {
    let base = self.url.as_str();
    format!(
        "{}/latest-updates/?mode=async&function=get_block&block_id=list_videos_latest_videos_list&sort_by=post_date&from={page}",
        base
    )
}
/// Returns the request headers for the latest-updates listing: a single
/// `Referer` pointing at the listing page itself.
fn build_latest_headers(&self) -> Vec<(String, String)> {
    let referer = format!("{}/latest-updates/", self.url);
    vec![("Referer".to_string(), referer)]
}
/// Joins the whitespace-separated words of `query` with `separator`, producing a
/// string that is safe to splice into a URL path segment or query value.
///
/// Leading, trailing and repeated whitespace is collapsed. Within each word,
/// every byte outside the RFC 3986 "unreserved" set (`A-Z a-z 0-9 - _ . ~`) is
/// percent-encoded, so characters such as `&`, `#`, `?`, `/` or `+` in the
/// search text cannot change the structure of the URL it is inserted into.
/// `separator` itself is inserted verbatim.
fn build_search_path_query(query: &str, separator: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut joined = String::with_capacity(query.len());
    for (index, word) in query.split_whitespace().enumerate() {
        if index > 0 {
            joined.push_str(separator);
        }
        // Encode byte-wise so multi-byte UTF-8 characters become `%XX%YY…`.
        for byte in word.bytes() {
            if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
                joined.push(char::from(byte));
            } else {
                joined.push('%');
                joined.push(char::from(HEX[usize::from(byte >> 4)]));
                joined.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    joined
}
/// Returns the async search-result block URL for `query` at the given page.
///
/// The query appears twice: joined with `-` in the path and joined with `+`
/// in the `q` parameter. `page` is written into `from_videos`.
fn build_search_url(&self, query: &str, page: u32) -> String {
    let base = self.url.as_str();
    let path_query = Self::build_search_path_query(query, "-");
    let query_param = Self::build_search_path_query(query, "+");
    format!(
        "{}/search/{path_query}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={query_param}&from_videos={page}",
        base
    )
}
/// Returns the request headers for a search: a single `Referer` pointing at
/// the human-facing search page for `query` (words joined with `-`).
fn build_search_headers(&self, query: &str) -> Vec<(String, String)> {
    let referer = format!(
        "{}/search/{path_query}/",
        self.url,
        path_query = Self::build_search_path_query(query, "-")
    );
    vec![("Referer".to_string(), referer)]
}
/// Fetches one page of the latest-videos listing.
///
/// A cache entry younger than five minutes is returned as-is. Otherwise the
/// page is requested; if the request fails, the body is blank, or no items
/// can be parsed, the stale cached items (possibly none) are returned instead.
/// Request failures and blank bodies are reported via `report_provider_error`.
/// Successfully parsed items replace the cache entry.
async fn get(
    &self,
    cache: VideoCache,
    page: u32,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let video_url = self.build_latest_url(page);

    // Fresh entries short-circuit; older ones are kept as a fallback.
    let old_items = if let Some((time, items)) = cache.get(&video_url) {
        if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
            return Ok(items.clone());
        }
        items.clone()
    } else {
        vec![]
    };

    let mut requester = requester_or_default(
        &options,
        "viralxxxporn",
        "viralxxxporn.get.missing_requester",
    );
    let headers = self.build_latest_headers();
    let text = match requester.get_with_headers(&video_url, headers, None).await {
        Err(e) => {
            report_provider_error(
                "viralxxxporn",
                "get.request",
                &format!("url={video_url}; error={e}"),
            )
            .await;
            return Ok(old_items);
        }
        Ok(body) => body,
    };

    if text.trim().is_empty() {
        report_provider_error(
            "viralxxxporn",
            "get.empty_response",
            &format!("url={video_url}"),
        )
        .await;
        return Ok(old_items);
    }

    let video_items = self.get_video_items_from_html(text);
    if video_items.is_empty() {
        // Nothing parsed: prefer whatever was cached before.
        return Ok(old_items);
    }
    cache.remove(&video_url);
    cache.insert(video_url.clone(), video_items.clone());
    Ok(video_items)
}
/// Fetches one page of search results for `query`.
///
/// A cache entry younger than five minutes is returned as-is. Otherwise the
/// page is requested; if the request fails, the body is blank, or no items
/// can be parsed, the stale cached items (possibly none) are returned instead.
/// Request failures and blank bodies are reported via `report_provider_error`.
/// Successfully parsed items replace the cache entry.
async fn query(
    &self,
    cache: VideoCache,
    page: u32,
    query: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let video_url = self.build_search_url(query, page);

    // Fresh entries short-circuit; older ones are kept as a fallback.
    let old_items = if let Some((time, items)) = cache.get(&video_url) {
        if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
            return Ok(items.clone());
        }
        items.clone()
    } else {
        vec![]
    };

    let mut requester = requester_or_default(
        &options,
        "viralxxxporn",
        "viralxxxporn.query.missing_requester",
    );
    let headers = self.build_search_headers(query);
    let text = match requester.get_with_headers(&video_url, headers, None).await {
        Err(e) => {
            report_provider_error(
                "viralxxxporn",
                "query.request",
                &format!("url={video_url}; error={e}"),
            )
            .await;
            return Ok(old_items);
        }
        Ok(body) => body,
    };

    if text.trim().is_empty() {
        report_provider_error(
            "viralxxxporn",
            "query.empty_response",
            &format!("url={video_url}"),
        )
        .await;
        return Ok(old_items);
    }

    let video_items = self.get_video_items_from_html(text);
    if video_items.is_empty() {
        // Nothing parsed: prefer whatever was cached before.
        return Ok(old_items);
    }
    cache.remove(&video_url);
    cache.insert(video_url.clone(), video_items.clone());
    Ok(video_items)
}
/// Returns the text that follows the first occurrence of `start`, cut at the
/// first occurrence of `end` (or at the next occurrence of `start`, whichever
/// comes first).
///
/// Returns `None` when `start` does not occur in `text`. When `end` is missing,
/// everything up to the next `start` (or the end of `text`) is returned.
fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> {
    // The second piece of the split is what sits right after the first `start`.
    let mut after_start = text.split(start).skip(1);
    let tail = after_start.next()?;
    match tail.split_once(end) {
        Some((head, _)) => Some(head),
        None => Some(tail),
    }
}
/// Collapses every run of whitespace in `text` into a single space and drops
/// leading and trailing whitespace.
fn normalize_ws(text: &str) -> String {
    let mut collapsed = String::new();
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Decodes HTML entities in `text`; if the decoded bytes cannot be turned into
/// a string, the input is returned unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Tries each opening marker in `attrs` in order (e.g. `href="`) and returns
/// the first value, up to the closing double quote, that is non-empty after
/// trimming.
///
/// Returns `None` when no marker yields a non-empty value.
fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option<String> {
    for attr in attrs {
        let Some(raw) = Self::extract_between(segment, attr, "\"") else {
            continue;
        };
        let value = raw.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
/// Extracts the thumbnail URL from an HTML fragment.
///
/// Lazy-loading attributes are checked before `poster`/`src`, because `src`
/// may hold an inline placeholder. Returns an empty string when no image
/// attribute is present or when the chosen value is an inline `data:image/`
/// URI; otherwise the value is made absolute with `normalize_url`.
fn extract_thumb_url(&self, segment: &str) -> String {
    let Some(thumb_raw) = Self::first_non_empty_attr(
        segment,
        &[
            "data-original=\"",
            "data-webp=\"",
            "data-src=\"",
            "poster=\"",
            "src=\"",
        ],
    ) else {
        // Without this guard an empty value would be normalised into the bare
        // site root ("<base>/"), which is not a usable thumbnail.
        return String::new();
    };
    if thumb_raw.starts_with("data:image/") {
        return String::new();
    }
    self.normalize_url(&thumb_raw)
}
/// Turns a possibly relative link into an absolute URL.
///
/// * `http://` / `https://` URLs are returned unchanged.
/// * Protocol-relative `//host/...` URLs get an `https:` prefix.
/// * Root-relative `/path` URLs are appended to the provider base URL.
/// * Anything else is treated as relative to the site root, with leading
///   `./` prefixes removed.
fn normalize_url(&self, url: &str) -> String {
    let has_scheme = url.starts_with("http://") || url.starts_with("https://");
    if has_scheme {
        url.to_string()
    } else if url.starts_with("//") {
        format!("https:{url}")
    } else if url.starts_with('/') {
        format!("{}{}", self.url, url)
    } else {
        format!("{}/{}", self.url, url.trim_start_matches("./"))
    }
}
/// Derives a video id from a URL.
///
/// The id is the path segment that directly follows the first `video` or
/// `videos` segment. When no such segment exists, the last non-empty segment
/// is used; an input without any segment yields an empty string.
fn extract_id_from_url(url: &str) -> String {
    let segments: Vec<&str> = url
        .trim_end_matches('/')
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect();

    for pair in segments.windows(2) {
        if matches!(pair[0], "video" | "videos") {
            return pair[1].to_string();
        }
    }

    match segments.last() {
        Some(last) => (*last).to_string(),
        None => String::new(),
    }
}
fn strip_tags(text: &str) -> String {
let Ok(tag_re) = Regex::new(r"(?is)<[^>]+>") else {
return text.to_string();
};
tag_re.replace_all(text, " ").to_string()
}
fn extract_duration_seconds(text: &str) -> Option<u32> {
let colon_duration = Regex::new(r"\b(\d{1,2}:\d{2}(?::\d{2})?)\b")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| parse_time_to_seconds(m.as_str()))
.map(|seconds| seconds as u32);
if colon_duration.is_some() {
return colon_duration;
}
let minute = Regex::new(r"(?i)\b(\d{1,3})\s*(?:min|mins|minute|minutes)\b")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| m.as_str().parse::<u32>().ok());
let second = Regex::new(r"(?i)\b(\d{1,3})\s*(?:sec|secs|second|seconds)\b")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| m.as_str().parse::<u32>().ok());
match (minute, second) {
(Some(min), Some(sec)) => Some(min * 60 + sec),
(Some(min), None) => Some(min * 60),
(None, Some(sec)) => Some(sec),
(None, None) => None,
}
}
fn extract_views(text: &str) -> Option<u32> {
let with_label = Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb]?)\s*views?\b")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| parse_abbreviated_number(m.as_str().trim()));
if with_label.is_some() {
return with_label;
}
Regex::new(r"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb])\b")
.ok()
.and_then(|re| re.captures(text))
.and_then(|caps| caps.get(1))
.and_then(|m| parse_abbreviated_number(m.as_str().trim()))
}
/// Parses video items from `<a href=".../video/<digits>/<slug>">` anchors.
///
/// For every anchor the title comes from its `title` attribute or, failing
/// that, from its tag-stripped body. Thumbnail, duration and view count are
/// searched in a window of HTML around the anchor (600 bytes before, 1800
/// bytes after), because they may sit outside the anchor itself. Each video id
/// is emitted at most once; anchors without a usable title are skipped.
/// Returns an empty vector when nothing matches or a pattern fails to compile.
fn parse_anchor_items(&self, html: &str) -> Vec<VideoItem> {
    let Ok(link_re) = Regex::new(
        r#"(?is)<a[^>]+href="(?P<href>(?:https?://[^"]+)?/video/(?P<id>\d+)/[^"]+)"[^>]*>(?P<body>.*?)</a>"#,
    ) else {
        return vec![];
    };
    let Ok(title_attr_re) = Regex::new(r#"(?is)\btitle="([^"]+)""#) else {
        return vec![];
    };

    let mut items = Vec::new();
    let mut seen = HashSet::new();
    for captures in link_re.captures_iter(html) {
        let Some(id) = captures.name("id").map(|m| m.as_str().to_string()) else {
            continue;
        };
        // Only ids that already produced an item count as duplicates; an
        // untitled (e.g. image-only) anchor must not hide a later titled
        // anchor that links to the same video.
        if seen.contains(&id) {
            continue;
        }
        let href = captures
            .name("href")
            .map(|m| self.normalize_url(m.as_str()))
            .unwrap_or_default();
        let body = captures
            .name("body")
            .map(|m| m.as_str())
            .unwrap_or_default();
        let Some(full_match) = captures.get(0) else {
            continue;
        };

        // Widen the search window around the anchor, then snap both ends to
        // UTF-8 character boundaries so the slice cannot fail merely because
        // an offset landed inside a multi-byte character.
        let mut seg_start = full_match.start().saturating_sub(600);
        while !html.is_char_boundary(seg_start) {
            seg_start -= 1;
        }
        let mut seg_end = (full_match.end() + 1800).min(html.len());
        while !html.is_char_boundary(seg_end) {
            seg_end += 1;
        }
        let segment = html.get(seg_start..seg_end).unwrap_or(body);

        let title_from_attr = title_attr_re
            .captures(full_match.as_str())
            .and_then(|caps| caps.get(1))
            .map(|m| m.as_str().to_string())
            .unwrap_or_default();
        let title_source = if title_from_attr.is_empty() {
            Self::strip_tags(body)
        } else {
            title_from_attr
        };
        let title = Self::normalize_ws(&Self::decode_html(&title_source));
        if title.is_empty() {
            continue;
        }

        let thumb = self.extract_thumb_url(segment);
        // Try the raw markup first, then the same window as plain text.
        let text_segment = Self::normalize_ws(&Self::decode_html(&Self::strip_tags(segment)));
        let duration = Self::extract_duration_seconds(segment)
            .or_else(|| Self::extract_duration_seconds(&text_segment))
            .unwrap_or(0);
        let views = Self::extract_views(segment)
            .or_else(|| Self::extract_views(&text_segment))
            .unwrap_or(0);

        seen.insert(id.clone());
        let mut item =
            VideoItem::new(id, title, href, "viralxxxporn".to_string(), thumb, duration);
        if views > 0 {
            item = item.views(views);
        }
        items.push(item);
    }
    items
}
/// Parses a listing response into video items.
///
/// Anchor-based parsing (`parse_anchor_items`) is tried first. If it finds
/// nothing, the markup before the pagination block is split on a series of
/// known item-container markers; the first marker that yields at least one
/// item wins. Blank input, or input where nothing matches, gives an empty
/// vector.
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
    if html.trim().is_empty() {
        return vec![];
    }

    let anchor_items = self.parse_anchor_items(&html);
    if !anchor_items.is_empty() {
        return anchor_items;
    }

    // Ignore everything from the pagination block onwards.
    let before_pagination_div = html
        .split("<div class=\"pagination\"")
        .next()
        .unwrap_or(&html);
    let content = before_pagination_div
        .split("class=\"pagination\"")
        .next()
        .unwrap_or(&html);

    let markers = [
        "<div class=\"thumb thumb_rel item \">",
        "<div class=\"item \">",
        "<div class=\"item thumb video_",
        "<article class=\"thumb",
        "<article class=\"item",
    ];
    markers
        .iter()
        .map(|marker| {
            // The text before the first marker is not an item, hence `skip(1)`.
            content
                .split(*marker)
                .skip(1)
                .filter_map(|segment| self.parse_marker_segment(segment))
                .collect::<Vec<_>>()
        })
        .find(|parsed| !parsed.is_empty())
        .unwrap_or_default()
}

/// Parses one item-container fragment into a video item.
///
/// Returns `None` when the fragment has no link, no derivable id, or no title.
fn parse_marker_segment(&self, segment: &str) -> Option<VideoItem> {
    let video_url_raw = Self::first_non_empty_attr(segment, &["<a href=\"", "href=\""])?;
    let video_url = self.normalize_url(&video_url_raw);
    let id = Self::extract_id_from_url(&video_url);
    if id.is_empty() {
        return None;
    }

    let title_raw = Self::first_non_empty_attr(segment, &["\" title=\"", "alt=\""])
        .or_else(|| {
            Self::extract_between(segment, "<strong class=\"title\">", "<")
                .map(ToString::to_string)
        })
        .unwrap_or_default();
    let title = Self::decode_html(&title_raw).trim().to_string();
    if title.is_empty() {
        return None;
    }

    let thumb = self.extract_thumb_url(segment);

    // The first opening tag that is present decides, even if its content is blank.
    let raw_duration = [
        "<div class=\"duration\">",
        "<div class=\"time\">",
        "class=\"duration\">",
        "class=\"time\">",
    ]
    .iter()
    .find_map(|opening| Self::extract_between(segment, opening, "<"))
    .unwrap_or_default()
    .trim()
    .to_string();
    let duration = parse_time_to_seconds(&raw_duration)
        .map(|v| v as u32)
        .or_else(|| Self::extract_duration_seconds(&raw_duration))
        .unwrap_or(0);

    let views = ["<div class=\"views\">", "class=\"views\">"]
        .iter()
        .find_map(|opening| Self::extract_between(segment, opening, "<"))
        .and_then(|value| parse_abbreviated_number(value.trim()))
        .or_else(|| Self::extract_views(segment))
        .unwrap_or(0);

    let item = VideoItem::new(
        id,
        title,
        video_url,
        "viralxxxporn".to_string(),
        thumb,
        duration,
    );
    // A zero count means "unknown", so views are only attached when positive.
    Some(if views > 0 { item.views(views) } else { item })
}
}
#[async_trait]
impl Provider for ViralxxxpornProvider {
/// Entry point used by the provider framework.
///
/// Runs a search when `query` contains non-whitespace text, otherwise returns
/// the latest listing. `page` falls back to `1` when it is not a valid number.
/// `pool`, `sort` and `per_page` are accepted but unused. Errors are reported
/// via `report_provider_error` and turned into an empty result.
async fn get_videos(
    &self,
    cache: VideoCache,
    pool: DbPool,
    sort: String,
    query: Option<String>,
    page: String,
    per_page: String,
    options: ServerOptions,
) -> Vec<VideoItem> {
    let _ = pool;
    let _ = sort;
    let _ = per_page;

    let page = page.parse::<u32>().unwrap_or(1);
    let search_term = query.filter(|q| !q.trim().is_empty());
    let outcome = if let Some(q) = search_term {
        self.query(cache, page, &q, options).await
    } else {
        self.get(cache, page, options).await
    };

    match outcome {
        Err(e) => {
            report_provider_error(
                "viralxxxporn",
                "get_videos",
                &format!("page={page}; error={e}"),
            )
            .await;
            vec![]
        }
        Ok(videos) => videos,
    }
}
/// Returns this provider's channel descriptor; it is always available.
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
    let channel = self.build_channel(clientversion);
    Some(channel)
}
}
// Unit tests for URL construction and for both HTML parsing strategies.
#[cfg(test)]
mod tests {
use super::ViralxxxpornProvider;
// The page number is passed straight through as the `from` parameter.
#[test]
fn builds_latest_url_with_expected_endpoint() {
let provider = ViralxxxpornProvider::new();
assert_eq!(
provider.build_latest_url(3),
"https://viralxxxporn.com/latest-updates/?mode=async&function=get_block&block_id=list_videos_latest_videos_list&sort_by=post_date&from=3"
);
}
// Spaces become `-` in the path and Referer, and `+` in the `q` parameter.
#[test]
fn builds_search_url_and_referer_with_requested_encoding() {
let provider = ViralxxxpornProvider::new();
assert_eq!(
provider.build_search_url("adriana chechik", 4),
"https://viralxxxporn.com/search/adriana-chechik/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q=adriana+chechik&from_videos=4"
);
assert_eq!(
provider.build_search_headers("adriana chechik"),
vec![(
"Referer".to_string(),
"https://viralxxxporn.com/search/adriana-chechik/".to_string()
)]
);
}
// The link uses `/videos/` rather than `/video/`, so the anchor parser finds
// nothing and the marker-based fallback (`<div class="item ">`) is exercised.
#[test]
fn parses_common_kvs_item_markup() {
let provider = ViralxxxpornProvider::new();
let html = r#"
<div class="item ">
<a href="/videos/336186/sample-video/" title="Sample &amp; Title">
<img class="thumb lazy-load" data-original="https://cdn.example/thumb.jpg" />
</a>
<div class="duration">12:34</div>
<div class="views">1.2M</div>
</div>
<div class="pagination"></div>
"#;
let items = provider.get_video_items_from_html(html.to_string());
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "336186");
// The `&amp;` entity in the title attribute must be decoded.
assert_eq!(items[0].title, "Sample & Title");
assert_eq!(
items[0].url,
"https://viralxxxporn.com/videos/336186/sample-video/"
);
assert_eq!(items[0].thumb, "https://cdn.example/thumb.jpg");
// 12:34 -> 12 * 60 + 34 seconds.
assert_eq!(items[0].duration, 754);
assert_eq!(items[0].views, Some(1_200_000));
}
// Anchor-based parsing: duration and views come from free text inside the
// anchor body (`13 min`, `29K`) rather than from dedicated elements.
#[test]
fn parses_anchor_only_async_markup() {
let provider = ViralxxxpornProvider::new();
let html = r#"
<div class="list-videos">
<a href="/video/336186/jax-slayher-teases-her-gorgeous-ebony-ass-in-steamy-video/" title="Jax Slayher Teases Her Gorgeous Ebony Ass In Steamy Video">
<img src="https://cdn.example.com/thumb.jpg" />
<span class="video-deck">720p 13 min 29K 99%</span>
</a>
</div>
"#;
let items = provider.get_video_items_from_html(html.to_string());
assert_eq!(items.len(), 1);
assert_eq!(items[0].id, "336186");
assert_eq!(
items[0].url,
"https://viralxxxporn.com/video/336186/jax-slayher-teases-her-gorgeous-ebony-ass-in-steamy-video/"
);
assert_eq!(items[0].thumb, "https://cdn.example.com/thumb.jpg");
assert_eq!(items[0].duration, 780);
assert_eq!(items[0].views, Some(29_000));
}
// `src` holds a base64 placeholder; the real image lives in `data-original`
// and `data-webp`, which must win.
#[test]
fn prefers_real_thumb_url_over_base64_placeholder() {
let provider = ViralxxxpornProvider::new();
let html = r#"
<div class=" th item ">
<div class="main-card">
<a class="media" href="https://viralxxxporn.com/video/229322/adriana-chechik-kazumi-tease-wet-threesome-fuckfest-video-leaked-993ee5d/" title="Adriana Chechik Kazumi Tease Wet Threesome Fuckfest Video Leaked">
<img class="img lazy-load"
src="data:image/svg+xml;base64,AAAA"
data-original="https://imgcdn.viralxxxporn.com/contents/videos_screenshots/229000/229322/800x450/2.jpg"
data-webp="https://imgcdn.viralxxxporn.com/contents/videos_screenshots/229000/229322/800x450/2.jpg"
alt="Adriana Chechik Kazumi Tease Wet Threesome Fuckfest Video Leaked">
<div class="duration">25:15</div>
</a>
<div class="content">
<ul class="list">
<li><span>9.9K Views</span></li>
</ul>
</div>
</div>
</div>
"#;
let items = provider.get_video_items_from_html(html.to_string());
assert_eq!(items.len(), 1);
assert_eq!(
items[0].thumb,
"https://imgcdn.viralxxxporn.com/contents/videos_screenshots/229000/229322/800x450/2.jpg"
);
// The view count sits outside the anchor, in the surrounding markup.
assert_eq!(items[0].views, Some(9_900));
}
}