This commit is contained in:
Simon
2026-04-05 20:31:38 +00:00
parent 9773590f64
commit 7b464fe796
10 changed files with 815 additions and 565 deletions

View File

@@ -177,15 +177,18 @@ impl HomoxxxProvider {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
let trimmed = query.trim().trim_start_matches('@');
if let Some((kind, value)) = trimmed.split_once(':') {
let kind = kind.trim().to_ascii_lowercase();
let value = value.trim().replace(' ', "-");
if !value.is_empty()
&& matches!(
kind.as_str(),
"models" | "pornstars" | "stars" | "channels" | "categories" | "tags"
)
{
video_url = format!("{}/{}/{}/", self.url, kind, value);
}
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {

View File

@@ -14,6 +14,8 @@ use diesel::r2d2;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Version;
@@ -41,15 +43,58 @@ error_chain! {
/// Provider state for the `missav` channel.
///
/// The derived `Clone` copies the `Arc`, so every clone shares one tag map.
#[derive(Debug, Clone)]
pub struct MissavProvider {
/// Base URL of the site, without a trailing slash.
url: String,
/// Normalized tag key -> full listing URL, filled in by `insert_tag_mapping`
/// and read by `resolve_query_url`.
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl MissavProvider {
pub fn new() -> Self {
MissavProvider {
url: "https://missav.ws".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Builds the canonical lookup key for a tag or query.
///
/// The input is trimmed, ASCII-lowercased, `_` and `-` are treated as spaces,
/// and every run of whitespace collapses to a single space.
fn normalize_key(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let spaced = lowered.replace(['_', '-'], " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Turns a URL slug into a display label.
///
/// Leading and trailing `/` are removed, `-` becomes a space, and whitespace
/// runs collapse to one space. Letter case is left untouched.
fn humanize_slug(value: &str) -> String {
    let spaced = value.trim_matches('/').replace('-', " ");
    let mut label = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !label.is_empty() {
            label.push(' ');
        }
        label.push_str(word);
    }
    label
}
/// Records `key -> path_or_url` in the shared tag map.
///
/// The key is stored in normalized form and the target is stored trimmed.
/// Nothing is recorded when either is empty, or when the map's write lock
/// cannot be acquired.
fn insert_tag_mapping(&self, key: &str, path_or_url: &str) {
    let lookup_key = Self::normalize_key(key);
    let target = path_or_url.trim();
    if lookup_key.is_empty() || target.is_empty() {
        return;
    }
    match self.tag_map.write() {
        Ok(mut registry) => {
            registry.insert(lookup_key, target.to_string());
        }
        // Best effort: a failed lock simply skips the update.
        Err(_) => {}
    }
}
/// Looks up `query` in the tag map and builds a paginated listing URL.
///
/// Returns `None` when the normalized query has no mapping or the map's read
/// lock cannot be acquired. `page` is appended as a query parameter (with `&`
/// if the mapped URL already has a `?`), followed by `&sort=<sort>` when
/// `sort` is non-empty.
fn resolve_query_url(&self, query: &str, page: u8, sort: &str) -> Option<String> {
    let lookup_key = Self::normalize_key(query);
    let base = self.tag_map.read().ok()?.get(&lookup_key).cloned()?;
    let joiner = match base.contains('?') {
        true => "&",
        false => "?",
    };
    let sort_suffix = if sort.is_empty() {
        String::new()
    } else {
        format!("&sort={sort}")
    };
    Some(format!("{base}{joiner}page={page}{sort_suffix}"))
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "missav".to_string(),
@@ -248,10 +293,13 @@ impl MissavProvider {
if !sort.is_empty() {
sort = format!("&sort={}", sort);
}
let url_str = format!(
let mut url_str = format!(
"{}/{}/search/{}?page={}{}",
self.url, language, search_string, page, sort
);
if let Some(mapped_url) = self.resolve_query_url(query, page, &sort.replace("&sort=", "")) {
url_str = mapped_url;
}
if let Some((time, items)) = cache.get(&url_str) {
if time.elapsed().unwrap_or_default().as_secs() < 3600 {
@@ -386,19 +434,54 @@ impl MissavProvider {
// 3. Extract Tags (Generic approach to avoid repetitive code)
let mut tags = vec![];
for (label, prefix) in [
("Actress:", "@actress"),
("Actor:", "@actor"),
("Maker:", "@maker"),
("Genre:", "@genre"),
for (label, route_kind) in [
("Actress:", "actress"),
("Actor:", "actor"),
("Maker:", "maker"),
("Genre:", "genre"),
] {
let marker = format!("<span>{}</span>", label);
if let Some(section) = extract(&vid, &marker, "</div>") {
for part in section.split("class=\"text-nord13 font-medium\">").skip(1) {
if let Some(val) = part.split('<').next() {
let clean = val.trim();
if !clean.is_empty() {
tags.push(format!("{}:{}", prefix, clean));
for anchor in section.split("<a ").skip(1) {
let href = anchor
.split("href=\"")
.nth(1)
.and_then(|value| value.split('"').next())
.unwrap_or_default()
.to_string();
let title = anchor
.split("class=\"text-nord13 font-medium\">")
.nth(1)
.and_then(|value| value.split('<').next())
.map(str::trim)
.unwrap_or_default()
.to_string();
if !title.is_empty() {
tags.push(title.clone());
if !href.is_empty() {
let full_url = if href.starts_with("http://") || href.starts_with("https://") {
href.clone()
} else {
format!("{}{}", self.url, href)
};
self.insert_tag_mapping(&title, &full_url);
let slug = href
.trim_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
if !slug.is_empty() {
self.insert_tag_mapping(&slug, &full_url);
self.insert_tag_mapping(
&format!("{route_kind}:{}", slug),
&full_url,
);
self.insert_tag_mapping(
&format!("{route_kind}:{}", Self::humanize_slug(&slug)),
&full_url,
);
}
}
}
}

View File

@@ -10,7 +10,9 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::env;
use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Client;
use wreq_util::Emulation;
@@ -31,14 +33,66 @@ error_chain! {
/// Provider state for the `okxxx` channel.
///
/// The derived `Clone` copies the `Arc`, so every clone shares one tag map.
#[derive(Debug, Clone)]
pub struct OkxxxProvider {
/// Base URL of the site, without a trailing slash.
url: String,
/// Normalized tag key -> `kind/slug` path, filled in by `insert_tag_mapping`
/// and read by `resolve_query_path`.
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl OkxxxProvider {
pub fn new() -> Self {
OkxxxProvider {
url: "https://ok.xxx".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Builds the canonical lookup key for a tag or query: ASCII-lowercased words
/// joined by single spaces, where whitespace, `_` and `-` all separate words.
fn normalize_key(value: &str) -> String {
    let mut key = String::with_capacity(value.len());
    let words = value
        .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
        .filter(|word| !word.is_empty());
    for word in words {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(&word.to_ascii_lowercase());
    }
    key
}
/// Turns a URL slug into a display label: surrounding `/` removed, `-`
/// replaced by spaces, whitespace runs collapsed. Case is preserved.
fn humanize_slug(value: &str) -> String {
    let without_slashes = value.trim_matches('/');
    let spaced = without_slashes.replace('-', " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Records a `kind/slug` route under the normalized slug and, when given,
/// the normalized title.
///
/// * `kind`  - route prefix, used verbatim as the first path segment.
/// * `slug`  - route slug; surrounding whitespace and `/` are stripped.
/// * `title` - optional display title registered as an additional key.
///
/// Keys that normalize to an empty string are never stored (a slug such as
/// `"-"` used to create an empty-string entry). Keys are computed before the
/// write lock is taken so the lock is held only for the inserts. If the lock
/// cannot be acquired the update is skipped.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }
    let path = format!("{kind}/{slug}");
    let slug_key = Self::normalize_key(slug);
    // Without a title the slug is the only key; avoid normalizing it twice.
    let title_key = title
        .map(Self::normalize_key)
        .filter(|key| !key.is_empty() && *key != slug_key);
    if slug_key.is_empty() && title_key.is_none() {
        return;
    }
    if let Ok(mut map) = self.tag_map.write() {
        if let Some(title_key) = title_key {
            map.insert(title_key, path.clone());
        }
        if !slug_key.is_empty() {
            map.insert(slug_key, path);
        }
    }
}
/// Maps a query to a site-relative `kind/slug` path, if one is known.
///
/// Leading `@` characters are ignored. An explicit `kind:value` query with a
/// `sites` or `models` kind is converted directly (spaces in the value become
/// `-`). Otherwise the normalized query is looked up in the tag map. Returns
/// `None` when neither applies or the map's read lock cannot be acquired.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let bare = query.trim().trim_start_matches('@');

    // The explicit `kind:value` form takes precedence over the tag map.
    let explicit = bare.split_once(':').and_then(|(prefix, rest)| {
        let route = prefix.trim().to_ascii_lowercase();
        let slug = rest.trim().trim_matches('/').replace(' ', "-");
        let known_route = matches!(route.as_str(), "sites" | "models");
        (known_route && !slug.is_empty()).then(|| format!("{route}/{slug}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let lookup_key = Self::normalize_key(bare);
    if lookup_key.is_empty() {
        return None;
    }
    match self.tag_map.read() {
        Ok(registry) => registry.get(&lookup_key).cloned(),
        Err(_) => None,
    }
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "okxxx".to_string(),
@@ -177,16 +231,8 @@ impl OkxxxProvider {
async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result<Vec<VideoItem>> {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
if let Some(path) = self.resolve_query_path(query) {
video_url = format!("{}/{}/{}/", self.url, path, page);
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
@@ -405,7 +451,8 @@ impl OkxxxProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@sites:{}", tag));
self.insert_tag_mapping("sites", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}
@@ -425,7 +472,8 @@ impl OkxxxProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@models:{}", tag));
self.insert_tag_mapping("models", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}

View File

@@ -883,11 +883,35 @@ impl OmgxxxProvider {
) -> Result<Vec<VideoItem>> {
let mut search_type = "search";
let mut search_string = query.to_string().to_ascii_lowercase().trim().to_string();
let trimmed = query.trim().trim_start_matches('@').to_ascii_lowercase();
if let Some((kind, raw)) = trimmed.split_once(':') {
let candidate = raw.trim().replace(' ', "-");
if !candidate.is_empty() {
match kind.trim() {
"models" | "model" | "stars" => {
search_type = "models";
search_string = candidate;
}
"sites" | "site" => {
search_type = "sites";
search_string = candidate;
}
"networks" | "network" => {
search_type = "networks";
search_string = candidate;
}
_ => {}
}
}
}
match self.stars.read() {
Ok(stars) => {
if let Some(star) = stars
.iter()
.find(|s| s.title.to_ascii_lowercase() == search_string)
.find(|s| {
s.title.eq_ignore_ascii_case(&search_string)
|| s.id.eq_ignore_ascii_case(&search_string)
})
{
search_type = "models";
search_string = star.id.clone();
@@ -901,7 +925,10 @@ impl OmgxxxProvider {
Ok(sites) => {
if let Some(site) = sites
.iter()
.find(|s| s.title.to_ascii_lowercase() == search_string)
.find(|s| {
s.title.eq_ignore_ascii_case(&search_string)
|| s.id.eq_ignore_ascii_case(&search_string)
})
{
search_type = "sites";
search_string = site.id.clone();
@@ -911,6 +938,23 @@ impl OmgxxxProvider {
report_provider_error_background("omgxxx", "query.sites_read", &e.to_string());
}
}
match self.networks.read() {
Ok(networks) => {
if let Some(network) = networks
.iter()
.find(|n| {
n.title.eq_ignore_ascii_case(&search_string)
|| n.id.eq_ignore_ascii_case(&search_string)
})
{
search_type = "networks";
search_string = network.id.clone();
}
}
Err(e) => {
report_provider_error_background("omgxxx", "query.networks_read", &e.to_string());
}
}
let mut video_url = format!("{}/{}/{}/{}/", self.url, search_type, search_string, page);
video_url = video_url.replace(" ", "+");
// Check our Video Cache. If the result is younger than 1 hour, we return it.

View File

@@ -10,7 +10,9 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::env;
use std::sync::{Arc, RwLock};
use std::vec;
use wreq::Client;
use wreq_util::Emulation;
@@ -31,14 +33,66 @@ error_chain! {
/// Provider state for the `perfectgirls` channel.
///
/// The derived `Clone` copies the `Arc`, so every clone shares one tag map.
#[derive(Debug, Clone)]
pub struct PerfectgirlsProvider {
/// Base URL of the site, without a trailing slash.
url: String,
/// Normalized tag key -> `kind/slug` path, filled in by `insert_tag_mapping`
/// and read by `resolve_query_path`.
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PerfectgirlsProvider {
pub fn new() -> Self {
PerfectgirlsProvider {
url: "https://www.perfectgirls.xxx".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Builds the canonical lookup key for a tag or query.
///
/// The input is trimmed, ASCII-lowercased, `_` and `-` are treated as spaces,
/// and every run of whitespace collapses to a single space.
fn normalize_key(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let spaced = lowered.replace(['_', '-'], " ");
    let mut key = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key
}
/// Turns a URL slug into a display label.
///
/// Leading and trailing `/` are removed, `-` becomes a space, and whitespace
/// runs collapse to one space. Letter case is left untouched.
fn humanize_slug(value: &str) -> String {
    let spaced = value.trim_matches('/').replace('-', " ");
    let mut label = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !label.is_empty() {
            label.push(' ');
        }
        label.push_str(word);
    }
    label
}
/// Records a `kind/slug` route under the normalized slug and, when given,
/// the normalized title.
///
/// * `kind`  - route prefix, used verbatim as the first path segment.
/// * `slug`  - route slug; surrounding whitespace and `/` are stripped.
/// * `title` - optional display title registered as an additional key.
///
/// Keys that normalize to an empty string are never stored (a slug such as
/// `"-"` used to create an empty-string entry). Keys are computed before the
/// write lock is taken so the lock is held only for the inserts. If the lock
/// cannot be acquired the update is skipped.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }
    let path = format!("{kind}/{slug}");
    let slug_key = Self::normalize_key(slug);
    // Without a title the slug is the only key; avoid normalizing it twice.
    let title_key = title
        .map(Self::normalize_key)
        .filter(|key| !key.is_empty() && *key != slug_key);
    if slug_key.is_empty() && title_key.is_none() {
        return;
    }
    if let Ok(mut map) = self.tag_map.write() {
        if let Some(title_key) = title_key {
            map.insert(title_key, path.clone());
        }
        if !slug_key.is_empty() {
            map.insert(slug_key, path);
        }
    }
}
/// Maps a query to a site-relative `kind/slug` path, if one is known.
///
/// Leading `@` characters are ignored. An explicit `kind:value` query with a
/// `channels` or `pornstars` kind is converted directly (spaces in the value
/// become `-`). Otherwise the normalized query is looked up in the tag map.
/// Returns `None` when neither applies or the read lock cannot be acquired.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let bare = query.trim().trim_start_matches('@');

    // The explicit `kind:value` form takes precedence over the tag map.
    let explicit = bare.split_once(':').and_then(|(prefix, rest)| {
        let route = prefix.trim().to_ascii_lowercase();
        let slug = rest.trim().trim_matches('/').replace(' ', "-");
        let known_route = matches!(route.as_str(), "channels" | "pornstars");
        (known_route && !slug.is_empty()).then(|| format!("{route}/{slug}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let lookup_key = Self::normalize_key(bare);
    if lookup_key.is_empty() {
        return None;
    }
    match self.tag_map.read() {
        Ok(registry) => registry.get(&lookup_key).cloned(),
        Err(_) => None,
    }
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "perfectgirls".to_string(),
@@ -177,16 +231,8 @@ impl PerfectgirlsProvider {
async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result<Vec<VideoItem>> {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
if let Some(path) = self.resolve_query_path(query) {
video_url = format!("{}/{}/{}/", self.url, path, page);
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
@@ -407,7 +453,8 @@ impl PerfectgirlsProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@channels:{}", tag));
self.insert_tag_mapping("channels", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}
@@ -427,7 +474,8 @@ impl PerfectgirlsProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@pornstars:{}", tag));
self.insert_tag_mapping("pornstars", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}

View File

@@ -13,6 +13,8 @@ use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use wreq::Client;
use wreq::Version;
use wreq_util::Emulation;
@@ -40,11 +42,13 @@ struct PerverzijaDbEntry {
/// Provider state for tube.perverzija.com.
///
/// The derived `Clone` copies the `Arc`, so every clone shares one tag map.
#[derive(Debug, Clone)]
pub struct PerverzijaProvider {
/// Base URL of the site; `new()` sets it with a trailing slash.
url: String,
/// Normalized tag key -> `kind/slug` path, filled in by `insert_tag_mapping`
/// and read by `resolve_query_path`.
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PerverzijaProvider {
pub fn new() -> Self {
PerverzijaProvider {
url: "https://tube.perverzija.com/".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
@@ -143,6 +147,104 @@ impl PerverzijaProvider {
title.trim().to_string()
}
/// Returns the prefix of `haystack` that ends just before the earliest
/// occurrence of any marker in `end_markers`.
///
/// When no marker occurs (or the list is empty) the whole input is returned.
fn clip_at_first<'a>(haystack: &'a str, end_markers: &[&str]) -> &'a str {
    let cut = end_markers
        .iter()
        .filter_map(|marker| haystack.find(*marker))
        .min()
        .unwrap_or(haystack.len());
    &haystack[..cut]
}
/// Trims a listing segment at the first marker from a fixed list, so the
/// returned slice stops before whichever of those markers appears first.
fn listing_item_scope(haystack: &str) -> &str {
    const ITEM_END_MARKERS: [&str; 4] = ["</article>", "</li>", "<article ", "video-item post"];
    Self::clip_at_first(haystack, &ITEM_END_MARKERS)
}
/// Extracts the text that follows the first occurrence of `label` in `text`,
/// cut at the first closing or sectioning marker from a fixed list.
///
/// Returns an empty string when `label` does not occur.
fn detail_meta_section<'a>(text: &'a str, label: &str) -> &'a str {
    let stop_markers = [
        "</div>",
        "</p>",
        "<strong>",
        "<div class=\"related",
        "<section",
        "<aside",
    ];
    let after_label = match text.split(label).nth(1) {
        Some(rest) => rest,
        None => "",
    };
    Self::clip_at_first(after_label, &stop_markers)
}
/// Appends the trimmed `value` to `tags` unless it is empty or an existing
/// entry already equals it ignoring ASCII case.
fn push_unique(tags: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    let already_present = tags
        .iter()
        .any(|existing| existing.eq_ignore_ascii_case(candidate));
    if !candidate.is_empty() && !already_present {
        tags.push(candidate.to_owned());
    }
}
/// Collects the `href` value of every `<a href="...">` occurrence in
/// `section`, in document order.
///
/// Only anchors written exactly as `<a href="` are recognised; each value
/// runs up to the next `"` (or to the end of its chunk if none follows).
fn parse_href_values(section: &str) -> Vec<String> {
    let mut hrefs = Vec::new();
    for chunk in section.split("<a href=\"").skip(1) {
        let end = chunk.find('"').unwrap_or(chunk.len());
        hrefs.push(chunk[..end].to_string());
    }
    hrefs
}
/// Builds the canonical lookup key for a tag or query: ASCII-lowercased words
/// joined by single spaces, where whitespace, `_` and `-` all separate words.
fn normalize_key(value: &str) -> String {
    let mut key = String::with_capacity(value.len());
    let words = value
        .split(|c: char| c.is_whitespace() || c == '_' || c == '-')
        .filter(|word| !word.is_empty());
    for word in words {
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(&word.to_ascii_lowercase());
    }
    key
}
/// Turns a URL slug into a display label: surrounding `/` removed, `-`
/// replaced by spaces, whitespace runs collapsed. Case is preserved.
fn humanize_slug(value: &str) -> String {
    let without_slashes = value.trim_matches('/');
    let spaced = without_slashes.replace('-', " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Records a `kind/slug` route under the normalized slug and, when given,
/// the normalized title.
///
/// * `kind`  - route prefix, used verbatim as the first path segment.
/// * `slug`  - route slug; surrounding whitespace and `/` are stripped.
/// * `title` - optional display title registered as an additional key.
///
/// Keys that normalize to an empty string are never stored (a slug such as
/// `"-"` used to create an empty-string entry). Keys are computed before the
/// write lock is taken so the lock is held only for the inserts. If the lock
/// cannot be acquired the update is skipped.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }
    let path = format!("{kind}/{slug}");
    let slug_key = Self::normalize_key(slug);
    // Without a title the slug is the only key; avoid normalizing it twice.
    let title_key = title
        .map(Self::normalize_key)
        .filter(|key| !key.is_empty() && *key != slug_key);
    if slug_key.is_empty() && title_key.is_none() {
        return;
    }
    if let Ok(mut map) = self.tag_map.write() {
        if let Some(title_key) = title_key {
            map.insert(title_key, path.clone());
        }
        if !slug_key.is_empty() {
            map.insert(slug_key, path);
        }
    }
}
/// Maps a query to a site-relative `kind/slug` path, if one is known.
///
/// Leading `@` characters are ignored. An explicit `kind:value` query whose
/// kind is `studio`, `stars`, `tag` or `genre` is converted directly (spaces
/// in the value become `-`). Otherwise the normalized query is looked up in
/// the tag map. Returns `None` when neither applies or the read lock cannot
/// be acquired.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let bare = query.trim().trim_start_matches('@');

    // The explicit `kind:value` form takes precedence over the tag map.
    let explicit = bare.split_once(':').and_then(|(prefix, rest)| {
        let route = prefix.trim().to_ascii_lowercase();
        let slug = rest.trim().trim_matches('/').replace(' ', "-");
        let known_route = matches!(route.as_str(), "studio" | "stars" | "tag" | "genre");
        (known_route && !slug.is_empty()).then(|| format!("{route}/{slug}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let lookup_key = Self::normalize_key(bare);
    if lookup_key.is_empty() {
        return None;
    }
    match self.tag_map.read() {
        Ok(registry) => registry.get(&lookup_key).cloned(),
        Err(_) => None,
    }
}
async fn get(
&self,
cache: VideoCache,
@@ -212,13 +314,8 @@ impl PerverzijaProvider {
url_str = format!("{}?s={}", self.url, search_string);
}
if query.starts_with("@studio:") {
let studio_name = query.replace("@studio:", "");
url_str = format!("{}studio/{}/page/{}/", self.url, studio_name, page);
query_parse = false;
} else if query.starts_with("@stars:") {
let stars_name = query.replace("@stars:", "");
url_str = format!("{}stars/{}/page/{}/", self.url, stars_name, page);
if let Some(path) = self.resolve_query_path(query) {
url_str = format!("{}/{}/page/{}/", self.url.trim_end_matches('/'), path, page);
query_parse = false;
}
url_str = url_str.replace("page/1/", "");
@@ -292,7 +389,8 @@ impl PerverzijaProvider {
return vec![];
}
for video_segment in raw_videos {
for raw_video_segment in raw_videos {
let video_segment = Self::listing_item_scope(raw_video_segment);
let title = Self::extract_title(video_segment);
let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
@@ -370,15 +468,17 @@ impl PerverzijaProvider {
let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
let slug = studio
.split("/\"")
.collect::<Vec<&str>>()
.first()
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "");
self.insert_tag_mapping("studio", &slug, None);
Self::push_unique(
&mut tags,
Self::humanize_slug(&slug),
);
}
}
@@ -396,7 +496,8 @@ impl PerverzijaProvider {
.unwrap_or_default()
.to_string();
if !tag_name.is_empty() {
tags.push(format!("@stars:{}", tag_name));
self.insert_tag_mapping("stars", &tag_name, None);
Self::push_unique(&mut tags, Self::humanize_slug(&tag_name));
}
}
}
@@ -407,7 +508,7 @@ impl PerverzijaProvider {
if token.starts_with("tag-") {
let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
if !tag_name.is_empty() {
tags.push(tag_name.replace("-", " ").to_string());
Self::push_unique(&mut tags, tag_name.replace("-", " ").to_string());
}
}
}
@@ -579,88 +680,58 @@ impl PerverzijaProvider {
url_str = "!".to_string()
}
let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
let mut tags: Vec<String> = Vec::new();
let studios_parts = text
.split("<strong>Studio: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
);
}
}
if text.contains("<strong>Stars: </strong>") {
let stars_parts: Vec<&str> = text
.split("<strong>Stars: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in stars_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
}
let studios_section = Self::detail_meta_section(&text, "<strong>Studio: </strong>");
for href in Self::parse_href_values(studios_section) {
if href.starts_with("https://tube.perverzija.com/studio/") {
let studio_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/studio/", "");
self.insert_tag_mapping("studio", &studio_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&studio_slug));
}
}
let tags_parts: Vec<&str> = text
.split("<strong>Tags: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in tags_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
let stars_section = Self::detail_meta_section(&text, "<strong>Stars: </strong>");
for href in Self::parse_href_values(stars_section) {
if href.starts_with("https://tube.perverzija.com/stars/") {
let star_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/stars/", "");
self.insert_tag_mapping("stars", &star_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
}
}
let tags_section = if text.contains("<strong>Tags: </strong>") {
Self::detail_meta_section(&text, "<strong>Tags: </strong>")
} else {
Self::detail_meta_section(&text, "<strong>Genres: </strong>")
};
for href in Self::parse_href_values(tags_section) {
if href.starts_with("https://tube.perverzija.com/stars/") {
let star_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/stars/", "");
self.insert_tag_mapping("stars", &star_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
continue;
}
if href.starts_with("https://tube.perverzija.com/tag/") {
let tag_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/tag/", "");
self.insert_tag_mapping("tag", &tag_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&tag_slug));
continue;
}
if href.starts_with("https://tube.perverzija.com/genre/") {
let genre_slug = href
.trim_end_matches('/')
.replace("https://tube.perverzija.com/genre/", "");
self.insert_tag_mapping("genre", &genre_slug, None);
Self::push_unique(&mut tags, Self::humanize_slug(&genre_slug));
}
}

View File

@@ -9,6 +9,8 @@ use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::vec;
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
@@ -27,14 +29,66 @@ error_chain! {
/// Provider state for the `pornhat` channel.
///
/// The derived `Clone` copies the `Arc`, so every clone shares one tag map.
#[derive(Debug, Clone)]
pub struct PornhatProvider {
/// Base URL of the site, without a trailing slash.
url: String,
/// Normalized tag key -> `kind/slug` path, filled in by `insert_tag_mapping`
/// and read by `resolve_query_path`.
tag_map: Arc<RwLock<HashMap<String, String>>>,
}
impl PornhatProvider {
pub fn new() -> Self {
PornhatProvider {
url: "https://www.pornhat.com".to_string(),
tag_map: Arc::new(RwLock::new(HashMap::new())),
}
}
/// Builds the canonical lookup key for a tag or query.
///
/// The input is trimmed, ASCII-lowercased, `_` and `-` are treated as spaces,
/// and every run of whitespace collapses to a single space.
fn normalize_key(value: &str) -> String {
    let lowered = value.trim().to_ascii_lowercase();
    let spaced = lowered.replace(['_', '-'], " ");
    let words: Vec<&str> = spaced.split_whitespace().collect();
    words.join(" ")
}
/// Turns a URL slug into a display label.
///
/// Leading and trailing `/` are removed, `-` becomes a space, and whitespace
/// runs collapse to one space. Letter case is left untouched.
fn humanize_slug(value: &str) -> String {
    let spaced = value.trim_matches('/').replace('-', " ");
    let mut label = String::with_capacity(spaced.len());
    for word in spaced.split_whitespace() {
        if !label.is_empty() {
            label.push(' ');
        }
        label.push_str(word);
    }
    label
}
/// Records a `kind/slug` route under the normalized slug and, when given,
/// the normalized title.
///
/// * `kind`  - route prefix, used verbatim as the first path segment.
/// * `slug`  - route slug; surrounding whitespace and `/` are stripped.
/// * `title` - optional display title registered as an additional key.
///
/// Keys that normalize to an empty string are never stored (a slug such as
/// `"-"` used to create an empty-string entry). Keys are computed before the
/// write lock is taken so the lock is held only for the inserts. If the lock
/// cannot be acquired the update is skipped.
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
    let slug = slug.trim().trim_matches('/');
    if slug.is_empty() {
        return;
    }
    let path = format!("{kind}/{slug}");
    let slug_key = Self::normalize_key(slug);
    // Without a title the slug is the only key; avoid normalizing it twice.
    let title_key = title
        .map(Self::normalize_key)
        .filter(|key| !key.is_empty() && *key != slug_key);
    if slug_key.is_empty() && title_key.is_none() {
        return;
    }
    if let Ok(mut map) = self.tag_map.write() {
        if let Some(title_key) = title_key {
            map.insert(title_key, path.clone());
        }
        if !slug_key.is_empty() {
            map.insert(slug_key, path);
        }
    }
}
/// Maps a query to a site-relative `kind/slug` path, if one is known.
///
/// Leading `@` characters are ignored. An explicit `kind:value` query with a
/// `sites` or `models` kind is converted directly (spaces in the value become
/// `-`). Otherwise the normalized query is looked up in the tag map. Returns
/// `None` when neither applies or the map's read lock cannot be acquired.
fn resolve_query_path(&self, query: &str) -> Option<String> {
    let bare = query.trim().trim_start_matches('@');

    // The explicit `kind:value` form takes precedence over the tag map.
    let explicit = bare.split_once(':').and_then(|(prefix, rest)| {
        let route = prefix.trim().to_ascii_lowercase();
        let slug = rest.trim().trim_matches('/').replace(' ', "-");
        let known_route = matches!(route.as_str(), "sites" | "models");
        (known_route && !slug.is_empty()).then(|| format!("{route}/{slug}"))
    });
    if explicit.is_some() {
        return explicit;
    }

    let lookup_key = Self::normalize_key(bare);
    if lookup_key.is_empty() {
        return None;
    }
    match self.tag_map.read() {
        Ok(registry) => registry.get(&lookup_key).cloned(),
        Err(_) => None,
    }
}
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
Channel {
id: "pornhat".to_string(),
@@ -127,16 +181,8 @@ impl PornhatProvider {
) -> Result<Vec<VideoItem>> {
let search_string = query.to_lowercase().trim().replace(" ", "-");
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
if search_string.starts_with("@") {
let url_part = search_string
.split("@")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.replace(":", "/");
video_url = format!("{}/{}/", self.url, url_part);
if let Some(path) = self.resolve_query_path(query) {
video_url = format!("{}/{}/{}/", self.url, path, page);
}
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
@@ -296,7 +342,8 @@ impl PornhatProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@sites:{}", tag));
self.insert_tag_mapping("sites", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}
@@ -316,7 +363,8 @@ impl PornhatProvider {
.collect::<Vec<String>>();
for tag in raw_tags {
if !tag.is_empty() {
tags.push(format!("@models:{}", tag));
self.insert_tag_mapping("models", &tag, None);
tags.push(Self::humanize_slug(&tag));
}
}
}

View File

@@ -1,23 +1,18 @@
use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, report_provider_error, report_provider_error_background, requester_or_default,
build_proxy_url, strip_url_scheme,
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use crate::videos::{ServerOptions, VideoItem};
use async_trait::async_trait;
use chrono::{DateTime, NaiveDate, Utc};
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use serde_json::Value;
use std::collections::HashSet;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
@@ -32,7 +27,6 @@ pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
const BASE_URL: &str = "https://www.pornhub.com";
const CHANNEL_ID: &str = "pornhub";
const DETAIL_ENRICH_LIMIT: usize = 12;
error_chain! {
foreign_links {
@@ -216,11 +210,6 @@ impl PornhubProvider {
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}"))))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value)
.map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}"))))
}
fn text_of(element: &ElementRef<'_>) -> String {
element
.text()
@@ -376,7 +365,12 @@ impl PornhubProvider {
let title_selector = Self::selector(".title a, .thumbnailTitle, span.title a")?;
let image_selector = Self::selector("img")?;
let duration_selector = Self::selector(".duration")?;
let views_selector = Self::selector(".views var")?;
let views_selector = Self::selector(".views var, .views")?;
let rating_selector =
Self::selector(".value, .rating, .ratingInfo, .percent, .ratingPercent")?;
let tag_link_selector = Self::selector(
"a[href*=\"/categories/\"], a[href*=\"/video/search\"], a[href*=\"/pornstar/\"], a[href*=\"/model/\"], a[href*=\"/channels/\"], a[href*=\"/users/\"]",
)?;
let uploader_selector = Self::selector(
".videoUploaderBlock a[href], .usernameWrap a[href], .usernameWrapper a[href]",
)?;
@@ -438,6 +432,8 @@ impl PornhubProvider {
.value()
.attr("src")
.or_else(|| value.value().attr("data-mediumthumb"))
.or_else(|| value.value().attr("data-path"))
.or_else(|| value.value().attr("data-src"))
})
.map(|value| self.normalize_url(value))
.unwrap_or_default();
@@ -449,10 +445,20 @@ impl PornhubProvider {
.and_then(|value| parse_time_to_seconds(&value))
.unwrap_or(0) as u32;
let views = card
.select(&views_selector)
.next()
.and_then(|value| parse_abbreviated_number(&Self::text_of(&value)));
let views = card.select(&views_selector).find_map(|value| {
let text = Self::text_of(&value);
parse_abbreviated_number(&text)
.or_else(|| parse_abbreviated_number(text.replace("views", "").trim()))
});
let rating = card.select(&rating_selector).find_map(|value| {
let text = Self::text_of(&value);
let cleaned = text
.trim()
.trim_end_matches('%')
.replace(',', "")
.replace(' ', "");
cleaned.parse::<f32>().ok()
});
let uploader_link = card.select(&uploader_selector).next();
let uploader = uploader_link
@@ -486,13 +492,18 @@ impl PornhubProvider {
item.uploaderId = uploader_url
.as_deref()
.and_then(Self::uploader_identity_from_url);
item.rating = rating;
let mut tags = Vec::new();
if let Some(tag) = uploader_url
.as_deref()
.and_then(|url| self.query_tag_from_uploader_url(url))
{
tags.push(tag);
Self::push_unique(&mut tags, tag);
}
for tag_link in card.select(&tag_link_selector) {
let tag = Self::decode_html(&Self::text_of(&tag_link));
Self::push_unique(&mut tags, tag);
}
if !tags.is_empty() {
item.tags = Some(tags);
@@ -549,292 +560,6 @@ impl PornhubProvider {
values.push(normalized.to_string());
}
/// Collects the HTML-decoded text of every element matching `selector_text`
/// in `document`, passing each through `push_unique`.
///
/// Fails only when `selector_text` cannot be parsed as a selector.
fn collect_named_links(&self, document: &Html, selector_text: &str) -> Result<Vec<String>> {
    let link_selector = Self::selector(selector_text)?;
    let names = document
        .select(&link_selector)
        .fold(Vec::new(), |mut collected, link| {
            let label = Self::decode_html(&Self::text_of(&link));
            Self::push_unique(&mut collected, label);
            collected
        });
    Ok(names)
}
/// Parses an upload date into seconds since the Unix epoch.
///
/// Accepts either an RFC 3339 timestamp or a bare `YYYY-MM-DD` date, which is
/// interpreted as midnight UTC. Surrounding whitespace is ignored.
///
/// Returns `None` when the text matches neither format, or when the instant
/// lies before the epoch: a negative timestamp cannot be represented as `u64`,
/// and the previous `as u64` cast silently wrapped it into a huge value.
fn parse_upload_date(value: &str) -> Option<u64> {
    let value = value.trim();
    let seconds = match DateTime::parse_from_rfc3339(value) {
        Ok(parsed) => parsed.timestamp(),
        Err(_) => {
            let midnight = NaiveDate::parse_from_str(value, "%Y-%m-%d")
                .ok()?
                .and_hms_opt(0, 0, 0)?;
            DateTime::<Utc>::from_naive_utc_and_offset(midnight, Utc).timestamp()
        }
    };
    // Checked conversion: pre-1970 instants yield `None` instead of wrapping.
    u64::try_from(seconds).ok()
}
/// Renders a JSON string or number as a `String`.
///
/// Returns `None` for a missing value and for every other JSON type.
fn json_string(value: Option<&Value>) -> Option<String> {
    match value? {
        Value::String(text) => Some(text.to_string()),
        Value::Number(number) => Some(number.to_string()),
        _ => None,
    }
}
/// Reads a `u32` from a JSON number or from a JSON string holding digits.
///
/// Returns `None` when the value is missing, has another JSON type, is not a
/// non-negative integer, or does not fit in `u32`.
fn json_u32(value: Option<&Value>) -> Option<u32> {
    match value? {
        Value::Number(number) => {
            let wide = number.as_u64()?;
            u32::try_from(wide).ok()
        }
        Value::String(text) => text.parse::<u32>().ok(),
        _ => None,
    }
}
/// Finds the first `var flashvars_<digits> = {...};` assignment in `html` and
/// parses the captured object as JSON.
///
/// Returns `Ok(None)` when no such assignment is present. Errors when the
/// pattern fails to compile or the captured text is not valid JSON.
fn extract_flashvars(&self, html: &str) -> Result<Option<Value>> {
    let pattern = Self::regex(r#"(?s)var\s+flashvars_\d+\s*=\s*(\{.*?\});"#)?;
    let payload = pattern
        .captures(html)
        .and_then(|captures| captures.get(1));
    match payload {
        Some(found) => Ok(Some(serde_json::from_str::<Value>(found.as_str())?)),
        None => Ok(None),
    }
}
/// Returns the first JSON-LD `VideoObject` found in the document's
/// `application/ld+json` script tags.
///
/// Each script is parsed as JSON; scripts that fail to parse are skipped. A
/// script matches if its root is a video object, or if its root is an array
/// containing one (the first such entry is cloned). Returns `Ok(None)` when
/// nothing matches; errors only when the selector fails to parse.
fn extract_ld_video_object(&self, document: &Html) -> Result<Option<Value>> {
    let ld_json_selector = Self::selector("script[type=\"application/ld+json\"]")?;
    let found = document
        .select(&ld_json_selector)
        .filter_map(|script| serde_json::from_str::<Value>(&script.inner_html()).ok())
        .find_map(|parsed| {
            if Self::is_video_object(&parsed) {
                return Some(parsed);
            }
            parsed
                .as_array()?
                .iter()
                .find(|entry| Self::is_video_object(entry))
                .cloned()
        });
    Ok(found)
}
fn is_video_object(value: &Value) -> bool {
value
.get("@type")
.and_then(|value| value.as_str())
.is_some_and(|value| value.eq_ignore_ascii_case("VideoObject"))
}
/// Builds the list of playable formats from the `mediaDefinitions` array of a
/// flashvars JSON object.
///
/// Only entries whose `format` is `hls` (compared in lowercase) and that have
/// a non-empty normalized `videoUrl` are kept. The result is ordered by
/// ascending quality rank, with `auto` and unparsable qualities ranked 0.
/// A missing or non-array `mediaDefinitions` yields an empty list.
fn build_formats_from_flashvars(&self, flashvars: &Value) -> Vec<VideoFormat> {
// Collect (rank, label, url) triples so sorting and dedup can run before
// the final `VideoFormat` values are built.
let mut entries = flashvars
.get("mediaDefinitions")
.and_then(|value| value.as_array())
.into_iter()
.flatten()
.filter_map(|entry| {
let format = entry
.get("format")
.and_then(|value| value.as_str())
.unwrap_or_default()
.to_ascii_lowercase();
// Anything that is not an HLS entry is dropped.
if format != "hls" {
return None;
}
// `?` skips entries with a missing, non-string or empty URL.
let url = entry
.get("videoUrl")
.and_then(|value| value.as_str())
.map(|value| self.normalize_url(value))
.filter(|value| !value.is_empty())?;
// A missing or non-string quality is treated as "auto".
let quality = entry
.get("quality")
.and_then(|value| value.as_str())
.unwrap_or("auto");
// Label is the quality with a trailing `p` ensured, except for "auto".
let label = match quality {
"auto" => "auto".to_string(),
value if value.ends_with('p') => value.to_string(),
value => format!("{value}p"),
};
// Rank is the numeric part of the label; "auto" and non-numeric labels get 0.
let rank = if label == "auto" {
0
} else {
label
.trim_end_matches('p')
.parse::<u32>()
.unwrap_or(0)
};
Some((rank, label, url))
})
.collect::<Vec<_>>();
entries.sort_by_key(|(rank, _, _)| *rank);
// NOTE(review): `dedup_by` only collapses *adjacent* entries, so identical
// URLs that sort to different ranks are kept. Confirm that is intended.
entries.dedup_by(|a, b| a.2 == b.2);
entries
.into_iter()
.map(|(_, label, url)| {
// The label is reused as both format id and format note.
VideoFormat::new(url, label.clone(), "m3u8".to_string())
.format_id(label.clone())
.format_note(label)
})
.collect()
}
/// Enriches a listing `item` with metadata scraped from its detail page.
///
/// Sources, applied in this order:
/// 1. the `flashvars_*` player object: title, thumbnail, duration, canonical
///    URL and the HLS formats built by `build_formats_from_flashvars`;
/// 2. the JSON-LD `VideoObject`: thumbnail (overrides the flashvars one),
///    upload date, and view count / uploader when the item has none yet;
/// 3. category, tag, pornstar and model links, merged into `item.tags`
///    without duplicates.
///
/// Empty values never overwrite existing data. Errors come from malformed
/// flashvars JSON or from selector/regex construction in the helpers.
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
    // Link selectors whose anchor texts are collected as tags, in merge order.
    const TAG_LINK_SELECTORS: [&str; 4] = [
        ".categoriesWrapper a.item, .categoriesWrapper a[href*=\"/categories/\"]",
        ".tagsWrapper a.item, .tagsWrapper a[href*=\"/video/search\"]",
        ".pornstarsWrapper a.item, .pornstarsWrapper a[href*=\"/pornstar/\"], a[href*=\"/pornstar/\"]",
        ".modelsWrapper a.item, .modelsWrapper a[href*=\"/model/\"], a[href*=\"/model/\"]",
    ];

    let document = Html::parse_document(html);
    if let Some(flashvars) = self.extract_flashvars(html)? {
        if let Some(title) = Self::json_string(flashvars.get("video_title")) {
            let decoded = Self::decode_html(&title);
            if !decoded.is_empty() {
                item.title = decoded;
            }
        }
        if let Some(thumb) = Self::json_string(flashvars.get("image_url")) {
            let normalized = self.normalize_url(&thumb);
            if !normalized.is_empty() {
                item.thumb = normalized;
            }
        }
        if let Some(duration) = Self::json_u32(flashvars.get("video_duration")) {
            item.duration = duration;
        }
        if let Some(link_url) = Self::json_string(flashvars.get("link_url")) {
            let normalized = self.normalize_url(&link_url);
            if !normalized.is_empty() {
                item.url = normalized;
            }
        }
        // Attach the playable HLS renditions; previously they were parsed by
        // a helper that was never called, so `item.formats` stayed unset.
        let formats = self.build_formats_from_flashvars(&flashvars);
        if !formats.is_empty() {
            item.formats = Some(formats);
        }
    }
    if let Some(ld_video) = self.extract_ld_video_object(&document)? {
        // `thumbnailUrl` may be a single string or a list of strings.
        if let Some(thumb) = ld_video
            .get("thumbnailUrl")
            .and_then(|value| match value {
                Value::String(value) => Some(value.to_string()),
                Value::Array(values) => values
                    .iter()
                    .find_map(|entry| entry.as_str().map(ToOwned::to_owned)),
                _ => None,
            })
        {
            let normalized = self.normalize_url(&thumb);
            if !normalized.is_empty() {
                item.thumb = normalized;
            }
        }
        if let Some(uploaded_at) = ld_video
            .get("uploadDate")
            .and_then(|value| value.as_str())
            .and_then(Self::parse_upload_date)
        {
            item.uploadedAt = Some(uploaded_at);
        }
        if item.views.is_none() {
            item.views = Self::json_string(ld_video.get("interactionCount"))
                .and_then(|value| value.parse::<u32>().ok());
        }
        if item.uploader.is_none() {
            // `author` is either a plain name or an object with a `name` key.
            item.uploader = ld_video
                .get("author")
                .and_then(|value| match value {
                    Value::String(value) => Some(value.to_string()),
                    Value::Object(values) => values
                        .get("name")
                        .and_then(|value| value.as_str())
                        .map(ToOwned::to_owned),
                    _ => None,
                })
                .filter(|value| !value.trim().is_empty());
        }
    }
    let mut tags = item.tags.clone().unwrap_or_default();
    for selector in TAG_LINK_SELECTORS {
        for value in self.collect_named_links(&document, selector)? {
            Self::push_unique(&mut tags, value);
        }
    }
    if !tags.is_empty() {
        item.tags = Some(tags);
    }
    Ok(item)
}
/// Fetches detail pages for the leading listing items and merges the
/// scraped metadata into them, preserving the original listing order.
///
/// Items at index `DETAIL_ENRICH_LIMIT` or beyond, and items without a URL,
/// are passed through untouched. A failed detail fetch is reported in the
/// background and the unmodified listing item is kept. Up to four detail
/// futures are buffered at a time.
async fn enrich_listing_items(&self, items: Vec<VideoItem>, options: &ServerOptions) -> Vec<VideoItem> {
    let requester = requester_or_default(options, CHANNEL_ID, "enrich_listing_items.requester");
    let jobs = items.into_iter().enumerate().map(|(position, original)| {
        let provider = self.clone();
        let requester = requester.clone();
        async move {
            let eligible = position < DETAIL_ENRICH_LIMIT && !original.url.is_empty();
            if !eligible {
                return (position, original);
            }
            // Keep a copy so a failed fetch can fall back to the listing data.
            let backup = original.clone();
            match provider.fetch_detail(original, requester).await {
                Ok(detailed) => (position, detailed),
                Err(error) => {
                    report_provider_error_background(
                        CHANNEL_ID,
                        "enrich_listing_items.detail",
                        &format!("url={}; error={error}", backup.url),
                    );
                    (position, backup)
                }
            }
        }
    });
    let mut results = stream::iter(jobs)
        .buffer_unordered(4)
        .collect::<Vec<_>>()
        .await;
    // Completion order is arbitrary; restore the listing order.
    results.sort_by_key(|(position, _)| *position);
    results.into_iter().map(|(_, item)| item).collect()
}
async fn fetch_detail(&self, item: VideoItem, mut requester: crate::util::requester::Requester) -> Result<VideoItem> {
let html = requester
.get(&item.url, None)
.await
.map_err(|error| ErrorKind::Parse(format!("detail request failed: {error}")))?;
self.apply_detail_video(item, &html)
}
async fn fetch_listing(
&self,
cache: VideoCache,
@@ -843,6 +568,10 @@ impl PornhubProvider {
query: Option<&str>,
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
if query.is_some() && self.tag_map.read().unwrap().is_empty() {
let _ = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await;
}
let (video_url, scope) = self.build_listing_request(page, sort, query);
let old_items = match cache.get(&video_url) {
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
@@ -883,20 +612,14 @@ impl PornhubProvider {
return Ok(old_items);
}
let mut items = self.enrich_listing_items(items, &options).await;
let mut items = items;
// Rewrite thumbs and previews to use the proxy when appropriate
// Rewrite thumbs to use the page-driven thumb proxy when appropriate
for item in items.iter_mut() {
let proxied = self.proxied_thumb(&options, &item.thumb);
let proxied = self.proxied_thumb(&options, &item.url);
if !proxied.is_empty() {
item.thumb = proxied;
}
if let Some(prev) = item.preview.clone() {
let proxied_prev = self.proxied_thumb(&options, &prev);
if !proxied_prev.is_empty() {
item.preview = Some(proxied_prev);
}
}
}
cache.remove(&video_url);
@@ -906,28 +629,33 @@ impl PornhubProvider {
}
impl PornhubProvider {
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
if thumb.is_empty() {
fn proxied_thumb(&self, options: &ServerOptions, page_url: &str) -> String {
if page_url.is_empty() {
return String::new();
}
if !PornhubThumbPolicy::is_allowed_thumb_url(thumb) {
if !PornhubThumbPolicy::is_allowed_video_page_url(page_url) {
return String::new();
}
build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb))
build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(page_url))
}
}
struct PornhubThumbPolicy;
impl PornhubThumbPolicy {
fn is_allowed_thumb_url(url: &str) -> bool {
fn is_allowed_video_page_url(url: &str) -> bool {
let Some(url) = Url::parse(url).ok() else { return false; };
if url.scheme() != "https" {
return false;
}
let Some(host) = url.host_str() else { return false; };
// Only allow the specific Pornhub CDN host used for thumbnails
host.eq_ignore_ascii_case("pix-cdn77.phncdn.com")
if !host.eq_ignore_ascii_case("pornhub.com")
&& !host.eq_ignore_ascii_case("www.pornhub.com")
&& !host.ends_with(".pornhub.com")
{
return false;
}
url.path().starts_with("/view_video.php") || url.path().starts_with("/video/")
}
}
@@ -986,6 +714,33 @@ mod tests {
assert!(provider.parse_query_target("teacher").is_none());
}
#[test]
fn resolves_query_from_tag_map_by_id_or_title() {
    // Register one channel under both its slug and its lower-cased title.
    let provider = PornhubProvider::new();
    let channel = TagInfo {
        kind: QueryTargetKind::Channel,
        slug: "mature-4k".to_string(),
        title: "Mature 4K".to_string(),
    };
    {
        let mut tag_map = provider.tag_map.write().unwrap();
        for key in ["mature-4k", "mature 4k"] {
            tag_map.insert(key.to_string(), channel.clone());
        }
    }

    // Both the slug and the human-readable title must resolve to the channel.
    let lookups = [
        ("mature-4k", "id lookup should resolve"),
        ("Mature 4K", "title lookup should resolve"),
    ];
    for (query, failure) in lookups {
        let target = provider.parse_query_target(query).expect(failure);
        assert!(matches!(target.kind, QueryTargetKind::Channel));
        assert_eq!(target.slug, "mature-4k");
    }
}
#[test]
fn parses_browse_listing_cards() {
let provider = PornhubProvider::new();
@@ -1030,66 +785,47 @@ mod tests {
assert!(items[0]
.tags
.as_ref()
.is_some_and(|values| values.iter().any(|value| value == "@model:honeycore")));
.is_some_and(|values| values.iter().any(|value| value.eq_ignore_ascii_case("honeycore"))));
}
#[test]
fn applies_detail_video_metadata() {
fn parses_listing_metadata_without_detail_fetch() {
let provider = PornhubProvider::new();
let item = VideoItem::new(
"69cfa159b1377".to_string(),
"placeholder".to_string(),
"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377".to_string(),
CHANNEL_ID.to_string(),
"https://example.com/thumb.jpg".to_string(),
0,
);
let html = r#"
<script>
var flashvars_482929735 = {
"video_title":"Brazzers Detail Title",
"image_url":"https://example.com/detail.jpg",
"video_duration":"930",
"link_url":"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377",
"mediaDefinitions":[
{"format":"hls","quality":"240","videoUrl":"https://cdn.example.com/master-240.m3u8"},
{"format":"hls","quality":"720","videoUrl":"https://cdn.example.com/master-720.m3u8"},
{"format":"mp4","quality":"720","videoUrl":"https://cdn.example.com/video.mp4","remote":true}
]
};
</script>
<script type="application/ld+json">
{
"@type":"VideoObject",
"thumbnailUrl":"https://example.com/ld-thumb.jpg",
"uploadDate":"2026-04-03T00:00:00+00:00",
"interactionCount":"5700",
"author":{"name":"Brazzers"}
}
</script>
<div class="categoriesWrapper">
<a class="item" href="/categories/big-tits">Big Tits</a>
</div>
<div class="tagsWrapper">
<a class="item" href="/video/search?search=maid">Maid</a>
</div>
<ul id="videoCategory" class="nf-videos videos search-video-thumbs">
<li class="pcVideoListItem js-pop videoblock videoBox withKebabMenu"
data-video-id="466705435"
data-video-vkey="67ed937c986b1">
<a href="/view_video.php?viewkey=67ed937c986b1" title="Black asian teen"></a>
<img data-src="https://example.com/thumb.jpg"
data-mediabook="https://example.com/preview.webm" />
<div class="marker-overlays"><var class="duration">12:18</var></div>
<div class="videoDetailsBlock">
<span class="views"><var>199K</var> views</span>
<span class="value">95%</span>
</div>
<a href="/categories/anal">Anal</a>
<a href="/pornstar/jane-doe">Jane Doe</a>
</li>
</ul>
"#;
let item = provider
.apply_detail_video(item, html)
.expect("detail page should enrich item");
assert_eq!(item.title, "Brazzers Detail Title");
assert_eq!(item.thumb, "https://example.com/ld-thumb.jpg");
assert_eq!(item.duration, 930);
assert_eq!(item.views, Some(5700));
assert_eq!(item.uploader.as_deref(), Some("Brazzers"));
assert!(item.uploadedAt.is_some());
assert_eq!(item.formats.as_ref().map(|values| values.len()), Some(2));
assert!(item.tags.as_ref().is_some_and(|values| values
.iter()
.any(|value| value == "Big Tits")));
assert!(item.tags.as_ref().is_some_and(|values| values
.iter()
.any(|value| value == "Maid")));
let items = provider
.parse_listing_page(html, ListingScope::Browse)
.expect("browse listing should parse");
assert_eq!(items.len(), 1);
assert_eq!(items[0].thumb, "https://example.com/thumb.jpg");
assert_eq!(items[0].preview.as_deref(), Some("https://example.com/preview.webm"));
assert_eq!(items[0].views, Some(199000));
assert_eq!(items[0].rating, Some(95.0));
assert!(items[0]
.tags
.as_ref()
.is_some_and(|values| values.iter().any(|value| value == "Anal")));
assert!(items[0]
.tags
.as_ref()
.is_some_and(|values| values.iter().any(|value| value == "Jane Doe")));
}
}

View File

@@ -10,7 +10,6 @@ use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
pub mod doodstream;
pub mod hanimecdn;
pub mod hqpornerthumb;
pub mod pornhubthumb;
pub mod javtiful;
pub mod noodlemagazine;
pub mod pimpbunny;
@@ -18,6 +17,7 @@ pub mod pimpbunnythumb;
pub mod porndish;
pub mod porndishthumb;
pub mod pornhd3x;
pub mod pornhubthumb;
pub mod shooshtime;
pub mod spankbang;
pub mod sxyprn;

View File

@@ -1,51 +1,220 @@
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use ntex::{
http::Response,
web::{self, HttpRequest, error},
};
use ntex::web::{self, HttpRequest};
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use crate::util::requester::Requester;
const PORNHUB_ROOT: &str = "https://www.pornhub.com/";
/// Rebuilds the upstream page URL from the proxied `endpoint` path segment.
///
/// Leading slashes are dropped and `https://` is prepended when the endpoint
/// carries no `http(s)://` scheme. The request's query string is appended
/// only when it is non-empty and the URL has no `?` yet.
fn endpoint_to_page_url(req: &HttpRequest) -> String {
    let endpoint = req.match_info().query("endpoint").trim_start_matches('/');
    let has_scheme = endpoint.starts_with("http://") || endpoint.starts_with("https://");
    let base = if has_scheme {
        endpoint.to_string()
    } else {
        format!("https://{endpoint}")
    };
    let query = req.query_string();
    if query.is_empty() || base.contains('?') {
        return base;
    }
    format!("{base}?{query}")
}
/// Accepts only HTTPS URLs on `pornhub.com` (or any subdomain of it) whose
/// path starts with `/view_video.php` or `/video/`.
fn is_allowed_video_page_url(url: &str) -> bool {
    let parsed = match Url::parse(url) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };
    if parsed.scheme() != "https" {
        return false;
    }
    let host_ok = parsed.host_str().is_some_and(|host| {
        matches!(host, "pornhub.com" | "www.pornhub.com") || host.ends_with(".pornhub.com")
    });
    if !host_ok {
        return false;
    }
    let path = parsed.path();
    ["/view_video.php", "/video/"]
        .iter()
        .any(|prefix| path.starts_with(*prefix))
}
/// Turns a thumbnail candidate found in a page into an absolute URL.
///
/// * `//host/path` becomes `https://host/path`;
/// * absolute `http(s)://` URLs are returned unchanged;
/// * `/path` is resolved against the scheme and host of `page_url`;
/// * anything else, including the empty string, yields `None`.
fn normalize_candidate_url(candidate: &str, page_url: &Url) -> Option<String> {
    if candidate.starts_with("//") {
        Some(format!("https:{candidate}"))
    } else if candidate.starts_with("https://") || candidate.starts_with("http://") {
        Some(candidate.to_string())
    } else if candidate.starts_with('/') {
        page_url
            .host_str()
            .map(|host| format!("{}://{}{}", page_url.scheme(), host, candidate))
    } else {
        None
    }
}
/// Accepts only HTTPS image URLs hosted on `pornhub.com`, one of its
/// subdomains, or a `*.phncdn.com` host.
///
/// The path must end (case-insensitively) in `.jpg`, `.jpeg`, `.png`,
/// `.webp` or `.avif`.
fn is_allowed_thumb_url(url: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = [".jpg", ".jpeg", ".png", ".webp", ".avif"];

    let Ok(parsed) = Url::parse(url) else {
        return false;
    };
    let trusted = parsed.scheme() == "https"
        && parsed.host_str().is_some_and(|host| {
            matches!(host, "pornhub.com" | "www.pornhub.com")
                || host.ends_with(".pornhub.com")
                || host.ends_with(".phncdn.com")
        });
    if !trusted {
        return false;
    }
    let lowered_path = parsed.path().to_ascii_lowercase();
    IMAGE_EXTENSIONS
        .iter()
        .any(|extension| lowered_path.ends_with(extension))
}
/// Decodes the escapes that occur in URLs embedded in JavaScript/JSON string
/// literals.
///
/// Handles `\/` and every `\uXXXX` escape with four hex digits of either
/// case (for example `\u002F`, `\u003a`, `\u0026`). Any other backslash
/// sequence, a truncated `\u` escape, or an escape that is not a valid
/// Unicode scalar value (such as a lone surrogate) is copied through
/// unchanged.
fn decode_js_string(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut rest = value;
    while let Some(position) = rest.find('\\') {
        decoded.push_str(&rest[..position]);
        let tail = &rest[position + 1..];
        if let Some(after_slash) = tail.strip_prefix('/') {
            decoded.push('/');
            rest = after_slash;
            continue;
        }
        if let Some(after_u) = tail.strip_prefix('u') {
            // `get(..4)` is `None` for short input or a non-char boundary.
            let scalar = after_u
                .get(..4)
                .filter(|hex| hex.bytes().all(|byte| byte.is_ascii_hexdigit()))
                .and_then(|hex| u32::from_str_radix(hex, 16).ok())
                .and_then(char::from_u32);
            if let Some(character) = scalar {
                decoded.push(character);
                rest = &after_u[4..];
                continue;
            }
        }
        // Unknown escape: keep the backslash and continue after it.
        decoded.push('\\');
        rest = tail;
    }
    decoded.push_str(rest);
    decoded
}
/// Extracts an allowed thumbnail URL from a video page.
///
/// The `og:image`, `twitter:image` and `thumbnailUrl` meta tags are tried
/// first, in document order. If none yields an allowed URL, the first
/// `"image_url": "..."` occurrence in the raw HTML is tried. Returns `None`
/// when no candidate passes `is_allowed_thumb_url`.
fn find_thumb_in_html(html: &str, page_url: &Url) -> Option<String> {
    // Normalize a raw candidate and keep it only if it passes the allow-list.
    let accept = |raw: &str| {
        normalize_candidate_url(raw, page_url).filter(|candidate| is_allowed_thumb_url(candidate))
    };

    let document = Html::parse_document(html);
    let meta_selector = Selector::parse(
        "meta[property=\"og:image\"], meta[name=\"twitter:image\"], meta[itemprop=\"thumbnailUrl\"]",
    )
    .ok()?;
    let from_meta = document
        .select(&meta_selector)
        .find_map(|meta| accept(meta.value().attr("content").unwrap_or_default().trim()));
    if from_meta.is_some() {
        return from_meta;
    }

    let image_url_re = Regex::new(r#""image_url"\s*:\s*"([^"]+)""#).ok()?;
    let captures = image_url_re.captures(html)?;
    let raw = captures
        .get(1)
        .map(|matched| matched.as_str())
        .unwrap_or_default();
    accept(&decode_js_string(raw))
}
pub async fn get_image(
req: HttpRequest,
requester: web::types::State<Requester>,
) -> Result<impl web::Responder, web::Error> {
let endpoint = req.match_info().query("endpoint").to_string();
let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
endpoint
} else {
format!("https://{}", endpoint.trim_start_matches('/'))
};
let page_url = endpoint_to_page_url(&req);
if !is_allowed_video_page_url(&page_url) {
return Ok(web::HttpResponse::BadRequest().finish());
}
let upstream = match requester
.get_ref()
.clone()
.get_raw_with_headers(
image_url.as_str(),
vec![("Referer".to_string(), "https://www.pornhub.com/".to_string())],
let mut requester = requester.get_ref().clone();
let html = match requester
.get_with_headers(
page_url.as_str(),
vec![("Referer".to_string(), PORNHUB_ROOT.to_string())],
None,
)
.await
{
Ok(response) => response,
Ok(value) => value,
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
};
let status = upstream.status();
let headers = upstream.headers().clone();
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
let parsed_page_url = match Url::parse(&page_url) {
Ok(value) => value,
Err(_) => return Ok(web::HttpResponse::BadRequest().finish()),
};
let mut resp = Response::build(status);
let Some(image_url) = find_thumb_in_html(&html, &parsed_page_url) else {
return Ok(web::HttpResponse::NotFound().finish());
};
if let Some(ct) = headers.get(CONTENT_TYPE) {
if let Ok(ct_str) = ct.to_str() {
resp.set_header(CONTENT_TYPE, ct_str);
}
}
if let Some(cl) = headers.get(CONTENT_LENGTH) {
if let Ok(cl_str) = cl.to_str() {
resp.set_header(CONTENT_LENGTH, cl_str);
}
}
Ok(resp.body(bytes.to_vec()))
Ok(web::HttpResponse::Found()
.header("Location", image_url)
.finish())
}
#[cfg(test)]
mod tests {
    use super::{
        decode_js_string, is_allowed_thumb_url, is_allowed_video_page_url, normalize_candidate_url,
    };
    use url::Url;

    /// Only HTTPS pornhub video/listing pages may be fetched by the proxy.
    #[test]
    fn validates_allowed_video_pages() {
        let cases = [
            ("https://www.pornhub.com/view_video.php?viewkey=abc123", true),
            ("https://www.pornhub.com/video/search?search=test", true),
            ("https://example.com/view_video.php?viewkey=abc123", false),
            ("http://www.pornhub.com/view_video.php?viewkey=abc123", false),
        ];
        for (url, expected) in cases {
            assert_eq!(is_allowed_video_page_url(url), expected);
        }
    }

    /// Thumbnails must live on an allowed host and carry an image extension.
    #[test]
    fn validates_allowed_thumb_hosts_and_extensions() {
        let cases = [
            (
                "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/(m=eafTGgaaaa)(mh=abc123)1.jpg",
                true,
            ),
            ("https://www.pornhub.com/webmasters/thumb.webp", true),
            ("https://example.com/thumb.jpg", false),
            (
                "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/manifest.m3u8",
                false,
            ),
        ];
        for (url, expected) in cases {
            assert_eq!(is_allowed_thumb_url(url), expected);
        }
    }

    /// `//host/...` gains `https:` and `/path` is resolved against the page.
    #[test]
    fn normalizes_protocol_relative_and_root_relative_urls() {
        let page_url = Url::parse("https://www.pornhub.com/view_video.php?viewkey=abc").unwrap();
        let cases = [
            (
                "//pix-cdn77.phncdn.com/thumb.jpg",
                "https://pix-cdn77.phncdn.com/thumb.jpg",
            ),
            (
                "/assets/thumb.jpg",
                "https://www.pornhub.com/assets/thumb.jpg",
            ),
        ];
        for (candidate, expected) in cases {
            let normalized = normalize_candidate_url(candidate, &page_url);
            assert_eq!(normalized.as_deref(), Some(expected));
        }
    }

    /// Escaped slashes in a JS string literal are restored.
    #[test]
    fn decodes_js_escaped_urls() {
        let decoded = decode_js_string(r#"https:\/\/pix-cdn77.phncdn.com\/thumb.jpg"#);
        assert_eq!(decoded, "https://pix-cdn77.phncdn.com/thumb.jpg");
    }
}