From 78e852c29d8f762836ab2f29e7cf83e1aad4fdc0 Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 5 Apr 2026 15:53:16 +0000 Subject: [PATCH] pornhub fix --- src/providers/pornhub.rs | 157 ++++++++++++++++++++++++++++++++---- src/proxies/mod.rs | 1 + src/proxies/pornhubthumb.rs | 51 ++++++++++++ src/proxy.rs | 5 ++ 4 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 src/proxies/pornhubthumb.rs diff --git a/src/providers/pornhub.rs b/src/providers/pornhub.rs index 49e3113..f8429aa 100644 --- a/src/providers/pornhub.rs +++ b/src/providers/pornhub.rs @@ -2,6 +2,7 @@ use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{ Provider, report_provider_error, report_provider_error_background, requester_or_default, + build_proxy_url, strip_url_scheme, }; use crate::status::*; use crate::util::cache::VideoCache; @@ -18,6 +19,9 @@ use regex::Regex; use scraper::{ElementRef, Html, Selector}; use serde_json::Value; use std::collections::HashSet; +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; +use std::thread; use url::Url; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = @@ -48,6 +52,7 @@ error_chain! { #[derive(Debug, Clone)] pub struct PornhubProvider { url: String, + tag_map: Arc>>, } #[derive(Debug, Clone, Copy)] @@ -71,6 +76,13 @@ struct QueryTarget { slug: String, } +#[derive(Debug, Clone)] +struct TagInfo { + kind: QueryTargetKind, + slug: String, + title: String, +} + impl QueryTargetKind { fn path_segment(self) -> &'static str { match self { @@ -84,8 +96,69 @@ impl QueryTargetKind { impl PornhubProvider { pub fn new() -> Self { - Self { + let provider = Self { url: BASE_URL.to_string(), + tag_map: Arc::new(RwLock::new(HashMap::new())), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let tag_map = Arc::clone(&self.tag_map); + thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rt.block_on(async { + let _ = Self::load_tags(&url, tag_map).await; + }); + }); + } + + async fn load_tags(base_url: &str, tag_map: Arc>>) -> Result<()> { + Self::load_kind(base_url, "channel", QueryTargetKind::Channel, &tag_map).await?; + Self::load_kind(base_url, "pornstar", QueryTargetKind::Pornstar, &tag_map).await?; + Self::load_kind(base_url, "model", QueryTargetKind::Model, &tag_map).await?; + Self::load_kind(base_url, "user", QueryTargetKind::User, &tag_map).await?; + Ok(()) + } + + async fn load_kind(base_url: &str, path_segment: &str, kind: QueryTargetKind, tag_map: &Arc>>) -> Result<()> { + let url = format!("{}/{}/top", base_url, path_segment); + let mut requester = crate::util::requester::Requester::new(); + let body = requester.get(&url, None).await.map_err(|e| Error::from(ErrorKind::Parse(format!("http request failed: {e}"))))?; + let document = Html::parse_document(&body); + let selector = Self::selector(&format!("a[href^='/{}/']", path_segment))?; + for element in document.select(&selector) { + if let Some(href) = element.attr("href") { + if let Some(slug) = Self::slug_from_url(href, path_segment) { + let title = element.text().collect::().trim().to_string(); + if !title.is_empty() && !slug.is_empty() { + let info = TagInfo { + kind, + slug: slug.clone(), + title: title.clone(), + }; + let mut map = tag_map.write().unwrap(); + map.insert(title.to_ascii_lowercase(), info.clone()); + map.insert(slug.to_ascii_lowercase(), info); + } + } + } + } + Ok(()) + } + + fn slug_from_url(url: &str, path_segment: &str) -> Option { + let parsed = Url::parse(url).ok()?; + let mut segments = parsed.path_segments()?; + if segments.next() == Some(path_segment) { + segments.next().map(|s| s.to_string()) + } else { + None } } @@ -140,12 +213,12 @@ impl PornhubProvider { fn selector(value: &str) -> Result { Selector::parse(value) - .map_err(|error| ErrorKind::Parse(format!("selector parse failed for {value}: {error}")).into()) + .map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}")))) } fn regex(value: &str) -> Result { Regex::new(value) - .map_err(|error| ErrorKind::Parse(format!("regex parse failed for {value}: {error}")).into()) + .map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}")))) } fn text_of(element: &ElementRef<'_>) -> String { @@ -191,10 +264,14 @@ impl PornhubProvider { } } - fn parse_query_target(query: &str) -> Option { + fn parse_query_target(&self, query: &str) -> Option { + let normalized = query.trim().to_ascii_lowercase(); + if let Some(info) = self.tag_map.read().unwrap().get(&normalized) { + return Some(QueryTarget { kind: info.kind, slug: info.slug.clone() }); + } + // Fallback to kind:slug without @ let trimmed = query.trim(); - let trimmed = trimmed.strip_prefix('@')?; - let (kind, raw_slug) = trimmed.split_once(':')?; + let (kind_str, raw_slug) = trimmed.split_once(':')?; let slug = raw_slug .trim() .trim_matches('/') @@ -204,7 +281,7 @@ impl PornhubProvider { return None; } - let kind = match kind.trim().to_ascii_lowercase().as_str() { + let kind = match kind_str.trim().to_ascii_lowercase().as_str() { "channel" | "channels" => QueryTargetKind::Channel, "pornstar" | "pornstars" => QueryTargetKind::Pornstar, "model" | "models" => QueryTargetKind::Model, @@ -257,7 +334,7 @@ impl PornhubProvider { fn build_listing_request(&self, page: u8, sort: &str, query: Option<&str>) -> (String, ListingScope) { match query.map(str::trim).filter(|value| !value.is_empty()) { Some(query) => { - if let Some(target) = Self::parse_query_target(query) { + if let Some(target) = self.parse_query_target(query) { (self.build_creator_url(page, sort, &target), ListingScope::Creator) } else { let encoded = query.to_ascii_lowercase().replace(' ', "+"); @@ -413,7 +490,7 @@ impl PornhubProvider { let mut tags = Vec::new(); if let Some(tag) = uploader_url .as_deref() - .and_then(Self::query_tag_from_uploader_url) + .and_then(|url| self.query_tag_from_uploader_url(url)) { tags.push(tag); } @@ -446,15 +523,19 @@ impl PornhubProvider { Some(format!("{CHANNEL_ID}:{kind}:{slug}")) } - fn query_tag_from_uploader_url(url: &str) -> Option { + fn query_tag_from_uploader_url(&self, url: &str) -> Option { let parsed = Url::parse(url).ok()?; let mut segments = parsed.path_segments()?; - let kind = segments.next()?.trim_matches('/'); + let kind_str = segments.next()?.trim_matches('/'); let slug = segments.next()?.trim_matches('/'); - if kind.is_empty() || slug.is_empty() { + if kind_str.is_empty() || slug.is_empty() { return None; } - Some(format!("@{kind}:{}", slug.replace('-', " "))) + let normalized_slug = slug.to_ascii_lowercase(); + if let Some(info) = self.tag_map.read().unwrap().get(&normalized_slug) { + return Some(info.title.clone()); + } + Some(slug.replace('-', " ")) } fn push_unique(values: &mut Vec, value: String) { @@ -802,13 +883,54 @@ impl PornhubProvider { return Ok(old_items); } - let items = self.enrich_listing_items(items, &options).await; + let mut items = self.enrich_listing_items(items, &options).await; + + // Rewrite thumbs and previews to use the proxy when appropriate + for item in items.iter_mut() { + let proxied = self.proxied_thumb(&options, &item.thumb); + if !proxied.is_empty() { + item.thumb = proxied; + } + if let Some(prev) = item.preview.clone() { + let proxied_prev = self.proxied_thumb(&options, &prev); + if !proxied_prev.is_empty() { + item.preview = Some(proxied_prev); + } + } + } + cache.remove(&video_url); cache.insert(video_url, items.clone()); Ok(items) } } +impl PornhubProvider { + fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String { + if thumb.is_empty() { + return String::new(); + } + if !PornhubThumbPolicy::is_allowed_thumb_url(thumb) { + return String::new(); + } + build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb)) + } +} + +struct PornhubThumbPolicy; + +impl PornhubThumbPolicy { + fn is_allowed_thumb_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { return false; }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { return false; }; + // Allow official Pornhub hosts and their CDN hosts + host.contains("phncdn") || host.ends_with("pornhub.com") + } +} + #[async_trait] impl Provider for PornhubProvider { async fn get_videos( @@ -850,17 +972,18 @@ mod tests { #[test] fn parses_creator_queries() { - let target = PornhubProvider::parse_query_target("@channels:Brazzers") + let provider = PornhubProvider::new(); + let target = provider.parse_query_target("channels:Brazzers") .expect("channel target should parse"); assert!(matches!(target.kind, QueryTargetKind::Channel)); assert_eq!(target.slug, "brazzers"); - let target = PornhubProvider::parse_query_target("@pornstar:Alex Mack") + let target = provider.parse_query_target("pornstar:Alex Mack") .expect("pornstar target should parse"); assert!(matches!(target.kind, QueryTargetKind::Pornstar)); assert_eq!(target.slug, "alex-mack"); - assert!(PornhubProvider::parse_query_target("teacher").is_none()); + assert!(provider.parse_query_target("teacher").is_none()); } #[test] diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 0cc673c..332fcb8 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -10,6 +10,7 @@ use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester}; pub mod doodstream; pub mod hanimecdn; pub mod hqpornerthumb; +pub mod pornhubthumb; pub mod javtiful; pub mod noodlemagazine; pub mod pimpbunny; diff --git a/src/proxies/pornhubthumb.rs b/src/proxies/pornhubthumb.rs new file mode 100644 index 0000000..f30dbe7 --- /dev/null +++ b/src/proxies/pornhubthumb.rs @@ -0,0 +1,51 @@ +use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE}; +use ntex::{ + http::Response, + web::{self, HttpRequest, error}, +}; + +use crate::util::requester::Requester; + +pub async fn get_image( + req: HttpRequest, + requester: web::types::State, +) -> Result { + let endpoint = req.match_info().query("endpoint").to_string(); + let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") { + endpoint + } else { + format!("https://{}", endpoint.trim_start_matches('/')) + }; + + let upstream = match requester + .get_ref() + .clone() + .get_raw_with_headers( + image_url.as_str(), + vec![("Referer".to_string(), "https://www.pornhub.com/".to_string())], + ) + .await + { + Ok(response) => response, + Err(_) => return Ok(web::HttpResponse::NotFound().finish()), + }; + + let status = upstream.status(); + let headers = upstream.headers().clone(); + let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?; + + let mut resp = Response::build(status); + + if let Some(ct) = headers.get(CONTENT_TYPE) { + if let Ok(ct_str) = ct.to_str() { + resp.set_header(CONTENT_TYPE, ct_str); + } + } + if let Some(cl) = headers.get(CONTENT_LENGTH) { + if let Ok(cl_str) = cl.to_str() { + resp.set_header(CONTENT_LENGTH, cl_str); + } + } + + Ok(resp.body(bytes.to_vec())) +} diff --git a/src/proxy.rs b/src/proxy.rs index 8d8354b..9b34069 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -76,6 +76,11 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::post().to(crate::proxies::porndishthumb::get_image)) .route(web::get().to(crate::proxies::porndishthumb::get_image)), ); + cfg.service( + web::resource("/pornhub-thumb/{endpoint}*") + .route(web::post().to(crate::proxies::pornhubthumb::get_image)) + .route(web::get().to(crate::proxies::pornhubthumb::get_image)), + ); cfg.service( web::resource("/pimpbunny-thumb/{endpoint}*") .route(web::post().to(crate::proxies::pimpbunnythumb::get_image))