use ntex::web::{self, HttpRequest}; use regex::Regex; use scraper::{Html, Selector}; use url::Url; use crate::util::requester::Requester; const PORNHUB_ROOT: &str = "https://www.pornhub.com/"; fn endpoint_to_page_url(req: &HttpRequest) -> String { let endpoint = req.match_info().query("endpoint").trim_start_matches('/'); let mut page_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") { endpoint.to_string() } else { format!("https://{endpoint}") }; let query = req.query_string(); if !query.is_empty() && !page_url.contains('?') { page_url.push('?'); page_url.push_str(query); } page_url } fn is_allowed_video_page_url(url: &str) -> bool { let Some(url) = Url::parse(url).ok() else { return false; }; if url.scheme() != "https" { return false; } let Some(host) = url.host_str() else { return false; }; if host != "pornhub.com" && host != "www.pornhub.com" && !host.ends_with(".pornhub.com") { return false; } url.path().starts_with("/view_video.php") || url.path().starts_with("/video/") } fn normalize_candidate_url(candidate: &str, page_url: &Url) -> Option { if candidate.is_empty() { return None; } if candidate.starts_with("//") { return Some(format!("https:{candidate}")); } if candidate.starts_with("https://") || candidate.starts_with("http://") { return Some(candidate.to_string()); } if candidate.starts_with('/') { let host = page_url.host_str()?; return Some(format!("{}://{}{}", page_url.scheme(), host, candidate)); } None } fn is_allowed_thumb_url(url: &str) -> bool { let Some(url) = Url::parse(url).ok() else { return false; }; if url.scheme() != "https" { return false; } let Some(host) = url.host_str() else { return false; }; let allowed_host = host == "pornhub.com" || host == "www.pornhub.com" || host.ends_with(".pornhub.com") || host.ends_with(".phncdn.com"); if !allowed_host { return false; } let path = url.path().to_ascii_lowercase(); [".jpg", ".jpeg", ".png", ".webp", ".avif"] .iter() .any(|ext| path.ends_with(ext)) } fn decode_js_string(value: &str) -> String { value .replace("\\/", "/") .replace("\\u002F", "/") .replace("\\u003A", ":") } fn find_thumb_in_html(html: &str, page_url: &Url) -> Option { let document = Html::parse_document(html); let selector = Selector::parse( "meta[property=\"og:image\"], meta[name=\"twitter:image\"], meta[itemprop=\"thumbnailUrl\"]", ) .ok()?; for meta in document.select(&selector) { let value = meta.value().attr("content").unwrap_or_default().trim(); if let Some(candidate) = normalize_candidate_url(value, page_url) { if is_allowed_thumb_url(&candidate) { return Some(candidate); } } } let image_url_re = Regex::new(r#""image_url"\s*:\s*"([^"]+)""#).ok()?; if let Some(captures) = image_url_re.captures(html) { let raw = captures .get(1) .map(|value| value.as_str()) .unwrap_or_default(); let decoded = decode_js_string(raw); if let Some(candidate) = normalize_candidate_url(&decoded, page_url) { if is_allowed_thumb_url(&candidate) { return Some(candidate); } } } None } pub async fn get_image( req: HttpRequest, requester: web::types::State, ) -> Result { let page_url = endpoint_to_page_url(&req); if !is_allowed_video_page_url(&page_url) { return Ok(web::HttpResponse::BadRequest().finish()); } let mut requester = requester.get_ref().clone(); let html = match requester .get_with_headers( page_url.as_str(), vec![("Referer".to_string(), PORNHUB_ROOT.to_string())], None, ) .await { Ok(value) => value, Err(_) => return Ok(web::HttpResponse::NotFound().finish()), }; let parsed_page_url = match Url::parse(&page_url) { Ok(value) => value, Err(_) => return Ok(web::HttpResponse::BadRequest().finish()), }; let Some(image_url) = find_thumb_in_html(&html, &parsed_page_url) else { return Ok(web::HttpResponse::NotFound().finish()); }; Ok(web::HttpResponse::Found() .header("Location", image_url) .finish()) } #[cfg(test)] mod tests { use super::{ decode_js_string, is_allowed_thumb_url, is_allowed_video_page_url, normalize_candidate_url, }; use url::Url; #[test] fn validates_allowed_video_pages() { assert!(is_allowed_video_page_url( "https://www.pornhub.com/view_video.php?viewkey=abc123" )); assert!(is_allowed_video_page_url( "https://www.pornhub.com/video/search?search=test" )); assert!(!is_allowed_video_page_url( "https://example.com/view_video.php?viewkey=abc123" )); assert!(!is_allowed_video_page_url( "http://www.pornhub.com/view_video.php?viewkey=abc123" )); } #[test] fn validates_allowed_thumb_hosts_and_extensions() { assert!(is_allowed_thumb_url( "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/(m=eafTGgaaaa)(mh=abc123)1.jpg" )); assert!(is_allowed_thumb_url( "https://www.pornhub.com/webmasters/thumb.webp" )); assert!(!is_allowed_thumb_url("https://example.com/thumb.jpg")); assert!(!is_allowed_thumb_url( "https://pix-cdn77.phncdn.com/videos/2026/04/01/1/manifest.m3u8" )); } #[test] fn normalizes_protocol_relative_and_root_relative_urls() { let page_url = Url::parse("https://www.pornhub.com/view_video.php?viewkey=abc").unwrap(); let protocol_relative = normalize_candidate_url("//pix-cdn77.phncdn.com/thumb.jpg", &page_url); assert_eq!( protocol_relative.as_deref(), Some("https://pix-cdn77.phncdn.com/thumb.jpg") ); let root_relative = normalize_candidate_url("/assets/thumb.jpg", &page_url); assert_eq!( root_relative.as_deref(), Some("https://www.pornhub.com/assets/thumb.jpg") ); } #[test] fn decodes_js_escaped_urls() { assert_eq!( decode_js_string(r#"https:\/\/pix-cdn77.phncdn.com\/thumb.jpg"#), "https://pix-cdn77.phncdn.com/thumb.jpg" ); } }