From 0c11959d9438d58ae295298217ecb5e5e3883749 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 17 Mar 2026 00:24:29 +0000 Subject: [PATCH] porndish --- Dockerfile | 3 +- src/providers/mod.rs | 5 + src/providers/porndish.rs | 1303 ++++++++++++++++++++++++++++++++++ src/proxies/mod.rs | 1 + src/proxies/porndishthumb.rs | 80 +++ src/proxy.rs | 5 + 6 files changed, 1396 insertions(+), 1 deletion(-) create mode 100644 src/providers/porndish.rs create mode 100644 src/proxies/porndishthumb.rs diff --git a/Dockerfile b/Dockerfile index 8fb2179..a58d10e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,5 +28,6 @@ RUN apt install -yq libssl-dev \ sudo \ && apt-get clean -USER 1000 +RUN python3 -m pip install --break-system-packages --no-cache-dir curl_cffi +USER 1000 diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 2d7d499..9d74d27 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -34,6 +34,7 @@ pub mod omgxxx; pub mod paradisehill; pub mod porn00; pub mod porn4fans; +pub mod porndish; pub mod pornzog; pub mod shooshtime; pub mod sxyprn; @@ -134,6 +135,10 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| "porn4fans", Arc::new(porn4fans::Porn4fansProvider::new()) as DynProvider, ); + m.insert( + "porndish", + Arc::new(porndish::PorndishProvider::new()) as DynProvider, + ); m.insert( "shooshtime", Arc::new(shooshtime::ShooshtimeProvider::new()) as DynProvider, diff --git a/src/providers/porndish.rs b/src/providers/porndish.rs new file mode 100644 index 0000000..51d6d29 --- /dev/null +++ b/src/providers/porndish.rs @@ -0,0 +1,1303 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, build_proxy_url, report_provider_error, report_provider_error_background, + requester_or_default, strip_url_scheme, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::videos::{ServerOptions, VideoEmbed, VideoFormat, VideoItem}; 
+use async_trait::async_trait; +use chrono::DateTime; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use std::process::Command; +use std::sync::{Arc, RwLock}; +use std::thread; +use std::time::{SystemTime, UNIX_EPOCH}; +use url::Url; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + Json(serde_json::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct PorndishProvider { + url: String, + sites: Arc>>, + tags: Arc>>, + uploaders: Arc>>, +} + +impl PorndishProvider { + pub fn new() -> Self { + let provider = Self { + url: "https://www.porndish.com".to_string(), + sites: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tags: Arc::new(RwLock::new(vec![])), + uploaders: Arc::new(RwLock::new(vec![])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let base_url = self.url.clone(); + let sites = Arc::clone(&self.sites); + let tags = Arc::clone(&self.tags); + let uploaders = Arc::clone(&self.uploaders); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + "porndish", + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_filters(&base_url, sites, tags, uploaders).await { + report_provider_error_background( + "porndish", + "spawn_initial_load.load_filters", + &error.to_string(), + ); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let sites = self + .sites + .read() + .map(|value| value.clone()) + 
.unwrap_or_default(); + + Channel { + id: "porndish".to_string(), + name: "Porndish".to_string(), + description: "Porndish archive pages, tags, and source studios.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=porndish.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse the latest, popular, hot, or trending archives." + .to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Newest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Popular".to_string(), + }, + FilterOption { + id: "hot".to_string(), + title: "Hot".to_string(), + }, + FilterOption { + id: "trending".to_string(), + title: "Trending".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Sites".to_string(), + description: "Browse a Porndish source archive directly.".to_string(), + systemImage: "network".to_string(), + colorName: "purple".to_string(), + options: sites, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn decode_html(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + } + + fn text_of(element: &ElementRef<'_>) -> String { + Self::decode_html(&Self::collapse_whitespace( + &element.text().collect::>().join(" "), + )) + } + + fn normalize_title(title: &str) -> 
String { + title + .trim() + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() + } + + fn slug_to_title(slug: &str) -> String { + slug.split('-') + .filter(|value| !value.is_empty()) + .map(|value| { + let mut chars = value.chars(); + match chars.next() { + Some(first) => format!( + "{}{}", + first.to_ascii_uppercase(), + chars.collect::() + ), + None => String::new(), + } + }) + .collect::>() + .join(" ") + } + + fn normalize_url(&self, url: &str) -> String { + if url.is_empty() { + return String::new(); + } + if url.starts_with("http://") || url.starts_with("https://") { + return url.to_string(); + } + if url.starts_with("//") { + return format!("https:{url}"); + } + if url.starts_with('/') { + return format!("{}{}", self.url, url); + } + format!("{}/{}", self.url, url.trim_start_matches("./")) + } + + fn parse_url(url: &str) -> Option { + Url::parse(url).ok() + } + + fn is_porndish_host(host: &str) -> bool { + matches!(host, "www.porndish.com" | "porndish.com") + } + + fn is_myvidplay_host(host: &str) -> bool { + matches!(host, "myvidplay.com" | "www.myvidplay.com") + } + + fn is_allowed_list_url(url: &str) -> bool { + let Some(url) = Self::parse_url(url) else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + if !Self::is_porndish_host(host) { + return false; + } + let path = url.path(); + path == "/" + || path.starts_with("/page/") + || path.starts_with("/popular/") + || path.starts_with("/hot/") + || path.starts_with("/trending/") + || path.starts_with("/search/") + || path.starts_with("/videos2/") + || path.starts_with("/video2/") + } + + fn is_allowed_detail_url(url: &str) -> bool { + let Some(url) = Self::parse_url(url) else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + Self::is_porndish_host(host) && url.path().starts_with("/porn/") + } + + fn 
is_allowed_myvidplay_iframe_url(url: &str) -> bool { + let Some(url) = Self::parse_url(url) else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + Self::is_myvidplay_host(host) && url.path().starts_with("/e/") + } + + fn is_allowed_myvidplay_pass_url(url: &str) -> bool { + let Some(url) = Self::parse_url(url) else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + Self::is_myvidplay_host(host) && url.path().starts_with("/pass_md5/") + } + + fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String { + if thumb.is_empty() { + return String::new(); + } + if !PorndishThumbPolicy::is_allowed_thumb_url(thumb) { + return String::new(); + } + build_proxy_url(options, "porndish-thumb", &strip_url_scheme(thumb)) + } + + fn push_unique(target: &Arc>>, item: FilterOption) { + if let Ok(mut values) = target.write() { + if !values.iter().any(|value| value.id == item.id) { + values.push(item); + } + } + } + + fn add_tag_filter(&self, slug: &str, title: &str) { + let clean_slug = slug.trim_matches('/').trim(); + let clean_title = title.trim(); + if clean_slug.is_empty() || clean_title.is_empty() { + return; + } + Self::push_unique( + &self.tags, + FilterOption { + id: format!("{}/video2/{clean_slug}/", self.url), + title: clean_title.to_string(), + }, + ); + } + + fn add_uploader_filter(&self, url: &str, title: &str) { + let clean_title = title.trim(); + if url.is_empty() || clean_title.is_empty() || !Self::is_allowed_list_url(url) { + return; + } + Self::push_unique( + &self.uploaders, + FilterOption { + id: url.to_string(), + title: clean_title.to_string(), + }, + ); + } + + async fn fetch_with_curl_cffi(url: &str, referer: Option<&str>) -> Result { + let url = url.to_string(); + let referer = referer.unwrap_or("").to_string(); + + let output = tokio::task::spawn_blocking(move || { + 
Command::new("python3") + .arg("-c") + .arg( + r#" +import sys +from curl_cffi import requests + +url = sys.argv[1] +referer = sys.argv[2] if len(sys.argv) > 2 else "" +headers = {} +if referer: + headers["Referer"] = referer +response = requests.get( + url, + impersonate="chrome", + timeout=30, + allow_redirects=True, + headers=headers, +) +if response.status_code >= 400: + sys.stderr.write(f"status={response.status_code} url={response.url}\n") + sys.exit(1) +sys.stdout.buffer.write(response.content) +"#, + ) + .arg(url) + .arg(referer) + .output() + }) + .await + .map_err(|error| Error::from(format!("spawn_blocking failed: {error}")))? + .map_err(|error| Error::from(format!("python3 execution failed: {error}")))?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + return Err(Error::from(format!("curl_cffi request failed: {stderr}"))); + } + + Ok(String::from_utf8_lossy(&output.stdout).to_string()) + } + + async fn fetch_html(url: &str, referer: Option<&str>) -> Result { + Self::fetch_with_curl_cffi(url, referer).await + } + + async fn load_filters( + base_url: &str, + sites: Arc>>, + tags: Arc>>, + uploaders: Arc>>, + ) -> Result<()> { + let link_selector = Self::selector("a[href]")?; + let article_selector = Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy, article.post")?; + let pages = vec![ + format!("{base_url}/"), + format!("{base_url}/page/2/"), + format!("{base_url}/popular/"), + format!("{base_url}/hot/"), + format!("{base_url}/trending/"), + ]; + + for url in pages { + let html = match Self::fetch_html(&url, Some(base_url)).await { + Ok(html) => html, + Err(error) => { + report_provider_error_background( + "porndish", + "load_filters.fetch_html", + &format!("url={url}; error={error}"), + ); + continue; + } + }; + + let document = Html::parse_document(&html); + + for link in document.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; 
+ let normalized = href.trim_end_matches('/'); + let prefix = format!("{base_url}/videos2/"); + if !normalized.starts_with(&prefix) { + continue; + } + + let remainder = normalized.strip_prefix(&prefix).unwrap_or_default(); + if remainder.is_empty() || remainder.contains("/page/") { + continue; + } + + let title = Self::text_of(&link); + if title.is_empty() { + continue; + } + + let item = FilterOption { + id: format!("{normalized}/"), + title: title.clone(), + }; + Self::push_unique(&sites, item.clone()); + Self::push_unique(&uploaders, item); + } + + for article in document.select(&article_selector) { + let Some(classes) = article.value().attr("class") else { + continue; + }; + + for class_name in classes.split_whitespace() { + if let Some(slug) = class_name.strip_prefix("tag-") { + if slug.is_empty() || slug == "format-video" { + continue; + } + Self::push_unique( + &tags, + FilterOption { + id: format!("{base_url}/video2/{slug}/"), + title: Self::slug_to_title(slug), + }, + ); + } + } + } + } + + Ok(()) + } + + fn parse_duration(text: &str) -> u32 { + let parts = text + .trim() + .split(':') + .filter_map(|value| value.parse::().ok()) + .collect::>(); + + match parts.as_slice() { + [minutes, seconds] => minutes.saturating_mul(60).saturating_add(*seconds), + [hours, minutes, seconds] => hours + .saturating_mul(3600) + .saturating_add(minutes.saturating_mul(60)) + .saturating_add(*seconds), + _ => 0, + } + } + + fn parse_views(text: &str) -> Option { + parse_abbreviated_number( + &text + .replace("Views", "") + .replace("View", "") + .replace(' ', "") + .trim() + .to_string(), + ) + } + + fn parse_uploaded_at(text: &str) -> Option { + DateTime::parse_from_rfc3339(text) + .ok() + .map(|value| value.timestamp() as u64) + } + + fn encoded_query(query: &str) -> String { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + serializer.append_pair("", query); + serializer.finish().trim_start_matches('=').to_string() + } + + fn 
build_top_level_url(&self, page: u32, sort: &str) -> String { + let base = match sort { + "popular" => format!("{}/popular/", self.url), + "hot" => format!("{}/hot/", self.url), + "trending" => format!("{}/trending/", self.url), + _ => format!("{}/", self.url), + }; + + if page > 1 { + format!("{base}page/{page}/") + } else { + base + } + } + + fn build_archive_page_url(base: &str, page: u32) -> String { + let base = if base.ends_with('/') { + base.to_string() + } else { + format!("{base}/") + }; + if page > 1 { + format!("{base}page/{page}/") + } else { + base + } + } + + fn build_search_url(&self, query: &str, page: u32) -> String { + let encoded = Self::encoded_query(query); + if page > 1 { + format!("{}/search/{encoded}/page/{page}/", self.url) + } else { + format!("{}/search/{encoded}/", self.url) + } + } + + fn resolve_option_target(&self, options: &ServerOptions) -> Option { + let site = options.sites.as_deref()?; + if site.is_empty() || site == "all" { + return None; + } + if !Self::is_allowed_list_url(site) { + return None; + } + Some(site.to_string()) + } + + fn match_filter(options: &[FilterOption], query: &str) -> Option { + let normalized_query = Self::normalize_title(query); + options + .iter() + .find(|value| Self::normalize_title(&value.title) == normalized_query) + .map(|value| value.id.clone()) + } + + fn resolve_query_target(&self, query: &str) -> Option { + if let Ok(uploaders) = self.uploaders.read() { + if let Some(target) = Self::match_filter(&uploaders, query) { + return Some(target); + } + } + + if let Ok(sites) = self.sites.read() { + if let Some(target) = Self::match_filter(&sites, query) { + return Some(target); + } + } + + if let Ok(tags) = self.tags.read() { + if let Some(target) = Self::match_filter(&tags, query) { + return Some(target); + } + } + + None + } + + fn parse_list_videos(&self, html: &str) -> Result> { + let document = Html::parse_document(html); + let article_selector = + Self::selector("article.entry-tpl-grid, 
article.entry-tpl-list-fancy")?; + let title_selector = Self::selector(".entry-title a[href]")?; + let image_selector = Self::selector(".entry-featured-media img")?; + let duration_selector = Self::selector(".mace-video-duration")?; + let source_selector = Self::selector(".entry-categories a[href]")?; + let views_selector = Self::selector(".entry-views strong")?; + let time_selector = Self::selector("time.entry-date[datetime]")?; + let author_selector = Self::selector(".entry-author a[href] strong, .entry-author a[href]")?; + + let mut items = Vec::new(); + + for article in document.select(&article_selector) { + let Some(title_link) = article.select(&title_selector).next() else { + continue; + }; + + let Some(url) = title_link.value().attr("href") else { + continue; + }; + let page_url = self.normalize_url(url); + if page_url.is_empty() || !Self::is_allowed_detail_url(&page_url) { + continue; + } + + let title = Self::text_of(&title_link); + if title.is_empty() { + continue; + } + + let slug = page_url + .trim_end_matches('/') + .rsplit('/') + .next() + .unwrap_or_default() + .to_string(); + + let thumb = article + .select(&image_selector) + .next() + .and_then(|image| { + image + .value() + .attr("data-src") + .or_else(|| image.value().attr("src")) + }) + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + + let duration = article + .select(&duration_selector) + .next() + .map(|value| Self::parse_duration(&Self::text_of(&value))) + .unwrap_or(0); + + let mut item = VideoItem::new( + slug, + title, + page_url.clone(), + "porndish".to_string(), + thumb, + duration, + ); + + if let Some(source_link) = article.select(&source_selector).next() { + let source_title = Self::text_of(&source_link); + if !source_title.is_empty() { + if let Some(source_url) = source_link.value().attr("href") { + let source_url = self.normalize_url(source_url); + item = item + .uploader(source_title.clone()) + .uploader_url(source_url.clone()); + 
self.add_uploader_filter(&source_url, &source_title); + } else { + item = item.uploader(source_title); + } + } + } else if let Some(author_link) = article.select(&author_selector).next() { + let author = Self::text_of(&author_link); + if !author.is_empty() { + item = item.uploader(author); + } + } + + if let Some(views) = article + .select(&views_selector) + .next() + .and_then(|value| Self::parse_views(&Self::text_of(&value))) + { + item = item.views(views); + } + + if let Some(uploaded_at) = article + .select(&time_selector) + .next() + .and_then(|time| time.value().attr("datetime")) + .and_then(Self::parse_uploaded_at) + { + item = item.uploaded_at(uploaded_at); + } + + if let Some(classes) = article.value().attr("class") { + let mut tags = Vec::new(); + for class_name in classes.split_whitespace() { + if let Some(slug) = class_name.strip_prefix("tag-") { + if slug.is_empty() || slug == "format-video" { + continue; + } + let title = Self::slug_to_title(slug); + if !title.is_empty() && !tags.iter().any(|value| value == &title) { + self.add_tag_filter(slug, &title); + tags.push(title); + } + } + } + if !tags.is_empty() { + item = item.tags(tags); + } + } + + items.push(item); + } + + Ok(items) + } + + fn extract_iframe_fragments(&self, html: &str) -> Result> { + let regex = Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#)?; + let mut fragments = Vec::new(); + + for captures in regex.captures_iter(html) { + let Some(value) = captures.get(1).map(|value| value.as_str()) else { + continue; + }; + let encoded = format!("\"{value}\""); + let decoded = serde_json::from_str::(&encoded).unwrap_or_default(); + if decoded.contains(" Result { + if !Self::is_allowed_myvidplay_iframe_url(iframe_url) { + return Err(Error::from(format!( + "blocked iframe url outside allowlist: {iframe_url}" + ))); + } + let html = Self::fetch_html(iframe_url, Some(&self.url)).await?; + let pass_regex = Self::regex(r#"\$\.get\(\s*['"](/pass_md5/[^'"]+)['"]"#)?; + let path 
= pass_regex + .captures(&html) + .and_then(|captures| captures.get(1).map(|value| value.as_str().to_string())) + .ok_or_else(|| Error::from("myvidplay resolution failed: missing pass_md5 path"))?; + + let token = path + .trim_end_matches('/') + .rsplit('/') + .next() + .unwrap_or_default() + .to_string(); + if token.is_empty() { + return Err(Error::from( + "myvidplay resolution failed: missing pass_md5 token".to_string(), + )); + } + + let pass_url = if path.starts_with("http://") || path.starts_with("https://") { + path + } else { + let base = url::Url::parse(iframe_url) + .map_err(|error| Error::from(format!("invalid iframe url: {error}")))?; + base.join(&path) + .map_err(|error| Error::from(format!("invalid pass_md5 url: {error}")))? + .to_string() + }; + if !Self::is_allowed_myvidplay_pass_url(&pass_url) { + return Err(Error::from(format!( + "blocked pass_md5 url outside allowlist: {pass_url}" + ))); + } + + let base = Self::fetch_html(&pass_url, Some(iframe_url)) + .await? + .trim() + .to_string(); + if base.is_empty() || base == "RELOAD" || !base.starts_with("http") { + return Err(Error::from(format!( + "myvidplay resolution failed: unusable pass_md5 response: {}", + base.chars().take(120).collect::() + ))); + } + + let chars = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map_err(|error| Error::from(format!("time error: {error}")))? 
+ .as_millis(); + let suffix = (0..10) + .map(|index| { + let pos = ((now + (index as u128 * 17)) % chars.len() as u128) as usize; + chars[pos] as char + }) + .collect::(); + + Ok(format!("{base}{suffix}?token={token}&expiry={now}")) + } + + fn parse_embed_source(fragment: &str) -> Result> { + let iframe_regex = Self::regex(r#"(?is)]+src="([^"]+)"[^>]*>"#)?; + Ok(iframe_regex.captures(fragment).and_then(|captures| { + captures + .get(1) + .map(|value| (fragment.to_string(), value.as_str().to_string())) + })) + } + + async fn apply_detail_video( + &self, + mut item: VideoItem, + html: &str, + page_url: &str, + options: &ServerOptions, + _requester: &mut Requester, + ) -> Result { + let ( + parsed_title, + parsed_thumb, + parsed_uploader, + parsed_uploader_url, + parsed_views, + parsed_uploaded_at, + parsed_tags, + ) = { + let document = Html::parse_document(html); + let title_selector = Self::selector("h1.entry-title")?; + let thumb_selector = Self::selector("meta[property=\"og:image\"]")?; + let category_selector = + Self::selector(".entry-categories-l a[href], .entry-categories a[href]")?; + let views_selector = Self::selector(".entry-views strong")?; + let time_selector = Self::selector("time.entry-date[datetime]")?; + let tag_selector = Self::selector(".entry-tags a[href]")?; + + let parsed_title = document + .select(&title_selector) + .next() + .map(|title| Self::text_of(&title)) + .filter(|title| !title.is_empty()); + + let parsed_thumb = document + .select(&thumb_selector) + .next() + .and_then(|meta| meta.value().attr("content")) + .map(|thumb| self.normalize_url(thumb)) + .filter(|thumb| !thumb.is_empty()); + + let (parsed_uploader, parsed_uploader_url) = document + .select(&category_selector) + .next() + .map(|category| { + let title = Self::text_of(&category); + let url = category + .value() + .attr("href") + .map(|href| self.normalize_url(href)) + .filter(|href| !href.is_empty()); + (title, url) + }) + .filter(|(title, _)| !title.is_empty()) + 
.map(|(title, url)| (Some(title), url)) + .unwrap_or((None, None)); + + let parsed_views = document + .select(&views_selector) + .next() + .and_then(|value| Self::parse_views(&Self::text_of(&value))); + + let parsed_uploaded_at = document + .select(&time_selector) + .next() + .and_then(|time| time.value().attr("datetime")) + .and_then(Self::parse_uploaded_at); + + let mut parsed_tags = Vec::new(); + for tag_link in document.select(&tag_selector) { + let title = Self::text_of(&tag_link); + let tag_url = tag_link + .value() + .attr("href") + .map(|href| self.normalize_url(href)) + .unwrap_or_default(); + let slug = tag_url + .trim_end_matches('/') + .rsplit('/') + .next() + .unwrap_or_default() + .to_string(); + if !title.is_empty() { + parsed_tags.push((title, slug)); + } + } + + ( + parsed_title, + parsed_thumb, + parsed_uploader, + parsed_uploader_url, + parsed_views, + parsed_uploaded_at, + parsed_tags, + ) + }; + + if let Some(title) = parsed_title { + item.title = title; + } + + if let Some(thumb) = parsed_thumb { + item.thumb = self.proxied_thumb(options, &thumb); + } + + if let Some(uploader) = parsed_uploader { + item.uploader = Some(uploader.clone()); + if let Some(uploader_url) = parsed_uploader_url { + item.uploaderUrl = Some(uploader_url.clone()); + self.add_uploader_filter(&uploader_url, &uploader); + } + } + + if let Some(views) = parsed_views { + item.views = Some(views); + } + + if let Some(uploaded_at) = parsed_uploaded_at { + item.uploadedAt = Some(uploaded_at); + } + + if !parsed_tags.is_empty() { + let mut tags = Vec::new(); + for (title, slug) in parsed_tags { + if !slug.is_empty() { + self.add_tag_filter(&slug, &title); + } + if !tags.iter().any(|value| value == &title) { + tags.push(title); + } + } + item.tags = Some(tags); + } + + for fragment in self.extract_iframe_fragments(html)? { + let Some((embed_html, iframe_url)) = Self::parse_embed_source(&fragment)? 
else { + continue; + }; + let iframe_url = self.normalize_url(&iframe_url); + + item.embed = Some(VideoEmbed { + html: embed_html, + source: iframe_url.clone(), + }); + + if iframe_url.contains("myvidplay.com") { + match self.resolve_myvidplay_stream(&iframe_url).await { + Ok(stream_url) => { + item.url = stream_url.clone(); + let mut format = + VideoFormat::new(stream_url.clone(), "sd".to_string(), "mp4".to_string()); + format.add_http_header("Referer".to_string(), iframe_url.clone()); + item.formats = Some(vec![format]); + } + Err(error) => { + report_provider_error_background( + "porndish", + "resolve_myvidplay_stream", + &format!("iframe_url={iframe_url}; error={error}"), + ); + item.url = page_url.to_string(); + } + } + } else { + item.url = iframe_url; + } + + break; + } + + if item.formats.is_none() && item.url != page_url { + let mut format = + VideoFormat::new(item.url.clone(), "unknown".to_string(), "mp4".to_string()); + format.add_http_header("Referer".to_string(), page_url.to_string()); + item.formats = Some(vec![format]); + } + + Ok(item) + } + + async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { + let mut item = item; + if !item.thumb.is_empty() { + item.thumb = self.proxied_thumb(options, &item.thumb); + } + + let page_url = if item.url.starts_with("http://") || item.url.starts_with("https://") { + item.url.clone() + } else { + return item; + }; + if !Self::is_allowed_detail_url(&page_url) { + report_provider_error_background( + "porndish", + "enrich_video.blocked_detail_url", + &format!("url={page_url}"), + ); + return item; + } + let original_item = item.clone(); + + let mut requester = match options.requester.clone() { + Some(requester) => requester, + None => Requester::new(), + }; + + let html = match Self::fetch_html(&page_url, Some(&self.url)).await { + Ok(html) => html, + Err(error) => { + report_provider_error_background( + "porndish", + "enrich_video.request", + &format!("url={page_url}; error={error}"), + 
); + return item; + } + }; + + match self + .apply_detail_video(item, &html, &page_url, options, &mut requester) + .await + { + Ok(item) => item, + Err(error) => { + report_provider_error_background( + "porndish", + "enrich_video.parse", + &format!("url={page_url}; error={error}"), + ); + original_item + } + } + } + + async fn fetch_items_for_url( + &self, + cache: VideoCache, + url: String, + options: &ServerOptions, + ) -> Result> { + if let Some((time, items)) = cache.get(&url) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.clone()); + } + } + + let _requester = requester_or_default(options, "porndish", "missing_requester"); + if !Self::is_allowed_list_url(&url) { + report_provider_error( + "porndish", + "fetch_items_for_url.blocked_url", + &format!("url={url}"), + ) + .await; + return Ok(vec![]); + } + + let html = match Self::fetch_html(&url, Some(&self.url)).await { + Ok(html) => html, + Err(error) => { + report_provider_error( + "porndish", + "fetch_items_for_url.request", + &format!("url={url}; error={error}"), + ) + .await; + return Ok(vec![]); + } + }; + + let list_videos = self.parse_list_videos(&html)?; + if list_videos.is_empty() { + return Ok(vec![]); + } + + let items = stream::iter(list_videos.into_iter().map(|video| { + let provider = self.clone(); + let options = options.clone(); + async move { provider.enrich_video(video, &options).await } + })) + .buffer_unordered(1) + .collect::>() + .await; + + if !items.is_empty() { + cache.insert(url, items.clone()); + } + + Ok(items) + } + + async fn get( + &self, + cache: VideoCache, + page: u32, + sort: &str, + options: ServerOptions, + ) -> Result> { + let url = match self.resolve_option_target(&options) { + Some(target) => Self::build_archive_page_url(&target, page), + None => self.build_top_level_url(page, sort), + }; + self.fetch_items_for_url(cache, url, &options).await + } + + async fn query( + &self, + cache: VideoCache, + page: u32, + query: &str, + options: 
ServerOptions, + ) -> Result> { + let url = match self.resolve_query_target(query) { + Some(target) => Self::build_archive_page_url(&target, page), + None => self.build_search_url(query, page), + }; + self.fetch_items_for_url(cache, url, &options).await + } +} + +#[async_trait] +impl Provider for PorndishProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let _ = per_page; + let page = page.parse::().unwrap_or(1); + + let videos = match query { + Some(query) if !query.trim().is_empty() => self.query(cache, page, &query, options).await, + _ => self.get(cache, page, &sort, options).await, + }; + + match videos { + Ok(videos) => videos, + Err(error) => { + report_provider_error( + "porndish", + "get_videos", + &format!("page={page}; error={error}"), + ) + .await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +struct PorndishThumbPolicy; + +impl PorndishThumbPolicy { + fn is_allowed_thumb_url(url: &str) -> bool { + let Some(url) = Url::parse(url).ok() else { + return false; + }; + if url.scheme() != "https" { + return false; + } + let Some(host) = url.host_str() else { + return false; + }; + PorndishProvider::is_porndish_host(host) && url.path().starts_with("/wp-content/uploads/") + } +} + +#[cfg(test)] +mod tests { + use super::PorndishProvider; + + #[test] + fn builds_archive_and_search_urls() { + let provider = PorndishProvider::new(); + assert_eq!( + provider.build_top_level_url(1, "new"), + "https://www.porndish.com/" + ); + assert_eq!( + provider.build_top_level_url(2, "popular"), + "https://www.porndish.com/popular/page/2/" + ); + assert_eq!( + provider.build_top_level_url(2, "hot"), + "https://www.porndish.com/hot/page/2/" + ); + assert_eq!( + provider.build_top_level_url(2, "trending"), + 
"https://www.porndish.com/trending/page/2/" + ); + assert_eq!( + provider.build_search_url("adriana chechik", 1), + "https://www.porndish.com/search/adriana+chechik/" + ); + assert_eq!( + provider.build_search_url("adriana chechik", 2), + "https://www.porndish.com/search/adriana+chechik/page/2/" + ); + } + + #[test] + fn parses_both_grid_and_list_cards() { + let provider = PorndishProvider::new(); + let html = r#" + + + "#; + + let items = provider.parse_list_videos(html).unwrap(); + assert_eq!(items.len(), 2); + assert_eq!(items[0].title, "Example Grid"); + assert_eq!(items[0].duration, 754); + assert_eq!(items[1].title, "Example List"); + assert_eq!(items[1].duration, 3561); + } + + #[test] + fn blocks_non_porndish_and_non_myvidplay_urls() { + assert!(PorndishProvider::is_allowed_list_url( + "https://www.porndish.com/search/test/" + )); + assert!(PorndishProvider::is_allowed_detail_url( + "https://www.porndish.com/porn/example/" + )); + assert!(PorndishProvider::is_allowed_myvidplay_iframe_url( + "https://myvidplay.com/e/abc123" + )); + assert!(PorndishProvider::is_allowed_myvidplay_pass_url( + "https://myvidplay.com/pass_md5/abc/def" + )); + + assert!(!PorndishProvider::is_allowed_list_url( + "https://169.254.169.254/latest/meta-data/" + )); + assert!(!PorndishProvider::is_allowed_detail_url( + "https://example.com/porn/example/" + )); + assert!(!PorndishProvider::is_allowed_myvidplay_iframe_url( + "https://example.com/e/abc123" + )); + assert!(!PorndishProvider::is_allowed_myvidplay_pass_url( + "https://example.com/pass_md5/abc/def" + )); + } +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index c9e5b55..c2af138 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -7,6 +7,7 @@ pub mod hanimecdn; pub mod hqpornerthumb; pub mod javtiful; pub mod noodlemagazine; +pub mod porndishthumb; pub mod spankbang; pub mod sxyprn; diff --git a/src/proxies/porndishthumb.rs b/src/proxies/porndishthumb.rs new file mode 100644 index 0000000..25178ce --- /dev/null 
+++ b/src/proxies/porndishthumb.rs
@@ -0,0 +1,150 @@
+use ntex::http::header::CONTENT_TYPE;
+use ntex::{
+    http::Response,
+    web::{self, HttpRequest, error},
+};
+use std::process::Command;
+use url::Url;
+
+use crate::util::requester::Requester;
+
+/// Content type sent when upstream does not declare an `image/*` type.
+const FALLBACK_CONTENT_TYPE: &str = "application/octet-stream";
+
+/// Python helper run via `python3 -c`; `sys.argv[1]` is the image URL.
+///
+/// It fetches the URL with `curl_cffi` impersonating Chrome. On success it
+/// writes two header lines to stdout (the final URL after redirects, then the
+/// upstream content type) followed by the raw body. Metadata travels on stdout
+/// so that anything printed to stderr cannot leak into `Content-Type`.
+const FETCH_SCRIPT: &str = r#"
+import sys
+from curl_cffi import requests
+
+response = requests.get(
+    sys.argv[1],
+    impersonate="chrome",
+    timeout=30,
+    allow_redirects=True,
+    headers={"Referer": "https://www.porndish.com/"},
+)
+if response.status_code >= 400:
+    sys.stderr.write(f"status={response.status_code}\n")
+    sys.exit(1)
+content_type = response.headers.get("content-type", "application/octet-stream")
+out = sys.stdout.buffer
+out.write(response.url.encode("utf-8") + b"\n")
+out.write(content_type.replace("\n", " ").encode("utf-8") + b"\n")
+out.write(response.content)
+"#;
+
+/// Successful helper output, parsed from its stdout.
+struct FetchedImage {
+    /// URL the body was served from, after any redirects.
+    final_url: String,
+    /// Upstream `Content-Type` value, trimmed.
+    content_type: String,
+    /// Raw response body.
+    body: Vec<u8>,
+}
+
+/// Returns `true` only for `https` URLs on a Porndish host whose path starts
+/// with `/wp-content/uploads/`.
+///
+/// Mirrors `PorndishThumbPolicy::is_allowed_thumb_url` in
+/// `src/providers/porndish.rs`; keep the two in sync.
+fn is_allowed_thumb_url(url: &str) -> bool {
+    let Ok(url) = Url::parse(url) else {
+        return false;
+    };
+    url.scheme() == "https"
+        && matches!(url.host_str(), Some("www.porndish.com" | "porndish.com"))
+        && url.path().starts_with("/wp-content/uploads/")
+}
+
+/// Splits helper stdout into its two header lines and the body.
+///
+/// Returns `None` when a header line is missing or is not valid UTF-8.
+fn parse_fetch_output(mut stdout: Vec<u8>) -> Option<FetchedImage> {
+    let url_end = stdout.iter().position(|&byte| byte == b'\n')?;
+    let type_start = url_end + 1;
+    let type_end = type_start + stdout[type_start..].iter().position(|&byte| byte == b'\n')?;
+
+    let final_url = std::str::from_utf8(&stdout[..url_end]).ok()?.trim().to_string();
+    let content_type = std::str::from_utf8(&stdout[type_start..type_end])
+        .ok()?
+        .trim()
+        .to_string();
+    // Both header lines are owned copies now, so the body can be split off in place.
+    let body = stdout.split_off(type_end + 1);
+
+    Some(FetchedImage {
+        final_url,
+        content_type,
+        body,
+    })
+}
+
+/// Serves a Porndish thumbnail for the `/porndish-thumb/{endpoint}*` route.
+///
+/// `endpoint` is the image URL, with or without its scheme. Responses:
+/// - `400` when the URL is outside the thumbnail allowlist;
+/// - `404` when the helper exits unsuccessfully (upstream status >= 400 or a
+///   failure inside the Python script);
+/// - `502` when the helper cannot be run, its output is malformed, or the
+///   request ended on a URL outside the allowlist after redirects;
+/// - `200` with the image bytes otherwise.
+pub async fn get_image(
+    req: HttpRequest,
+    _requester: web::types::State<Requester>,
+) -> Result<impl web::Responder, web::Error> {
+    let endpoint = req.match_info().query("endpoint");
+    let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
+        endpoint.to_string()
+    } else {
+        format!("https://{}", endpoint.trim_start_matches('/'))
+    };
+    if !is_allowed_thumb_url(&image_url) {
+        return Ok(web::HttpResponse::BadRequest().finish());
+    }
+
+    let output = tokio::task::spawn_blocking(move || {
+        Command::new("python3")
+            .arg("-c")
+            .arg(FETCH_SCRIPT)
+            .arg(image_url)
+            .output()
+    })
+    .await
+    .map_err(error::ErrorBadGateway)?
+    .map_err(error::ErrorBadGateway)?;
+
+    if !output.status.success() {
+        return Ok(web::HttpResponse::NotFound().finish());
+    }
+
+    let Some(image) = parse_fetch_output(output.stdout) else {
+        return Ok(Response::build(ntex::http::StatusCode::BAD_GATEWAY).finish());
+    };
+    // The allowlist only vetted the requested URL and the helper follows
+    // redirects, so re-check where the body was actually served from.
+    if !is_allowed_thumb_url(&image.final_url) {
+        return Ok(Response::build(ntex::http::StatusCode::BAD_GATEWAY).finish());
+    }
+
+    // This route only proxies thumbnails: any other upstream type (an HTML
+    // error page, for instance) is downgraded to an opaque download.
+    let is_image = image
+        .content_type
+        .get(..6)
+        .map_or(false, |prefix| prefix.eq_ignore_ascii_case("image/"));
+    let content_type = if is_image {
+        image.content_type
+    } else {
+        FALLBACK_CONTENT_TYPE.to_string()
+    };
+
+    let mut resp = Response::build(ntex::http::StatusCode::OK);
+    resp.set_header(CONTENT_TYPE, content_type);
+    Ok(resp.body(image.body))
+}
diff --git a/src/proxy.rs b/src/proxy.rs
index a3b0422..3794696 100644
--- a/src/proxy.rs
+++ b/src/proxy.rs
@@ -36,6 +36,11 @@ pub fn config(cfg: &mut web::ServiceConfig) {
         web::resource("/hqporner-thumb/{endpoint}*")
             .route(web::post().to(crate::proxies::hqpornerthumb::get_image))
             .route(web::get().to(crate::proxies::hqpornerthumb::get_image)),
+    )
+    .service(
+        web::resource("/porndish-thumb/{endpoint}*")
+            .route(web::post().to(crate::proxies::porndishthumb::get_image))
+            .route(web::get().to(crate::proxies::porndishthumb::get_image)),
     );
 }
 