Files
hottub/src/providers/porndish.rs
2026-03-17 00:57:50 +00:00

1415 lines
48 KiB
Rust

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::{
Provider, build_proxy_url, report_provider_error, report_provider_error_background,
requester_or_default, strip_url_scheme,
};
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::requester::Requester;
use crate::videos::{ServerOptions, VideoEmbed, VideoFormat, VideoItem};
use async_trait::async_trait;
use chrono::DateTime;
use error_chain::error_chain;
use futures::stream::{self, StreamExt};
use htmlentity::entity::{ICodedDataTrait, decode};
use regex::Regex;
use scraper::{ElementRef, Html, Selector};
use std::process::Command;
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::{SystemTime, UNIX_EPOCH};
use url::Url;
// Provider-local `Error`/`Result` types generated by the `error_chain!` macro.
error_chain! {
// Error types from other crates that are linked into this provider's error.
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
}
// Provider-specific error kinds.
errors {
// A parsing failure carrying a free-form message.
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
/// Provider that lists and resolves videos from the Porndish site.
///
/// The three filter lists are shared behind `Arc<RwLock<..>>` so that clones of
/// the provider and the background loader thread all see the same data.
#[derive(Debug, Clone)]
pub struct PorndishProvider {
/// Base URL of the site; `new` sets it to `https://www.porndish.com`.
url: String,
/// Options shown in the channel's "Sites" filter; seeded with an "All" entry.
sites: Arc<RwLock<Vec<FilterOption>>>,
/// Tag archive options collected while parsing pages; used for query matching.
tags: Arc<RwLock<Vec<FilterOption>>>,
/// Source/uploader archive options collected while parsing pages; used for query matching.
uploaders: Arc<RwLock<Vec<FilterOption>>>,
}
impl PorndishProvider {
/// Builds the provider with its default base URL and an initial "All" site
/// option, then starts the background filter load before returning.
pub fn new() -> Self {
    let all_sites = FilterOption {
        id: "all".to_string(),
        title: "All".to_string(),
    };
    let this = Self {
        url: "https://www.porndish.com".to_string(),
        sites: Arc::new(RwLock::new(vec![all_sites])),
        tags: Arc::new(RwLock::new(vec![])),
        uploaders: Arc::new(RwLock::new(vec![])),
    };
    this.spawn_initial_load();
    this
}
/// Spawns an OS thread that builds its own current-thread Tokio runtime and
/// runs `load_filters` on it. Failures are reported in the background and
/// never propagate to the caller.
fn spawn_initial_load(&self) {
    let root = self.url.clone();
    let site_list = Arc::clone(&self.sites);
    let tag_list = Arc::clone(&self.tags);
    let uploader_list = Arc::clone(&self.uploaders);
    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let rt = match built {
            Err(err) => {
                report_provider_error_background(
                    "porndish",
                    "spawn_initial_load.runtime_build",
                    &err.to_string(),
                );
                return;
            }
            Ok(rt) => rt,
        };
        rt.block_on(async move {
            let outcome = Self::load_filters(&root, site_list, tag_list, uploader_list).await;
            if let Err(err) = outcome {
                report_provider_error_background(
                    "porndish",
                    "spawn_initial_load.load_filters",
                    &err.to_string(),
                );
            }
        });
    });
}
/// Describes this provider as a `Channel`: a fixed "Sort" option plus a
/// "Sites" option filled from the currently known site list (empty if the
/// lock cannot be read).
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
    let site_options = match self.sites.read() {
        Ok(guard) => guard.clone(),
        Err(_) => Vec::new(),
    };
    // Small constructor to avoid repeating the struct literal per entry.
    let choice = |id: &str, title: &str| FilterOption {
        id: id.to_string(),
        title: title.to_string(),
    };
    let sort_option = ChannelOption {
        id: "sort".to_string(),
        title: "Sort".to_string(),
        description: "Browse the latest, popular, hot, or trending archives.".to_string(),
        systemImage: "list.number".to_string(),
        colorName: "blue".to_string(),
        options: vec![
            choice("new", "Newest"),
            choice("popular", "Popular"),
            choice("hot", "Hot"),
            choice("trending", "Trending"),
        ],
        multiSelect: false,
    };
    let sites_option = ChannelOption {
        id: "sites".to_string(),
        title: "Sites".to_string(),
        description: "Browse a Porndish source archive directly.".to_string(),
        systemImage: "network".to_string(),
        colorName: "purple".to_string(),
        options: site_options,
        multiSelect: false,
    };
    Channel {
        id: "porndish".to_string(),
        name: "Porndish".to_string(),
        description: "Porndish archive pages, tags, and source studios.".to_string(),
        premium: false,
        favicon: "https://www.google.com/s2/favicons?sz=64&domain=porndish.com".to_string(),
        status: "active".to_string(),
        categories: vec![],
        options: vec![sort_option, sites_option],
        nsfw: true,
        cacheDuration: Some(1800),
    }
}
fn selector(value: &str) -> Result<Selector> {
Selector::parse(value)
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
}
fn regex(value: &str) -> Result<Regex> {
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
}
/// Replaces every run of whitespace with a single space and drops leading
/// and trailing whitespace.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
/// Decodes HTML entities in `text`; if the decoded data cannot be turned
/// into a string the input is returned unchanged.
fn decode_html(text: &str) -> String {
    match decode(text.as_bytes()).to_string() {
        Ok(decoded) => decoded,
        Err(_) => text.to_string(),
    }
}
/// Returns the element's text nodes joined by spaces, whitespace-collapsed
/// and then HTML-entity decoded.
fn text_of(element: &ElementRef<'_>) -> String {
    let raw = element.text().collect::<Vec<_>>().join(" ");
    let squeezed = Self::collapse_whitespace(&raw);
    Self::decode_html(&squeezed)
}
/// Produces a comparison key for a title: whitespace collapsed to single
/// spaces and ASCII letters lower-cased.
fn normalize_title(title: &str) -> String {
    // `split_whitespace` already ignores leading and trailing whitespace.
    let words: Vec<&str> = title.split_whitespace().collect();
    words.join(" ").to_ascii_lowercase()
}
/// Turns a dash-separated slug into a space-separated title with the first
/// character of each word ASCII-upper-cased (e.g. `big-tits` -> `Big Tits`).
fn slug_to_title(slug: &str) -> String {
    let mut words: Vec<String> = Vec::new();
    for piece in slug.split('-') {
        let mut letters = piece.chars();
        // Empty pieces (leading, trailing, or doubled dashes) are skipped.
        let Some(initial) = letters.next() else {
            continue;
        };
        let mut word = String::with_capacity(piece.len());
        word.push(initial.to_ascii_uppercase());
        word.push_str(letters.as_str());
        words.push(word);
    }
    words.join(" ")
}
/// Resolves a possibly relative URL against the provider's base URL.
///
/// Absolute `http(s)` URLs pass through, protocol-relative URLs get an
/// `https:` scheme, and an empty input yields an empty string.
fn normalize_url(&self, url: &str) -> String {
    if url.is_empty() {
        String::new()
    } else if url.starts_with("http://") || url.starts_with("https://") {
        url.to_string()
    } else if url.starts_with("//") {
        format!("https:{url}")
    } else if url.starts_with('/') {
        format!("{}{}", self.url, url)
    } else {
        format!("{}/{}", self.url, url.trim_start_matches("./"))
    }
}
/// Parses `url`, returning `None` when it is not a valid absolute URL.
fn parse_url(url: &str) -> Option<Url> {
    match Url::parse(url) {
        Ok(parsed) => Some(parsed),
        Err(_) => None,
    }
}
/// Reports whether `host` is exactly one of the two Porndish host names.
fn is_porndish_host(host: &str) -> bool {
    host == "www.porndish.com" || host == "porndish.com"
}
/// Reports whether `host` is exactly one of the two MyVidPlay host names.
fn is_myvidplay_host(host: &str) -> bool {
    host == "myvidplay.com" || host == "www.myvidplay.com"
}
/// Reports whether `host` is exactly one of the two Vidara host names.
fn is_vidara_host(host: &str) -> bool {
    host == "vidara.so" || host == "www.vidara.so"
}
/// Allowlist check for listing pages: an `https` URL on a Porndish host whose
/// path is `/` or starts with one of the known archive prefixes.
fn is_allowed_list_url(url: &str) -> bool {
    const LIST_PREFIXES: [&str; 7] = [
        "/page/",
        "/popular/",
        "/hot/",
        "/trending/",
        "/search/",
        "/videos2/",
        "/video2/",
    ];
    let parsed = match Self::parse_url(url) {
        Some(parsed) => parsed,
        None => return false,
    };
    let on_site = parsed.scheme() == "https"
        && parsed.host_str().map_or(false, Self::is_porndish_host);
    if !on_site {
        return false;
    }
    let path = parsed.path();
    path == "/" || LIST_PREFIXES.iter().any(|prefix| path.starts_with(*prefix))
}
/// Allowlist check for video detail pages: an `https` URL on a Porndish host
/// whose path starts with `/porn/`.
fn is_allowed_detail_url(url: &str) -> bool {
    Self::parse_url(url).map_or(false, |parsed| {
        parsed.scheme() == "https"
            && parsed.host_str().map_or(false, Self::is_porndish_host)
            && parsed.path().starts_with("/porn/")
    })
}
/// Allowlist check for MyVidPlay embeds: an `https` URL on a MyVidPlay host
/// whose path starts with `/e/`.
fn is_allowed_myvidplay_iframe_url(url: &str) -> bool {
    Self::parse_url(url).map_or(false, |parsed| {
        parsed.scheme() == "https"
            && parsed.host_str().map_or(false, Self::is_myvidplay_host)
            && parsed.path().starts_with("/e/")
    })
}
/// Allowlist check for MyVidPlay token requests: an `https` URL on a
/// MyVidPlay host whose path starts with `/pass_md5/`.
fn is_allowed_myvidplay_pass_url(url: &str) -> bool {
    Self::parse_url(url).map_or(false, |parsed| {
        parsed.scheme() == "https"
            && parsed.host_str().map_or(false, Self::is_myvidplay_host)
            && parsed.path().starts_with("/pass_md5/")
    })
}
/// Allowlist check for Vidara embeds: an `https` URL on a Vidara host whose
/// path starts with `/e/`.
fn is_allowed_vidara_iframe_url(url: &str) -> bool {
    Self::parse_url(url).map_or(false, |parsed| {
        parsed.scheme() == "https"
            && parsed.host_str().map_or(false, Self::is_vidara_host)
            && parsed.path().starts_with("/e/")
    })
}
fn vidara_api_url(iframe_url: &str) -> Option<String> {
let url = Self::parse_url(iframe_url)?;
if !Self::is_allowed_vidara_iframe_url(iframe_url) {
return None;
}
let filecode = url
.path_segments()?
.filter(|segment| !segment.is_empty())
.next_back()?
.to_string();
if filecode.is_empty() {
return None;
}
Some(format!("https://vidara.so/api/stream?filecode={filecode}"))
}
/// Returns the proxy URL for a thumbnail, or an empty string when the
/// thumbnail is empty or not permitted by `PorndishThumbPolicy`.
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
    let usable = !thumb.is_empty() && PorndishThumbPolicy::is_allowed_thumb_url(thumb);
    if usable {
        build_proxy_url(options, "porndish-thumb", &strip_url_scheme(thumb))
    } else {
        String::new()
    }
}
/// Returns the proxy URL for a detail page, or an empty string when the page
/// URL is empty or not an allowed detail URL.
fn proxied_video(&self, options: &ServerOptions, page_url: &str) -> String {
    let usable = !page_url.is_empty() && Self::is_allowed_detail_url(page_url);
    if usable {
        build_proxy_url(options, "porndish", &strip_url_scheme(page_url))
    } else {
        String::new()
    }
}
/// Appends `item` to the shared list unless an entry with the same `id`
/// already exists. Does nothing if the write lock cannot be acquired.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    let Ok(mut guard) = target.write() else {
        return;
    };
    let already_present = guard.iter().any(|existing| existing.id == item.id);
    if !already_present {
        guard.push(item);
    }
}
/// Registers a tag archive under `<base>/video2/<slug>/` in the shared tag
/// list. Blank slugs or titles are ignored.
fn add_tag_filter(&self, slug: &str, title: &str) {
    let slug = slug.trim_matches('/').trim();
    let title = title.trim();
    if !slug.is_empty() && !title.is_empty() {
        let option = FilterOption {
            id: format!("{}/video2/{slug}/", self.url),
            title: title.to_string(),
        };
        Self::push_unique(&self.tags, option);
    }
}
/// Registers an uploader/source archive in the shared uploader list, keyed
/// by its URL. Ignored when the title is blank or the URL is empty or not an
/// allowed list URL.
fn add_uploader_filter(&self, url: &str, title: &str) {
    let title = title.trim();
    let acceptable = !url.is_empty() && !title.is_empty() && Self::is_allowed_list_url(url);
    if acceptable {
        let option = FilterOption {
            id: url.to_string(),
            title: title.to_string(),
        };
        Self::push_unique(&self.uploaders, option);
    }
}
/// Fetches `url` by running a small `curl_cffi` script under `python3` on a
/// blocking task and returns the response body as lossily decoded UTF-8.
///
/// `referer`, when present and non-empty, is sent as the `Referer` header.
///
/// # Errors
/// Fails when the blocking task cannot be joined, `python3` cannot be
/// executed, or the script exits unsuccessfully (the script exits with
/// status 1 for HTTP status codes of 400 and above); in the last case the
/// script's stderr is included in the message.
async fn fetch_with_curl_cffi(url: &str, referer: Option<&str>) -> Result<String> {
    // This text is handed verbatim to `python3 -c`, so its indentation is
    // part of the program: top-level statements must start at column 0 and
    // the bodies of the `if` statements must be indented, otherwise Python
    // rejects the script with an IndentationError before any request is made.
    const FETCH_SCRIPT: &str = r#"
import sys
from curl_cffi import requests
url = sys.argv[1]
referer = sys.argv[2] if len(sys.argv) > 2 else ""
headers = {}
if referer:
    headers["Referer"] = referer
response = requests.get(
    url,
    impersonate="chrome",
    timeout=30,
    allow_redirects=True,
    headers=headers,
)
if response.status_code >= 400:
    sys.stderr.write(f"status={response.status_code} url={response.url}\n")
    sys.exit(1)
sys.stdout.buffer.write(response.content)
"#;
    let url = url.to_string();
    // An absent referer is passed as an empty argument, which the script
    // treats as "no Referer header".
    let referer = referer.unwrap_or("").to_string();
    let output = tokio::task::spawn_blocking(move || {
        // Arguments are passed as argv entries, not through a shell.
        Command::new("python3")
            .arg("-c")
            .arg(FETCH_SCRIPT)
            .arg(url)
            .arg(referer)
            .output()
    })
    .await
    .map_err(|error| Error::from(format!("spawn_blocking failed: {error}")))?
    .map_err(|error| Error::from(format!("python3 execution failed: {error}")))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        return Err(Error::from(format!("curl_cffi request failed: {stderr}")));
    }
    Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
/// Fetches a page body; currently a thin wrapper over `fetch_with_curl_cffi`.
async fn fetch_html(url: &str, referer: Option<&str>) -> Result<String> {
    let body = Self::fetch_with_curl_cffi(url, referer).await?;
    Ok(body)
}
/// Crawls a fixed set of landing pages and fills the shared filter lists.
///
/// Links under `<base_url>/videos2/` become both site and uploader options;
/// `tag-*` classes on article elements become tag options. A page that fails
/// to download is reported in the background and skipped.
///
/// # Errors
/// Only fails if one of the CSS selectors cannot be parsed.
async fn load_filters(
    base_url: &str,
    sites: Arc<RwLock<Vec<FilterOption>>>,
    tags: Arc<RwLock<Vec<FilterOption>>>,
    uploaders: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
    let anchors = Self::selector("a[href]")?;
    let articles =
        Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy, article.post")?;
    // The same prefix applies to every link on every page, so build it once.
    let archive_prefix = format!("{base_url}/videos2/");
    let seed_pages = [
        format!("{base_url}/"),
        format!("{base_url}/page/2/"),
        format!("{base_url}/popular/"),
        format!("{base_url}/hot/"),
        format!("{base_url}/trending/"),
    ];
    for page_url in seed_pages.iter() {
        let body = match Self::fetch_html(page_url, Some(base_url)).await {
            Ok(body) => body,
            Err(error) => {
                report_provider_error_background(
                    "porndish",
                    "load_filters.fetch_html",
                    &format!("url={page_url}; error={error}"),
                );
                continue;
            }
        };
        let document = Html::parse_document(&body);

        // Source archives linked from the page.
        for anchor in document.select(&anchors) {
            let Some(href) = anchor.value().attr("href") else {
                continue;
            };
            let trimmed = href.trim_end_matches('/');
            let Some(remainder) = trimmed.strip_prefix(archive_prefix.as_str()) else {
                continue;
            };
            // Skip the archive root itself and paginated archive links.
            if remainder.is_empty() || remainder.contains("/page/") {
                continue;
            }
            let title = Self::text_of(&anchor);
            if title.is_empty() {
                continue;
            }
            let option = FilterOption {
                id: format!("{trimmed}/"),
                title,
            };
            Self::push_unique(&sites, option.clone());
            Self::push_unique(&uploaders, option);
        }

        // Tags encoded as `tag-<slug>` classes on each article.
        let tag_slugs = document
            .select(&articles)
            .filter_map(|article| article.value().attr("class"))
            .flat_map(|classes| classes.split_whitespace())
            .filter_map(|class_name| class_name.strip_prefix("tag-"))
            .filter(|slug| !slug.is_empty() && *slug != "format-video");
        for slug in tag_slugs {
            Self::push_unique(
                &tags,
                FilterOption {
                    id: format!("{base_url}/video2/{slug}/"),
                    title: Self::slug_to_title(slug),
                },
            );
        }
    }
    Ok(())
}
/// Parses a `MM:SS` or `HH:MM:SS` duration into seconds.
///
/// Whitespace around the whole value and around each component is ignored.
/// Returns 0 when the text has a different number of components or when any
/// component is not a non-negative integer. Arithmetic saturates at
/// `u32::MAX` instead of overflowing.
fn parse_duration(text: &str) -> u32 {
    let mut parts: Vec<u32> = Vec::with_capacity(3);
    for piece in text.trim().split(':') {
        match piece.trim().parse::<u32>() {
            Ok(value) => parts.push(value),
            // A malformed component invalidates the whole value. Dropping it
            // would shift the remaining components into the wrong unit
            // (e.g. "1:xx:30" being read as 1 minute 30 seconds).
            Err(_) => return 0,
        }
    }
    match parts.as_slice() {
        [minutes, seconds] => minutes.saturating_mul(60).saturating_add(*seconds),
        [hours, minutes, seconds] => hours
            .saturating_mul(3600)
            .saturating_add(minutes.saturating_mul(60))
            .saturating_add(*seconds),
        _ => 0,
    }
}
/// Parses a view counter such as `9.1k Views` by stripping the `View(s)`
/// label and all spaces, then delegating to `parse_abbreviated_number`.
fn parse_views(text: &str) -> Option<u32> {
    let without_label = text.replace("Views", "").replace("View", "");
    let compact = without_label.replace(' ', "");
    let cleaned: String = compact.trim().to_string();
    parse_abbreviated_number(&cleaned)
}
/// Parses an RFC 3339 timestamp into Unix seconds.
///
/// Returns `None` when the text is not valid RFC 3339 or when the instant
/// lies before the Unix epoch, which cannot be represented as `u64`.
fn parse_uploaded_at(text: &str) -> Option<u64> {
    let seconds = DateTime::parse_from_rfc3339(text).ok()?.timestamp();
    // A plain `as u64` cast would wrap a negative timestamp into a huge
    // positive value; treat such dates as unknown instead.
    if seconds < 0 {
        None
    } else {
        Some(seconds as u64)
    }
}
/// Encodes a search query with `application/x-www-form-urlencoded` rules
/// (spaces become `+`) for use in the search path.
fn encoded_query(query: &str) -> String {
    url::form_urlencoded::byte_serialize(query.as_bytes()).collect::<String>()
}
/// Builds the URL of a top-level listing for the given sort order.
/// Unknown sort values fall back to the home listing; pages above 1 get a
/// `page/<n>/` suffix.
fn build_top_level_url(&self, page: u32, sort: &str) -> String {
    let section = match sort {
        "popular" => "popular/",
        "hot" => "hot/",
        "trending" => "trending/",
        _ => "",
    };
    let mut target = format!("{}/{section}", self.url);
    if page > 1 {
        target.push_str(&format!("page/{page}/"));
    }
    target
}
/// Builds the URL of an archive page: `base` with a guaranteed trailing
/// slash, plus `page/<n>/` for pages above 1.
fn build_archive_page_url(base: &str, page: u32) -> String {
    let mut target = String::from(base);
    if !target.ends_with('/') {
        target.push('/');
    }
    if page > 1 {
        target.push_str(&format!("page/{page}/"));
    }
    target
}
/// Builds the search URL for `query`, with a `page/<n>/` suffix for pages
/// above 1.
fn build_search_url(&self, query: &str, page: u32) -> String {
    let first_page = format!("{}/search/{}/", self.url, Self::encoded_query(query));
    if page > 1 {
        format!("{first_page}page/{page}/")
    } else {
        first_page
    }
}
/// Returns the archive URL chosen through the "Sites" option, if any.
/// An absent, empty, or `all` value, or one that fails the list allowlist,
/// yields `None`.
fn resolve_option_target(&self, options: &ServerOptions) -> Option<String> {
    options
        .sites
        .as_deref()
        .filter(|site| !site.is_empty() && *site != "all")
        .filter(|site| Self::is_allowed_list_url(site))
        .map(|site| site.to_string())
}
/// Finds the first option whose normalized title equals the normalized
/// query and returns its id.
fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
    let wanted = Self::normalize_title(query);
    for option in options {
        if Self::normalize_title(&option.title) == wanted {
            return Some(option.id.clone());
        }
    }
    None
}
/// Maps a search query onto a known archive URL when it matches a filter
/// title. Uploaders are checked first, then sites, then tags; a list whose
/// lock cannot be read is skipped.
fn resolve_query_target(&self, query: &str) -> Option<String> {
    let lists = [&self.uploaders, &self.sites, &self.tags];
    for list in lists.iter() {
        let Ok(guard) = list.read() else {
            continue;
        };
        if let Some(target) = Self::match_filter(&guard, query) {
            return Some(target);
        }
    }
    None
}
/// Parses a listing page into `VideoItem`s, one per grid or list article.
///
/// Articles without a title link, with an empty title, or whose link is not
/// an allowed detail URL are skipped. As a side effect, uploader and tag
/// filters discovered on the page are registered on the provider.
///
/// # Errors
/// Only fails if one of the CSS selectors cannot be parsed.
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
let document = Html::parse_document(html);
let article_selector =
Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy")?;
let title_selector = Self::selector(".entry-title a[href]")?;
let image_selector = Self::selector(".entry-featured-media img")?;
let duration_selector = Self::selector(".mace-video-duration")?;
let source_selector = Self::selector(".entry-categories a[href]")?;
let views_selector = Self::selector(".entry-views strong")?;
let time_selector = Self::selector("time.entry-date[datetime]")?;
let author_selector = Self::selector(".entry-author a[href] strong, .entry-author a[href]")?;
let mut items = Vec::new();
for article in document.select(&article_selector) {
let Some(title_link) = article.select(&title_selector).next() else {
continue;
};
let Some(url) = title_link.value().attr("href") else {
continue;
};
let page_url = self.normalize_url(url);
if page_url.is_empty() || !Self::is_allowed_detail_url(&page_url) {
continue;
}
let title = Self::text_of(&title_link);
if title.is_empty() {
continue;
}
// The item id is the last path segment of the detail URL.
let slug = page_url
.trim_end_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
// Prefer `data-src` over `src` for the thumbnail.
let thumb = article
.select(&image_selector)
.next()
.and_then(|image| {
image
.value()
.attr("data-src")
.or_else(|| image.value().attr("src"))
})
.map(|value| self.normalize_url(value))
.unwrap_or_default();
let duration = article
.select(&duration_selector)
.next()
.map(|value| Self::parse_duration(&Self::text_of(&value)))
.unwrap_or(0);
let mut item = VideoItem::new(
slug,
title,
page_url.clone(),
"porndish".to_string(),
thumb,
duration,
);
// Uploader: the category/source link when present, otherwise the author link.
if let Some(source_link) = article.select(&source_selector).next() {
let source_title = Self::text_of(&source_link);
if !source_title.is_empty() {
if let Some(source_url) = source_link.value().attr("href") {
let source_url = self.normalize_url(source_url);
item = item
.uploader(source_title.clone())
.uploader_url(source_url.clone());
self.add_uploader_filter(&source_url, &source_title);
} else {
item = item.uploader(source_title);
}
}
} else if let Some(author_link) = article.select(&author_selector).next() {
let author = Self::text_of(&author_link);
if !author.is_empty() {
item = item.uploader(author);
}
}
if let Some(views) = article
.select(&views_selector)
.next()
.and_then(|value| Self::parse_views(&Self::text_of(&value)))
{
item = item.views(views);
}
if let Some(uploaded_at) = article
.select(&time_selector)
.next()
.and_then(|time| time.value().attr("datetime"))
.and_then(Self::parse_uploaded_at)
{
item = item.uploaded_at(uploaded_at);
}
// Tags are taken from `tag-<slug>` classes on the article element.
if let Some(classes) = article.value().attr("class") {
let mut tags = Vec::new();
for class_name in classes.split_whitespace() {
if let Some(slug) = class_name.strip_prefix("tag-") {
if slug.is_empty() || slug == "format-video" {
continue;
}
let title = Self::slug_to_title(slug);
// Keep each tag title once per item.
if !title.is_empty() && !tags.iter().any(|value| value == &title) {
self.add_tag_filter(slug, &title);
tags.push(title);
}
}
}
if !tags.is_empty() {
item = item.tags(tags);
}
}
items.push(item);
}
Ok(items)
}
/// Extracts HTML fragments containing an `<iframe` from JavaScript string
/// constants named `<something>Content` in the page.
///
/// Each string literal is decoded as a JSON string; literals that fail to
/// decode become empty and are therefore dropped by the iframe check.
fn extract_iframe_fragments(&self, html: &str) -> Result<Vec<String>> {
    let pattern = Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#)?;
    let fragments = pattern
        .captures_iter(html)
        .filter_map(|found| found.get(1))
        .map(|literal| {
            let quoted = format!("\"{}\"", literal.as_str());
            serde_json::from_str::<String>(&quoted).unwrap_or_default()
        })
        .filter(|markup| markup.contains("<iframe"))
        .collect::<Vec<_>>();
    Ok(fragments)
}
/// Resolves a MyVidPlay embed URL to a stream URL.
///
/// Steps: fetch the embed page, extract the `/pass_md5/...` path from its
/// `$.get(...)` call, fetch that URL to obtain the stream base, then append a
/// 10-character suffix plus `token` and `expiry` query parameters.
///
/// # Errors
/// Fails when the embed or pass URL is outside the allowlist, a fetch fails,
/// the `pass_md5` path or token is missing, or the pass response is empty,
/// `RELOAD`, or does not start with `http`.
async fn resolve_myvidplay_stream(&self, iframe_url: &str) -> Result<String> {
if !Self::is_allowed_myvidplay_iframe_url(iframe_url) {
return Err(Error::from(format!(
"blocked iframe url outside allowlist: {iframe_url}"
)));
}
let html = Self::fetch_html(iframe_url, Some(&self.url)).await?;
let pass_regex = Self::regex(r#"\$\.get\(\s*['"](/pass_md5/[^'"]+)['"]"#)?;
let path = pass_regex
.captures(&html)
.and_then(|captures| captures.get(1).map(|value| value.as_str().to_string()))
.ok_or_else(|| Error::from("myvidplay resolution failed: missing pass_md5 path"))?;
// The token is the last segment of the pass_md5 path.
let token = path
.trim_end_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
if token.is_empty() {
return Err(Error::from(
"myvidplay resolution failed: missing pass_md5 token".to_string(),
));
}
// Resolve a relative pass_md5 path against the embed URL.
let pass_url = if path.starts_with("http://") || path.starts_with("https://") {
path
} else {
let base = url::Url::parse(iframe_url)
.map_err(|error| Error::from(format!("invalid iframe url: {error}")))?;
base.join(&path)
.map_err(|error| Error::from(format!("invalid pass_md5 url: {error}")))?
.to_string()
};
if !Self::is_allowed_myvidplay_pass_url(&pass_url) {
return Err(Error::from(format!(
"blocked pass_md5 url outside allowlist: {pass_url}"
)));
}
let base = Self::fetch_html(&pass_url, Some(iframe_url))
.await?
.trim()
.to_string();
if base.is_empty() || base == "RELOAD" || !base.starts_with("http") {
// Only the first 120 characters of the response are echoed in the error.
return Err(Error::from(format!(
"myvidplay resolution failed: unusable pass_md5 response: {}",
base.chars().take(120).collect::<String>()
)));
}
let chars = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map_err(|error| Error::from(format!("time error: {error}")))?
.as_millis();
// The suffix is derived deterministically from the current millisecond
// timestamp (stepping by 17 through the alphabet); it is not random.
let suffix = (0..10)
.map(|index| {
let pos = ((now + (index as u128 * 17)) % chars.len() as u128) as usize;
chars[pos] as char
})
.collect::<String>();
Ok(format!("{base}{suffix}?token={token}&expiry={now}"))
}
async fn resolve_vidara_stream(&self, iframe_url: &str) -> Result<String> {
let api_url = Self::vidara_api_url(iframe_url)
.ok_or_else(|| Error::from(format!("blocked vidara iframe url: {iframe_url}")))?;
let response = Self::fetch_html(&api_url, Some(iframe_url)).await?;
let json: serde_json::Value = serde_json::from_str(&response)
.map_err(|error| Error::from(format!("vidara json parse failed: {error}")))?;
let stream_url = json
.get("streaming_url")
.and_then(|value| value.as_str())
.unwrap_or("")
.trim()
.to_string();
if stream_url.is_empty() || !(stream_url.starts_with("https://") || stream_url.starts_with("http://")) {
return Err(Error::from("vidara stream missing streaming_url".to_string()));
}
Ok(stream_url)
}
/// Finds the first `<iframe ... src="...">` in `fragment` and returns the
/// whole fragment together with the `src` value, or `None` if there is no
/// such iframe.
fn parse_embed_source(fragment: &str) -> Result<Option<(String, String)>> {
    let pattern = Self::regex(r#"(?is)<iframe[^>]+src="([^"]+)"[^>]*>"#)?;
    let source = pattern
        .captures(fragment)
        .and_then(|found| found.get(1))
        .map(|src| src.as_str().to_string());
    Ok(source.map(|src| (fragment.to_string(), src)))
}
/// Enriches a listing item with data from its detail page.
///
/// Overrides title, thumbnail, uploader, views, upload time and tags when the
/// page provides them, then picks an embed (a Vidara iframe wins; otherwise
/// the first MyVidPlay or other `https://` iframe) and sets the item's
/// playback URL and formats accordingly. Uploader and tag filters found on
/// the page are registered on the provider. `_requester` is not used here.
///
/// # Errors
/// Fails if a selector or regex cannot be built; stream resolution failures
/// are reported in the background and fall back to the page URL.
async fn apply_detail_video(
&self,
mut item: VideoItem,
html: &str,
page_url: &str,
options: &ServerOptions,
_requester: &mut Requester,
) -> Result<VideoItem> {
// All DOM work happens inside this block, so the parsed document is
// dropped before any of the `.await` points further down.
let (
parsed_title,
parsed_thumb,
parsed_uploader,
parsed_uploader_url,
parsed_views,
parsed_uploaded_at,
parsed_tags,
) = {
let document = Html::parse_document(html);
let title_selector = Self::selector("h1.entry-title")?;
let thumb_selector = Self::selector("meta[property=\"og:image\"]")?;
let category_selector =
Self::selector(".entry-categories-l a[href], .entry-categories a[href]")?;
let views_selector = Self::selector(".entry-views strong")?;
let time_selector = Self::selector("time.entry-date[datetime]")?;
let tag_selector = Self::selector(".entry-tags a[href]")?;
let parsed_title = document
.select(&title_selector)
.next()
.map(|title| Self::text_of(&title))
.filter(|title| !title.is_empty());
let parsed_thumb = document
.select(&thumb_selector)
.next()
.and_then(|meta| meta.value().attr("content"))
.map(|thumb| self.normalize_url(thumb))
.filter(|thumb| !thumb.is_empty());
// The first category link is treated as the uploader/source.
let (parsed_uploader, parsed_uploader_url) = document
.select(&category_selector)
.next()
.map(|category| {
let title = Self::text_of(&category);
let url = category
.value()
.attr("href")
.map(|href| self.normalize_url(href))
.filter(|href| !href.is_empty());
(title, url)
})
.filter(|(title, _)| !title.is_empty())
.map(|(title, url)| (Some(title), url))
.unwrap_or((None, None));
let parsed_views = document
.select(&views_selector)
.next()
.and_then(|value| Self::parse_views(&Self::text_of(&value)));
let parsed_uploaded_at = document
.select(&time_selector)
.next()
.and_then(|time| time.value().attr("datetime"))
.and_then(Self::parse_uploaded_at);
// Collect (title, slug) pairs; the slug is the last segment of the tag URL.
let mut parsed_tags = Vec::new();
for tag_link in document.select(&tag_selector) {
let title = Self::text_of(&tag_link);
let tag_url = tag_link
.value()
.attr("href")
.map(|href| self.normalize_url(href))
.unwrap_or_default();
let slug = tag_url
.trim_end_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
if !title.is_empty() {
parsed_tags.push((title, slug));
}
}
(
parsed_title,
parsed_thumb,
parsed_uploader,
parsed_uploader_url,
parsed_views,
parsed_uploaded_at,
parsed_tags,
)
};
if let Some(title) = parsed_title {
item.title = title;
}
if let Some(thumb) = parsed_thumb {
item.thumb = self.proxied_thumb(options, &thumb);
}
if let Some(uploader) = parsed_uploader {
item.uploader = Some(uploader.clone());
if let Some(uploader_url) = parsed_uploader_url {
item.uploaderUrl = Some(uploader_url.clone());
self.add_uploader_filter(&uploader_url, &uploader);
}
}
if let Some(views) = parsed_views {
item.views = Some(views);
}
if let Some(uploaded_at) = parsed_uploaded_at {
item.uploadedAt = Some(uploaded_at);
}
if !parsed_tags.is_empty() {
let mut tags = Vec::new();
for (title, slug) in parsed_tags {
if !slug.is_empty() {
self.add_tag_filter(&slug, &title);
}
if !tags.iter().any(|value| value == &title) {
tags.push(title);
}
}
item.tags = Some(tags);
}
// Embed selection: stop at the first Vidara iframe; otherwise remember the
// first MyVidPlay or generic https iframe as a fallback.
let mut fallback_embed: Option<(String, String)> = None;
let mut selected_embed: Option<(String, String)> = None;
for fragment in self.extract_iframe_fragments(html)? {
let Some((embed_html, iframe_url)) = Self::parse_embed_source(&fragment)? else {
continue;
};
let iframe_url = self.normalize_url(&iframe_url);
if Self::is_allowed_vidara_iframe_url(&iframe_url) {
selected_embed = Some((embed_html, iframe_url));
break;
}
if fallback_embed.is_none()
&& (Self::is_allowed_myvidplay_iframe_url(&iframe_url)
|| iframe_url.starts_with("https://"))
{
fallback_embed = Some((embed_html, iframe_url));
}
}
if let Some((embed_html, iframe_url)) = selected_embed.or(fallback_embed) {
item.embed = Some(VideoEmbed {
html: embed_html,
source: iframe_url.clone(),
});
let proxy_url = self.proxied_video(options, page_url);
if Self::is_allowed_vidara_iframe_url(&iframe_url) {
match self.resolve_vidara_stream(&iframe_url).await {
// The resolved stream URL itself is discarded; a successful
// resolution only decides whether the proxy URL is advertised.
Ok(_stream_url) => {
if !proxy_url.is_empty() {
item.url = proxy_url.clone();
item.formats = Some(vec![VideoFormat::new(
proxy_url,
"sd".to_string(),
"m3u8".to_string(),
)]);
} else {
item.url = page_url.to_string();
}
}
Err(error) => {
report_provider_error_background(
"porndish",
"resolve_vidara_stream",
&format!("iframe_url={iframe_url}; error={error}"),
);
item.url = page_url.to_string();
}
}
} else if Self::is_allowed_myvidplay_iframe_url(&iframe_url) {
match self.resolve_myvidplay_stream(&iframe_url).await {
// Same as above: the resolved URL is not used directly.
Ok(_stream_url) => {
if !proxy_url.is_empty() {
item.url = proxy_url.clone();
item.formats = Some(vec![VideoFormat::new(
proxy_url,
"sd".to_string(),
"mp4".to_string(),
)]);
} else {
item.url = page_url.to_string();
}
}
Err(error) => {
report_provider_error_background(
"porndish",
"resolve_myvidplay_stream",
&format!("iframe_url={iframe_url}; error={error}"),
);
item.url = page_url.to_string();
}
}
} else {
// Unknown embed host: expose the iframe URL itself.
item.url = iframe_url;
}
}
// If no format was set and the URL is not the detail page, publish a
// single generic format that sends the detail page as the Referer.
if item.formats.is_none() && item.url != page_url {
let mut format =
VideoFormat::new(item.url.clone(), "unknown".to_string(), "mp4".to_string());
format.add_http_header("Referer".to_string(), page_url.to_string());
item.formats = Some(vec![format]);
}
Ok(item)
}
/// Proxies the item's thumbnail and, when its URL is an allowed absolute
/// detail URL, fetches the detail page and applies its data.
///
/// Never fails: a blocked URL, a fetch error, or a parse error is reported
/// in the background and the item (with its proxied thumbnail) is returned
/// without detail data.
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
    let mut video = item;
    if !video.thumb.is_empty() {
        video.thumb = self.proxied_thumb(options, &video.thumb);
    }
    let is_absolute = video.url.starts_with("http://") || video.url.starts_with("https://");
    if !is_absolute {
        return video;
    }
    let page_url = video.url.clone();
    if !Self::is_allowed_detail_url(&page_url) {
        report_provider_error_background(
            "porndish",
            "enrich_video.blocked_detail_url",
            &format!("url={page_url}"),
        );
        return video;
    }
    // Kept so a failed detail parse can still return the listing data.
    let fallback = video.clone();
    let mut requester = options.requester.clone().unwrap_or_else(Requester::new);
    let html = match Self::fetch_html(&page_url, Some(&self.url)).await {
        Ok(html) => html,
        Err(error) => {
            report_provider_error_background(
                "porndish",
                "enrich_video.request",
                &format!("url={page_url}; error={error}"),
            );
            return video;
        }
    };
    let detailed = self
        .apply_detail_video(video, &html, &page_url, options, &mut requester)
        .await;
    match detailed {
        Ok(detailed) => detailed,
        Err(error) => {
            report_provider_error_background(
                "porndish",
                "enrich_video.parse",
                &format!("url={page_url}; error={error}"),
            );
            fallback
        }
    }
}
/// Returns the enriched videos for a listing URL, using `cache` when it
/// holds an entry younger than 300 seconds.
///
/// A URL outside the list allowlist or a failed download is reported and
/// yields an empty list rather than an error. Non-empty results are stored
/// in the cache under `url`.
///
/// # Errors
/// Propagates errors from `parse_list_videos`.
async fn fetch_items_for_url(
&self,
cache: VideoCache,
url: String,
options: &ServerOptions,
) -> Result<Vec<VideoItem>> {
// NOTE(review): the cache is consulted before the allowlist check below.
if let Some((time, items)) = cache.get(&url) {
if time.elapsed().unwrap_or_default().as_secs() < 300 {
return Ok(items.clone());
}
}
// The returned requester is not used in this function.
let _requester = requester_or_default(options, "porndish", "missing_requester");
if !Self::is_allowed_list_url(&url) {
report_provider_error(
"porndish",
"fetch_items_for_url.blocked_url",
&format!("url={url}"),
)
.await;
return Ok(vec![]);
}
let html = match Self::fetch_html(&url, Some(&self.url)).await {
Ok(html) => html,
Err(error) => {
report_provider_error(
"porndish",
"fetch_items_for_url.request",
&format!("url={url}; error={error}"),
)
.await;
return Ok(vec![]);
}
};
let list_videos = self.parse_list_videos(&html)?;
if list_videos.is_empty() {
return Ok(vec![]);
}
// Each item is enriched from its detail page; the buffer size of 1 means
// at most one enrichment future is in flight at a time.
let items = stream::iter(list_videos.into_iter().map(|video| {
let provider = self.clone();
let options = options.clone();
async move { provider.enrich_video(video, &options).await }
}))
.buffer_unordered(1)
.collect::<Vec<_>>()
.await;
if !items.is_empty() {
cache.insert(url, items.clone());
}
Ok(items)
}
/// Lists videos for a browse request: the archive chosen through the
/// "Sites" option when one is set, otherwise the top-level listing for `sort`.
async fn get(
    &self,
    cache: VideoCache,
    page: u32,
    sort: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = if let Some(archive) = self.resolve_option_target(&options) {
        Self::build_archive_page_url(&archive, page)
    } else {
        self.build_top_level_url(page, sort)
    };
    self.fetch_items_for_url(cache, url, &options).await
}
/// Lists videos for a search request: the matching archive when the query
/// equals a known uploader, site, or tag title, otherwise the site search.
async fn query(
    &self,
    cache: VideoCache,
    page: u32,
    query: &str,
    options: ServerOptions,
) -> Result<Vec<VideoItem>> {
    let url = if let Some(archive) = self.resolve_query_target(query) {
        Self::build_archive_page_url(&archive, page)
    } else {
        self.build_search_url(query, page)
    };
    self.fetch_items_for_url(cache, url, &options).await
}
}
#[async_trait]
impl Provider for PorndishProvider {
    /// Entry point for listing videos. A non-blank `query` runs a search,
    /// otherwise the listing for `sort` is returned. `pool` and `per_page`
    /// are unused, and an unparsable `page` falls back to 1. Errors are
    /// reported and turned into an empty list.
    async fn get_videos(
        &self,
        cache: VideoCache,
        pool: DbPool,
        sort: String,
        query: Option<String>,
        page: String,
        per_page: String,
        options: ServerOptions,
    ) -> Vec<VideoItem> {
        let _ = pool;
        let _ = per_page;
        let page = page.parse::<u32>().unwrap_or(1);
        let search_terms = query.filter(|terms| !terms.trim().is_empty());
        let outcome = match search_terms {
            Some(terms) => self.query(cache, page, &terms, options).await,
            None => self.get(cache, page, &sort, options).await,
        };
        match outcome {
            Ok(videos) => videos,
            Err(error) => {
                report_provider_error(
                    "porndish",
                    "get_videos",
                    &format!("page={page}; error={error}"),
                )
                .await;
                Vec::new()
            }
        }
    }

    /// Returns the channel description for this provider.
    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
        let channel = self.build_channel(clientversion);
        Some(channel)
    }
}
/// Allowlist policy for thumbnail URLs that may be proxied.
struct PorndishThumbPolicy;
impl PorndishThumbPolicy {
    /// Accepts only `https` URLs on a Porndish host whose path starts with
    /// `/wp-content/uploads/`.
    fn is_allowed_thumb_url(url: &str) -> bool {
        let Ok(parsed) = Url::parse(url) else {
            return false;
        };
        let on_site = parsed
            .host_str()
            .map_or(false, PorndishProvider::is_porndish_host);
        parsed.scheme() == "https"
            && on_site
            && parsed.path().starts_with("/wp-content/uploads/")
    }
}
// Unit tests for URL building, listing parsing, and the URL allowlists.
// NOTE: `PorndishProvider::new()` also starts the background filter load.
#[cfg(test)]
mod tests {
use super::PorndishProvider;
// Top-level and search URLs, including pagination and `+`-encoded spaces.
#[test]
fn builds_archive_and_search_urls() {
let provider = PorndishProvider::new();
assert_eq!(
provider.build_top_level_url(1, "new"),
"https://www.porndish.com/"
);
assert_eq!(
provider.build_top_level_url(2, "popular"),
"https://www.porndish.com/popular/page/2/"
);
assert_eq!(
provider.build_top_level_url(2, "hot"),
"https://www.porndish.com/hot/page/2/"
);
assert_eq!(
provider.build_top_level_url(2, "trending"),
"https://www.porndish.com/trending/page/2/"
);
assert_eq!(
provider.build_search_url("adriana chechik", 1),
"https://www.porndish.com/search/adriana+chechik/"
);
assert_eq!(
provider.build_search_url("adriana chechik", 2),
"https://www.porndish.com/search/adriana+chechik/page/2/"
);
}
// Both article templates (`entry-tpl-grid` and `entry-tpl-list-fancy`) are
// parsed, with titles and durations extracted.
#[test]
fn parses_both_grid_and_list_cards() {
let provider = PorndishProvider::new();
let html = r#"
<article class="entry-tpl-grid post type-post tag-anal">
<div class="entry-featured-media">
<a class="g1-frame" href="https://www.porndish.com/porn/foo/">
<div class="g1-frame-inner">
<img data-src="https://www.porndish.com/wp-content/uploads/foo.jpg" />
<span class="mace-video-duration">12:34</span>
</div>
</a>
</div>
<div class="entry-body">
<header class="entry-header">
<div class="entry-before-title">
<p class="entry-meta"><span class="entry-views"><strong>9.1k</strong></span></p>
<span class="entry-categories"><a href="https://www.porndish.com/videos2/tushy/">Tushy</a></span>
</div>
<h3 class="entry-title"><a href="https://www.porndish.com/porn/foo/">Example Grid</a></h3>
</header>
<footer><time class="entry-date" datetime="2026-02-08T07:40:22+00:00"></time></footer>
</div>
</article>
<article class="entry-tpl-list-fancy post type-post tag-big-tits">
<div class="entry-featured-media">
<a class="g1-frame" href="https://www.porndish.com/porn/bar/">
<div class="g1-frame-inner">
<img data-src="https://www.porndish.com/wp-content/uploads/bar.jpg" />
<span class="mace-video-duration">59:21</span>
</div>
</a>
</div>
<div class="entry-body">
<header class="entry-header">
<h3 class="entry-title"><a href="https://www.porndish.com/porn/bar/">Example List</a></h3>
</header>
</div>
</article>
"#;
let items = provider.parse_list_videos(html).unwrap();
assert_eq!(items.len(), 2);
assert_eq!(items[0].title, "Example Grid");
assert_eq!(items[0].duration, 754);
assert_eq!(items[1].title, "Example List");
assert_eq!(items[1].duration, 3561);
}
// Each allowlist accepts its own host/path shape and rejects foreign hosts.
#[test]
fn blocks_non_porndish_and_non_myvidplay_urls() {
assert!(PorndishProvider::is_allowed_list_url(
"https://www.porndish.com/search/test/"
));
assert!(PorndishProvider::is_allowed_detail_url(
"https://www.porndish.com/porn/example/"
));
assert!(PorndishProvider::is_allowed_myvidplay_iframe_url(
"https://myvidplay.com/e/abc123"
));
assert!(PorndishProvider::is_allowed_myvidplay_pass_url(
"https://myvidplay.com/pass_md5/abc/def"
));
assert!(PorndishProvider::is_allowed_vidara_iframe_url(
"https://vidara.so/e/abc123"
));
assert!(!PorndishProvider::is_allowed_list_url(
"https://169.254.169.254/latest/meta-data/"
));
assert!(!PorndishProvider::is_allowed_detail_url(
"https://example.com/porn/example/"
));
assert!(!PorndishProvider::is_allowed_myvidplay_iframe_url(
"https://example.com/e/abc123"
));
assert!(!PorndishProvider::is_allowed_myvidplay_pass_url(
"https://example.com/pass_md5/abc/def"
));
assert!(!PorndishProvider::is_allowed_vidara_iframe_url(
"https://example.com/e/abc123"
));
}
}