From ff680964cf10b813d938ffd62b824ff5d2e86cd1 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 20 Jun 2026 12:03:23 +0000 Subject: [PATCH] porn4fans fix --- docs/provider-catalog.md | 2 +- src/providers/porn4fans.rs | 1096 +++++++++++++++++++++++------------- 2 files changed, 700 insertions(+), 398 deletions(-) diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 8b20974..5fe257d 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -37,7 +37,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `pimpbunny` | `onlyfans` | no | yes | Proxy-backed playback and thumbnail handling. | | `pmvhaven` | `pmv-compilation` | no | no | PMV grouping example. | | `porn00` | `mainstream-tube` | no | no | Lightweight scraper. | -| `porn4fans` | `onlyfans` | no | no | OnlyFans-like grouping example. | +| `porn4fans` | `onlyfans` | no | no | KVS (Kernel Video Sharing) scraper for porn4fans.com (OnlyFans creator clips); Cloudflare-fronted but serves direct requests (no JS challenge), so the shared requester works without Jina/FlareSolverr and detail-page enrichment is safe; all feeds are fetched as KVS `?mode=async&function=get_block` HTML fragments (cleaner + properly paginated vs the JS-filled full pages), parsed with `scraper` over `div.item` cards (`a.img-wrap.video` href→id/title, `img.thumb` data-webp/src, `div.duration`, `li.video-item.views span`, `li.video-item.model a` for uploader, `div.preview-video[data-src]` preview clip); latest feed is `/onlyfans-videos/` block `custom_list_videos_latest_videos_list` paginated by `from=N` (12/page — note: NOT `/latest-updates/`, which 404s); search is `/search/{dashed-query}/` block `custom_list_videos_videos_list_search_result` with `q={query}&category_ids=&from_videos=N` (24/page); category `/categories/{slug}/` and tag `/tags/{slug}/` share block `custom_list_videos_common_videos_list` (`from=N`, 12/page); model `/models/{slug}/` uses block 
`custom_list_videos_models_videos_list`; sort maps new→post_date, popular→video_viewed, rated→rating, longest→duration; `cat:`/`category:`, `tag:`, and `model:`/`uploader:`/`pornstar:`/`star:` query prefixes route to the matching archive, and a bare query that exactly matches a background-loaded category title goes to that archive instead of keyword search; background-loads the 55-entry category title→slug map from `/categories/` (`#list_categories_categories_list_items a.item`) for the `categories` filter option (sanitized out of `/api/status` like `stars`/`networks`, but honored in `/api/videos`); `video.url` is the `/video/{id}/{slug}/` page URL (NOT yt-dlp-resolvable — yt-dlp's generic KVS extractor fails on this site's flashvars), so per-card enrichment fetches the detail page and pulls the direct `video_url`/`video_alt_url` flashvars (480p/720p) into `formats` (bounded `buffered(8)` concurrency); KVS `get_file` MP4 URLs come as `…/ID.mp4/?v-acctoken=…` with a trailing slash before the query — the provider strips it to `…/ID.mp4?v-acctoken=…` so the path ends in `.mp4` (health-check/yt-dlp media detection keys off the extension); formats carry a `Referer` header (works with or without it); thumbnails (`/contents/videos_screenshots/…`) need no proxy or referer; uploader name on a card is the OnlyFans handle while the `/models/{slug}/` URL slug is the canonical model name (they legitimately differ — e.g. handle "Blasianflexcouple" at slug `nina-lee`), so `uploader` uses the display handle and `uploaderUrl`/`uploaderId` (`porn4fans:{slug}`) use the slug; no `/api/uploaders` profile, no proxy; note "teen" and similar are compliance-blocked keywords that the site itself returns empty for. | | `porndish` | `studio-network` | no | yes | Redirect proxy plus thumb proxy usage. | | `pornhat` | `mainstream-tube` | no | no | Basic tube provider. | | `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. 
| diff --git a/src/providers/porn4fans.rs b/src/providers/porn4fans.rs index fd423d1..8c06c06 100644 --- a/src/providers/porn4fans.rs +++ b/src/providers/porn4fans.rs @@ -1,18 +1,28 @@ use crate::DbPool; use crate::api::ClientVersion; -use crate::providers::{Provider, report_provider_error, requester_or_default}; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoItem}; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; -use futures::future::join_all; +use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use scraper::{Html, Selector}; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, RwLock}; +use std::thread; + +const CHANNEL_ID: &str = "porn4fans"; +const BASE_URL: &str = "https://www.porn4fans.com"; +/// Bounded concurrency for detail-page enrichment (one fetch per card). +const ENRICH_CONCURRENCY: usize = 8; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { @@ -30,6 +40,27 @@ error_chain! { #[derive(Debug, Clone)] pub struct Porn4fansProvider { url: String, + /// Display list exposed via the `categories` status option. + categories: Arc>>, + /// normalized category title -> site slug, for shortcut routing. + category_map: Arc>>, +} + +/// Where a `/api/videos` request should be routed on the site. Every variant +/// resolves to the same KVS `get_block` async fragment, just with a different +/// path + `block_id` + pagination parameter. +#[derive(Debug, Clone)] +enum Target { + /// Default `/onlyfans-videos/` latest feed. 
+ Latest, + /// Native keyword search. + Search { query: String }, + /// `/categories/{slug}/` archive. + Category { slug: String }, + /// `/tags/{slug}/` archive. + Tag { slug: String }, + /// `/models/{slug}/` archive (uploader shortcut). + Model { slug: String }, } #[derive(Debug, Clone)] @@ -40,27 +71,80 @@ struct Porn4fansCard { thumb: String, duration: u32, views: Option, - rating: Option, + preview: Option, + uploader: Option, + uploader_url: Option, + uploader_slug: Option, } impl Porn4fansProvider { pub fn new() -> Self { - Self { - url: "https://www.porn4fans.com".to_string(), - } + let provider = Self { + url: BASE_URL.to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + category_map: Arc::new(RwLock::new(HashMap::new())), + }; + provider.spawn_initial_load(); + provider } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .categories + .read() + .map(|guard| guard.clone()) + .unwrap_or_default(); + Channel { - id: "porn4fans".to_string(), + id: CHANNEL_ID.to_string(), name: "Porn4Fans".to_string(), - description: "OnlyFans porn videos.".to_string(), + description: "OnlyFans creator clips — latest feed, keyword search, plus category, tag and model shortcuts." 
+ .to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=www.porn4fans.com" .to_string(), status: "active".to_string(), - categories: vec![], - options: vec![], + categories: categories.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Feed ordering.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "longest".to_string(), + title: "Longest".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Jump directly to a category archive.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: categories, + multiSelect: false, + }, + ], nsfw: true, cacheDuration: Some(1800), } @@ -68,56 +152,171 @@ impl Porn4fansProvider { fn sort_by(sort: &str) -> &'static str { match sort { - "popular" => "video_viewed", + "popular" | "viewed" | "most-viewed" => "video_viewed", + "rated" | "rating" | "top-rated" => "rating", + "longest" | "duration" => "duration", _ => "post_date", } } - fn build_latest_url(&self, page: u32, sort: &str) -> String { - format!( - "{}/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by={}&from={page}", - self.url, - Self::sort_by(sort) - ) + /// lowercase, spaces/underscores -> single dash, drop anything else. 
+ fn slugify(value: &str) -> String { + let mut slug = String::new(); + let mut prev_dash = false; + for ch in value.trim().to_ascii_lowercase().chars() { + if ch.is_ascii_alphanumeric() { + slug.push(ch); + prev_dash = false; + } else if matches!(ch, ' ' | '-' | '_' | '+') { + if !prev_dash && !slug.is_empty() { + slug.push('-'); + prev_dash = true; + } + } + } + slug.trim_matches('-').to_string() } - fn build_latest_headers(&self) -> Vec<(String, String)> { - vec![( - "Referer".to_string(), - format!("{}/latest-updates/", self.url), - )] + /// Normalize a display title for lookup map keys. + fn normalize_title(value: &str) -> String { + value + .trim() + .trim_start_matches('#') + .replace(['_', '-'], " ") + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() } - fn build_search_path_query(query: &str, separator: &str) -> String { - query.split_whitespace().collect::>().join(separator) + fn resolve_category_slug(&self, value: &str) -> Option { + let normalized = Self::normalize_title(value); + if normalized.is_empty() || normalized == "all" { + return None; + } + if let Some(slug) = self + .category_map + .read() + .ok() + .and_then(|map| map.get(&normalized).cloned()) + { + return Some(slug); + } + None } - fn build_search_url(&self, query: &str, page: u32, sort: &str) -> String { - let query_param = Self::build_search_path_query(query, "+"); - let path_query = Self::build_search_path_query(query, "-"); - format!( - "{}/search/{path_query}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q={query_param}&sort_by={}&from_videos={page}", - self.url, - Self::sort_by(sort) - ) + /// Decide where a request is routed: explicit option/prefix first, then a + /// bare query that matches a known category, otherwise keyword search. 
+ fn target_from_request( + &self, + query: Option<&str>, + category: Option<&str>, + ) -> Target { + if let Some(category) = category { + if let Some(slug) = self.resolve_category_slug(category) { + return Target::Category { slug }; + } + } + + if let Some(raw) = query { + let trimmed = raw.trim(); + if !trimmed.is_empty() { + for prefix in ["category:", "cat:"] { + if let Some(rest) = trimmed.strip_prefix(prefix) { + let slug = self + .resolve_category_slug(rest) + .unwrap_or_else(|| Self::slugify(rest)); + if !slug.is_empty() { + return Target::Category { slug }; + } + } + } + if let Some(rest) = trimmed.strip_prefix("tag:") { + let slug = Self::slugify(rest); + if !slug.is_empty() { + return Target::Tag { slug }; + } + } + for prefix in ["model:", "uploader:", "pornstar:", "star:"] { + if let Some(rest) = trimmed.strip_prefix(prefix) { + let slug = Self::slugify(rest); + if !slug.is_empty() { + return Target::Model { slug }; + } + } + } + // A bare query that is exactly a known category goes to that + // archive (better curated than keyword search). + if let Some(slug) = self.resolve_category_slug(trimmed) { + return Target::Category { slug }; + } + return Target::Search { + query: trimmed.to_string(), + }; + } + } + + Target::Latest } - fn build_search_headers(&self, query: &str) -> Vec<(String, String)> { - let path_query = Self::build_search_path_query(query, "-"); - vec![( - "Referer".to_string(), - format!("{}/search/{path_query}/", self.url), - )] + /// Build the async `get_block` URL plus the Referer to send with it. 
+ fn build_block_url(&self, target: &Target, sort: &str, page: u32) -> (String, String) { + let sort_by = Self::sort_by(sort); + match target { + Target::Latest => { + let referer = format!("{}/onlyfans-videos/", self.url); + let url = format!( + "{referer}?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by={sort_by}&from={page}" + ); + (url, referer) + } + Target::Category { slug } => { + let referer = format!("{}/categories/{slug}/", self.url); + let url = format!( + "{referer}?mode=async&function=get_block&block_id=custom_list_videos_common_videos_list&sort_by={sort_by}&from={page}" + ); + (url, referer) + } + Target::Tag { slug } => { + let referer = format!("{}/tags/{slug}/", self.url); + let url = format!( + "{referer}?mode=async&function=get_block&block_id=custom_list_videos_common_videos_list&sort_by={sort_by}&from={page}" + ); + (url, referer) + } + Target::Model { slug } => { + let referer = format!("{}/models/{slug}/", self.url); + let url = format!( + "{referer}?mode=async&function=get_block&block_id=custom_list_videos_models_videos_list&sort_by={sort_by}&from={page}" + ); + (url, referer) + } + Target::Search { query } => { + let path = Self::slugify(query); + let q = query.split_whitespace().collect::>().join("+"); + let referer = format!("{}/search/{path}/", self.url); + let url = format!( + "{referer}?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q={q}&category_ids=&sort_by={sort_by}&from_videos={page}" + ); + (url, referer) + } + } } - async fn get( + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![("Referer".to_string(), referer.to_string())] + } + + async fn get_videos_for_target( &self, cache: VideoCache, - page: u32, + target: &Target, sort: &str, + page: u32, options: ServerOptions, ) -> Result> { - let video_url = self.build_latest_url(page, sort); + let (video_url, referer) = self.build_block_url(target, sort, page); + let old_items = match 
cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { @@ -128,17 +327,16 @@ impl Porn4fansProvider { None => vec![], }; - let mut requester = - requester_or_default(&options, "porn4fans", "porn4fans.get.missing_requester"); + let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos_for_target"); let text = match requester - .get_with_headers(&video_url, self.build_latest_headers(), None) + .get_with_headers(&video_url, Self::html_headers(&referer), None) .await { Ok(text) => text, Err(e) => { report_provider_error( - "porn4fans", - "get.request", + CHANNEL_ID, + "get_videos_for_target.request", &format!("url={video_url}; error={e}"), ) .await; @@ -147,16 +345,15 @@ impl Porn4fansProvider { }; if text.trim().is_empty() { - report_provider_error( - "porn4fans", - "get.empty_response", - &format!("url={video_url}"), - ) - .await; return Ok(old_items); } - let video_items = self.get_video_items_from_html(text, requester).await; + let cards = self.parse_video_cards(&text); + if cards.is_empty() { + return Ok(old_items); + } + + let video_items = self.enrich_cards(cards, &referer, requester).await; if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); @@ -166,113 +363,6 @@ impl Porn4fansProvider { Ok(old_items) } - async fn query( - &self, - cache: VideoCache, - page: u32, - query: &str, - sort: &str, - options: ServerOptions, - ) -> Result> { - let video_url = self.build_search_url(query, page, sort); - let old_items = match cache.get(&video_url) { - Some((time, items)) => { - if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { - return Ok(items.clone()); - } - items.clone() - } - None => vec![], - }; - - let mut requester = - requester_or_default(&options, "porn4fans", "porn4fans.query.missing_requester"); - let text = match requester - .get_with_headers(&video_url, self.build_search_headers(query), None) - .await - { - Ok(text) => text, 
- Err(e) => { - report_provider_error( - "porn4fans", - "query.request", - &format!("url={video_url}; error={e}"), - ) - .await; - return Ok(old_items); - } - }; - - if text.trim().is_empty() { - report_provider_error( - "porn4fans", - "query.empty_response", - &format!("url={video_url}"), - ) - .await; - return Ok(old_items); - } - - let video_items = self.get_video_items_from_html(text, requester).await; - if !video_items.is_empty() { - cache.remove(&video_url); - cache.insert(video_url.clone(), video_items.clone()); - return Ok(video_items); - } - - Ok(old_items) - } - - fn extract_between<'a>(text: &'a str, start: &str, end: &str) -> Option<&'a str> { - text.split(start).nth(1)?.split(end).next() - } - - fn first_non_empty_attr(segment: &str, attrs: &[&str]) -> Option { - attrs.iter().find_map(|attr| { - Self::extract_between(segment, attr, "\"") - .map(str::trim) - .filter(|value| !value.is_empty()) - .map(ToString::to_string) - }) - } - - fn normalize_url(&self, url: &str) -> String { - if url.starts_with("http://") || url.starts_with("https://") { - return url.to_string(); - } - if url.starts_with("//") { - return format!("https:{url}"); - } - if url.starts_with('/') { - return format!("{}{}", self.url, url); - } - format!("{}/{}", self.url, url.trim_start_matches("./")) - } - - fn extract_thumb_url(&self, segment: &str) -> String { - let thumb_raw = Self::first_non_empty_attr( - segment, - &[ - "data-original=\"", - "data-webp=\"", - "srcset=\"", - "src=\"", - "poster=\"", - ], - ) - .unwrap_or_default(); - - if thumb_raw.starts_with("data:image/") { - return String::new(); - } - - self.normalize_url(&thumb_raw) - } - - fn decode_escaped_text(text: &str) -> String { - text.replace("\\/", "/").replace("&", "&") - } - fn decode_html_text(text: &str) -> String { decode(text.as_bytes()) .to_string() @@ -284,206 +374,403 @@ impl Porn4fansProvider { .to_string() } - fn strip_tags(text: &str) -> String { - Regex::new(r"(?is)<[^>]+>") - .ok() - .map(|regex| 
regex.replace_all(text, "").to_string()) - .unwrap_or_else(|| text.to_string()) - } - - fn push_unique_tag(values: &mut Vec, value: String) { - let value = value.trim().to_string(); - if value.is_empty() - || values - .iter() - .any(|existing| existing.eq_ignore_ascii_case(&value)) - { - return; + fn normalize_url(&self, url: &str) -> String { + let trimmed = url.trim(); + if trimmed.is_empty() { + return String::new(); } - values.push(value); + if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + return trimmed.to_string(); + } + if trimmed.starts_with("//") { + return format!("https:{trimmed}"); + } + if trimmed.starts_with('/') { + return format!("{}{}", self.url, trimmed); + } + format!("{}/{}", self.url, trimmed.trim_start_matches("./")) } - fn extract_views(text: &str) -> Option { - Regex::new(r"(?i)]+icon-eye[^>]*>.*?\s*([^<]+)") - .ok() - .and_then(|re| re.captures(text)) - .and_then(|caps| caps.get(1)) - .and_then(|m| parse_abbreviated_number(m.as_str().trim())) - } - - fn extract_rating(text: &str) -> Option { - Regex::new(r"(?i)]+icon-like[^>]*>.*?\s*([^<%]+)%") - .ok() - .and_then(|re| re.captures(text)) - .and_then(|caps| caps.get(1)) - .and_then(|m| m.as_str().trim().parse::().ok()) - } - - fn extract_direct_video_url_from_page(text: &str) -> Option { - let decoded = Self::decode_escaped_text(text); - - for key in ["video_url", "video_alt_url", "contentUrl"] { - let pattern = format!( - r#"(?is)(?:^|[{{\s,])["']?{}["']?\s*[:=]\s*["'](?Phttps?://[^"'<>]+?\.mp4)"#, - regex::escape(key) - ); - let regex = Regex::new(&pattern).ok()?; - if let Some(url) = regex - .captures(&decoded) - .and_then(|captures| captures.name("url")) - .map(|value| value.as_str().to_string()) - { - return Some(url); + /// KVS `get_file` URLs look like `.../10002.mp4/?v-acctoken=...`. Strip the + /// slash sitting between the extension and the query so the path ends in a + /// real media extension (yt-dlp / health-check media detection keys off it). 
+ fn normalize_media_url(raw: &str) -> String { + let mut url = raw.trim().to_string(); + for ext in [".mp4/", ".m4v/", ".webm/", ".mov/"] { + if let Some(idx) = url.find(ext) { + let replacement = &ext[..ext.len() - 1]; + url.replace_range(idx..idx + ext.len(), replacement); + break; } } + url + } + fn id_from_video_url(url: &str) -> Option { + let after = url.split("/video/").nth(1)?; + let id: String = after.chars().take_while(|c| c.is_ascii_digit()).collect(); + (!id.is_empty()).then_some(id) + } + + fn slug_from_model_url(url: &str) -> Option { + url.split("/models/") + .nth(1)? + .split('/') + .next() + .map(str::to_string) + .filter(|slug| !slug.is_empty()) + } + + fn attr(element: &scraper::ElementRef, names: &[&str]) -> Option { + for name in names { + if let Some(value) = element.value().attr(name) { + let trimmed = value.trim(); + if !trimmed.is_empty() && !trimmed.starts_with("data:image/") { + return Some(trimmed.to_string()); + } + } + } None } - fn collect_texts(document: &Html, selector: &str) -> Vec { - let Ok(selector) = Selector::parse(selector) else { + fn parse_video_cards(&self, html: &str) -> Vec { + let document = Html::parse_document(html); + let Ok(item_sel) = Selector::parse("div.item") else { return vec![]; }; - let mut values = Vec::new(); - for element in document.select(&selector) { - let raw_text = element.text().collect::>().join(" "); - let cleaned = Self::decode_html_text(&Self::strip_tags(&raw_text)); - Self::push_unique_tag(&mut values, cleaned); - } + let link_sel = Selector::parse("a.img-wrap").ok(); + let views_sel = Selector::parse("li.video-item.views span").ok(); + let duration_sel = Selector::parse("div.duration").ok(); + let img_sel = Selector::parse("img").ok(); + let source_sel = Selector::parse("picture source").ok(); + let preview_sel = Selector::parse("div.preview-video").ok(); + let model_sel = Selector::parse("li.video-item.model a").ok(); - values - } - - fn extract_page_models_and_categories(text: &str) -> (Vec, 
Vec) { - let document = Html::parse_document(text); - - let models = Self::collect_texts(&document, ".player-models-list a[href*=\"/models/\"]"); - - let mut categories = - Self::collect_texts(&document, ".categories-row a[href*=\"/categories/\"]"); - for value in Self::collect_texts(&document, ".tags-row a[href*=\"/tags/\"]") { - Self::push_unique_tag(&mut categories, value); - } - - (models, categories) - } - - fn parse_video_cards_from_html(&self, html: &str) -> Vec { - if html.trim().is_empty() { - return vec![]; - } - - let Ok(link_re) = Regex::new( - r#"(?is)]+class="item-link"[^>]+href="(?P[^"]+/video/(?P\d+)/[^"]+)"[^>]+title="(?P[^"]+)"[^>]*>(?P<body>.*?)</a>"#, - ) else { - return vec![]; - }; - - let mut items = Vec::new(); + let mut cards = Vec::new(); let mut seen = HashSet::new(); - for captures in link_re.captures_iter(html) { - let Some(id) = captures.name("id").map(|m| m.as_str().to_string()) else { + for item in document.select(&item_sel) { + let Some(link) = link_sel.as_ref().and_then(|sel| item.select(sel).next()) else { + continue; + }; + let Some(href) = link.value().attr("href") else { + continue; + }; + let page_url = self.normalize_url(href); + let Some(id) = Self::id_from_video_url(&page_url) else { continue; }; if !seen.insert(id.clone()) { continue; } - let href = captures - .name("href") - .map(|m| self.normalize_url(m.as_str())) + // Title: prefer the anchor's title attr, fall back to img alt. 
+ let title_raw = link + .value() + .attr("title") + .map(str::to_string) + .or_else(|| { + img_sel + .as_ref() + .and_then(|sel| item.select(sel).next()) + .and_then(|img| img.value().attr("alt").map(str::to_string)) + }) .unwrap_or_default(); - let title_raw = captures - .name("title") - .map(|m| m.as_str()) - .unwrap_or_default(); - let title = decode(title_raw.as_bytes()) - .to_string() - .unwrap_or_else(|_| title_raw.to_string()); - let body = captures - .name("body") - .map(|m| m.as_str()) - .unwrap_or_default(); - let thumb = self.extract_thumb_url(body); - let duration_raw = Self::extract_between(body, "<div class=\"duration\">", "<") - .unwrap_or_default() - .trim() - .to_string(); - let duration = parse_time_to_seconds(&duration_raw).unwrap_or(0) as u32; - let views = Self::extract_views(body).unwrap_or(0); - let rating = Self::extract_rating(body); + let title = Self::decode_html_text(&title_raw); - items.push(Porn4fansCard { + let thumb = img_sel + .as_ref() + .and_then(|sel| item.select(sel).next()) + .and_then(|img| Self::attr(&img, &["data-webp", "src", "data-src"])) + .or_else(|| { + source_sel + .as_ref() + .and_then(|sel| item.select(sel).next()) + .and_then(|src| Self::attr(&src, &["srcset", "data-srcset"])) + }) + .map(|value| self.normalize_url(value.split_whitespace().next().unwrap_or(&value))) + .unwrap_or_default(); + + let duration = duration_sel + .as_ref() + .and_then(|sel| item.select(sel).next()) + .map(|d| d.text().collect::<String>()) + .and_then(|raw| parse_time_to_seconds(raw.trim())) + .unwrap_or(0) as u32; + + let views = views_sel + .as_ref() + .and_then(|sel| item.select(sel).next()) + .map(|v| v.text().collect::<String>()) + .and_then(|raw| parse_abbreviated_number(raw.trim())) + .filter(|count| *count > 0); + + let preview = preview_sel + .as_ref() + .and_then(|sel| item.select(sel).next()) + .and_then(|p| Self::attr(&p, &["data-src"])) + .map(|raw| self.normalize_url(&raw)); + + let model = model_sel + .as_ref() + 
.and_then(|sel| item.select(sel).next()); + let uploader = model + .map(|m| Self::decode_html_text(&m.text().collect::<String>())) + .filter(|name| !name.is_empty()); + let uploader_href = model.and_then(|m| m.value().attr("href").map(str::to_string)); + let uploader_url = uploader_href.as_ref().map(|href| self.normalize_url(href)); + let uploader_slug = uploader_href + .as_deref() + .and_then(Self::slug_from_model_url); + + cards.push(Porn4fansCard { id, title, - page_url: href, + page_url, thumb, duration, - views: (views > 0).then_some(views), - rating, + views, + preview, + uploader, + uploader_url, + uploader_slug, }); } - items + cards } - async fn enrich_video_card( + /// Pull the direct MP4 formats out of the KVS player flashvars. + fn extract_formats_from_page(text: &str, referer: &str) -> Vec<VideoFormat> { + let grab = |key: &str| -> Option<String> { + let re = Regex::new(&format!(r#"{}\s*:\s*'([^']+)'"#, regex::escape(key))).ok()?; + re.captures(text) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str().to_string()) + .filter(|value| !value.is_empty()) + }; + + let mut formats = Vec::new(); + let mut seen = HashSet::new(); + // (flashvars url key, quality-label key, fallback label) ordered best-first. 
+ let sources = [ + ("video_alt_url", "video_alt_url_text", "720p"), + ("video_url", "video_url_text", "480p"), + ]; + for (url_key, label_key, fallback) in sources { + let Some(raw_url) = grab(url_key) else { + continue; + }; + let url = Self::normalize_media_url(&raw_url); + if !seen.insert(url.clone()) { + continue; + } + let quality = grab(label_key).unwrap_or_else(|| fallback.to_string()); + let mut format = VideoFormat::new(url, quality.clone(), "mp4".to_string()) + .http_header("Referer".to_string(), referer.to_string()); + if let Some(height) = quality + .trim_end_matches('p') + .parse::<u32>() + .ok() + .filter(|h| *h > 0) + { + format = format.height(height); + } + formats.push(format); + } + formats + } + + fn extract_tags_from_page(text: &str) -> Vec<String> { + let document = Html::parse_document(text); + // The detail page swaps the class names: `.categories-row` holds tag + // links and `.tags-row` holds category links. Collect both. + let Ok(selector) = Selector::parse(".categories-row a, .tags-row a") else { + return vec![]; + }; + let mut tags = Vec::new(); + let mut seen = HashSet::new(); + for link in document.select(&selector) { + let title = Self::decode_html_text(&link.text().collect::<String>()); + if title.is_empty() { + continue; + } + let key = title.to_ascii_lowercase(); + if seen.insert(key) { + tags.push(title); + } + } + tags + } + + async fn enrich_card( &self, card: Porn4fansCard, - mut requester: crate::util::requester::Requester, + listing_referer: String, + mut requester: Requester, ) -> VideoItem { - let direct_url = requester + let detail = requester .get_with_headers( &card.page_url, - vec![("Referer".to_string(), format!("{}/", self.url))], + Self::html_headers(&listing_referer), None, ) .await .ok(); - let (direct_url, models, categories) = match direct_url { - Some(text) => { - let url = Self::extract_direct_video_url_from_page(&text) - .unwrap_or_else(|| card.page_url.clone()); - let (models, categories) = 
Self::extract_page_models_and_categories(&text); - (url, models, categories) - } - None => (card.page_url.clone(), vec![], vec![]), + let (formats, tags) = match detail.as_deref() { + Some(text) => ( + Self::extract_formats_from_page(text, &self.url), + Self::extract_tags_from_page(text), + ), + None => (vec![], vec![]), }; let mut item = VideoItem::new( card.id, card.title, - direct_url, - "porn4fans".to_string(), + card.page_url.clone(), + CHANNEL_ID.to_string(), card.thumb, card.duration, ); if let Some(views) = card.views { item = item.views(views); } - if let Some(rating) = card.rating { - item = item.rating(rating); + if let Some(uploader) = card.uploader { + item = item.uploader(uploader); } - if let Some(model) = models.first() { - item = item.uploader(model.clone()); + if let Some(url) = card.uploader_url { + item = item.uploader_url(url); + } + if let Some(slug) = card.uploader_slug { + item.uploaderId = Some(format!("{CHANNEL_ID}:{slug}")); + } + if !tags.is_empty() { + item = item.tags(tags); + } + if let Some(preview) = card.preview { + item = item.preview(preview); + } + if !formats.is_empty() { + item = item.formats(formats); } - item = item.tags(categories); item } - async fn get_video_items_from_html( + async fn enrich_cards( &self, - html: String, - requester: crate::util::requester::Requester, + cards: Vec<Porn4fansCard>, + listing_referer: &str, + requester: Requester, ) -> Vec<VideoItem> { - let cards = self.parse_video_cards_from_html(&html); - let futures = cards - .into_iter() - .map(|card| self.enrich_video_card(card, requester.clone())); + stream::iter(cards.into_iter().map(|card| { + let requester = requester.clone(); + let referer = listing_referer.to_string(); + async move { self.enrich_card(card, referer, requester).await } + })) + .buffered(ENRICH_CONCURRENCY) + .collect::<Vec<_>>() + .await + } - join_all(futures).await + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let categories = Arc::clone(&self.categories); + 
let category_map = Arc::clone(&self.category_map); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_categories(&url, categories, category_map).await { + report_provider_error_background( + CHANNEL_ID, + "load_categories", + &error.to_string(), + ); + } + }); + }); + } + + async fn load_categories( + url: &str, + categories: Arc<RwLock<Vec<FilterOption>>>, + category_map: Arc<RwLock<HashMap<String, String>>>, + ) -> Result<()> { + let mut requester = Requester::new(); + let category_url = format!("{url}/categories/"); + let html = requester + .get_with_headers(&category_url, Self::html_headers(url), None) + .await + .map_err(|error| Error::from(format!("category fetch failed: {error}")))?; + + let document = Html::parse_document(&html); + let selector = Selector::parse("#list_categories_categories_list_items a.item") + .map_err(|error| Error::from(format!("category selector failed: {error}")))?; + + let mut options = vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }]; + let mut map = HashMap::new(); + + for link in document.select(&selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let Some(slug) = href + .split("/categories/") + .nth(1) + .and_then(|rest| rest.split('/').next()) + .filter(|slug| !slug.is_empty()) + else { + continue; + }; + let title = link + .value() + .attr("title") + .map(Self::decode_html_text) + .filter(|title| !title.is_empty()) + .unwrap_or_else(|| Self::decode_html_text(&link.text().collect::<String>())); + if title.is_empty() { + continue; + } + let normalized = Self::normalize_title(&title); + if normalized.is_empty() || map.contains_key(&normalized) { + continue; 
+ } + options.push(FilterOption { + id: normalized.clone(), + title, + }); + map.insert(normalized, slug.to_string()); + } + + if options.len() > 1 { + if let Ok(mut guard) = categories.write() { + *guard = options; + } + if let Ok(mut guard) = category_map.write() { + *guard = map; + } + } + + Ok(()) } } @@ -492,31 +779,26 @@ impl Provider for Porn4fansProvider { async fn get_videos( &self, cache: VideoCache, - pool: DbPool, + _pool: DbPool, sort: String, query: Option<String>, page: String, - per_page: String, + _per_page: String, options: ServerOptions, ) -> Vec<VideoItem> { - let _ = pool; - let _ = per_page; - let page = page.parse::<u32>().unwrap_or(1); + let page = page.parse::<u32>().unwrap_or(1).max(1); + let target = self.target_from_request(query.as_deref(), options.categories.as_deref()); - let videos = match query { - Some(query) if !query.trim().is_empty() => { - self.query(cache, page, &query, &sort, options).await - } - _ => self.get(cache, page, &sort, options).await, - }; - - match videos { + match self + .get_videos_for_target(cache, &target, &sort, page, options) + .await + { Ok(videos) => videos, Err(e) => { report_provider_error( - "porn4fans", + CHANNEL_ID, "get_videos", - &format!("page={page}; error={e}"), + &format!("page={page}; target={target:?}; error={e}"), ) .await; vec![] @@ -531,120 +813,140 @@ impl Provider for Porn4fansProvider { #[cfg(test)] mod tests { - use super::Porn4fansProvider; + use super::*; #[test] - fn builds_latest_url_with_custom_block_id() { + fn slugify_collapses_separators() { + assert_eq!(Porn4fansProvider::slugify("Big Black Cock"), "big-black-cock"); + assert_eq!(Porn4fansProvider::slugify(" big__ass "), "big-ass"); + } + + #[test] + fn builds_latest_block_url() { let provider = Porn4fansProvider::new(); + let (url, referer) = provider.build_block_url(&Target::Latest, "new", 2); + assert_eq!(referer, "https://www.porn4fans.com/onlyfans-videos/"); assert_eq!( - provider.build_latest_url(2, "new"), - 
"https://www.porn4fans.com/latest-updates/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by=post_date&from=2" + url, + "https://www.porn4fans.com/onlyfans-videos/?mode=async&function=get_block&block_id=custom_list_videos_latest_videos_list&sort_by=post_date&from=2" ); } #[test] - fn builds_search_url_with_custom_block_id() { + fn builds_search_block_url() { let provider = Porn4fansProvider::new(); + let (url, referer) = provider.build_block_url( + &Target::Search { + query: "big ass".to_string(), + }, + "popular", + 3, + ); + assert_eq!(referer, "https://www.porn4fans.com/search/big-ass/"); assert_eq!( - provider.build_search_url("big black cock", 3, "popular"), - "https://www.porn4fans.com/search/big-black-cock/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=big+black+cock&sort_by=video_viewed&from_videos=3" + url, + "https://www.porn4fans.com/search/big-ass/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search_result&q=big+ass&category_ids=&sort_by=video_viewed&from_videos=3" ); } #[test] - fn parses_porn4fans_search_markup() { + fn category_block_url_uses_common_list() { let provider = Porn4fansProvider::new(); - let html = r##" - <div class="thumbs second grid-1" id="custom_list_videos_videos_list_search_result_items"> - <div class="item"> - <a class="item-link" href="https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" title="Horny Police Officer Melztube Gets Banged By BBC"> - <div class="img-wrap"> - <div class="duration">23:47</div> - <picture> - <source srcset="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" type="image/webp"> - <img class="thumb lazy-load" src="data:image/gif;base64,AAAA" data-original="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" data-webp="https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" 
data-preview="https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10194/10194_preview_high.mp4/" alt="Horny Police Officer Melztube Gets Banged By BBC" /> - </picture> - </div> - <div class="video-text">Horny Police Officer Melztube Gets Banged By BBC</div> - <ul class="video-items"> - <li class="video-item"> - <svg class="svg-icon icon-eye"><use xlink:href="#icon-eye"></use></svg> - <span>14K</span> - </li> - <li class="video-item rating"> - <svg class="svg-icon icon-like"><use xlink:href="#icon-like"></use></svg> - <span>66%</span> - </li> - <li class="video-item"> - <span>2 weeks ago</span> - </li> - </ul> - </a> - </div> - </div> - "##; - - let items = provider.parse_video_cards_from_html(html); - assert_eq!(items.len(), 1); - assert_eq!(items[0].id, "10194"); - assert_eq!( - items[0].page_url, - "https://www.porn4fans.com/video/10194/horny-police-officer-melztube-gets-banged-by-bbc/" + let (url, _) = provider.build_block_url( + &Target::Category { + slug: "asian".to_string(), + }, + "new", + 1, ); assert_eq!( - items[0].thumb, - "https://www.porn4fans.com/contents/videos_screenshots/10000/10194/800x450/1.jpg" + url, + "https://www.porn4fans.com/categories/asian/?mode=async&function=get_block&block_id=custom_list_videos_common_videos_list&sort_by=post_date&from=1" ); - assert_eq!(items[0].duration, 1427); - assert_eq!(items[0].views, Some(14_000)); - assert_eq!(items[0].rating, Some(66.0)); } #[test] - fn extracts_direct_video_url_from_video_page() { + fn normalizes_kvs_media_url() { + assert_eq!( + Porn4fansProvider::normalize_media_url( + "https://www.porn4fans.com/get_file/4/abc/10000/10002/10002.mp4/?v-acctoken=zzz" + ), + "https://www.porn4fans.com/get_file/4/abc/10000/10002/10002.mp4?v-acctoken=zzz" + ); + assert_eq!( + Porn4fansProvider::normalize_media_url( + "https://www.porn4fans.com/get_file/4/abc/10000/10002/10002_720p.mp4/" + ), + "https://www.porn4fans.com/get_file/4/abc/10000/10002/10002_720p.mp4" + ); + } + + 
#[test] + fn extracts_formats_from_flashvars() { let html = r#" <script> var flashvars = { - video_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951.mp4\/', - video_alt_url: 'https:\/\/www.porn4fans.com\/get_file\/3\/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f\/10000\/10951\/10951_720p.mp4\/' + video_url: 'https://www.porn4fans.com/get_file/4/h1/10000/10002/10002.mp4/?v-acctoken=a', + video_url_text: '480p', + video_alt_url: 'https://www.porn4fans.com/get_file/4/h2/10000/10002/10002_720p.mp4/?v-acctoken=b', + video_alt_url_text: '720p', + license_code: '$546156418018655' }; </script> "#; - - assert_eq!( - Porn4fansProvider::extract_direct_video_url_from_page(html).as_deref(), - Some( - "https://www.porn4fans.com/get_file/3/9df8de1fc2da5dfcbf9a4ad512dc8f306c4997e60f/10000/10951/10951.mp4" - ) - ); + let formats = Porn4fansProvider::extract_formats_from_page(html, "https://www.porn4fans.com/"); + assert_eq!(formats.len(), 2); + // best-first: 720p before 480p + assert!(formats[0].url.ends_with("10002_720p.mp4?v-acctoken=b")); + assert!(formats[1].url.ends_with("10002.mp4?v-acctoken=a")); } #[test] - fn extracts_models_and_categories_from_video_page() { - let html = r#" - <div class="player-models-list"> - <div class="player-model-item"> - <a href="/models/piper-rockelle/"><span class="player-model-name">Piper Rockelle</span></a> + fn parses_async_fragment_card() { + let provider = Porn4fansProvider::new(); + let html = r##" + <div id="custom_list_videos_latest_videos_list_items"> + <div class="item "> + <div class="item-link"> + <a class="img-wrap video" href="https://www.porn4fans.com/video/10002/blasianflexcouple-gets-banged-against-the-wall/" title="Blasianflexcouple Gets Banged Against The Wall"> + <ul class="video-items for-grid"> + <li class="video-item views"><i></i><span>103</span></li> + <li class="video-item rating"><i></i><span>0</span></li> + </ul> + <div class="duration">24:37</div> + <picture> 
+ <source srcset="https://www.porn4fans.com/contents/videos_screenshots/10000/10002/800x450/1.jpg" type="image/webp"> + <img class="thumb " src="https://www.porn4fans.com/contents/videos_screenshots/10000/10002/800x450/1.jpg" data-webp="https://www.porn4fans.com/contents/videos_screenshots/10000/10002/800x450/1.jpg" alt="Blasianflexcouple Gets Banged Against The Wall" /> + </picture> + <div class="preview-video" data-src="https://www.porn4fans.com/get_file/4/x/10000/10002/10002_preview_new.mp4/"></div> + </a> + <div class="video-item-bottom"> + <a class="video-text" href="https://www.porn4fans.com/video/10002/blasianflexcouple-gets-banged-against-the-wall/">Blasianflexcouple Gets Banged Against The Wall</a> + <div class="video-items"> + <li class="video-item model single"><a href="https://www.porn4fans.com/models/nina-lee/">Nina Lee</a></li> + </div> + </div> </div> + </div> </div> - <ul class="categories-row"> - <li class="visible"><a href="/categories/striptease/">Striptease</a></li> - <li class="visible"><a href="/categories/teen/">Teen</a></li> - </ul> - <ul class="tags-row"> - <li class="visible"><a href="/tags/bathroom/">Bathroom</a></li> - </ul> - "#; + "##; - let (models, categories) = Porn4fansProvider::extract_page_models_and_categories(html); - assert_eq!(models, vec!["Piper Rockelle".to_string()]); + let cards = provider.parse_video_cards(html); + assert_eq!(cards.len(), 1); + let card = &cards[0]; + assert_eq!(card.id, "10002"); assert_eq!( - categories, - vec![ - "Striptease".to_string(), - "Teen".to_string(), - "Bathroom".to_string() - ] + card.page_url, + "https://www.porn4fans.com/video/10002/blasianflexcouple-gets-banged-against-the-wall/" ); + assert_eq!( + card.thumb, + "https://www.porn4fans.com/contents/videos_screenshots/10000/10002/800x450/1.jpg" + ); + assert_eq!(card.duration, 1477); + assert_eq!(card.views, Some(103)); + assert_eq!(card.uploader.as_deref(), Some("Nina Lee")); + assert_eq!(card.uploader_slug.as_deref(), Some("nina-lee")); 
+ assert!(card.preview.is_some()); } }