From 95d2defa13f56e2bcb98e632aa0bafc1ad90f8f7 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 22 May 2026 10:26:05 +0000 Subject: [PATCH] camsoda and pornhub shorties --- build.rs | 10 + check.py | 39 +- docs/provider-catalog.md | 2 + src/providers/camsoda.rs | 403 +++++++++++++++++++++ src/providers/pornhub_shorties.rs | 566 ++++++++++++++++++++++++++++++ 5 files changed, 1014 insertions(+), 6 deletions(-) create mode 100644 src/providers/camsoda.rs create mode 100644 src/providers/pornhub_shorties.rs diff --git a/build.rs b/build.rs index e617878..4a34229 100644 --- a/build.rs +++ b/build.rs @@ -36,6 +36,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "pornhub", ty: "PornhubProvider", }, + ProviderDef { + id: "pornhub-shorties", + module: "pornhub_shorties", + ty: "PornhubShortiesProvider", + }, ProviderDef { id: "youporn", module: "youporn", @@ -341,6 +346,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "xvideos", ty: "XvideosProvider", }, + ProviderDef { + id: "camsoda", + module: "camsoda", + ty: "CamsodaProvider", + }, ]; fn main() { diff --git a/check.py b/check.py index 73f2ab1..b9a365b 100644 --- a/check.py +++ b/check.py @@ -64,6 +64,23 @@ class Results: _BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0" +# Domains known to be Cloudflare-protected and return 403/connection-refused to direct +# HTTP checks. URL reachability failures for these hosts are downgraded to warnings. +_CF_PROTECTED_HOSTS = { + "www.camsoda.com", + "camsoda.com", +} + + +def _is_cf_protected(url: str) -> bool: + """Return True if the URL's host is known to be CF-protected.""" + try: + from urllib.parse import urlparse + host = urlparse(url).hostname or "" + return host in _CF_PROTECTED_HOSTS + except Exception: + return False + def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]: """Return (ok, http_status). 
Tries HEAD then ranged GET on 405.""" @@ -234,7 +251,10 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool) else: ok, code = http_ok(vurl) if not ok: - results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}") + if _is_cf_protected(vurl): + results.warn(channel_id, f"{label}: url unreachable HTTP={code} (CF-protected host, expected): {vurl}") + else: + results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}") else: results.info(channel_id, f"{label}: url OK (HTTP {code})") @@ -275,11 +295,18 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool) results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}") yt, stderr = ytdlp_extract(ytdlp_url) if yt is None: - results.err( - channel_id, - f"{label}: yt-dlp failed for {ytdlp_url}" - + (f": {stderr[:300]}" if stderr else ""), - ) + if _is_cf_protected(ytdlp_url): + results.warn( + channel_id, + f"{label}: yt-dlp failed for {ytdlp_url} (CF-protected host, expected)" + + (f": {stderr[:200]}" if stderr else ""), + ) + else: + results.err( + channel_id, + f"{label}: yt-dlp failed for {ytdlp_url}" + + (f": {stderr[:300]}" if stderr else ""), + ) else: yt_title = (yt.get("title") or "").strip() api_title = (video.get("title") or "").strip() diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 8787081..acee089 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -40,6 +40,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `pornhat` | `mainstream-tube` | no | no | Basic tube provider. | | `pornhd3x` | `studio-network` | no | yes | Best template for complex catalogs and redirect proxy generation. | | `pornhub` | `mainstream-tube` | no | no | Rich metadata and format examples. 
| +| `pornhub-shorties` | `tiktok` | no | no | Pornhub Shorties vertical short-form clips; parses `JSON_SHORTIES` JS variable embedded in HTML; fields: vkey, videoTitle, linkUrl, imageUrl, likeNumber, dislikeNumber, name/profileUrl (uploader), pillsData (tags), trackingTimeWatched.video_duration; pagination via `?page=N`; search via `?search=query`; sort via `?sort=trending\|mostviewed\|top_rated\|hottest`; phncdn thumbnails require `Referer: https://www.pornhub.com/` (served via cdnReferrers in /api/status); yt-dlp resolves `video.url` natively (PornHub extractor); no proxy needed. | | `pornmz` | `mainstream-tube` | no | no | Mainstream archive. | | `pornzog` | `mainstream-tube` | no | no | Basic list/detail scraper. | | `porntrex` | `mainstream-tube` | no | no | KVS-style HTML archive with direct MP4 formats and tag-aware search shortcuts. | @@ -70,6 +71,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed.
| | `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. | | `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. 
| +| `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:`/`user:` query prefixes to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). | | `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed.
| ## Proxy Routes diff --git a/src/providers/camsoda.rs b/src/providers/camsoda.rs new file mode 100644 index 0000000..1dcf6c0 --- /dev/null +++ b/src/providers/camsoda.rs @@ -0,0 +1,403 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use scraper::{Html, Selector}; +use std::collections::HashSet; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "live-cams", + tags: &["cams", "amateur", "recordings", "clips"], + }; + +const BASE_URL: &str = "https://www.camsoda.com"; +const CHANNEL_ID: &str = "camsoda"; + +error_chain! { + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct CamsodaProvider { + url: String, +} + +#[derive(Debug, Clone)] +enum Target { + /// Default listing at /media?page=N + Listing, + /// Model media page at /{username}/media + Model { username: String }, +} + +impl CamsodaProvider { + pub fn new() -> Self { + Self { + url: BASE_URL.to_string(), + } + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "CamSoda".to_string(), + description: + "CamSoda model video clips — recorded amateur cam shows uploaded by performers." + .to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=camsoda.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![], + nsfw: true, + cacheDuration: Some(1800), + } + } + + /// Resolve the fetch target from query and options. 
+    fn pick_target(query: Option<&str>) -> Target {
+        let Some(query) = query.map(str::trim).filter(|v| !v.is_empty()) else {
+            return Target::Listing;
+        };
+
+        // "uploader:", "model:" and "user:" are aliases selecting a model's media page.
+        for prefix in &["uploader:", "model:", "user:"] {
+            if let Some(username) = query.strip_prefix(prefix) {
+                let username = username.trim().to_lowercase();
+                // The name is spliced into the URL path, so refuse whitespace and the
+                // separators that would alter the path or start a query string / fragment.
+                let unsafe_char = |c: char| c.is_whitespace() || matches!(c, '/' | '?' | '#');
+                if !username.is_empty() && !username.contains(unsafe_char) {
+                    return Target::Model { username };
+                }
+            }
+        }
+
+        // For other queries fall back to the default listing;
+        // the server will apply client-side substring filtering.
+        Target::Listing
+    }
+
+    /// Build the URL to fetch for `target`; `page` is clamped to at least 1.
+    /// A model's first page is requested without a `?page=` parameter.
+    fn build_listing_url(&self, target: &Target, page: u16) -> String {
+        let page = page.max(1);
+        match target {
+            Target::Listing => format!("{}/media?page={}", self.url, page),
+            Target::Model { username } if page <= 1 => format!("{}/{}/media", self.url, username),
+            Target::Model { username } => format!("{}/{}/media?page={}", self.url, username, page),
+        }
+    }
+
+    /// Parse video cards from the HTML of a CamSoda media page.
+ /// + /// The page contains anchor elements linking to individual video pages: + /// href="/{username}/media/{slug}/{id}" + /// + /// Inside each anchor: + /// - `[class*="media-item-module__title"]` span: the video title + /// - `[class*="media-item-module__subtitle"]` span: "by UPLOADER (MM:SS)" + /// - `img[src*="media-secure.camsoda.com"]`: video-specific thumbnail + fn parse_html_items(html: &str) -> Vec { + let document = Html::parse_document(html); + + // Select all anchors linking to /{username}/media/{slug}/{id} + let anchor_sel = match Selector::parse(r#"a[href]"#) { + Ok(s) => s, + Err(_) => return vec![], + }; + let title_sel = match Selector::parse(r#"[class*="media-item-module__title"]"#) { + Ok(s) => s, + Err(_) => return vec![], + }; + let subtitle_sel = match Selector::parse(r#"[class*="media-item-module__subtitle"]"#) { + Ok(s) => s, + Err(_) => return vec![], + }; + let img_sel = match Selector::parse(r#"img[src]"#) { + Ok(s) => s, + Err(_) => return vec![], + }; + + // Regex for parsing subtitle "by UPLOADER (MM:SS)" + let sub_re = match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") { + Ok(r) => r, + Err(_) => return vec![], + }; + + // Regex for media URL: /{username}/media/{slug}/{id} + let href_re = match regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$") { + Ok(r) => r, + Err(_) => return vec![], + }; + + let mut items: Vec = Vec::new(); + let mut seen_ids: HashSet = HashSet::new(); + + for anchor in document.select(&anchor_sel) { + let href = match anchor.value().attr("href") { + Some(h) => h, + None => continue, + }; + + let caps = match href_re.captures(href) { + Some(c) => c, + None => continue, + }; + + let username = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); + let slug = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(); + let video_id = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string(); + + if video_id.is_empty() || username.is_empty() { + continue; + } + + if 
!seen_ids.insert(video_id.clone()) { + continue; + } + + // Title + let title = anchor + .select(&title_sel) + .next() + .map(|el| el.text().collect::().trim().to_string()) + .unwrap_or_default(); + let title = if title.is_empty() { + format!("CamSoda video {video_id}") + } else { + title + }; + + // Subtitle: "by UPLOADER (MM:SS)" + let subtitle = anchor + .select(&subtitle_sel) + .next() + .map(|el| el.text().collect::().trim().to_string()) + .unwrap_or_default(); + + let (uploader, duration) = if let Some(sc) = sub_re.captures(&subtitle) { + let u = sc.get(1).map(|m| m.as_str().trim().to_string()).unwrap_or_default(); + let d = sc.get(2) + .and_then(|m| parse_time_to_seconds(m.as_str())) + .and_then(|s| u32::try_from(s).ok()) + .unwrap_or(0); + (if u.is_empty() { None } else { Some(u) }, d) + } else { + (None, 0) + }; + + // Thumbnail — prefer video-specific from media-secure.camsoda.com + let thumb = anchor + .select(&img_sel) + .filter_map(|img| img.value().attr("src")) + .find(|src| src.contains("media-secure.camsoda.com")) + .or_else(|| { + anchor + .select(&img_sel) + .filter_map(|img| img.value().attr("src")) + .find(|src| src.contains("livemediahost.com")) + }) + .unwrap_or("") + .to_string(); + + let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}"); + + let mut item = VideoItem::new( + video_id, + title, + page_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + item.uploader = uploader; + item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media")); + item.uploaderId = Some(format!("{CHANNEL_ID}:{username}")); + + items.push(item); + } + + items + } + + async fn fetch_items( + &self, + target: &Target, + page: u16, + options: &ServerOptions, + ) -> Result> { + let url = self.build_listing_url(target, page); + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items"); + let text = requester + .get(&url, None) + .await + .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?; + + // Guard against 
CF challenge pages slipping through + if text.contains("cf-browser-verification") + || text.contains("cf-chl") + || text.contains("Just a moment") + { + return Err(Error::from("cloudflare challenge page returned".to_string())); + } + + let items = Self::parse_html_items(&text); + Ok(items) + } +} + +#[async_trait] +impl Provider for CamsodaProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = cache; + let _ = pool; + let _ = sort; + let _ = per_page; + + let page = page.parse::().unwrap_or(1).max(1); + let normalized_query = query + .as_deref() + .map(str::trim) + .filter(|v| !v.is_empty()) + .map(ToOwned::to_owned); + + let target = Self::pick_target(normalized_query.as_deref()); + + match self.fetch_items(&target, page, &options).await { + Ok(items) => items, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_html() -> String { + // Simplified version of the HTML returned by Jina (X-Return-Format: html) + r#" + + Torso ride begging for your cumby jazzyj (24:35) +
thumb
+
+ + newFIRST IR BG SHOW FT JOHNNY LOVEby Coco Dethick (44:14) +
thumb
+
+ + Ultimate squirting video!by Hot Wife Mia (02:47) +
thumb
+
+"#.to_string() + } + + #[test] + fn parses_video_cards_from_html() { + let items = CamsodaProvider::parse_html_items(&sample_html()); + assert_eq!(items.len(), 3, "expected 3 items, got {}: {:?}", items.len(), items.iter().map(|i| &i.id).collect::>()); + + let item = &items[0]; + assert_eq!(item.id, "16984249"); + assert_eq!(item.title, "Torso ride begging for your cum"); + assert_eq!(item.uploader.as_deref(), Some("jazzyj")); + assert_eq!(item.duration, 24 * 60 + 35); + assert!(item.url.contains("16984249"), "url should contain id: {}", item.url); + assert!(item.thumb.contains("media-secure.camsoda.com"), "thumb: {}", item.thumb); + assert_eq!(item.uploaderUrl.as_deref(), Some("https://www.camsoda.com/lil-asian-jaz/media")); + assert_eq!(item.uploaderId.as_deref(), Some("camsoda:lil-asian-jaz")); + + let item2 = &items[1]; + assert_eq!(item2.id, "17009049"); + assert_eq!(item2.uploader.as_deref(), Some("Coco Dethick")); + + let item3 = &items[2]; + assert_eq!(item3.id, "17112135"); + assert_eq!(item3.duration, 2 * 60 + 47); + } + + #[test] + fn deduplicates_items() { + // Same video appears twice + let html = sample_html(); + let doubled = format!("{html}\n{html}"); + let items = CamsodaProvider::parse_html_items(&doubled); + assert_eq!(items.len(), 3, "should deduplicate to 3 unique items"); + } + + #[test] + fn picks_target_correctly() { + assert!(matches!(CamsodaProvider::pick_target(None), Target::Listing)); + assert!(matches!(CamsodaProvider::pick_target(Some("")), Target::Listing)); + assert!(matches!(CamsodaProvider::pick_target(Some("blowjob")), Target::Listing)); + + match CamsodaProvider::pick_target(Some("uploader:lil-asian-jaz")) { + Target::Model { username } => assert_eq!(username, "lil-asian-jaz"), + _ => panic!("expected Model target"), + } + match CamsodaProvider::pick_target(Some("model:katt-leya")) { + Target::Model { username } => assert_eq!(username, "katt-leya"), + _ => panic!("expected Model target"), + } + } + + #[test] + fn 
builds_listing_urls_correctly() { + let provider = CamsodaProvider::new(); + assert_eq!( + provider.build_listing_url(&Target::Listing, 1), + "https://www.camsoda.com/media?page=1" + ); + assert_eq!( + provider.build_listing_url(&Target::Listing, 3), + "https://www.camsoda.com/media?page=3" + ); + assert_eq!( + provider.build_listing_url( + &Target::Model { username: "lil-asian-jaz".to_string() }, + 1 + ), + "https://www.camsoda.com/lil-asian-jaz/media" + ); + assert_eq!( + provider.build_listing_url( + &Target::Model { username: "lil-asian-jaz".to_string() }, + 2 + ), + "https://www.camsoda.com/lil-asian-jaz/media?page=2" + ); + } +} diff --git a/src/providers/pornhub_shorties.rs b/src/providers/pornhub_shorties.rs new file mode 100644 index 0000000..5cf228a --- /dev/null +++ b/src/providers/pornhub_shorties.rs @@ -0,0 +1,566 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::videos::{ServerOptions, VideoItem}; + +use async_trait::async_trait; +use error_chain::error_chain; +use serde::Deserialize; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "tiktok", + tags: &["shorts", "pornhub", "vertical"], + }; + +const BASE_URL: &str = "https://www.pornhub.com"; +const CHANNEL_ID: &str = "pornhub-shorties"; + +error_chain! { + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + Json(serde_json::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct PornhubShortiesProvider { + url: String, +} + +#[derive(Debug, Clone)] +enum Target { + Home { sort: String }, + Search { query: String, sort: String }, +} + +/// Deserialized structure of each entry in the `JSON_SHORTIES` JS variable. 
+#[derive(Debug, Deserialize, Clone)]
+struct ShortieItem {
+    #[serde(rename = "vkey")]
+    vkey: String,
+    #[serde(rename = "videoTitle")]
+    video_title: String,
+    #[serde(rename = "linkUrl")]
+    link_url: String,
+    #[serde(rename = "imageUrl")]
+    image_url: Option<String>,
+    #[serde(rename = "likeNumber", default)]
+    like_number: u64,
+    #[serde(rename = "dislikeNumber", default)]
+    dislike_number: u64,
+    #[serde(rename = "name")]
+    name: Option<String>,
+    #[serde(rename = "profileUrl")]
+    profile_url: Option<String>,
+    #[serde(rename = "entityId")]
+    entity_id: Option<u64>, // NOTE(review): unsigned integer id assumed (fixture uses 9876543) — confirm
+    #[serde(rename = "entityType")]
+    entity_type: Option<String>,
+    #[serde(rename = "trackingTimeWatched")]
+    tracking_time_watched: Option<TrackingTimeWatched>,
+    #[serde(rename = "pillsData", default)]
+    pills_data: Vec<PillData>,
+    #[serde(rename = "badges")]
+    badges: Option<Badges>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+struct TrackingTimeWatched {
+    #[serde(rename = "video_duration", default)]
+    video_duration: u32,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+struct PillData {
+    #[serde(rename = "name")]
+    name: String,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+struct Badges {
+    #[serde(rename = "verified", default)]
+    verified: bool,
+}
+
+impl PornhubShortiesProvider {
+    pub fn new() -> Self {
+        Self {
+            url: BASE_URL.to_string(),
+        }
+    }
+
+    /// Pick the fetch target: a non-blank `query` means search, otherwise the home feed.
+    fn build_target(sort: &str, query: Option<&str>) -> Target {
+        let sort = Self::normalize_sort(sort).to_string();
+        match query.map(str::trim).filter(|q| !q.is_empty()) {
+            Some(q) => Target::Search {
+                query: q.to_string(),
+                sort,
+            },
+            None => Target::Home { sort },
+        }
+    }
+
+    /// Map a sort name or alias to the site's `sort` value; anything unrecognized becomes "new".
+    fn normalize_sort(sort: &str) -> &'static str {
+        match sort.trim().to_ascii_lowercase().as_str() {
+            "trending" => "trending",
+            "popular" | "mv" | "mostviewed" => "mostviewed",
+            "top_rated" | "tr" | "toprated" => "top_rated",
+            "hottest" | "ht" => "hottest",
+            _ => "new",
+        }
+    }
+
+    /// Build the listing URL for `target`. Parameters appear in the order `search`,
+    /// `sort`, `page`; the default sort ("new") and page 1 are left out. The search text
+    /// is form-encoded so `&`, `#`, `%` or `+` in it cannot corrupt the query string.
+    fn build_url(base: &str, target: &Target, page: u8) -> String {
+        let (search, sort) = match target {
+            Target::Home { sort } => (None, sort),
+            Target::Search { query, sort } => (Some(Self::encode_query(query)), sort),
+        };
+        // `None` entries are parameters that stay at their default and are not sent.
+        let params = [
+            search.map(|text| format!("search={text}")),
+            (sort != "new").then(|| format!("sort={sort}")),
+            (page > 1).then(|| format!("page={page}")),
+        ];
+        let params: Vec<String> = IntoIterator::into_iter(params).flatten().collect();
+        match params.as_slice() {
+            [] => format!("{base}/shorties"),
+            _ => format!("{base}/shorties?{}", params.join("&")),
+        }
+    }
+
+    /// Form-encode `raw` as a query value: ASCII letters, digits and `-_.~` are kept,
+    /// a space becomes `+`, and every other byte is written as `%XX`.
+    fn encode_query(raw: &str) -> String {
+        raw.bytes().fold(String::with_capacity(raw.len()), |mut out, byte| {
+            match byte {
+                b' ' => out.push('+'),
+                b'-' | b'_' | b'.' | b'~' => out.push(char::from(byte)),
+                b if b.is_ascii_alphanumeric() => out.push(char::from(b)),
+                b => out.push_str(&format!("%{b:02X}")),
+            }
+            out
+        })
+    }
+
+    fn extract_json_shorties(html: &str) -> Result<Vec<ShortieItem>> {
+        // The page embeds: JSON_SHORTIES = insertAfterNthPosition([{...}, ...], AD_POSITION, ...)
+        // We locate the array by finding the '[' after JSON_SHORTIES and matching brackets.
+        let start = html.find("JSON_SHORTIES").ok_or_else(|| {
+            Error::from(ErrorKind::Parse(
+                "JSON_SHORTIES marker not found in HTML".to_string(),
+            ))
+        })?;
+
+        let arr_start = html[start..].find('[').ok_or_else(|| {
+            Error::from(ErrorKind::Parse(
+                "JSON_SHORTIES array open bracket not found".to_string(),
+            ))
+        })? + start;
+
+        // Walk the HTML to find the matching closing bracket.
+ let bytes = html.as_bytes(); + let mut depth: i32 = 0; + let mut in_string = false; + let mut escape_next = false; + let mut arr_end = arr_start; + + for (offset, &b) in bytes[arr_start..].iter().enumerate() { + if escape_next { + escape_next = false; + continue; + } + if b == b'\\' && in_string { + escape_next = true; + continue; + } + if b == b'"' { + in_string = !in_string; + continue; + } + if in_string { + continue; + } + match b { + b'[' => depth += 1, + b']' => { + depth -= 1; + if depth == 0 { + arr_end = arr_start + offset; + break; + } + } + _ => {} + } + } + + if depth != 0 { + return Err( + ErrorKind::Parse("JSON_SHORTIES array bracket mismatch".to_string()).into(), + ); + } + + let raw = &html[arr_start..=arr_end]; + let items: Vec = serde_json::from_str(raw).map_err(|e| { + Error::from(ErrorKind::Parse(format!("JSON_SHORTIES parse error: {e}"))) + })?; + + Ok(items) + } + + fn item_to_video(item: ShortieItem) -> Option { + let id = item.vkey; + if id.is_empty() { + return None; + } + let title = item.video_title.trim().to_string(); + if title.is_empty() { + return None; + } + let url = item.link_url.trim().to_string(); + if url.is_empty() || !url.contains("/view_video.php") { + return None; + } + + let thumb = item + .image_url + .unwrap_or_default() + .replace("\\/", "/") + .trim() + .to_string(); + + let duration = item + .tracking_time_watched + .map(|t| t.video_duration) + .unwrap_or(0); + + let mut video = VideoItem::new( + id, + title, + url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + + // Rating from likes / (likes + dislikes) + let total = item.like_number + item.dislike_number; + if total > 0 { + video.rating = Some((item.like_number as f32 / total as f32) * 100.0); + } + + // Uploader + if let Some(name) = item.name.filter(|n| !n.is_empty()) { + video.uploader = Some(name.clone()); + if let Some(profile_url) = item + .profile_url + .map(|u| u.replace("\\/", "/")) + .filter(|u| !u.is_empty()) + { + video.uploaderUrl = 
Some(profile_url.clone()); + // Build namespaced uploader ID from entity_type + entity_id + if let (Some(et), Some(eid)) = (item.entity_type.as_deref(), item.entity_id) { + let kind = match et { + "Mpp" | "Model" => "model", + "Channel" => "channels", + "Pornstar" => "pornstar", + _ => "model", + }; + video.uploaderId = Some(format!("{CHANNEL_ID}:{kind}:{eid}")); + } + } + } + + // Tags from pillsData + let tags: Vec = item.pills_data.into_iter().map(|p| p.name).collect(); + if !tags.is_empty() { + video.tags = Some(tags); + } + + // Verified badge + video.verified = item.badges.map(|b| b.verified).filter(|&v| v); + + Some(video) + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "PH Shorties".to_string(), + description: "Pornhub Shorties — vertical short-form porn clips.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=pornhub.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse Pornhub Shorties by sort order.".to_string(), + systemImage: "list.number".to_string(), + colorName: "orange".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "New".to_string(), + }, + FilterOption { + id: "trending".to_string(), + title: "Trending".to_string(), + }, + FilterOption { + id: "mostviewed".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "top_rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "hottest".to_string(), + title: "Hottest".to_string(), + }, + ], + multiSelect: false, + }], + nsfw: true, + cacheDuration: Some(1800), + } + } + + async fn fetch_videos( + &self, + cache: VideoCache, + page: u8, + sort: &str, + query: Option<&str>, + options: ServerOptions, + ) -> Result> { + let target = Self::build_target(sort, query); + let fetch_url = 
Self::build_url(&self.url, &target, page); + + // Cache hit + let old_items = match cache.get(&fetch_url) { + Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => { + return Ok(items.clone()); + } + Some((_, items)) => items.clone(), + None => vec![], + }; + + let mut requester = requester_or_default(&options, CHANNEL_ID, "fetch_videos"); + + let text = match requester + .get_with_headers( + &fetch_url, + vec![ + ("Referer".to_string(), format!("{}/shorties", self.url)), + ( + "Accept".to_string(), + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + .to_string(), + ), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ], + None, + ) + .await + { + Ok(text) => text, + Err(error) => { + report_provider_error( + CHANNEL_ID, + "fetch_videos.request", + &format!("url={fetch_url}; error={error}"), + ) + .await; + return Ok(old_items); + } + }; + + let shorties = match Self::extract_json_shorties(&text) { + Ok(items) => items, + Err(error) => { + report_provider_error( + CHANNEL_ID, + "fetch_videos.parse", + &format!("url={fetch_url}; error={error}"), + ) + .await; + return Ok(old_items); + } + }; + + let items: Vec = shorties + .into_iter() + .filter_map(Self::item_to_video) + .collect(); + + if items.is_empty() { + return Ok(old_items); + } + + cache.remove(&fetch_url); + cache.insert(fetch_url, items.clone()); + Ok(items) + } +} + +#[async_trait] +impl Provider for PornhubShortiesProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let _ = per_page; + + let page = page.parse::().unwrap_or(1); + + match self + .fetch_videos(cache, page, &sort, query.as_deref(), options) + .await + { + Ok(items) => items, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: 
ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_json_shorties_from_html() { + let html = r#" + var JSON_SHORTIES = insertAfterNthPosition([ + { + "videoId": 123456, + "vkey": "abcdef1234567", + "videoTitle": "Test Short Video", + "favoriteInfo": "1K", + "likeInfo": "2K", + "likeNumber": 2000, + "dislikeNumber": 100, + "isHD": true, + "linkUrl": "https:\/\/www.pornhub.com\/view_video.php?viewkey=abcdef1234567", + "shortieUrl": "https:\/\/www.pornhub.com\/shorties\/abcdef1234567", + "embedUrl": "", + "imageUrl": "https:\/\/example.com\/thumb.jpg", + "mediaPriority": "hls", + "mediaDefinitions": [], + "isFavorite": false, + "isReported": false, + "isSubscribed": false, + "userVoteDetail": -1, + "trackingTimeWatched": {"video_duration": 45}, + "pillsData": [{"name": "Amateur", "slug": "amateur", "type": "category"}], + "badges": {"verified": true, "premium": false, "award": false}, + "name": "TestUser", + "profileUrl": "https:\/\/www.pornhub.com\/model\/testuser", + "entityType": "Mpp", + "entityId": 9876543 + } + ], AD_POSITION, {}); + "#; + + let items = PornhubShortiesProvider::extract_json_shorties(html) + .expect("should parse JSON_SHORTIES"); + assert_eq!(items.len(), 1); + assert_eq!(items[0].vkey, "abcdef1234567"); + assert_eq!(items[0].video_title, "Test Short Video"); + assert_eq!( + items[0].link_url, + "https://www.pornhub.com/view_video.php?viewkey=abcdef1234567" + ); + assert_eq!(items[0].like_number, 2000); + assert_eq!(items[0].dislike_number, 100); + assert_eq!( + items[0] + .tracking_time_watched + .as_ref() + .map(|t| t.video_duration), + Some(45) + ); + assert_eq!(items[0].pills_data[0].name, "Amateur"); + assert_eq!(items[0].name.as_deref(), Some("TestUser")); + + let video = PornhubShortiesProvider::item_to_video(items[0].clone()) + .expect("should convert to VideoItem"); + assert_eq!(video.id, "abcdef1234567"); + assert_eq!(video.duration, 45); + 
assert!(video.rating.is_some()); + assert!((video.rating.unwrap() - 95.23).abs() < 0.1); + assert_eq!(video.uploader.as_deref(), Some("TestUser")); + assert!(video.tags.as_ref().is_some_and(|t| t.contains(&"Amateur".to_string()))); + } + + #[test] + fn build_url_home_new() { + let url = PornhubShortiesProvider::build_url( + BASE_URL, + &Target::Home { + sort: "new".to_string(), + }, + 1, + ); + assert_eq!(url, "https://www.pornhub.com/shorties"); + + let url2 = PornhubShortiesProvider::build_url( + BASE_URL, + &Target::Home { + sort: "new".to_string(), + }, + 2, + ); + assert_eq!(url2, "https://www.pornhub.com/shorties?page=2"); + } + + #[test] + fn build_url_search() { + let url = PornhubShortiesProvider::build_url( + BASE_URL, + &Target::Search { + query: "teen".to_string(), + sort: "new".to_string(), + }, + 1, + ); + assert_eq!(url, "https://www.pornhub.com/shorties?search=teen"); + } + + #[test] + fn normalize_sort_variants() { + assert_eq!(PornhubShortiesProvider::normalize_sort("new"), "new"); + assert_eq!(PornhubShortiesProvider::normalize_sort("trending"), "trending"); + assert_eq!(PornhubShortiesProvider::normalize_sort("popular"), "mostviewed"); + assert_eq!(PornhubShortiesProvider::normalize_sort("unknown"), "new"); + } +}