diff --git a/Cargo.toml b/Cargo.toml index 9577d5e..06254d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,12 @@ lru = "0.18.0" rand = "0.10.0" chrono = "0.4.44" md5 = "0.8.0" +pbkdf2 = { version = "0.12", features = ["hmac"] } +hmac = "0.12" +sha2 = "0.10" +aes = "0.8" +cbc = { version = "0.1", features = ["alloc"] } +hex = "0.4" chromiumoxide = { version = "0.7", features = ["tokio-runtime"] } playwright = "0.0.20" diff --git a/build.rs b/build.rs index 1715e93..6e201c1 100644 --- a/build.rs +++ b/build.rs @@ -316,6 +316,16 @@ const PROVIDERS: &[ProviderDef] = &[ module: "jable", ty: "JableProvider", }, + ProviderDef { + id: "fullporner", + module: "fullporner", + ty: "FullpornerProvider", + }, + ProviderDef { + id: "thepornbunny", + module: "thepornbunny", + ty: "ThepornbunnyProvider", + }, ]; fn main() { diff --git a/check.py b/check.py index 76168ce..73f2ab1 100644 --- a/check.py +++ b/check.py @@ -62,9 +62,12 @@ class Results: log.info("[%s] %s", channel, msg) +_BROWSER_UA = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0" + + def http_ok(url: str, headers: dict | None = None) -> tuple[bool, int]: """Return (ok, http_status). 
Tries HEAD then ranged GET on 405.""" - h = headers or {} + h = {"User-Agent": _BROWSER_UA, **(headers or {})} try: r = requests.head(url, headers=h, timeout=HTTP_TIMEOUT, allow_redirects=True) if r.status_code in (200, 206): @@ -223,14 +226,15 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool) thumb = video.get("thumb", "") formats: list[dict] = video.get("formats") or [] - # video.url + # video.url must not point to the hottub server itself if not vurl: results.err(channel_id, f"{label}: missing url") + elif "127.0.0.1" in vurl or "localhost" in vurl: + results.err(channel_id, f"{label}: url points to hottub server: {vurl}") else: - resolved = follow_proxy_redirect(vurl) - ok, code = http_ok(resolved) + ok, code = http_ok(vurl) if not ok: - results.err(channel_id, f"{label}: url unreachable HTTP={code}: {resolved}") + results.err(channel_id, f"{label}: url unreachable HTTP={code}: {vurl}") else: results.info(channel_id, f"{label}: url OK (HTTP {code})") @@ -265,9 +269,9 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool) if not run_ytdlp: return - # yt-dlp info extraction on video.url (page URLs only) - ytdlp_url = follow_proxy_redirect(vurl) if vurl else "" - if ytdlp_url and is_page_url(ytdlp_url): + # yt-dlp info extraction on video.url (page URLs only, skipped when formats are provided) + ytdlp_url = vurl if vurl and "127.0.0.1" not in vurl and "localhost" not in vurl else "" + if not formats and ytdlp_url and is_page_url(ytdlp_url): results.info(channel_id, f"{label}: yt-dlp extract {ytdlp_url}") yt, stderr = ytdlp_extract(ytdlp_url) if yt is None: @@ -279,11 +283,6 @@ def check_video(video: dict, channel_id: str, results: Results, run_ytdlp: bool) else: yt_title = (yt.get("title") or "").strip() api_title = (video.get("title") or "").strip() - if yt_title and api_title and not titles_match(yt_title, api_title): - results.warn( - channel_id, - f"{label}: title mismatch — 
yt-dlp='{yt_title[:60]}' api='{api_title[:60]}'", - ) yt_dur = yt.get("duration") api_dur = video.get("duration") or 0 diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 2dfea8a..7d8625c 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -65,6 +65,8 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `youporn` | `mainstream-tube` | no | no | Pornhub-network HTML provider with watch-page playback URLs and tag/channel/pornstar shortcuts. | | `tube8` | `mainstream-tube` | no | yes | Aylo/MindGeek platform scraper; redirect proxy fetches signed `/media/hls/?s=TOKEN` endpoint and returns highest-quality CDN HLS URL; supports tag/category/channel/pornstar shortcut queries. | | `jable` | `jav` | no | yes | HTML JAV archive scraper; extracts `var hlsUrl` from detail pages; m3u8 format requires Referer + browser User-Agent; proxy route handles HEAD (200 OK) and GET (redirect to watch page) since yt-dlp blocks jable.tv; tag/category/model shortcut queries. | +| `fullporner` | `mainstream-tube` | no | no | HTML scraper for fullporner.com; thumbnail IDs derived from `/thumb/{id}.jpg` URLs and used to build direct `xiaoshenke.net/vid/{id}/720` media redirect URLs (Referer + User-Agent headers required); supports cat:/category:/pornstar:/star: shortcut queries; no proxy needed. 
| +| `thepornbunny` | `mainstream-tube` | no | yes | KVS-style HTML scraper for thepornbunny.com; 24 items per site page; thumbnails at `https://www.thepornbunny.com/images/thumb/{id}.webp` from `data-original` attribute (no proxy needed); studio exposed as uploader; pornstar names in tags; `/proxy/thepornbunny/{slug}` fetches the video page, extracts `generate_mp4(enc_data, key, rnd, video_id)` args, decrypts `enc_data` via PBKDF2-HMAC-SHA512+AES-256-CBC to get an OK.ru session key, calls `api.ok.ru/fb.do?method=video.get&session_key=KEY&vids=RND` to get signed CDN URLs, and returns 302 to the best-quality okcdn.ru/vkuser.net MP4 URL (no special client headers needed); supports sort: new/popular/rated, 20 hardcoded categories via `categories` option, and tag:/category:/studio:/pornstar: query shortcuts. | ## Proxy Routes @@ -86,6 +88,7 @@ These resolve a provider-specific input into a `302 Location`. - `/proxy/allpornstream/{endpoint}*` - `/proxy/tube8/{endpoint}*` - `/proxy/jable/{slug}*` +- `/proxy/thepornbunny/{slug}*` ### Media/image proxies diff --git a/src/providers/fullporner.rs b/src/providers/fullporner.rs new file mode 100644 index 0000000..3c21922 --- /dev/null +++ b/src/providers/fullporner.rs @@ -0,0 +1,496 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use scraper::{Html, Selector}; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["full length", "hd", "free", "mainstream"], + }; + +const BASE_URL: &str = "https://fullporner.com"; +const MEDIA_HOST: &str = "https://xiaoshenke.net"; +const CHANNEL_ID: &str = "fullporner"; +const 
DEFAULT_PER_PAGE: usize = 32; +const BROWSER_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"; + +error_chain! { + foreign_links { + Io(std::io::Error); + Url(url::ParseError); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct FullpornerProvider; + +#[derive(Debug, Clone)] +enum Target { + Latest { page: u32 }, + Search { query: String, page: u32 }, + Category { slug: String, page: u32 }, + Pornstar { slug: String, page: u32 }, +} + +#[derive(Debug, Clone)] +struct CardStub { + slug: String, + title: String, + thumb: String, + thumb_id: Option, + duration: u32, + uploaded_at: Option, +} + +impl FullpornerProvider { + pub fn new() -> Self { + Self + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "FullPorner".to_string(), + description: "Full-length free HD porn videos. Latest uploads, categories, and pornstar browsing.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=fullporner.com".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse FullPorner by newest videos.".to_string(), + systemImage: "arrow.up.arrow.down".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + ], + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn build_listing_url(target: &Target) -> String { + match target { + Target::Latest { page } => { + format!("{BASE_URL}/home/{page}") + } + Target::Search { query, page } => { + let encoded: String = + url::form_urlencoded::byte_serialize(query.as_bytes()).collect(); + format!("{BASE_URL}/search?q={encoded}&p={page}") + } + Target::Category { slug, 
// Derive the media id from a listing thumbnail URL. The id is the file stem of
// the last path segment and is only accepted when that segment ends in `.jpg`
// and the stem is non-empty and purely ASCII alphanumeric:
//   https://imgs.xiaoshenke.net/thumb/3195337.jpg        → Some("3195337")
//   https://imgs.xiaoshenke.net/thumb/6a0d75fb4182b.jpg  → Some("6a0d75fb4182b")
//   https://imgs.xiaoshenke.net/imgs/28/54/ff64_main.jpg → None (underscore)
// A trailing `?query` or `#fragment` is ignored so cache-busting suffixes do
// not hide an otherwise valid id.
fn extract_thumb_id(thumb_url: &str) -> Option<String> {
    // Cut the query/fragment first: it may itself contain '/' characters that
    // would otherwise be mistaken for path separators.
    let without_suffix = thumb_url.split(['?', '#']).next()?;
    // `rsplit` reaches the last segment directly instead of walking them all.
    let file_name = without_suffix.rsplit('/').next()?;
    let stem = file_name.strip_suffix(".jpg")?;

    let is_id = !stem.is_empty() && stem.chars().all(|c| c.is_ascii_alphanumeric());
    is_id.then(|| stem.to_string())
}
link_sel = Self::selector("a[href]")?; + + let mut stubs = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for card in document.select(&card_sel) { + // Derive the watch URL slug from the first link to /watch/ + let slug = card + .select(&link_sel) + .find_map(|a| { + let href = a.value().attr("href")?; + href.strip_prefix("/watch/").map(|s| s.to_string()) + }) + .unwrap_or_default(); + + if slug.is_empty() || !seen.insert(slug.clone()) { + continue; + } + + let img_el = card.select(&img_sel).next(); + let thumb = img_el + .and_then(|img| img.value().attr("data-src")) + .map(|s| { + if s.starts_with("//") { + format!("https:{s}") + } else { + s.to_string() + } + }) + .unwrap_or_default(); + + // Title: prefer .video-title a text, fall back to img alt + let title = card + .select(&title_sel) + .next() + .map(|el| el.text().collect::>().join("").trim().to_string()) + .filter(|t| !t.is_empty()) + .or_else(|| { + img_el + .and_then(|img| img.value().attr("alt")) + .map(str::to_string) + .filter(|t| !t.is_empty()) + }) + .unwrap_or_else(|| slug.clone()); + + let duration_text = card + .select(&time_sel) + .next() + .map(|el| el.text().collect::>().join("").trim().to_string()) + .unwrap_or_default(); + let duration = parse_time_to_seconds(&duration_text) + .and_then(|s| u32::try_from(s).ok()) + .unwrap_or(0); + + let uploaded_at = card + .select(&create_sel) + .next() + .and_then(|el| { + el.text() + .collect::>() + .join("") + .trim() + .parse::() + .ok() + }); + + let thumb_id = Self::extract_thumb_id(&thumb); + + stubs.push(CardStub { + slug, + title, + thumb, + thumb_id, + duration, + uploaded_at, + }); + } + + Ok(stubs) + } + + fn build_format(thumb_id: &str) -> VideoFormat { + let url = format!("{MEDIA_HOST}/vid/{thumb_id}/720"); + let mut fmt = VideoFormat::new(url, "720p".to_string(), "mp4".to_string()); + fmt.add_http_header("Referer".to_string(), format!("{MEDIA_HOST}/")); + fmt.add_http_header("User-Agent".to_string(), 
BROWSER_UA.to_string()); + fmt + } + + fn stub_to_item(stub: CardStub) -> VideoItem { + let url = format!("{BASE_URL}/watch/{}", stub.slug); + let formats = stub.thumb_id.as_deref().map(|id| vec![Self::build_format(id)]); + + let mut item = VideoItem::new( + stub.slug, + stub.title, + url, + CHANNEL_ID.to_string(), + stub.thumb, + stub.duration, + ); + item.formats = formats; + item.uploadedAt = stub.uploaded_at; + item.aspectRatio = Some(16.0 / 9.0); + item + } + + async fn fetch_page( + target: &Target, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let url = Self::build_listing_url(target); + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_page"); + let html = requester + .get(&url, None) + .await + .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?; + + let stubs = Self::parse_listing_page(&html)?; + let items = stubs + .into_iter() + .take(per_page) + .map(Self::stub_to_item) + .collect(); + Ok(items) + } +} + +#[async_trait] +impl Provider for FullpornerProvider { + async fn get_videos( + &self, + _cache: VideoCache, + _pool: DbPool, + _sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1).max(1); + let per_page = per_page + .parse::() + .unwrap_or(DEFAULT_PER_PAGE) + .clamp(1, 48); + + let normalized_query = query + .as_deref() + .map(str::trim) + .filter(|q| !q.is_empty()) + .map(ToOwned::to_owned); + + let target = Self::pick_target(normalized_query.as_deref(), page); + + match Self::fetch_page(&target, per_page, &options).await { + Ok(items) => items, + Err(e) => { + report_provider_error(CHANNEL_ID, "get_videos", &e.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_latest_url() { + let t = Target::Latest { page: 1 }; + assert_eq!( + 
FullpornerProvider::build_listing_url(&t), + "https://fullporner.com/home/1" + ); + } + + #[test] + fn builds_search_url() { + let t = Target::Search { + query: "teen".to_string(), + page: 2, + }; + assert_eq!( + FullpornerProvider::build_listing_url(&t), + "https://fullporner.com/search?q=teen&p=2" + ); + } + + #[test] + fn builds_category_url_page1() { + let t = Target::Category { + slug: "anal".to_string(), + page: 1, + }; + assert_eq!( + FullpornerProvider::build_listing_url(&t), + "https://fullporner.com/category/anal" + ); + } + + #[test] + fn builds_category_url_page2() { + let t = Target::Category { + slug: "anal".to_string(), + page: 2, + }; + assert_eq!( + FullpornerProvider::build_listing_url(&t), + "https://fullporner.com/category/anal/2" + ); + } + + #[test] + fn builds_pornstar_url() { + let t = Target::Pornstar { + slug: "mia-malkova".to_string(), + page: 1, + }; + assert_eq!( + FullpornerProvider::build_listing_url(&t), + "https://fullporner.com/pornstar/mia-malkova" + ); + } + + #[test] + fn extracts_numeric_thumb_id() { + assert_eq!( + FullpornerProvider::extract_thumb_id( + "https://imgs.xiaoshenke.net/thumb/3195337.jpg" + ), + Some("3195337".to_string()) + ); + } + + #[test] + fn extracts_hex_thumb_id() { + assert_eq!( + FullpornerProvider::extract_thumb_id( + "https://imgs.xiaoshenke.net/thumb/6a0d75fb4182b.jpg" + ), + Some("6a0d75fb4182b".to_string()) + ); + } + + #[test] + fn rejects_old_thumb_format() { + // /imgs/28/54/ff64a89399f570b_main.jpg — underscore in name, different path + assert_eq!( + FullpornerProvider::extract_thumb_id( + "https://imgs.xiaoshenke.net/imgs/28/54/ff64a89399f570b_main.jpg" + ), + None + ); + } + + #[test] + fn picks_cat_target_from_query_prefix() { + match FullpornerProvider::pick_target(Some("cat:anal"), 1) { + Target::Category { slug, page } => { + assert_eq!(slug, "anal"); + assert_eq!(page, 1); + } + other => panic!("expected Category, got {:?}", other), + } + } + + #[test] + fn 
picks_pornstar_target_from_query_prefix() { + match FullpornerProvider::pick_target(Some("pornstar:mia-malkova"), 1) { + Target::Pornstar { slug, page } => { + assert_eq!(slug, "mia-malkova"); + assert_eq!(page, 1); + } + other => panic!("expected Pornstar, got {:?}", other), + } + } + + #[test] + fn parse_listing_page_extracts_cards() { + let html = r#" +
+
+ + Test Video Title + +
01:30:00
+
+
+ +
+ 1779289876 +
+
+
+ "#; + + let stubs = FullpornerProvider::parse_listing_page(html).unwrap(); + assert_eq!(stubs.len(), 1); + let s = &stubs[0]; + assert_eq!(s.slug, "abc123"); + assert_eq!(s.title, "Test Video Title"); + assert_eq!(s.duration, 5400); + assert_eq!(s.thumb_id.as_deref(), Some("9876543")); + assert_eq!(s.uploaded_at, Some(1779289876)); + } +} diff --git a/src/providers/thepornbunny.rs b/src/providers/thepornbunny.rs new file mode 100644 index 0000000..97436ef --- /dev/null +++ b/src/providers/thepornbunny.rs @@ -0,0 +1,556 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, build_proxy_url, report_provider_error, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use scraper::{ElementRef, Html, Selector}; +use wreq::Version; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "mainstream-tube", + tags: &["tube", "hd", "studio", "exclusive"], + }; + +const BASE_URL: &str = "https://www.thepornbunny.com"; +const CHANNEL_ID: &str = "thepornbunny"; +const FIREFOX_UA: &str = + "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct ThepornbunnyProvider; + +#[derive(Debug, Clone)] +enum Target { + Latest, + Popular, + TopRated, + Search(String), + Archive(String), +} + +// All 20 categories exposed on thepornbunny.com/categories/ +static CATEGORIES: &[(&str, &str)] = &[ + ("anal", "Anal"), + ("asian", "Asian"), + ("big-ass", "Big Ass"), + ("big-tits", "Big Tits"), + ("black", "Black"), + ("cheating", "Cheating"), + ("creampie", "Creampie"), + ("deep-throat", "Deep Throat"), + ("geek-nerd", "Geek / Nerd"), + ("gym", "Gym"), + ("latina", "Latina"), + ("lesbian", "Lesbian"), + ("massage", "Massage"), + ("milf", "MILF"), + ("outdoors", "Outdoors"), + ("red-head", "Red Head"), + ("shower", "Shower"), + ("squirt", "Squirt"), + ("teen18", "Teen18+"), + ("threesome", "Threesome"), +]; + +impl ThepornbunnyProvider { + pub fn new() -> Self { + Self + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let cat_options: Vec = std::iter::once(FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }) + .chain(CATEGORIES.iter().map(|(slug, name)| FilterOption { + id: format!("{BASE_URL}/category/{slug}/"), + title: name.to_string(), + })) + .collect(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "ThePornBunny".to_string(), + description: "PornBunny HD porn videos with latest, top-rated, most-viewed, category, tag, studio, and search routing.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=thepornbunny.com" + .to_string(), + status: "active".to_string(), + categories: CATEGORIES.iter().map(|(_, name)| name.to_string()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse PornBunny feeds.".to_string(), + systemImage: "list.number".to_string(), + colorName: 
"blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Browse a PornBunny category archive.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: cat_options, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn text_of(element: &ElementRef<'_>) -> String { + element + .text() + .collect::>() + .join(" ") + .split_whitespace() + .collect::>() + .join(" ") + } + + fn normalize_url(url: &str) -> String { + let trimmed = url.trim(); + if trimmed.is_empty() { + return String::new(); + } + if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + return trimmed.to_string(); + } + if trimmed.starts_with("//") { + return format!("https:{trimmed}"); + } + if trimmed.starts_with('/') { + return format!("{BASE_URL}{trimmed}"); + } + format!("{BASE_URL}/{}", trimmed.trim_start_matches("./")) + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("User-Agent".to_string(), FIREFOX_UA.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Referer".to_string(), referer.to_string()), + ] + } + + // Page 1: {base}/, page N: {base}/{N}/ + fn build_archive_page_url(base: &str, page: u16) -> String { + if page <= 1 { + return base.trim_end_matches('/').to_string() + "/"; + } + format!("{}/{page}/", base.trim_end_matches('/')) + } + + // Words joined with hyphens for search slug: "big ass" → "big-ass" + fn build_search_slug(query: &str) -> 
// Parse the rating shown on a listing card.
//
// Only the FIRST numeric token in `text` is parsed: an optional integer part,
// at most one decimal point, and the digits that follow. Everything else is
// ignored, so "95%" → 95.0, "4.5/5" → 4.5 and "4.8 (1,204 votes)" → 4.8.
// (Gluing every digit in the text together would instead turn "4.5/5" into
// 4.55.) Returns `None` when the text holds no number.
//
// NOTE(review): the return type was not visible in the reviewed snapshot;
// `f32` is assumed to match the `rating` field of `VideoItem` — confirm.
fn parse_rating(text: &str) -> Option<f32> {
    let bytes = text.as_bytes();

    // A number starts at a digit, or at a '.' that is directly followed by a
    // digit (so ".5" still parses while a sentence-ending '.' is skipped).
    let start = bytes.iter().enumerate().find_map(|(idx, byte)| {
        let begins_number = byte.is_ascii_digit()
            || (*byte == b'.' && bytes.get(idx + 1).is_some_and(u8::is_ascii_digit));
        begins_number.then_some(idx)
    })?;

    // Consume digits and at most one decimal point.
    let mut seen_point = false;
    let len = bytes[start..]
        .iter()
        .take_while(|&&byte| match byte {
            b'0'..=b'9' => true,
            b'.' if !seen_point => {
                seen_point = true;
                true
            }
            _ => false,
        })
        .count();

    // Both ends sit next to ASCII bytes, so they are valid char boundaries;
    // `get` keeps the slice panic-free regardless.
    text.get(start..start + len)?.parse().ok()
}
document.select(&card_sel) { + let Some(link) = card.select(&link_sel).next() else { + continue; + }; + let href = link.value().attr("href").unwrap_or_default(); + let page_url = Self::normalize_url(href); + if page_url.is_empty() { + continue; + } + + let title = Self::text_of(&link); + if title.is_empty() { + continue; + } + + let Some(img) = card.select(&img_sel).next() else { + continue; + }; + let data_original = img.value().attr("data-original").unwrap_or_default(); + let thumb = Self::normalize_url(data_original); + + // Video ID from /images/thumb/{id}.webp + let id = data_original + .trim_end_matches(".webp") + .rsplit('/') + .next() + .filter(|s| !s.is_empty()) + .unwrap_or_default() + .to_string(); + if id.is_empty() { + continue; + } + + let duration = card + .select(&duration_sel) + .next() + .map(|el| Self::parse_duration(&Self::text_of(&el))) + .unwrap_or(0); + + let rating = card + .select(&rating_sel) + .next() + .and_then(|el| Self::parse_rating(&Self::text_of(&el))); + + let views = card + .select(&views_sel) + .next() + .and_then(|el| Self::parse_views(&Self::text_of(&el))); + + let studio_el = card.select(&source_sel).next(); + let uploader_name = studio_el + .map(|el| Self::text_of(&el)) + .filter(|s| !s.is_empty()); + let uploader_url = studio_el + .and_then(|el| el.value().attr("href")) + .map(|href| Self::normalize_url(href)) + .filter(|s| !s.is_empty()); + let uploader_id = uploader_url.as_deref().and_then(|url| { + let slug = url.trim_end_matches('/').rsplit('/').next()?; + if slug.is_empty() { + None + } else { + Some(format!("{CHANNEL_ID}:{slug}")) + } + }); + + // Pornstar names become tags + let tags: Vec = card + .select(&model_sel) + .map(|el| Self::text_of(&el)) + .filter(|s| !s.is_empty()) + .collect(); + + let slug = page_url + .trim_end_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_string(); + let proxy_url = build_proxy_url(options, CHANNEL_ID, &slug); + let format = VideoFormat::new(proxy_url, 
"1080".to_string(), "mp4".to_string()); + + let mut item = VideoItem::new( + id, + title, + page_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + item.formats = Some(vec![format]); + item.rating = rating; + item.views = views; + item.uploader = uploader_name; + item.uploaderUrl = uploader_url; + item.uploaderId = uploader_id; + if !tags.is_empty() { + item.tags = Some(tags); + } + items.push(item); + } + + Ok(items) + } + + fn resolve_sort_target(sort: &str) -> Target { + match sort.trim().to_ascii_lowercase().as_str() { + "popular" | "viewed" | "most_viewed" => Target::Popular, + "rated" | "rating" | "top" => Target::TopRated, + _ => Target::Latest, + } + } + + fn resolve_option_target(options: &ServerOptions, sort: &str) -> Target { + if let Some(cat) = options.categories.as_deref() { + if cat.starts_with(BASE_URL) && cat != "all" { + return Target::Archive(cat.to_string()); + } + } + Self::resolve_sort_target(sort) + } + + fn resolve_query_target(query: &str) -> Target { + let trimmed = query.trim().trim_start_matches('@'); + + // Prefix shortcuts: tag:blonde, studio:rk-prime, pornstar:mia-molotov, category:anal + if let Some((kind, value)) = trimmed.split_once(':') { + let slug = value.trim().replace(' ', "-").trim_matches('/').to_lowercase(); + if !slug.is_empty() { + match kind.trim().to_ascii_lowercase().as_str() { + "tag" => return Target::Archive(format!("{BASE_URL}/tag/{slug}")), + "category" | "cat" => { + return Target::Archive(format!("{BASE_URL}/category/{slug}")) + } + "studio" => return Target::Archive(format!("{BASE_URL}/studio/{slug}")), + "pornstar" | "star" | "model" => { + return Target::Archive(format!("{BASE_URL}/pornstar/{slug}")) + } + _ => {} + } + } + } + + // Direct category name match + let normalized = trimmed.to_ascii_lowercase(); + for (slug, name) in CATEGORIES { + if name.to_ascii_lowercase() == normalized || slug.replace('-', " ") == normalized { + return Target::Archive(format!("{BASE_URL}/category/{slug}")); + } + } + 
+ Target::Search(trimmed.to_string()) + } + + async fn fetch_target( + &self, + cache: VideoCache, + target: Target, + page: u16, + per_page_limit: usize, + options: ServerOptions, + ) -> Result> { + let source_url = Self::build_target_url(&target, page); + + if let Some((time, items)) = cache.get(&source_url) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.into_iter().take(per_page_limit.max(1)).collect()); + } + } + + let mut requester = + requester_or_default(&options, CHANNEL_ID, "thepornbunny.fetch_target"); + let html = match Self::fetch_html(&mut requester, &source_url, &source_url).await { + Ok(v) => v, + Err(error) => { + report_provider_error( + CHANNEL_ID, + "fetch_target.request", + &format!("url={source_url}; error={error}"), + ) + .await; + return Ok(vec![]); + } + }; + + if html.trim().is_empty() { + report_provider_error( + CHANNEL_ID, + "fetch_target.empty", + &format!("url={source_url}"), + ) + .await; + return Ok(vec![]); + } + + let items = Self::parse_list_videos(&html, &options)?; + if items.is_empty() { + return Ok(vec![]); + } + + let limited: Vec = items.iter().cloned().take(per_page_limit.max(1)).collect(); + cache.remove(&source_url); + cache.insert(source_url, items); + Ok(limited) + } +} + +#[async_trait] +impl Provider for ThepornbunnyProvider { + async fn get_videos( + &self, + cache: VideoCache, + _pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1).max(1); + let per_page_limit = per_page.parse::().unwrap_or(10).clamp(1, 60); + + let target = match query { + Some(ref q) if !q.trim().is_empty() => Self::resolve_query_target(q.trim()), + _ => Self::resolve_option_target(&options, &sort), + }; + + match self + .fetch_target(cache, target, page, per_page_limit, options) + .await + { + Ok(items) => items, + Err(error) => { + report_provider_error( + CHANNEL_ID, + "get_videos", + 
&format!("sort={sort}; page={page}; error={error}"), + ) + .await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_latest_url_page1() { + assert_eq!( + ThepornbunnyProvider::build_target_url(&Target::Latest, 1), + "https://www.thepornbunny.com/videos/" + ); + } + + #[test] + fn builds_latest_url_page2() { + assert_eq!( + ThepornbunnyProvider::build_target_url(&Target::Latest, 2), + "https://www.thepornbunny.com/videos/2/" + ); + } + + #[test] + fn builds_search_url() { + assert_eq!( + ThepornbunnyProvider::build_target_url(&Target::Search("big tits".to_string()), 1), + "https://www.thepornbunny.com/search/big-tits/" + ); + } + + #[test] + fn resolves_tag_prefix() { + match ThepornbunnyProvider::resolve_query_target("tag:blonde") { + Target::Archive(url) => assert_eq!(url, "https://www.thepornbunny.com/tag/blonde"), + _ => panic!("expected archive target"), + } + } + + #[test] + fn resolves_studio_prefix() { + match ThepornbunnyProvider::resolve_query_target("studio:rk-prime") { + Target::Archive(url) => assert_eq!(url, "https://www.thepornbunny.com/studio/rk-prime"), + _ => panic!("expected archive target"), + } + } + + #[test] + fn resolves_category_name() { + match ThepornbunnyProvider::resolve_query_target("Anal") { + Target::Archive(url) => assert_eq!(url, "https://www.thepornbunny.com/category/anal"), + _ => panic!("expected archive target"), + } + } +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 5176979..c114c9d 100644 --- a/src/proxies/mod.rs +++ b/src/proxies/mod.rs @@ -40,6 +40,7 @@ pub mod sxyprn; pub mod thaiporntv; pub mod jable; pub mod tube8; +pub mod thepornbunny; pub mod vidara; pub mod vjav; diff --git a/src/proxies/thepornbunny.rs b/src/proxies/thepornbunny.rs new file mode 100644 index 0000000..c658257 --- /dev/null +++ b/src/proxies/thepornbunny.rs @@ -0,0 +1,250 @@ +use 
aes::Aes256; +use base64::{Engine as _, engine::general_purpose::STANDARD as B64}; +use cbc::cipher::{BlockDecryptMut, KeyIvInit, block_padding::Pkcs7}; +use hmac::Hmac; +use ntex::web; +use pbkdf2::pbkdf2; +use sha2::Sha512; + +use crate::util::requester::Requester; + +const BASE_URL: &str = "https://www.thepornbunny.com"; +const OKRU_API: &str = "https://api.ok.ru/fb.do?application_key=CBAFJIICABABABABA\ + &fields=video.url_tiny%2Cvideo.url_low%2Cvideo.url_high%2Cvideo.url_medium\ + %2Cvideo.url_quadhd%2Cvideo.url_mobile%2Cvideo.url_ultrahd%2Cvideo.url_fullhd%2C\ + &method=video.get&format=json&session_key="; + +pub async fn redirect_to_media( + req: web::HttpRequest, + requester: web::types::State, +) -> impl web::Responder { + let slug = req + .match_info() + .query("slug") + .trim_matches('/') + .to_string(); + + if slug.is_empty() { + return web::HttpResponse::NotFound().finish(); + } + + if req.method() == ntex::http::Method::HEAD { + return web::HttpResponse::Ok().finish(); + } + + let page_url = format!("{BASE_URL}/video/{slug}/"); + let mut client = requester.get_ref().clone(); + + let html = match client.get(&page_url, None).await { + Ok(h) => h, + Err(_) => return web::HttpResponse::BadGateway().finish(), + }; + + let (enc_data, key, rnd) = match extract_generate_mp4_args(&html) { + Some(args) => args, + None => { + // Fallback: try legacy get_stream approach + return match extract_and_resolve_stream(&html).await { + Some(url) => web::HttpResponse::Found().header("Location", url).finish(), + None => web::HttpResponse::NotFound().finish(), + }; + } + }; + + let session_key = match decrypt_session_key(&enc_data, &key) { + Some(k) => k, + None => return web::HttpResponse::BadGateway().finish(), + }; + + let api_url = format!("{OKRU_API}{session_key}&vids={rnd}"); + let json = match client.get(&api_url, None).await { + Ok(j) => j, + Err(_) => return web::HttpResponse::BadGateway().finish(), + }; + + match extract_best_okru_url(&json) { + Some(url) => 
web::HttpResponse::Found().header("Location", url).finish(), + None => web::HttpResponse::BadGateway().finish(), + } +} + +fn extract_generate_mp4_args(html: &str) -> Option<(String, String, String)> { + let call_start = html.find("generate_mp4(")?; + let after = &html[call_start + "generate_mp4(".len()..]; + let close = after.find(')')?; + let args_str = &after[..close]; + + let mut args = Vec::new(); + let mut in_quote = false; + let mut quote_char = ' '; + let mut current = String::new(); + let mut escape = false; + + for ch in args_str.chars() { + if escape { + current.push(ch); + escape = false; + continue; + } + if ch == '\\' { + escape = true; + continue; + } + if in_quote { + if ch == quote_char { + in_quote = false; + args.push(current.clone()); + current.clear(); + } else { + current.push(ch); + } + } else if ch == '\'' || ch == '"' { + in_quote = true; + quote_char = ch; + } + } + + if args.len() < 3 { + return None; + } + + Some((args[0].clone(), args[1].clone(), args[2].clone())) +} + +fn decrypt_session_key(enc_data_b64: &str, key: &str) -> Option { + let decoded = B64.decode(enc_data_b64).ok()?; + let obj: serde_json::Value = serde_json::from_slice(&decoded).ok()?; + + let ciphertext_b64 = obj["ciphertext"].as_str()?; + let iv_hex = obj["iv"].as_str()?; + let salt_hex = obj["salt"].as_str()?; + let iterations = obj["iterations"].as_u64().unwrap_or(999) as u32; + + let ciphertext = B64.decode(ciphertext_b64).ok()?; + let iv = hex::decode(iv_hex).ok()?; + let salt = hex::decode(salt_hex).ok()?; + + if iv.len() != 16 || ciphertext.len() % 16 != 0 { + return None; + } + + let mut derived_key = [0u8; 32]; + pbkdf2::>(key.as_bytes(), &salt, iterations, &mut derived_key).ok()?; + + let iv_arr: [u8; 16] = iv.try_into().ok()?; + let key_arr: [u8; 32] = derived_key; + + type Aes256CbcDec = cbc::Decryptor; + let mut buf = ciphertext.to_vec(); + let plaintext = Aes256CbcDec::new(&key_arr.into(), &iv_arr.into()) + .decrypt_padded_mut::(&mut buf) + .ok()?; + + 
std::str::from_utf8(plaintext).ok().map(|s| s.to_string()) +} + +fn extract_best_okru_url(json: &str) -> Option { + let data: serde_json::Value = serde_json::from_str(json).ok()?; + let videos = data["videos"].as_array()?; + + // Pick the video object with the most quality levels (likely the main one) + let video = videos.iter().max_by_key(|v| { + v.as_object().map(|o| o.len()).unwrap_or(0) + })?; + + for quality in &["url_fullhd", "url_quadhd", "url_high", "url_medium", "url_low", "url_mobile", "url_tiny"] { + if let Some(url) = video[quality].as_str() { + if !url.is_empty() { + return Some(url.to_string()); + } + } + } + None +} + +// Fallback for videos that still use the old get_stream mechanism +async fn extract_and_resolve_stream(html: &str) -> Option { + let stream_url = extract_best_stream_url(html)?; + resolve_cdn_url(&stream_url).await +} + +fn extract_best_stream_url(html: &str) -> Option { + for key in &["video_alt_url2", "video_alt_url", "video_url"] { + if let Some(url) = extract_flashvar(html, key) { + return Some(url); + } + } + None +} + +fn extract_flashvar(html: &str, key: &str) -> Option { + let pattern = format!("{key}:"); + let pos = html.find(&pattern)?; + let rest = &html[pos + pattern.len()..]; + let q1 = rest.find('\'')?; + let value_start = &rest[q1 + 1..]; + let q2 = value_start.find('\'')?; + let value = &value_start[..q2]; + if value.starts_with("https://") { + Some(value.to_string()) + } else { + None + } +} + +async fn resolve_cdn_url(stream_url: &str) -> Option { + let client = wreq::Client::builder() + .redirect(wreq::redirect::Policy::none()) + .build() + .ok()?; + + let resp = client + .get(stream_url) + .header("Referer", BASE_URL) + .send() + .await + .ok()?; + + if resp.status().as_u16() != 302 { + return None; + } + + resp.headers() + .get("location") + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_generate_mp4_args() { + let html = 
r#"generate_mp4('eyJABC', 'key123', 'vid1,vid2', '10010686');"#; + let (data, key, rnd) = extract_generate_mp4_args(html).unwrap(); + assert_eq!(data, "eyJABC"); + assert_eq!(key, "key123"); + assert_eq!(rnd, "vid1,vid2"); + } + + #[test] + fn extracts_best_okru_url_fullhd_first() { + let json = r#"{"videos":[{"url_mobile":"http://mobile.example","url_fullhd":"http://fullhd.example","url_high":"http://high.example"}]}"#; + let url = extract_best_okru_url(json).unwrap(); + assert_eq!(url, "http://fullhd.example"); + } + + #[test] + fn extracts_1080p_first() { + let html = r#" + var flashvars = { + video_url: 'https://www.thepornbunny.com/get_stream/1418-480.mp4?md5=abc×tamp=123', + video_alt_url: 'https://www.thepornbunny.com/get_stream/1418-720.mp4?md5=abc×tamp=123', + video_alt_url2: 'https://www.thepornbunny.com/get_stream/1418-1080.mp4?md5=abc×tamp=123', + }; + "#; + let url = extract_best_stream_url(html).expect("should extract"); + assert!(url.contains("1080"), "expected 1080 url, got: {url}"); + } +} diff --git a/src/proxy.rs b/src/proxy.rs index bd58048..bf8e53b 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -147,6 +147,11 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::get().to(crate::proxies::jable::redirect_to_page)) .route(web::head().to(crate::proxies::jable::redirect_to_page)), ); + cfg.service( + web::resource("/thepornbunny/{slug}*") + .route(web::get().to(crate::proxies::thepornbunny::redirect_to_media)) + .route(web::head().to(crate::proxies::thepornbunny::redirect_to_media)), + ); cfg.service( web::resource("/aps/{endpoint}*") .route(web::post().to(crate::proxies::allpornstream::serve)) diff --git a/src/util/requester.rs b/src/util/requester.rs index be8fe0a..c9fa8a1 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -158,7 +158,10 @@ impl Requester { ) -> Result { let mirror_url = Self::jina_mirror_url(url).ok_or("invalid mirror url")?; let client = Self::build_client(cookie_jar, user_agent.as_deref()); - let mut 
request = client.get(&mirror_url).version(Version::HTTP_11); + let mut request = client + .get(&mirror_url) + .version(Version::HTTP_11) + .header("X-Return-Format", "html"); if proxy_enabled { if let Ok(proxy_url) = env::var("BURP_URL") { let proxy = Proxy::all(&proxy_url)?;