diff --git a/build.rs b/build.rs index 316cfa9..51cbe06 100644 --- a/build.rs +++ b/build.rs @@ -366,6 +366,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "camsoda", ty: "CamsodaProvider", }, + ProviderDef { + id: "fyptt", + module: "fyptt", + ty: "FypttProvider", + }, ]; fn main() { diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 0707786..0c5f3af 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -16,6 +16,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `erome` | `amateur-homemade` | no | no | HTML album scraper with hot/new feeds, keyword search, and uploader-slug shortcuts (`uploader:`). | | `fikfap` | `tiktok` | yes | yes (thumbs only) | JSON-API provider for fikfap.com (TikTok-style swipe short clips); anonymous auth via a client-generated `Authorization-Anonymous` UUID header (no real login needed); listing via `GET api.fikfap.com/posts?sort=new\|trending\|random&amount=N&afterId=` (cursor pagination — page N costs N sequential requests); search via `GET search?q=` (single fixed-size batch, no pagination — page 2+ returns empty); hashtag feeds via `GET hashtags/label/{label}/posts` and creator feeds via `GET profile/username/{user}/posts`, both also cursor-paginated; `tag:`/`hashtag:`/`#` and `user:`/`uploader:` query prefixes route directly; `categories` option exposes a small curated static hashtag list (no full catalog endpoint exists anonymously); `video.url` is the `fikfap.com/post/{id}` page (a client-rendered SPA, not yt-dlp-resolvable on its own); `videoStreamUrl` from the JSON response is sent directly as `formats[0].url` (signed Bunny CDN HLS `.m3u8`, ~24h token expiry) with `httpHeaders: {Referer: https://fikfap.com/}` — Hot Tub clients apply a format's `http_headers` across the whole HLS playback session (manifest, sub-playlists, and segments), so no proxying of the media itself is needed; thumbnails have no per-field header mechanism, so they're proxied via `/proxy/fikfap-thumb/...` to inject the same Referer; `get_uploader` implemented (`fikfap:` IDs) using `GET profile/username/{user}`. | | `freepornvideosxxx` | `studio-network` | no | no | Studio-style scraper. | +| `fyptt` | `tiktok` | no | no | HTML scraper for fyptt.to (Beaver Builder/WordPress short-form TikTok-style vertical porn); card selector `.fl-post-grid-post[class*="post-ID"]` with `category-{slug}` CSS class doubling as both listing tag and category-archive route; latest feed `/` (page N: `/page/N/`), search `/?s=query` (page N: `/page/N/?s=query`), category archives at bare top-level slugs like `/tiktok-ass/` (12 hardcoded categories exposed via `categories` option, also matched from free-text `cat:`/`category:` query prefixes or bare category-title queries); per-item enrichment fetches the detail page for the JSON-LD `embedURL` (one of three on-site player endpoints: `fypttstr.php`, `fypttjwstr.php`, or `fypttjwstrhls.php`) and `datePublished`, then fetches that embed URL to extract the actual signed `stream.fyptt.to` mp4 or `/hls/*.m3u8` URL (token expires ~2h, no Referer required) for `formats`; thumbnails (`fyptt.to/wp-content/uploads/...webp`) need no proxy; no duration metadata available on listing or detail pages (set to 0); no real uploader/model identity (the `girl-{slug}` CSS class is cosmetic only, not a linkable archive) so `/api/uploaders` is not implemented; `video.url` is the detail page URL (not yt-dlp resolvable directly — the player is sandboxed-iframe-only) so `formats` are populated instead; no proxy needed. | | `freeuseporn` | `fetish-kink` | no | no | Fetish archive pattern. | | `hanime` | `hentai-animation` | no | yes | Uses proxied CDN/thumb handling. | | `heavyfetish` | `fetish-kink` | no | no | Direct media handling. | diff --git a/src/providers/fyptt.rs b/src/providers/fyptt.rs new file mode 100644 index 0000000..5ba687b --- /dev/null +++ b/src/providers/fyptt.rs @@ -0,0 +1,522 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, report_provider_error_background, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use chrono::DateTime; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use ntex::http::Version; +use regex::Regex; +use scraper::{Html, Selector}; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "tiktok", + tags: &["tiktok", "vertical", "shorts"], + }; + +const BASE_URL: &str = "https://fyptt.to"; +const CHANNEL_ID: &str = "fyptt"; +const DEFAULT_PER_PAGE: usize = 24; +const ENRICH_CONCURRENCY: usize = 6; + +const CATEGORIES: &[(&str, &str)] = &[ + ("tiktok-nudes", "Nudes"), + ("tiktok-porn", "TikTok"), + ("tiktok-boobs", "Boobs"), + ("instagram-porn", "Instagram"), + ("tiktok-sex", "Sex"), + ("nsfw-tiktok", "NSFW"), + ("tiktok-xxx", "XXX"), + ("tiktok-ass", "Ass"), + ("tiktok-pussy", "Pussy"), + ("tiktok-live", "Live"), + ("tiktok-thots", "Thots"), + ("sexy-tiktok", "Sexy"), +]; + +error_chain! { + foreign_links { + Io(std::io::Error); + Url(url::ParseError); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct FypttProvider; + +#[derive(Debug, Clone)] +enum Target { + Latest, + Search { query: String }, + Category { slug: String }, +} + +#[derive(Debug, Clone)] +struct CardStub { + id: String, + title: String, + url: String, + thumb: String, + category_slug: Option, +} + +impl FypttProvider { + pub fn new() -> Self { + Self + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + Channel { + id: CHANNEL_ID.to_string(), + name: "FYPTT".to_string(), + description: "FYPTT.to short-form vertical TikTok/Reels/Shorts-style porn videos with category browsing and direct playback.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=fyptt.to".to_string(), + status: "active".to_string(), + categories: vec![], + options: vec![ChannelOption { + id: "categories".to_string(), + title: "Category".to_string(), + description: "Browse FYPTT by content category.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "purple".to_string(), + options: CATEGORIES + .iter() + .map(|(slug, title)| FilterOption { + id: slug.to_string(), + title: title.to_string(), + }) + .collect(), + multiSelect: false, + }], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` failed: {error}"))) + } + + fn category_slug_for(query: &str) -> Option<&'static str> { + let normalized = query.trim().to_lowercase(); + CATEGORIES + .iter() + .find(|(slug, title)| { + slug.eq_ignore_ascii_case(&normalized) || title.eq_ignore_ascii_case(&normalized) + }) + .map(|(slug, _)| *slug) + } + + fn pick_target(query: Option<&str>) -> Target { + if let Some(query) = query { + let q = query.trim(); + if !q.is_empty() { + if let Some(slug) = q.strip_prefix("cat:").or_else(|| q.strip_prefix("category:")) { + if let Some(known) = Self::category_slug_for(slug) { + return Target::Category { slug: known.to_string() }; + } + return Target::Category { slug: slug.trim().to_string() }; + } + if let Some(slug) = Self::category_slug_for(q) { + return Target::Category { slug: slug.to_string() }; + } + return Target::Search { query: q.to_string() }; + } + } + + Target::Latest + } + + fn build_listing_url(target: &Target, page: u16) -> String { + let page = page.max(1); + match target { + Target::Latest => { + if page <= 1 { + format!("{BASE_URL}/") + } else { + format!("{BASE_URL}/page/{page}/") + } + } + Target::Search { query } => { + let encoded: String = url::form_urlencoded::byte_serialize(query.as_bytes()).collect(); + if page <= 1 { + format!("{BASE_URL}/?s={encoded}") + } else { + format!("{BASE_URL}/page/{page}/?s={encoded}") + } + } + Target::Category { slug } => { + if page <= 1 { + format!("{BASE_URL}/{slug}/") + } else { + format!("{BASE_URL}/{slug}/page/{page}/") + } + } + } + } + + fn html_headers(referer: &str) -> Vec<(String, String)> { + vec![ + ("User-Agent".to_string(), "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string()), + ("Referer".to_string(), referer.to_string()), + ] + } + + async fn fetch_html(options: &ServerOptions, url: &str, referer: &str) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_html"); + requester + .get_with_headers(url, Self::html_headers(referer), Some(Version::HTTP_11)) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } + + fn parse_listing_page(html: &str) -> Result> { + let document = Html::parse_document(html); + let card_sel = Self::selector(".fl-post-grid-post")?; + let link_sel = Self::selector(".fl-post-grid-title a[href]")?; + let img_sel = Self::selector("img[src]")?; + + let class_regex = Regex::new(r"category-([a-z0-9-]+)") + .map_err(|error| Error::from(format!("regex failed: {error}")))?; + + let mut stubs = Vec::new(); + let mut seen_ids = std::collections::HashSet::new(); + + for card in document.select(&card_sel) { + let class_attr = card.value().attr("class").unwrap_or_default(); + + let id = class_attr + .split_whitespace() + .find_map(|c| c.strip_prefix("post-")) + .map(str::to_string) + .unwrap_or_default(); + if id.is_empty() || !seen_ids.insert(id.clone()) { + continue; + } + + let link = card.select(&link_sel).next(); + let url = link + .and_then(|a| a.value().attr("href")) + .map(str::to_string) + .unwrap_or_default(); + if url.is_empty() { + continue; + } + + let title = link + .map(|a| a.text().collect::>().join("")) + .map(|t| t.trim().to_string()) + .filter(|t| !t.is_empty()) + .unwrap_or_else(|| id.clone()); + + let thumb = card + .select(&img_sel) + .next() + .and_then(|img| img.value().attr("src")) + .map(str::to_string) + .unwrap_or_default(); + + let category_slug = class_regex + .captures(class_attr) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str().to_string()); + + stubs.push(CardStub { + id, + title, + url, + thumb, + category_slug, + }); + } + + Ok(stubs) + } + + async fn fetch_listing(url: &str, options: &ServerOptions) -> Result> { + let html = Self::fetch_html(options, url, BASE_URL).await?; + Self::parse_listing_page(&html) + } + + fn extract_embed_url(html: &str) -> Option { + let idx = html.find("\"embedURL\":\"")?; + let rest = &html[idx + "\"embedURL\":\"".len()..]; + let end = rest.find('"')?; + Some(rest[..end].replace("\\/", "/")) + } + + fn extract_uploaded_at(html: &str) -> Option { + let idx = html.find("\"datePublished\":\"")?; + let rest = &html[idx + "\"datePublished\":\"".len()..]; + let end = rest.find('"')?; + let raw = rest[..end].replace("\\/", "/"); + DateTime::parse_from_rfc3339(&raw) + .ok() + .map(|dt| dt.timestamp() as u64) + } + + fn extract_stream_url(html: &str) -> Option { + let idx = html.find("https://stream.fyptt.to/")?; + let rest = &html[idx..]; + let end = rest + .find(|c: char| c == '"' || c == '\'') + .unwrap_or(rest.len()); + Some(rest[..end].to_string()) + } + + fn category_title(slug: &str) -> Option<&'static str> { + CATEGORIES + .iter() + .find(|(s, _)| *s == slug) + .map(|(_, title)| *title) + } + + async fn enrich_card(stub: CardStub, options: &ServerOptions) -> Result { + let detail_html = Self::fetch_html(options, &stub.url, BASE_URL).await?; + + let embed_url = Self::extract_embed_url(&detail_html) + .ok_or_else(|| Error::from(format!("no embedURL found in {}", stub.url)))?; + let uploaded_at = Self::extract_uploaded_at(&detail_html); + + let player_html = Self::fetch_html(options, &embed_url, &stub.url).await?; + let stream_url = Self::extract_stream_url(&player_html) + .ok_or_else(|| Error::from(format!("no stream url found via {embed_url}")))?; + + let format = if stream_url.contains(".m3u8") { + VideoFormat::m3u8(stream_url, "auto".to_string(), "m3u8".to_string()) + } else { + VideoFormat::new(stream_url, "auto".to_string(), "mp4".to_string()) + }; + + let mut item = VideoItem::new( + stub.id, + stub.title, + stub.url, + CHANNEL_ID.to_string(), + stub.thumb, + 0, + ); + + item.formats = Some(vec![format]); + item.uploadedAt = uploaded_at; + item.aspectRatio = Some(9.0 / 16.0); + + if let Some(tag) = stub + .category_slug + .as_deref() + .and_then(Self::category_title) + { + item.tags = Some(vec![tag.to_string()]); + } + + Ok(item) + } + + async fn fetch_page( + target: Target, + page: u16, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let url = Self::build_listing_url(&target, page); + let stubs = Self::fetch_listing(&url, options).await?; + + let limited: Vec<_> = stubs.into_iter().take(per_page).collect(); + let options = options.clone(); + + let items = stream::iter(limited.into_iter().map(|stub| { + let options = options.clone(); + async move { + match Self::enrich_card(stub, &options).await { + Ok(item) => Some(item), + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "fetch_page.enrich_card", + &error.to_string(), + ); + None + } + } + } + })) + .buffer_unordered(ENRICH_CONCURRENCY) + .filter_map(async move |v| v) + .collect::>() + .await; + + Ok(items) + } +} + +#[async_trait] +impl Provider for FypttProvider { + async fn get_videos( + &self, + _cache: VideoCache, + _pool: DbPool, + _sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1).max(1); + let per_page = per_page + .parse::() + .unwrap_or(DEFAULT_PER_PAGE) + .clamp(1, 48); + + let normalized_query = query + .as_deref() + .map(str::trim) + .filter(|q| !q.is_empty()) + .map(ToOwned::to_owned); + + let category_option = options + .categories + .as_deref() + .map(str::trim) + .filter(|c| !c.is_empty()); + + let target = match category_option { + Some(cat) => match Self::category_slug_for(cat) { + Some(slug) => Target::Category { slug: slug.to_string() }, + None => Target::Category { slug: cat.to_string() }, + }, + None => Self::pick_target(normalized_query.as_deref()), + }; + + match Self::fetch_page(target, page, per_page, &options).await { + Ok(items) => items, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_listing_urls() { + assert_eq!( + FypttProvider::build_listing_url(&Target::Latest, 1), + "https://fyptt.to/" + ); + assert_eq!( + FypttProvider::build_listing_url(&Target::Latest, 2), + "https://fyptt.to/page/2/" + ); + assert_eq!( + FypttProvider::build_listing_url( + &Target::Search { query: "thot ass".to_string() }, + 1 + ), + "https://fyptt.to/?s=thot+ass" + ); + assert_eq!( + FypttProvider::build_listing_url( + &Target::Search { query: "thot".to_string() }, + 2 + ), + "https://fyptt.to/page/2/?s=thot" + ); + assert_eq!( + FypttProvider::build_listing_url(&Target::Category { slug: "tiktok-ass".to_string() }, 1), + "https://fyptt.to/tiktok-ass/" + ); + assert_eq!( + FypttProvider::build_listing_url(&Target::Category { slug: "tiktok-ass".to_string() }, 2), + "https://fyptt.to/tiktok-ass/page/2/" + ); + } + + #[test] + fn picks_category_target_from_prefix() { + match FypttProvider::pick_target(Some("cat:tiktok-ass")) { + Target::Category { slug } => assert_eq!(slug, "tiktok-ass"), + other => panic!("expected Category, got {:?}", other), + } + } + + #[test] + fn picks_category_target_from_title_match() { + match FypttProvider::pick_target(Some("Boobs")) { + Target::Category { slug } => assert_eq!(slug, "tiktok-boobs"), + other => panic!("expected Category, got {:?}", other), + } + } + + #[test] + fn picks_search_target_for_unknown_query() { + match FypttProvider::pick_target(Some("amateur")) { + Target::Search { query } => assert_eq!(query, "amateur"), + other => panic!("expected Search, got {:?}", other), + } + } + + #[test] + fn extracts_embed_url_from_json_ld() { + let html = r#"{"@type":"VideoObject","embedURL":"https:\/\/fyptt.to\/fypttstr.php?fileid=1jyYHhjN&mainurl=23135%2Fslug","name":"Title"}"#; + assert_eq!( + FypttProvider::extract_embed_url(html).as_deref(), + Some("https://fyptt.to/fypttstr.php?fileid=1jyYHhjN&mainurl=23135%2Fslug") + ); + } + + #[test] + fn extracts_uploaded_at_from_json_ld() { + let html = r#"{"datePublished":"2026-05-29T13:06:45+00:00"}"#; + assert_eq!(FypttProvider::extract_uploaded_at(html), Some(1780060005)); + } + + #[test] + fn extracts_mp4_stream_url() { + let html = r#""#; + assert_eq!( + FypttProvider::extract_stream_url(html).as_deref(), + Some("https://stream.fyptt.to/1jyYHhjN.mp4?token=AAA&expires=111") + ); + } + + #[test] + fn extracts_hls_stream_url() { + let html = r#""#; + assert_eq!( + FypttProvider::extract_stream_url(html).as_deref(), + Some("https://stream.fyptt.to/hls/J2KKe8ya.m3u8?token=BBB&expires=222") + ); + } + + #[test] + fn parses_listing_cards() { + let html = r#"

Some Title

"#; + let stubs = FypttProvider::parse_listing_page(html).unwrap(); + assert_eq!(stubs.len(), 1); + assert_eq!(stubs[0].id, "23135"); + assert_eq!(stubs[0].title, "Some Title"); + assert_eq!(stubs[0].url, "https://fyptt.to/23135/slug/"); + assert_eq!(stubs[0].thumb, "https://fyptt.to/thumb.webp"); + assert_eq!(stubs[0].category_slug.as_deref(), Some("nsfw-tiktok")); + } +}