From 6d18397c5118e2e01e93106fd53b829c68ff9b8c Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 21 Jun 2026 11:21:56 +0000 Subject: [PATCH] camsoda --- Cargo.toml | 4 +- docs/provider-catalog.md | 2 +- src/providers/camsoda.rs | 651 +++++++++++++++++++++++++++++++-------- 3 files changed, 521 insertions(+), 136 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 06254d3..a9d0d56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,7 +54,7 @@ unexpected_cfgs = "allow" [profile.dev] opt-level = 0 -debug = 1 +debug = 0 codegen-units = 256 incremental = true @@ -67,4 +67,4 @@ incremental = true opt-level = 3 codegen-units = 16 lto = false -debug = 0 +debug = 0 \ No newline at end of file diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 5fe257d..5bea193 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -73,7 +73,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. | | `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. | | `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. | -| `camsoda` | `live-cams` | no | no | HTML scraper for camsoda.com `/media` listing; CF-protected so relies on Jina HTML fallback (requester sends `X-Return-Format: html`); parses anchor tags with `[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]` CSS selectors; video-specific thumbnails from `media-secure.camsoda.com/user/videos/{id}/`; video URLs are page URLs (`/{username}/media/{slug}/{id}`) — CF blocks direct access and yt-dlp; supports `uploader:`/`model:` query prefix to browse a model's media page; no search API — keyword queries fall through to client-side filtering; pagination via `?page=N` but CamSoda serves the same 60 items regardless of page (JS-driven infinite scroll). | +| `camsoda` | `live-cams` | no | no | JSON-API provider for camsoda.com recorded model clips. camsoda.com is hard Cloudflare-protected: direct requests and yt-dlp both get HTTP 403, and FlareSolverr was unreliable during development, so the only reliable path through CF is the shared requester's Jina mirror fallback (`r.jina.ai/http://...`, `X-Return-Format: html`) — note Jina rate-limits per IP, so multi-provider builds that burst many concurrent fetches see 429s; validate with a single-provider build (`HOT_TUB_PROVIDER=camsoda`) which makes one fetch at a time. The `/media` page is a CF-protected SPA whose SSR ignores `?page`/`?p`/`sort` (it always renders the same default 60 items); pagination/sort/tag are client-only XHR to a JSON API discovered in the (non-CF-protected) static `pages-media-MediaMainPage` bundle: `GET https://www.camsoda.com/api/v1/media/list/video?page=N&sort_by=&tag=` returning `{"result":true,"data":[...]}` — Jina returns that JSON wrapped in a `
`, so the provider extracts the `{...}` slice and parses it (this gives real pagination across ~166 pages, plus sort and tag filtering — fixing the old HTML-scrape that couldn't paginate or search). Rich item fields come straight from the JSON: `name`→title, `username`→uploader slug, `user_display_name`→uploader, `duration` (seconds), `created_at`→`uploadedAt`, `thumbnail_url` (direct `media-secure.camsoda.com`, no proxy/referer needed). `sort_by` values: `date_added` (default/new), `popular`, `popular_all_time` (top). 49-tag catalog (extracted from the bundle) is exposed via the `categories` option (sanitized out of `/api/status` like other big catalogs, but honored in `/api/videos`) and routed by `tag:`/`cat:`/`category:` prefixes or a bare keyword that exactly matches a tag slug; there is no keyword media-search endpoint, so other bare queries fall back to the default listing for the server's client-side substring filter. `model:`/`uploader:`/`user:`/`performer:` prefixes browse a performer's SSR `/{username}/media` page, parsed via anchor selectors (`[class*="media-item-module__title"]` / `[class*="media-item-module__subtitle"]`). `video.url` is the page URL (`/{username}/media/{slug}/{id}`); recorded clips are token-gated (`token_price`>0, `is_free_no_auth` effectively always false) and CF-blocks both browser and yt-dlp, so no `formats` are populated and playback is not resolvable in this environment (`check.py` reports these as expected CF warnings — `www.camsoda.com` is in its CF allowlist — not errors). No proxy needed. |
 | `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. |
 | `wowxxx` | `studio-network` | no | no | HTML scraper for wow.xxx premium aggregator; default feed `/latest-updates/`, page 2 `/{N}/` suffix (for example `/latest-updates/2/`), search `/search/{query}/relevance/` with the same page suffix; supports `site:`/`studio:`/`network:`/`model:`/`pornstar:`/`tag:`/`cat:` query shortcuts to direct archive routes; list cards expose preview clips (`cast.wow.xxx/preview/*.mp4`), thumbnails (`img.wow.xxx/.../medium@2x/1.jpg`), duration, rating, views, site (as uploader), and model tags; `video.url` is the detail page URL and yt-dlp resolves HTML5 MP4 formats dynamically; no proxy needed. |
 
diff --git a/src/providers/camsoda.rs b/src/providers/camsoda.rs
index 1dcf6c0..d6f3c9e 100644
--- a/src/providers/camsoda.rs
+++ b/src/providers/camsoda.rs
@@ -1,15 +1,16 @@
 use crate::DbPool;
 use crate::api::ClientVersion;
-use crate::providers::{
-    Provider, report_provider_error, requester_or_default,
-};
+use crate::providers::{Provider, report_provider_error, requester_or_default};
 use crate::status::*;
 use crate::util::cache::VideoCache;
 use crate::util::time::parse_time_to_seconds;
 use crate::videos::{ServerOptions, VideoItem};
 use async_trait::async_trait;
+use chrono::NaiveDateTime;
 use error_chain::error_chain;
+use htmlentity::entity::{ICodedDataTrait, decode};
 use scraper::{Html, Selector};
+use serde::Deserialize;
 use std::collections::HashSet;
 
 pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
@@ -20,6 +21,67 @@ pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
 
 const BASE_URL: &str = "https://www.camsoda.com";
 const CHANNEL_ID: &str = "camsoda";
+/// Recorded-media listing API. The site's `/media` page is a CF-protected SPA
+/// that loads this JSON endpoint over XHR for every page/sort/tag change:
+///   GET /api/v1/media/list/video?page=N&sort_by=&tag=
+/// Direct access is Cloudflare-challenged (HTTP 403), so the shared requester
+/// falls back to the Jina mirror, which returns the JSON wrapped in a `
`.
+const API_LIST: &str = "https://www.camsoda.com/api/v1/media/list/video";
+
+/// Tag catalog exposed by the media filter dropdown (extracted from the
+/// MediaMainPage bundle). Used for the `categories` option and to route bare /
+/// `tag:`/`cat:` queries straight to a tag archive instead of dropping them.
+const MEDIA_TAGS: &[&str] = &[
+    "amateur",
+    "anal",
+    "asian",
+    "ass",
+    "bbw",
+    "big-ass",
+    "big-tits",
+    "black",
+    "blonde-hair",
+    "blowjob",
+    "bondage",
+    "brown-hair",
+    "college",
+    "cosplay",
+    "creampie",
+    "cum",
+    "curvy",
+    "dildo",
+    "dp",
+    "ebony",
+    "facial",
+    "feet",
+    "fetish",
+    "hairy-pussy",
+    "hd",
+    "japanese",
+    "latina",
+    "lesbian",
+    "lovense",
+    "lush",
+    "massage",
+    "masturbation",
+    "milf",
+    "muscle",
+    "ohmibod",
+    "outdoor",
+    "petite",
+    "pov",
+    "public",
+    "red-hair",
+    "shaved-pussy",
+    "small-tits",
+    "squirting",
+    "swallow",
+    "teen-18",
+    "threesome",
+    "toys",
+    "tranny",
+    "voyeur",
+];
 
 error_chain! {
     foreign_links {
@@ -38,14 +100,44 @@ pub struct CamsodaProvider {
     url: String,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 enum Target {
-    /// Default listing at /media?page=N
-    Listing,
-    /// Model media page at /{username}/media
+    /// Default/tag listing via the JSON API. `tag` is `None` for "all".
+    Listing { tag: Option },
+    /// A specific model's recorded-media page (`/{username}/media`).
     Model { username: String },
 }
 
+/// Subset of the `media/list` JSON item fields the provider consumes. All
+/// nullable fields are modelled as `Option` so a stray `null` never aborts the
+/// whole page parse.
+#[derive(Debug, Deserialize)]
+struct ApiMediaItem {
+    id: i64,
+    #[serde(default)]
+    name: Option,
+    #[serde(default)]
+    slug: Option,
+    #[serde(default)]
+    duration: Option,
+    #[serde(default)]
+    created_at: Option,
+    #[serde(default)]
+    thumbnail_url: Option,
+    #[serde(default)]
+    user_display_name: Option,
+    #[serde(default)]
+    username: Option,
+    #[serde(default)]
+    is_video: Option,
+}
+
+#[derive(Debug, Deserialize)]
+struct ApiResponse {
+    #[serde(default)]
+    data: Vec,
+}
+
 impl CamsodaProvider {
     pub fn new() -> Self {
         Self {
@@ -54,6 +146,16 @@ impl CamsodaProvider {
     }
 
     fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
+        let cat_options = std::iter::once(FilterOption {
+            id: "all".to_string(),
+            title: "All".to_string(),
+        })
+        .chain(MEDIA_TAGS.iter().map(|slug| FilterOption {
+            id: slug.to_string(),
+            title: Self::pretty_tag(slug),
+        }))
+        .collect::>();
+
         Channel {
             id: CHANNEL_ID.to_string(),
             name: "CamSoda".to_string(),
@@ -64,20 +166,121 @@ impl CamsodaProvider {
             favicon: "https://www.google.com/s2/favicons?sz=64&domain=camsoda.com".to_string(),
             status: "active".to_string(),
             categories: vec![],
-            options: vec![],
+            options: vec![
+                ChannelOption {
+                    id: "sort".to_string(),
+                    title: "Sort".to_string(),
+                    description: "Order the CamSoda media feed.".to_string(),
+                    systemImage: "list.number".to_string(),
+                    colorName: "blue".to_string(),
+                    options: vec![
+                        FilterOption {
+                            id: "new".to_string(),
+                            title: "Newest".to_string(),
+                        },
+                        FilterOption {
+                            id: "popular".to_string(),
+                            title: "Popular".to_string(),
+                        },
+                        FilterOption {
+                            id: "top".to_string(),
+                            title: "Popular (All Time)".to_string(),
+                        },
+                    ],
+                    multiSelect: false,
+                },
+                ChannelOption {
+                    id: "categories".to_string(),
+                    title: "Categories".to_string(),
+                    description: "Filter CamSoda media by tag.".to_string(),
+                    systemImage: "square.grid.2x2".to_string(),
+                    colorName: "orange".to_string(),
+                    options: cat_options,
+                    multiSelect: false,
+                },
+            ],
             nsfw: true,
             cacheDuration: Some(1800),
         }
     }
 
-    /// Resolve the fetch target from query and options.
-    fn pick_target(query: Option<&str>) -> Target {
+    /// Map a Hot Tub sort id to the API's `sort_by` value.
+    fn map_sort(sort: &str) -> &'static str {
+        match sort.trim().to_ascii_lowercase().as_str() {
+            "popular" | "trending" | "hot" | "featured" => "popular",
+            "top" | "rated" | "best" | "mostviewed" | "most_viewed" | "popular_all_time" => {
+                "popular_all_time"
+            }
+            // "new", "newest", "latest", "recent", empty, anything else
+            _ => "date_added",
+        }
+    }
+
+    /// Lowercase/space-normalize a value for tag lookups.
+    fn normalize_key(s: &str) -> String {
+        s.trim()
+            .trim_start_matches('#')
+            .replace(['_', ' '], "-")
+            .to_ascii_lowercase()
+    }
+
+    /// Resolve a user-supplied value to a known tag slug, if it matches one.
+    fn resolve_tag(value: &str) -> Option {
+        let key = Self::normalize_key(value);
+        if key.is_empty() {
+            return None;
+        }
+        MEDIA_TAGS
+            .iter()
+            .find(|slug| **slug == key)
+            .map(|slug| slug.to_string())
+    }
+
+    /// Pretty display title for a tag slug (e.g. `big-tits` -> `Big Tits`).
+    fn pretty_tag(slug: &str) -> String {
+        slug.split('-')
+            .map(|word| match word {
+                "dp" => "DP".to_string(),
+                "pov" => "POV".to_string(),
+                "bbw" => "BBW".to_string(),
+                "hd" => "HD".to_string(),
+                "18" => "18".to_string(),
+                other => {
+                    let mut chars = other.chars();
+                    match chars.next() {
+                        Some(first) => {
+                            first.to_uppercase().collect::() + chars.as_str()
+                        }
+                        None => String::new(),
+                    }
+                }
+            })
+            .collect::>()
+            .join(" ")
+    }
+
+    /// Resolve the fetch target from the query and the selected category option.
+    fn pick_target(query: Option<&str>, category: Option<&str>) -> Target {
+        // An explicitly selected category option wins.
+        if let Some(cat) = category {
+            let cat = cat.trim();
+            if !cat.is_empty() && cat != "all" {
+                if let Some(tag) = Self::resolve_tag(cat) {
+                    return Target::Listing { tag: Some(tag) };
+                }
+                // Unknown but non-empty: still pass a slug through to the API.
+                return Target::Listing {
+                    tag: Some(Self::normalize_key(cat)),
+                };
+            }
+        }
+
         let Some(query) = query.map(str::trim).filter(|v| !v.is_empty()) else {
-            return Target::Listing;
+            return Target::Listing { tag: None };
         };
 
-        // Support "uploader:username" or "model:username" shortcuts.
-        for prefix in &["uploader:", "model:", "user:"] {
+        // Model shortcuts browse a performer's media page.
+        for prefix in &["uploader:", "model:", "user:", "performer:"] {
             if let Some(username) = query.strip_prefix(prefix) {
                 let username = username.trim().to_lowercase();
                 if !username.is_empty() {
@@ -86,38 +289,154 @@ impl CamsodaProvider {
             }
         }
 
-        // For other queries fall back to the default listing;
-        // the server will apply client-side substring filtering.
-        Target::Listing
-    }
-
-    fn build_listing_url(&self, target: &Target, page: u16) -> String {
-        let page = page.max(1);
-        match target {
-            Target::Listing => format!("{}/media?page={}", self.url, page),
-            Target::Model { username } => {
-                if page <= 1 {
-                    format!("{}/{}/media", self.url, username)
-                } else {
-                    format!("{}/{}/media?page={}", self.url, username, page)
+        // Tag/category shortcuts route straight to a tag archive.
+        for prefix in &["tag:", "cat:", "category:"] {
+            if let Some(rest) = query.strip_prefix(prefix) {
+                let slug = Self::resolve_tag(rest).unwrap_or_else(|| Self::normalize_key(rest));
+                if !slug.is_empty() {
+                    return Target::Listing { tag: Some(slug) };
                 }
             }
         }
+
+        // A bare keyword that exactly matches a known tag is much better served
+        // by that tag archive than by the (non-existent) media search endpoint.
+        if let Some(tag) = Self::resolve_tag(query) {
+            return Target::Listing { tag: Some(tag) };
+        }
+
+        // No media keyword search exists; fall back to the default listing and
+        // let the server apply its client-side substring filter for quoted
+        // queries.
+        Target::Listing { tag: None }
     }
 
-    /// Parse video cards from the HTML of a CamSoda media page.
+    fn build_api_url(tag: Option<&str>, sort: &str, page: u16) -> String {
+        let page = page.max(1);
+        match tag {
+            Some(tag) if !tag.is_empty() && tag != "all" => {
+                format!("{API_LIST}?page={page}&sort_by={sort}&tag={tag}")
+            }
+            _ => format!("{API_LIST}?page={page}&sort_by={sort}"),
+        }
+    }
+
+    fn build_model_url(&self, username: &str, page: u16) -> String {
+        if page <= 1 {
+            format!("{}/{}/media", self.url, username)
+        } else {
+            format!("{}/{}/media?page={}", self.url, username, page)
+        }
+    }
+
+    fn clean_text(text: &str) -> String {
+        let decoded = decode(text.as_bytes())
+            .to_string()
+            .unwrap_or_else(|_| text.to_string());
+        decoded.split_whitespace().collect::>().join(" ")
+    }
+
+    fn parse_created_at(value: &str) -> Option {
+        NaiveDateTime::parse_from_str(value.trim(), "%Y-%m-%dT%H:%M:%S")
+            .ok()
+            .map(|dt| dt.and_utc().timestamp())
+            .and_then(|ts| u64::try_from(ts).ok())
+    }
+
+    /// Extract the JSON object from a body that may be wrapped in HTML by the
+    /// Jina mirror (`
{...}
`) or returned raw. + fn extract_json(body: &str) -> Option<&str> { + let start = body.find('{')?; + let end = body.rfind('}')?; + if end > start { + Some(&body[start..=end]) + } else { + None + } + } + + /// Parse the `media/list` JSON response into rich `VideoItem`s. + fn parse_api_items(body: &str, tag: Option<&str>) -> Result> { + let json = Self::extract_json(body) + .ok_or_else(|| Error::from("no JSON object found in response".to_string()))?; + let parsed: ApiResponse = serde_json::from_str(json) + .map_err(|e| Error::from(format!("media/list JSON decode failed: {e}")))?; + + let mut items = Vec::with_capacity(parsed.data.len()); + let mut seen: HashSet = HashSet::new(); + + for media in parsed.data { + if matches!(media.is_video, Some(false)) { + continue; + } + if !seen.insert(media.id) { + continue; + } + let username = media.username.unwrap_or_default(); + let slug = media.slug.unwrap_or_default(); + if username.is_empty() || slug.is_empty() { + continue; + } + let video_id = media.id.to_string(); + + let title = media + .name + .as_deref() + .map(Self::clean_text) + .filter(|t| !t.is_empty()) + .unwrap_or_else(|| format!("CamSoda video {video_id}")); + + let duration = media + .duration + .and_then(|d| u32::try_from(d).ok()) + .unwrap_or(0); + + let thumb = media.thumbnail_url.unwrap_or_default(); + let page_url = format!("{BASE_URL}/{username}/media/{slug}/{video_id}"); + + let mut item = VideoItem::new( + video_id, + title, + page_url, + CHANNEL_ID.to_string(), + thumb, + duration, + ); + + let uploader = media + .user_display_name + .as_deref() + .map(Self::clean_text) + .filter(|u| !u.is_empty()) + .unwrap_or_else(|| username.clone()); + item.uploader = Some(uploader); + item.uploaderUrl = Some(format!("{BASE_URL}/{username}/media")); + item.uploaderId = Some(format!("{CHANNEL_ID}:{username}")); + + if let Some(ts) = media.created_at.as_deref().and_then(Self::parse_created_at) { + item.uploadedAt = Some(ts); + } + + if let Some(tag) = tag { + if !tag.is_empty() && tag != "all" { + item.tags = Some(vec![Self::pretty_tag(tag)]); + } + } + + items.push(item); + } + + Ok(items) + } + + /// Parse video cards from the HTML of a CamSoda model media page. /// - /// The page contains anchor elements linking to individual video pages: - /// href="/{username}/media/{slug}/{id}" - /// - /// Inside each anchor: - /// - `[class*="media-item-module__title"]` span: the video title - /// - `[class*="media-item-module__subtitle"]` span: "by UPLOADER (MM:SS)" - /// - `img[src*="media-secure.camsoda.com"]`: video-specific thumbnail + /// Each card is an anchor linking to `/{username}/media/{slug}/{id}` with a + /// `media-item-module__title` span and a `media-item-module__subtitle` span + /// holding `by UPLOADER (MM:SS)`. fn parse_html_items(html: &str) -> Vec { let document = Html::parse_document(html); - // Select all anchors linking to /{username}/media/{slug}/{id} let anchor_sel = match Selector::parse(r#"a[href]"#) { Ok(s) => s, Err(_) => return vec![], @@ -135,13 +454,11 @@ impl CamsodaProvider { Err(_) => return vec![], }; - // Regex for parsing subtitle "by UPLOADER (MM:SS)" - let sub_re = match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") { - Ok(r) => r, - Err(_) => return vec![], - }; - - // Regex for media URL: /{username}/media/{slug}/{id} + let sub_re = + match regex::Regex::new(r"(?i)^by\s+(.+?)\s+\((\d{1,2}:\d{2}(?::\d{2})?)\)\s*$") { + Ok(r) => r, + Err(_) => return vec![], + }; let href_re = match regex::Regex::new(r"^/([^/]+)/media/([^/]+)/(\d+)$") { Ok(r) => r, Err(_) => return vec![], @@ -155,7 +472,6 @@ impl CamsodaProvider { Some(h) => h, None => continue, }; - let caps = match href_re.captures(href) { Some(c) => c, None => continue, @@ -164,16 +480,13 @@ impl CamsodaProvider { let username = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string(); let slug = caps.get(2).map(|m| m.as_str()).unwrap_or("").to_string(); let video_id = caps.get(3).map(|m| m.as_str()).unwrap_or("").to_string(); - if video_id.is_empty() || username.is_empty() { continue; } - if !seen_ids.insert(video_id.clone()) { continue; } - // Title let title = anchor .select(&title_sel) .next() @@ -185,7 +498,6 @@ impl CamsodaProvider { title }; - // Subtitle: "by UPLOADER (MM:SS)" let subtitle = anchor .select(&subtitle_sel) .next() @@ -193,8 +505,12 @@ impl CamsodaProvider { .unwrap_or_default(); let (uploader, duration) = if let Some(sc) = sub_re.captures(&subtitle) { - let u = sc.get(1).map(|m| m.as_str().trim().to_string()).unwrap_or_default(); - let d = sc.get(2) + let u = sc + .get(1) + .map(|m| m.as_str().trim().to_string()) + .unwrap_or_default(); + let d = sc + .get(2) .and_then(|m| parse_time_to_seconds(m.as_str())) .and_then(|s| u32::try_from(s).ok()) .unwrap_or(0); @@ -203,7 +519,6 @@ impl CamsodaProvider { (None, 0) }; - // Thumbnail — prefer video-specific from media-secure.camsoda.com let thumb = anchor .select(&img_sel) .filter_map(|img| img.value().attr("src")) @@ -237,29 +552,55 @@ impl CamsodaProvider { items } - async fn fetch_items( + async fn fetch_listing( &self, - target: &Target, + tag: Option<&str>, + sort: &str, page: u16, options: &ServerOptions, ) -> Result> { - let url = self.build_listing_url(target, page); - let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items"); + let url = Self::build_api_url(tag, sort, page); + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_listing"); let text = requester .get(&url, None) .await .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?; - // Guard against CF challenge pages slipping through if text.contains("cf-browser-verification") || text.contains("cf-chl") || text.contains("Just a moment") { - return Err(Error::from("cloudflare challenge page returned".to_string())); + return Err(Error::from( + "cloudflare challenge page returned".to_string(), + )); } - let items = Self::parse_html_items(&text); - Ok(items) + Self::parse_api_items(&text, tag) + } + + async fn fetch_model( + &self, + username: &str, + page: u16, + options: &ServerOptions, + ) -> Result> { + let url = self.build_model_url(username, page); + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_model"); + let text = requester + .get(&url, None) + .await + .map_err(|e| Error::from(format!("fetch failed for {url}: {e}")))?; + + if text.contains("cf-browser-verification") + || text.contains("cf-chl") + || text.contains("Just a moment") + { + return Err(Error::from( + "cloudflare challenge page returned".to_string(), + )); + } + + Ok(Self::parse_html_items(&text)) } } @@ -277,7 +618,6 @@ impl Provider for CamsodaProvider { ) -> Vec { let _ = cache; let _ = pool; - let _ = sort; let _ = per_page; let page = page.parse::().unwrap_or(1).max(1); @@ -287,9 +627,23 @@ impl Provider for CamsodaProvider { .filter(|v| !v.is_empty()) .map(ToOwned::to_owned); - let target = Self::pick_target(normalized_query.as_deref()); + let category = options + .categories + .as_deref() + .or(options.category.as_deref()); - match self.fetch_items(&target, page, &options).await { + let target = Self::pick_target(normalized_query.as_deref(), category); + let sort_value = Self::map_sort(&sort); + + let result = match &target { + Target::Listing { tag } => { + self.fetch_listing(tag.as_deref(), sort_value, page, &options) + .await + } + Target::Model { username } => self.fetch_model(username, page, &options).await, + }; + + match result { Ok(items) => items, Err(error) => { report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; @@ -307,97 +661,128 @@ impl Provider for CamsodaProvider { mod tests { use super::*; - fn sample_html() -> String { - // Simplified version of the HTML returned by Jina (X-Return-Format: html) - r#" - - Torso ride begging for your cumby jazzyj (24:35) -
thumb
-
- - newFIRST IR BG SHOW FT JOHNNY LOVEby Coco Dethick (44:14) -
thumb
-
- - Ultimate squirting video!by Hot Wife Mia (02:47) -
thumb
-
-"#.to_string() + fn sample_api_body() -> String { + // Mimics the Jina mirror response: JSON wrapped in a
 block.
+        r#"
{"result":true,"data":[
+{"id":15032118,"user_id":18777219,"type_id":2,"name":"Extreme Fuck & Squirt","slug":"fuck-machine-squirt-surprise","token_price":555,"created_at":"2025-06-07T10:13:35","duration":2711,"is_video":true,"thumbnail_url":"https://media-secure.camsoda.com/user/videos/15032118/15032118_1749294653.thumb.jpg","type_name":"Video","user_display_name":"Lola Bunniii","username":"lolabunniii"},
+{"id":17009049,"user_id":1,"type_id":2,"name":"FIRST IR BG SHOW","slug":"first-ir-bg-show","token_price":0,"created_at":"2026-02-12T08:00:00","duration":2654,"is_video":true,"thumbnail_url":"https://media-secure.camsoda.com/user/videos/17009049/17009049.thumb.webp","type_name":"Video","user_display_name":"Coco Dethick","username":"coco-dethick"},
+{"id":99,"user_id":2,"type_id":1,"name":"a picture","slug":"pic","is_video":false,"username":"someone"}
+]}
"#.to_string() } #[test] - fn parses_video_cards_from_html() { - let items = CamsodaProvider::parse_html_items(&sample_html()); - assert_eq!(items.len(), 3, "expected 3 items, got {}: {:?}", items.len(), items.iter().map(|i| &i.id).collect::>()); + fn parses_api_items() { + let items = CamsodaProvider::parse_api_items(&sample_api_body(), None).unwrap(); + assert_eq!(items.len(), 2, "non-video item should be skipped"); - let item = &items[0]; - assert_eq!(item.id, "16984249"); - assert_eq!(item.title, "Torso ride begging for your cum"); - assert_eq!(item.uploader.as_deref(), Some("jazzyj")); - assert_eq!(item.duration, 24 * 60 + 35); - assert!(item.url.contains("16984249"), "url should contain id: {}", item.url); - assert!(item.thumb.contains("media-secure.camsoda.com"), "thumb: {}", item.thumb); - assert_eq!(item.uploaderUrl.as_deref(), Some("https://www.camsoda.com/lil-asian-jaz/media")); - assert_eq!(item.uploaderId.as_deref(), Some("camsoda:lil-asian-jaz")); + let a = &items[0]; + assert_eq!(a.id, "15032118"); + assert_eq!(a.title, "Extreme Fuck & Squirt", "html entity should decode"); + assert_eq!(a.duration, 2711); + assert_eq!( + a.url, + "https://www.camsoda.com/lolabunniii/media/fuck-machine-squirt-surprise/15032118" + ); + assert_eq!(a.uploader.as_deref(), Some("Lola Bunniii")); + assert_eq!(a.uploaderId.as_deref(), Some("camsoda:lolabunniii")); + assert!(a.thumb.contains("media-secure.camsoda.com")); + assert!(a.uploadedAt.is_some(), "created_at should parse"); - let item2 = &items[1]; - assert_eq!(item2.id, "17009049"); - assert_eq!(item2.uploader.as_deref(), Some("Coco Dethick")); - - let item3 = &items[2]; - assert_eq!(item3.id, "17112135"); - assert_eq!(item3.duration, 2 * 60 + 47); + let b = &items[1]; + assert_eq!(b.id, "17009049"); + assert_eq!(b.uploader.as_deref(), Some("Coco Dethick")); } #[test] - fn deduplicates_items() { - // Same video appears twice - let html = sample_html(); - let doubled = format!("{html}\n{html}"); - let items = CamsodaProvider::parse_html_items(&doubled); - assert_eq!(items.len(), 3, "should deduplicate to 3 unique items"); + fn tags_attached_when_filtering() { + let items = + CamsodaProvider::parse_api_items(&sample_api_body(), Some("big-tits")).unwrap(); + assert_eq!(items[0].tags.as_deref(), Some(["Big Tits".to_string()].as_slice())); + } + + #[test] + fn maps_sort_values() { + assert_eq!(CamsodaProvider::map_sort("new"), "date_added"); + assert_eq!(CamsodaProvider::map_sort(""), "date_added"); + assert_eq!(CamsodaProvider::map_sort("popular"), "popular"); + assert_eq!(CamsodaProvider::map_sort("top"), "popular_all_time"); + assert_eq!(CamsodaProvider::map_sort("rated"), "popular_all_time"); } #[test] fn picks_target_correctly() { - assert!(matches!(CamsodaProvider::pick_target(None), Target::Listing)); - assert!(matches!(CamsodaProvider::pick_target(Some("")), Target::Listing)); - assert!(matches!(CamsodaProvider::pick_target(Some("blowjob")), Target::Listing)); - - match CamsodaProvider::pick_target(Some("uploader:lil-asian-jaz")) { - Target::Model { username } => assert_eq!(username, "lil-asian-jaz"), - _ => panic!("expected Model target"), - } - match CamsodaProvider::pick_target(Some("model:katt-leya")) { - Target::Model { username } => assert_eq!(username, "katt-leya"), - _ => panic!("expected Model target"), - } + assert_eq!( + CamsodaProvider::pick_target(None, None), + Target::Listing { tag: None } + ); + // bare non-tag keyword -> default listing (server substring-filters) + assert_eq!( + CamsodaProvider::pick_target(Some("some random phrase"), None), + Target::Listing { tag: None } + ); + // bare keyword matching a known tag -> tag archive + assert_eq!( + CamsodaProvider::pick_target(Some("blowjob"), None), + Target::Listing { + tag: Some("blowjob".to_string()) + } + ); + // tag: prefix + assert_eq!( + CamsodaProvider::pick_target(Some("tag:big tits"), None), + Target::Listing { + tag: Some("big-tits".to_string()) + } + ); + // category option selected + assert_eq!( + CamsodaProvider::pick_target(None, Some("latina")), + Target::Listing { + tag: Some("latina".to_string()) + } + ); + // model shortcut + assert_eq!( + CamsodaProvider::pick_target(Some("model:katt-leya"), None), + Target::Model { + username: "katt-leya".to_string() + } + ); } #[test] - fn builds_listing_urls_correctly() { - let provider = CamsodaProvider::new(); + fn builds_api_urls() { assert_eq!( - provider.build_listing_url(&Target::Listing, 1), - "https://www.camsoda.com/media?page=1" + CamsodaProvider::build_api_url(None, "date_added", 1), + "https://www.camsoda.com/api/v1/media/list/video?page=1&sort_by=date_added" ); assert_eq!( - provider.build_listing_url(&Target::Listing, 3), - "https://www.camsoda.com/media?page=3" + CamsodaProvider::build_api_url(None, "date_added", 3), + "https://www.camsoda.com/api/v1/media/list/video?page=3&sort_by=date_added" ); assert_eq!( - provider.build_listing_url( - &Target::Model { username: "lil-asian-jaz".to_string() }, - 1 - ), - "https://www.camsoda.com/lil-asian-jaz/media" - ); - assert_eq!( - provider.build_listing_url( - &Target::Model { username: "lil-asian-jaz".to_string() }, - 2 - ), - "https://www.camsoda.com/lil-asian-jaz/media?page=2" + CamsodaProvider::build_api_url(Some("big-tits"), "popular", 2), + "https://www.camsoda.com/api/v1/media/list/video?page=2&sort_by=popular&tag=big-tits" ); } + + #[test] + fn pretty_tag_titles() { + assert_eq!(CamsodaProvider::pretty_tag("big-tits"), "Big Tits"); + assert_eq!(CamsodaProvider::pretty_tag("pov"), "POV"); + assert_eq!(CamsodaProvider::pretty_tag("teen-18"), "Teen 18"); + } + + #[test] + fn parses_model_html() { + let html = r#" + Torso rideby jazzyj (24:35) + +"#; + let items = CamsodaProvider::parse_html_items(html); + assert_eq!(items.len(), 1); + assert_eq!(items[0].id, "16984249"); + assert_eq!(items[0].uploader.as_deref(), Some("jazzyj")); + assert_eq!(items[0].duration, 24 * 60 + 35); + } }