diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index 4aeed24..b3f45c4 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -68,6 +68,9 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `fullporner` | `mainstream-tube` | no | no | HTML scraper for fullporner.com; thumbnail IDs derived from `/thumb/{id}.jpg` URLs and used to build direct `xiaoshenke.net/vid/{id}/720` media redirect URLs (Referer + User-Agent headers required); supports cat:/category:/pornstar:/star: shortcut queries; no proxy needed. | | `thepornbunny` | `mainstream-tube` | no | yes | KVS-style HTML scraper for thepornbunny.com; 24 items per site page; thumbnails at `https://www.thepornbunny.com/images/thumb/{id}.webp` from `data-original` attribute (no proxy needed); studio exposed as uploader; pornstar names in tags; `/proxy/thepornbunny/{slug}` fetches the video page, extracts `generate_mp4(enc_data, key, rnd, video_id)` args, decrypts `enc_data` via PBKDF2-HMAC-SHA512+AES-256-CBC to get an OK.ru session key, calls `api.ok.ru/fb.do?method=video.get&session_key=KEY&vids=RND` to get signed CDN URLs, and returns 302 to the best-quality okcdn.ru/vkuser.net MP4 URL (no special client headers needed); supports sort: new/popular/rated, 20 hardcoded categories via `categories` option, and tag:/category:/studio:/pornstar: query shortcuts. | | `eporner` | `mainstream-tube` | no | no | HTML scraper for eporner.com (5M+ videos); card selector `div.mb[data-id]` with inline duration/rating/views/uploader; thumbnails at `static-eu-cdn.eporner.com` (no proxy needed); pagination uses `/{N}/` suffix (page 1 = no suffix, page 2 = `/2/`); search queries map to `/tag/{slug}/` (eporner redirects all keyword searches to tag pages — 404 tag pages still return related content); supports sort: new/popular/rated/best; 65 hardcoded categories via `cat:`, `tag:`, `pornstar:`, `uploader:` query shortcuts; background-loads pornstar name→URL map from `/pornstar-list/`; yt-dlp resolves `video.url` natively (Eporner extractor); no proxy needed. | +| `xnxx` | `mainstream-tube` | no | no | HTML scraper for xnxx.com (10M+ videos); unified card parser handles two formats: `div.thumb-block[data-eid]` (search) and `div.thumb-block.video[data-video='{"id":...}']` (hits); eid extracted from `/video-{eid}/{slug}` URL path; thumbnails at `thumb-cdn77.xnxx-cdn.com` and `thumbs-gcore.xnxx-cdn.com` (no proxy, no Referer needed); 0-indexed pagination (page 1 = `/hits`, page N = `/hits/{N-1}`); default feed is `/hits` (most-viewed — xnxx has no chronological listing); search via `/search/{slug}` (works for keywords and tags); supports `tag:`, `cat:`, `category:` query shortcuts; yt-dlp resolves `video.url` natively (XNXX extractor, returns 4-7 HLS formats); no proxy needed. | +| `xhamster` | `mainstream-tube` | no | no | HTML scraper for xhamster.com; card selector `div[data-video-type="video"]` with `data-video-id`; thumbnails via `img[data-role="thumb-preview-img"]` at `ic-vt-nss.xhcdn.com` (no proxy, no Referer needed); pagination via `?page=N` query param (browse feeds use infinite-scroll so only search reliably returns different content per page); feeds: `/newest` (default), `/most-viewed`, `/best`; categories via `/categories/{slug}`; channels via `/channels/{slug}`; 43 hardcoded categories as `categories` option; uploader type inferred from URL path (`/channels/` → channel, `/creators/` → creator, `/pornstars/` → pornstar); supports `cat:`/`category:` and `channel:` query shortcuts, plus static category name matching; preview mp4 clips from `data-previewvideo` attribute; yt-dlp resolves `video.url` natively (xHamster extractor, 28 formats); no proxy needed. | +| `xvideos` | `mainstream-tube` | no | no | HTML scraper for xvideos.com; handles two card formats: homepage (`div.thumb-block[data-id][data-eid]`) uses `p.title a[title]` + `data-pvv` on img, best-of-month page uses `div.thumb-block.video[data-video=JSON]` with `div.title a` text + `previewVideo` JSON key; thumbnails at `thumb-cdn77.xvideos-cdn.com` / `thumbs-gcore.xvideos-cdn.com` (no proxy needed); latest: `/` (page 1) / `/new/{N-1}` (page N≥2); best-of-month: `/best/{YYYY-MM}` (previous calendar month), page N: `/best/{YYYY-MM}/{N-1}`; search: `/?k={query}` / `/?k={query}&p={N-1}` (0-indexed); tag shortcuts: `/tags/{slug}/{N-1}`; category shortcuts: `/c/{Name}-{ID}/{N-1}` (38 hardcoded categories); `cat:`, `tag:`, `uploader:` query prefix routing; yt-dlp resolves `video.url` natively (XVideos extractor → HLS formats); CDN preview mp4 in `preview` field; no proxy needed. | ## Proxy Routes diff --git a/src/providers/supjav.rs b/src/providers/supjav.rs index 58f3004..f5b1169 100644 --- a/src/providers/supjav.rs +++ b/src/providers/supjav.rs @@ -984,11 +984,14 @@ print(json.dumps({ let base_url = Url::parse(master_url).map_err(|error| Error::from(format!("invalid master url: {error}")))?; - let mut formats = vec![ - VideoFormat::new(master_url.to_string(), "auto".to_string(), "m3u8".to_string()) - .format_note("master".to_string()) - .format_id("master".to_string()), - ]; + let mut formats = vec![VideoFormat::new( + master_url.to_string(), + "auto".to_string(), + "m3u8".to_string(), + ) + .format_note("master".to_string()) + .format_id("master".to_string()) + .http_header("Referer".to_string(), BASE_URL.to_string())]; let resolution_regex = Self::regex(r#"RESOLUTION=(\d+)x(\d+)"#)?; let bandwidth_regex = Self::regex(r#"BANDWIDTH=(\d+)"#)?; let mut lines = response.text.lines(); @@ -1033,7 +1036,8 @@ print(json.dumps({ height .map(|value| format!("hls-{value}p")) .unwrap_or_else(|| "hls-variant".to_string()), - ); + ) + .http_header("Referer".to_string(), BASE_URL.to_string()); if let Some(bandwidth) = bandwidth { format = format.format_note(format!("{quality} ({bandwidth}bps)")); } @@ -1261,8 +1265,7 @@ print(json.dumps({ item.tags = Some(parsed_tags); } - if let Some((master_url, formats)) = self.resolve_player(page_url, &players).await? { - item.url = master_url; + if let Some((_master_url, formats)) = self.resolve_player(page_url, &players).await? { if !formats.is_empty() { item.formats = Some(formats); } diff --git a/src/providers/xhamster.rs b/src/providers/xhamster.rs index 48e96d4..be5337d 100644 --- a/src/providers/xhamster.rs +++ b/src/providers/xhamster.rs @@ -211,7 +211,7 @@ impl XhamsterProvider { if page <= 1 { base } else { - format!("{base}/page/{page}") + format!("{base}?page={page}") } } @@ -505,11 +505,11 @@ mod tests { ); assert_eq!( XhamsterProvider::target_url(&Target::Newest, 2), - "https://xhamster.com/newest/page/2" + "https://xhamster.com/newest?page=2" ); assert_eq!( XhamsterProvider::target_url(&Target::MostViewed, 3), - "https://xhamster.com/most-viewed/page/3" + "https://xhamster.com/most-viewed?page=3" ); assert_eq!( XhamsterProvider::target_url(&Target::Search("big ass".to_string()), 1), @@ -517,7 +517,7 @@ mod tests { ); assert_eq!( XhamsterProvider::target_url(&Target::Category("amateur".to_string()), 2), - "https://xhamster.com/categories/amateur/page/2" + "https://xhamster.com/categories/amateur?page=2" ); assert_eq!( XhamsterProvider::target_url(&Target::Channel("vip4k".to_string()), 1), diff --git a/src/providers/xvideos.rs b/src/providers/xvideos.rs index 3f0c647..af7ec83 100644 --- a/src/providers/xvideos.rs +++ b/src/providers/xvideos.rs @@ -9,6 +9,7 @@ use async_trait::async_trait; use chrono::{Datelike, Local, Months}; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; use scraper::{ElementRef, Html, Selector}; use wreq::Version; @@ -261,7 +262,7 @@ impl XvideosProvider { fn parse_duration(text: &str) -> u32 { let text = text.trim(); - // Colon-separated formats MM:SS and HH:MM:SS + // Colon-separated MM:SS and HH:MM:SS let parts: Vec<&str> = text.split(':').collect(); if parts.len() == 2 { let m: u32 = parts[0].trim().parse().unwrap_or(0); @@ -275,26 +276,24 @@ impl XvideosProvider { return h * 3600 + m * 60 + s; } - // Word-based: "1h20min", "30 min", "45sec", etc. + // Word-based: "1h20min", "30 min", "45sec". + // Trim the "before" slice so trailing spaces don't swallow the digits. let low = text.to_ascii_lowercase(); - let h: u32 = low - .find('h') - .and_then(|i| low[..i].trim().parse().ok()) - .unwrap_or(0); - let m: u32 = low.find("min").and_then(|i| { - let start = low[..i] - .rfind(|c: char| !c.is_ascii_digit()) - .map(|j| j + 1) - .unwrap_or(0); - low[start..i].trim().parse().ok() - }).unwrap_or(0); - let s: u32 = low.find("sec").and_then(|i| { - let start = low[..i] - .rfind(|c: char| !c.is_ascii_digit()) - .map(|j| j + 1) - .unwrap_or(0); - low[start..i].trim().parse().ok() - }).unwrap_or(0); + + let extract_num = |needle: &str| -> u32 { + low.find(needle).and_then(|i| { + let before = low[..i].trim_end(); // drop trailing whitespace + let start = before + .rfind(|c: char| !c.is_ascii_digit()) + .map(|j| j + 1) + .unwrap_or(0); + before[start..].parse().ok() + }).unwrap_or(0) + }; + + let h = extract_num("h"); + let m = extract_num("min"); + let s = extract_num("sec"); h * 3600 + m * 60 + s } @@ -315,11 +314,17 @@ impl XvideosProvider { let card_sel = Self::selector("div.thumb-block")?; let img_sel = Self::selector("img[data-src]")?; let link_sel = Self::selector("a[href]")?; - let title_sel = Self::selector("p.title a[title], a.title[title]")?; - let uploader_name_sel = Self::selector("p.metadata a span.name")?; - let uploader_link_sel = Self::selector("p.metadata a[href]")?; + // Homepage format: p.title a[title="…"]; best-page format: div.title a[href] + let title_attr_sel = Self::selector("p.title a[title], a.title[title]")?; + let title_text_sel = Self::selector("div.title a[href]")?; + // Homepage uploader: p.metadata a span.name; best-page: div.video-metadata a.name + let uploader_a_sel = + Self::selector("p.metadata a[href], div.video-metadata a.name[href]")?; let dur_sel = Self::selector(".thumb-under span.duration")?; + // Homepage views: text near "Views" in p.metadata let metadata_sel = Self::selector("p.metadata")?; + // Best-page views: span.views-count + let views_count_sel = Self::selector("span.views-count")?; let mut items = Vec::new(); @@ -354,15 +359,22 @@ impl XvideosProvider { continue; } - // Numeric id from data-id attribute; fall back to eid + // Numeric id: try data-id attr, then data-video JSON, then eid let video_id = card .value() .attr("data-id") .filter(|s| !s.is_empty()) - .unwrap_or(&eid) - .to_string(); + .map(str::to_string) + .or_else(|| { + card.value().attr("data-video").and_then(|dv| { + // {"id":57049413,...} + let re = Regex::new(r#""id"\s*:\s*(\d+)"#).ok()?; + re.captures(dv)?.get(1).map(|m| m.as_str().to_string()) + }) + }) + .unwrap_or_else(|| eid.clone()); - // Thumbnail (lazy-loaded, stored in data-src) + // Thumbnail (lazy-loaded in data-src) let thumb = card .select(&img_sel) .next() @@ -373,20 +385,36 @@ impl XvideosProvider { continue 'card; } - // Preview video clip (data-pvv on the same img element) + // Preview video clip. + // Homepage: data-pvv on the img; best page: "previewVideo" in data-video JSON. let preview = card .select(&img_sel) .next() .and_then(|el| el.value().attr("data-pvv")) .map(str::to_string) - .filter(|s| !s.is_empty()); + .filter(|s| !s.is_empty()) + .or_else(|| { + card.value().attr("data-video").and_then(|dv| { + let re = + Regex::new(r#""previewVideo"\s*:\s*"([^"]+)""#).ok()?; + re.captures(dv) + .and_then(|c| c.get(1)) + .map(|m| m.as_str().replace("\\/", "/")) + }) + }); - // Title from the title attribute on the link inside p.title + // Title: prefer title attr (homepage), fall back to text (best page) let title = card - .select(&title_sel) + .select(&title_attr_sel) .next() .and_then(|el| el.value().attr("title").map(Self::decode_html)) - .filter(|t| !t.trim().is_empty()); + .filter(|t| !t.trim().is_empty()) + .or_else(|| { + card.select(&title_text_sel) + .next() + .map(|el| Self::text_of(&el)) + .filter(|t| !t.trim().is_empty()) + }); let Some(title) = title else { continue; }; @@ -398,30 +426,39 @@ impl XvideosProvider { .map(|el| Self::parse_duration(&Self::text_of(&el))) .unwrap_or(0); - // Uploader name and URL - let uploader_name = card - .select(&uploader_name_sel) - .next() + // Uploader: pick the first non-video anchor in the metadata area. + // Skip anchors that link to a video page. + let uploader_el = card.select(&uploader_a_sel).find(|el| { + !el.value() + .attr("href") + .map(|h| h.contains("/video.")) + .unwrap_or(false) + }); + let uploader_name = uploader_el .map(|el| Self::text_of(&el)) .filter(|s| !s.is_empty()); - let uploader_url = card - .select(&uploader_link_sel) - .next() + let uploader_url = uploader_el .and_then(|el| el.value().attr("href").map(Self::normalize_url)) .filter(|u| !u.is_empty()); - // Views: scan p.metadata text for "NNN Views" - let views = card.select(&metadata_sel).next().and_then(|meta| { - let text = Self::text_of(&meta); - let low = text.to_ascii_lowercase(); - low.find("views").and_then(|idx| { - // grab the token immediately before "views" - text[..idx] - .split_whitespace() - .last() - .and_then(|w| Self::parse_views(w)) - }) - }); + // Views: best page has span.views-count; homepage has text near "Views" + let views = card + .select(&views_count_sel) + .next() + .map(|el| Self::text_of(&el)) + .and_then(|t| Self::parse_views(&t)) + .or_else(|| { + card.select(&metadata_sel).next().and_then(|meta| { + let text = Self::text_of(&meta); + let low = text.to_ascii_lowercase(); + low.find("views").and_then(|idx| { + text[..idx] + .split_whitespace() + .last() + .and_then(|w| Self::parse_views(w)) + }) + }) + }); let mut item = VideoItem::new( video_id,