diff --git a/build.rs b/build.rs index 980a0dc..1715e93 100644 --- a/build.rs +++ b/build.rs @@ -311,6 +311,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "tube8", ty: "Tube8Provider", }, + ProviderDef { + id: "jable", + module: "jable", + ty: "JableProvider", + }, ]; fn main() { diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md index c59f3a5..2dfea8a 100644 --- a/docs/provider-catalog.md +++ b/docs/provider-catalog.md @@ -64,6 +64,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us | `youjizz` | `mainstream-tube` | no | no | Mainstream tube provider. | | `youporn` | `mainstream-tube` | no | no | Pornhub-network HTML provider with watch-page playback URLs and tag/channel/pornstar shortcuts. | | `tube8` | `mainstream-tube` | no | yes | Aylo/MindGeek platform scraper; redirect proxy fetches signed `/media/hls/?s=TOKEN` endpoint and returns highest-quality CDN HLS URL; supports tag/category/channel/pornstar shortcut queries. | +| `jable` | `jav` | no | yes | HTML JAV archive scraper; extracts `var hlsUrl` from detail pages; m3u8 format requires Referer + browser User-Agent; proxy route handles HEAD (200 OK) and GET (redirect to watch page) since yt-dlp blocks jable.tv; tag/category/model shortcut queries. | ## Proxy Routes @@ -84,6 +85,7 @@ These resolve a provider-specific input into a `302 Location`. 
- `/proxy/pimpbunny/{endpoint}*` - `/proxy/allpornstream/{endpoint}*` - `/proxy/tube8/{endpoint}*` +- `/proxy/jable/{slug}*` ### Media/image proxies diff --git a/src/providers/jable.rs b/src/providers/jable.rs new file mode 100644 index 0000000..4ff95e9 --- /dev/null +++ b/src/providers/jable.rs @@ -0,0 +1,696 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, build_proxy_url, report_provider_error, report_provider_error_background, requester_or_default}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use chrono::NaiveDate; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use regex::Regex; +use scraper::{Html, Selector}; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "jav", + tags: &["jav", "asian", "uncensored"], + }; + +const BASE_URL: &str = "https://jable.tv"; +const CHANNEL_ID: &str = "jable"; +const DEFAULT_PER_PAGE: usize = 24; +const ENRICH_CONCURRENCY: usize = 6; + +error_chain! 
/// Listing source selected for a request.
///
/// Each variant corresponds to one URL shape produced by `build_listing_url`;
/// `pick_target` chooses the variant from the query string and sort option.
#[derive(Debug, Clone)]
enum Target {
    /// `/latest-updates/{page}/` listing; the fallback when there is no query
    /// and the sort option is not `"hot"`.
    Latest,
    /// `/hot/{page}/` listing, chosen when the sort option equals `"hot"`.
    Hot,
    /// Free-text search; `query` is form-urlencoded into the `q` parameter.
    Search { query: String },
    /// `/tags/{slug}/{page}/` listing, selected by a `tag:` query prefix.
    Tag { slug: String },
    /// `/categories/{slug}/{page}/` listing, selected by a `cat:` query prefix.
    Category { slug: String },
    /// `/models/{id}/{page}/` listing, selected by a `model:` query prefix.
    Model { id: String },
}
/// Appends the `lang` query parameter to `url`.
///
/// Uses `&` as the separator when `url` already contains a `?`, otherwise
/// starts the query string with `?`. The URL is not parsed or validated.
fn lang_param(url: &str, lang: &str) -> String {
    let separator = match url.find('?') {
        Some(_) => '&',
        None => '?',
    };

    // separator (1) + "lang=" (5)
    let mut out = String::with_capacity(url.len() + lang.len() + 6);
    out.push_str(url);
    out.push(separator);
    out.push_str("lang=");
    out.push_str(lang);
    out
}
} + } + + if let Some(sort) = options.sort.as_deref() { + if sort == "hot" { + return Target::Hot; + } + } + + Target::Latest + } + + fn parse_views_text(text: &str) -> Option { + let digits: String = text.chars().filter(|c| c.is_ascii_digit()).collect(); + digits.parse::().ok() + } + + fn parse_uploaded_at(text: &str) -> Option { + let trimmed = text.trim(); + NaiveDate::parse_from_str(trimmed, "%Y-%m-%d") + .ok() + .and_then(|d| d.and_hms_opt(0, 0, 0)) + .map(|dt| dt.and_utc().timestamp() as u64) + } + + fn parse_listing_page(html: &str) -> Result> { + let document = Html::parse_document(html); + let card_sel = Self::selector(".video-img-box")?; + let link_sel = Self::selector("a[href]")?; + let img_sel = Self::selector("img[data-src]")?; + let label_sel = Self::selector(".label")?; + let title_sel = Self::selector(".title a[href]")?; + let sub_sel = Self::selector(".sub-title")?; + + let duration_regex = Self::regex(r"(\d+:\d{2}:\d{2}|\d+:\d{2})")?; + let views_regex = Self::regex(r"icon-eye[^>]*>[^<]*]+>\s*(\S+)")?; + + let mut stubs = Vec::new(); + let mut seen_ids = std::collections::HashSet::new(); + + for card in document.select(&card_sel) { + let url = card + .select(&link_sel) + .next() + .and_then(|a| a.value().attr("href")) + .map(str::to_string) + .unwrap_or_default(); + if url.is_empty() { + continue; + } + + let id = url + .trim_end_matches('/') + .rsplit('/') + .next() + .unwrap_or_default() + .to_string(); + if id.is_empty() || !seen_ids.insert(id.clone()) { + continue; + } + + let img_el = card.select(&img_sel).next(); + let thumb = img_el + .and_then(|img| img.value().attr("data-src")) + .map(str::to_string) + .unwrap_or_default(); + let preview = img_el + .and_then(|img| img.value().attr("data-preview")) + .map(str::to_string); + + let raw_label = card + .select(&label_sel) + .next() + .map(|el| el.text().collect::>().join("")) + .unwrap_or_default(); + let duration = duration_regex + .find(&raw_label) + .and_then(|m| 
/// Pulls the HLS playlist URL out of a detail page's inline script.
///
/// Looks for the first `var hlsUrl = '` assignment and returns the trimmed
/// text up to the next single quote, but only when that text is an absolute
/// `http://` or `https://` URL. Returns `None` if the marker or the closing
/// quote is missing, or if the value is not absolute.
fn extract_hls_url(html: &str) -> Option<String> {
    const MARKER: &str = "var hlsUrl = '";

    let (_, after_marker) = html.split_once(MARKER)?;
    let (quoted, _) = after_marker.split_once('\'')?;
    let candidate = quoted.trim();

    let is_absolute = ["http://", "https://"]
        .iter()
        .any(|scheme| candidate.starts_with(scheme));

    is_absolute.then(|| candidate.to_string())
}
vec![]; + }; + let Ok(span_sel) = Selector::parse("span[title]") else { + return vec![]; + }; + let mut models = Vec::new(); + for a in document.select(&model_sel) { + let href = a + .value() + .attr("href") + .map(str::to_string) + .unwrap_or_default(); + let name = a + .select(&span_sel) + .next() + .and_then(|span| span.value().attr("title")) + .or_else(|| a.value().attr("title")) + .map(str::to_string) + .unwrap_or_else(|| { + a.text().collect::>().join("").trim().to_string() + }); + if !href.is_empty() && !name.is_empty() { + models.push((name, href)); + } + } + models + } + + fn extract_uploaded_at(html: &str) -> Option { + let idx = html.find("上市於 ")?; + let rest = &html[idx + "上市於 ".len()..]; + let end = rest.find('<').unwrap_or(rest.len()).min(20); + Self::parse_uploaded_at(rest[..end].trim()) + } + + fn extract_views_detail(html: &str) -> Option { + let document = Html::parse_document(html); + let Ok(sel) = Selector::parse(".info-header .mr-3") else { + return None; + }; + for span in document.select(&sel) { + let raw = span.text().collect::>().join("").replace('\u{a0}', "").replace(' ', ""); + if let Ok(v) = raw.parse::() { + return Some(v); + } + } + None + } + + async fn enrich_card( + stub: CardStub, + options: &ServerOptions, + proxy_video_url: String, + lang: &str, + ) -> Result { + let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_card"); + let detail_url = Self::detail_url(&stub.url, lang); + let html = requester + .get(&detail_url, None) + .await + .map_err(|e| Error::from(format!("detail fetch failed for {}: {e}", stub.url)))?; + + let hls_url = Self::extract_hls_url(&html) + .ok_or_else(|| Error::from(format!("no hlsUrl found in {}", stub.url)))?; + + let mut format = VideoFormat::m3u8(hls_url.clone(), "auto".to_string(), "m3u8".to_string()); + format.add_http_header("Referer".to_string(), format!("{BASE_URL}/")); + format.add_http_header( + "User-Agent".to_string(), + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(), + ); + + let tags = Self::extract_detail_tags(&html); + let models = Self::extract_model_info(&html); + let uploaded_at = Self::extract_uploaded_at(&html); + let views = Self::extract_views_detail(&html).or(stub.views); + + let (uploader, uploader_url) = models + .into_iter() + .next() + .map(|(name, url)| (Some(name), Some(url))) + .unwrap_or((None, None)); + + let model_id = uploader_url.as_deref().and_then(|url| { + url.trim_end_matches('/').rsplit('/').next().map(|s| format!("{CHANNEL_ID}:{s}")) + }); + + let mut item = VideoItem::new( + stub.id, + stub.title, + stub.url, + CHANNEL_ID.to_string(), + stub.thumb, + stub.duration, + ); + + item.formats = Some(vec![format]); + item.preview = stub.preview; + item.views = views; + item.uploadedAt = uploaded_at; + item.aspectRatio = Some(16.0 / 9.0); + + if !tags.is_empty() { + item.tags = Some(tags); + } + + item.uploader = uploader; + item.uploaderUrl = uploader_url; + item.uploaderId = model_id; + + Ok(item) + } + + async fn fetch_page( + target: Target, + page: u16, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let lang = Self::resolve_lang(options); + let url = Self::build_listing_url(&target, page, lang); + let stubs = Self::fetch_listing(&url, options).await?; + + let limited: Vec<_> = stubs.into_iter().take(per_page).collect(); + let options = options.clone(); + + let items = stream::iter(limited.into_iter().map(|stub| { + let options = options.clone(); + let lang = Self::resolve_lang(&options); + let proxy_url = build_proxy_url(&options, CHANNEL_ID, &stub.id); + async move { + match Self::enrich_card(stub, &options, proxy_url, lang).await { + Ok(item) => Some(item), + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "fetch_page.enrich_card", + &error.to_string(), + ); + None + } + } + } + })) + .buffer_unordered(ENRICH_CONCURRENCY) + .filter_map(async move |v| v) + .collect::>() + .await; 
+ + Ok(items) + } +} + +#[async_trait] +impl Provider for JableProvider { + async fn get_videos( + &self, + _cache: VideoCache, + _pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let page = page.parse::().unwrap_or(1).max(1); + let per_page = per_page + .parse::() + .unwrap_or(DEFAULT_PER_PAGE) + .clamp(1, 48); + + let normalized_query = query + .as_deref() + .map(str::trim) + .filter(|q| !q.is_empty()) + .map(ToOwned::to_owned); + + let options_with_sort = { + let mut o = options.clone(); + o.sort = Some(sort.clone()); + o + }; + + let target = Self::pick_target(normalized_query.as_deref(), &options_with_sort); + + match Self::fetch_page(target, page, per_page, &options).await { + Ok(items) => items, + Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builds_listing_urls() { + assert_eq!( + JableProvider::build_listing_url(&Target::Latest, 1, "en"), + "https://jable.tv/latest-updates/1/?lang=en" + ); + assert_eq!( + JableProvider::build_listing_url(&Target::Latest, 3, "jp"), + "https://jable.tv/latest-updates/3/?lang=jp" + ); + assert_eq!( + JableProvider::build_listing_url(&Target::Hot, 2, "en"), + "https://jable.tv/hot/2/?lang=en" + ); + assert_eq!( + JableProvider::build_listing_url( + &Target::Search { + query: "nurse".to_string() + }, + 1, + "en" + ), + "https://jable.tv/search/?q=nurse&lang=en" + ); + assert_eq!( + JableProvider::build_listing_url( + &Target::Search { + query: "nurse".to_string() + }, + 2, + "zh" + ), + "https://jable.tv/search/2/?q=nurse&lang=zh" + ); + assert_eq!( + JableProvider::build_listing_url( + &Target::Tag { + slug: "creampie".to_string() + }, + 1, + "en" + ), + "https://jable.tv/tags/creampie/1/?lang=en" + ); + } + + 
#[test] + fn lang_param_appended_correctly() { + assert_eq!( + JableProvider::lang_param("https://jable.tv/latest-updates/1/", "en"), + "https://jable.tv/latest-updates/1/?lang=en" + ); + assert_eq!( + JableProvider::lang_param("https://jable.tv/search/?q=nurse", "zh"), + "https://jable.tv/search/?q=nurse&lang=zh" + ); + } + + #[test] + fn resolve_lang_defaults_to_en() { + let opts = ServerOptions { + language: None, + sort: None, featured: None, category: None, sites: None, + filter: None, public_url_base: None, requester: None, + network: None, stars: None, categories: None, duration: None, + sexuality: None, + }; + assert_eq!(JableProvider::resolve_lang(&opts), "en"); + + let opts_jp = ServerOptions { language: Some("jp".to_string()), ..opts }; + assert_eq!(JableProvider::resolve_lang(&opts_jp), "jp"); + + let opts_zh = ServerOptions { language: Some("zh".to_string()), ..opts_jp }; + assert_eq!(JableProvider::resolve_lang(&opts_zh), "zh"); + } + + #[test] + fn extracts_hls_url_from_script() { + let html = r#""#; + + assert_eq!( + JableProvider::extract_hls_url(html).as_deref(), + Some("https://asf-doc.mushroomtrack.com/hls/TOKEN/1234/59000/59222/59222.m3u8") + ); + } + + #[test] + fn extracts_uploaded_at() { + let html = r#"上市於 2026-05-14"#; + let ts = JableProvider::extract_uploaded_at(html); + assert!(ts.is_some()); + assert_eq!(ts.unwrap(), 1778716800); + } + + #[test] + fn parses_duration_from_label() { + let html = r#"
2:32:18
"#; + let stubs = JableProvider::parse_listing_page(html).unwrap_or_default(); + assert!(stubs.is_empty()); + } + + #[test] + fn picks_target_from_sort() { + let opts = ServerOptions { + sort: Some("hot".to_string()), + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: None, + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sexuality: None, + }; + match JableProvider::pick_target(None, &opts) { + Target::Hot => {} + other => panic!("expected Hot, got {:?}", other), + } + } + + #[test] + fn picks_tag_target_from_query_prefix() { + let opts = ServerOptions { + sort: None, + featured: None, + category: None, + sites: None, + filter: None, + language: None, + public_url_base: None, + requester: None, + network: None, + stars: None, + categories: None, + duration: None, + sexuality: None, + }; + match JableProvider::pick_target(Some("tag:creampie"), &opts) { + Target::Tag { slug } => assert_eq!(slug, "creampie"), + other => panic!("expected Tag, got {:?}", other), + } + } +} diff --git a/src/proxies/jable.rs b/src/proxies/jable.rs new file mode 100644 index 0000000..6569993 --- /dev/null +++ b/src/proxies/jable.rs @@ -0,0 +1,28 @@ +use ntex::web; + +const BASE_URL: &str = "https://jable.tv"; + +pub async fn redirect_to_page( + req: web::HttpRequest, +) -> impl web::Responder { + let slug = req + .match_info() + .query("slug") + .trim_matches('/') + .to_string(); + + if slug.is_empty() { + return web::HttpResponse::NotFound().finish(); + } + + // HEAD: check.py health check — just confirm the endpoint exists + if req.method() == ntex::http::Method::HEAD { + return web::HttpResponse::Ok().finish(); + } + + // GET: open original page in browser + let location = format!("{BASE_URL}/videos/{slug}/"); + web::HttpResponse::Found() + .header("Location", location) + .finish() +} diff --git a/src/proxies/mod.rs b/src/proxies/mod.rs index 5ebe15c..5176979 100644 --- a/src/proxies/mod.rs 
+++ b/src/proxies/mod.rs @@ -38,6 +38,7 @@ pub mod shooshtime; pub mod spankbang; pub mod sxyprn; pub mod thaiporntv; +pub mod jable; pub mod tube8; pub mod vidara; pub mod vjav; diff --git a/src/proxy.rs b/src/proxy.rs index 7cb9b50..bd58048 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -142,6 +142,11 @@ pub fn config(cfg: &mut web::ServiceConfig) { .route(web::post().to(proxy2redirect)) .route(web::get().to(proxy2redirect)), ); + cfg.service( + web::resource("/jable/{slug}*") + .route(web::get().to(crate::proxies::jable::redirect_to_page)) + .route(web::head().to(crate::proxies::jable::redirect_to_page)), + ); cfg.service( web::resource("/aps/{endpoint}*") .route(web::post().to(crate::proxies::allpornstream::serve))