diff --git a/build.rs b/build.rs
index 63ae4a4..4687c12 100644
--- a/build.rs
+++ b/build.rs
@@ -36,6 +36,11 @@ const PROVIDERS: &[ProviderDef] = &[
         module: "pornhub",
         ty: "PornhubProvider",
     },
+    ProviderDef {
+        id: "youporn",
+        module: "youporn",
+        ty: "YoupornProvider",
+    },
     ProviderDef {
         id: "pornhd3x",
         module: "pornhd3x",
diff --git a/docs/provider-catalog.md b/docs/provider-catalog.md
index 74616fc..642328c 100644
--- a/docs/provider-catalog.md
+++ b/docs/provider-catalog.md
@@ -57,6 +57,7 @@ This is the current implementation inventory as of this snapshot of the repo. Us
 | `xxthots` | `onlyfans` | no | no | OnlyFans-like metadata example. |
 | `yesporn` | `mainstream-tube` | no | no | Preview format examples. |
 | `youjizz` | `mainstream-tube` | no | no | Mainstream tube provider. |
+| `youporn` | `mainstream-tube` | no | no | Pornhub-network HTML provider with watch-page playback URLs and tag/channel/pornstar shortcuts. |
 
 ## Proxy Routes
 
diff --git a/src/providers/youporn.rs b/src/providers/youporn.rs
new file mode 100644
index 0000000..f40ba7b
--- /dev/null
+++ b/src/providers/youporn.rs
@@ -0,0 +1,560 @@
+use crate::DbPool;
+use crate::api::ClientVersion;
+use crate::providers::{Provider, report_provider_error, requester_or_default};
+use crate::status::*;
+use crate::util::cache::VideoCache;
+use crate::util::parse_abbreviated_number;
+use crate::util::time::parse_time_to_seconds;
+use crate::videos::{ServerOptions, VideoItem};
+
+use async_trait::async_trait;
+use error_chain::error_chain;
+use htmlentity::entity::{ICodedDataTrait, decode};
+use scraper::{ElementRef, Html, Selector};
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock};
+use std::thread;
+use url::{Url, form_urlencoded};
+use wreq::Version;
+
+/// Catalog group and tags advertised for this provider.
+pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
+    crate::providers::ProviderChannelMetadata {
+        group_id: "mainstream-tube",
+        tags: &["mainstream", "studio", "search"],
+    };
+
+const BASE_URL: &str = "https://www.youporn.com";
+const CHANNEL_ID: &str = "youporn";
+
+error_chain! {
+    foreign_links {
+        Io(std::io::Error);
+        HttpRequest(wreq::Error);
+    }
+}
+
+/// Provider for YouPorn listing pages.
+///
+/// `shortcuts` maps lowercase link titles scraped from the front page to the listing they
+/// point at. It starts empty and is filled once by a background thread spawned in `new`.
+#[derive(Debug, Clone)]
+pub struct YoupornProvider {
+    url: String,
+    shortcuts: Arc<RwLock<HashMap<String, Target>>>,
+}
+
+/// A listing this provider can build a URL for.
+#[derive(Debug, Clone)]
+enum Target {
+    Latest { sort: String },
+    Search { query: String },
+    Tag { slug: String, sort: String },
+    Channel { slug: String, sort: String },
+    Pornstar { slug: String, sort: String },
+    Amateur { slug: String, sort: String },
+}
+
+impl YoupornProvider {
+    /// Creates the provider and kicks off the background shortcut load.
+    pub fn new() -> Self {
+        let provider = Self {
+            url: BASE_URL.to_string(),
+            shortcuts: Arc::new(RwLock::new(HashMap::new())),
+        };
+        provider.spawn_initial_load();
+        provider
+    }
+
+    /// Fetches the front page on a dedicated thread and stores the shortcuts found there.
+    ///
+    /// Best effort: when the runtime cannot be built or the request fails, the map is left
+    /// untouched.
+    fn spawn_initial_load(&self) {
+        let shortcuts = Arc::clone(&self.shortcuts);
+        let url = self.url.clone();
+        thread::spawn(move || {
+            let rt = match tokio::runtime::Builder::new_current_thread()
+                .enable_all()
+                .build()
+            {
+                Ok(v) => v,
+                Err(_) => return,
+            };
+            rt.block_on(async move {
+                let mut requester = crate::util::requester::Requester::new();
+                if let Ok(html) = requester.get(&url, None).await {
+                    let map = Self::collect_shortcuts(&html);
+                    if let Ok(mut guard) = shortcuts.write() {
+                        *guard = map;
+                    }
+                }
+            });
+        });
+    }
+
+    /// Builds the channel descriptor handed out by `get_channel`.
+    fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
+        Channel {
+            id: CHANNEL_ID.to_string(),
+            name: "YouPorn".to_string(),
+            description: "YouPorn listings with search, tag/channel shortcuts, and watch-page playback URLs."
+                .to_string(),
+            premium: false,
+            favicon: "https://www.google.com/s2/favicons?sz=64&domain=youporn.com".to_string(),
+            status: "active".to_string(),
+            categories: vec![],
+            options: vec![ChannelOption {
+                id: "sort".to_string(),
+                title: "Sort".to_string(),
+                description: "Latest feed ordering.".to_string(),
+                systemImage: "list.number".to_string(),
+                colorName: "blue".to_string(),
+                options: vec![FilterOption {
+                    id: "new".to_string(),
+                    title: "Most Recent".to_string(),
+                }],
+                multiSelect: false,
+            }],
+            nsfw: true,
+            cacheDuration: Some(1800),
+        }
+    }
+
+    /// Parses a CSS selector, yielding `None` when it is invalid.
+    fn selector(value: &str) -> Option<Selector> {
+        Selector::parse(value).ok()
+    }
+
+    /// Decodes HTML entities and collapses each whitespace run into a single space.
+    fn normalize_text(value: &str) -> String {
+        decode(value.as_bytes())
+            .to_string()
+            .unwrap_or_else(|_| value.to_string())
+            .split_whitespace()
+            .collect::<Vec<_>>()
+            .join(" ")
+    }
+
+    /// Makes a relative or protocol-relative URL absolute; blank input yields an empty string.
+    fn normalize_url(&self, value: &str) -> String {
+        let trimmed = value.trim();
+        if trimmed.is_empty() {
+            return String::new();
+        }
+        if trimmed.starts_with("http://") || trimmed.starts_with("https://") {
+            return trimmed.to_string();
+        }
+        if trimmed.starts_with("//") {
+            return format!("https:{trimmed}");
+        }
+        format!(
+            "{}/{}",
+            self.url.trim_end_matches('/'),
+            trimmed.trim_start_matches('/')
+        )
+    }
+
+    /// Maps a requested sort onto a supported one; "new" is the only one so far.
+    fn normalized_sort(_sort: &str) -> &'static str {
+        "new"
+    }
+
+    /// URL path suffix for a sort; the only supported sort needs none.
+    fn sort_suffix(_sort: &str) -> &'static str {
+        ""
+    }
+
+    /// Query-string suffix selecting `page`; empty for pages 0 and 1.
+    fn page_suffix(page: u32) -> String {
+        if page > 1 {
+            format!("?page={page}")
+        } else {
+            String::new()
+        }
+    }
+
+    /// Browser-like headers sent with listing requests.
+    fn html_headers(referer: &str) -> Vec<(String, String)> {
+        vec![
+            (
+                "accept".to_string(),
+                "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8".to_string(),
+            ),
+            ("accept-language".to_string(), "en-US,en;q=0.7".to_string()),
+            ("cache-control".to_string(), "no-cache".to_string()),
+            ("pragma".to_string(), "no-cache".to_string()),
+            (
+                "user-agent".to_string(),
+                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
+            ),
+            ("referer".to_string(), referer.to_string()),
+        ]
+    }
+
+    /// Resolves a user query into the listing it refers to.
+    ///
+    /// Order of resolution: a blank query is the latest feed; a `tag:`, `channel:`,
+    /// `pornstar:` or `amateur:` prefix selects that section with the rest as slug; a title
+    /// known from the front page resolves to its shortcut; anything else is a search.
+    fn target_from_query(&self, query: &str, sort: &str) -> Target {
+        let q = query.trim();
+        let sort = Self::normalized_sort(sort).to_string();
+        if q.is_empty() {
+            return Target::Latest { sort };
+        }
+
+        let lower = q.to_ascii_lowercase();
+        for (prefix, kind) in [
+            ("tag:", "tag"),
+            ("channel:", "channel"),
+            ("pornstar:", "pornstar"),
+            ("amateur:", "amateur"),
+        ] {
+            let Some(rest) = lower.strip_prefix(prefix) else {
+                continue;
+            };
+            let slug = rest.trim().replace(' ', "-");
+            if slug.is_empty() {
+                continue;
+            }
+            return match kind {
+                "tag" => Target::Tag { slug, sort },
+                "channel" => Target::Channel { slug, sort },
+                "pornstar" => Target::Pornstar { slug, sort },
+                _ => Target::Amateur { slug, sort },
+            };
+        }
+
+        let shortcut_key = lower.split_whitespace().collect::<Vec<_>>().join(" ");
+        if let Ok(guard) = self.shortcuts.read()
+            && let Some(target) = guard.get(&shortcut_key)
+        {
+            return match target {
+                Target::Tag { slug, .. } => Target::Tag {
+                    slug: slug.clone(),
+                    sort,
+                },
+                Target::Channel { slug, .. } => Target::Channel {
+                    slug: slug.clone(),
+                    sort,
+                },
+                Target::Pornstar { slug, .. } => Target::Pornstar {
+                    slug: slug.clone(),
+                    sort,
+                },
+                Target::Amateur { slug, .. } => Target::Amateur {
+                    slug: slug.clone(),
+                    sort,
+                },
+                Target::Latest { .. } | Target::Search { .. } => target.clone(),
+            };
+        }
+
+        Target::Search {
+            query: q.to_string(),
+        }
+    }
+
+    /// Builds the listing URL for `target`; pages above 1 add a `page` query parameter.
+    fn build_url(&self, target: &Target, page: u32) -> String {
+        let (section, slug, sort) = match target {
+            Target::Latest { sort } => {
+                return format!(
+                    "{}/{}{}",
+                    self.url,
+                    Self::sort_suffix(sort),
+                    Self::page_suffix(page)
+                );
+            }
+            Target::Search { query } => {
+                let encoded: String = form_urlencoded::byte_serialize(query.as_bytes()).collect();
+                return if page > 1 {
+                    format!("{}/search/?query={encoded}&page={page}", self.url)
+                } else {
+                    format!("{}/search/?query={encoded}", self.url)
+                };
+            }
+            Target::Tag { slug, sort } => ("porntags", slug, sort),
+            Target::Channel { slug, sort } => ("channel", slug, sort),
+            Target::Pornstar { slug, sort } => ("pornstar", slug, sort),
+            Target::Amateur { slug, sort } => ("amateur", slug, sort),
+        };
+        // The four slug-based sections share one URL shape and differ only in the path segment.
+        format!(
+            "{}/{section}/{}/{}{}",
+            self.url,
+            slug.trim_matches('/'),
+            Self::sort_suffix(sort),
+            Self::page_suffix(page)
+        )
+    }
+
+    /// Returns what follows `prefix` in `path`, without surrounding slashes, when non-empty.
+    fn slug_after(path: &str, prefix: &str) -> Option<String> {
+        let slug = path.strip_prefix(prefix)?.trim_matches('/');
+        (!slug.is_empty()).then(|| slug.to_string())
+    }
+
+    /// Maps the lowercase text of every tag, channel, pornstar and amateur link in `html` to
+    /// the listing it points at. Later links overwrite earlier ones with the same text.
+    fn collect_shortcuts(html: &str) -> HashMap<String, Target> {
+        let mut map = HashMap::new();
+        let document = Html::parse_document(html);
+        let Some(link_selector) = Self::selector("a[href]") else {
+            return map;
+        };
+        for link in document.select(&link_selector) {
+            let Some(href) = link.value().attr("href") else {
+                continue;
+            };
+            let title = Self::normalize_text(&link.text().collect::<String>()).to_ascii_lowercase();
+            if title.is_empty() {
+                continue;
+            }
+            let path = if href.starts_with("http://") || href.starts_with("https://") {
+                Url::parse(href)
+                    .ok()
+                    .map(|u| u.path().to_string())
+                    .unwrap_or_default()
+            } else {
+                // `Url::path` above already excludes the query string and fragment; cut them
+                // off relative links as well so they never end up inside a slug.
+                href.split(['?', '#'])
+                    .next()
+                    .unwrap_or_default()
+                    .to_string()
+            };
+            let sort = "new".to_string();
+            let target = if let Some(slug) = Self::slug_after(&path, "/porntags/") {
+                Target::Tag { slug, sort }
+            } else if let Some(slug) = Self::slug_after(&path, "/channel/") {
+                Target::Channel { slug, sort }
+            } else if let Some(slug) = Self::slug_after(&path, "/pornstar/") {
+                Target::Pornstar { slug, sort }
+            } else if let Some(slug) = Self::slug_after(&path, "/amateur/") {
+                Target::Amateur { slug, sort }
+            } else {
+                continue;
+            };
+            map.insert(title, target);
+        }
+        map
+    }
+
+    /// Normalized text of `node`, or an empty string when there is no node.
+    fn text_of(node: Option<ElementRef<'_>>) -> String {
+        node.map(|v| Self::normalize_text(&v.text().collect::<String>()))
+            .unwrap_or_default()
+    }
+
+    /// Extracts one `VideoItem` per video card in a listing page.
+    ///
+    /// Cards whose link does not point at a `/watch/` page, or that carry no id, are skipped.
+    fn parse_items(&self, html: &str) -> Vec<VideoItem> {
+        let document = Html::parse_document(html);
+
+        let Some(card_selector) = Self::selector("article.video-box.js_video-box") else {
+            return vec![];
+        };
+        let link_selector = Self::selector(
+            "a[data-testid='plw_video_thumbnail_link'], a.video-box-image, a.video-title-text",
+        );
+        let title_selector = Self::selector("a.video-title-text");
+        let thumb_selector = Self::selector("img");
+        let duration_selector = Self::selector(".tm_video_duration");
+        let views_selector = Self::selector("span.info-views");
+        let uploader_selector = Self::selector("a.author-title-text");
+        let tag_selector = Self::selector("a.bubble-porntag");
+
+        let mut items = Vec::new();
+        for card in document.select(&card_selector) {
+            let link_node = link_selector
+                .as_ref()
+                .and_then(|s| card.select(s).next());
+            let href = link_node
+                .and_then(|v| v.value().attr("href"))
+                .unwrap_or_default();
+            if !href.contains("/watch/") {
+                continue;
+            }
+
+            let id = card
+                .value()
+                .attr("data-video-id")
+                .map(|v| v.to_string())
+                .or_else(|| {
+                    href.split("/watch/")
+                        .nth(1)
+                        .and_then(|v| v.split('/').next())
+                        .map(|v| v.to_string())
+                })
+                .unwrap_or_default();
+            if id.is_empty() {
+                continue;
+            }
+
+            let title = title_selector
+                .as_ref()
+                .and_then(|s| card.select(s).next())
+                .map(|v| {
+                    let from_title = v.value().attr("title").unwrap_or_default();
+                    if from_title.is_empty() {
+                        Self::normalize_text(&v.text().collect::<String>())
+                    } else {
+                        Self::normalize_text(from_title)
+                    }
+                })
+                .unwrap_or_default();
+
+            let thumb = thumb_selector
+                .as_ref()
+                .and_then(|s| card.select(s).next())
+                .and_then(|v| {
+                    v.value()
+                        .attr("data-original")
+                        .or_else(|| v.value().attr("data-src"))
+                        .or_else(|| v.value().attr("src"))
+                })
+                .map(|v| self.normalize_url(v))
+                .unwrap_or_default();
+
+            let duration_text =
+                Self::text_of(duration_selector.as_ref().and_then(|s| card.select(s).next()));
+            let duration = parse_time_to_seconds(&duration_text).unwrap_or(0) as u32;
+
+            let view_text = Self::text_of(views_selector.as_ref().and_then(|s| card.select(s).next()));
+            let views = parse_abbreviated_number(&view_text).unwrap_or(0) as u32;
+
+            // The second `span.info-views` of a card is read as a percentage rating.
+            let rating = views_selector
+                .as_ref()
+                .and_then(|s| card.select(s).nth(1))
+                .map(|v| Self::normalize_text(&v.text().collect::<String>()).replace('%', ""))
+                .and_then(|v| v.parse::<f32>().ok());
+
+            let uploader_node = uploader_selector.as_ref().and_then(|s| card.select(s).next());
+            let uploader_name = Self::text_of(uploader_node);
+            let uploader_href = uploader_node
+                .and_then(|v| v.value().attr("href"))
+                .map(|v| self.normalize_url(v));
+            let uploader_id = card
+                .value()
+                .attr("data-uploader-id")
+                .map(|v| format!("{CHANNEL_ID}:{v}"));
+
+            // NOTE(review): the literal searched for here reached review as a bare `&`, which
+            // made the call a no-op; `&amp;` is presumably what was intended — confirm.
+            let preview = link_node
+                .and_then(|v| v.value().attr("data-mediabook"))
+                .map(|v| v.replace("&amp;", "&"));
+
+            let mut tags = Vec::new();
+            if let Some(sel) = &tag_selector {
+                for tag in card.select(sel) {
+                    let title = Self::normalize_text(&tag.text().collect::<String>());
+                    if !title.is_empty() {
+                        tags.push(title);
+                    }
+                }
+            }
+
+            let mut item = VideoItem::new(
+                id,
+                title,
+                self.normalize_url(href),
+                CHANNEL_ID.to_string(),
+                thumb,
+                duration,
+            )
+            .views(views);
+
+            if let Some(value) = rating {
+                item = item.rating(value);
+            }
+            if !uploader_name.is_empty() {
+                item = item.uploader(uploader_name);
+            }
+            if let Some(value) = uploader_href {
+                item.uploaderUrl = Some(value);
+            }
+            if let Some(value) = uploader_id {
+                item.uploaderId = Some(value);
+            }
+            if let Some(value) = preview {
+                item = item.preview(value);
+            }
+            if !tags.is_empty() {
+                item = item.tags(tags);
+            }
+
+            items.push(item);
+        }
+
+        items
+    }
+}
+
+#[async_trait]
+impl Provider for YoupornProvider {
+    /// Returns one page of videos for `query`, served from cache when fresh.
+    ///
+    /// Cached results younger than five minutes are returned as-is. When the request fails or
+    /// yields no items, the stale cached items (possibly none) are returned instead.
+    async fn get_videos(
+        &self,
+        cache: VideoCache,
+        _db_pool: DbPool,
+        sort: String,
+        query: Option<String>,
+        page: String,
+        _per_page: String,
+        options: ServerOptions,
+    ) -> Vec<VideoItem> {
+        let query = query.unwrap_or_default();
+        // Parsed as `u32`: a `u8` would send every page above 255 back to page 1.
+        let page = page.parse::<u32>().unwrap_or(1);
+        let target = self.target_from_query(&query, &sort);
+        let video_url = self.build_url(&target, page);
+
+        let old_items = match cache.get(&video_url) {
+            Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
+                return items.clone();
+            }
+            Some((_time, items)) => items.clone(),
+            None => vec![],
+        };
+
+        let mut requester = requester_or_default(&options, CHANNEL_ID, "get_videos");
+        let referer = format!("{}/", self.url.trim_end_matches('/'));
+        let text = match requester
+            .get_with_headers(&video_url, Self::html_headers(&referer), Some(Version::HTTP_11))
+            .await
+        {
+            Ok(text) => text,
+            Err(e) => {
+                report_provider_error(
+                    CHANNEL_ID,
+                    "get_videos.request",
+                    &format!("url={video_url}; error={e}"),
+                )
+                .await;
+                return old_items;
+            }
+        };
+
+        let items = self.parse_items(&text);
+        if items.is_empty() {
+            return old_items;
+        }
+
+        cache.remove(&video_url);
+        cache.insert(video_url, items.clone());
+        items
+    }
+
+    /// Returns this provider's channel descriptor.
+    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
+        Some(self.build_channel(clientversion))
+    }
+}