From 9751c25b9546f577174fa2396b9f6069cc27313a Mon Sep 17 00:00:00 2001 From: Simon Date: Mon, 16 Mar 2026 19:37:05 +0000 Subject: [PATCH] shooshtime --- src/providers/mod.rs | 5 + src/providers/porn4fans.rs | 106 ++- src/providers/shooshtime.rs | 1310 +++++++++++++++++++++++++++++++++++ src/proxies/hanimecdn.rs | 1 - 4 files changed, 1418 insertions(+), 4 deletions(-) create mode 100644 src/providers/shooshtime.rs diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 2f57457..2d7d499 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -35,6 +35,7 @@ pub mod paradisehill; pub mod porn00; pub mod porn4fans; pub mod pornzog; +pub mod shooshtime; pub mod sxyprn; pub mod tnaflix; pub mod tokyomotion; @@ -133,6 +134,10 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| "porn4fans", Arc::new(porn4fans::Porn4fansProvider::new()) as DynProvider, ); + m.insert( + "shooshtime", + Arc::new(shooshtime::ShooshtimeProvider::new()) as DynProvider, + ); m.insert( "pornzog", Arc::new(pornzog::PornzogProvider::new()) as DynProvider, diff --git a/src/providers/porn4fans.rs b/src/providers/porn4fans.rs index 620e5bc..f33916c 100644 --- a/src/providers/porn4fans.rs +++ b/src/providers/porn4fans.rs @@ -11,6 +11,7 @@ use error_chain::error_chain; use futures::future::join_all; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; +use scraper::{Html, Selector}; use std::collections::HashSet; error_chain! 
{ @@ -266,6 +267,36 @@ impl Porn4fansProvider { text.replace("\\/", "/").replace("&", "&") } + fn decode_html_text(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + .split_whitespace() + .collect::>() + .join(" ") + .trim() + .to_string() + } + + fn strip_tags(text: &str) -> String { + Regex::new(r"(?is)<[^>]+>") + .ok() + .map(|regex| regex.replace_all(text, "").to_string()) + .unwrap_or_else(|| text.to_string()) + } + + fn push_unique_tag(values: &mut Vec, value: String) { + let value = value.trim().to_string(); + if value.is_empty() + || values + .iter() + .any(|existing| existing.eq_ignore_ascii_case(&value)) + { + return; + } + values.push(value); + } + fn extract_views(text: &str) -> Option { Regex::new(r"(?i)]+icon-eye[^>]*>.*?\s*([^<]+)") .ok() @@ -303,6 +334,34 @@ impl Porn4fansProvider { None } + fn collect_texts(document: &Html, selector: &str) -> Vec { + let Ok(selector) = Selector::parse(selector) else { + return vec![]; + }; + let mut values = Vec::new(); + for element in document.select(&selector) { + let raw_text = element.text().collect::>().join(" "); + let cleaned = Self::decode_html_text(&Self::strip_tags(&raw_text)); + Self::push_unique_tag(&mut values, cleaned); + } + + values + } + + fn extract_page_models_and_categories(text: &str) -> (Vec, Vec) { + let document = Html::parse_document(text); + + let models = Self::collect_texts(&document, ".player-models-list a[href*=\"/models/\"]"); + + let mut categories = + Self::collect_texts(&document, ".categories-row a[href*=\"/categories/\"]"); + for value in Self::collect_texts(&document, ".tags-row a[href*=\"/tags/\"]") { + Self::push_unique_tag(&mut categories, value); + } + + (models, categories) + } + fn parse_video_cards_from_html(&self, html: &str) -> Vec { if html.trim().is_empty() { return vec![]; @@ -375,9 +434,17 @@ impl Porn4fansProvider { None, ) .await - .ok() - .and_then(|text| Self::extract_direct_video_url_from_page(&text)) - 
.unwrap_or_else(|| card.page_url.clone()); + .ok(); + + let (direct_url, models, categories) = match direct_url { + Some(text) => { + let url = Self::extract_direct_video_url_from_page(&text) + .unwrap_or_else(|| card.page_url.clone()); + let (models, categories) = Self::extract_page_models_and_categories(&text); + (url, models, categories) + } + None => (card.page_url.clone(), vec![], vec![]), + }; let mut item = VideoItem::new( card.id, @@ -393,6 +460,10 @@ impl Porn4fansProvider { if let Some(rating) = card.rating { item = item.rating(rating); } + if let Some(model) = models.first() { + item = item.uploader(model.clone()); + } + item = item.tags(categories); item } @@ -541,4 +612,33 @@ mod tests { ) ); } + + #[test] + fn extracts_models_and_categories_from_video_page() { + let html = r#" +
+ +
+ + + "#; + + let (models, categories) = Porn4fansProvider::extract_page_models_and_categories(html); + assert_eq!(models, vec!["Piper Rockelle".to_string()]); + assert_eq!( + categories, + vec![ + "Striptease".to_string(), + "Teen".to_string(), + "Bathroom".to_string() + ] + ); + } } diff --git a/src/providers/shooshtime.rs b/src/providers/shooshtime.rs new file mode 100644 index 0000000..fdb5657 --- /dev/null +++ b/src/providers/shooshtime.rs @@ -0,0 +1,1310 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{Provider, report_provider_error, report_provider_error_background}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoEmbed, VideoFormat, VideoItem}; +use async_trait::async_trait; +use chrono::NaiveDate; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use std::sync::{Arc, RwLock}; +use std::{thread, vec}; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + HttpRequest(wreq::Error); + Json(serde_json::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct ShooshtimeProvider { + url: String, + categories: Arc>>, + channels: Arc>>, + pornstars: Arc>>, + tags: Arc>>, + uploaders: Arc>>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FilterKind { + Standard, + MemberVideos, +} + +#[derive(Debug, Clone)] +struct QueryTarget { + kind: FilterKind, + url: String, +} + +impl ShooshtimeProvider { + pub fn new() -> Self { + let provider = Self { + url: "https://shooshtime.com".to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + channels: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + pornstars: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tags: Arc::new(RwLock::new(vec![])), + uploaders: Arc::new(RwLock::new(vec![])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let categories = Arc::clone(&self.categories); + let channels = Arc::clone(&self.channels); + let pornstars = Arc::clone(&self.pornstars); + let tags = Arc::clone(&self.tags); + let uploaders = Arc::clone(&self.uploaders); + + thread::spawn(move || { + let rt = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(rt) => rt, + Err(error) => { + report_provider_error_background( + "shooshtime", + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + rt.block_on(async move { + if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await { + report_provider_error_background( + "shooshtime", + "load_categories", + &error.to_string(), + ); + } + if let Err(error) = 
Self::load_channels(&url, Arc::clone(&channels)).await { + report_provider_error_background( + "shooshtime", + "load_channels", + &error.to_string(), + ); + } + if let Err(error) = Self::load_pornstars(&url, Arc::clone(&pornstars)).await { + report_provider_error_background( + "shooshtime", + "load_pornstars", + &error.to_string(), + ); + } + if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await { + report_provider_error_background("shooshtime", "load_tags", &error.to_string()); + } + if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await { + report_provider_error_background( + "shooshtime", + "load_uploaders", + &error.to_string(), + ); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .categories + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let channels = self + .channels + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let pornstars = self + .pornstars + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: "shooshtime".to_string(), + name: "Shooshtime".to_string(), + description: "Videos, tags, pornstars, channels, and user uploads from Shooshtime." 
+ .to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=shooshtime.com".to_string(), + status: "active".to_string(), + categories: categories.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Sort the videos".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Newest".to_string(), + }, + FilterOption { + id: "viewed".to_string(), + title: "Most Viewed".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "comments".to_string(), + title: "Most Commented".to_string(), + }, + FilterOption { + id: "recommended".to_string(), + title: "Recommended".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Browse a Shooshtime category".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: categories, + multiSelect: false, + }, + ChannelOption { + id: "stars".to_string(), + title: "Pornstars".to_string(), + description: "Browse a Shooshtime pornstar".to_string(), + systemImage: "star.fill".to_string(), + colorName: "yellow".to_string(), + options: pornstars, + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Channels".to_string(), + description: "Browse a Shooshtime channel".to_string(), + systemImage: "rectangle.stack".to_string(), + colorName: "purple".to_string(), + options: channels, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn push_unique(target: &Arc>>, item: FilterOption) { + if let Ok(mut values) = target.write() { + if !values.iter().any(|value| value.id == item.id) { + values.push(item); + } + } + } + + fn normalize_url(&self, url: 
&str) -> String { + if url.is_empty() { + return String::new(); + } + if url.starts_with("http://") || url.starts_with("https://") { + return url.to_string(); + } + if url.starts_with("//") { + return format!("https:{url}"); + } + if url.starts_with('/') { + return format!("{}{}", self.url, url); + } + format!("{}/{}", self.url, url.trim_start_matches("./")) + } + + fn normalize_title(title: &str) -> String { + title + .trim() + .trim_start_matches('#') + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() + } + + fn decode_html(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn text_of(element: &ElementRef<'_>) -> String { + Self::collapse_whitespace(&element.text().collect::>().join(" ")) + } + + fn parse_duration(text: &str) -> u32 { + let raw = Self::collapse_whitespace(text); + if raw.is_empty() { + return 0; + } + + if raw.contains(':') { + return parse_time_to_seconds(&raw) + .and_then(|seconds| u32::try_from(seconds).ok()) + .unwrap_or(0); + } + + raw.parse::().unwrap_or(0) + } + + fn parse_views(text: &str) -> Option { + let cleaned = text + .replace("views", "") + .replace("view", "") + .replace(' ', "") + .trim() + .to_string(); + parse_abbreviated_number(&cleaned) + } + + fn parse_percent(text: &str) -> Option { + let cleaned = text.trim().trim_end_matches('%').trim(); + if cleaned.is_empty() { + return None; + } + cleaned.parse::().ok() + } + + fn parse_uploaded_at(date_text: &str) -> Option { + NaiveDate::parse_from_str(date_text.trim(), "%d %B %Y") + .ok() + .and_then(|date| date.and_hms_opt(0, 0, 0)) + .map(|value| value.and_utc().timestamp() as u64) + } + + fn strip_counter_suffix(text: &str) -> String { + let mut parts = text.split_whitespace().collect::>(); + + while parts + .last() + .is_some_and(|value| value.chars().all(|ch| ch.is_ascii_digit())) + { + 
parts.pop(); + } + + while parts + .last() + .is_some_and(|value| value.eq_ignore_ascii_case("subscribers")) + { + parts.pop(); + } + + parts.join(" ") + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + async fn fetch_html(url: &str) -> Result { + let mut requester = Requester::new(); + requester + .get(url, None) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } + + async fn load_categories( + base_url: &str, + categories: Arc>>, + ) -> Result<()> { + let html = Self::fetch_html(&format!("{base_url}/categories/")).await?; + let document = Html::parse_document(&html); + let link_selector = Self::selector("a[href]")?; + + for link in document.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let normalized = href.trim_end_matches('/'); + if !normalized.starts_with(&format!("{base_url}/categories/")) { + continue; + } + + let remainder = normalized + .strip_prefix(&format!("{base_url}/categories/")) + .unwrap_or_default(); + + if remainder.is_empty() + || remainder.contains('/') + || matches!(remainder, "viewed" | "rated" | "videos") + { + continue; + } + + let title = Self::text_of(&link); + if title.is_empty() { + continue; + } + + Self::push_unique( + &categories, + FilterOption { + id: format!("{base_url}/categories/{remainder}/"), + title, + }, + ); + } + + Ok(()) + } + + async fn load_channels(base_url: &str, channels: Arc>>) -> Result<()> { + let link_selector = Self::selector("a[href]")?; + + for page in 1..=5 { + let url = if page == 1 { + format!("{base_url}/channels/") + } else { + format!("{base_url}/channels/{page}/") + }; + let html = Self::fetch_html(&url).await?; + let document = Html::parse_document(&html); + + for link in 
document.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let normalized = href.trim_end_matches('/'); + if !normalized.starts_with(&format!("{base_url}/channels/")) { + continue; + } + + let remainder = normalized + .strip_prefix(&format!("{base_url}/channels/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = Self::text_of(&link); + if title.is_empty() { + continue; + } + + Self::push_unique( + &channels, + FilterOption { + id: format!("{base_url}/channels/{remainder}/"), + title, + }, + ); + } + } + + Ok(()) + } + + async fn load_pornstars( + base_url: &str, + pornstars: Arc>>, + ) -> Result<()> { + let link_selector = Self::selector("a[href]")?; + + for page in 1..=5 { + let url = if page == 1 { + format!("{base_url}/pornstars/") + } else { + format!("{base_url}/pornstars/{page}/") + }; + let html = Self::fetch_html(&url).await?; + let document = Html::parse_document(&html); + + for link in document.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let normalized = href.trim_end_matches('/'); + if !normalized.starts_with(&format!("{base_url}/pornstars/")) { + continue; + } + + let remainder = normalized + .strip_prefix(&format!("{base_url}/pornstars/")) + .unwrap_or_default(); + if remainder.is_empty() + || remainder.contains('/') + || matches!(remainder, "celebrities" | "amateurs" | "couples" | "all") + { + continue; + } + + let title = Self::text_of(&link); + if title.is_empty() { + continue; + } + + Self::push_unique( + &pornstars, + FilterOption { + id: format!("{base_url}/pornstars/{remainder}/"), + title, + }, + ); + } + } + + Ok(()) + } + + async fn load_tags(base_url: &str, tags: Arc>>) -> Result<()> { + let html = Self::fetch_html(&format!("{base_url}/tags/")).await?; + let document = Html::parse_document(&html); + let link_selector = Self::selector("a[href]")?; + + for link in 
document.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let normalized = href.trim_end_matches('/'); + if !normalized.starts_with(&format!("{base_url}/tags/")) { + continue; + } + + let remainder = normalized + .strip_prefix(&format!("{base_url}/tags/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = Self::text_of(&link) + .trim_start_matches('#') + .trim() + .to_string(); + if title.is_empty() { + continue; + } + + Self::push_unique( + &tags, + FilterOption { + id: format!("{base_url}/tags/{remainder}/"), + title, + }, + ); + } + + Ok(()) + } + + async fn load_uploaders( + base_url: &str, + uploaders: Arc>>, + ) -> Result<()> { + let link_selector = Self::selector("a[href]")?; + + for page in 1..=5 { + let url = if page == 1 { + format!("{base_url}/members/") + } else { + format!("{base_url}/members/{page}/") + }; + let html = Self::fetch_html(&url).await?; + let document = Html::parse_document(&html); + + for link in document.select(&link_selector) { + let Some(href) = link.value().attr("href") else { + continue; + }; + let normalized = href.trim_end_matches('/'); + if !normalized.starts_with(&format!("{base_url}/members/")) { + continue; + } + + let remainder = normalized + .strip_prefix(&format!("{base_url}/members/")) + .unwrap_or_default(); + + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + if !remainder.chars().all(|value| value.is_ascii_digit()) { + continue; + } + + let title = Self::text_of(&link) + .replace("no photo", "") + .trim() + .to_string(); + if title.is_empty() { + continue; + } + + Self::push_unique( + &uploaders, + FilterOption { + id: format!("{base_url}/members/{remainder}/videos/"), + title, + }, + ); + } + } + + Ok(()) + } + + fn normalize_sort(sort: &str) -> &'static str { + match sort { + "viewed" => "viewed", + "rated" => "rated", + "comments" => "comments", + "recommended" => "recommended", + _ => 
"new", + } + } + + fn search_sort_param(sort: &str) -> Option<&'static str> { + match Self::normalize_sort(sort) { + "viewed" => Some("video_viewed"), + "rated" => Some("rating"), + "comments" => Some("most_commented"), + "recommended" => Some("most_favourited"), + _ => None, + } + } + + fn append_query_param(url: &str, key: &str, value: &str) -> String { + let separator = if url.contains('?') { "&" } else { "?" }; + format!("{url}{separator}{key}={value}") + } + + fn build_top_level_url(&self, page: u8, sort: &str) -> String { + let base = match Self::normalize_sort(sort) { + "viewed" => format!("{}/videos/viewed/", self.url), + "rated" => format!("{}/videos/rated/", self.url), + "comments" => format!("{}/videos/comments/", self.url), + "recommended" => format!("{}/videos/recommended/", self.url), + _ => format!("{}/videos/", self.url), + }; + + if page > 1 { + format!("{base}{page}/") + } else { + base + } + } + + fn build_standard_filtered_url(&self, base: &str, page: u8, sort: &str) -> String { + let mut url = if page > 1 { + format!("{}{page}/", base.trim_end_matches('/').to_string() + "/") + } else { + base.to_string() + }; + + if let Some(sort_by) = Self::search_sort_param(sort) { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + + url + } + + fn build_member_filtered_url(&self, base: &str, page: u8, sort: &str) -> String { + let mut url = base.to_string(); + + if let Some(sort_by) = Self::search_sort_param(sort) { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + + if page > 1 { + url = Self::append_query_param(&url, "from_videos", &page.to_string()); + } + + url + } + + fn build_search_url(&self, query: &str, page: u8, sort: &str) -> String { + let encoded_query = + url::form_urlencoded::byte_serialize(query.as_bytes()).collect::(); + let mut url = if page > 1 { + format!("{}/search/{page}/?q={encoded_query}", self.url) + } else { + format!("{}/search/?q={encoded_query}", self.url) + }; + + if let Some(sort_by) = 
Self::search_sort_param(sort) { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + + url + } + + fn resolve_option_target(&self, options: &ServerOptions) -> Option { + if let Some(category) = options.categories.as_deref() { + if !category.is_empty() && category != "all" { + return Some(QueryTarget { + kind: FilterKind::Standard, + url: category.to_string(), + }); + } + } + + if let Some(pornstar) = options.stars.as_deref() { + if !pornstar.is_empty() && pornstar != "all" { + return Some(QueryTarget { + kind: FilterKind::Standard, + url: pornstar.to_string(), + }); + } + } + + if let Some(channel) = options.sites.as_deref() { + if !channel.is_empty() && channel != "all" { + return Some(QueryTarget { + kind: FilterKind::Standard, + url: channel.to_string(), + }); + } + } + + None + } + + fn match_filter(options: &[FilterOption], query: &str) -> Option { + let normalized_query = Self::normalize_title(query); + options + .iter() + .find(|value| Self::normalize_title(&value.title) == normalized_query) + .map(|value| QueryTarget { + kind: if value.id.contains("/members/") { + FilterKind::MemberVideos + } else { + FilterKind::Standard + }, + url: value.id.clone(), + }) + } + + fn resolve_query_target(&self, query: &str) -> Option { + if let Ok(uploaders) = self.uploaders.read() { + if let Some(target) = Self::match_filter(&uploaders, query) { + return Some(target); + } + } + + if let Ok(pornstars) = self.pornstars.read() { + if let Some(target) = Self::match_filter(&pornstars, query) { + return Some(target); + } + } + + if let Ok(channels) = self.channels.read() { + if let Some(target) = Self::match_filter(&channels, query) { + return Some(target); + } + } + + if let Ok(tags) = self.tags.read() { + if let Some(target) = Self::match_filter(&tags, query) { + return Some(target); + } + } + + if let Ok(categories) = self.categories.read() { + if let Some(target) = Self::match_filter(&categories, query) { + return Some(target); + } + } + + None + } + + fn 
build_url_for_target(&self, target: &QueryTarget, page: u8, sort: &str) -> String { + match target.kind { + FilterKind::Standard => self.build_standard_filtered_url(&target.url, page, sort), + FilterKind::MemberVideos => self.build_member_filtered_url(&target.url, page, sort), + } + } + + fn parse_list_videos(&self, html: &str) -> Result> { + let document = Html::parse_document(html); + let card_selector = Self::selector("div.item.thumb")?; + let video_link_selector = Self::selector("a[href*=\"/videos/\"]")?; + let image_selector = Self::selector("img")?; + let duration_selector = Self::selector("span.duration")?; + let title_selector = Self::selector("span.description, span.description.secondary")?; + let info_selector = Self::selector("ul.info li.val")?; + let uploader_selector = Self::selector("ul.info a[href*=\"/members/\"]")?; + + let mut items = Vec::new(); + + for card in document.select(&card_selector) { + let Some(link) = card.select(&video_link_selector).next() else { + continue; + }; + + let href = link.value().attr("href").unwrap_or_default(); + if href.is_empty() { + continue; + } + + let page_url = self.normalize_url(href); + let id = page_url + .trim_end_matches('/') + .split('/') + .nth_back(1) + .unwrap_or_default() + .to_string(); + + if id.is_empty() { + continue; + } + + let image = card.select(&image_selector).next(); + let thumb = image + .and_then(|value| { + value + .value() + .attr("data-src") + .or_else(|| value.value().attr("src")) + }) + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + + let preview = image + .and_then(|value| value.value().attr("data-preview")) + .map(|value| self.normalize_url(value)); + + let title = card + .select(&title_selector) + .next() + .map(|value| Self::decode_html(&Self::text_of(&value))) + .unwrap_or_else(|| { + Self::decode_html(link.value().attr("title").unwrap_or_default()) + }); + + if title.is_empty() { + continue; + } + + let duration = card + .select(&duration_selector) + .next() + 
.map(|value| Self::parse_duration(&Self::text_of(&value))) + .unwrap_or(0); + + let views = card + .select(&info_selector) + .next() + .and_then(|value| Self::parse_views(&Self::text_of(&value))); + + let uploader_link = card.select(&uploader_selector).next(); + let uploader = uploader_link + .map(|value| Self::text_of(&value)) + .filter(|value| !value.is_empty()); + let uploader_url = uploader_link + .and_then(|value| value.value().attr("href")) + .map(|value| { + let absolute = self.normalize_url(value); + if absolute.ends_with('/') { + format!("{absolute}videos/") + } else { + format!("{absolute}/videos/") + } + }); + + if let (Some(name), Some(url)) = (&uploader, &uploader_url) { + Self::push_unique( + &self.uploaders, + FilterOption { + id: url.clone(), + title: name.clone(), + }, + ); + } + + let mut item = VideoItem::new( + id, + title, + page_url, + "shooshtime".to_string(), + thumb, + duration, + ); + if let Some(views) = views { + item = item.views(views); + } + if let Some(uploader) = uploader { + item = item.uploader(uploader); + } + if let Some(uploader_url) = uploader_url { + item = item.uploader_url(uploader_url); + } + if let Some(preview) = preview { + item = item.preview(preview); + } + + items.push(item); + } + + Ok(items) + } + + fn extract_js_value(block: &str, regex: &Regex) -> Option { + regex + .captures(block) + .and_then(|value| value.get(1)) + .map(|value| value.as_str().replace("\\/", "/").replace("\\'", "'")) + } + + fn apply_detail_video( + &self, + mut item: VideoItem, + html: &str, + page_url: &str, + ) -> Result { + let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?; + let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'([^']*)'"#)); + + let flashvars = flashvars_regex + .captures(html) + .and_then(|value| value.get(1)) + .map(|value| value.as_str().to_string()) + .unwrap_or_default(); + + let video_url_regex = value_regex("video_url")?; + let video_url_text_regex = 
value_regex("video_url_text")?; + let video_alt_url_regex = value_regex("video_alt_url")?; + let video_alt_url_text_regex = value_regex("video_alt_url_text")?; + let preview_url_regex = value_regex("preview_url")?; + + let primary_url = Self::extract_js_value(&flashvars, &video_url_regex) + .map(|value| self.normalize_url(&value)); + let primary_quality = Self::extract_js_value(&flashvars, &video_url_text_regex) + .unwrap_or_else(|| "480p".to_string()); + let alt_url = Self::extract_js_value(&flashvars, &video_alt_url_regex) + .map(|value| self.normalize_url(&value)); + let alt_quality = Self::extract_js_value(&flashvars, &video_alt_url_text_regex) + .unwrap_or_else(|| "720p".to_string()); + let preview_url = Self::extract_js_value(&flashvars, &preview_url_regex) + .map(|value| self.normalize_url(&value)); + + let mut formats = Vec::new(); + if let Some(url) = &primary_url { + formats.push( + VideoFormat::new(url.clone(), primary_quality.clone(), "mp4".to_string()) + .format_id(primary_quality.clone()) + .http_header("Referer".to_string(), page_url.to_string()), + ); + } + if let Some(url) = &alt_url { + formats.push( + VideoFormat::new(url.clone(), alt_quality.clone(), "mp4".to_string()) + .format_id(alt_quality.clone()) + .http_header("Referer".to_string(), page_url.to_string()), + ); + } + + let document = Html::parse_document(html); + let block_selector = Self::selector(".block-video")?; + let uploader_selector = Self::selector("a[href*=\"/members/\"]")?; + let rating_selector = Self::selector(".headline .value, .headline .rating, .value")?; + let title_selector = Self::selector(".headline h1, .headline .title, h1")?; + let about_section_selector = Self::selector("#tab2")?; + let info_value_selector = Self::selector("ul.video-info li.item span.value")?; + let tags_outer_selector = Self::selector(".tags-outer")?; + let tags_link_selector = Self::selector("a[href]")?; + let pornstar_selector = Self::selector("a[href*=\"/pornstars/\"]")?; + + let block = 
document.select(&block_selector).next(); + let uploader_link = block + .as_ref() + .and_then(|value| value.select(&uploader_selector).next()); + let uploader = uploader_link + .as_ref() + .map(|value| Self::strip_counter_suffix(&Self::text_of(value))) + .filter(|value| !value.is_empty()); + let uploader_url = uploader_link + .as_ref() + .and_then(|value| value.value().attr("href")) + .map(|value| { + let absolute = self.normalize_url(value); + if absolute.ends_with('/') { + format!("{absolute}videos/") + } else { + format!("{absolute}/videos/") + } + }); + + if let (Some(name), Some(url)) = (&uploader, &uploader_url) { + Self::push_unique( + &self.uploaders, + FilterOption { + id: url.clone(), + title: name.clone(), + }, + ); + } + + let title = block + .as_ref() + .and_then(|value| value.select(&title_selector).next()) + .map(|value| Self::decode_html(&Self::text_of(&value))) + .filter(|value| !value.is_empty()); + + let rating = block.as_ref().and_then(|value| { + value.select(&rating_selector).find_map(|node| { + let text = Self::text_of(&node); + if text.contains('%') { + Self::parse_percent(&text) + } else { + None + } + }) + }); + + let about = document.select(&about_section_selector).next(); + let info_values = about + .as_ref() + .map(|value| { + value + .select(&info_value_selector) + .map(|node| Self::text_of(&node)) + .collect::>() + }) + .unwrap_or_default(); + + let views = info_values.first().and_then(|value| { + let normalized = value.replace(' ', ""); + parse_abbreviated_number(&normalized) + }); + let uploaded_at = info_values + .get(1) + .and_then(|value| Self::parse_uploaded_at(value)); + + let mut tags = Vec::new(); + if let Some(about_value) = about.as_ref() { + for section in about_value.select(&tags_outer_selector) { + let title_text = section + .select(&Self::selector("h4.title")?) 
+ .next() + .map(|value| Self::text_of(&value).to_ascii_lowercase()) + .unwrap_or_default(); + + let values = section + .select(&tags_link_selector) + .filter_map(|link| { + let text = Self::text_of(&link); + if text.is_empty() { + return None; + } + Some(( + text, + link.value().attr("href").unwrap_or_default().to_string(), + )) + }) + .collect::>(); + + if title_text.contains("categories") { + for (text, href) in values { + tags.push(text.clone()); + let normalized = self.normalize_url(&href); + Self::push_unique( + &self.categories, + FilterOption { + id: normalized, + title: text, + }, + ); + } + } else if title_text.contains("tags") { + for (text, href) in values { + tags.push(text.clone()); + let normalized = self.normalize_url(&href); + Self::push_unique( + &self.tags, + FilterOption { + id: normalized, + title: text, + }, + ); + } + } else if title_text.contains("models") { + for (text, href) in values { + tags.push(text.clone()); + let normalized = self.normalize_url(&href); + Self::push_unique( + &self.pornstars, + FilterOption { + id: normalized, + title: text, + }, + ); + } + } + } + } + + if let Some(block_value) = block.as_ref() { + for pornstar in block_value.select(&pornstar_selector) { + let text = Self::strip_counter_suffix(&Self::text_of(&pornstar)); + if text.is_empty() { + continue; + } + + let normalized = + self.normalize_url(pornstar.value().attr("href").unwrap_or_default()); + Self::push_unique( + &self.pornstars, + FilterOption { + id: normalized, + title: text.clone(), + }, + ); + tags.push(text); + } + } + + tags.sort(); + tags.dedup(); + + let embed_url = page_url + .trim_end_matches('/') + .split('/') + .nth_back(1) + .map(|video_id| format!("{}/embed/{video_id}", self.url)); + + if let Some(title) = title { + item.title = title; + } + if !formats.is_empty() { + item = item.formats(formats); + } + if let Some(views) = views { + item = item.views(views); + } + if let Some(rating) = rating { + item = item.rating(rating); + } + if let 
Some(uploaded_at) = uploaded_at {
+            item = item.uploaded_at(uploaded_at);
+        }
+        if let Some(uploader) = uploader {
+            item = item.uploader(uploader);
+        }
+        if let Some(uploader_url) = uploader_url {
+            item = item.uploader_url(uploader_url);
+        }
+        if !tags.is_empty() {
+            item = item.tags(tags);
+        }
+        if item.preview.is_none() {
+            if let Some(preview) = preview_url.as_ref() {
+                item = item.preview(preview.clone());
+            }
+        }
+        if let Some(thumb) = preview_url {
+            item.thumb = thumb;
+        }
+        if let Some(source) = embed_url {
+            item = item.embed(VideoEmbed {
+                html: format!(
+                    ""
+                ),
+                source,
+            });
+        }
+
+        Ok(item)
+    }
+
+    /// Fetches the detail page at `item.url` and merges what it yields into `item`.
+    ///
+    /// Enrichment is best-effort: when the request fails, or when
+    /// `apply_detail_video` returns an error, the failure is handed to
+    /// `report_provider_error_background` and the item is returned as it came in.
+    async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
+        let page_url = item.url.clone();
+        // `apply_detail_video` takes the item by value, so keep a copy to fall back on.
+        let original_item = item.clone();
+
+        let mut requester = options.requester.clone().unwrap_or_else(Requester::new);
+
+        let html = match requester.get(&page_url, None).await {
+            Ok(html) => html,
+            Err(error) => {
+                report_provider_error_background(
+                    "shooshtime",
+                    "enrich_video.request",
+                    &format!("url={}; error={error}", page_url),
+                );
+                return item;
+            }
+        };
+
+        match self.apply_detail_video(item, &html, &page_url) {
+            Ok(item) => item,
+            Err(error) => {
+                report_provider_error_background(
+                    "shooshtime",
+                    "enrich_video.parse",
+                    &format!("url={}; error={error}", page_url),
+                );
+                original_item
+            }
+        }
+    }
+
+    /// Loads the listing at `url` and returns its videos after detail-page enrichment.
+    ///
+    /// A cache entry for `url` younger than 300 seconds is returned as-is. A failed
+    /// listing request is reported and produces an empty list instead of an error;
+    /// only a `parse_list_videos` failure is propagated. Empty results are not cached.
+    async fn fetch_items_for_url(
+        &self,
+        cache: VideoCache,
+        url: String,
+        options: &ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        if let Some((time, items)) = cache.get(&url) {
+            if time.elapsed().unwrap_or_default().as_secs() < 300 {
+                return Ok(items.clone());
+            }
+        }
+
+        let mut requester =
+            crate::providers::requester_or_default(options, module_path!(), "missing_requester");
+
+        let html = match requester.get(&url, None).await {
+            Ok(html) => html,
+            Err(error) => {
+                report_provider_error(
+                    "shooshtime",
+                    "fetch_items_for_url.request",
+                    &format!("url={url}; error={error}"),
+                )
+                .await;
+                return Ok(vec![]);
+            }
+        };
+
+        let list_videos = self.parse_list_videos(&html)?;
+        if list_videos.is_empty() {
+            return Ok(vec![]);
+        }
+
+        // `buffered` yields results in input order (`buffer_unordered` yields them in
+        // completion order), so the listing's sort order survives enrichment while
+        // up to six detail requests still run concurrently.
+        let items = stream::iter(list_videos.into_iter().map(|video| {
+            let provider = self.clone();
+            let options = options.clone();
+            async move { provider.enrich_video(video, &options).await }
+        }))
+        .buffered(6)
+        .collect::<Vec<_>>()
+        .await;
+
+        if !items.is_empty() {
+            cache.insert(url, items.clone());
+        }
+
+        Ok(items)
+    }
+
+    /// Builds the listing URL for a browse request and fetches its items.
+    ///
+    /// When `resolve_option_target` yields a target for `options`, the URL comes from
+    /// `build_url_for_target`; otherwise it comes from `build_top_level_url`.
+    async fn get(
+        &self,
+        cache: VideoCache,
+        page: u8,
+        sort: &str,
+        options: ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let target = self.resolve_option_target(&options);
+        let url = match target {
+            Some(target) => self.build_url_for_target(&target, page, sort),
+            None => self.build_top_level_url(page, sort),
+        };
+
+        self.fetch_items_for_url(cache, url, &options).await
+    }
+
+    /// Builds the listing URL for a search request and fetches its items.
+    ///
+    /// When `resolve_query_target` yields a target for `query`, the URL comes from
+    /// `build_url_for_target`; otherwise it comes from `build_search_url`.
+    async fn query(
+        &self,
+        cache: VideoCache,
+        page: u8,
+        query: &str,
+        sort: &str,
+        options: ServerOptions,
+    ) -> Result<Vec<VideoItem>> {
+        let url = match self.resolve_query_target(query) {
+            Some(target) => self.build_url_for_target(&target, page, sort),
+            None => self.build_search_url(query, page, sort),
+        };
+
+        self.fetch_items_for_url(cache, url, &options).await
+    }
+}
+
+#[async_trait]
+impl Provider for ShooshtimeProvider {
+    /// Returns the videos for a browse or search request.
+    ///
+    /// A query that is non-empty after trimming is routed to `query`; anything else
+    /// is routed to `get`. `page` falls back to 1 when it does not parse as a `u8`.
+    /// An error from either path is handed to `report_provider_error_background`
+    /// and an empty list is returned. `pool` and `per_page` are not used.
+    async fn get_videos(
+        &self,
+        cache: VideoCache,
+        pool: DbPool,
+        sort: String,
+        query: Option<String>,
+        page: String,
+        per_page: String,
+        options: ServerOptions,
+    ) -> Vec<VideoItem> {
+        let _ = pool;
+        let _ = per_page;
+        let page = page.parse::<u8>().unwrap_or(1);
+
+        let result = match query {
+            Some(query) if !query.trim().is_empty() => {
+                self.query(cache, page, &query, &sort, options).await
+            }
+            _ => self.get(cache, page, &sort, options).await,
+        };
+
+        match result {
+            Ok(videos) => videos,
+            Err(error) => {
+                report_provider_error_background("shooshtime", "get_videos", &error.to_string());
+                vec![]
+            }
+        }
+    }
+
+    /// Returns the channel built by `build_channel` for `clientversion`; never `None`.
+    fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
+        Some(self.build_channel(clientversion))
+    }
+}
diff --git a/src/proxies/hanimecdn.rs b/src/proxies/hanimecdn.rs
index d6c3da5..f619bbf 100644
--- a/src/proxies/hanimecdn.rs
+++ b/src/proxies/hanimecdn.rs
@@ -8,7 +8,6 @@ use crate::util::requester::Requester;
 
 fn normalize_image_url(endpoint: &str) -> String {
     let endpoint = endpoint.trim_start_matches('/');
-    println!("Normalizing image URL: {endpoint}");
     if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
         endpoint.to_string()
     } else if endpoint.starts_with("hanime-cdn.com/") || endpoint == "hanime-cdn.com" {