From a66f44c747a53ad40ce105a3f7cd2c1fbf0a0f1a Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 17 Mar 2026 21:04:11 +0000 Subject: [PATCH] heavyfetish and other changes --- src/providers/heavyfetish.rs | 1255 +++++++++++++++++++++++++++++++++ src/providers/mod.rs | 5 + src/providers/pimpbunny.rs | 113 ++- src/proxies/pimpbunnythumb.rs | 56 +- src/util/requester.rs | 20 +- 5 files changed, 1393 insertions(+), 56 deletions(-) create mode 100644 src/providers/heavyfetish.rs diff --git a/src/providers/heavyfetish.rs b/src/providers/heavyfetish.rs new file mode 100644 index 0000000..c409481 --- /dev/null +++ b/src/providers/heavyfetish.rs @@ -0,0 +1,1255 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::status::*; +use crate::util::cache::VideoCache; +use crate::util::parse_abbreviated_number; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use htmlentity::entity::{ICodedDataTrait, decode}; +use regex::Regex; +use scraper::{ElementRef, Html, Selector}; +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; +use std::{thread, vec}; + +error_chain! { + foreign_links { + Io(std::io::Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +const BASE_URL: &str = "https://heavyfetish.com"; +const BROWSER_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:146.0) Gecko/20100101 Firefox/146.0"; + +#[derive(Debug, Clone)] +pub struct HeavyfetishProvider { + url: String, + categories: Arc>>, + tags: Arc>>, + models: Arc>>, + uploaders: Arc>>, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum TargetKind { + Archive, + StandardVideos, + MemberVideos, + Search, +} + +#[derive(Debug, Clone)] +struct QueryTarget { + kind: TargetKind, + value: String, +} + +impl HeavyfetishProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tags: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + models: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + uploaders: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let url = self.url.clone(); + let categories = Arc::clone(&self.categories); + let tags = Arc::clone(&self.tags); + let models = Arc::clone(&self.models); + let uploaders = Arc::clone(&self.uploaders); + + thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + "heavyfetish", + "spawn_initial_load.runtime_build", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_categories(&url, Arc::clone(&categories)).await { + report_provider_error_background( + "heavyfetish", + "load_categories", + &error.to_string(), + ); + } + if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await { + report_provider_error_background( + "heavyfetish", + "load_tags", + &error.to_string(), + ); + } + if let Err(error) = Self::load_models(&url, Arc::clone(&models)).await { + report_provider_error_background( + "heavyfetish", + "load_models", + &error.to_string(), + ); + } + if let Err(error) = Self::load_uploaders(&url, Arc::clone(&uploaders)).await { + report_provider_error_background( + "heavyfetish", + "load_uploaders", + &error.to_string(), + ); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .categories + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let tags = self + .tags + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let models = self + .models + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + let uploaders = self + .uploaders + .read() + .map(|value| value.clone()) + .unwrap_or_default(); + + Channel { + id: "heavyfetish".to_string(), + name: "HeavyFetish".to_string(), + description: + "HeavyFetish videos, categories, tags, models, and uploader archives.".to_string(), + premium: false, + favicon: "https://www.google.com/s2/favicons?sz=64&domain=heavyfetish.com" + .to_string(), + status: "active".to_string(), + categories: categories.iter().map(|value| value.title.clone()).collect(), + options: vec![ + ChannelOption { + id: "sort".to_string(), + title: "Sort".to_string(), + description: "Browse HeavyFetish by archive order.".to_string(), + systemImage: "list.number".to_string(), + colorName: "blue".to_string(), + options: vec![ + FilterOption { + id: "new".to_string(), + title: "Latest".to_string(), + }, + FilterOption { + id: "popular".to_string(), + title: "Most Popular".to_string(), + }, + FilterOption { + id: "rated".to_string(), + title: "Top Rated".to_string(), + }, + FilterOption { + id: "longest".to_string(), + title: "Longest".to_string(), + }, + FilterOption { + id: "commented".to_string(), + title: "Most Commented".to_string(), + }, + FilterOption { + id: "recommended".to_string(), + title: "Most Favorited".to_string(), + }, + ], + multiSelect: false, + }, + ChannelOption { + id: "categories".to_string(), + title: "Categories".to_string(), + description: "Browse a HeavyFetish category archive.".to_string(), + systemImage: "square.grid.2x2".to_string(), + colorName: "orange".to_string(), + options: categories, + multiSelect: false, + }, + ChannelOption { + id: "filter".to_string(), + title: "Tags".to_string(), + description: "Browse a HeavyFetish tag archive.".to_string(), + systemImage: "tag.fill".to_string(), + colorName: "green".to_string(), + options: tags, + multiSelect: false, + }, + ChannelOption { + id: "stars".to_string(), + title: "Models".to_string(), + description: "Browse a HeavyFetish model archive.".to_string(), + systemImage: "star.fill".to_string(), + colorName: "yellow".to_string(), + options: models, + multiSelect: false, + }, + ChannelOption { + id: "sites".to_string(), + title: "Uploaders".to_string(), + description: "Browse a HeavyFetish uploader archive.".to_string(), + systemImage: "person.crop.square".to_string(), + colorName: "purple".to_string(), + options: uploaders, + multiSelect: false, + }, + ], + nsfw: true, + cacheDuration: Some(1800), + } + } + + fn selector(value: &str) -> Result { + Selector::parse(value) + .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) + } + + fn regex(value: &str) -> Result { + Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) + } + + fn collapse_whitespace(text: &str) -> String { + text.split_whitespace().collect::>().join(" ") + } + + fn decode_html(text: &str) -> String { + decode(text.as_bytes()) + .to_string() + .unwrap_or_else(|_| text.to_string()) + } + + fn text_of(element: &ElementRef<'_>) -> String { + Self::decode_html(&Self::collapse_whitespace( + &element.text().collect::>().join(" "), + )) + } + + fn normalize_title(title: &str) -> String { + title + .trim() + .trim_start_matches('#') + .split_whitespace() + .collect::>() + .join(" ") + .to_ascii_lowercase() + } + + fn normalize_url(&self, url: &str) -> String { + if url.is_empty() { + return String::new(); + } + if url.starts_with("http://") || url.starts_with("https://") { + return url.to_string(); + } + if url.starts_with("//") { + return format!("https:{url}"); + } + if url.starts_with('/') { + return format!("{}{}", self.url, url); + } + format!("{}/{}", self.url, url.trim_start_matches("./")) + } + + fn parse_duration(text: &str) -> u32 { + parse_time_to_seconds(text) + .and_then(|value| u32::try_from(value).ok()) + .unwrap_or(0) + } + + fn parse_views(text: &str) -> Option { + let cleaned = text + .replace("views", "") + .replace("view", "") + .replace(' ', "") + .trim() + .to_string(); + parse_abbreviated_number(&cleaned) + } + + fn parse_percent(text: &str) -> Option { + text.trim() + .trim_end_matches('%') + .trim() + .parse::() + .ok() + } + + fn sort_param(sort: &str) -> Option<&'static str> { + match sort { + "popular" | "viewed" | "trending" => Some("video_viewed"), + "rated" | "rating" | "top" => Some("rating"), + "longest" | "duration" => Some("duration"), + "commented" | "comments" => Some("most_commented"), + "recommended" | "favorited" | "favourited" => Some("most_favourited"), + _ => None, + } + } + + fn append_query_param(url: &str, key: &str, value: &str) -> String { + let separator = if url.contains('?') { "&" } else { "?" }; + format!("{url}{separator}{key}={value}") + } + + fn encode_search_query(query: &str) -> String { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + serializer.append_pair("q", query); + serializer + .finish() + .strip_prefix("q=") + .unwrap_or_default() + .to_string() + } + + fn push_unique(target: &Arc>>, item: FilterOption) { + if item.id.is_empty() || item.title.is_empty() { + return; + } + if let Ok(mut values) = target.write() { + if !values.iter().any(|value| value.id == item.id) { + values.push(item); + } + } + } + + async fn fetch_html(url: &str) -> Result { + let mut requester = Requester::new(); + requester + .get(url, None) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) + } + + async fn load_categories( + base_url: &str, + categories: Arc>>, + ) -> Result<()> { + let html = Self::fetch_html(&format!("{base_url}/categories/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("#list_categories_categories_list_items a.item[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().to_string(); + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + let normalized = href.trim_end_matches('/').to_string(); + + if !normalized.starts_with(&format!("{base_url}/categories/")) || title.is_empty() { + continue; + } + + Self::push_unique( + &categories, + FilterOption { + id: format!("{normalized}/"), + title, + }, + ); + } + + Ok(()) + } + + async fn load_tags(base_url: &str, tags: Arc>>) -> Result<()> { + let html = Self::fetch_html(&format!("{base_url}/tags/")).await?; + let document = Html::parse_document(&html); + let selector = Self::selector("a[href]")?; + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/tags/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/tags/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + if title.is_empty() { + continue; + } + + Self::push_unique( + &tags, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + + Ok(()) + } + + async fn load_models(base_url: &str, models: Arc>>) -> Result<()> { + let selector = Self::selector("#list_models_models_list_items a.item[href]")?; + + for page in 1..=5 { + let url = if page == 1 { + format!("{base_url}/fetish-models/") + } else { + format!("{base_url}/fetish-models/{page}/") + }; + let html = Self::fetch_html(&url).await?; + let document = Html::parse_document(&html); + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + let title = element + .value() + .attr("title") + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| Self::text_of(&element)); + + if !href.starts_with(&format!("{base_url}/fetish-models/")) || title.is_empty() { + continue; + } + + Self::push_unique( + &models, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + Ok(()) + } + + async fn load_uploaders( + base_url: &str, + uploaders: Arc>>, + ) -> Result<()> { + let selector = Self::selector("#list_members_members_items a[href]")?; + let title_selector = Self::selector("strong.title")?; + + for page in 1..=5 { + let mut url = format!("{base_url}/members/"); + if page > 1 { + url = Self::append_query_param(&url, "from_members", &page.to_string()); + } + + let html = Self::fetch_html(&url).await?; + let document = Html::parse_document(&html); + + for element in document.select(&selector) { + let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); + if !href.starts_with(&format!("{base_url}/members/")) { + continue; + } + + let remainder = href + .strip_prefix(&format!("{base_url}/members/")) + .unwrap_or_default(); + if remainder.is_empty() || remainder.contains('/') { + continue; + } + if !remainder.chars().all(|value| value.is_ascii_digit()) { + continue; + } + + let title = element + .select(&title_selector) + .next() + .map(|value| Self::text_of(&value)) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| Self::text_of(&element)); + if title.is_empty() { + continue; + } + + Self::push_unique( + &uploaders, + FilterOption { + id: format!("{href}/"), + title, + }, + ); + } + } + + Ok(()) + } + + fn resolve_option_target(&self, options: &ServerOptions) -> QueryTarget { + if let Some(uploader) = options.sites.as_deref() { + if uploader.starts_with(&self.url) && uploader != "all" { + return QueryTarget { + kind: TargetKind::MemberVideos, + value: uploader.to_string(), + }; + } + } + + if let Some(model) = options.stars.as_deref() { + if model.starts_with(&self.url) && model != "all" { + return QueryTarget { + kind: TargetKind::StandardVideos, + value: model.to_string(), + }; + } + } + + if let Some(tag) = options.filter.as_deref() { + if tag.starts_with(&self.url) && tag != "all" { + return QueryTarget { + kind: TargetKind::StandardVideos, + value: tag.to_string(), + }; + } + } + + if let Some(category) = options.categories.as_deref() { + if category.starts_with(&self.url) && category != "all" { + return QueryTarget { + kind: TargetKind::StandardVideos, + value: category.to_string(), + }; + } + } + + QueryTarget { + kind: TargetKind::Archive, + value: String::new(), + } + } + + fn match_filter(options: &[FilterOption], query: &str, kind: TargetKind) -> Option { + let normalized_query = Self::normalize_title(query); + options + .iter() + .find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query) + .map(|value| QueryTarget { + kind, + value: value.id.clone(), + }) + } + + fn resolve_query_target(&self, query: &str) -> QueryTarget { + if let Ok(uploaders) = self.uploaders.read() { + if let Some(target) = Self::match_filter(&uploaders, query, TargetKind::MemberVideos) { + return target; + } + } + + if let Ok(models) = self.models.read() { + if let Some(target) = Self::match_filter(&models, query, TargetKind::StandardVideos) { + return target; + } + } + + if let Ok(tags) = self.tags.read() { + if let Some(target) = Self::match_filter(&tags, query, TargetKind::StandardVideos) { + return target; + } + } + + if let Ok(categories) = self.categories.read() { + if let Some(target) = Self::match_filter(&categories, query, TargetKind::StandardVideos) + { + return target; + } + } + + QueryTarget { + kind: TargetKind::Search, + value: query.to_string(), + } + } + + fn build_url_for_target(&self, target: &QueryTarget, page: u16, sort: &str) -> String { + let sort_by = Self::sort_param(sort); + + match target.kind { + TargetKind::Archive => { + let mut url = if page > 1 { + format!("{}/fetish-videos/{page}/", self.url) + } else { + format!("{}/fetish-videos/", self.url) + }; + if let Some(sort_by) = sort_by { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + url + } + TargetKind::StandardVideos => { + let mut url = target.value.clone(); + if page > 1 { + url = Self::append_query_param(&url, "from", &page.to_string()); + } + if let Some(sort_by) = sort_by { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + url + } + TargetKind::MemberVideos => { + let mut url = target.value.clone(); + if page > 1 { + url = Self::append_query_param(&url, "from_videos", &page.to_string()); + } + if let Some(sort_by) = sort_by { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + url + } + TargetKind::Search => { + let encoded_query = Self::encode_search_query(&target.value); + let mut url = format!("{}/search/{encoded_query}/", self.url); + if page > 1 { + url = Self::append_query_param(&url, "from_videos", &page.to_string()); + url = Self::append_query_param(&url, "from_albums", &page.to_string()); + } + if let Some(sort_by) = sort_by { + url = Self::append_query_param(&url, "sort_by", sort_by); + } + url + } + } + } + + fn list_container<'a>(&self, document: &'a Html) -> Result>> { + for selector_text in [ + "#list_videos_fetish_videos_list_items", + "#list_videos_videos_list_search_result_items", + "#list_videos_common_videos_list_items", + "#list_videos_uploaded_videos_items", + ] { + let selector = Self::selector(selector_text)?; + if let Some(element) = document.select(&selector).next() { + return Ok(Some(element)); + } + } + Ok(None) + } + + fn parse_list_videos(&self, html: &str) -> Result> { + let document = Html::parse_document(html); + let Some(container) = self.list_container(&document)? else { + return Ok(vec![]); + }; + + let card_selector = Self::selector("div.item.hf-video-item")?; + let link_selector = Self::selector("a[href*=\"/videos/\"]")?; + let image_selector = Self::selector("img.thumb")?; + let title_selector = Self::selector("strong.title")?; + let duration_selector = Self::selector("div.duration")?; + let rating_selector = Self::selector("div.rating")?; + let views_selector = Self::selector("div.views")?; + + let mut items = Vec::new(); + + for card in container.select(&card_selector) { + let Some(link) = card.select(&link_selector).next() else { + continue; + }; + + let href = link.value().attr("href").unwrap_or_default(); + let page_url = self.normalize_url(href); + let id = page_url + .trim_end_matches('/') + .split('/') + .nth_back(1) + .unwrap_or_default() + .to_string(); + + if id.is_empty() || page_url.is_empty() { + continue; + } + + let image = card.select(&image_selector).next(); + let thumb = image + .and_then(|value| { + value + .value() + .attr("data-webp") + .or_else(|| value.value().attr("data-original")) + .or_else(|| value.value().attr("src")) + }) + .map(|value| self.normalize_url(value)) + .unwrap_or_default(); + + let preview = image + .and_then(|value| value.value().attr("data-preview")) + .map(|value| self.normalize_url(value)); + + let title = card + .select(&title_selector) + .next() + .map(|value| Self::text_of(&value)) + .filter(|value| !value.is_empty()) + .or_else(|| { + link.value() + .attr("title") + .map(Self::decode_html) + .filter(|value| !value.is_empty()) + }); + + let Some(title) = title else { + continue; + }; + + let duration = card + .select(&duration_selector) + .next() + .map(|value| Self::parse_duration(&Self::text_of(&value))) + .unwrap_or(0); + + let rating = card + .select(&rating_selector) + .next() + .and_then(|value| Self::parse_percent(&Self::text_of(&value))); + + let views = card + .select(&views_selector) + .next() + .and_then(|value| Self::parse_views(&Self::text_of(&value))); + + let mut item = VideoItem::new( + id, + title, + page_url, + "heavyfetish".to_string(), + thumb, + duration, + ); + if let Some(preview) = preview { + item = item.preview(preview); + } + if let Some(rating) = rating { + item = item.rating(rating); + } + if let Some(views) = views { + item = item.views(views); + } + + items.push(item); + } + + Ok(items) + } + + fn extract_js_value(block: &str, regex: &Regex) -> Option { + regex + .captures(block) + .and_then(|captures| captures.get(1)) + .map(|value| value.as_str().replace("\\/", "/").replace("\\'", "'")) + } + + fn quality_from_url(url: &str) -> String { + for quality in ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"] { + if url.contains(quality) { + return quality.to_string(); + } + } + "480p".to_string() + } + + fn build_formats(&self, html: &str, page_url: &str) -> Result> { + let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?; + let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#)); + let flashvars = flashvars_regex + .captures(html) + .and_then(|value| value.get(1)) + .map(|value| value.as_str().to_string()) + .unwrap_or_default(); + + let mut seen = HashMap::::new(); + for key in ["video_alt_url2", "video_alt_url", "video_url"] { + let url_regex = value_regex(key)?; + let text_regex = value_regex(&format!("{key}_text"))?; + if let Some(url) = Self::extract_js_value(&flashvars, &url_regex) { + let normalized = self.normalize_url(&url); + let quality = Self::extract_js_value(&flashvars, &text_regex) + .filter(|value| !value.is_empty()) + .unwrap_or_else(|| Self::quality_from_url(&normalized)); + seen.entry(quality).or_insert(normalized); + } + } + + let document = Html::parse_document(html); + let download_selector = Self::selector("#download_popup a[href*=\"/get_file/\"]")?; + for element in document.select(&download_selector) { + let href = element.value().attr("href").unwrap_or_default(); + let normalized = self.normalize_url(href); + if normalized.is_empty() { + continue; + } + let quality = Self::quality_from_url(&normalized); + seen.entry(quality).or_insert(normalized); + } + + let order = ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"]; + let mut formats = Vec::new(); + + for quality in order { + let Some(url) = seen.get(quality) else { + continue; + }; + let format = VideoFormat::new(url.clone(), quality.to_string(), "mp4".to_string()) + .format_id(quality.to_string()) + .http_header("Referer".to_string(), page_url.to_string()) + .http_header("User-Agent".to_string(), BROWSER_UA.to_string()); + formats.push(format); + } + + Ok(formats) + } + + fn apply_detail_video( + &self, + mut item: VideoItem, + html: &str, + page_url: &str, + ) -> Result { + let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?; + let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#)); + let flashvars = flashvars_regex + .captures(html) + .and_then(|value| value.get(1)) + .map(|value| value.as_str().to_string()) + .unwrap_or_default(); + + let title_regex = value_regex("video_title")?; + let category_regex = value_regex("video_categories")?; + let tag_regex = value_regex("video_tags")?; + let model_regex = value_regex("video_models")?; + let preview_regex = value_regex("preview_url")?; + let width_regex = value_regex("player_width")?; + let height_regex = value_regex("player_height")?; + + let document = Html::parse_document(html); + let uploader_selector = + Self::selector(".block-details .block-user .username a[href*=\"/members/\"]")?; + let info_span_selector = Self::selector(".block-details .info .item span")?; + let category_selector = Self::selector(".block-details .info a[href*=\"/categories/\"]")?; + let tag_selector = Self::selector(".block-details .info a[href*=\"/tags/\"]")?; + let model_selector = + Self::selector(".block-details .info a[href*=\"/fetish-models/\"]")?; + + if let Some(title) = Self::extract_js_value(&flashvars, &title_regex) { + if !title.is_empty() { + item.title = title; + } + } + + let formats = self.build_formats(html, page_url)?; + if !formats.is_empty() { + item = item.formats(formats); + } + + let uploader_link = document.select(&uploader_selector).next(); + let uploader = uploader_link + .as_ref() + .map(|value| Self::text_of(value)) + .filter(|value| !value.is_empty()); + let uploader_url = uploader_link + .and_then(|value| value.value().attr("href")) + .map(|value| self.normalize_url(value)); + + if let (Some(name), Some(url)) = (&uploader, &uploader_url) { + Self::push_unique( + &self.uploaders, + FilterOption { + id: url.clone(), + title: name.clone(), + }, + ); + } + + for span in document.select(&info_span_selector) { + let text = Self::text_of(&span); + if let Some(value) = text.strip_prefix("Views:") { + if let Some(views) = Self::parse_views(value) { + item = item.views(views); + } + } else if let Some(value) = text.strip_prefix("Duration:") { + let duration = Self::parse_duration(value.trim()); + if duration > 0 { + item.duration = duration; + } + } + } + + let mut tags = item.tags.clone().unwrap_or_default(); + + for element in document.select(&category_selector) { + let title = Self::text_of(&element); + let href = element.value().attr("href").unwrap_or_default(); + if title.is_empty() { + continue; + } + tags.push(title.clone()); + Self::push_unique( + &self.categories, + FilterOption { + id: self.normalize_url(href), + title, + }, + ); + } + + for element in document.select(&tag_selector) { + let title = Self::text_of(&element); + let href = element.value().attr("href").unwrap_or_default(); + if title.is_empty() { + continue; + } + tags.push(title.clone()); + Self::push_unique( + &self.tags, + FilterOption { + id: self.normalize_url(href), + title, + }, + ); + } + + for element in document.select(&model_selector) { + let title = Self::text_of(&element); + let href = element.value().attr("href").unwrap_or_default(); + if title.is_empty() { + continue; + } + tags.push(title.clone()); + Self::push_unique( + &self.models, + FilterOption { + id: self.normalize_url(href), + title, + }, + ); + } + + for raw_group in [category_regex, tag_regex, model_regex] { + if let Some(values) = Self::extract_js_value(&flashvars, &raw_group) { + for value in values.split(',') { + let clean = Self::collapse_whitespace(value.trim()); + if !clean.is_empty() { + tags.push(clean); + } + } + } + } + + tags.sort(); + tags.dedup(); + if !tags.is_empty() { + item = item.tags(tags); + } + + if let Some(uploader) = uploader { + item = item.uploader(uploader); + } + if let Some(uploader_url) = uploader_url { + item = item.uploader_url(uploader_url); + } + if item.thumb.is_empty() { + if let Some(preview) = Self::extract_js_value(&flashvars, &preview_regex) { + item.thumb = self.normalize_url(&preview); + } + } + if item.aspectRatio.is_none() { + let width = Self::extract_js_value(&flashvars, &width_regex) + .and_then(|value| value.parse::().ok()); + let height = Self::extract_js_value(&flashvars, &height_regex) + .and_then(|value| value.parse::().ok()); + if let (Some(width), Some(height)) = (width, height) { + if height > 0.0 { + item = item.aspect_ratio(width / height); + } + } + } + + Ok(item) + } + + async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { + let page_url = item.url.clone(); + let original = item.clone(); + let mut requester = + requester_or_default(options, "heavyfetish", "enrich_video.requester_missing"); + + let html = match requester.get(&page_url, None).await { + Ok(html) => html, + Err(error) => { + report_provider_error_background( + "heavyfetish", + "enrich_video.request", + &format!("url={page_url}; error={error}"), + ); + return original; + } + }; + + match self.apply_detail_video(item, &html, &page_url) { + Ok(item) => item, + Err(error) => { + report_provider_error_background( + "heavyfetish", + "enrich_video.parse", + &format!("url={page_url}; error={error}"), + ); + original + } + } + } + + async fn fetch_items_for_url( + &self, + cache: VideoCache, + url: String, + per_page_limit: usize, + options: &ServerOptions, + ) -> Result> { + if let Some((time, items)) = cache.get(&url) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.iter().take(per_page_limit).cloned().collect()); + } + } + + let mut requester = + requester_or_default(options, "heavyfetish", "fetch_items_for_url.requester_missing"); + let html = requester + .get(&url, None) + .await + .map_err(|error| Error::from(format!("request failed for {url}: {error}")))?; + + let list_items = self.parse_list_videos(&html)?; + if list_items.is_empty() { + return Ok(vec![]); + } + + let limited_items = list_items + .into_iter() + .take(per_page_limit.max(1)) + .collect::>(); + + let items = stream::iter(limited_items.into_iter().map(|item| { + let provider = self.clone(); + let options = options.clone(); + async move { provider.enrich_video(item, &options).await } + })) + .buffer_unordered(4) + .collect::>() + .await; + + if !items.is_empty() { + cache.insert(url, items.clone()); + } + + Ok(items) + } + + async fn get( + &self, + cache: VideoCache, + page: u16, + sort: &str, + per_page_limit: usize, + options: ServerOptions, + ) -> Result> { + let target = self.resolve_option_target(&options); + let url = self.build_url_for_target(&target, page, sort); + self.fetch_items_for_url(cache, url, per_page_limit, &options) + .await + } + + async fn query( + &self, + cache: VideoCache, + page: u16, + sort: &str, + query: &str, + per_page_limit: usize, + options: ServerOptions, + ) -> Result> { + let target = self.resolve_query_target(query); + let url = self.build_url_for_target(&target, page, sort); + self.fetch_items_for_url(cache, url, per_page_limit, &options) + .await + } +} + +#[async_trait] +impl Provider for HeavyfetishProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = pool; + let page = page.parse::().unwrap_or(1); + let per_page_limit = per_page.parse::().unwrap_or(30); + + let result = match query { + Some(query) if !query.trim().is_empty() => { + self.query(cache, page, &sort, &query, per_page_limit, options) + .await + } + _ => self.get(cache, page, &sort, per_page_limit, options).await, + }; + + match result { + Ok(videos) => videos, + Err(error) => { + report_provider_error("heavyfetish", "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn provider() -> HeavyfetishProvider { + HeavyfetishProvider { + url: BASE_URL.to_string(), + categories: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tags: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + models: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + uploaders: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + } + } + + #[test] + fn builds_search_page_two_url() { + let provider = provider(); + let url = provider.build_url_for_target( + &QueryTarget { + kind: TargetKind::Search, + value: "adriana chechik".to_string(), + }, + 2, + "rated", + ); + + assert_eq!( + url, + "https://heavyfetish.com/search/adriana+chechik/?from_videos=2&from_albums=2&sort_by=rating" + ); + } + + #[test] + fn preserves_list_thumb_when_detail_has_preview_image() { + let provider = provider(); + let item = VideoItem::new( + "120660".to_string(), + "Example".to_string(), + "https://heavyfetish.com/videos/120660/example/".to_string(), + "heavyfetish".to_string(), + "https://heavyfetish.com/list-thumb.jpg".to_string(), + 0, + ) + .preview("https://heavyfetish.com/list-preview.mp4".to_string()); + + let html = r#" + + "#; + + let enriched = provider + .apply_detail_video(item, html, "https://heavyfetish.com/videos/120660/example/") + .expect("detail parsing should succeed"); + + assert_eq!(enriched.thumb, "https://heavyfetish.com/list-thumb.jpg"); + assert_eq!( + enriched.preview.as_deref(), + Some("https://heavyfetish.com/list-preview.mp4") + ); + } +} diff --git a/src/providers/mod.rs b/src/providers/mod.rs index 9d74d27..42df4b2 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -47,6 +47,7 @@ pub mod youjizz; // pub mod pornxp; pub mod chaturbate; pub mod freepornvideosxxx; +pub mod heavyfetish; pub mod hentaihaven; pub mod hqporner; pub mod hypnotube; @@ -205,6 +206,10 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| "freepornvideosxxx", Arc::new(freepornvideosxxx::FreepornvideosxxxProvider::new()) as DynProvider, ); + m.insert( + "heavyfetish", + Arc::new(heavyfetish::HeavyfetishProvider::new()) as DynProvider, + ); m.insert( "hentaihaven", Arc::new(hentaihaven::HentaihavenProvider::new()) as DynProvider, diff --git a/src/providers/pimpbunny.rs b/src/providers/pimpbunny.rs index f5a9b3d..90602ba 100644 --- a/src/providers/pimpbunny.rs +++ b/src/providers/pimpbunny.rs @@ -204,8 +204,12 @@ impl PimpbunnyProvider { } fn root_headers(&self) -> Vec<(String, String)> { + Self::html_headers_with_referer(&self.root_referer()) + } + + fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> { vec![ - ("Referer".to_string(), self.root_referer()), + ("Referer".to_string(), referer.to_string()), ( "User-Agent".to_string(), Self::FIREFOX_USER_AGENT.to_string(), @@ -215,20 +219,52 @@ impl PimpbunnyProvider { ] } + fn headers_with_cookies( + &self, + requester: &Requester, + request_url: &str, + referer: &str, + ) -> Vec<(String, String)> { + let mut headers = Self::html_headers_with_referer(referer); + if let Some(cookie) = requester.cookie_header_for_url(request_url) { + headers.push(("Cookie".to_string(), cookie)); + } + headers + } + + async fn warm_root_session(&self, requester: &mut Requester) { + let root_url = self.root_referer(); + let _ = requester + .get_with_headers(&root_url, self.root_headers(), Some(Version::HTTP_11)) + .await; + } + + async fn warm_root_session_for_base(base: &str, requester: &mut Requester) { + let root_url = format!("{}/", base.trim_end_matches('/')); + let _ = requester + .get_with_headers( + &root_url, + Self::html_headers_with_referer(&root_url), + Some(Version::HTTP_11), + ) + .await; + } + async fn load_stars(base: &str, stars: Arc>>) -> Result<()> { let mut requester = Requester::new(); - let headers = vec![ - ("Referer".to_string(), format!("{}/", base.trim_end_matches('/'))), - ( - "User-Agent".to_string(), - Self::FIREFOX_USER_AGENT.to_string(), - ), - ("Accept".to_string(), Self::HTML_ACCEPT.to_string()), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ]; + Self::warm_root_session_for_base(base, &mut requester).await; + let request_url = format!("{base}/onlyfans-models/?models_per_page=20"); + let headers = { + let root_url = format!("{}/", base.trim_end_matches('/')); + let mut headers = Self::html_headers_with_referer(&root_url); + if let Some(cookie) = requester.cookie_header_for_url(&request_url) { + headers.push(("Cookie".to_string(), cookie)); + } + headers + }; let text = requester .get_with_headers( - &format!("{base}/onlyfans-models/?models_per_page=20"), + &request_url, headers, Some(Version::HTTP_2), ) @@ -271,18 +307,19 @@ impl PimpbunnyProvider { async fn load_categories(base: &str, cats: Arc>>) -> Result<()> { let mut requester = Requester::new(); - let headers = vec![ - ("Referer".to_string(), format!("{}/", base.trim_end_matches('/'))), - ( - "User-Agent".to_string(), - Self::FIREFOX_USER_AGENT.to_string(), - ), - ("Accept".to_string(), Self::HTML_ACCEPT.to_string()), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ]; + Self::warm_root_session_for_base(base, &mut requester).await; + let request_url = format!("{base}/categories/?items_per_page=120"); + let headers = { + let root_url = format!("{}/", base.trim_end_matches('/')); + let mut headers = Self::html_headers_with_referer(&root_url); + if let Some(cookie) = requester.cookie_header_for_url(&request_url) { + headers.push(("Cookie".to_string(), cookie)); + } + headers + }; let text = requester .get_with_headers( - &format!("{base}/categories/?items_per_page=120"), + &request_url, headers, Some(Version::HTTP_2), ) @@ -347,8 +384,10 @@ impl PimpbunnyProvider { }; let mut requester = crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); + self.warm_root_session(&mut requester).await; + let headers = self.headers_with_cookies(&requester, &video_url, &self.root_referer()); let text = match requester - .get_with_headers(&video_url, self.root_headers(), Some(Version::HTTP_11)) + .get_with_headers(&video_url, headers, Some(Version::HTTP_11)) .await { Ok(text) => text, @@ -446,9 +485,11 @@ impl PimpbunnyProvider { let mut requester = crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); + self.warm_root_session(&mut requester).await; println!("Fetching URL: {}", video_url); + let headers = self.headers_with_cookies(&requester, &video_url, &self.root_referer()); let text = match requester - .get_with_headers(&video_url, self.root_headers(), Some(Version::HTTP_2)) + .get_with_headers(&video_url, headers, Some(Version::HTTP_2)) .await { Ok(text) => text, @@ -577,8 +618,10 @@ impl PimpbunnyProvider { url: &str, requester: &mut Requester, ) -> Result<(Vec, Vec, u32, u32)> { + self.warm_root_session(requester).await; + let headers = self.headers_with_cookies(requester, url, &self.root_referer()); let text = requester - .get_with_headers(url, self.root_headers(), Some(Version::HTTP_2)) + .get_with_headers(url, headers, Some(Version::HTTP_2)) .await .map_err(|e| Error::from(format!("{}", e)))?; @@ -610,17 +653,17 @@ impl PimpbunnyProvider { .map(|d| parse_time_to_seconds(&d.replace(['P', 'T', 'H', 'M', 'S'], "")).unwrap_or(0)) .unwrap_or(0) as u32; - Ok(( - vec![], - vec![VideoFormat::new(video_url, quality, "video/mp4".into()) - .http_header("Referer".to_string(), url.to_string()) - .http_header( - "User-Agent".to_string(), - Self::FIREFOX_USER_AGENT.to_string(), - )], - views, - duration, - )) + let mut format = VideoFormat::new(video_url.clone(), quality, "video/mp4".into()) + .http_header("Referer".to_string(), url.to_string()) + .http_header( + "User-Agent".to_string(), + Self::FIREFOX_USER_AGENT.to_string(), + ); + if let Some(cookie) = requester.cookie_header_for_url(&video_url) { + format.add_http_header("Cookie".to_string(), cookie); + } + + Ok((vec![], vec![format], views, duration)) } } diff --git a/src/proxies/pimpbunnythumb.rs b/src/proxies/pimpbunnythumb.rs index 9b94a19..cdc508c 100644 --- a/src/proxies/pimpbunnythumb.rs +++ b/src/proxies/pimpbunnythumb.rs @@ -10,9 +10,37 @@ use crate::util::requester::Requester; const FIREFOX_USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; +const HTML_ACCEPT: &str = + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; const IMAGE_ACCEPT: &str = "image/avif,image/webp,image/png,image/svg+xml,image/*;q=0.8,*/*;q=0.5"; +fn root_referer() -> &'static str { + "https://pimpbunny.com/" +} + +fn root_html_headers() -> Vec<(String, String)> { + vec![ + ("Referer".to_string(), root_referer().to_string()), + ("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()), + ("Accept".to_string(), HTML_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ] +} + +fn image_headers(requester: &Requester, image_url: &str) -> Vec<(String, String)> { + let mut headers = vec![ + ("Referer".to_string(), root_referer().to_string()), + ("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()), + ("Accept".to_string(), IMAGE_ACCEPT.to_string()), + ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), + ]; + if let Some(cookie) = requester.cookie_header_for_url(image_url) { + headers.push(("Cookie".to_string(), cookie)); + } + headers +} + fn is_allowed_thumb_url(url: &str) -> bool { let Some(url) = Url::parse(url).ok() else { return false; @@ -43,13 +71,12 @@ pub async fn get_image( return Ok(web::HttpResponse::BadRequest().finish()); } - let headers = vec![ - ("Referer".to_string(), "https://pimpbunny.com/".to_string()), - ("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()), - ("Accept".to_string(), IMAGE_ACCEPT.to_string()), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ]; let mut requester = requester.get_ref().clone(); + let _ = requester + .get_with_headers(root_referer(), root_html_headers(), Some(Version::HTTP_11)) + .await; + + let mut headers = image_headers(&requester, image_url.as_str()); let mut upstream = requester .get_raw_with_headers(image_url.as_str(), headers.clone()) @@ -63,21 +90,9 @@ pub async fn get_image( if needs_warmup { let _ = requester - .get_with_headers( - "https://pimpbunny.com/", - vec![ - ("Referer".to_string(), "https://pimpbunny.com/".to_string()), - ("User-Agent".to_string(), FIREFOX_USER_AGENT.to_string()), - ( - "Accept".to_string(), - "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8" - .to_string(), - ), - ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), - ], - Some(Version::HTTP_11), - ) + .get_with_headers(root_referer(), root_html_headers(), Some(Version::HTTP_11)) .await; + headers = image_headers(&requester, image_url.as_str()); upstream = requester .get_raw_with_headers(image_url.as_str(), headers.clone()) @@ -94,6 +109,7 @@ pub async fn get_image( let _ = requester .get_with_headers(image_url.as_str(), headers.clone(), Some(Version::HTTP_11)) .await; + headers = image_headers(&requester, image_url.as_str()); upstream = requester .get_raw_with_headers(image_url.as_str(), headers) diff --git a/src/util/requester.rs b/src/util/requester.rs index 1e6916a..bb38bea 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -5,8 +5,9 @@ use std::sync::Arc; use wreq::Client; use wreq::Proxy; use wreq::Response; +use wreq::Uri; use wreq::Version; -use wreq::cookie::Jar; +use wreq::cookie::{CookieStore, Cookies, Jar}; use wreq::header::{HeaderMap, HeaderValue, USER_AGENT}; use wreq::multipart::Form; use wreq::redirect::Policy; @@ -83,6 +84,23 @@ impl Requester { self.proxy = proxy; } + pub fn cookie_header_for_url(&self, url: &str) -> Option { + let parsed = url.parse::().ok()?; + match self.cookie_jar.cookies(&parsed) { + Cookies::Compressed(value) => value.to_str().ok().map(ToOwned::to_owned), + Cookies::Uncompressed(values) => { + let joined = values + .into_iter() + .filter_map(|value| value.to_str().ok().map(ToOwned::to_owned)) + .collect::>() + .join("; "); + (!joined.is_empty()).then_some(joined) + } + Cookies::Empty => None, + _ => None, + } + } + pub async fn get_raw(&mut self, url: &str) -> Result { let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());