From 9021521c00000566deffa0e522d04aa0531618bd Mon Sep 17 00:00:00 2001 From: Simon Date: Sun, 22 Mar 2026 17:26:12 +0000 Subject: [PATCH] fixes --- build.rs | 5 - src/providers/arabpornxxx.rs | 1171 ---------------------------------- src/providers/sextb.rs | 128 +++- src/proxies/doodstream.rs | 292 +++++++++ src/proxies/mod.rs | 4 + src/proxy.rs | 7 + src/util/hoster_proxy.rs | 84 +++ src/util/mod.rs | 1 + 8 files changed, 515 insertions(+), 1177 deletions(-) delete mode 100644 src/providers/arabpornxxx.rs create mode 100644 src/proxies/doodstream.rs create mode 100644 src/util/hoster_proxy.rs diff --git a/build.rs b/build.rs index 692239b..4c15bc8 100644 --- a/build.rs +++ b/build.rs @@ -89,11 +89,6 @@ const PROVIDERS: &[ProviderDef] = &[ module: "yesporn", ty: "YespornProvider", }, - ProviderDef { - id: "arabpornxxx", - module: "arabpornxxx", - ty: "ArabpornxxxProvider", - }, ProviderDef { id: "sxyprn", module: "sxyprn", diff --git a/src/providers/arabpornxxx.rs b/src/providers/arabpornxxx.rs deleted file mode 100644 index a380205..0000000 --- a/src/providers/arabpornxxx.rs +++ /dev/null @@ -1,1171 +0,0 @@ -use crate::DbPool; -use crate::api::ClientVersion; -use crate::providers::{ - Provider, report_provider_error, report_provider_error_background, requester_or_default, -}; -use crate::status::*; -use crate::util::cache::VideoCache; -use crate::util::parse_abbreviated_number; -use crate::util::requester::Requester; -use crate::util::time::parse_time_to_seconds; -use crate::videos::{ServerOptions, VideoItem}; -use async_trait::async_trait; -use chrono::{DateTime, NaiveDate, Utc}; -use error_chain::error_chain; -use futures::stream::{self, StreamExt}; -use htmlentity::entity::{ICodedDataTrait, decode}; -use regex::Regex; -use scraper::{ElementRef, Html, Selector}; -use std::sync::{Arc, RwLock}; -use std::{thread, vec}; - -pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = - crate::providers::ProviderChannelMetadata { - group_id: "mainstream-tube", - tags: &["arab", "hijab", "niche"], - }; - -error_chain! { - foreign_links { - Io(std::io::Error); - } - errors { - Parse(msg: String) { - description("parse error") - display("parse error: {}", msg) - } - } -} - -const BASE_URL: &str = "https://arabporn.xxx"; -const CHANNEL_ID: &str = "arabpornxxx"; - -#[derive(Debug, Clone)] -pub struct ArabpornxxxProvider { - url: String, - categories: Arc>>, - tags: Arc>>, - uploaders: Arc>>, -} - -#[derive(Debug, Clone)] -enum Target { - Latest, - Archive(String), - Search(String), -} - -impl ArabpornxxxProvider { - pub fn new() -> Self { - let provider = Self { - url: BASE_URL.to_string(), - categories: Arc::new(RwLock::new(vec![FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }])), - tags: Arc::new(RwLock::new(vec![FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }])), - uploaders: Arc::new(RwLock::new(vec![FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }])), - }; - provider.spawn_initial_load(); - provider - } - - fn spawn_initial_load(&self) { - let url = self.url.clone(); - let categories = Arc::clone(&self.categories); - let tags = Arc::clone(&self.tags); - let uploaders = Arc::clone(&self.uploaders); - - thread::spawn(move || { - let runtime = match tokio::runtime::Builder::new_current_thread() - .enable_all() - .build() - { - Ok(runtime) => runtime, - Err(error) => { - report_provider_error_background( - CHANNEL_ID, - "spawn_initial_load.runtime_build", - &error.to_string(), - ); - return; - } - }; - - runtime.block_on(async move { - if let Err(error) = - Self::load_categories(&url, Arc::clone(&categories), Arc::clone(&uploaders)) - .await - { - report_provider_error_background( - CHANNEL_ID, - "load_categories", - &error.to_string(), - ); - } - if let Err(error) = Self::load_tags(&url, Arc::clone(&tags)).await { - report_provider_error_background(CHANNEL_ID, "load_tags", &error.to_string()); - } - }); - }); - } - - fn build_channel(&self, _clientversion: ClientVersion) -> Channel { - let categories = self - .categories - .read() - .map(|value| value.clone()) - .unwrap_or_default(); - let tags = self.tags.read().map(|value| value.clone()).unwrap_or_default(); - let uploaders = self - .uploaders - .read() - .map(|value| value.clone()) - .unwrap_or_default(); - - Channel { - id: CHANNEL_ID.to_string(), - name: "ArabPorn.xxx".to_string(), - description: - "ArabPorn.xxx videos with latest, popular, rated, category, tag, and source archives." - .to_string(), - premium: false, - favicon: "https://www.google.com/s2/favicons?sz=64&domain=arabporn.xxx".to_string(), - status: "active".to_string(), - categories: categories.iter().map(|value| value.title.clone()).collect(), - options: vec![ - ChannelOption { - id: "sort".to_string(), - title: "Sort".to_string(), - description: "Browse ArabPorn.xxx by archive order.".to_string(), - systemImage: "list.number".to_string(), - colorName: "blue".to_string(), - options: vec![ - FilterOption { - id: "new".to_string(), - title: "Latest".to_string(), - }, - FilterOption { - id: "popular".to_string(), - title: "Most Viewed".to_string(), - }, - FilterOption { - id: "rated".to_string(), - title: "Top Rated".to_string(), - }, - FilterOption { - id: "longest".to_string(), - title: "Longest".to_string(), - }, - FilterOption { - id: "commented".to_string(), - title: "Most Commented".to_string(), - }, - FilterOption { - id: "recommended".to_string(), - title: "Most Favorited".to_string(), - }, - ], - multiSelect: false, - }, - ChannelOption { - id: "categories".to_string(), - title: "Categories".to_string(), - description: "Browse an ArabPorn.xxx category archive.".to_string(), - systemImage: "square.grid.2x2".to_string(), - colorName: "orange".to_string(), - options: categories, - multiSelect: false, - }, - ChannelOption { - id: "filter".to_string(), - title: "Tags".to_string(), - description: "Browse an ArabPorn.xxx tag archive.".to_string(), - systemImage: "tag.fill".to_string(), - colorName: "green".to_string(), - options: tags, - multiSelect: false, - }, - ChannelOption { - id: "sites".to_string(), - title: "Uploaders".to_string(), - description: "Browse an ArabPorn.xxx source or uploader archive.".to_string(), - systemImage: "person.crop.square".to_string(), - colorName: "purple".to_string(), - options: uploaders, - multiSelect: false, - }, - ], - nsfw: true, - cacheDuration: Some(1800), - } - } - - fn selector(value: &str) -> Result { - Selector::parse(value) - .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) - } - - fn regex(value: &str) -> Result { - Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) - } - - fn text_of(element: &ElementRef<'_>) -> String { - element - .text() - .collect::>() - .join(" ") - .split_whitespace() - .collect::>() - .join(" ") - .trim() - .to_string() - } - - fn decode_html(text: &str) -> String { - decode(text.as_bytes()) - .to_string() - .unwrap_or_else(|_| text.to_string()) - } - - fn clean_title(value: &str) -> String { - Self::decode_html(value) - .split_whitespace() - .collect::>() - .join(" ") - .trim() - .to_string() - } - - fn normalize_title(value: &str) -> String { - value - .to_ascii_lowercase() - .chars() - .map(|value| { - if value.is_ascii_alphanumeric() { - value - } else { - ' ' - } - }) - .collect::() - .split_whitespace() - .collect::>() - .join(" ") - } - - fn normalize_url(&self, value: &str) -> String { - let value = value.trim(); - if value.is_empty() { - return String::new(); - } - if value.starts_with("http://") || value.starts_with("https://") { - return value.to_string(); - } - if value.starts_with("//") { - return format!("https:{value}"); - } - if value.starts_with('/') { - return format!("{}{}", self.url, value); - } - format!("{}/{}", self.url, value.trim_start_matches("./")) - } - - fn parse_duration(text: &str) -> u32 { - parse_time_to_seconds(text) - .and_then(|value| u32::try_from(value).ok()) - .unwrap_or(0) - } - - fn parse_views(text: &str) -> Option { - let cleaned = text - .replace("views", "") - .replace("view", "") - .replace(' ', "") - .trim() - .to_string(); - parse_abbreviated_number(&cleaned) - } - - fn parse_percent(text: &str) -> Option { - text.trim() - .trim_end_matches('%') - .trim() - .parse::() - .ok() - } - - fn parse_upload_date_timestamp(html: &str) -> Option { - let regex = Self::regex(r#""uploadDate"\s*:\s*"([^"]+)""#).ok()?; - let value = regex.captures(html)?.get(1)?.as_str(); - DateTime::parse_from_rfc3339(value) - .map(|value| value.with_timezone(&Utc).timestamp() as u64) - .ok() - .or_else(|| { - NaiveDate::parse_from_str(value, "%Y-%m-%d") - .ok() - .and_then(|value| value.and_hms_opt(0, 0, 0)) - .map(|value| value.and_utc().timestamp() as u64) - }) - } - - fn parse_iso8601_duration_seconds(value: &str) -> Option { - let regex = Self::regex(r#"^PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$"#).ok()?; - let captures = regex.captures(value)?; - let hours = captures - .get(1) - .and_then(|value| value.as_str().parse::().ok()) - .unwrap_or(0); - let minutes = captures - .get(2) - .and_then(|value| value.as_str().parse::().ok()) - .unwrap_or(0); - let seconds = captures - .get(3) - .and_then(|value| value.as_str().parse::().ok()) - .unwrap_or(0); - Some( - hours - .saturating_mul(3600) - .saturating_add(minutes.saturating_mul(60)) - .saturating_add(seconds), - ) - } - - fn extract_json_string(html: &str, key: &str) -> Option { - let regex = Self::regex(&format!(r#""{key}"\s*:\s*"([^"]+)""#)).ok()?; - regex - .captures(html) - .and_then(|value| value.get(1)) - .map(|value| value.as_str().to_string()) - } - - fn extract_js_value(html: &str, key: &str) -> Option { - let regex = Self::regex(&format!(r#"{key}\s*:\s*'((?:\\'|[^'])*)'"#)).ok()?; - regex - .captures(html) - .and_then(|value| value.get(1)) - .map(|value| value.as_str().replace("\\'", "'")) - } - - fn encode_search_query(query: &str) -> String { - let mut serializer = url::form_urlencoded::Serializer::new(String::new()); - serializer.append_pair("q", query); - serializer - .finish() - .strip_prefix("q=") - .unwrap_or_default() - .to_string() - } - - fn build_search_path_query(query: &str) -> String { - Self::normalize_title(query).replace(' ', "-") - } - - fn sort_param(sort: &str) -> &'static str { - match sort { - "popular" | "viewed" | "trending" => "video_viewed", - "rated" | "rating" | "top" => "rating", - "longest" | "duration" => "duration", - "commented" | "comments" => "most_commented", - "recommended" | "favorited" | "favourited" => "most_favourited", - _ => "post_date", - } - } - - fn default_archive_url_for_sort(&self, sort: &str) -> String { - match sort { - "rated" | "rating" | "top" => format!("{}/top-rated/", self.url), - "new" | "latest" | "date" | "recent" | "" => format!("{}/latest-updates/", self.url), - _ => format!("{}/most-popular/", self.url), - } - } - - fn build_latest_url(&self, page: u16) -> String { - format!( - "{}/latest-updates/?mode=async&function=get_block&block_id=list_videos_latest_videos_list&sort_by=post_date&from={page}", - self.url - ) - } - - fn build_archive_url(&self, archive_url: &str, page: u16, sort: &str) -> String { - let block_id = if archive_url.contains("/latest-updates/") { - "list_videos_latest_videos_list" - } else { - "list_videos_common_videos_list" - }; - let page_key = if block_id == "list_videos_latest_videos_list" { - "from" - } else { - "from" - }; - format!( - "{archive_url}?mode=async&function=get_block&block_id={block_id}&sort_by={}&{page_key}={page}", - Self::sort_param(sort) - ) - } - - fn build_search_url(&self, query: &str, page: u16, sort: &str) -> String { - let path_query = Self::build_search_path_query(query); - let encoded_query = Self::encode_search_query(query); - format!( - "{}/search/{path_query}/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q={encoded_query}&category_ids=&sort_by={}&from_videos={page}&from_albums={page}", - self.url, - Self::sort_param(sort) - ) - } - - fn match_filter(options: &[FilterOption], query: &str) -> Option { - let normalized_query = Self::normalize_title(query); - options - .iter() - .find(|value| { - value.id != "all" && Self::normalize_title(&value.title) == normalized_query - }) - .map(|value| value.id.clone()) - } - - fn push_unique(target: &Arc>>, item: FilterOption) { - if item.id.is_empty() || item.title.is_empty() { - return; - } - if let Ok(mut values) = target.write() { - if !values.iter().any(|value| value.id == item.id) { - values.push(item); - } - } - } - - async fn fetch_html(requester: &mut Requester, url: &str) -> Result { - requester - .get(url, None) - .await - .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) - } - - async fn load_categories( - base_url: &str, - categories: Arc>>, - uploaders: Arc>>, - ) -> Result<()> { - let mut requester = Requester::new(); - let html = Self::fetch_html(&mut requester, &format!("{base_url}/categories/")).await?; - let document = Html::parse_document(&html); - let selector = Self::selector("#list_categories_categories_list_items a.item[href]")?; - - for element in document.select(&selector) { - let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); - if !href.starts_with(&format!("{base_url}/categories/")) { - continue; - } - let remainder = href - .strip_prefix(&format!("{base_url}/categories/")) - .unwrap_or_default(); - if remainder.is_empty() || remainder.contains('/') { - continue; - } - - let title = element - .value() - .attr("title") - .map(Self::clean_title) - .filter(|value| !value.is_empty()) - .unwrap_or_else(|| Self::text_of(&element)); - if title.is_empty() { - continue; - } - - let option = FilterOption { - id: format!("{href}/"), - title, - }; - Self::push_unique(&categories, option.clone()); - Self::push_unique(&uploaders, option); - } - - Ok(()) - } - - async fn load_tags(base_url: &str, tags: Arc>>) -> Result<()> { - let mut requester = Requester::new(); - let html = Self::fetch_html(&mut requester, &format!("{base_url}/tags/")).await?; - let document = Html::parse_document(&html); - let selector = Self::selector("a[href]")?; - - for element in document.select(&selector) { - let href = element.value().attr("href").unwrap_or_default().trim_end_matches('/'); - if !href.starts_with(&format!("{base_url}/tags/")) { - continue; - } - let remainder = href - .strip_prefix(&format!("{base_url}/tags/")) - .unwrap_or_default(); - if remainder.is_empty() || remainder.contains('/') { - continue; - } - - let title = element - .value() - .attr("title") - .map(Self::clean_title) - .filter(|value| !value.is_empty()) - .unwrap_or_else(|| Self::text_of(&element)); - if title.is_empty() { - continue; - } - - Self::push_unique( - &tags, - FilterOption { - id: format!("{href}/"), - title, - }, - ); - } - - Ok(()) - } - - fn filters_need_refresh(&self) -> bool { - let categories_len = self - .categories - .read() - .map(|values| values.len()) - .unwrap_or_default(); - let tags_len = self.tags.read().map(|values| values.len()).unwrap_or_default(); - let uploaders_len = self - .uploaders - .read() - .map(|values| values.len()) - .unwrap_or_default(); - categories_len <= 1 || tags_len <= 1 || uploaders_len <= 1 - } - - async fn refresh_filter_catalogs(&self) { - if let Err(error) = Self::load_categories( - &self.url, - Arc::clone(&self.categories), - Arc::clone(&self.uploaders), - ) - .await - { - report_provider_error_background( - CHANNEL_ID, - "refresh_filter_catalogs.categories", - &error.to_string(), - ); - } - if let Err(error) = Self::load_tags(&self.url, Arc::clone(&self.tags)).await { - report_provider_error_background( - CHANNEL_ID, - "refresh_filter_catalogs.tags", - &error.to_string(), - ); - } - } - - fn resolve_option_target(&self, sort: &str, options: &ServerOptions) -> Target { - if let Some(uploader) = options.sites.as_deref() { - if uploader.starts_with(&self.url) && uploader != "all" { - return Target::Archive(uploader.to_string()); - } - } - - if let Some(tag) = options.filter.as_deref() { - if tag.starts_with(&self.url) && tag != "all" { - return Target::Archive(tag.to_string()); - } - } - - if let Some(category) = options.categories.as_deref() { - if category.starts_with(&self.url) && category != "all" { - return Target::Archive(category.to_string()); - } - } - - if matches!(sort, "new" | "latest" | "date" | "recent" | "") { - return Target::Latest; - } - - Target::Archive(self.default_archive_url_for_sort(sort)) - } - - fn resolve_query_target(&self, query: &str) -> Target { - if let Ok(uploaders) = self.uploaders.read() { - if let Some(value) = Self::match_filter(&uploaders, query) { - return Target::Archive(value); - } - } - - if let Ok(tags) = self.tags.read() { - if let Some(value) = Self::match_filter(&tags, query) { - return Target::Archive(value); - } - } - - if let Ok(categories) = self.categories.read() { - if let Some(value) = Self::match_filter(&categories, query) { - return Target::Archive(value); - } - } - - Target::Search(query.to_string()) - } - - fn build_target_request(&self, target: &Target, page: u16, sort: &str) -> String { - match target { - Target::Latest => self.build_latest_url(page), - Target::Archive(url) => self.build_archive_url(url, page, sort), - Target::Search(query) => self.build_search_url(query, page, sort), - } - } - - fn list_container<'a>(&self, document: &'a Html) -> Result>> { - for selector_text in [ - "#list_videos_latest_videos_list_items", - "#list_videos_common_videos_list_items", - "#list_videos_videos_list_search_result_items", - "#list_videos_related_videos_items", - "#list_videos_videos_watched_right_now_items", - ] { - let selector = Self::selector(selector_text)?; - if let Some(element) = document.select(&selector).next() { - return Ok(Some(element)); - } - } - Ok(None) - } - - fn parse_list_videos(&self, html: &str) -> Result> { - let document = Html::parse_document(html); - let Some(container) = self.list_container(&document)? else { - return Ok(vec![]); - }; - - let card_selector = Self::selector("div.item")?; - let link_selector = Self::selector("a[href*=\"/videos/\"]")?; - let image_selector = Self::selector("div.img img")?; - let title_selector = Self::selector("strong.title")?; - let duration_selector = Self::selector("div.duration")?; - let rating_selector = Self::selector("div.rating")?; - let views_selector = Self::selector("div.views")?; - - let mut items = Vec::new(); - - for card in container.select(&card_selector) { - let Some(link) = card.select(&link_selector).next() else { - continue; - }; - - let href = link.value().attr("href").unwrap_or_default(); - let page_url = self.normalize_url(href); - if page_url.is_empty() { - continue; - } - - let id = page_url - .trim_end_matches('/') - .split('/') - .nth_back(1) - .unwrap_or_default() - .to_string(); - if id.is_empty() { - continue; - } - - let title = card - .select(&title_selector) - .next() - .map(|value| Self::clean_title(&Self::text_of(&value))) - .filter(|value| !value.is_empty()) - .or_else(|| { - link.value() - .attr("title") - .map(Self::clean_title) - .filter(|value| !value.is_empty()) - }); - let Some(title) = title else { - continue; - }; - - let image = card.select(&image_selector).next(); - let thumb = image - .and_then(|value| { - value - .value() - .attr("data-webp") - .or_else(|| value.value().attr("data-original")) - .or_else(|| value.value().attr("src")) - }) - .map(|value| self.normalize_url(value)) - .unwrap_or_default(); - if thumb.is_empty() { - continue; - } - - let duration = card - .select(&duration_selector) - .next() - .map(|value| Self::parse_duration(&Self::text_of(&value))) - .unwrap_or(0); - let views = card - .select(&views_selector) - .next() - .and_then(|value| Self::parse_views(&Self::text_of(&value))); - let rating = card - .select(&rating_selector) - .next() - .and_then(|value| Self::parse_percent(&Self::text_of(&value))); - - let mut item = VideoItem::new( - id, - title, - page_url, - CHANNEL_ID.to_string(), - thumb, - duration, - ); - item.views = views; - item.rating = rating; - items.push(item); - } - - Ok(items) - } - - fn lookup_category_url_by_title(&self, title: &str) -> Option { - let categories = self.categories.read().ok()?; - categories - .iter() - .find(|value| { - value.id != "all" && Self::normalize_title(&value.title) == Self::normalize_title(title) - }) - .map(|value| value.id.clone()) - } - - fn lookup_tag_url_by_title(&self, title: &str) -> Option { - let tags = self.tags.read().ok()?; - tags.iter() - .find(|value| { - value.id != "all" && Self::normalize_title(&value.title) == Self::normalize_title(title) - }) - .map(|value| value.id.clone()) - } - - async fn enrich_video(&self, mut item: VideoItem, options: &ServerOptions) -> VideoItem { - let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video.requester"); - let html = match requester.get(&item.url, None).await { - Ok(value) => value, - Err(error) => { - report_provider_error_background( - CHANNEL_ID, - "enrich_video.request", - &format!("url={}; error={error}", item.url), - ); - return item; - } - }; - - let document = Html::parse_document(&html); - let info_selector = match Self::selector(".block-details .info .item") { - Ok(value) => value, - Err(_) => return item, - }; - let span_selector = match Self::selector("span") { - Ok(value) => value, - Err(_) => return item, - }; - let category_selector = match Self::selector(".block-details .info a[href*=\"/categories/\"]") - { - Ok(value) => value, - Err(_) => return item, - }; - let tag_selector = match Self::selector(".block-details .info a[href*=\"/tags/\"]") { - Ok(value) => value, - Err(_) => return item, - }; - - if let Some(title) = Self::extract_json_string(&html, "name") - .or_else(|| Self::extract_json_string(&html, "headline")) - { - let title = Self::clean_title(&title); - if !title.is_empty() { - item.title = title; - } - } - - if let Some(preview) = Self::extract_js_value(&html, "preview_url") - .or_else(|| Self::extract_json_string(&html, "thumbnailUrl")) - { - let preview = self.normalize_url(&preview); - if !preview.is_empty() { - item.preview = Some(preview.clone()); - if item.thumb.is_empty() { - item.thumb = preview; - } - } - } - - if let (Some(width), Some(height)) = ( - Self::extract_js_value(&html, "player_width") - .and_then(|value| value.parse::().ok()), - Self::extract_js_value(&html, "player_height") - .and_then(|value| value.parse::().ok()), - ) { - if width > 0.0 && height > 0.0 { - item.aspectRatio = Some(width / height); - } - } - - if let Some(duration) = Self::extract_json_string(&html, "duration") { - if let Some(duration) = Self::parse_iso8601_duration_seconds(&duration) { - if duration > 0 { - item.duration = duration; - } - } - } - - if let Some(uploaded_at) = Self::parse_upload_date_timestamp(&html) { - item.uploadedAt = Some(uploaded_at); - } - - let watch_action_regex = match Self::regex( - r#"(?s)"interactionType"\s*:\s*"http://schema.org/WatchAction".*?"userInteractionCount"\s*:\s*"(\d+)""#, - ) { - Ok(value) => value, - Err(_) => return item, - }; - if let Some(views) = watch_action_regex - .captures(&html) - .and_then(|value| value.get(1)) - .and_then(|value| value.as_str().parse::().ok()) - { - item.views = Some(views); - } - - let mut category_entries = Vec::<(String, String)>::new(); - let mut tag_entries = Vec::<(String, String)>::new(); - - for element in document.select(&category_selector) { - let title = Self::clean_title(&Self::text_of(&element)); - if title.is_empty() { - continue; - } - let url = self.normalize_url(element.value().attr("href").unwrap_or_default()); - if url.is_empty() { - continue; - } - category_entries.push((title.clone(), url.clone())); - Self::push_unique( - &self.categories, - FilterOption { - id: url.clone(), - title: title.clone(), - }, - ); - Self::push_unique( - &self.uploaders, - FilterOption { - id: url, - title, - }, - ); - } - - for element in document.select(&tag_selector) { - let title = Self::clean_title(&Self::text_of(&element)); - if title.is_empty() { - continue; - } - let url = self.normalize_url(element.value().attr("href").unwrap_or_default()); - if url.is_empty() { - continue; - } - tag_entries.push((title.clone(), url.clone())); - Self::push_unique( - &self.tags, - FilterOption { - id: url, - title, - }, - ); - } - - if category_entries.is_empty() { - if let Some(category_text) = Self::extract_js_value(&html, "video_categories") { - for raw in category_text.split(',') { - let title = Self::clean_title(raw); - if title.is_empty() { - continue; - } - let url = self.lookup_category_url_by_title(&title).unwrap_or_default(); - category_entries.push((title, url)); - } - } - } - - if tag_entries.is_empty() { - if let Some(tag_text) = Self::extract_js_value(&html, "video_tags") { - for raw in tag_text.split(',') { - let title = Self::clean_title(raw); - if title.is_empty() { - continue; - } - let url = self.lookup_tag_url_by_title(&title).unwrap_or_default(); - tag_entries.push((title, url)); - } - } - } - - if let Some((uploader, uploader_url)) = category_entries.first() { - item.uploader = Some(uploader.clone()); - if !uploader_url.is_empty() { - item.uploaderUrl = Some(uploader_url.clone()); - } - } - - let mut tag_values = category_entries - .iter() - .map(|(title, _)| title.clone()) - .collect::>(); - tag_values.extend(tag_entries.into_iter().map(|(title, _)| title)); - tag_values.sort(); - tag_values.dedup(); - if !tag_values.is_empty() { - item.tags = Some(tag_values); - } - - for info in document.select(&info_selector) { - for span in info.select(&span_selector) { - let text = Self::text_of(&span); - if let Some(value) = text.strip_prefix("Duration:") { - let duration = Self::parse_duration(value.trim()); - if duration > 0 { - item.duration = duration; - } - } else if let Some(value) = text.strip_prefix("Views:") { - if let Some(views) = Self::parse_views(value.trim()) { - item.views = Some(views); - } - } - } - } - - let rating_selector = match Self::selector(".rating-container .voters, .rating-container .rating") { - Ok(value) => value, - Err(_) => return item, - }; - for element in document.select(&rating_selector) { - let text = Self::text_of(&element); - if let Some(rating) = Self::parse_percent(&text) { - item.rating = Some(rating); - break; - } - } - - item - } - - async fn fetch_items_for_url( - &self, - cache: VideoCache, - url: String, - per_page_limit: usize, - options: &ServerOptions, - ) -> Result> { - if let Some((time, items)) = cache.get(&url) { - if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { - return Ok(items.into_iter().take(per_page_limit.max(1)).collect()); - } - } - - let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url.requester"); - let html = Self::fetch_html(&mut requester, &url).await?; - let list_items = self.parse_list_videos(&html)?; - if list_items.is_empty() { - return Ok(vec![]); - } - - let enriched = stream::iter(list_items.into_iter().map(|item| { - let provider = self.clone(); - let options = options.clone(); - async move { provider.enrich_video(item, &options).await } - })) - .buffer_unordered(4) - .collect::>() - .await; - - if !enriched.is_empty() { - cache.remove(&url); - cache.insert(url, enriched.clone()); - } - - Ok(enriched.into_iter().take(per_page_limit.max(1)).collect()) - } - - async fn get( - &self, - cache: VideoCache, - page: u16, - sort: &str, - per_page_limit: usize, - options: ServerOptions, - ) -> Result> { - let target = self.resolve_option_target(sort, &options); - let url = self.build_target_request(&target, page, sort); - self.fetch_items_for_url(cache, url, per_page_limit, &options) - .await - } - - async fn query( - &self, - cache: VideoCache, - page: u16, - sort: &str, - query: &str, - per_page_limit: usize, - options: ServerOptions, - ) -> Result> { - let target = self.resolve_query_target(query); - let url = self.build_target_request(&target, page, sort); - self.fetch_items_for_url(cache, url, per_page_limit, &options) - .await - } -} - -#[async_trait] -impl Provider for ArabpornxxxProvider { - async fn get_videos( - &self, - cache: VideoCache, - pool: DbPool, - sort: String, - query: Option, - page: String, - per_page: String, - options: ServerOptions, - ) -> Vec { - let _ = pool; - let page = page.parse::().unwrap_or(1); - let per_page_limit = per_page.parse::().unwrap_or(30); - - if self.filters_need_refresh() { - self.refresh_filter_catalogs().await; - } - - let result = match query { - Some(query) if !query.trim().is_empty() => { - self.query(cache, page, &sort, &query, per_page_limit, options) - .await - } - _ => self.get(cache, page, &sort, per_page_limit, options).await, - }; - - match result { - Ok(videos) => videos, - Err(error) => { - report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; - vec![] - } - } - } - - fn get_channel(&self, clientversion: ClientVersion) -> Option { - Some(self.build_channel(clientversion)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn provider() -> ArabpornxxxProvider { - ArabpornxxxProvider { - url: BASE_URL.to_string(), - categories: Arc::new(RwLock::new(vec![ - FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }, - FilterOption { - id: format!("{BASE_URL}/categories/hijab-mylfs/"), - title: "Hijab Mylfs".to_string(), - }, - ])), - tags: Arc::new(RwLock::new(vec![ - FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }, - FilterOption { - id: format!("{BASE_URL}/tags/arabic-porn/"), - title: "Arabic Porn".to_string(), - }, - ])), - uploaders: Arc::new(RwLock::new(vec![ - FilterOption { - id: "all".to_string(), - title: "All".to_string(), - }, - FilterOption { - id: format!("{BASE_URL}/categories/hijab-mylfs/"), - title: "Hijab Mylfs".to_string(), - }, - ])), - } - } - - #[test] - fn builds_search_page_two_url() { - let provider = provider(); - assert_eq!( - provider.build_search_url("arab hijab", 2, "recommended"), - "https://arabporn.xxx/search/arab-hijab/?mode=async&function=get_block&block_id=list_videos_videos_list_search_result&q=arab+hijab&category_ids=&sort_by=most_favourited&from_videos=2&from_albums=2" - ); - } - - #[test] - fn resolves_known_tag_query_to_archive() { - let provider = provider(); - match provider.resolve_query_target("arabic porn") { - Target::Archive(url) => { - assert_eq!(url, "https://arabporn.xxx/tags/arabic-porn/"); - } - _ => panic!("expected archive target"), - } - } - - #[test] - fn defaults_non_new_sorts_to_common_archive_root() { - let provider = provider(); - match provider.resolve_option_target( - "recommended", - &ServerOptions { - featured: None, - category: None, - sites: None, - filter: None, - language: None, - public_url_base: None, - requester: None, - network: None, - stars: None, - categories: None, - duration: None, - sort: None, - sexuality: None, - }, - ) { - Target::Archive(url) => { - assert_eq!(url, "https://arabporn.xxx/most-popular/"); - } - _ => panic!("expected archive target"), - } - } -} diff --git a/src/providers/sextb.rs b/src/providers/sextb.rs index a1daf4b..1bb7e24 100644 --- a/src/providers/sextb.rs +++ b/src/providers/sextb.rs @@ -5,6 +5,7 @@ use crate::providers::{ }; use crate::status::*; use crate::util::cache::VideoCache; +use crate::util::hoster_proxy::{proxy_name_for_url, rewrite_hoster_url}; use crate::util::parse_abbreviated_number; use crate::util::requester::Requester; use crate::videos::{ServerOptions, VideoItem}; @@ -97,6 +98,13 @@ struct DetailMetadata { rating: Option, } +#[derive(Debug, Clone, PartialEq, Eq)] +struct PlayerEpisode { + label: String, + film_id: String, + episode_id: String, +} + impl SextbProvider { pub fn new() -> Self { let provider = Self { @@ -900,6 +908,79 @@ impl SextbProvider { Ok(metadata) } + fn parse_player_episodes(html: &str) -> Result> { + let regex = Self::regex( + r#"(?is)]*class="[^"]*\bbtn-player\b[^"]*"[^>]*data-source="(?P\d+)"[^>]*data-id="(?P\d+)"[^>]*>.*?\s*(?P