use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{ Provider, report_provider_error, report_provider_error_background, requester_or_default, }; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::requester::Requester; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use chrono::{DateTime, Utc}; use error_chain::error_chain; use futures::stream::{self, StreamExt}; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use scraper::{ElementRef, Html, Selector}; use serde_json::Value; use std::collections::HashSet; use std::sync::{Arc, RwLock}; use std::{thread, vec}; use wreq::Version; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "studio-network", tags: &["vr", "studios", "premium"], }; error_chain! { foreign_links { Io(std::io::Error); Json(serde_json::Error); } errors { Parse(msg: String) { description("parse error") display("parse error: {}", msg) } } } const BASE_URL: &str = "https://vrporn.com"; const CHANNEL_ID: &str = "vrporn"; const FIREFOX_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0"; const HTML_ACCEPT: &str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; #[derive(Debug, Clone)] pub struct VrpornProvider { url: String, categories: Arc>>, tags: Arc>>, stars: Arc>>, sites: Arc>>, } #[derive(Debug, Clone)] enum Target { Hot, New, Popular, Search(String), Archive(String), } impl VrpornProvider { pub fn new() -> Self { let provider = Self { url: BASE_URL.to_string(), categories: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), tags: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), stars: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), sites: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), }; provider.spawn_initial_load(); provider } fn spawn_initial_load(&self) { let provider = self.clone(); thread::spawn(move || { let runtime = match tokio::runtime::Builder::new_current_thread() .enable_all() .build() { Ok(runtime) => runtime, Err(error) => { report_provider_error_background( CHANNEL_ID, "spawn_initial_load.runtime_build", &error.to_string(), ); return; } }; runtime.block_on(async move { provider.refresh_filter_catalogs().await; }); }); } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { let categories = self .categories .read() .map(|values| values.clone()) .unwrap_or_default(); let tags = self.tags.read().map(|values| values.clone()).unwrap_or_default(); let stars = self .stars .read() .map(|values| values.clone()) .unwrap_or_default(); let sites = self .sites .read() .map(|values| values.clone()) .unwrap_or_default(); Channel { id: CHANNEL_ID.to_string(), name: "VRPorn".to_string(), description: "VRPorn.com browse, search, tag, pornstar, and studio archives with direct VR formats." .to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=vrporn.com".to_string(), status: "active".to_string(), categories: categories.iter().map(|value| value.title.clone()).collect(), options: vec![ ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Browse VRPorn sections.".to_string(), systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "hot".to_string(), title: "Hot Right Now".to_string(), }, FilterOption { id: "new".to_string(), title: "New".to_string(), }, FilterOption { id: "popular".to_string(), title: "Popular".to_string(), }, ], multiSelect: false, }, ChannelOption { id: "categories".to_string(), title: "Categories".to_string(), description: "Browse VRPorn category archives.".to_string(), systemImage: "square.grid.2x2".to_string(), colorName: "orange".to_string(), options: categories, multiSelect: false, }, ChannelOption { id: "filter".to_string(), title: "Tags".to_string(), description: "Browse VRPorn tag archives.".to_string(), systemImage: "tag.fill".to_string(), colorName: "green".to_string(), options: tags, multiSelect: false, }, ChannelOption { id: "stars".to_string(), title: "Pornstars".to_string(), description: "Browse VRPorn pornstar archives.".to_string(), systemImage: "star.fill".to_string(), colorName: "yellow".to_string(), options: stars, multiSelect: false, }, ChannelOption { id: "sites".to_string(), title: "Studios".to_string(), description: "Browse VRPorn studio archives.".to_string(), systemImage: "building.2.fill".to_string(), colorName: "purple".to_string(), options: sites, multiSelect: false, }, ], nsfw: true, cacheDuration: Some(1800), } } fn selector(value: &str) -> Result { Selector::parse(value) .map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}"))) } fn regex(value: &str) -> Result { Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}"))) } fn decode_text(text: &str) -> String { decode(text.as_bytes()) .to_string() .unwrap_or_else(|_| text.to_string()) .replace('\u{a0}', " ") .trim() .to_string() } fn collapse_whitespace(text: &str) -> String { text.split_whitespace().collect::>().join(" ") } fn text_of(element: &ElementRef<'_>) -> String { Self::decode_text(&Self::collapse_whitespace( &element.text().collect::>().join(" "), )) } fn normalize_title(value: &str) -> String { Self::decode_text(value) .to_ascii_lowercase() .split_whitespace() .collect::>() .join(" ") } fn normalize_url(&self, value: &str) -> String { if value.starts_with("http://") || value.starts_with("https://") { return value.to_string(); } if value.starts_with("//") { return format!("https:{value}"); } if value.starts_with('/') { return format!("{}{}", self.url, value); } format!("{}/{}", self.url.trim_end_matches('/'), value) } fn html_headers(&self, referer: &str) -> Vec<(String, String)> { vec![ ("User-Agent".to_string(), FIREFOX_UA.to_string()), ("Accept".to_string(), HTML_ACCEPT.to_string()), ("Referer".to_string(), referer.to_string()), ] } async fn fetch_html( &self, requester: &mut Requester, url: &str, referer: &str, ) -> Result { requester .get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11)) .await .map_err(|error| Error::from(format!("request failed for {url}: {error}"))) } fn push_unique(target: &Arc>>, item: FilterOption) { if item.id.is_empty() || item.title.is_empty() { return; } if let Ok(mut values) = target.write() { let normalized = Self::normalize_title(&item.title); if !values .iter() .any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized) { values.push(item); } } } fn extract_last_page(document: &Html) -> u16 { let selector = match Self::selector("a[href]") { Ok(value) => value, Err(_) => return 1, }; let regex = match Self::regex(r"/page/([0-9]+)/") { Ok(value) => value, Err(_) => return 1, }; document .select(&selector) .filter_map(|element| element.value().attr("href")) .filter_map(|href| { regex .captures(href) .and_then(|captures| captures.get(1)) .and_then(|value| value.as_str().parse::().ok()) }) .max() .unwrap_or(1) } async fn load_tags_and_categories(&self) -> Result<()> { let mut requester = Requester::new(); let home_url = format!("{}/", self.url); let home_html = self.fetch_html(&mut requester, &home_url, &home_url).await?; { let home_document = Html::parse_document(&home_html); let tag_selector = Self::selector("a[href^=\"/tag/\"]")?; for element in home_document.select(&tag_selector) { let href = element.value().attr("href").unwrap_or_default(); let title = Self::text_of(&element); if href.is_empty() || title.is_empty() { continue; } let url = self.normalize_url(href); let option = FilterOption { id: url, title }; Self::push_unique(&self.tags, option.clone()); Self::push_unique(&self.categories, option); } } let categories_url = format!("{}/categories/", self.url); let categories_html = self .fetch_html(&mut requester, &categories_url, &home_url) .await?; { let categories_document = Html::parse_document(&categories_html); let card_selector = Self::selector("article.ui-category-card a[href*=\"/tag/\"]")?; for element in categories_document.select(&card_selector) { let href = element.value().attr("href").unwrap_or_default(); let title = element .value() .attr("title") .map(Self::decode_text) .filter(|value| !value.is_empty()) .unwrap_or_else(|| Self::text_of(&element)); if href.is_empty() || title.is_empty() { continue; } let option = FilterOption { id: self.normalize_url(href), title, }; Self::push_unique(&self.categories, option.clone()); Self::push_unique(&self.tags, option); } } Ok(()) } async fn load_studios(&self) -> Result<()> { let mut requester = Requester::new(); let first_page_url = format!("{}/studios/", self.url); let first_html = self .fetch_html(&mut requester, &first_page_url, &format!("{}/", self.url)) .await?; let last_page = { let first_document = Html::parse_document(&first_html); let last_page = Self::extract_last_page(&first_document).max(1).min(25); self.collect_studios_from_document(&first_document)?; last_page }; for page in 2..=last_page { let url = format!("{}/studios/page/{page}/", self.url); let html = self.fetch_html(&mut requester, &url, &first_page_url).await?; let document = Html::parse_document(&html); self.collect_studios_from_document(&document)?; } Ok(()) } fn collect_studios_from_document(&self, document: &Html) -> Result<()> { let selector = Self::selector("article.ui-studio-card a[href]")?; for element in document.select(&selector) { let href = element.value().attr("href").unwrap_or_default(); if !href.contains("/studio/") { continue; } let title = element .value() .attr("title") .map(Self::decode_text) .filter(|value| !value.is_empty()) .unwrap_or_else(|| Self::text_of(&element)); if title.is_empty() { continue; } Self::push_unique( &self.sites, FilterOption { id: self.normalize_url(href), title, }, ); } Ok(()) } async fn load_pornstars(&self) -> Result<()> { let mut requester = Requester::new(); let first_page_url = format!("{}/pornstars/", self.url); let first_html = self .fetch_html(&mut requester, &first_page_url, &format!("{}/", self.url)) .await?; let last_page = { let first_document = Html::parse_document(&first_html); let last_page = Self::extract_last_page(&first_document).max(1).min(25); self.collect_pornstars_from_document(&first_document)?; last_page }; for page in 2..=last_page { let url = format!("{}/pornstars/page/{page}/", self.url); let html = self.fetch_html(&mut requester, &url, &first_page_url).await?; let document = Html::parse_document(&html); self.collect_pornstars_from_document(&document)?; } Ok(()) } fn collect_pornstars_from_document(&self, document: &Html) -> Result<()> { let selector = Self::selector("article.ui-card-model a[href]")?; for element in document.select(&selector) { let href = element.value().attr("href").unwrap_or_default(); if !href.contains("/pornstars/") { continue; } let title = element .value() .attr("title") .map(Self::decode_text) .filter(|value| !value.is_empty()) .unwrap_or_else(|| Self::text_of(&element)); if title.is_empty() { continue; } Self::push_unique( &self.stars, FilterOption { id: self.normalize_url(href), title, }, ); } Ok(()) } fn filters_need_refresh(&self) -> bool { self.categories .read() .map(|values| values.len()) .unwrap_or_default() <= 1 || self.tags.read().map(|values| values.len()).unwrap_or_default() <= 1 || self.stars.read().map(|values| values.len()).unwrap_or_default() <= 1 || self.sites.read().map(|values| values.len()).unwrap_or_default() <= 1 } async fn refresh_filter_catalogs(&self) { if let Err(error) = self.load_tags_and_categories().await { report_provider_error_background( CHANNEL_ID, "refresh_filter_catalogs.tags_and_categories", &error.to_string(), ); } if let Err(error) = self.load_studios().await { report_provider_error_background( CHANNEL_ID, "refresh_filter_catalogs.studios", &error.to_string(), ); } if let Err(error) = self.load_pornstars().await { report_provider_error_background( CHANNEL_ID, "refresh_filter_catalogs.pornstars", &error.to_string(), ); } } fn match_filter(options: &[FilterOption], query: &str) -> Option { let normalized_query = Self::normalize_title(query); options .iter() .find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query) .map(|value| value.id.clone()) } fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target { for candidate in [ options.sites.as_deref(), options.stars.as_deref(), options.filter.as_deref(), options.categories.as_deref(), ] .into_iter() .flatten() { if candidate.starts_with(&self.url) && candidate != "all" { return Target::Archive(candidate.to_string()); } } match sort { "new" | "latest" => Target::New, "popular" | "all" | "most_viewed" => Target::Popular, _ => Target::Hot, } } async fn resolve_query_target(&self, query: &str, sort: &str) -> Target { let query = query.trim(); if query.is_empty() { return self.resolve_option_target( &ServerOptions { featured: None, category: None, sites: None, filter: None, language: None, public_url_base: None, requester: None, network: None, stars: None, categories: None, duration: None, sort: None, sexuality: None, }, sort, ); } if self.filters_need_refresh() { self.refresh_filter_catalogs().await; } for options in [&self.sites, &self.stars, &self.tags, &self.categories] { if let Ok(values) = options.read() { if let Some(url) = Self::match_filter(&values, query) { return Target::Archive(url); } } } Target::Search(query.to_string()) } fn build_url_for_target(&self, target: &Target, page: u16) -> String { match target { Target::Hot => { if page <= 1 { format!("{}/", self.url) } else { format!("{}/all/page/{page}/", self.url) } } Target::New => { if page <= 1 { format!("{}/new/", self.url) } else { format!("{}/new/page/{page}/", self.url) } } Target::Popular => { if page <= 1 { format!("{}/all/", self.url) } else { format!("{}/all/page/{page}/", self.url) } } Target::Search(query) => { let query = query.split_whitespace().collect::>().join("+"); if page <= 1 { format!("{}/search/videos/?query={query}", self.url) } else { format!("{}/search/videos/page/{page}/?query={query}", self.url) } } Target::Archive(url) => { let base = url.trim_end_matches('/'); if page <= 1 { format!("{base}/") } else { format!("{base}/page/{page}/") } } } } fn parse_list_videos(&self, html: &str) -> Result> { let document = Html::parse_document(html); let article_selector = Self::selector("article.ui-video-card")?; let header_link_selector = Self::selector("header a[href]")?; let title_selector = Self::selector("a.ui-video-card__name")?; let thumb_selector = Self::selector("img.ui-video-card__cover")?; let duration_selector = Self::selector("div.ui-time span")?; let studio_selector = Self::selector("a.ui-video-card__studio-link[href]")?; let info_text_selector = Self::selector(".ui-video-card__info-mini span")?; let footer_text_selector = Self::selector(".ui-video-card__footer .ui-video-card__text")?; let mut items = Vec::new(); for article in document.select(&article_selector) { let header_link = article.select(&header_link_selector).next(); let title_link = article.select(&title_selector).next(); let href = title_link .and_then(|link| link.value().attr("href")) .or_else(|| header_link.and_then(|link| link.value().attr("href"))) .unwrap_or_default(); if href.is_empty() { continue; } let title = title_link .map(|element| Self::text_of(&element)) .filter(|value| !value.is_empty()) .or_else(|| { header_link.and_then(|element| { element .value() .attr("title") .map(Self::decode_text) .filter(|value| !value.is_empty()) }) }) .unwrap_or_default(); if title.is_empty() { continue; } let thumb = article .select(&thumb_selector) .next() .and_then(|img| img.value().attr("src")) .map(|value| self.normalize_url(value)) .unwrap_or_default(); let duration = article .select(&duration_selector) .next() .map(|element| Self::text_of(&element)) .and_then(|value| parse_time_to_seconds(&value)) .unwrap_or(0) .max(0) as u32; let url = self.normalize_url(href); let id = href .trim_matches('/') .split('/') .next_back() .unwrap_or_default() .to_string(); if id.is_empty() { continue; } let mut item = VideoItem::new( id, title, url.clone(), CHANNEL_ID.to_string(), thumb, duration, ); if let Some(studio) = article.select(&studio_selector).next() { let uploader = studio .value() .attr("title") .map(Self::decode_text) .filter(|value| !value.is_empty()) .unwrap_or_else(|| Self::text_of(&studio)); if !uploader.is_empty() { item.uploader = Some(uploader); } if let Some(href) = studio.value().attr("href") { item.uploaderUrl = Some(self.normalize_url(href)); } } let footer_values = article .select(&footer_text_selector) .map(|element| Self::text_of(&element)) .collect::>(); if footer_values.len() >= 2 { item.views = parse_abbreviated_number(&footer_values[1]); } let info_values = article .select(&info_text_selector) .map(|element| Self::text_of(&element)) .collect::>(); if item.uploader.is_none() && !info_values.is_empty() { item.uploader = Some(info_values[0].clone()); } items.push(item); } Ok(items) } fn extract_json_ld_video(&self, html: &str) -> Result> { let script_regex = Self::regex(r#"(?s)]+application/ld\+json[^>]*>(.*?)"#)?; for captures in script_regex.captures_iter(html) { let Some(raw) = captures.get(1).map(|value| value.as_str().trim()) else { continue; }; let parsed: Value = match serde_json::from_str(raw) { Ok(value) => value, Err(_) => continue, }; if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) { for item in graph { if item .get("@type") .and_then(Value::as_str) .is_some_and(|value| value == "VideoObject") { return Ok(Some(item.clone())); } } } if parsed .get("@type") .and_then(Value::as_str) .is_some_and(|value| value == "VideoObject") { return Ok(Some(parsed)); } } Ok(None) } fn parse_uploaded_at(value: &str) -> Option { DateTime::parse_from_rfc3339(value) .ok() .map(|date| date.with_timezone(&Utc).timestamp().max(0) as u64) } fn extract_media_quality(url: &str) -> String { let lowercase = url.to_ascii_lowercase(); for label in ["8k", "6k", "5k", "4k", "hq", "hd", "sd"] { if lowercase.contains(&format!("free_{label}")) || lowercase.contains(&format!("_{label}_")) || lowercase.contains(&format!("/{label}.mp4")) || lowercase.contains(&format!("{label}.mp4")) { return label.to_ascii_uppercase(); } } if lowercase.contains("original") { return "ORIGINAL".to_string(); } "MP4".to_string() } fn quality_rank(label: &str) -> usize { match label.to_ascii_lowercase().as_str() { "sd" => 0, "hq" => 1, "hd" => 2, "4k" => 3, "5k" => 4, "6k" => 5, "8k" => 6, "original" => 7, _ => 8, } } fn extract_formats(html: &str, referer: &str, slug: &str) -> Result> { let regex = Self::regex(r#"https://(?:cdns|mcdnds)\.vrporn\.com/[^"'\s]+\.mp4\?[^"'\s<]+"#)?; let mut seen = HashSet::new(); let mut raw_formats = Vec::new(); let slug = slug.trim_matches('/').to_ascii_lowercase(); for found in regex.find_iter(html).map(|value| value.as_str().to_string()) { let lowercase = found.to_ascii_lowercase(); if lowercase.contains("shortvideo") || lowercase.contains("preview") || lowercase.contains("trailer") || lowercase.contains("video-short") { continue; } if !slug.is_empty() && !lowercase.contains(&format!("/videos/{slug}/")) { continue; } if seen.insert(found.clone()) { let label = Self::extract_media_quality(&found); raw_formats.push((Self::quality_rank(&label), label, found)); } } raw_formats.sort_by_key(|(rank, _, _)| *rank); let formats = raw_formats .into_iter() .map(|(_, label, url)| { let mut format = VideoFormat::new(url, label.clone(), "mp4".to_string()).format_id(label); format.add_http_header("Referer".to_string(), referer.to_string()); format.add_http_header("User-Agent".to_string(), FIREFOX_UA.to_string()); format }) .collect::>(); Ok(formats) } fn extract_preview(html: &str, slug: &str) -> Result> { let regex = Self::regex( r#"https://(?:cdn|mcdn|mcdnds)\.vrporn\.com/[^"'\s]+(?:shortvideo[^"'\s]*|preview[^"'\s]*|trailer[^"'\s]*)\.mp4(?:\?[^"'\s<]+)?"#, )?; let slug = slug.trim_matches('/').to_ascii_lowercase(); Ok(regex.find_iter(html).find_map(|value| { let found = value.as_str().to_string(); if slug.is_empty() || found.to_ascii_lowercase().contains(&slug) { Some(found) } else { None } })) } fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result { let document = Html::parse_document(html); let title_selector = Self::selector("h1")?; let sub_text_selector = Self::selector("span.ui-player-title__sub-text")?; let studio_selector = Self::selector(".ui-detail-video a[href*=\"/studio/\"]")?; let pornstar_selector = Self::selector("div.starrings a[href*=\"/pornstars/\"]")?; let tag_selector = Self::selector("a[href*=\"/tag/\"]")?; let description_selector = Self::selector(".ui-detail-video__content-inner")?; let verified_selector = Self::selector(".detail-video__verfy")?; if let Some(title) = document.select(&title_selector).next() { let title = Self::text_of(&title); if !title.is_empty() { item.title = title; } } let sub_texts = document .select(&sub_text_selector) .map(|element| Self::text_of(&element)) .collect::>(); if let Some(value) = sub_texts.first() { let cleaned = value.trim_end_matches('%').trim(); if let Ok(parsed) = cleaned.parse::() { item.rating = Some(parsed); } } if let Some(value) = sub_texts.get(1) { if item.views.is_none() { item.views = parse_abbreviated_number(value); } } if document.select(&verified_selector).next().is_some() { item.verified = Some(true); } if let Some(studio) = document.select(&studio_selector).next() { let title = Self::text_of(&studio); if !title.is_empty() { item.uploader = Some(title); } if let Some(href) = studio.value().attr("href") { item.uploaderUrl = Some(self.normalize_url(href)); } } let mut tags = item.tags.take().unwrap_or_default(); let mut discovered_formats = Vec::new(); for element in document.select(&pornstar_selector) { let name = Self::text_of(&element); if !name.is_empty() && !tags.iter().any(|value| value == &name) { tags.push(name); } } for element in document.select(&tag_selector) { let href = element.value().attr("href").unwrap_or_default(); if !href.contains("/tag/") { continue; } let name = Self::text_of(&element); if !name.is_empty() && !tags.iter().any(|value| value == &name) { tags.push(name); } } if let Some(json_ld) = self.extract_json_ld_video(html)? { if let Some(title) = json_ld.get("name").and_then(Value::as_str) { if !title.trim().is_empty() { item.title = Self::decode_text(title); } } if item.thumb.is_empty() { if let Some(thumb) = json_ld.get("thumbnailUrl").and_then(Value::as_str) { item.thumb = self.normalize_url(thumb); } } if item.uploadedAt.is_none() { if let Some(uploaded_at) = json_ld.get("uploadDate").and_then(Value::as_str) { item.uploadedAt = Self::parse_uploaded_at(uploaded_at); } } if item.uploader.is_none() { if let Some(author) = json_ld.get("author") { if let Some(name) = author.get("name").and_then(Value::as_str) { item.uploader = Some(Self::decode_text(name)); } if let Some(url) = author.get("url").and_then(Value::as_str) { item.uploaderUrl = Some(self.normalize_url(url)); } } } if let Some(actors) = json_ld.get("actor").and_then(Value::as_array) { for actor in actors { if let Some(name) = actor.get("name").and_then(Value::as_str) { let name = Self::decode_text(name); if !name.is_empty() && !tags.iter().any(|value| value == &name) { tags.push(name); } } } } if let Some(content_url) = json_ld.get("contentUrl").and_then(Value::as_str) { let mut format = VideoFormat::new( content_url.to_string(), Self::extract_media_quality(content_url), "mp4".to_string(), ); format.add_http_header("Referer".to_string(), item.url.clone()); format.add_http_header("User-Agent".to_string(), FIREFOX_UA.to_string()); discovered_formats.push(format); } } if let Some(description) = document.select(&description_selector).next() { let description = Self::text_of(&description); if !description.is_empty() { for tag in description .split(|ch: char| !ch.is_alphanumeric() && ch != '-' && ch != '\'') .map(str::trim) .filter(|value| value.len() > 2 && value.len() < 40) .take(0) { let _ = tag; } } } if let Some(preview) = Self::extract_preview(html, &item.id)? { item.preview = Some(preview); } let mut formats = Self::extract_formats(html, &item.url, &item.id)?; for format in formats.drain(..) { if !discovered_formats.iter().any(|existing| existing.url == format.url) { discovered_formats.push(format); } } if !discovered_formats.is_empty() { item.formats = Some(discovered_formats); } if !tags.is_empty() { item.tags = Some(tags); } Ok(item) } async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem { let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video"); match self .fetch_html(&mut requester, &item.url, &format!("{}/", self.url)) .await { Ok(html) => match self.apply_detail_video(item.clone(), &html) { Ok(enriched) => enriched, Err(error) => { report_provider_error_background( CHANNEL_ID, "apply_detail_video", &error.to_string(), ); item } }, Err(error) => { report_provider_error_background(CHANNEL_ID, "fetch_detail", &error.to_string()); item } } } async fn fetch_items_for_url( &self, cache: VideoCache, url: String, per_page_limit: usize, enrich_details: bool, options: &ServerOptions, ) -> Result> { if let Some((time, items)) = cache.get(&url) { if time.elapsed().unwrap_or_default().as_secs() < 60 * 15 { return Ok(items.clone()); } } let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url"); let html = self .fetch_html(&mut requester, &url, &format!("{}/", self.url)) .await?; let items = self.parse_list_videos(&html)?; if items.is_empty() { return Ok(vec![]); } let limited = items .into_iter() .take(per_page_limit.max(1)) .collect::>(); if !enrich_details { cache.insert(url, limited.clone()); return Ok(limited); } let enriched = stream::iter(limited.into_iter().map(|item| { let provider = self.clone(); let options = options.clone(); async move { provider.enrich_video(item, &options).await } })) .buffer_unordered(4) .collect::>() .await; cache.insert(url, enriched.clone()); Ok(enriched) } async fn get( &self, cache: VideoCache, page: u16, sort: &str, per_page_limit: usize, options: ServerOptions, ) -> Result> { let target = self.resolve_option_target(&options, sort); let url = self.build_url_for_target(&target, page); self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options) .await } async fn query( &self, cache: VideoCache, page: u16, sort: &str, query: &str, per_page_limit: usize, options: ServerOptions, ) -> Result> { let target = self.resolve_query_target(query, sort).await; let url = self.build_url_for_target(&target, page); self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options) .await } } #[async_trait] impl Provider for VrpornProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { let _ = pool; let page = page.parse::().unwrap_or(1); let per_page_limit = per_page.parse::().unwrap_or(30); let result = match query { Some(query) if !query.trim().is_empty() => { self.query(cache, page, &sort, &query, per_page_limit, options) .await } _ => self.get(cache, page, &sort, per_page_limit, options).await, }; match result { Ok(videos) => videos, Err(error) => { report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; vec![] } } } fn get_channel(&self, clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::*; fn provider() -> VrpornProvider { VrpornProvider { url: BASE_URL.to_string(), categories: Arc::new(RwLock::new(vec![FilterOption { id: "https://vrporn.com/tag/anal/".to_string(), title: "Anal".to_string(), }])), tags: Arc::new(RwLock::new(vec![FilterOption { id: "https://vrporn.com/tag/passthrough/".to_string(), title: "Passthrough".to_string(), }])), stars: Arc::new(RwLock::new(vec![FilterOption { id: "https://vrporn.com/pornstars/elena-vrq/".to_string(), title: "Elena VRQ".to_string(), }])), sites: Arc::new(RwLock::new(vec![FilterOption { id: "https://vrporn.com/studio/vrsun/".to_string(), title: "VRSUN".to_string(), }])), } } #[test] fn builds_search_page_two_url() { let provider = provider(); let url = provider.build_url_for_target(&Target::Search("adriana chechik".to_string()), 2); assert_eq!( url, "https://vrporn.com/search/videos/page/2/?query=adriana+chechik" ); } #[test] fn builds_archive_page_two_url() { let provider = provider(); let url = provider.build_url_for_target( &Target::Archive("https://vrporn.com/tag/anal/".to_string()), 2, ); assert_eq!(url, "https://vrporn.com/tag/anal/page/2/"); } #[test] fn extracts_formats_from_detail_html() { let formats = VrpornProvider::extract_formats( r#" "#, "https://vrporn.com/test/", "test", ) .expect("formats should parse"); assert_eq!(formats.len(), 2); assert!(formats[0].url.contains("free_4k.mp4")); assert!(formats[1].url.contains("free_6k.mp4")); } #[test] fn detail_parser_ignores_embed_url() { let provider = provider(); let item = VideoItem::new( "test".to_string(), "Original".to_string(), "https://vrporn.com/videos/test/".to_string(), CHANNEL_ID.to_string(), String::new(), 0, ); let parsed = provider .apply_detail_video( item, r#" "#, ) .expect("detail HTML should parse"); assert_eq!(parsed.title, "Updated Title"); assert!(parsed.embed.is_none()); } }