From 9172941ac6c95ec85e101aef448dd5c36cba557c Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 17 Mar 2026 01:12:52 +0000 Subject: [PATCH] fixes --- src/providers/omgxxx.rs | 273 ++++++++++++++++++++++++++++++-------- src/providers/porndish.rs | 18 ++- src/proxies/porndish.rs | 6 +- 3 files changed, 235 insertions(+), 62 deletions(-) diff --git a/src/providers/omgxxx.rs b/src/providers/omgxxx.rs index a5a3be5..181031b 100644 --- a/src/providers/omgxxx.rs +++ b/src/providers/omgxxx.rs @@ -9,6 +9,7 @@ use crate::{status::*, util}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; +use scraper::{Html, Selector}; use std::sync::{Arc, RwLock}; use std::thread; use std::vec; @@ -586,6 +587,28 @@ impl OmgxxxProvider { return None; } + fn extract_tag_entries(&self, video_segment: &str) -> Vec<(String, String)> { + let fragment = Html::parse_fragment(video_segment); + let selector = Selector::parse("div.models a").expect("valid omgxxx models selector"); + fragment + .select(&selector) + .filter_map(|anchor| { + let href = anchor.value().attr("href")?.to_string(); + let title = anchor + .text() + .collect::>() + .join(" ") + .split_whitespace() + .collect::>() + .join(" "); + if title.is_empty() { + return None; + } + Some((href, title)) + }) + .collect() + } + fn get_video_items_from_html(&self, html: String) -> Vec { if html.is_empty() { println!("HTML is empty"); @@ -751,64 +774,50 @@ impl OmgxxxProvider { let site_id = self .get_site_id_from_name(site_name) .unwrap_or("".to_string()); - let mut tags = match video_segment.contains("class=\"models\">") { - true => video_segment - .split("class=\"models\">") - .collect::>() - .get(1) - .copied() - .unwrap_or_default() - .split("") - .collect::>() - .get(0) - .copied() - .unwrap_or_default() - .split("href=\"") - .collect::>()[1..] - .into_iter() - .map(|s| { + let mut tags = Vec::new(); + for (href, tag_title) in self.extract_tag_entries(video_segment) { + if href.contains("/models/") { + let model_id = href + .split("/models/") + .nth(1) + .unwrap_or_default() + .split('/') + .next() + .unwrap_or_default() + .to_string(); + if !model_id.is_empty() { Self::push_unique( &self.stars, FilterOption { - id: s - .split("/") - .collect::>() - .get(4) - .copied() - .unwrap_or_default() - .to_string(), - title: s - .split(">") - .collect::>() - .get(1) - .copied() - .unwrap_or_default() - .split("<") - .collect::>() - .get(0) - .copied() - .unwrap_or_default() - .trim() - .to_string(), + id: model_id, + title: tag_title.clone(), }, ); - s.split(">") - .collect::>() - .get(1) - .copied() - .unwrap_or_default() - .split("<") - .collect::>() - .get(0) - .copied() - .unwrap_or_default() - .trim() - .to_string() - }) - .collect::>() - .to_vec(), - false => vec![], - }; + } + } + if href.contains("/sites/") { + let site_id = href + .split("/sites/") + .nth(1) + .unwrap_or_default() + .split('/') + .next() + .unwrap_or_default() + .to_string(); + if !site_id.is_empty() { + Self::push_unique( + &self.sites, + FilterOption { + id: site_id, + title: tag_title.clone(), + }, + ); + } + } + if !tags.iter().any(|existing| existing == &tag_title) { + tags.push(tag_title); + } + } if !site_id.is_empty() { Self::push_unique( &self.sites, @@ -817,7 +826,9 @@ impl OmgxxxProvider { title: site_name.to_string(), }, ); - tags.push(site_name.to_string()); + if !tags.iter().any(|existing| existing == site_name) { + tags.push(site_name.to_string()); + } } let video_item = VideoItem::new( @@ -837,6 +848,160 @@ impl OmgxxxProvider { } } +#[cfg(test)] +mod tests { + use super::*; + + fn test_provider() -> OmgxxxProvider { + OmgxxxProvider { + url: "https://www.omg.xxx".to_string(), + sites: Arc::new(RwLock::new(vec![FilterOption { + id: "clubsweethearts".to_string(), + title: "Club Sweethearts".to_string(), + }])), + networks: Arc::new(RwLock::new(vec![])), + stars: Arc::new(RwLock::new(vec![])), + } + } + + #[test] + fn parses_model_and_site_tags_without_empty_strings() { + let provider = test_provider(); + let html = r##" +
+ +
+ "## + .to_string(); + + let items = provider.get_video_items_from_html(html); + assert_eq!(items.len(), 1); + assert_eq!( + items[0].tags, + Some(vec![ + "Club Sweethearts".to_string(), + "Oliver Trunk".to_string(), + "Sara Bork".to_string() + ]) + ); + assert!( + items[0] + .tags + .as_ref() + .unwrap() + .iter() + .all(|tag| !tag.is_empty()) + ); + + let stars = provider.stars.read().unwrap().clone(); + assert!( + stars + .iter() + .any(|tag| tag.id == "oliver-trunk" && tag.title == "Oliver Trunk") + ); + assert!( + stars + .iter() + .any(|tag| tag.id == "sara-bork" && tag.title == "Sara Bork") + ); + } + + #[test] + fn parses_live_item_shape_with_channel_and_pornstar_info() { + let provider = test_provider(); + let html = r##" + + "## + .to_string(); + + let items = provider.get_video_items_from_html(html); + assert_eq!(items.len(), 1); + assert_eq!( + items[0].tags, + Some(vec![ + "Family Sinners".to_string(), + "Vienna Rose".to_string(), + "Mark Wood".to_string() + ]) + ); + + let sites = provider.sites.read().unwrap().clone(); + assert!( + sites + .iter() + .any(|tag| tag.id == "family-sinners" && tag.title == "Family Sinners") + ); + + let stars = provider.stars.read().unwrap().clone(); + assert!( + stars + .iter() + .any(|tag| tag.id == "vienna-rose" && tag.title == "Vienna Rose") + ); + assert!( + stars + .iter() + .any(|tag| tag.id == "mark-wood" && tag.title == "Mark Wood") + ); + } +} + #[async_trait] impl Provider for OmgxxxProvider { async fn get_videos( diff --git a/src/providers/porndish.rs b/src/providers/porndish.rs index decddbd..6c7b399 100644 --- a/src/providers/porndish.rs +++ b/src/providers/porndish.rs @@ -438,7 +438,8 @@ sys.stdout.buffer.write(response.content) uploaders: Arc>>, ) -> Result<()> { let link_selector = Self::selector("a[href]")?; - let article_selector = Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy, article.post")?; + let article_selector = + Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy, article.post")?; let pages = vec![ format!("{base_url}/"), format!("{base_url}/page/2/"), @@ -643,7 +644,8 @@ sys.stdout.buffer.write(response.content) let source_selector = Self::selector(".entry-categories a[href]")?; let views_selector = Self::selector(".entry-views strong")?; let time_selector = Self::selector("time.entry-date[datetime]")?; - let author_selector = Self::selector(".entry-author a[href] strong, .entry-author a[href]")?; + let author_selector = + Self::selector(".entry-author a[href] strong, .entry-author a[href]")?; let mut items = Vec::new(); @@ -859,8 +861,12 @@ sys.stdout.buffer.write(response.content) .trim() .to_string(); - if stream_url.is_empty() || !(stream_url.starts_with("https://") || stream_url.starts_with("http://")) { - return Err(Error::from("vidara stream missing streaming_url".to_string())); + if stream_url.is_empty() + || !(stream_url.starts_with("https://") || stream_url.starts_with("http://")) + { + return Err(Error::from( + "vidara stream missing streaming_url".to_string(), + )); } Ok(stream_url) @@ -1255,7 +1261,9 @@ impl Provider for PorndishProvider { let page = page.parse::().unwrap_or(1); let videos = match query { - Some(query) if !query.trim().is_empty() => self.query(cache, page, &query, options).await, + Some(query) if !query.trim().is_empty() => { + self.query(cache, page, &query, options).await + } _ => self.get(cache, page, &sort, options).await, }; diff --git a/src/proxies/porndish.rs b/src/proxies/porndish.rs index 04ec2c2..3b02b9c 100644 --- a/src/proxies/porndish.rs +++ b/src/proxies/porndish.rs @@ -197,8 +197,7 @@ if location: } fn extract_iframe_fragments(html: &str) -> Vec { - let Some(regex) = - Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#) + let Some(regex) = Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#) else { return vec![]; }; @@ -230,7 +229,8 @@ if location: return None; } - let html = Self::fetch_with_curl_cffi(iframe_url, Some("https://www.porndish.com/")).await?; + let html = + Self::fetch_with_curl_cffi(iframe_url, Some("https://www.porndish.com/")).await?; let pass_regex = Self::regex(r#"\$\.get\(\s*['"](/pass_md5/[^'"]+)['"]"#)?; let path = pass_regex .captures(&html)