// omgxxx provider: scrapes listing and search pages below https://www.omg.xxx and
// serves the results through the crate's `Provider` trait.
//
// Definitions, in file order:
//   CHANNEL_METADATA          - group id "studio-network" plus three tags.
//   error_chain!              - module error type with foreign links to std::io::Error
//                               and wreq::Error.
//   OmgxxxProvider            - base url plus three lock-guarded filter lists (sites,
//                               networks, stars), each shared through an Arc.
//   new                       - seeds every list with an "all"/"All" entry, calls
//                               spawn_initial_load and returns without waiting for it.
//   spawn_initial_load        - spawns a thread, builds a current-thread tokio runtime on it
//                               and runs load_sites, load_networks, load_stars one after
//                               another. A runtime build failure is reported through
//                               report_provider_error_background; loader errors are only
//                               printed with eprintln!.
//   load_stars                - requests {base}/models/total-videos/{page}/?gender_id=0 for
//                               pages 1 through 9 (`[1..10]` flattened). Stops on a request
//                               error, an empty body, or a body containing "404 Not Found".
//   load_sites                - requests {base}/sites/{page}/ starting at page 1 with no upper
//                               bound; same three stop conditions as load_stars.
//   load_networks             - one request of the base url; elements containing "sites__all"
//                               are skipped. A request error is reported and Ok(()) returned.
//   push_unique               - takes the write lock and appends the item unless an entry
//                               with the same id exists; does nothing when write() fails.
//   build_channel             - ignores `clientversion`, clones the three lists (empty list
//                               when read() fails) and returns the Channel with the
//                               sort / sites / networks / stars options.
//   get                       - builds {url}/{path}/{page}/ where path is the sort name, or
//                               networks/…, sites/…, models/… when the matching option is set
//                               and is neither empty nor "all". The three checks run in that
//                               order and each overwrites the previous, so stars win over
//                               sites, which win over network. Cache entries younger than
//                               60 * 5 seconds are returned directly; older entries are kept
//                               as the fallback returned on a request error or an empty parse.
//   query                     - lower-cases and trims the query; a title match in the stars
//                               list switches the url to models/{id}, a title match in the
//                               sites list switches it to sites/{id}, otherwise search/{query}.
//                               Spaces in the final url are replaced with "+".
//   get_site_id_from_name     - returns the id of the first site whose title, lower-cased with
//                               " " and ".com" removed, equals the lower-cased, space-stripped
//                               name; None when nothing matches or read() fails.
//   extract_tag_entries       - parses the segment with scraper and returns (href, text) for
//                               every `div.models a` anchor, text whitespace-normalized;
//                               anchors without href or with empty text are dropped.
//   get_video_items_from_html - turns a listing page into VideoItems (details at the comment
//                               further down). Side effect: anchors whose href contains
//                               /models/ or /sites/ are pushed into self.stars / self.sites.
//   tests                     - build a provider by hand (no background load) and check tags
//                               and the lists filled while parsing.
//   impl Provider             - get_videos ignores per_page and pool, parses `page` (1 when
//                               unparsable), dispatches to query or get, and on Err prints
//                               the error and returns an empty list. get_channel always
//                               returns Some(build_channel(..)).
//
// Shared conventions visible below:
//   - Ids are the element at index 4 of the href/url split on "/"; every lookup in the
//     split chains falls back to "" through unwrap_or_default().
//   - NOTE(review): the three loaders do not check for empty id/title before push_unique,
//     so one entry with id "" can be stored when a marker is missing - confirm intended.
//   - NOTE(review): in query, the sites lookup compares against search_string after a star
//     match may already have replaced it with the star id - confirm intended.
use crate::DbPool; use crate::api::ClientVersion; use crate::providers::{Provider, report_provider_error, report_provider_error_background}; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoItem}; use crate::{status::*, util}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; use scraper::{Html, Selector}; use std::sync::{Arc, RwLock}; use std::thread; use std::vec; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "studio-network", tags: &["studio", "networks", "models"], }; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); } } #[derive(Debug, Clone)] pub struct OmgxxxProvider { url: String, sites: Arc>>, networks: Arc>>, stars: Arc>>, } impl OmgxxxProvider { pub fn new() -> Self { let provider = OmgxxxProvider { url: "https://www.omg.xxx".to_string(), sites: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), networks: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), stars: Arc::new(RwLock::new(vec![FilterOption { id: "all".to_string(), title: "All".to_string(), }])), }; // Kick off the background load but return immediately provider.spawn_initial_load(); provider } fn spawn_initial_load(&self) { let url = self.url.clone(); let sites = Arc::clone(&self.sites); let networks = Arc::clone(&self.networks); let stars = Arc::clone(&self.stars); thread::spawn(move || { // Create a tiny runtime just for these async tasks let rt = match tokio::runtime::Builder::new_current_thread() .enable_all() .build() { Ok(rt) => rt, Err(e) => { report_provider_error_background( "omgxxx", "spawn_initial_load.runtime_build", &e.to_string(), ); return; } }; rt.block_on(async move { // If you have a streaming sites loader, 
call it here too if let Err(e) = Self::load_sites(&url, sites).await { eprintln!("load_sites_into failed: {e}"); } if let Err(e) = Self::load_networks(&url, networks).await { eprintln!("load_networks failed: {e}"); } if let Err(e) = Self::load_stars(&url, stars).await { eprintln!("load_stars failed: {e}"); } }); }); } async fn load_stars(base_url: &str, stars: Arc>>) -> Result<()> { let mut requester = util::requester::Requester::new(); for page in [1..10].into_iter().flatten() { let text = match requester .get( format!("{}/models/total-videos/{}/?gender_id=0", &base_url, page).as_str(), None, ) .await { Ok(text) => text, Err(e) => { report_provider_error_background( "omgxxx", "load_stars.request", &format!("url={base_url}; page={page}; error={e}"), ); break; } }; if text.contains("404 Not Found") || text.is_empty() { break; } let stars_div = text .split("
") .collect::>() .last() .copied() .unwrap_or_default() .split("custom_list_models_models_list_pagination") .collect::>() .get(0) .copied() .unwrap_or_default(); for stars_element in stars_div.split(">()[1..].to_vec() { let star_url = stars_element .split("href=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default(); let star_id = star_url .split("/") .collect::>() .get(4) .copied() .unwrap_or_default() .to_string(); let star_name = stars_element .split("") .collect::>() .get(1) .copied() .unwrap_or_default() .split("<") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(); Self::push_unique( &stars, FilterOption { id: star_id, title: star_name, }, ); } } return Ok(()); } async fn load_sites(base_url: &str, sites: Arc>>) -> Result<()> { let mut requester = util::requester::Requester::new(); let mut page = 0; loop { page += 1; let text = requester .get(format!("{}/sites/{}/", &base_url, page).as_str(), None) .await; let text = match text { Ok(text) => text, Err(e) => { report_provider_error_background( "omgxxx", "load_sites.request", &format!("url={base_url}; page={page}; error={e}"), ); break; } }; if text.contains("404 Not Found") || text.is_empty() { break; } let sites_div = text .split("id=\"list_content_sources_sponsors_list_items\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("class=\"pagination\"") .collect::>() .get(0) .copied() .unwrap_or_default(); for sites_element in sites_div.split("class=\"headline\"").collect::>()[1..].to_vec() { let site_url = sites_element .split("href=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default(); let site_id = site_url .split("/") .collect::>() .get(4) .copied() .unwrap_or_default() .to_string(); let site_name = sites_element .split("

") .collect::>() .get(1) .copied() .unwrap_or_default() .split("<") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(); Self::push_unique( &sites, FilterOption { id: site_id, title: site_name, }, ); } } return Ok(()); } async fn load_networks(base_url: &str, networks: Arc>>) -> Result<()> { let mut requester = util::requester::Requester::new(); let text = match requester.get(&base_url, None).await { Ok(text) => text, Err(e) => { report_provider_error_background( "omgxxx", "load_networks.request", &format!("url={base_url}; error={e}"), ); return Ok(()); } }; let networks_div = text .split("class=\"sites__list\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("

") .collect::>() .get(0) .copied() .unwrap_or_default(); for network_element in networks_div.split("sites__item").collect::>()[1..].to_vec() { if network_element.contains("sites__all") { continue; } let network_url = network_element .split("href=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default(); let network_id = network_url .split("/") .collect::>() .get(4) .copied() .unwrap_or_default() .to_string(); let network_name = network_element .split(">") .collect::>() .get(1) .copied() .unwrap_or_default() .split("<") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(); Self::push_unique( &networks, FilterOption { id: network_id, title: network_name, }, ); } return Ok(()); } // Push one item with minimal lock time and dedup by id fn push_unique(target: &Arc>>, item: FilterOption) { if let Ok(mut vec) = target.write() { if !vec.iter().any(|x| x.id == item.id) { vec.push(item); // Optional: keep it sorted for nicer UX // vec.sort_by(|a,b| a.title.cmp(&b.title)); } } } fn build_channel(&self, clientversion: ClientVersion) -> Channel { let _ = clientversion; let sites: Vec = self .sites .read() .map(|g| g.clone()) // or: .map(|g| g.to_vec()) .unwrap_or_default(); // or: .unwrap_or_else(|_| Vec::new()) let networks: Vec = self .networks .read() .map(|g| g.clone()) // or: .map(|g| g.to_vec()) .unwrap_or_default(); // or: .unwrap_or_else(|_| Vec::new()) let stars: Vec = self .stars .read() .map(|g| g.clone()) // or: .map(|g| g.to_vec()) .unwrap_or_default(); // or: .unwrap_or_else(|_| Vec::new()) Channel { id: "omgxxx".to_string(), name: "OMG XXX".to_string(), description: "OMG look at that Collection!".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=www.omg.xxx".to_string(), status: "active".to_string(), categories: vec![], options: vec![ ChannelOption { id: "sort".to_string(), title: "Sort".to_string(), description: "Sort the Videos".to_string(), 
systemImage: "list.number".to_string(), colorName: "blue".to_string(), options: vec![ FilterOption { id: "latest-updates".into(), title: "Latest".into(), }, FilterOption { id: "most-popular".into(), title: "Most Viewed".into(), }, FilterOption { id: "top-rated".into(), title: "Top Rated".into(), }, ], multiSelect: false, }, ChannelOption { id: "sites".to_string(), title: "Sites".to_string(), description: "Filter for different Sites".to_string(), systemImage: "rectangle.stack".to_string(), colorName: "green".to_string(), options: sites, multiSelect: false, }, ChannelOption { id: "networks".to_string(), title: "Networks".to_string(), description: "Filter for different Networks".to_string(), systemImage: "list.dash".to_string(), colorName: "purple".to_string(), options: networks, multiSelect: false, }, ChannelOption { id: "stars".to_string(), title: "Stars".to_string(), description: "Filter for different Pornstars".to_string(), systemImage: "star.fill".to_string(), colorName: "yellow".to_string(), options: stars, multiSelect: false, }, ], nsfw: true, cacheDuration: None, } } async fn get( &self, cache: VideoCache, page: u8, sort: &str, options: ServerOptions, ) -> Result> { let mut sort_string: String = match sort { "top-rated" => "top-rated".to_string(), "most-popular" => "most-popular".to_string(), _ => "latest-updates".to_string(), }; let alt_sort_string: String = match sort { "top-rated" => "/top-rated".to_string(), "most-popular" => "/most-popular".to_string(), _ => "".to_string(), }; if let Some(network) = options.network.as_deref() { if !network.is_empty() && network != "all" { sort_string = format!("networks/{}{}", network, alt_sort_string); } } if let Some(site) = options.sites.as_deref() { if !site.is_empty() && site != "all" { sort_string = format!("sites/{}{}", site, alt_sort_string); } } if let Some(star) = options.stars.as_deref() { if !star.is_empty() && star != "all" { sort_string = format!("models/{}{}", star, alt_sort_string); } } let video_url = 
format!("{}/{}/{}/", self.url, sort_string, page); let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { return Ok(items.clone()); } else { items.clone() } } None => { vec![] } }; let mut requester = crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); let text = match requester.get(&video_url, None).await { Ok(text) => text, Err(e) => { report_provider_error( "omgxxx", "get.request", &format!("url={video_url}; error={e}"), ) .await; return Ok(old_items); } }; let video_items: Vec = self.get_video_items_from_html(text.clone()); if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); } else { return Ok(old_items); } Ok(video_items) } async fn query( &self, cache: VideoCache, page: u8, query: &str, options: ServerOptions, ) -> Result> { let mut search_type = "search"; let mut search_string = query.to_string().to_ascii_lowercase().trim().to_string(); match self.stars.read() { Ok(stars) => { if let Some(star) = stars .iter() .find(|s| s.title.to_ascii_lowercase() == search_string) { search_type = "models"; search_string = star.id.clone(); } } Err(e) => { report_provider_error_background("omgxxx", "query.stars_read", &e.to_string()); } } match self.sites.read() { Ok(sites) => { if let Some(site) = sites .iter() .find(|s| s.title.to_ascii_lowercase() == search_string) { search_type = "sites"; search_string = site.id.clone(); } } Err(e) => { report_provider_error_background("omgxxx", "query.sites_read", &e.to_string()); } } let mut video_url = format!("{}/{}/{}/{}/", self.url, search_type, search_string, page); video_url = video_url.replace(" ", "+"); // Check our Video Cache. If the result is younger than 1 hour, we return it. 
// NOTE(review): the comment above says 1 hour, but the check below uses 60 * 5 seconds.
// Both branches of the cache-hit arm return the cached items (the stale branch does so
// after awaiting cache.check()), so the request further down only runs on a cache miss
// and old_items is always empty when it is reached - confirm this is intended.
let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { return Ok(items.clone()); } else { let _ = cache.check().await; return Ok(items.clone()); } } None => { vec![] } }; let mut requester = crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); let text = match requester.get(&video_url, None).await { Ok(text) => text, Err(e) => { report_provider_error( "omgxxx", "query.request", &format!("url={video_url}; error={e}"), ) .await; return Ok(old_items); } }; let video_items: Vec = self.get_video_items_from_html(text.clone()); if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); } else { return Ok(old_items); } Ok(video_items) } fn get_site_id_from_name(&self, site_name: &str) -> Option { // site_name.to_lowercase().replace(" ", "") let sites_guard = match self.sites.read() { Ok(guard) => guard, Err(e) => { report_provider_error_background( "omgxxx", "get_site_id_from_name.sites_read", &e.to_string(), ); return None; } }; for site in sites_guard.iter() { if site .title .to_lowercase() .replace(" ", "") .replace(".com", "") == site_name.to_lowercase().replace(" ", "") { return Some(site.id.clone()); } } return None; } fn extract_tag_entries(&self, video_segment: &str) -> Vec<(String, String)> { let fragment = Html::parse_fragment(video_segment); let selector = Selector::parse("div.models a").expect("valid omgxxx models selector"); fragment .select(&selector) .filter_map(|anchor| { let href = anchor.value().attr("href")?.to_string(); let title = anchor .text() .collect::>() .join(" ") .split_whitespace() .collect::>() .join(" "); if title.is_empty() { return None; } Some((href, title)) }) .collect() } fn get_video_items_from_html(&self, html: String) -> Vec { if html.is_empty() { println!("HTML is empty"); return vec![]; } let mut items: Vec = Vec::new(); if !html.contains("class=\"item\"") { return items; } 
// Narrow the page to the markup before "videos_list_pagination" and before
// ` class="pagination" `, keep what follows `class="list-videos"`, then cut it into one
// segment per `class="item"`; `[1..]` drops the piece in front of the first item.
// Per segment the loop extracts url, title (HTML-entity decoded), id (index 4 of the url
// split on "/"), thumb (data-src when present after "img loading", else data-original),
// duration, views, data-preview, and the site name taken from the title text before the
// first "]" with leading "[" removed.
let raw_videos = html .split("videos_list_pagination") .collect::>() .get(0) .copied() .unwrap_or_default() .split(" class=\"pagination\" ") .collect::>() .get(0) .copied() .unwrap_or_default() .split("class=\"list-videos\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("class=\"item\"") .collect::>()[1..] .to_vec(); for video_segment in &raw_videos { // let vid = video_segment.split("\n").collect::>(); // for (index, line) in vid.iter().enumerate() { // println!("Line {}: {}", index, line); // } let video_url: String = video_segment .split(">() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(); let mut title = video_segment .split(" title=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(); // html decode title = decode(title.as_bytes()).to_string().unwrap_or(title); let id = video_url .split("/") .collect::>() .get(4) .copied() .unwrap_or_default() .to_string(); let thumb = match video_segment .split("img loading") .collect::>() .get(1) .copied() .unwrap_or_default() .contains("data-src=\"") { true => video_segment .split("img loading") .collect::>() .get(1) .copied() .unwrap_or_default() .split("data-src=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(), false => video_segment .split("img loading") .collect::>() .get(1) .copied() .unwrap_or_default() .split("data-original=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(), }; let raw_duration = video_segment .split("") .collect::>() .get(1) .copied() .unwrap_or_default() .split("<") .collect::>() .get(0) .copied() .unwrap_or_default() .split(" ") .collect::>() .last() .unwrap_or(&"") .to_string(); let duration = parse_time_to_seconds(raw_duration.as_str()).unwrap_or(0) as u32; 
// View count: the extracted text goes through parse_abbreviated_number; 0 when that fails.
let views = parse_abbreviated_number( video_segment .split("
") .collect::>() .get(1) .copied() .unwrap_or_default() .split("<") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string() .as_str(), ) .unwrap_or(0) as u32; let preview = video_segment .split("data-preview=\"") .collect::>() .get(1) .copied() .unwrap_or_default() .split("\"") .collect::>() .get(0) .copied() .unwrap_or_default() .to_string(); let site_name = title .split("]") .collect::>() .first() .unwrap_or(&"") .trim_start_matches("["); let site_id = self .get_site_id_from_name(site_name) .unwrap_or("".to_string()); let mut tags = Vec::new(); for (href, tag_title) in self.extract_tag_entries(video_segment) { if href.contains("/models/") { let model_id = href .split("/models/") .nth(1) .unwrap_or_default() .split('/') .next() .unwrap_or_default() .to_string(); if !model_id.is_empty() { Self::push_unique( &self.stars, FilterOption { id: model_id, title: tag_title.clone(), }, ); } } if href.contains("/sites/") { let site_id = href .split("/sites/") .nth(1) .unwrap_or_default() .split('/') .next() .unwrap_or_default() .to_string(); if !site_id.is_empty() { Self::push_unique( &self.sites, FilterOption { id: site_id, title: tag_title.clone(), }, ); } } if !tags.iter().any(|existing| existing == &tag_title) { tags.push(tag_title); } } if !site_id.is_empty() { Self::push_unique( &self.sites, FilterOption { id: site_id, title: site_name.to_string(), }, ); if !tags.iter().any(|existing| existing == site_name) { tags.push(site_name.to_string()); } } let video_item = VideoItem::new( id, title, video_url.to_string(), "omgxxx".to_string(), thumb, duration, ) .views(views) .preview(preview) .tags(tags); items.push(video_item); } return items; } } #[cfg(test)] mod tests { use super::*; fn test_provider() -> OmgxxxProvider { OmgxxxProvider { url: "https://www.omg.xxx".to_string(), sites: Arc::new(RwLock::new(vec![FilterOption { id: "clubsweethearts".to_string(), title: "Club Sweethearts".to_string(), }])), networks: Arc::new(RwLock::new(vec![])), stars: 
Arc::new(RwLock::new(vec![])), } } #[test] fn parses_model_and_site_tags_without_empty_strings() { let provider = test_provider(); let html = r##" "## .to_string(); let items = provider.get_video_items_from_html(html); assert_eq!(items.len(), 1); assert_eq!( items[0].tags, Some(vec![ "Club Sweethearts".to_string(), "Oliver Trunk".to_string(), "Sara Bork".to_string() ]) ); assert!( items[0] .tags .as_ref() .unwrap() .iter() .all(|tag| !tag.is_empty()) ); let stars = provider.stars.read().unwrap().clone(); assert!( stars .iter() .any(|tag| tag.id == "oliver-trunk" && tag.title == "Oliver Trunk") ); assert!( stars .iter() .any(|tag| tag.id == "sara-bork" && tag.title == "Sara Bork") ); } #[test] fn parses_live_item_shape_with_channel_and_pornstar_info() { let provider = test_provider(); let html = r##" "## .to_string(); let items = provider.get_video_items_from_html(html); assert_eq!(items.len(), 1); assert_eq!( items[0].tags, Some(vec![ "Family Sinners".to_string(), "Vienna Rose".to_string(), "Mark Wood".to_string() ]) ); let sites = provider.sites.read().unwrap().clone(); assert!( sites .iter() .any(|tag| tag.id == "family-sinners" && tag.title == "Family Sinners") ); let stars = provider.stars.read().unwrap().clone(); assert!( stars .iter() .any(|tag| tag.id == "vienna-rose" && tag.title == "Vienna Rose") ); assert!( stars .iter() .any(|tag| tag.id == "mark-wood" && tag.title == "Mark Wood") ); } } #[async_trait] impl Provider for OmgxxxProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, sort: String, query: Option, page: String, per_page: String, options: ServerOptions, ) -> Vec { let _ = per_page; let _ = pool; let videos: std::result::Result, Error> = match query { Some(q) => { self.query(cache, page.parse::().unwrap_or(1), &q, options) .await } None => { self.get(cache, page.parse::().unwrap_or(1), &sort, options) .await } }; match videos { Ok(v) => v, Err(e) => { println!("Error fetching videos: {}", e); vec![] } } } fn get_channel(&self, 
clientversion: ClientVersion) -> Option { Some(self.build_channel(clientversion)) } }