diff --git a/Cargo.toml b/Cargo.toml index 995660f..925495c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ async-trait = "0.1" regex = "1.12.2" titlecase = "3.6.0" dashmap = "6.1.0" +lru = "0.16.3" [lints.rust] unexpected_cfgs = "allow" diff --git a/src/providers/mod.rs b/src/providers/mod.rs index fd855ce..30d36f1 100644 --- a/src/providers/mod.rs +++ b/src/providers/mod.rs @@ -59,6 +59,7 @@ pub static ALL_PROVIDERS: Lazy> = Lazy::new(| m.insert("pimpbunny", Arc::new(pimpbunny::PimpbunnyProvider::new()) as DynProvider); m.insert("javtiful", Arc::new(javtiful::JavtifulProvider::new()) as DynProvider); m.insert("hypnotube", Arc::new(hypnotube::HypnotubeProvider::new()) as DynProvider); + m.insert("perverzija", Arc::new(perverzija::PerverzijaProvider::new()) as DynProvider); // add more here as you migrate them m }); diff --git a/src/providers/pornzog.rs b/src/providers/pornzog.rs index 166f6bc..7c23ef6 100644 --- a/src/providers/pornzog.rs +++ b/src/providers/pornzog.rs @@ -1,6 +1,7 @@ use crate::DbPool; use crate::providers::Provider; use crate::util::cache::VideoCache; +use crate::util::discord::{format_error_chain, send_discord_error_report}; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoItem}; use error_chain::error_chain; @@ -19,12 +20,14 @@ error_chain! { pub struct PornzogProvider { url: String, } + impl PornzogProvider { pub fn new() -> Self { PornzogProvider { url: "https://pornzog.com".to_string(), } } + async fn query( &self, cache: VideoCache, @@ -47,7 +50,7 @@ impl PornzogProvider { search_params.push(format!("{}", &sort_string)); let video_url = format!("{}/search/?{}", self.url, search_params.join("&")); - // Check our Video Cache. If the result is younger than 1 hour, we return it. + let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { @@ -57,14 +60,18 @@ impl PornzogProvider { return Ok(items.clone()); } } - None => { - vec![] - } + None => vec![], }; - let mut requester = options.requester.clone().unwrap(); - let text = requester.get(&video_url, None).await.unwrap(); + // SAFE: Check if requester exists instead of unwrap() + let mut requester = match options.requester.clone() { + Some(r) => r, + None => return Ok(old_items), + }; + + let text = requester.get(&video_url, None).await.map_err(|e| format!("{}", e))?; let video_items: Vec = self.get_video_items_from_html(text.clone()); + if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); @@ -78,79 +85,72 @@ impl PornzogProvider { if html.is_empty() { return vec![]; } + let mut items: Vec = Vec::new(); - let raw_videos = html.split("class=\"paginator\"").collect::>()[0] - .split("class=\"thumb-video ") - .collect::>()[1..] - .to_vec(); - for video_segment in &raw_videos { - // let vid = video_segment.split("\n").collect::>(); - // for (index, line) in vid.iter().enumerate() { - // println!("Line {}: {}", index, line); - // } - let mut video_url: String = video_segment.split("href=\"").collect::>()[1] - .split("\"") - .collect::>()[0] - .to_string(); - if video_url.starts_with("/") { - video_url = format!("{}{}", self.url, video_url); - } - let mut title = video_segment.split("alt=\"").collect::>()[1] - .split("\"") - .collect::>()[0] - .to_string(); - // html decode - title = decode(title.as_bytes()).to_string().unwrap_or(title); - let id = video_url.split("/").collect::>()[4].to_string(); - let thumb = format!( - "{}", - video_segment.split(">()[1] - .split("data-original=\"") - .collect::>()[1] - .split("\"") - .collect::>()[0] - .to_string() - ); - let raw_duration = video_segment - .split("class=\"duration\">") - .collect::>()[1] - .split("<") - .collect::>()[0] - .to_string(); - let duration = parse_time_to_seconds(raw_duration.as_str()).unwrap_or(0) as u32; - // let uploader = video_segment.split("class=\"source\">").collect::>()[1] - // .split(">").collect::>()[1] - // .split("<").collect::>()[0] - // .to_string(); - - let tags = video_segment.split("class=\"tags\"").collect::>()[1] - .split("

") - .collect::>()[0] - .split(">()[1..] - .iter() - .map(|el| { - el.split(">").collect::>()[1] - .split("<") - .collect::>()[0] - .to_string() - }) - .collect::>(); - - let video_item = VideoItem::new( - id, - title, - video_url.to_string(), - "pornzog".to_string(), - thumb, - duration, - ) - // .uploader(uploader) - .tags(tags); - items.push(video_item); + // Helper for safe splitting: returns Option<&str> + fn get_part<'a>(input: &'a str, separator: &str, index: usize) -> Option<&'a str> { + input.split(separator).nth(index) } - return items; + + // Split HTML safely + let sections: Vec<&str> = html.split("class=\"paginator\"").collect(); + let body = match sections.get(0) { + Some(s) => s, + None => return vec![], + }; + + let raw_videos: Vec<&str> = body.split("class=\"thumb-video ").skip(1).collect(); + + for (idx, video_segment) in raw_videos.iter().enumerate() { + // Attempt to parse each item. If one fails, we log it and continue to the next + // instead of crashing the whole request. + let result: Option = (|| { + let mut video_url = get_part(video_segment, "href=\"", 1)? + .split("\"") + .next()? + .to_string(); + + if video_url.starts_with("/") { + video_url = format!("{}{}", self.url, video_url); + } + + let title_raw = get_part(video_segment, "alt=\"", 1)?.split("\"").next()?; + let title = decode(title_raw.as_bytes()).to_string().unwrap_or(title_raw.to_string()); + + // The ID is the 5th element in a "/" split: e.g., "", "video", "123", "title" + let id = video_url.split("/").nth(4)?.to_string(); + + let thumb = get_part(video_segment, "data-original=\"", 1)? + .split("\"") + .next()? + .to_string(); + + let raw_duration = get_part(video_segment, "class=\"duration\">", 1)? + .split("<") + .next()?; + let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32; + let tags_section = get_part(video_segment, "class=\"tags\"", 1)? + .split("

") + .next()?; + + let tags = tags_section.split("
").nth(1)?.split("<").next()?; + Some(name.to_string()) + }) + .collect::>(); + + Some(VideoItem::new(id, title, video_url, "pornzog".to_string(), thumb, duration).tags(tags)) + })(); + + match result { + Some(item) => items.push(item), + None => eprintln!("Warning: Failed to parse video item at index {}", idx), + } + } + items } } @@ -168,21 +168,42 @@ impl Provider for PornzogProvider { ) -> Vec { let _ = per_page; let _ = pool; - let videos: std::result::Result, Error> = self - .query( - cache, - page.parse::().unwrap_or(1), - query.unwrap_or("".to_string()).as_str(), - sort, - options, - ) - .await; - match videos { + + let page_num = page.parse::().unwrap_or(1); + let query_str = query.unwrap_or_default(); + + match self.query(cache, page_num, &query_str, sort, options).await { Ok(v) => v, Err(e) => { - println!("Error fetching videos: {}", e); + eprintln!("Error fetching videos from Pornzog: {}", e); + + // 1. Create a collection of owned data so we don't hold references to `e` + let mut error_reports = Vec::new(); + + // Iterating through the error chain to collect data into owned Strings + for cause in e.iter().skip(1) { + error_reports.push(( + cause.to_string(), // Title + format_error_chain(cause), // Description/Chain + format!("caused by: {}", cause) // Message + )); + } + + // 2. Now that we aren't holding any `&dyn StdError`, we can safely .await + for (title, chain_str, msg) in error_reports { + let _ = send_discord_error_report( + title, + Some(chain_str), + Some("Pornzog Provider"), + Some(&msg), + file!(), + line!(), + module_path!(), + ).await; + } + vec![] } } } -} +} \ No newline at end of file