From 27e2bcdbba312dfeb8b37a378f39617369aeeb9f Mon Sep 17 00:00:00 2001 From: Simon Date: Thu, 15 Jan 2026 19:04:28 +0000 Subject: [PATCH] fixes --- src/providers/rule34video.rs | 310 +++++++++++++++++++---------------- 1 file changed, 171 insertions(+), 139 deletions(-) diff --git a/src/providers/rule34video.rs b/src/providers/rule34video.rs index ccd8859..c7b23e0 100644 --- a/src/providers/rule34video.rs +++ b/src/providers/rule34video.rs @@ -1,203 +1,241 @@ -use crate::util::parse_abbreviated_number; use crate::DbPool; use crate::providers::Provider; use crate::util::cache::VideoCache; +use crate::util::discord::send_discord_error_report; +use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoItem}; +use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; -use std::vec; use std::time::{SystemTime, UNIX_EPOCH}; -use async_trait::async_trait; +use std::vec; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); } + errors { + ParsingError(t: String) { + description("html parsing error") + display("HTML parsing error: '{}'", t) + } + } } #[derive(Debug, Clone)] pub struct Rule34videoProvider { url: String, } + impl Rule34videoProvider { pub fn new() -> Self { Rule34videoProvider { url: "https://rule34video.com".to_string(), } } + + /// Helper to safely extract a string between two delimiters + fn extract_between<'a>(content: &'a str, start_pat: &str, end_pat: &str) -> Option<&'a str> { + let start_idx = content.find(start_pat)? + start_pat.len(); + let sub = &content[start_idx..]; + let end_idx = sub.find(end_pat)?; + Some(&sub[..end_idx]) + } + async fn get( &self, cache: VideoCache, page: u8, sort: &str, - options: ServerOptions + options: ServerOptions, ) -> Result> { - let now = SystemTime::now() + let timestamp_millis = SystemTime::now() .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); + .map(|d| d.as_millis()) + .unwrap_or(0); - let timestamp_millis = now.as_millis(); // u128 - let expected_sorts = vec!["post_date", "video_viewed", "rating", "duration", "pseudo_random"]; - let sort = if expected_sorts.contains(&sort) { + let expected_sorts = vec![ + "post_date", + "video_viewed", + "rating", + "duration", + "pseudo_random", + ]; + let sort_val = if expected_sorts.contains(&sort) { sort } else { "post_date" }; - let index = format!("rule34video:{}:{}", page, sort); + let index = format!("rule34video:{}:{}", page, sort_val); - let url = format!("{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}", self.url, sort, page, timestamp_millis); - - let mut old_items: Vec = vec![]; - if !(sort == "pseudo_random") { - old_items = match cache.get(&index) { - Some((time, items)) => { - if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { - // println!("Cache hit for URL: {}", url); - return Ok(items.clone()); - } else { - items.clone() - } + if sort_val != "pseudo_random" { + if let Some((time, items)) = cache.get(&index) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.clone()); } - None => { - vec![] - } - }; + } } - let mut requester = options.requester.clone().unwrap(); - let text = requester.get(&url, None).await.unwrap(); - let video_items: Vec = self.get_video_items_from_html(text.clone()); + + let mut requester = options.requester.clone().ok_or("Requester missing")?; + let url = format!( + "{}/?mode=async&function=get_block&block_id=custom_list_videos_most_recent_videos&tag_ids=&sort_by={}&from={}&_={}", + self.url, sort_val, page, timestamp_millis + ); + + let text = requester.get(&url, None).await.unwrap_or_else(|e| { + eprintln!("Error fetching rule34video URL {}: {}", url, e); + let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!()); + "".to_string() + }); + let video_items = self.get_video_items_from_html(text); + if !video_items.is_empty() { - cache.remove(&url); - cache.insert(url.clone(), video_items.clone()); + cache.insert(index, video_items.clone()); + Ok(video_items) } else { - return Ok(old_items); + // Return empty or old items if available + Ok(cache + .get(&index) + .map(|(_, items)| items) + .unwrap_or_default()) } - Ok(video_items) } + async fn query( &self, cache: VideoCache, page: u8, query: &str, sort: &str, - options: ServerOptions + options: ServerOptions, ) -> Result> { - let now = SystemTime::now() + let timestamp_millis = SystemTime::now() .duration_since(UNIX_EPOCH) - .expect("Time went backwards"); - let timestamp_millis = now.as_millis(); // u128 - let expected_sorts = vec!["post_date", "video_viewed", "rating", "duration", "pseudo_random"]; - let sort = if expected_sorts.contains(&sort) { + .map(|d| d.as_millis()) + .unwrap_or(0); + + let expected_sorts = vec![ + "post_date", + "video_viewed", + "rating", + "duration", + "pseudo_random", + ]; + let sort_val = if expected_sorts.contains(&sort) { sort } else { "post_date" }; - let index = format!("rule34video:{}:{}:{}", page, sort, query); + let index = format!("rule34video:{}:{}:{}", page, sort_val, query); - let url = format!("{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}", self.url, query.replace(" ","-"), sort, page, page, timestamp_millis); + if let Some((time, items)) = cache.get(&index) { + if time.elapsed().unwrap_or_default().as_secs() < 300 { + return Ok(items.clone()); + } + } - // Check our Video Cache. If the result is younger than 1 hour, we return it. - let old_items = match cache.get(&index) { - Some((time, items)) => { - if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 { - return Ok(items.clone()); - } else { - let _ = cache.check().await; - return Ok(items.clone()); - } - } - None => { - vec![] - } - }; - let mut requester = options.requester.clone().unwrap(); - let text = requester.get(&url, None).await.unwrap(); - let video_items: Vec = self.get_video_items_from_html(text.clone()); - if !video_items.is_empty() { - cache.remove(&url); - cache.insert(url.clone(), video_items.clone()); - } else { - return Ok(old_items); - } + let mut requester = options.requester.clone().ok_or("Requester missing")?; + let url = format!( + "{}/search/{}/?mode=async&function=get_block&block_id=custom_list_videos_videos_list_search&tag_ids=&sort_by={}&from_videos={}&from_albums={}&_={}", + self.url, + query.replace(" ", "-"), + sort_val, + page, + page, + timestamp_millis + ); + + let text = requester.get(&url, None).await.unwrap_or_else(|e| { + eprintln!("Error fetching rule34video URL {}: {}", url, e); + let _ = send_discord_error_report(e.to_string(), None, Some(&url), None, file!(), line!(), module_path!()); + "".to_string() + }); + let video_items = self.get_video_items_from_html(text); + + if !video_items.is_empty() { + cache.insert(index, video_items.clone()); Ok(video_items) + } else { + Ok(cache + .get(&index) + .map(|(_, items)| items) + .unwrap_or_default()) + } } fn get_video_items_from_html(&self, html: String) -> Vec { if html.is_empty() { - println!("HTML is empty"); return vec![]; } - let mut items: Vec = Vec::new(); - let video_listing_content = html.split("
>()[1].split("
>()[0].to_string(); - let raw_videos = video_listing_content - .split("
>()[1..] - .to_vec(); - for video_segment in &raw_videos { - // let vid = video_segment.split("\n").collect::>()[1] - // for (index, line) in vid.iter().enumerate() { - // println!("Line {}: {}", index, line); - // } - if video_segment.contains("https://rule34video.com/images/advertisements"){ + // Safely isolate the video listing section + let video_listing = match Self::extract_between( + &html, + "id=\"custom_list_videos", + "
content, + None => return vec![], + }; + + let mut items = Vec::new(); + // Skip the first split result as it's the preamble + let raw_videos = video_listing + .split("
").collect::>()[1] - .split("<") - .collect::>()[0] + // Title extraction + let title_raw = + Self::extract_between(video_segment, "
", "<") + .unwrap_or("Unknown"); + let title = decode(title_raw.as_bytes()) + .to_string() + .unwrap_or_else(|_| title_raw.to_string()); + + // ID extraction + let id = Self::extract_between(video_segment, "https://rule34video.com/video/", "/") + .unwrap_or("0") .to_string(); - // html decode - title = decode(title.as_bytes()).to_string().unwrap_or(title); - let id = video_segment.split("https://rule34video.com/video/").collect::>()[1].split("/").collect::>()[0].to_string(); - let raw_duration = video_segment.split("
").collect::>()[1] - .split("<") - .collect::>()[0] + + // Duration extraction + let raw_duration = + Self::extract_between(video_segment, "
", "<").unwrap_or("0:00"); + let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32; + + // Views extraction + let views_segment = Self::extract_between(video_segment, "
", "<"); + let views_count_str = views_segment + .and_then(|s| s.split("").nth(1)) + .unwrap_or("0"); + let views = parse_abbreviated_number(views_count_str.trim()).unwrap_or(0); + + // Thumbnail extraction + let thumb = Self::extract_between(video_segment, "data-original=\"", "\"") + .unwrap_or("") + .to_string(); + + // URL extraction + let url = + Self::extract_between(video_segment, "").collect::>()[1].split("").collect::>()[1] - .split("<") - .collect::>()[0]).unwrap_or(0); -//https://rule34video.com/get_file/47/5e71602b7642f9b997f90c979a368c99b8aad90d89/3942000/3942353/3942353_preview.mp4/ -//https://rule34video.com/get_file/47/5e71602b7642f9b997f90c979a368c99b8aad90d89/3942000/3942353/3942353_preview.mp4/ - let thumb = video_segment.split(">()[1].split("data-original=\"").collect::>()[1] - .split("\"") - .collect::>()[0] - .to_string(); - let url = video_segment.split(">()[1] - .split("\"") - .collect::>()[0] - .to_string(); - // let preview = video_segment.split("
>()[1] - // .split("\"") - // .collect::>()[0] - // .to_string(); - - let video_item = VideoItem::new( - id, - title, - url.to_string(), - "Rule34video".to_string(), - thumb, - duration, - ) - .views(views) - // .preview(preview) - ; - - - items.push(video_item); + items.push( + VideoItem::new(id, title, url, "Rule34video".to_string(), thumb, duration) + .views(views), + ); } - return items; + items } - - } #[async_trait] @@ -205,30 +243,24 @@ impl Provider for Rule34videoProvider { async fn get_videos( &self, cache: VideoCache, - pool: DbPool, + _pool: DbPool, sort: String, query: Option, page: String, - per_page: String, + _per_page: String, options: ServerOptions, ) -> Vec { - let _ = options; - let _ = per_page; - let _ = pool; // Ignored in this implementation - let videos: std::result::Result, Error> = match query { - Some(q) => { - self.query(cache, page.parse::().unwrap_or(1), &q, &sort, options) - .await - } - None => { - self.get(cache, page.parse::().unwrap_or(1), &sort, options) - .await - } + let page_num = page.parse::().unwrap_or(1); + + let result = match query { + Some(q) => self.query(cache, page_num, &q, &sort, options).await, + None => self.get(cache, page_num, &sort, options).await, }; - match videos { + + match result { Ok(v) => v, Err(e) => { - println!("Error fetching videos: {}", e); + eprintln!("Error fetching videos: {}", e); vec![] } }