// Hentai Haven provider.
//
// NOTE(review): this copy of the file looks extraction-damaged. Generic argument
// lists appear to be missing (for example `Arc>>`, `Result>`, `parse::()`), several
// string literals passed to `split` look truncated, and the original line breaks
// are gone, so the existing `//` comments run into the code that follows them.
// Restore the file from version control before changing any logic. The code below
// is reproduced unchanged.
//
// Items defined here:
//
// * `CHANNEL_METADATA` - static channel metadata (group id and tags).
// * `error_chain!` - declares the error types used in this module (`Error`,
//   `ErrorKind`, `Result`), with foreign links for `std::io::Error`, `wreq::Error`
//   and `serde_json::Error`, plus a `Parse(msg)` kind.
// * `HentaihavenProvider` - holds the site base URL and a shared, lock-protected
//   list of categories that grows as video pages are parsed.
//   - `new`: sets the base URL and starts with an empty category list.
//   - `build_channel`: builds the `Channel` description; the client version is
//     ignored. Category titles are read from the shared list. If the read lock
//     cannot be taken, the failure is reported in the background and an empty
//     list is used instead.
//   - `push_unique`: appends a `FilterOption` unless one with the same id is
//     already present. Does nothing when the write lock cannot be taken.
//   - `has_playable_formats`: true when the item has at least one format whose
//     URL is not blank after trimming.
//   - `decode_cached_video`: converts a cached string into a `VideoItem` and keeps
//     it only when it has playable formats.
//   - `get`: fetches listing page `page` (`sort` is ignored). A cache entry
//     younger than 24 hours is returned as is. An older entry is kept as a
//     fallback and returned when the request fails or parsing yields no items.
//     Fresh non-empty results replace the cache entry.
//   - `query`: fetches the search page for `query`. Spaces become `+`; no other
//     escaping is applied.
//     NOTE(review): unlike `get`, a stale cache entry is also returned directly
//     (after awaiting `cache.check()`), so the fallback list is always empty past
//     that point. The `page > 1` early return sits after the HTTP request, so the
//     request is still made for pages that then return nothing. Confirm whether
//     both are intended.
//   - `get_video_items_from_html` / `get_video_items_from_html_search`: return an
//     empty list for empty pages or pages containing `404 Not Found`. Otherwise
//     they cut the result block out of the page, split it into per-item segments
//     and resolve all segments with `get_video_item` through `join_all`. Segments
//     that fail are logged to stderr, reported from a spawned task and dropped.
//     When the result block cannot be found, a report is sent and an empty list
//     is returned.
//   - `get_video_item`: resolves one segment into a `VideoItem`.
//     1. Takes the first `a href` target in the segment as the video URL.
//     2. Looks that URL up in the database. A stored item that still has playable
//        formats is returned immediately; one without is deleted. The pooled
//        connection is dropped before the page request.
//     3. Fetches the video page and extracts title, id (the fifth `/`-separated
//        piece of the URL), thumbnail, tags (also pushed into the shared category
//        list) and view count.
//     4. Builds one m3u8 format per episode; the episode id is the sixth
//        `/`-separated piece of the episode thumbnail URL.
//     5. Fails when no format was found. Otherwise stores the item in the
//        database and returns it; the title gets an episode-count suffix when
//        there is more than one format.
//     NOTE(review): two `ErrorKind::Parse` messages use a plain string literal
//     containing `{seg}` instead of `format!`, so the segment text is not
//     interpolated there.
//     NOTE(review): an episode with no thumbnail yields an empty episode id, and a
//     format is still pushed for it, so the no-formats error cannot fire for that
//     case. Confirm whether that is intended.
// * `tests` - check that `decode_cached_video` accepts an item with a format and
//   rejects an item without formats.
// * `impl Provider` - `get_videos` parses `page` (falling back to 1), dispatches to
//   `query` when a query is present and to `get` otherwise, and turns an error
//   into an empty list after logging it. `get_channel` always returns
//   `Some(build_channel(v))`.
//   NOTE(review): `get_videos` calls `self.to_owned()` before `query`, which
//   clones the provider. Confirm whether the clone is needed.
use crate::api::ClientVersion; use crate::providers::Provider; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::discord::{format_error_chain, send_discord_error_report}; use crate::util::requester::Requester; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use crate::{DbPool, db}; use async_trait::async_trait; use error_chain::error_chain; use futures::future::join_all; use htmlentity::entity::{ICodedDataTrait, decode}; use std::sync::{Arc, RwLock}; use std::vec; use titlecase::Titlecase; use wreq::Version; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "hentai-animation", tags: &["hentai", "anime", "curated"], }; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); Json(serde_json::Error); } errors { Parse(msg: String) { description("parse error") display("parse error: {}", msg) } } } #[derive(Debug, Clone)] pub struct HentaihavenProvider { url: String, categories: Arc>>, } impl HentaihavenProvider { pub fn new() -> Self { let provider = Self { url: "https://hentaihaven.xxx".to_string(), categories: Arc::new(RwLock::new(vec![])), }; provider } fn build_channel(&self, clientversion: ClientVersion) -> Channel { let _ = clientversion; Channel { id: "hentaihaven".to_string(), name: "Hentai Haven".to_string(), description: "Watch Free Hentai Videos HD!".to_string(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=hentaihaven.xxx".to_string(), status: "active".to_string(), categories: self .categories .read() .map(|categories| categories.iter().map(|c| c.title.clone()).collect()) .unwrap_or_else(|e| { crate::providers::report_provider_error_background( "hentaihaven", "build_channel.categories_read", &e.to_string(), ); vec![] }), options: vec![], nsfw: true, cacheDuration: None, } } fn push_unique(target: &Arc>>, item: FilterOption) { if let Ok(mut vec) = target.write() { if !vec.iter().any(|x| x.id == 
item.id) { vec.push(item); } } } fn has_playable_formats(item: &VideoItem) -> bool { item.formats .as_ref() .is_some_and(|formats| formats.iter().any(|format| !format.url.trim().is_empty())) } fn decode_cached_video(cached: &str) -> Option { let item = VideoItem::from(cached.to_string()).ok()?; Self::has_playable_formats(&item).then_some(item) } async fn get( &self, cache: VideoCache, page: u8, sort: &str, options: ServerOptions, pool: DbPool, ) -> Result> { let _ = sort; let video_url = format!("{}/hentai/page/{}/", self.url, page); let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 * 24 { return Ok(items.clone()); } else { items.clone() } } None => { vec![] } }; let mut requester = crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); let text = match requester.get(&video_url, Some(Version::HTTP_2)).await { Ok(text) => text, Err(e) => { crate::providers::report_provider_error( "hentaihaven", "get.request", &format!("url={video_url}; error={e}"), ) .await; return Ok(old_items); } }; let video_items: Vec = self .get_video_items_from_html(text.clone(), &mut requester, pool.clone()) .await; if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); } else { return Ok(old_items); } Ok(video_items) } async fn query( &self, cache: VideoCache, page: u8, query: &str, options: ServerOptions, pool: DbPool, ) -> Result> { let video_url = format!("{}/?s={}", self.url, query.replace(" ", "+"),); // Check our Video Cache. If the result is younger than 24 hours, we return it. 
let old_items = match cache.get(&video_url) { Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 * 24 { return Ok(items.clone()); } else { let _ = cache.check().await; return Ok(items.clone()); } } None => { vec![] } }; let mut requester = crate::providers::requester_or_default(&options, module_path!(), "missing_requester"); let text = match requester.get(&video_url, Some(Version::HTTP_2)).await { Ok(text) => text, Err(e) => { crate::providers::report_provider_error( "hentaihaven", "query.request", &format!("url={video_url}; error={e}"), ) .await; return Ok(old_items); } }; if page > 1 { return Ok(vec![]); } let video_items: Vec = self .get_video_items_from_html_search(text.clone(), &mut requester, pool) .await; if !video_items.is_empty() { cache.remove(&video_url); cache.insert(video_url.clone(), video_items.clone()); } else { return Ok(old_items); } Ok(video_items) } async fn get_video_items_from_html( &self, html: String, requester: &mut Requester, pool: DbPool, ) -> Vec { if html.is_empty() || html.contains("404 Not Found") { return vec![]; } let block = match html .split("previouspostslink") .next() .and_then(|s| s.split("vraven_manga_list").nth(1)) { Some(b) => b, None => { eprint!("Hentai Haven Provider: Failed to get block from html"); let e = Error::from(ErrorKind::Parse("html".into())); send_discord_error_report( e.to_string(), Some(format_error_chain(&e)), Some("Hentai Haven Provider"), Some(&format!("Failed to get block from html:\n```{html}\n```")), file!(), line!(), module_path!(), ) .await; return vec![]; } }; let futures = block .split("id=\"manga-item-") .skip(1) .map(|el| self.get_video_item(el.to_string(), pool.clone(), requester.clone())); join_all(futures) .await .into_iter() .inspect(|r| { if let Err(e) = r { eprint!("Hentai Haven Provider: Failed to get video item:{}\n", e); // Prepare data to move into the background task let msg = e.to_string(); let chain = format_error_chain(&e); // Spawn the report into the 
background - NO .await here tokio::spawn(async move { let _ = send_discord_error_report( msg, Some(chain), Some("Hentai Haven Provider"), Some("Failed to get video item"), file!(), // Note: these might report the utility line line!(), // better to hardcode or pass from outside module_path!(), ) .await; }); } }) .filter_map(Result::ok) .collect() } async fn get_video_items_from_html_search( &self, html: String, requester: &mut Requester, pool: DbPool, ) -> Vec { if html.is_empty() || html.contains("404 Not Found") { return vec![]; } let block = match html .split(" b, None => { eprint!("Hentai Haven Provider: Failed to get block from html"); let e = Error::from(ErrorKind::Parse("html".into())); send_discord_error_report( e.to_string(), Some(format_error_chain(&e)), Some("Hentai Haven Provider"), Some(&format!("Failed to get block from html:\n```{html}\n```")), file!(), line!(), module_path!(), ) .await; return vec![]; } }; let futures = block .split("c-tabs-item__content col-6 col-md-12") .skip(1) .map(|el| self.get_video_item(el.to_string(), pool.clone(), requester.clone())); join_all(futures) .await .into_iter() .inspect(|r| { if let Err(e) = r { eprint!("Hentai Haven Provider: Failed to get video item:{}\n", e); // Prepare data to move into the background task let msg = e.to_string(); let chain = format_error_chain(&e); // Spawn the report into the background - NO .await here tokio::spawn(async move { let _ = send_discord_error_report( msg, Some(chain), Some("Hentai Haven Provider"), Some("Failed to get video item"), file!(), // Note: these might report the utility line line!(), // better to hardcode or pass from outside module_path!(), ) .await; }); } }) .filter_map(Result::ok) .collect() } async fn get_video_item( &self, seg: String, pool: DbPool, mut requester: Requester, ) -> Result { let video_url = seg .split("a href=\"") .nth(1) .and_then(|s| s.split('"').next()) .ok_or_else(|| ErrorKind::Parse("video url\n\n{seg}".into()))? 
.to_string(); let mut conn = match pool.get() { Ok(conn) => conn, Err(e) => { let msg = format!("DB pool error: {}", e); send_discord_error_report( msg.clone(), None, Some("Hentai Haven Provider"), Some("get_video_item.pool_get"), file!(), line!(), module_path!(), ) .await; return Err(msg.into()); } }; let db_result = db::get_video(&mut conn, video_url.clone()); drop(conn); match db_result { Ok(Some(video)) => { if let Some(item) = Self::decode_cached_video(&video) { return Ok(item); } eprint!("Ignoring stale hentaihaven DB cache entry without playable formats\n"); if let Ok(mut conn) = pool.get() { let _ = db::delete_video(&mut conn, video_url.clone()); } } Ok(None) => { // continue to fetch and parse the video } Err(e) => { eprint!("Database error: {}\n", e); // continue to fetch and parse the video even if there's a DB error } } let html = requester .get(&video_url, Some(Version::HTTP_2)) .await .map_err(|e| Error::from(format!("Failed to fetch video page: {}", e)))?; let mut title = html .split("

") .nth(1) .and_then(|s| s.split("

").next()) .ok_or_else(|| ErrorKind::Parse(format!("video title\n\n{seg}").into()))? .trim() .to_string(); title = decode(title.as_bytes()) .to_string() .unwrap_or(title) .titlecase(); let id = video_url .split('/') .nth(4) .and_then(|s| s.split('.').next()) .ok_or_else(|| ErrorKind::Parse("video id\n\n{seg}".into()))? .to_string(); let thumb = html .split("og:image\" content=\"") .nth(1) .and_then(|s| s.split('"').next()) .unwrap_or("") .to_string(); let raw_tags: Vec = html .split("Genre(s)") .nth(1) .unwrap_or_default() .split("Release") .nth(0) .unwrap_or_default() .split("a href=\"") .skip(1) .map(|tag_block| { let id = tag_block .split("\"") .nth(1) .and_then(|s| s.split('"').next()) .unwrap_or("") .to_string(); let title = tag_block .split('>') .nth(1) .and_then(|s| s.split('<').next()) .map(|s| { decode(s.as_bytes()) .to_string() .unwrap_or(s.to_string()) .titlecase() }) .unwrap_or("".to_string()); FilterOption { id: id.to_ascii_lowercase().replace(" ", "+"), title: title.clone(), } }) .collect::>(); for tag in &raw_tags { Self::push_unique(&self.categories, tag.clone()); } let tags = raw_tags.into_iter().map(|t| t.title).collect(); let views = html .split("Viewed") .last() .and_then(|s| s.split("summary-content\">").nth(1)) .and_then(|s| s.split(" Total").nth(0)) .map(|s| s.trim().parse::().unwrap_or(0)) .unwrap_or(0); let mut formats = vec![]; let episode_block = html .split("manga-chapters-holder") .nth(1) .unwrap_or_default() .split("vraven_read") .nth(0) .unwrap_or_default(); for episode in episode_block.split("wp-manga-chapter").skip(1) { let ep_thumbnail = episode .split(" src=\"") .nth(1) .and_then(|s| s.split('"').next()) .unwrap_or_default(); let episode_title = episode .split("
") .nth(1) .and_then(|s| s.split('<').next()) .unwrap_or_default() .trim() .to_string(); let episode_id = ep_thumbnail.split('/').nth(5).unwrap_or_default(); let episode_url = format!( "https://master-lengs.org/api/v3/hh/{}/master.m3u8", episode_id ); let format = VideoFormat::new(episode_url, "1080p".to_string(), "m3u8".to_string()) .format_id(episode_title.clone()) .http_header("Connection".to_string(), "keep-alive".to_string()) .http_header( "User-Agent".to_string(), "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0" .to_string(), ) .http_header( "Accept".to_string(), "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8".to_string(), ) .http_header("Accept-Language".to_string(), "en-US,en;q=0.5".to_string()) .http_header( "Accept-Encoding".to_string(), "gzip, deflate, br".to_string(), ) .http_header("Sec-Fetch-Mode".to_string(), "navigate".to_string()) .http_header("Origin".to_string(), self.url.clone()) .format_note(episode_title.clone()); formats.push(format); } if formats.is_empty() { let e = Error::from(format!("No formats found for video URL: {}", video_url)); return Err(e); } if formats.len() > 1 { title = format!("{} ({} Episodes)", title, formats.len()); } let video_item = VideoItem::new(id, title, video_url.clone(), "hentaihaven".into(), thumb, 0) .formats(formats) .tags(tags) .views(views) .aspect_ratio(0.715); match pool.get() { Ok(mut conn) => { let _ = db::insert_video( &mut conn, &video_url, &serde_json::to_string(&video_item).unwrap_or_default(), ); } Err(e) => { send_discord_error_report( format!("DB pool error: {}", e), None, Some("Hentai Haven Provider"), Some("get_video_item.insert_video.pool_get"), file!(), line!(), module_path!(), ) .await; } } Ok(video_item) } } #[cfg(test)] mod tests { use super::HentaihavenProvider; use crate::videos::{VideoFormat, VideoItem}; #[test] fn accepts_cached_items_with_playable_formats() { let cached = serde_json::to_string( &VideoItem::new( "id".to_string(), 
"title".to_string(), "https://hentaihaven.xxx/video/test/".to_string(), "hentaihaven".to_string(), "https://example.com/thumb.jpg".to_string(), 0, ) .formats(vec![VideoFormat::new( "https://cdn.example/master.m3u8".to_string(), "1080p".to_string(), "m3u8".to_string(), )]), ) .expect("serializes"); assert!(HentaihavenProvider::decode_cached_video(&cached).is_some()); } #[test] fn rejects_cached_items_without_formats() { let cached = serde_json::to_string(&VideoItem::new( "id".to_string(), "title".to_string(), "https://hentaihaven.xxx/video/test/".to_string(), "hentaihaven".to_string(), "https://example.com/thumb.jpg".to_string(), 0, )) .expect("serializes"); assert!(HentaihavenProvider::decode_cached_video(&cached).is_none()); } } #[async_trait] impl Provider for HentaihavenProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, sort: String, query: Option, page: String, _per_page: String, options: ServerOptions, ) -> Vec { let page = page.parse::().unwrap_or(1); let res = match query { Some(q) => self.to_owned().query(cache, page, &q, options, pool).await, None => self.get(cache, page, &sort, options, pool).await, }; res.unwrap_or_else(|e| { eprintln!("hentai haven error: {e}"); vec![] }) } fn get_channel(&self, v: ClientVersion) -> Option { Some(self.build_channel(v)) } }