use crate::DbPool; use crate::api::ClientVersion; use crate::providers::Provider; use crate::status::*; use crate::util::cache::VideoCache; use crate::util::parse_abbreviated_number; use crate::util::time::parse_time_to_seconds; use crate::videos::{ServerOptions, VideoFormat, VideoItem}; use async_trait::async_trait; use error_chain::error_chain; use htmlentity::entity::{ICodedDataTrait, decode}; use regex::Regex; use std::net::IpAddr; use std::vec; use titlecase::Titlecase; use url::Url; use wreq::Version; pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = crate::providers::ProviderChannelMetadata { group_id: "mainstream-tube", tags: &["search", "mixed", "user-upload"], }; error_chain! { foreign_links { Io(std::io::Error); HttpRequest(wreq::Error); } errors { Parse(msg: String) } } #[derive(Debug, Clone)] pub struct NoodlemagazineProvider { url: String, } impl NoodlemagazineProvider { pub fn new() -> Self { Self { url: "https://noodlemagazine.com".to_string(), } } fn build_channel(&self, _clientversion: ClientVersion) -> Channel { Channel { id: "noodlemagazine".into(), name: "Noodlemagazine".into(), description: "The Best Search Engine of HD Videos".into(), premium: false, favicon: "https://www.google.com/s2/favicons?sz=64&domain=noodlemagazine.com".into(), status: "active".into(), categories: vec![], options: vec![ ChannelOption { id: "category".into(), title: "Popular Period".into(), description: "Pick which popular feed to browse.".into(), systemImage: "clock".into(), colorName: "blue".into(), options: vec![ FilterOption { id: "recent".into(), title: "Recent".into(), }, FilterOption { id: "week".into(), title: "This Week".into(), }, FilterOption { id: "month".into(), title: "This Month".into(), }, ], multiSelect: false, }, ChannelOption { id: "sort".into(), title: "Sort By".into(), description: "Sort popular feed results.".into(), systemImage: "arrow.up.arrow.down".into(), colorName: "orange".into(), options: vec![ FilterOption { id: "views".into(), title: "Views".into(), }, FilterOption { id: "date".into(), title: "Newest".into(), }, FilterOption { id: "duration".into(), title: "Duration".into(), }, ], multiSelect: false, }, ChannelOption { id: "filter".into(), title: "Order".into(), description: "Ascending or descending order.".into(), systemImage: "list.number".into(), colorName: "green".into(), options: vec![ FilterOption { id: "desc".into(), title: "Descending".into(), }, FilterOption { id: "asc".into(), title: "Ascending".into(), }, ], multiSelect: false, }, ], nsfw: true, cacheDuration: Some(1800), } } fn resolve_popular_period(options: &ServerOptions) -> &'static str { match options.category.as_deref() { Some("week") => "week", Some("month") => "month", // The upstream site does not expose a valid /popular/all route. // Keep "all" as a backward-compatible alias for stale clients. Some("all") => "recent", _ => "recent", } } fn resolve_sort_by(sort: &str, options: &ServerOptions) -> &'static str { match options.sort.as_deref().unwrap_or(sort) { "date" | "new" | "latest" => "date", "duration" | "length" => "duration", _ => "views", } } fn resolve_sort_order(options: &ServerOptions) -> &'static str { match options.filter.as_deref() { Some("asc") => "asc", _ => "desc", } } fn mirror_url(url: &str) -> String { let stripped = url .strip_prefix("https://") .or_else(|| url.strip_prefix("http://")) .unwrap_or(url); format!("https://r.jina.ai/http://{stripped}") } fn looks_like_bot_challenge_or_block(html: &str) -> bool { let lower = html.to_ascii_lowercase(); lower.contains("just a moment") || lower.contains("cf-browser-verification") || lower.contains("cf-chl") || lower.contains("access restricted") || lower.contains("cloudflare") } fn parse_markdown_listing_items( &self, markdown: &str, options: &ServerOptions, ) -> Vec { let Some(regex) = Regex::new( r#"(?is)\[\!\[Image\s+\d+:\s*(?P.*?)\]\((?P<thumb>https?://[^)\s]+)\)(?P<meta>.*?)\]\((?P<url>https?://noodlemagazine\.com/watch/[^)\s]+)\)"#, ) .ok() else { return vec![]; }; let Some(duration_regex) = Regex::new(r"(?P<duration>\d{1,2}:\d{2}(?::\d{2})?)").ok() else { return vec![]; }; let Some(views_regex) = Regex::new(r"(?P<views>[0-9]+(?:\.[0-9]+)?[KMB]?)\s+\d{1,2}:\d{2}(?::\d{2})?").ok() else { return vec![]; }; regex .captures_iter(markdown) .filter_map(|caps| { let title_raw = caps.name("title")?.as_str().trim(); let thumb = caps.name("thumb")?.as_str().trim(); let video_url = caps.name("url")?.as_str().trim(); let meta = caps.name("meta").map(|m| m.as_str()).unwrap_or(""); let parsed_url = Url::parse(video_url).ok()?; let id = parsed_url .path_segments() .and_then(|mut segs| segs.next_back()) .filter(|value| !value.is_empty()) .map(|value| value.to_string())?; let duration = duration_regex .captures(meta) .and_then(|m| m.name("duration").map(|v| v.as_str())) .and_then(|v| parse_time_to_seconds(v)) .unwrap_or(0) as u32; let views = views_regex .captures(meta) .and_then(|m| m.name("views").map(|v| v.as_str())) .and_then(|v| parse_abbreviated_number(v.trim())) .unwrap_or(0); let title = decode(title_raw.as_bytes()) .to_string() .unwrap_or_else(|_| title_raw.to_string()) .titlecase(); let proxy_url = self.proxy_url(options, video_url); let proxied_thumb = self.proxied_thumb(options, thumb); Some( VideoItem::new( id, title, proxy_url.clone(), "noodlemagazine".into(), proxied_thumb, duration, ) .views(views) .formats(vec![ VideoFormat::new(proxy_url, "auto".into(), "video/mp4".into()) .format_id("auto".into()) .format_note("proxied".into()) .http_header("Referer".into(), video_url.to_string()), ]), ) }) .collect() } async fn fetch_listing_items( &self, requester: &mut crate::util::requester::Requester, page_url: &str, options: &ServerOptions, ) -> Vec<VideoItem> { let html = requester .get(page_url, Some(Version::HTTP_2)) .await .unwrap_or_default(); let mut items = self.get_video_items_from_html(html.clone(), options); if !items.is_empty() { return items; } if !Self::looks_like_bot_challenge_or_block(&html) { return items; } let mirror = requester .get(&Self::mirror_url(page_url), Some(Version::HTTP_11)) .await .unwrap_or_default(); items = self.parse_markdown_listing_items(&mirror, options); items } async fn get( &self, cache: VideoCache, page: u8, sort: &str, options: ServerOptions, ) -> Result<Vec<VideoItem>> { let period = Self::resolve_popular_period(&options); let sort_by = Self::resolve_sort_by(sort, &options); let sort_order = Self::resolve_sort_order(&options); let video_url = format!( "{}/popular/{period}?sort_by={sort_by}&sort_order={sort_order}&p={}", self.url, page.saturating_sub(1) ); let old_items = match cache.get(&video_url) { Some((t, i)) if t.elapsed().unwrap_or_default().as_secs() < 300 => return Ok(i.clone()), Some((_, i)) => i.clone(), None => vec![], }; let mut requester = match options.requester.clone() { Some(r) => r, None => return Ok(old_items), }; let items = self .fetch_listing_items(&mut requester, &video_url, &options) .await; if items.is_empty() { Ok(old_items) } else { cache.remove(&video_url); cache.insert(video_url, items.clone()); Ok(items) } } async fn query( &self, cache: VideoCache, page: u8, query: &str, options: ServerOptions, ) -> Result<Vec<VideoItem>> { let q = query.trim().replace(' ', "%20"); let video_url = format!("{}/video/{}?p={}", self.url, q, page.saturating_sub(1)); let old_items = match cache.get(&video_url) { Some((t, i)) if t.elapsed().unwrap_or_default().as_secs() < 300 => return Ok(i.clone()), Some((_, i)) => i.clone(), None => vec![], }; let mut requester = match options.requester.clone() { Some(r) => r, None => return Ok(old_items), }; let items = self .fetch_listing_items(&mut requester, &video_url, &options) .await; if items.is_empty() { Ok(old_items) } else { cache.remove(&video_url); cache.insert(video_url, items.clone()); Ok(items) } } fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec<VideoItem> { if html.is_empty() || html.contains("404 Not Found") { return vec![]; } let section = match html.split(">Show more</div>").next() { Some(s) => s, None => return vec![], }; let list = match section .split("<div class=\"list_videos\" id=\"list_videos\">") .nth(1) { Some(l) => l, None => return vec![], }; list.split("<div class=\"item") .skip(1) .filter_map(|segment| { self.get_video_item(segment.to_string(), options).ok() }) .collect() } fn proxy_url(&self, options: &ServerOptions, video_url: &str) -> String { crate::providers::build_proxy_url( options, "noodlemagazine", &crate::providers::strip_url_scheme(video_url), ) } fn normalize_thumb_url(&self, thumb: &str) -> String { let thumb = thumb.trim(); if thumb.is_empty() { return String::new(); } if thumb.starts_with("http://") || thumb.starts_with("https://") { return thumb.to_string(); } if thumb.starts_with("//") { return format!("https:{thumb}"); } if thumb.starts_with('/') { return format!("{}{}", self.url, thumb); } format!("{}/{}", self.url.trim_end_matches('/'), thumb.trim_start_matches('/')) } fn has_allowed_image_extension(path: &str) -> bool { let path = path.to_ascii_lowercase(); [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"] .iter() .any(|ext| path.ends_with(ext)) } fn is_known_preview_host(host: &str) -> bool { let host = host.to_ascii_lowercase(); host.ends_with("pvvstream.pro") || host.ends_with("okcdn.ru") || host.ends_with("vkuserphoto.ru") || host.ends_with("noodlemagazine.com") } fn has_preview_signature(url: &Url) -> bool { let path = url.path().to_ascii_lowercase(); let query = url.query().unwrap_or("").to_ascii_lowercase(); path.contains("/preview/") || path.contains("/poster/") || path.contains("getvideopreview") || query.contains("type=video_thumb") || query.contains("keep_aspect_ratio=") } fn is_disallowed_thumb_host(host: &str) -> bool { if host.eq_ignore_ascii_case("localhost") { return true; } match host.parse::<IpAddr>() { Ok(IpAddr::V4(ip)) => { ip.is_private() || ip.is_loopback() || ip.is_link_local() || ip.is_broadcast() || ip.is_documentation() || ip.is_unspecified() } Ok(IpAddr::V6(ip)) => { ip.is_loopback() || ip.is_unspecified() || ip.is_multicast() || ip.is_unique_local() || ip.is_unicast_link_local() } Err(_) => false, } } fn is_allowed_thumb_url(&self, url: &str) -> bool { let Some(url) = Url::parse(url).ok() else { return false; }; if url.scheme() != "https" { return false; } let Some(host) = url.host_str() else { return false; }; if Self::is_disallowed_thumb_host(host) { return false; } if Self::has_allowed_image_extension(url.path()) { return true; } Self::is_known_preview_host(host) && Self::has_preview_signature(&url) } fn proxied_thumb(&self, _options: &ServerOptions, thumb: &str) -> String { let normalized = self.normalize_thumb_url(thumb); if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) { return String::new(); } let Some(url) = Url::parse(&normalized).ok() else { return String::new(); }; if url .host_str() .is_some_and(|host| host.eq_ignore_ascii_case("img.pvvstream.pro")) { return crate::providers::build_proxy_url( _options, "noodlemagazine-thumb", &crate::providers::strip_url_scheme(&normalized), ); } normalized } fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result<VideoItem> { let href = video_segment .split("<a href=\"") .nth(1) .and_then(|s| s.split('"').next()) .ok_or_else(|| Error::from("missing href"))?; let video_url = format!("{}{}", self.url, href); let mut title = video_segment .split("<div class=\"title\">") .nth(1) .and_then(|s| s.split('<').next()) .unwrap_or("") .trim() .to_string(); title = decode(title.as_bytes()) .to_string() .unwrap_or(title) .titlecase(); let id = video_url .split('/') .nth(4) .and_then(|s| s.split('.').next()) .ok_or_else(|| Error::from("missing id"))? .to_string(); let thumb = Regex::new( r#"(?i)(?:data-src|data-original|data-webp|src|poster)\s*=\s*"(?P<url>[^"]+)""#, ) .ok() .and_then(|regex| { regex .captures_iter(&video_segment) .filter_map(|captures| captures.name("url").map(|value| value.as_str().to_string())) .find(|candidate| !candidate.starts_with("data:image/")) }) .unwrap_or_default(); let raw_duration = video_segment .split("#clock-o\"></use></svg>") .nth(1) .and_then(|s| s.split('<').next()) .unwrap_or("0:00"); let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32; let views = video_segment .split("#eye\"></use></svg>") .nth(1) .and_then(|s| s.split('<').next()) .and_then(|v| parse_abbreviated_number(v.trim())) .unwrap_or(0); let proxy_url = self.proxy_url(options, &video_url); let proxied_thumb = self.proxied_thumb(options, &thumb); Ok(VideoItem::new( id, title, proxy_url.clone(), "noodlemagazine".into(), proxied_thumb, duration, ) .views(views) .formats(vec![ VideoFormat::new(proxy_url, "auto".into(), "video/mp4".into()) .format_id("auto".into()) .format_note("proxied".into()) .http_header("Referer".into(), video_url), ])) } } #[async_trait] impl Provider for NoodlemagazineProvider { async fn get_videos( &self, cache: VideoCache, pool: DbPool, sort: String, query: Option<String>, page: String, per_page: String, options: ServerOptions, ) -> Vec<VideoItem> { let _ = pool; let _ = per_page; let page = page.parse::<u8>().unwrap_or(1); let res = match query { Some(q) => self.query(cache, page, &q, options).await, None => self.get(cache, page, &sort, options).await, }; res.unwrap_or_else(|e| { eprintln!("Noodlemagazine error: {e}"); vec![] }) } fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> { Some(self.build_channel(clientversion)) } } #[cfg(test)] mod tests { use super::NoodlemagazineProvider; use crate::videos::ServerOptions; fn options() -> ServerOptions { ServerOptions { featured: None, category: None, sites: None, filter: None, language: None, public_url_base: Some("https://example.com".to_string()), requester: None, network: None, stars: None, categories: None, duration: None, sort: None, sexuality: None, } } #[test] fn rewrites_video_pages_to_hottub_proxy() { let provider = NoodlemagazineProvider::new(); let options = options(); assert_eq!( provider.proxy_url(&options, "https://noodlemagazine.com/watch/-123_456"), "https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456" ); } #[test] fn parses_listing_without_detail_page_requests() { let provider = NoodlemagazineProvider::new(); let options = options(); let html = r#" <div class="list_videos" id="list_videos"> <div class="item"> <a href="/watch/-123_456"> <img data-src="https://noodlemagazine.com/thumbs/test.jpg" /> </a> <div class="title">sample & title</div> <svg><use></use></svg>#clock-o"></use></svg>12:34< <svg><use></use></svg>#eye"></use></svg>1.2K< </div> >Show more</div> "#; let items = provider.get_video_items_from_html(html.to_string(), &options); assert_eq!(items.len(), 1); assert_eq!( items[0].url, "https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456" ); assert_eq!( items[0].thumb, "https://noodlemagazine.com/thumbs/test.jpg" ); assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1)); } #[test] fn keeps_https_cdn_thumbs_but_drops_non_images() { let provider = NoodlemagazineProvider::new(); let options = options(); let html = r#" <div class="list_videos" id="list_videos"> <div class="item"> <a href="/watch/-123_456"> <img data-src="https://cdn.example/thumb.jpg" /> </a> <div class="title">sample</div> <svg><use></use></svg>#clock-o"></use></svg>12:34< <svg><use></use></svg>#eye"></use></svg>1.2K< </div> <div class="item"> <a href="/watch/-555_666"> <img data-src="https://noodlemagazine.com/watch/not-an-image" /> </a> <div class="title">sample 2</div> <svg><use></use></svg>#clock-o"></use></svg>00:42< <svg><use></use></svg>#eye"></use></svg>123< </div> >Show more</div> "#; let items = provider.get_video_items_from_html(html.to_string(), &options); assert_eq!(items.len(), 2); assert_eq!( items[0].thumb, "https://cdn.example/thumb.jpg" ); assert!(items[1].thumb.is_empty()); } #[test] fn keeps_preview_urls_without_file_extension() { let provider = NoodlemagazineProvider::new(); let options = options(); let html = r#" <div class="list_videos" id="list_videos"> <div class="item"> <a href="/watch/-111_222"> <img data-src="https://img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l" /> </a> <div class="title">sample</div> <svg><use></use></svg>#clock-o"></use></svg>12:34< <svg><use></use></svg>#eye"></use></svg>1.2K< </div> >Show more</div> "#; let items = provider.get_video_items_from_html(html.to_string(), &options); assert_eq!(items.len(), 1); assert_eq!( items[0].thumb, "https://example.com/proxy/noodlemagazine-thumb/img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l" ); } #[test] fn parses_item_variants_and_alternate_thumb_attributes() { let provider = NoodlemagazineProvider::new(); let options = options(); let html = r#" <div class="list_videos" id="list_videos"> <div class="item has-video" data-id="123"> <a href="/watch/-333_444"> <img data-original="https://cdn2.pvvstream.pro/videos/-333/444/preview_320.jpg" /> </a> <div class="title">sample alt</div> <svg><use></use></svg>#clock-o"></use></svg>00:42< <svg><use></use></svg>#eye"></use></svg>123< </div> >Show more</div> "#; let items = provider.get_video_items_from_html(html.to_string(), &options); assert_eq!(items.len(), 1); assert_eq!( items[0].thumb, "https://cdn2.pvvstream.pro/videos/-333/444/preview_320.jpg" ); } #[test] fn resolves_popular_filters_for_usability_options() { let mut options = options(); options.category = Some("month".to_string()); options.sort = Some("date".to_string()); options.filter = Some("asc".to_string()); assert_eq!(NoodlemagazineProvider::resolve_popular_period(&options), "month"); assert_eq!(NoodlemagazineProvider::resolve_sort_by("views", &options), "date"); assert_eq!(NoodlemagazineProvider::resolve_sort_order(&options), "asc"); } #[test] fn maps_legacy_all_time_period_to_recent_feed() { let mut options = options(); options.category = Some("all".to_string()); options.sort = Some("views".to_string()); options.filter = Some("desc".to_string()); assert_eq!(NoodlemagazineProvider::resolve_popular_period(&options), "recent"); } }