// 457 lines · 14 KiB · Rust
use std::net::{IpAddr, Ipv4Addr};
use std::vec;

use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use titlecase::Titlecase;
use url::Url;
use wreq::Version;

use crate::DbPool;
use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::parse_abbreviated_number;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
|
|
/// Static metadata for this provider's channel: the group it is listed under
/// and the tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        tags: &["search", "mixed", "user-upload"],
        group_id: "mainstream-tube",
    };
|
|
|
|
error_chain! {
    // Errors from other libraries that are wrapped by this module's `Error`.
    foreign_links {
        HttpRequest(wreq::Error);
        Io(std::io::Error);
    }

    // Errors defined by this module.
    errors {
        // Carries a free-form message in `msg`.
        Parse(msg: String)
    }
}
|
|
|
|
/// Provider that builds video listings by scraping the site's HTML pages.
#[derive(Clone, Debug)]
pub struct NoodlemagazineProvider {
    /// Base URL of the site (scheme + host); request URLs are built by appending to it.
    url: String,
}
|
|
|
|
impl NoodlemagazineProvider {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
url: "https://noodlemagazine.com".to_string(),
|
|
}
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
Channel {
|
|
id: "noodlemagazine".into(),
|
|
name: "Noodlemagazine".into(),
|
|
description: "The Best Search Engine of HD Videos".into(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=noodlemagazine.com".into(),
|
|
status: "active".into(),
|
|
categories: vec![],
|
|
options: vec![],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
async fn get(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u8,
|
|
_sort: &str,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let video_url = format!(
|
|
"{}/popular/recent?sort_by=views&sort_order=desc&p={}",
|
|
self.url,
|
|
page.saturating_sub(1)
|
|
);
|
|
|
|
let old_items = match cache.get(&video_url) {
|
|
Some((t, i)) if t.elapsed().unwrap_or_default().as_secs() < 300 => return Ok(i.clone()),
|
|
Some((_, i)) => i.clone(),
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester = match options.requester.clone() {
|
|
Some(r) => r,
|
|
None => return Ok(old_items),
|
|
};
|
|
|
|
let text = requester
|
|
.get(&video_url, Some(Version::HTTP_2))
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let items = self.get_video_items_from_html(text, &options);
|
|
|
|
if items.is_empty() {
|
|
Ok(old_items)
|
|
} else {
|
|
cache.remove(&video_url);
|
|
cache.insert(video_url, items.clone());
|
|
Ok(items)
|
|
}
|
|
}
|
|
|
|
async fn query(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u8,
|
|
query: &str,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let q = query.trim().replace(' ', "%20");
|
|
let video_url = format!("{}/video/{}?p={}", self.url, q, page.saturating_sub(1));
|
|
|
|
let old_items = match cache.get(&video_url) {
|
|
Some((t, i)) if t.elapsed().unwrap_or_default().as_secs() < 300 => return Ok(i.clone()),
|
|
Some((_, i)) => i.clone(),
|
|
None => vec![],
|
|
};
|
|
|
|
let mut requester = match options.requester.clone() {
|
|
Some(r) => r,
|
|
None => return Ok(old_items),
|
|
};
|
|
|
|
let text = requester
|
|
.get(&video_url, Some(Version::HTTP_2))
|
|
.await
|
|
.unwrap_or_default();
|
|
|
|
let items = self.get_video_items_from_html(text, &options);
|
|
|
|
if items.is_empty() {
|
|
Ok(old_items)
|
|
} else {
|
|
cache.remove(&video_url);
|
|
cache.insert(video_url, items.clone());
|
|
Ok(items)
|
|
}
|
|
}
|
|
|
|
/// Extracts every parsable video item from a listing page.
///
/// Returns an empty list when `html` is empty, contains the text
/// `404 Not Found`, or has no `list_videos` container. Items that fail to
/// parse are skipped rather than failing the whole page.
fn get_video_items_from_html(&self, html: String, options: &ServerOptions) -> Vec<VideoItem> {
    if html.is_empty() || html.contains("404 Not Found") {
        return Vec::new();
    }

    // Only look at the markup before the first "Show more" marker; when the
    // marker is absent the whole document is used.
    let section = html
        .split_once(">Show more</div>")
        .map_or(html.as_str(), |(before, _)| before);

    let Some(list) = section
        .split("<div class=\"list_videos\" id=\"list_videos\">")
        .nth(1)
    else {
        return Vec::new();
    };

    // The piece before the first item marker is container preamble, hence `skip(1)`.
    list.split("<div class=\"item\">")
        .skip(1)
        .filter_map(|segment| self.get_video_item(segment.to_string(), options).ok())
        .collect()
}
|
|
|
|
fn proxy_url(&self, options: &ServerOptions, video_url: &str) -> String {
|
|
crate::providers::build_proxy_url(
|
|
options,
|
|
"noodlemagazine",
|
|
&crate::providers::strip_url_scheme(video_url),
|
|
)
|
|
}
|
|
|
|
fn normalize_thumb_url(&self, thumb: &str) -> String {
|
|
let thumb = thumb.trim();
|
|
if thumb.is_empty() {
|
|
return String::new();
|
|
}
|
|
|
|
if thumb.starts_with("http://") || thumb.starts_with("https://") {
|
|
return thumb.to_string();
|
|
}
|
|
|
|
if thumb.starts_with("//") {
|
|
return format!("https:{thumb}");
|
|
}
|
|
|
|
if thumb.starts_with('/') {
|
|
return format!("{}{}", self.url, thumb);
|
|
}
|
|
|
|
format!("{}/{}", self.url.trim_end_matches('/'), thumb.trim_start_matches('/'))
|
|
}
|
|
|
|
/// Returns `true` when `path` ends with one of the accepted image extensions
/// (`.jpg`, `.jpeg`, `.png`, `.webp`, `.avif`, `.gif`), ignoring ASCII case.
///
/// The comparison is done on bytes so no lowercase copy of `path` is
/// allocated and multi-byte characters in the path cannot cause a slicing
/// panic.
fn has_allowed_image_extension(path: &str) -> bool {
    const IMAGE_EXTENSIONS: [&str; 6] = [".jpg", ".jpeg", ".png", ".webp", ".avif", ".gif"];

    let bytes = path.as_bytes();
    IMAGE_EXTENSIONS.iter().any(|ext| {
        bytes
            .len()
            .checked_sub(ext.len())
            .map_or(false, |start| bytes[start..].eq_ignore_ascii_case(ext.as_bytes()))
    })
}
|
|
|
|
/// Returns `true` when `host` must not be used as a thumbnail source because
/// it names the local machine or a non-public address.
///
/// Rejected hosts:
/// * `localhost` and any `*.localhost` name, with or without a trailing dot,
///   in any ASCII case;
/// * IPv4 literals that are private, loopback, link-local, broadcast,
///   documentation or unspecified;
/// * IPv6 literals that are loopback, unspecified, multicast, unique-local
///   (`fc00::/7`) or link-local (`fe80::/10`);
/// * IPv4-mapped IPv6 literals (`::ffff:a.b.c.d`) whose embedded IPv4 address
///   is rejected by the IPv4 rules.
///
/// IPv6 literals are accepted both bare (`::1`) and in the bracketed form used
/// inside URLs (`[::1]`). Any other host name is allowed; no DNS lookup is done.
fn is_disallowed_thumb_host(host: &str) -> bool {
    fn is_blocked_v4(ip: Ipv4Addr) -> bool {
        ip.is_private()
            || ip.is_loopback()
            || ip.is_link_local()
            || ip.is_broadcast()
            || ip.is_documentation()
            || ip.is_unspecified()
    }

    // A fully-qualified name may carry a trailing root dot.
    let host = host.strip_suffix('.').unwrap_or(host);

    let lowered = host.to_ascii_lowercase();
    if lowered == "localhost" || lowered.ends_with(".localhost") {
        return true;
    }

    // URL hosts wrap IPv6 literals in brackets, which `IpAddr` cannot parse.
    let literal = host
        .strip_prefix('[')
        .and_then(|inner| inner.strip_suffix(']'))
        .unwrap_or(host);

    match literal.parse::<IpAddr>() {
        Ok(IpAddr::V4(ip)) => is_blocked_v4(ip),
        Ok(IpAddr::V6(ip)) => {
            if let Some(mapped) = ip.to_ipv4_mapped() {
                return is_blocked_v4(mapped);
            }

            // Same tests as `Ipv6Addr::is_unique_local` and
            // `Ipv6Addr::is_unicast_link_local`, written out so the check
            // does not depend on a recent toolchain.
            let first = ip.segments()[0];
            let unique_local = (first & 0xfe00) == 0xfc00;
            let link_local = (first & 0xffc0) == 0xfe80;

            ip.is_loopback()
                || ip.is_unspecified()
                || ip.is_multicast()
                || unique_local
                || link_local
        }
        Err(_) => false,
    }
}
|
|
|
|
fn is_allowed_thumb_url(&self, url: &str) -> bool {
|
|
let Some(url) = Url::parse(url).ok() else {
|
|
return false;
|
|
};
|
|
if url.scheme() != "https" {
|
|
return false;
|
|
}
|
|
let Some(host) = url.host_str() else {
|
|
return false;
|
|
};
|
|
|
|
!Self::is_disallowed_thumb_host(host) && Self::has_allowed_image_extension(url.path())
|
|
}
|
|
|
|
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
|
|
let normalized = self.normalize_thumb_url(thumb);
|
|
if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) {
|
|
return String::new();
|
|
}
|
|
|
|
crate::providers::build_proxy_url(
|
|
options,
|
|
"noodlemagazine-thumb",
|
|
&crate::providers::strip_url_scheme(&normalized),
|
|
)
|
|
}
|
|
|
|
fn get_video_item(&self, video_segment: String, options: &ServerOptions) -> Result<VideoItem> {
|
|
let href = video_segment
|
|
.split("<a href=\"")
|
|
.nth(1)
|
|
.and_then(|s| s.split('"').next())
|
|
.ok_or_else(|| Error::from("missing href"))?;
|
|
|
|
let video_url = format!("{}{}", self.url, href);
|
|
|
|
let mut title = video_segment
|
|
.split("<div class=\"title\">")
|
|
.nth(1)
|
|
.and_then(|s| s.split('<').next())
|
|
.unwrap_or("")
|
|
.trim()
|
|
.to_string();
|
|
|
|
title = decode(title.as_bytes())
|
|
.to_string()
|
|
.unwrap_or(title)
|
|
.titlecase();
|
|
|
|
let id = video_url
|
|
.split('/')
|
|
.nth(4)
|
|
.and_then(|s| s.split('.').next())
|
|
.ok_or_else(|| Error::from("missing id"))?
|
|
.to_string();
|
|
|
|
let thumb = video_segment
|
|
.split("data-src=\"")
|
|
.nth(1)
|
|
.and_then(|s| s.split('"').next())
|
|
.unwrap_or("")
|
|
.to_string();
|
|
|
|
let raw_duration = video_segment
|
|
.split("#clock-o\"></use></svg>")
|
|
.nth(1)
|
|
.and_then(|s| s.split('<').next())
|
|
.unwrap_or("0:00");
|
|
|
|
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
|
|
|
|
let views = video_segment
|
|
.split("#eye\"></use></svg>")
|
|
.nth(1)
|
|
.and_then(|s| s.split('<').next())
|
|
.and_then(|v| parse_abbreviated_number(v.trim()))
|
|
.unwrap_or(0);
|
|
let proxy_url = self.proxy_url(options, &video_url);
|
|
let proxied_thumb = self.proxied_thumb(options, &thumb);
|
|
|
|
Ok(VideoItem::new(
|
|
id,
|
|
title,
|
|
proxy_url.clone(),
|
|
"noodlemagazine".into(),
|
|
proxied_thumb,
|
|
duration,
|
|
)
|
|
.views(views)
|
|
.formats(vec![
|
|
VideoFormat::new(proxy_url, "auto".into(), "video/mp4".into())
|
|
.format_id("auto".into())
|
|
.format_note("proxied".into())
|
|
.http_header("Referer".into(), video_url),
|
|
]))
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for NoodlemagazineProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let _ = pool;
|
|
let _ = per_page;
|
|
|
|
let page = page.parse::<u8>().unwrap_or(1);
|
|
|
|
let res = match query {
|
|
Some(q) => self.query(cache, page, &q, options).await,
|
|
None => self.get(cache, page, &sort, options).await,
|
|
};
|
|
|
|
res.unwrap_or_else(|e| {
|
|
eprintln!("Noodlemagazine error: {e}");
|
|
vec![]
|
|
})
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::NoodlemagazineProvider;
    use crate::videos::ServerOptions;

    /// Options with only a public base URL set, so proxy URLs are predictable
    /// and no requester is available.
    fn options() -> ServerOptions {
        ServerOptions {
            featured: None,
            category: None,
            sites: None,
            filter: None,
            language: None,
            public_url_base: Some("https://example.com".to_string()),
            requester: None,
            network: None,
            stars: None,
            categories: None,
            duration: None,
            sort: None,
            sexuality: None,
        }
    }

    #[test]
    fn rewrites_video_pages_to_hottub_proxy() {
        let opts = options();
        let proxied =
            NoodlemagazineProvider::new().proxy_url(&opts, "https://noodlemagazine.com/watch/-123_456");

        assert_eq!(
            proxied,
            "https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456"
        );
    }

    #[test]
    fn parses_listing_without_detail_page_requests() {
        let page = r#"
            <div class="list_videos" id="list_videos">
            <div class="item">
            <a href="/watch/-123_456">
            <img data-src="https://noodlemagazine.com/thumbs/test.jpg" />
            </a>
            <div class="title">sample & title</div>
            <svg><use></use></svg>#clock-o"></use></svg>12:34<
            <svg><use></use></svg>#eye"></use></svg>1.2K<
            </div>
            >Show more</div>
        "#;

        let opts = options();
        let items =
            NoodlemagazineProvider::new().get_video_items_from_html(page.to_string(), &opts);

        assert_eq!(items.len(), 1);
        assert_eq!(
            items[0].url,
            "https://example.com/proxy/noodlemagazine/noodlemagazine.com/watch/-123_456"
        );
        assert_eq!(
            items[0].thumb,
            "https://example.com/proxy/noodlemagazine-thumb/noodlemagazine.com/thumbs/test.jpg"
        );
        assert_eq!(items[0].formats.as_ref().map(|f| f.len()), Some(1));
    }

    #[test]
    fn keeps_https_cdn_thumbs_but_drops_non_images() {
        // First item: HTTPS image on a foreign CDN. Second item: a "thumbnail"
        // whose path has no image extension.
        let page = r#"
            <div class="list_videos" id="list_videos">
            <div class="item">
            <a href="/watch/-123_456">
            <img data-src="https://cdn.example/thumb.jpg" />
            </a>
            <div class="title">sample</div>
            <svg><use></use></svg>#clock-o"></use></svg>12:34<
            <svg><use></use></svg>#eye"></use></svg>1.2K<
            </div>
            <div class="item">
            <a href="/watch/-555_666">
            <img data-src="https://noodlemagazine.com/watch/not-an-image" />
            </a>
            <div class="title">sample 2</div>
            <svg><use></use></svg>#clock-o"></use></svg>00:42<
            <svg><use></use></svg>#eye"></use></svg>123<
            </div>
            >Show more</div>
        "#;

        let opts = options();
        let items =
            NoodlemagazineProvider::new().get_video_items_from_html(page.to_string(), &opts);

        assert_eq!(items.len(), 2);
        assert_eq!(
            items[0].thumb,
            "https://example.com/proxy/noodlemagazine-thumb/cdn.example/thumb.jpg"
        );
        assert!(items[1].thumb.is_empty());
    }
}
|