This commit is contained in:
Simon
2026-02-08 08:49:19 +00:00
parent 5a08d2afe7
commit 360b615742
3 changed files with 111 additions and 88 deletions

View File

@@ -29,6 +29,7 @@ async-trait = "0.1"
regex = "1.12.2"
titlecase = "3.6.0"
dashmap = "6.1.0"
lru = "0.16.3"
[lints.rust]
unexpected_cfgs = "allow"

View File

@@ -59,6 +59,7 @@ pub static ALL_PROVIDERS: Lazy<HashMap<&'static str, DynProvider>> = Lazy::new(|
m.insert("pimpbunny", Arc::new(pimpbunny::PimpbunnyProvider::new()) as DynProvider);
m.insert("javtiful", Arc::new(javtiful::JavtifulProvider::new()) as DynProvider);
m.insert("hypnotube", Arc::new(hypnotube::HypnotubeProvider::new()) as DynProvider);
m.insert("perverzija", Arc::new(perverzija::PerverzijaProvider::new()) as DynProvider);
// add more here as you migrate them
m
});

View File

@@ -1,6 +1,7 @@
use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use error_chain::error_chain;
@@ -19,12 +20,14 @@ error_chain! {
/// Video provider for the Pornzog site.
///
/// Holds only the site's base URL; the search request URL is built from it and
/// relative links found in the returned HTML are made absolute with it.
pub struct PornzogProvider {
/// Base URL of the site, without a trailing slash (set in `new`).
url: String,
}
impl PornzogProvider {
pub fn new() -> Self {
PornzogProvider {
url: "https://pornzog.com".to_string(),
}
}
async fn query(
&self,
cache: VideoCache,
@@ -47,7 +50,7 @@ impl PornzogProvider {
search_params.push(format!("{}", &sort_string));
let video_url = format!("{}/search/?{}", self.url, search_params.join("&"));
// Check our Video Cache. If the result is younger than 5 minutes, we return it.
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
@@ -57,14 +60,18 @@ impl PornzogProvider {
return Ok(items.clone());
}
}
None => {
vec![]
}
None => vec![],
};
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, None).await.unwrap();
// SAFE: Check if requester exists instead of unwrap()
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
let text = requester.get(&video_url, None).await.map_err(|e| format!("{}", e))?;
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone());
if !video_items.is_empty() {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
@@ -78,79 +85,72 @@ impl PornzogProvider {
if html.is_empty() {
return vec![];
}
let mut items: Vec<VideoItem> = Vec::new();
let raw_videos = html.split("class=\"paginator\"").collect::<Vec<&str>>()[0]
.split("class=\"thumb-video ")
.collect::<Vec<&str>>()[1..]
.to_vec();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line);
// }
let mut video_url: String = video_segment.split("href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
if video_url.starts_with("/") {
video_url = format!("{}{}", self.url, video_url);
}
let mut title = video_segment.split("alt=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
// html decode
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let id = video_url.split("/").collect::<Vec<&str>>()[4].to_string();
let thumb = format!(
"{}",
video_segment.split("<img ").collect::<Vec<&str>>()[1]
.split("data-original=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string()
);
let raw_duration = video_segment
.split("class=\"duration\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
let duration = parse_time_to_seconds(raw_duration.as_str()).unwrap_or(0) as u32;
// let uploader = video_segment.split("class=\"source\">").collect::<Vec<&str>>()[1]
// .split(">").collect::<Vec<&str>>()[1]
// .split("<").collect::<Vec<&str>>()[0]
// .to_string();
let tags = video_segment.split("class=\"tags\"").collect::<Vec<&str>>()[1]
.split("</p>")
.collect::<Vec<&str>>()[0]
.split("<a href=\"")
.collect::<Vec<&str>>()[1..]
.iter()
.map(|el| {
el.split(">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string()
})
.collect::<Vec<String>>();
let video_item = VideoItem::new(
id,
title,
video_url.to_string(),
"pornzog".to_string(),
thumb,
duration,
)
// .uploader(uploader)
.tags(tags);
items.push(video_item);
/// Panic-free replacement for `input.split(separator).collect::<Vec<_>>()[index]`.
///
/// Splits `input` on `separator` and returns the piece at zero-based position
/// `index`, or `None` when the split produces fewer than `index + 1` pieces.
fn get_part<'a>(input: &'a str, separator: &str, index: usize) -> Option<&'a str> {
    let mut pieces = input.split(separator);
    pieces.nth(index)
}
return items;
// Split HTML safely
let sections: Vec<&str> = html.split("class=\"paginator\"").collect();
let body = match sections.get(0) {
Some(s) => s,
None => return vec![],
};
let raw_videos: Vec<&str> = body.split("class=\"thumb-video ").skip(1).collect();
for (idx, video_segment) in raw_videos.iter().enumerate() {
// Attempt to parse each item. If one fails, we log it and continue to the next
// instead of crashing the whole request.
let result: Option<VideoItem> = (|| {
let mut video_url = get_part(video_segment, "href=\"", 1)?
.split("\"")
.next()?
.to_string();
if video_url.starts_with("/") {
video_url = format!("{}{}", self.url, video_url);
}
let title_raw = get_part(video_segment, "alt=\"", 1)?.split("\"").next()?;
let title = decode(title_raw.as_bytes()).to_string().unwrap_or(title_raw.to_string());
// The ID is the 5th element (index 4) of the absolute URL split on "/": e.g., "https:", "", "pornzog.com", "video", "123", "title" -> "123"
let id = video_url.split("/").nth(4)?.to_string();
let thumb = get_part(video_segment, "data-original=\"", 1)?
.split("\"")
.next()?
.to_string();
let raw_duration = get_part(video_segment, "class=\"duration\">", 1)?
.split("<")
.next()?;
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
let tags_section = get_part(video_segment, "class=\"tags\"", 1)?
.split("</p>")
.next()?;
let tags = tags_section.split("<a href=\"")
.skip(1)
.filter_map(|el| {
let name = el.split(">").nth(1)?.split("<").next()?;
Some(name.to_string())
})
.collect::<Vec<String>>();
Some(VideoItem::new(id, title, video_url, "pornzog".to_string(), thumb, duration).tags(tags))
})();
match result {
Some(item) => items.push(item),
None => eprintln!("Warning: Failed to parse video item at index {}", idx),
}
}
items
}
}
@@ -168,19 +168,40 @@ impl Provider for PornzogProvider {
) -> Vec<VideoItem> {
let _ = per_page;
let _ = pool;
let videos: std::result::Result<Vec<VideoItem>, Error> = self
.query(
cache,
page.parse::<u8>().unwrap_or(1),
query.unwrap_or("".to_string()).as_str(),
sort,
options,
)
.await;
match videos {
let page_num = page.parse::<u8>().unwrap_or(1);
let query_str = query.unwrap_or_default();
match self.query(cache, page_num, &query_str, sort, options).await {
Ok(v) => v,
Err(e) => {
println!("Error fetching videos: {}", e);
eprintln!("Error fetching videos from Pornzog: {}", e);
// 1. Create a collection of owned data so we don't hold references to `e`
let mut error_reports = Vec::new();
// Iterating through the error chain to collect data into owned Strings
for cause in e.iter().skip(1) {
error_reports.push((
cause.to_string(), // Title
format_error_chain(cause), // Description/Chain
format!("caused by: {}", cause) // Message
));
}
// 2. Now that we aren't holding any `&dyn StdError`, we can safely .await
for (title, chain_str, msg) in error_reports {
let _ = send_discord_error_report(
title,
Some(chain_str),
Some("Pornzog Provider"),
Some(&msg),
file!(),
line!(),
module_path!(),
).await;
}
vec![]
}
}