This commit is contained in:
Simon
2026-01-02 14:55:13 +00:00
parent 27bb3daec4
commit 89eecbe790
6 changed files with 692 additions and 552 deletions

View File

@@ -1,6 +1,7 @@
use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::ServerOptions;
@@ -17,6 +18,12 @@ error_chain! {
HttpRequest(wreq::Error);
JsonError(serde_json::Error);
}
// Custom error kinds declared in addition to the foreign-error links above.
errors {
// Raised when an expected marker cannot be found while scraping the HTML;
// `msg` names the piece that failed to extract and is shown by `display`.
Parse(msg: String) {
description("html parse error")
display("html parse error: {}", msg)
}
}
}
// fn has_blacklisted_class(element: &ElementRef, blacklist: &[&str]) -> bool {
@@ -83,9 +90,19 @@ impl SxyprnProvider {
let text = requester.get(&url_str, None).await.unwrap();
// Pass a reference to options if needed, or reconstruct as needed
let video_items: Vec<VideoItem> = self
let video_items = match self
.get_video_items_from_html(text.clone(), pool, requester)
.await;
.await
{
Ok(items) => items,
Err(e) => {
println!("Error parsing video items: {}", e);
return Ok(old_items);
}
};
// let video_items: Vec<VideoItem> = self
// .get_video_items_from_html(text.clone(), pool, requester)
// .await;
if !video_items.is_empty() {
cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone());
@@ -135,9 +152,28 @@ impl SxyprnProvider {
}
};
let text = requester.get(&url_str, None).await.unwrap();
let video_items: Vec<VideoItem> = self
let video_items = match self
.get_video_items_from_html(text.clone(), pool, requester)
.await;
.await
{
Ok(items) => items,
Err(e) => {
println!("Error parsing video items: {}", e);
let _ = futures::executor::block_on(send_discord_error_report(
&e,
Some("Sxyprn Provider"),
Some(format!("Failed to query videos:\nURL: {}\nQuery: {},", url_str, query).as_str()),
file!(),
line!(),
module_path!(),));
return Ok(old_items);
}
};
// let video_items: Vec<VideoItem> = self
// .get_video_items_from_html(text.clone(), pool, requester)
// .await;
if !video_items.is_empty() {
cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone());
@@ -150,41 +186,50 @@ impl SxyprnProvider {
async fn get_video_items_from_html(
&self,
html: String,
pool: DbPool,
requester: Requester,
) -> Vec<VideoItem> {
let _ = requester;
let _ = pool;
_pool: DbPool,
_requester: Requester,
) -> Result<Vec<VideoItem>> {
if html.is_empty() {
println!("HTML is empty");
return vec![];
return Ok(vec![]);
}
let raw_videos = html.split("<script async").collect::<Vec<&str>>()[0]
.split("post_el_small'")
.collect::<Vec<&str>>()[1..]
.to_vec();
let mut items: Vec<VideoItem> = Vec::new();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line.to_string().trim());
// }
// println!("\n\n\n");
let url = video_segment.split("/post/").collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]
// take content before "<script async"
let before_script = html
.split("<script async")
.next()
.ok_or_else(|| ErrorKind::Parse("missing '<script async' split point".into()))?;
// split into video segments (skip the first chunk)
let raw_videos: Vec<&str> = before_script.split("post_el_small'").skip(1).collect();
if raw_videos.is_empty() {
return Err(ErrorKind::Parse("no 'post_el_small\\'' segments found".into()).into());
}
let mut items = Vec::new();
for video_segment in raw_videos {
// url id
let url = video_segment
.split("/post/")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract /post/ url".into()))?
.to_string();
let video_url = format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", url);
let title_parts = video_segment.split("post_text").collect::<Vec<&str>>()[1]
.split("style=''>")
.collect::<Vec<&str>>()[1]
.split("</div>")
.collect::<Vec<&str>>()[0];
// title parts
let title_parts = video_segment
.split("post_text")
.nth(1)
.and_then(|s| s.split("style=''>").nth(1))
.and_then(|s| s.split("</div>").next())
.ok_or_else(|| ErrorKind::Parse("failed to extract title_parts".into()))?;
let document = Html::parse_document(title_parts);
let selector = Selector::parse("*").unwrap();
let selector = Selector::parse("*")
.map_err(|e| ErrorKind::Parse(format!("selector parse failed: {e}")))?;
let mut texts = Vec::new();
for element in document.select(&selector) {
@@ -193,101 +238,96 @@ impl SxyprnProvider {
texts.push(text.trim().to_string());
}
}
let mut title = texts[0].clone();
// html decode
let mut title = texts.join(" ");
title = decode(title.as_bytes())
.to_string()
.unwrap_or(title)
.replace(" ", " ");
title = title
.replace("\n", "")
.replace('\n', "")
.replace(" + ", " ")
.replace(" ", " ")
.trim().to_string();
.trim()
.to_string();
if title.to_ascii_lowercase().starts_with("new ") {
title = title[4..].to_string();
}
// println!("Title: {}", title);
let id = video_url.split("/").collect::<Vec<&str>>()[6]
.split("?")
.collect::<Vec<&str>>()[0]
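// id: take the last '/'-separated segment of video_url instead of indexing a
// fixed position ([6]), which panics when the URL has fewer segments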
let id = video_url
.split('/')
.last()
.ok_or_else(|| ErrorKind::Parse("failed to extract id from video_url".into()))?
.split('?')
.next()
.unwrap_or("")
.to_string();
let thumb = format!(
"https:{}",
video_segment
.split("<img class='mini_post_vid_thumb lazyload'")
.collect::<Vec<&str>>()[1]
.split("data-src='")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]
.to_string()
);
// thumb
let thumb_path = video_segment
.split("<img class='mini_post_vid_thumb lazyload'")
.nth(1)
.and_then(|s| s.split("data-src='").nth(1))
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract thumb".into()))?;
let preview = match video_segment.contains("class='hvp_player'") {
true => Some(format!(
let thumb = format!("https:{thumb_path}");
// preview
let preview = if video_segment.contains("class='hvp_player'") {
Some(format!(
"https:{}",
video_segment
.split("class='hvp_player'")
.collect::<Vec<&str>>()[1]
.split(" src='")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]
.to_string()
)),
false => None,
.nth(1)
.and_then(|s| s.split(" src='").nth(1))
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract preview src".into()))?
))
} else {
None
};
// views
let views = video_segment
.split("<strong>·</strong> ")
.collect::<Vec<&str>>()[1]
.split(" ")
.collect::<Vec<&str>>()[0]
.nth(1)
.and_then(|s| s.split_whitespace().next())
.ok_or_else(|| ErrorKind::Parse("failed to extract views".into()))?
.to_string();
let raw_duration = video_segment.split("duration_small").collect::<Vec<&str>>()[1]
.split("title='")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[1]
.split(">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
// duration
let raw_duration = video_segment
.split("duration_small")
.nth(1)
.and_then(|s| s.split("title='").nth(1))
.and_then(|s| s.split('\'').nth(1))
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract duration".into()))?
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
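// stream urls
// NOTE(review): the `.filter` below is a no-op. Any URL that starts with
// "https://lulustream." also starts with "http", so the `||` condition is true
// for every input; `&&` was probably intended. Left as-is pending confirmation.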
let stream_urls = video_segment
.split("extlink_icon extlink")
.collect::<Vec<&str>>()
.iter()
.map(|part| {
let url = part
.split("href='")
.collect::<Vec<&str>>()
.filter_map(|part| {
part.split("href='")
.last()
.unwrap_or(&"")
.split("'")
.collect::<Vec<&str>>()[0]
.to_string();
url
})
.filter(|url| {
url.starts_with("http")
&& !url.starts_with("https://bigwarp.io/")
&& !url.starts_with("https://doodstream.com/")
&& !url.starts_with("https://strmup.")
&& !url.starts_with("https://streamtape.com/")
&& !url.starts_with("https://streamvid.net/")
&& !url.starts_with("https://vtbe.")
.and_then(|s| s.split('\'').next())
.map(|u| u.to_string())
})
.filter(|url| url.starts_with("http") || !url.starts_with("https://lulustream."))
.collect::<Vec<String>>();
let video_item_url = match stream_urls.first() {
Some(u) => u.clone(),
None => format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", id), //video_url.clone(),
};
let video_item_url = stream_urls.first().cloned().unwrap_or_else(|| {
format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", id)
});
let mut video_item = VideoItem::new(
id,
title,
@@ -297,12 +337,15 @@ impl SxyprnProvider {
duration,
)
.views(views.parse::<u32>().unwrap_or(0));
if let Some(p) = preview {
video_item = video_item.preview(p);
}
items.push(video_item);
}
return items;
Ok(items)
}
}