fixes and cleanup

This commit is contained in:
Simon
2026-03-05 18:18:48 +00:00
parent 76fd5a4f4f
commit 2627505ade
49 changed files with 3245 additions and 1376 deletions

View File

@@ -6,16 +6,15 @@ use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::ServerOptions;
use crate::videos::{self, VideoEmbed, VideoItem};
use crate::videos::{self, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use serde::Deserialize;
use serde::Serialize;
use wreq::Version;
use std::vec;
use wreq::Client;
use wreq::Version;
use wreq_util::Emulation;
error_chain! {
@@ -78,6 +77,66 @@ impl PerverzijaProvider {
}
}
/// Returns the slice of `haystack` found after the first occurrence of `start`.
///
/// The candidate slice runs from the end of the first `start` up to the next
/// occurrence of `start` (or the end of `haystack`), and is then cut at the
/// first occurrence of `end`. When `end` does not occur, the whole candidate
/// slice is returned. Yields `None` only when `start` is absent.
fn extract_between<'a>(haystack: &'a str, start: &str, end: &str) -> Option<&'a str> {
    haystack
        .split(start)
        .nth(1)
        // `split` always yields at least one piece, so the fallback is never hit.
        .map(|after_start| after_start.split(end).next().unwrap_or(""))
}
/// Pulls the `src` value of an iframe out of `haystack`.
///
/// A plainly quoted attribute (`iframe src="..."`) is tried first; if that is
/// not present, the HTML-entity-quoted form (`iframe src=&quot;...&quot;`) is
/// tried. Returns an empty string when neither form is found.
fn extract_iframe_src(haystack: &str) -> String {
    let src = match Self::extract_between(haystack, "iframe src=\"", "\"") {
        Some(plain) => plain,
        None => Self::extract_between(haystack, "iframe src=&quot;", "&quot;")
            .unwrap_or_default(),
    };
    src.to_owned()
}
fn extract_thumb(haystack: &str) -> String {
let img_segment = haystack.split("<img").nth(1).unwrap_or_default();
let mut thumb = Self::extract_between(img_segment, "data-original=\"", "\"")
.or_else(|| Self::extract_between(img_segment, "data-src=\"", "\""))
.or_else(|| Self::extract_between(img_segment, "src=\"", "\""))
.unwrap_or_default()
.to_string();
if thumb.starts_with("data:image") {
thumb.clear();
} else if thumb.starts_with("//") {
thumb = format!("https:{thumb}");
}
thumb
}
/// Extracts a human-readable title from an HTML fragment.
///
/// The title is looked up, in order, in an `<h4 class='gv-title'>` element
/// (single- or double-quoted class attribute) and then in a ` title=` attribute
/// (single- or double-quoted). The value is HTML-entity decoded; if decoding
/// fails the undecoded text is kept. When the decoded text contains both `<`
/// and `>`, everything between angle brackets is dropped and whitespace is
/// collapsed; if that leaves nothing, the decoded text is kept as is.
/// Otherwise whitespace is simply collapsed. The result is trimmed.
fn extract_title(haystack: &str) -> String {
    let raw = Self::extract_between(haystack, "<h4 class='gv-title'>", "</h4>")
        .or_else(|| Self::extract_between(haystack, "<h4 class=\"gv-title\">", "</h4>"))
        .or_else(|| Self::extract_between(haystack, " title='", "'"))
        .or_else(|| Self::extract_between(haystack, " title=\"", "\""))
        .unwrap_or_default()
        .to_string();

    let decode_result = decode(raw.as_bytes()).to_string();
    let decoded = decode_result.unwrap_or(raw);

    let has_markup = decoded.contains('<') && decoded.contains('>');
    let cleaned = if has_markup {
        // Keep only the characters that sit outside of `<...>` spans.
        let mut inside_tag = false;
        let text_only: String = decoded
            .chars()
            .filter(|&ch| match ch {
                '<' => {
                    inside_tag = true;
                    false
                }
                '>' => {
                    inside_tag = false;
                    false
                }
                _ => !inside_tag,
            })
            .collect();
        let collapsed = text_only.split_whitespace().collect::<Vec<&str>>().join(" ");
        // Fall back to the decoded text when stripping tags leaves nothing.
        if collapsed.is_empty() {
            decoded
        } else {
            collapsed
        }
    } else {
        decoded.split_whitespace().collect::<Vec<&str>>().join(" ")
    };

    cleaned.trim().to_string()
}
async fn get(
&self,
cache: VideoCache,
@@ -204,93 +263,91 @@ impl PerverzijaProvider {
fn get_video_items_from_html(&self, html: String, pool: DbPool) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
report_provider_error_background(
"perverzija",
"get_video_items_from_html.empty_html",
"empty html response",
);
return vec![];
}
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html.split("video-listing-content").collect::<Vec<&str>>().get(1).copied().unwrap_or_default();
let raw_videos = video_listing_content
let video_listing_content = html.split("video-listing-content").nth(1).unwrap_or(&html);
let raw_videos: Vec<&str> = video_listing_content
.split("video-item post")
.collect::<Vec<&str>>()[1..]
.to_vec();
for video_segment in &raw_videos {
let vid = video_segment.split("\n").collect::<Vec<&str>>();
if vid.len() > 20 || vid.len() < 8 {
.skip(1)
.collect();
if raw_videos.is_empty() {
report_provider_error_background(
"perverzija",
"get_video_items_from_html.no_segments",
&format!("html_len={}", html.len()),
);
return vec![];
}
for video_segment in raw_videos {
let title = Self::extract_title(video_segment);
let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
.or_else(|| Self::extract_between(video_segment, "data-embed=\"", "\""))
.unwrap_or_default()
.to_string();
let embed_html = decode(embed_html_raw.as_bytes())
.to_string()
.unwrap_or(embed_html_raw.clone());
let mut url_str = Self::extract_iframe_src(&embed_html);
if url_str.is_empty() {
url_str = Self::extract_iframe_src(video_segment);
}
if url_str.is_empty() {
report_provider_error_background(
"perverzija",
"get_video_items_from_html.snippet_shape",
&format!("unexpected snippet length={}", vid.len()),
"get_video_items_from_html.url_missing",
"missing iframe src in segment",
);
continue;
}
let line0 = vid.get(0).copied().unwrap_or_default();
let line1 = vid.get(1).copied().unwrap_or_default();
let line4 = vid.get(4).copied().unwrap_or_default();
let line6 = vid.get(6).copied().unwrap_or_default();
let line7 = vid.get(7).copied().unwrap_or_default();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line.to_string().trim());
// }
let mut title = line1.split(">").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("<")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string();
// html decode
title = decode(title.as_bytes()).to_string().unwrap_or(title);
if !line1.contains("iframe src=&quot;") {
continue;
}
let url_str = line1.split("iframe src=&quot;").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("&quot;")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string()
.replace("index.php", "xs1.php");
url_str = url_str.replace("index.php", "xs1.php");
if url_str.starts_with("https://streamtape.com/") {
continue; // Skip Streamtape links
}
let id = url_str.split("data=").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("&")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
let id_url = Self::extract_between(video_segment, "data-url='", "'")
.or_else(|| Self::extract_between(video_segment, "data-url=\"", "\""))
.unwrap_or_default()
.to_string();
let mut id = url_str
.split("data=")
.nth(1)
.unwrap_or_default()
.split('&')
.next()
.unwrap_or_default()
.to_string();
if id.is_empty() {
id = id_url
.trim_end_matches('/')
.rsplit('/')
.next()
.unwrap_or_default()
.to_string();
}
let raw_duration = Self::extract_between(video_segment, "time_dur\">", "<")
.or_else(|| Self::extract_between(video_segment, "class=\"time\">", "<"))
.unwrap_or("00:00")
.to_string();
let raw_duration = match vid.len() {
10 => line6.split("time_dur\">").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("<")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string(),
_ => "00:00".to_string(),
};
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
if !line4.contains("srcset=")
&& line4.split("src=\"").collect::<Vec<&str>>().len() == 1
{
for (index, line) in vid.iter().enumerate() {
println!("Line {}: {}\n\n", index, line);
}
}
let mut thumb = "".to_string();
for v in vid.clone() {
let line = v.trim();
if line.starts_with("<img ") {
thumb = line.split(" src=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string();
}
}
let embed_html = line1.split("data-embed='").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("'")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string();
let id_url = line1.split("data-url='").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("'")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string();
let thumb = Self::extract_thumb(video_segment);
match pool.get() {
Ok(mut conn) => {
let _ = db::insert_video(&mut conn, &id_url, &url_str);
if !id_url.is_empty() {
let _ = db::insert_video(&mut conn, &id_url, &url_str);
}
}
Err(e) => {
report_provider_error_background(
@@ -301,26 +358,36 @@ impl PerverzijaProvider {
}
}
let referer_url = "https://xtremestream.xyz/".to_string();
let embed = VideoEmbed::new(embed_html, url_str.clone());
let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
let mut tags: Vec<String> = Vec::new();
let studios_parts = line7.split("a href=\"").collect::<Vec<&str>>();
let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
);
}
}
for tag in line0.split(" ").collect::<Vec<&str>>() {
if tag.starts_with("stars-") {
let tag_name = tag.split("stars-").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
for tag in video_segment.split_whitespace() {
let token =
tag.trim_matches(|c: char| c == '"' || c == '\'' || c == '>' || c == '<');
if token.starts_with("stars-") {
let tag_name = token
.split("stars-")
.nth(1)
.unwrap_or_default()
.split('"')
.next()
.unwrap_or_default()
.to_string();
if !tag_name.is_empty() {
tags.push(format!("@stars:{}", tag_name));
@@ -328,9 +395,11 @@ impl PerverzijaProvider {
}
}
for tag in line0.split(" ").collect::<Vec<&str>>() {
if tag.starts_with("tag-") {
let tag_name = tag.split("tag-").collect::<Vec<&str>>().get(1).copied().unwrap_or_default().to_string();
for tag in video_segment.split_whitespace() {
let token =
tag.trim_matches(|c: char| c == '"' || c == '\'' || c == '>' || c == '<');
if token.starts_with("tag-") {
let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
if !tag_name.is_empty() {
tags.push(tag_name.replace("-", " ").to_string());
}
@@ -339,7 +408,7 @@ impl PerverzijaProvider {
let mut video_item = VideoItem::new(
id,
title,
embed.source.clone(),
url_str.clone(),
"perverzija".to_string(),
thumb,
duration,
@@ -361,7 +430,15 @@ impl PerverzijaProvider {
}
async fn get_video_items_from_html_query(&self, html: String, pool: DbPool) -> Vec<VideoItem> {
let raw_videos = html.split("video-item post").collect::<Vec<&str>>()[1..].to_vec();
let raw_videos: Vec<&str> = html.split("video-item post").skip(1).collect();
if raw_videos.is_empty() {
report_provider_error_background(
"perverzija",
"get_video_items_from_html_query.no_segments",
&format!("html_len={}", html.len()),
);
return vec![];
}
let futures = raw_videos
.into_iter()
.map(|el| self.get_video_item(el, pool.clone()));
@@ -372,53 +449,39 @@ impl PerverzijaProvider {
}
async fn get_video_item(&self, snippet: &str, pool: DbPool) -> Result<VideoItem> {
let vid = snippet.split("\n").collect::<Vec<&str>>();
if vid.len() > 30 || vid.len() < 7 {
if snippet.trim().is_empty() {
report_provider_error_background(
"perverzija",
"get_video_item.snippet_shape",
&format!("unexpected snippet length={}", vid.len()),
"get_video_item.empty_snippet",
"snippet is empty",
);
return Err("Unexpected video snippet length".into());
return Err("empty snippet".into());
}
let line5 = vid.get(5).copied().unwrap_or_default();
let line6 = vid.get(6).copied().unwrap_or_default();
let mut title = line5.split(" title=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string();
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let title = Self::extract_title(snippet);
let thumb = match line6.split(" src=\"").collect::<Vec<&str>>().len() {
1 => {
for (index, line) in vid.iter().enumerate() {
println!("Line {}: {}", index, line.to_string().trim());
}
return Err("Failed to parse thumbnail URL".into());
}
_ => line6.split(" src=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.to_string(),
};
let thumb = Self::extract_thumb(snippet);
let duration = 0;
let lookup_url = line5.split(" href=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
let lookup_url = Self::extract_between(snippet, " href=\"", "\"")
.or_else(|| Self::extract_between(snippet, "data-url='", "'"))
.unwrap_or_default()
.to_string();
if lookup_url.is_empty() {
report_provider_error_background(
"perverzija",
"get_video_item.lookup_url_missing",
"missing lookup url in snippet",
);
return Err("Failed to parse lookup url".into());
}
let referer_url = "https://xtremestream.xyz/".to_string();
let mut conn = match pool.get() {
Ok(conn) => conn,
Err(e) => {
report_provider_error(
"perverzija",
"get_video_item.pool_get",
&e.to_string(),
)
.await;
report_provider_error("perverzija", "get_video_item.pool_get", &e.to_string())
.await;
return Err("couldn't get db connection from pool".into());
}
};
@@ -433,9 +496,21 @@ impl PerverzijaProvider {
if url_str.starts_with("!") {
return Err("Video was removed".into());
}
let mut id = url_str.split("data=").collect::<Vec<&str>>().get(1).copied().unwrap_or_default().to_string();
let mut id = url_str
.split("data=")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.to_string();
if id.contains("&") {
id = id.split("&").collect::<Vec<&str>>().get(0).copied().unwrap_or_default().to_string()
id = id
.split("&")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.to_string()
}
let mut video_item = VideoItem::new(
id,
@@ -481,9 +556,17 @@ impl PerverzijaProvider {
}
};
let mut url_str = text.split("<iframe src=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
let mut url_str = text
.split("<iframe src=\"")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("\"")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.to_string()
.replace("index.php", "xs1.php");
if !url_str.contains("xtremestream.xyz") {
@@ -494,15 +577,26 @@ impl PerverzijaProvider {
let studios_parts = text
.split("<strong>Studio: </strong>")
.collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for studio in studios_parts.iter().skip(1) {
if studio.starts_with("https://tube.perverzija.com/studio/") {
tags.push(
studio.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
studio
.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/studio/", "@studio:")
.to_string(),
);
@@ -511,15 +605,25 @@ impl PerverzijaProvider {
if text.contains("<strong>Stars: </strong>") {
let stars_parts: Vec<&str> = text
.split("<strong>Stars: </strong>")
.collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in stars_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
@@ -527,15 +631,27 @@ impl PerverzijaProvider {
}
}
let tags_parts: Vec<&str> = text.split("<strong>Tags: </strong>").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
let tags_parts: Vec<&str> = text
.split("<strong>Tags: </strong>")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.split("<a href=\"")
.collect::<Vec<&str>>();
for star in tags_parts.iter().skip(1) {
if star.starts_with("https://tube.perverzija.com/stars/") {
tags.push(
star.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
star.split("/\"")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.replace("https://tube.perverzija.com/stars/", "@stars:")
.to_string(),
);
@@ -574,9 +690,21 @@ impl PerverzijaProvider {
if !url_str.contains("xtremestream.xyz") {
return Err("Video URL does not contain xtremestream.xyz".into());
}
let mut id = url_str.split("data=").collect::<Vec<&str>>().get(1).copied().unwrap_or_default().to_string();
let mut id = url_str
.split("data=")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.to_string();
if id.contains("&") {
id = id.split("&").collect::<Vec<&str>>().get(0).copied().unwrap_or_default().to_string()
id = id
.split("&")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.to_string()
}
// if !vid[6].contains(" src=\""){
// for (index,line) in vid.iter().enumerate() {