fixes and cleanup

2026-03-05 18:18:48 +00:00
parent 76fd5a4f4f
commit 2627505ade
49 changed files with 3245 additions and 1376 deletions
--- a/src/providers/perverzija.rs
+++ b/src/providers/perverzija.rs
@@ -6,16 +6,15 @@ use crate::status::*;
 use crate::util::cache::VideoCache;
 use crate::util::time::parse_time_to_seconds;
 use crate::videos::ServerOptions;
-use crate::videos::{self, VideoEmbed, VideoItem};
+use crate::videos::{self, VideoItem};
 use async_trait::async_trait;
 use error_chain::error_chain;
 use futures::future::join_all;
 use htmlentity::entity::{ICodedDataTrait, decode};
 use serde::Deserialize;
 use serde::Serialize;
-use wreq::Version;
-use std::vec;
 use wreq::Client;
+use wreq::Version;
 use wreq_util::Emulation;

 error_chain! {
@@ -78,6 +77,66 @@ impl PerverzijaProvider {
        }
    }

+    /// Returns the part of `haystack` enclosed by `start` and `end`.
+    ///
+    /// `haystack` is cut at every occurrence of `start` and only the piece
+    /// right after the first occurrence is considered, so the result never
+    /// extends past a second `start`. That piece is returned up to (not
+    /// including) the first `end`; if `end` does not occur in it, the whole
+    /// piece is returned.
+    ///
+    /// Returns `None` only when `start` does not occur in `haystack`.
+    fn extract_between<'a>(haystack: &'a str, start: &str, end: &str) -> Option<&'a str> {
+        let mut pieces = haystack.split(start);
+        let _before_start = pieces.next()?;
+        let after_start = pieces.next()?;
+        let enclosed = match after_start.find(end) {
+            Some(end_at) => &after_start[..end_at],
+            None => after_start,
+        };
+        Some(enclosed)
+    }
+
+    /// Pulls the iframe `src` value out of `haystack`.
+    ///
+    /// The plain form `iframe src="…"` is tried first; only when that marker
+    /// is absent is the entity-escaped form `iframe src=&quot;…&quot;` tried.
+    /// Returns an empty string when neither marker is present.
+    fn extract_iframe_src(haystack: &str) -> String {
+        let src = match Self::extract_between(haystack, "iframe src=\"", "\"") {
+            Some(plain) => plain,
+            None => Self::extract_between(haystack, "iframe src=&quot;", "&quot;")
+                .unwrap_or_default(),
+        };
+        src.to_string()
+    }
+
+    /// Extracts a thumbnail URL from the first `<img` tag region of `haystack`.
+    ///
+    /// Only the text between the first `<img` and the next `<img` (or the end
+    /// of the input) is searched. Within it the attributes are tried in this
+    /// order, and the first one present wins: `data-original="…"`,
+    /// `data-src="…"`, `src="…"`.
+    ///
+    /// A value starting with `data:image` is discarded (empty string is
+    /// returned), and a value starting with `//` gets `https:` prepended.
+    /// Returns an empty string when there is no `<img` or none of the
+    /// attributes is found.
+    fn extract_thumb(haystack: &str) -> String {
+        let after_img = haystack.split("<img").nth(1).unwrap_or_default();
+        let candidate = ["data-original=\"", "data-src=\"", "src=\""]
+            .iter()
+            .find_map(|attr| Self::extract_between(after_img, attr, "\""))
+            .unwrap_or_default();
+
+        if candidate.starts_with("data:image") {
+            String::new()
+        } else if candidate.starts_with("//") {
+            format!("https:{candidate}")
+        } else {
+            candidate.to_string()
+        }
+    }
+
+    /// Derives a plain-text title from a video snippet.
+    ///
+    /// The raw title is taken from the first of these markers that is present
+    /// in `haystack`: the contents of `<h4 class='gv-title'>`, the contents of
+    /// `<h4 class="gv-title">`, a single-quoted ` title='…'` attribute, or a
+    /// double-quoted ` title="…"` attribute. It is empty when none is present.
+    /// The raw title is then passed through `decode`; if converting the
+    /// decoded data to a string does not succeed, the raw text is kept.
+    ///
+    /// If the resulting text contains both `<` and `>`, everything from a `<`
+    /// to the following `>` is dropped and whitespace runs are collapsed to
+    /// single spaces; should that leave nothing, the text is returned trimmed
+    /// but otherwise untouched. Without such markup only the whitespace
+    /// collapsing is applied.
+    fn extract_title(haystack: &str) -> String {
+        const TITLE_MARKERS: [(&str, &str); 4] = [
+            ("<h4 class='gv-title'>", "</h4>"),
+            ("<h4 class=\"gv-title\">", "</h4>"),
+            (" title='", "'"),
+            (" title=\"", "\""),
+        ];
+        let collapse_whitespace =
+            |text: &str| text.split_whitespace().collect::<Vec<&str>>().join(" ");
+
+        let raw = TITLE_MARKERS
+            .iter()
+            .find_map(|(open, close)| Self::extract_between(haystack, open, close))
+            .unwrap_or_default()
+            .to_string();
+        let decoded = decode(raw.as_bytes()).to_string().unwrap_or(raw);
+
+        if !(decoded.contains('<') && decoded.contains('>')) {
+            return collapse_whitespace(&decoded);
+        }
+
+        // Keep only characters outside `<…>`; the brackets themselves are dropped too.
+        let mut inside_tag = false;
+        let text_only: String = decoded
+            .chars()
+            .filter(|&ch| match ch {
+                '<' => {
+                    inside_tag = true;
+                    false
+                }
+                '>' => {
+                    inside_tag = false;
+                    false
+                }
+                _ => !inside_tag,
+            })
+            .collect();
+
+        let collapsed = collapse_whitespace(&text_only);
+        if collapsed.is_empty() {
+            // Stripping removed everything: fall back to the decoded text itself.
+            decoded.trim().to_string()
+        } else {
+            collapsed
+        }
+    }
+
    async fn get(
        &self,
        cache: VideoCache,
@@ -204,93 +263,91 @@ impl PerverzijaProvider {

    fn get_video_items_from_html(&self, html: String, pool: DbPool) -> Vec<VideoItem> {
        if html.is_empty() {
-            println!("HTML is empty");
+            report_provider_error_background(
+                "perverzija",
+                "get_video_items_from_html.empty_html",
+                "empty html response",
+            );
            return vec![];
        }
        let mut items: Vec<VideoItem> = Vec::new();
-        let video_listing_content = html.split("video-listing-content").collect::<Vec<&str>>().get(1).copied().unwrap_or_default();
-        let raw_videos = video_listing_content
+        let video_listing_content = html.split("video-listing-content").nth(1).unwrap_or(&html);
+        let raw_videos: Vec<&str> = video_listing_content
            .split("video-item post")
-            .collect::<Vec<&str>>()[1..]
-            .to_vec();
-        for video_segment in &raw_videos {
-            let vid = video_segment.split("\n").collect::<Vec<&str>>();
-            if vid.len() > 20 || vid.len() < 8 {
+            .skip(1)
+            .collect();
+
+        if raw_videos.is_empty() {
+            report_provider_error_background(
+                "perverzija",
+                "get_video_items_from_html.no_segments",
+                &format!("html_len={}", html.len()),
+            );
+            return vec![];
+        }
+
+        for video_segment in raw_videos {
+            let title = Self::extract_title(video_segment);
+
+            let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
+                .or_else(|| Self::extract_between(video_segment, "data-embed=\"", "\""))
+                .unwrap_or_default()
+                .to_string();
+            let embed_html = decode(embed_html_raw.as_bytes())
+                .to_string()
+                .unwrap_or(embed_html_raw.clone());
+
+            let mut url_str = Self::extract_iframe_src(&embed_html);
+            if url_str.is_empty() {
+                url_str = Self::extract_iframe_src(video_segment);
+            }
+            if url_str.is_empty() {
                report_provider_error_background(
                    "perverzija",
-                    "get_video_items_from_html.snippet_shape",
-                    &format!("unexpected snippet length={}", vid.len()),
+                    "get_video_items_from_html.url_missing",
+                    "missing iframe src in segment",
                );
                continue;
            }
-            let line0 = vid.get(0).copied().unwrap_or_default();
-            let line1 = vid.get(1).copied().unwrap_or_default();
-            let line4 = vid.get(4).copied().unwrap_or_default();
-            let line6 = vid.get(6).copied().unwrap_or_default();
-            let line7 = vid.get(7).copied().unwrap_or_default();
-            // for (index, line) in vid.iter().enumerate() {
-            //     println!("Line {}: {}", index, line.to_string().trim());
-            // }
-            let mut title = line1.split(">").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                .split("<")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                .to_string();
-            // html decode
-            title = decode(title.as_bytes()).to_string().unwrap_or(title);
-            if !line1.contains("iframe src=&quot;") {
-                continue;
-            }
-            let url_str = line1.split("iframe src=&quot;").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                .split("&quot;")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                .to_string()
-                .replace("index.php", "xs1.php");
+            url_str = url_str.replace("index.php", "xs1.php");
            if url_str.starts_with("https://streamtape.com/") {
                continue; // Skip Streamtape links
            }
-            let id = url_str.split("data=").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                .split("&")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+
+            let id_url = Self::extract_between(video_segment, "data-url='", "'")
+                .or_else(|| Self::extract_between(video_segment, "data-url=\"", "\""))
+                .unwrap_or_default()
+                .to_string();
+
+            let mut id = url_str
+                .split("data=")
+                .nth(1)
+                .unwrap_or_default()
+                .split('&')
+                .next()
+                .unwrap_or_default()
+                .to_string();
+            if id.is_empty() {
+                id = id_url
+                    .trim_end_matches('/')
+                    .rsplit('/')
+                    .next()
+                    .unwrap_or_default()
+                    .to_string();
+            }
+
+            let raw_duration = Self::extract_between(video_segment, "time_dur\">", "<")
+                .or_else(|| Self::extract_between(video_segment, "class=\"time\">", "<"))
+                .unwrap_or("00:00")
                .to_string();
-            let raw_duration = match vid.len() {
-                10 => line6.split("time_dur\">").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                    .split("<")
-                    .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                    .to_string(),
-                _ => "00:00".to_string(),
-            };
            let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
-
-            if !line4.contains("srcset=")
-                && line4.split("src=\"").collect::<Vec<&str>>().len() == 1
-            {
-                for (index, line) in vid.iter().enumerate() {
-                    println!("Line {}: {}\n\n", index, line);
-                }
-            }
-
-            let mut thumb = "".to_string();
-            for v in vid.clone() {
-                let line = v.trim();
-                if line.starts_with("<img ") {
-                    thumb = line.split(" src=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                        .split("\"")
-                        .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                        .to_string();
-                }
-            }
-            let embed_html = line1.split("data-embed='").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                .split("'")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                .to_string();
-            let id_url = line1.split("data-url='").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                .split("'")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                .to_string();
+            let thumb = Self::extract_thumb(video_segment);

            match pool.get() {
                Ok(mut conn) => {
-                    let _ = db::insert_video(&mut conn, &id_url, &url_str);
+                    if !id_url.is_empty() {
+                        let _ = db::insert_video(&mut conn, &id_url, &url_str);
+                    }
                }
                Err(e) => {
                    report_provider_error_background(
@@ -301,26 +358,36 @@ impl PerverzijaProvider {
                }
            }
            let referer_url = "https://xtremestream.xyz/".to_string();
-            let embed = VideoEmbed::new(embed_html, url_str.clone());

-            let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
+            let mut tags: Vec<String> = Vec::new();

-            let studios_parts = line7.split("a href=\"").collect::<Vec<&str>>();
+            let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
            for studio in studios_parts.iter().skip(1) {
                if studio.starts_with("https://tube.perverzija.com/studio/") {
                    tags.push(
-                        studio.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+                        studio
+                            .split("/\"")
+                            .collect::<Vec<&str>>()
+                            .get(0)
+                            .copied()
+                            .unwrap_or_default()
                            .replace("https://tube.perverzija.com/studio/", "@studio:")
                            .to_string(),
                    );
                }
            }

-            for tag in line0.split(" ").collect::<Vec<&str>>() {
-                if tag.starts_with("stars-") {
-                    let tag_name = tag.split("stars-").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                        .split("\"")
-                        .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+            for tag in video_segment.split_whitespace() {
+                let token =
+                    tag.trim_matches(|c: char| c == '"' || c == '\'' || c == '>' || c == '<');
+                if token.starts_with("stars-") {
+                    let tag_name = token
+                        .split("stars-")
+                        .nth(1)
+                        .unwrap_or_default()
+                        .split('"')
+                        .next()
+                        .unwrap_or_default()
                        .to_string();
                    if !tag_name.is_empty() {
                        tags.push(format!("@stars:{}", tag_name));
@@ -328,9 +395,11 @@ impl PerverzijaProvider {
                }
            }

-            for tag in line0.split(" ").collect::<Vec<&str>>() {
-                if tag.starts_with("tag-") {
-                    let tag_name = tag.split("tag-").collect::<Vec<&str>>().get(1).copied().unwrap_or_default().to_string();
+            for tag in video_segment.split_whitespace() {
+                let token =
+                    tag.trim_matches(|c: char| c == '"' || c == '\'' || c == '>' || c == '<');
+                if token.starts_with("tag-") {
+                    let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
                    if !tag_name.is_empty() {
                        tags.push(tag_name.replace("-", " ").to_string());
                    }
@@ -339,7 +408,7 @@ impl PerverzijaProvider {
            let mut video_item = VideoItem::new(
                id,
                title,
-                embed.source.clone(),
+                url_str.clone(),
                "perverzija".to_string(),
                thumb,
                duration,
@@ -361,7 +430,15 @@ impl PerverzijaProvider {
    }

    async fn get_video_items_from_html_query(&self, html: String, pool: DbPool) -> Vec<VideoItem> {
-        let raw_videos = html.split("video-item post").collect::<Vec<&str>>()[1..].to_vec();
+        let raw_videos: Vec<&str> = html.split("video-item post").skip(1).collect();
+        if raw_videos.is_empty() {
+            report_provider_error_background(
+                "perverzija",
+                "get_video_items_from_html_query.no_segments",
+                &format!("html_len={}", html.len()),
+            );
+            return vec![];
+        }
        let futures = raw_videos
            .into_iter()
            .map(|el| self.get_video_item(el, pool.clone()));
@@ -372,53 +449,39 @@ impl PerverzijaProvider {
    }

    async fn get_video_item(&self, snippet: &str, pool: DbPool) -> Result<VideoItem> {
-        let vid = snippet.split("\n").collect::<Vec<&str>>();
-        if vid.len() > 30 || vid.len() < 7 {
+        if snippet.trim().is_empty() {
            report_provider_error_background(
                "perverzija",
-                "get_video_item.snippet_shape",
-                &format!("unexpected snippet length={}", vid.len()),
+                "get_video_item.empty_snippet",
+                "snippet is empty",
            );
-            return Err("Unexpected video snippet length".into());
+            return Err("empty snippet".into());
        }
-        let line5 = vid.get(5).copied().unwrap_or_default();
-        let line6 = vid.get(6).copied().unwrap_or_default();

-        let mut title = line5.split(" title=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-            .split("\"")
-            .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-            .to_string();
-        title = decode(title.as_bytes()).to_string().unwrap_or(title);
+        let title = Self::extract_title(snippet);

-        let thumb = match line6.split(" src=\"").collect::<Vec<&str>>().len() {
-            1 => {
-                for (index, line) in vid.iter().enumerate() {
-                    println!("Line {}: {}", index, line.to_string().trim());
-                }
-                return Err("Failed to parse thumbnail URL".into());
-            }
-            _ => line6.split(" src=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-                .split("\"")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
-                .to_string(),
-        };
+        let thumb = Self::extract_thumb(snippet);
        let duration = 0;

-        let lookup_url = line5.split(" href=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
-            .split("\"")
-            .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+        let lookup_url = Self::extract_between(snippet, " href=\"", "\"")
+            .or_else(|| Self::extract_between(snippet, "data-url='", "'"))
+            .unwrap_or_default()
            .to_string();
+        if lookup_url.is_empty() {
+            report_provider_error_background(
+                "perverzija",
+                "get_video_item.lookup_url_missing",
+                "missing lookup url in snippet",
+            );
+            return Err("Failed to parse lookup url".into());
+        }
        let referer_url = "https://xtremestream.xyz/".to_string();

        let mut conn = match pool.get() {
            Ok(conn) => conn,
            Err(e) => {
-                report_provider_error(
-                    "perverzija",
-                    "get_video_item.pool_get",
-                    &e.to_string(),
-                )
-                .await;
+                report_provider_error("perverzija", "get_video_item.pool_get", &e.to_string())
+                    .await;
                return Err("couldn't get db connection from pool".into());
            }
        };
@@ -433,9 +496,21 @@ impl PerverzijaProvider {
                    if url_str.starts_with("!") {
                        return Err("Video was removed".into());
                    }
-                    let mut id = url_str.split("data=").collect::<Vec<&str>>().get(1).copied().unwrap_or_default().to_string();
+                    let mut id = url_str
+                        .split("data=")
+                        .collect::<Vec<&str>>()
+                        .get(1)
+                        .copied()
+                        .unwrap_or_default()
+                        .to_string();
                    if id.contains("&") {
-                        id = id.split("&").collect::<Vec<&str>>().get(0).copied().unwrap_or_default().to_string()
+                        id = id
+                            .split("&")
+                            .collect::<Vec<&str>>()
+                            .get(0)
+                            .copied()
+                            .unwrap_or_default()
+                            .to_string()
                    }
                    let mut video_item = VideoItem::new(
                        id,
@@ -481,9 +556,17 @@ impl PerverzijaProvider {
            }
        };

-        let mut url_str = text.split("<iframe src=\"").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
+        let mut url_str = text
+            .split("<iframe src=\"")
+            .collect::<Vec<&str>>()
+            .get(1)
+            .copied()
+            .unwrap_or_default()
            .split("\"")
-            .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+            .collect::<Vec<&str>>()
+            .get(0)
+            .copied()
+            .unwrap_or_default()
            .to_string()
            .replace("index.php", "xs1.php");
        if !url_str.contains("xtremestream.xyz") {
@@ -494,15 +577,26 @@ impl PerverzijaProvider {

        let studios_parts = text
            .split("<strong>Studio: </strong>")
-            .collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
+            .collect::<Vec<&str>>()
+            .get(1)
+            .copied()
+            .unwrap_or_default()
            .split("</div>")
-            .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+            .collect::<Vec<&str>>()
+            .get(0)
+            .copied()
+            .unwrap_or_default()
            .split("<a href=\"")
            .collect::<Vec<&str>>();
        for studio in studios_parts.iter().skip(1) {
            if studio.starts_with("https://tube.perverzija.com/studio/") {
                tags.push(
-                    studio.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+                    studio
+                        .split("/\"")
+                        .collect::<Vec<&str>>()
+                        .get(0)
+                        .copied()
+                        .unwrap_or_default()
                        .replace("https://tube.perverzija.com/studio/", "@studio:")
                        .to_string(),
                );
@@ -511,15 +605,25 @@ impl PerverzijaProvider {
        if text.contains("<strong>Stars: </strong>") {
            let stars_parts: Vec<&str> = text
                .split("<strong>Stars: </strong>")
-                .collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
+                .collect::<Vec<&str>>()
+                .get(1)
+                .copied()
+                .unwrap_or_default()
                .split("</div>")
-                .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+                .collect::<Vec<&str>>()
+                .get(0)
+                .copied()
+                .unwrap_or_default()
                .split("<a href=\"")
                .collect::<Vec<&str>>();
            for star in stars_parts.iter().skip(1) {
                if star.starts_with("https://tube.perverzija.com/stars/") {
                    tags.push(
-                        star.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+                        star.split("/\"")
+                            .collect::<Vec<&str>>()
+                            .get(0)
+                            .copied()
+                            .unwrap_or_default()
                            .replace("https://tube.perverzija.com/stars/", "@stars:")
                            .to_string(),
                    );
@@ -527,15 +631,27 @@ impl PerverzijaProvider {
            }
        }

-        let tags_parts: Vec<&str> = text.split("<strong>Tags: </strong>").collect::<Vec<&str>>().get(1).copied().unwrap_or_default()
+        let tags_parts: Vec<&str> = text
+            .split("<strong>Tags: </strong>")
+            .collect::<Vec<&str>>()
+            .get(1)
+            .copied()
+            .unwrap_or_default()
            .split("</div>")
-            .collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+            .collect::<Vec<&str>>()
+            .get(0)
+            .copied()
+            .unwrap_or_default()
            .split("<a href=\"")
            .collect::<Vec<&str>>();
        for star in tags_parts.iter().skip(1) {
            if star.starts_with("https://tube.perverzija.com/stars/") {
                tags.push(
-                    star.split("/\"").collect::<Vec<&str>>().get(0).copied().unwrap_or_default()
+                    star.split("/\"")
+                        .collect::<Vec<&str>>()
+                        .get(0)
+                        .copied()
+                        .unwrap_or_default()
                        .replace("https://tube.perverzija.com/stars/", "@stars:")
                        .to_string(),
                );
@@ -574,9 +690,21 @@ impl PerverzijaProvider {
        if !url_str.contains("xtremestream.xyz") {
            return Err("Video URL does not contain xtremestream.xyz".into());
        }
-        let mut id = url_str.split("data=").collect::<Vec<&str>>().get(1).copied().unwrap_or_default().to_string();
+        let mut id = url_str
+            .split("data=")
+            .collect::<Vec<&str>>()
+            .get(1)
+            .copied()
+            .unwrap_or_default()
+            .to_string();
        if id.contains("&") {
-            id = id.split("&").collect::<Vec<&str>>().get(0).copied().unwrap_or_default().to_string()
+            id = id
+                .split("&")
+                .collect::<Vec<&str>>()
+                .get(0)
+                .copied()
+                .unwrap_or_default()
+                .to_string()
        }
        // if !vid[6].contains(" src=\""){
        //     for (index,line) in vid.iter().enumerate() {