// hottub/src/proxies/doodstream.rs

use ntex::web;
use regex::{Captures, Regex};
use url::Url;

use crate::util::requester::Requester;

/// Proxy that resolves a direct media URL from an allow-listed embed page.
///
/// The type carries no state; all of its logic lives in associated functions.
/// `Default` is derived so the type can be built wherever a `Default` bound is
/// required, matching the argument-less `new()` constructor.
#[derive(Debug, Clone, Default)]
pub struct DoodstreamProxy {}

impl DoodstreamProxy {
    /// Value sent as the `Referer` header when requesting a detail page.
    const ROOT_REFERER: &'static str = "https://turboplayers.xyz/";

    pub fn new() -> Self {
        Self {}
    }

    /// Turns a caller-supplied endpoint into an absolute detail-page URL.
    ///
    /// Endpoints that already start with `http://` or `https://` are kept as
    /// they are; anything else has its leading slashes removed and `https://`
    /// prepended. Returns `None` when the resulting URL is rejected by
    /// `is_allowed_detail_url`.
    fn normalize_detail_url(endpoint: &str) -> Option<String> {
        // Trim before looking at the prefix: otherwise leading whitespace hides
        // an existing scheme (yielding "https:// https://…"), and whitespace
        // around a scheme-less endpoint would leak into the returned URL.
        let endpoint = endpoint.trim();
        let normalized = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
            endpoint.to_string()
        } else {
            format!("https://{}", endpoint.trim_start_matches('/'))
        };

        Self::is_allowed_detail_url(&normalized).then_some(normalized)
    }

    /// Returns `true` when `host` exactly equals one of the allow-listed
    /// host names (the comparison is case-sensitive).
    fn is_allowed_host(host: &str) -> bool {
        const ALLOWED_HOSTS: [&str; 6] = [
            "turboplayers.xyz",
            "www.turboplayers.xyz",
            "trailerhg.xyz",
            "www.trailerhg.xyz",
            "streamhg.com",
            "www.streamhg.com",
        ];

        ALLOWED_HOSTS.iter().any(|allowed| *allowed == host)
    }

    /// Checks whether `url` is an acceptable detail-page URL.
    ///
    /// The URL must parse, use the `https` scheme, have a host accepted by
    /// `is_allowed_host`, and a path beginning with `/t/`, `/e/` or `/d/`.
    fn is_allowed_detail_url(url: &str) -> bool {
        let parsed = match Url::parse(url) {
            Ok(parsed) => parsed,
            Err(_) => return false,
        };

        let host_is_allowed = parsed.host_str().map_or(false, Self::is_allowed_host);
        if parsed.scheme() != "https" || !host_is_allowed {
            return false;
        }

        let path = parsed.path();
        ["/t/", "/e/", "/d/"]
            .iter()
            .any(|prefix| path.starts_with(*prefix))
    }

    /// Builds the header list used when fetching `detail_url`.
    ///
    /// `Referer`, `Origin`, `Accept` and `Accept-Language` are fixed.
    /// `Sec-Fetch-Site` is `cross-site` when the URL text contains
    /// `trailerhg.xyz` and `same-origin` otherwise.
    fn request_headers(detail_url: &str) -> Vec<(String, String)> {
        let fetch_site = if detail_url.contains("trailerhg.xyz") {
            "cross-site"
        } else {
            "same-origin"
        };

        let headers: [(&str, &str); 5] = [
            ("Referer", Self::ROOT_REFERER),
            ("Origin", "https://turboplayers.xyz"),
            (
                "Accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            ),
            ("Accept-Language", "en-US,en;q=0.9"),
            ("Sec-Fetch-Site", fetch_site),
        ];

        headers
            .iter()
            .map(|&(name, value)| (name.to_string(), value.to_string()))
            .collect()
    }

    /// Compiles `pattern`, returning `None` instead of an error when it is invalid.
    fn regex(pattern: &str) -> Option<Regex> {
        match Regex::new(pattern) {
            Ok(compiled) => Some(compiled),
            Err(_) => None,
        }
    }

    /// Parses `token` as a base-36 number, yielding `None` when it is not a
    /// valid base-36 integer or does not fit in `usize`.
    fn decode_base36(token: &str) -> Option<usize> {
        match usize::from_str_radix(token, 36) {
            Ok(index) => Some(index),
            Err(_) => None,
        }
    }

    /// Cleans a scraped URL: trims surrounding whitespace, then removes any
    /// trailing run of backslashes, double quotes and single quotes.
    ///
    /// The trailing characters are stripped as one set rather than one kind at
    /// a time, so leftovers such as `\"` are removed regardless of the order
    /// in which they appear.
    fn sanitize_media_url(url: &str) -> String {
        url.trim()
            .trim_end_matches(|c: char| matches!(c, '\\' | '"' | '\''))
            .to_string()
    }

    /// Looks for a media URL written literally in `text`.
    ///
    /// The patterns are tried in order (`urlPlay = '…'`, `data-hash="…"`, then
    /// double- and single-quoted `.m3u8`/`.mp4` URLs). The first pattern that
    /// matches wins and its `url` capture is returned after sanitising.
    /// A pattern that fails to compile is skipped.
    fn extract_literal_url(text: &str) -> Option<String> {
        let direct_patterns = [
            r#"urlPlay\s*=\s*'(?P<url>https?://[^']+)'"#,
            r#"data-hash\s*=\s*"(?P<url>https?://[^"]+)""#,
            r#""(?P<url>https?://[^"]+\.(?:m3u8|mp4)(?:\?[^"]*)?)""#,
            r#"'(?P<url>https?://[^']+\.(?:m3u8|mp4)(?:\?[^']*)?)'"#,
        ];

        direct_patterns.iter().find_map(|pattern| {
            let compiled = Self::regex(pattern)?;
            let found = compiled.captures(text)?;
            let matched = found.name("url")?;
            Some(Self::sanitize_media_url(matched.as_str()))
        })
    }

    /// Pulls the arguments out of an `eval(function(p,a,c,k,e,d){…}(…))` call.
    ///
    /// Returns `(payload, radix, count, symbols)`: the payload and the
    /// `|`-separated symbol table are unescaped with
    /// `decode_js_single_quoted`, and radix and count are parsed as `usize`.
    /// Yields `None` when the call is not found or a number fails to parse.
    fn extract_packed_eval_args(text: &str) -> Option<(String, usize, usize, Vec<String>)> {
        let packer_call = Self::regex(
            r#"eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>(?:\\'|\\\\|[^'])*)',(?P<radix>\d+),(?P<count>\d+),'(?P<symbols>(?:\\'|\\\\|[^'])*)'\.split\('\|'\)"#,
        )?;
        let found = packer_call.captures(text)?;

        let raw_payload = found.name("payload")?.as_str();
        let payload = Self::decode_js_single_quoted(raw_payload);

        let radix: usize = found.name("radix")?.as_str().parse().ok()?;
        let count: usize = found.name("count")?.as_str().parse().ok()?;

        let raw_symbols = found.name("symbols")?.as_str();
        let symbol_table = Self::decode_js_single_quoted(raw_symbols);
        let parts: Vec<String> = symbol_table.split('|').map(str::to_string).collect();

        Some((payload, radix, count, parts))
    }

    /// Unescapes the body of a single-quoted JavaScript string literal.
    ///
    /// Handles `\\`, `\'`, `\"`, `\n`, `\r` and `\t`. Any other escape is
    /// copied through unchanged (backslash included), and a lone trailing
    /// backslash is kept.
    fn decode_js_single_quoted(value: &str) -> String {
        let mut decoded = String::with_capacity(value.len());
        // True while the previous character was an unconsumed backslash.
        let mut after_backslash = false;

        for current in value.chars() {
            if !after_backslash {
                if current == '\\' {
                    after_backslash = true;
                } else {
                    decoded.push(current);
                }
                continue;
            }

            after_backslash = false;
            match current {
                '\\' | '\'' | '"' => decoded.push(current),
                'n' => decoded.push('\n'),
                'r' => decoded.push('\r'),
                't' => decoded.push('\t'),
                unknown => {
                    decoded.push('\\');
                    decoded.push(unknown);
                }
            }
        }

        // Input ended right after a backslash: keep it verbatim.
        if after_backslash {
            decoded.push('\\');
        }

        decoded
    }

    /// Expands a p,a,c,k,e,d-packed script found in `text`.
    ///
    /// Only radix 36 is supported; any other radix yields `None`. Each
    /// `[0-9a-z]+` word in the payload is read as a base-36 index and replaced
    /// by the symbol at that index. The word is left as is when it does not
    /// parse, when the index is not below `count`, or when the symbol is
    /// missing or empty.
    fn unpack_packer(text: &str) -> Option<String> {
        let (payload, radix, count, symbols) = Self::extract_packed_eval_args(text)?;
        if radix != 36 {
            return None;
        }

        let token_regex = Self::regex(r"\b[0-9a-z]+\b")?;
        let unpacked = token_regex.replace_all(&payload, |found: &Captures| {
            let token = found.get(0).map_or("", |whole| whole.as_str());

            let symbol = Self::decode_base36(token)
                .filter(|&index| index < count)
                .and_then(|index| symbols.get(index))
                .filter(|symbol| !symbol.is_empty());

            match symbol {
                Some(symbol) => symbol.clone(),
                None => token.to_string(),
            }
        });

        Some(unpacked.into_owned())
    }

    /// Collects every distinct `https://` URL in `text` that ends in `.m3u8`,
    /// `.mp4` or `.txt` (with an optional query string).
    ///
    /// The result is ordered `.m3u8` first, then `.mp4`, then everything else;
    /// within one group URLs keep their order of first appearance. Returns an
    /// empty vector when the pattern fails to compile or nothing matches.
    fn collect_media_candidates(text: &str) -> Vec<String> {
        let Some(regex) = Self::regex(r#"https?://[^\s"'<>]+?\.(?:m3u8|mp4|txt)(?:\?[^\s"'<>]*)?"#)
        else {
            return vec![];
        };

        // `Vec::dedup` only removes consecutive repeats and the sort below
        // groups by extension only, so duplicates must be dropped with a set
        // to catch repeats that are not adjacent.
        let mut seen = std::collections::HashSet::new();
        let mut urls = regex
            .find_iter(text)
            .map(|value| Self::sanitize_media_url(value.as_str()))
            .filter(|url| url.starts_with("https://"))
            .filter(|url| seen.insert(url.clone()))
            .collect::<Vec<_>>();

        // Stable sort: keeps first-appearance order inside each priority group.
        urls.sort_by_key(|url| {
            if url.contains(".m3u8") {
                0
            } else if url.contains(".mp4") {
                1
            } else {
                2
            }
        });
        urls
    }

    /// Finds the stream URL in a detail page.
    ///
    /// A literal URL in the raw page wins. Otherwise the packed script is
    /// expanded and the first media candidate in it is used, falling back to a
    /// literal URL inside the expanded script. `None` when nothing is found or
    /// the page contains no unpackable script.
    fn extract_stream_url(text: &str) -> Option<String> {
        Self::extract_literal_url(text).or_else(|| {
            let unpacked = Self::unpack_packer(text)?;
            let best_candidate = Self::collect_media_candidates(&unpacked)
                .into_iter()
                .next();

            match best_candidate {
                Some(candidate) => Some(candidate),
                None => Self::extract_literal_url(&unpacked),
            }
        })
    }
}

impl crate::proxies::Proxy for DoodstreamProxy {
    /// Resolves `url` to a media URL.
    ///
    /// The endpoint is normalised and validated, the detail page is fetched
    /// with the headers from `request_headers`, and the stream URL is
    /// extracted from the response. An empty string is returned when the
    /// endpoint is rejected, the request fails, or no stream URL is found.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let detail_url = match Self::normalize_detail_url(&url) {
            Some(detail_url) => detail_url,
            None => return String::new(),
        };

        // Work on a clone of the shared requester, since a mutable binding is needed.
        let mut client = requester.get_ref().clone();
        let headers = Self::request_headers(&detail_url);
        let Ok(html) = client.get_with_headers(&detail_url, headers, None).await else {
            return String::new();
        };

        Self::extract_stream_url(&html).unwrap_or_default()
    }
}

#[cfg(test)]
mod tests {
    use super::DoodstreamProxy;

    /// Only `https` URLs on allow-listed hosts pass the detail-URL check.
    #[test]
    fn allows_only_known_doodstream_hosts() {
        let accepted = [
            "https://turboplayers.xyz/t/69bdfb21cc640",
            "https://trailerhg.xyz/e/ttdc7a6qpskt",
        ];
        let rejected = [
            "http://turboplayers.xyz/t/69bdfb21cc640",
            "https://example.com/t/69bdfb21cc640",
        ];

        for url in accepted {
            assert!(DoodstreamProxy::is_allowed_detail_url(url));
        }
        for url in rejected {
            assert!(!DoodstreamProxy::is_allowed_detail_url(url));
        }
    }

    /// A page exposing the URL in clear text is resolved without unpacking.
    #[test]
    fn extracts_clear_hls_url_from_turboplayers_layout() {
        let expected = "https://cdn4.turboviplay.com/data1/69bdfa8ce1f4d/69bdfa8ce1f4d.m3u8";
        let page = r#"
            <div id="video_player" data-hash="https://cdn4.turboviplay.com/data1/69bdfa8ce1f4d/69bdfa8ce1f4d.m3u8"></div>
            <script>
                var urlPlay = 'https://cdn4.turboviplay.com/data1/69bdfa8ce1f4d/69bdfa8ce1f4d.m3u8';
            </script>
        "#;

        let extracted = DoodstreamProxy::extract_stream_url(page);
        assert_eq!(extracted.as_deref(), Some(expected));
    }

    /// A packed player config is expanded and the `.m3u8` link is preferred
    /// over the `.txt` one.
    #[test]
    fn unpacks_streamhg_style_player_config() {
        let expected = "https://cdn.example/master.m3u8?t=1";
        let page = r#"
            <script type='text/javascript'>
            eval(function(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}('0 1={\"2\":\"https://cdn.example/master.m3u8?t=1\",\"3\":\"https://cdn.example/master.txt\"};4(\"5\").6({7:[{8:1.2,9:\"a\"}]});',36,11,'var|links|hls2|hls3|jwplayer|vplayer|setup|sources|file|type|hls'.split('|')))
            </script>
        "#;

        let extracted = DoodstreamProxy::extract_stream_url(page);
        assert_eq!(extracted.as_deref(), Some(expected));
    }
}