// hottub/src/proxies/heavyfetish.rs
// Exported 2026-04-05 21:27:47 +00:00 — 172 lines, 5.3 KiB, Rust

use std::collections::HashMap;
use ntex::web;
use regex::Regex;
use scraper::{Html, Selector};
use url::Url;
use crate::util::requester::Requester;
/// Scraping proxy for heavyfetish.com: resolves a video detail-page URL to a
/// direct media URL. Stateless — all logic lives in associated functions.
#[derive(Debug, Clone)]
pub struct HeavyfetishProxy {}
impl HeavyfetishProxy {
    /// Creates a new, stateless proxy instance.
    pub fn new() -> Self {
        Self {}
    }

    /// Canonicalizes user-supplied input into an `https://` detail-page URL.
    ///
    /// Returns `None` when the input is empty (after trimming) or does not
    /// pass [`Self::is_allowed_detail_url`].
    fn normalize_detail_url(endpoint: &str) -> Option<String> {
        let endpoint = endpoint.trim().trim_start_matches('/');
        if endpoint.is_empty() {
            return None;
        }
        // Prepend a scheme when the caller passed a bare host/path; the
        // leading slashes were already stripped above.
        let detail_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
            endpoint.to_string()
        } else {
            format!("https://{endpoint}")
        };
        Self::is_allowed_detail_url(&detail_url).then_some(detail_url)
    }

    /// Guard on the fetch target: accepts only `https` URLs whose host is
    /// `heavyfetish.com` / `www.heavyfetish.com` and whose path is under
    /// `/videos/`. Anything unparsable is rejected.
    fn is_allowed_detail_url(url: &str) -> bool {
        let Ok(parsed) = Url::parse(url) else {
            return false;
        };
        if parsed.scheme() != "https" {
            return false;
        }
        let Some(host) = parsed.host_str() else {
            return false;
        };
        (host == "heavyfetish.com" || host == "www.heavyfetish.com")
            && parsed.path().starts_with("/videos/")
    }

    /// Normalizes a scraped media URL: unescapes `\/`, resolves
    /// protocol-relative (`//…`) and site-relative (`/…`) forms, and upgrades
    /// `http://` to `https://`. Returns an empty string for empty input;
    /// other relative forms are passed through unchanged.
    fn normalize_url(raw: &str) -> String {
        let value = raw.trim().replace("\\/", "/");
        if value.is_empty() {
            return String::new();
        }
        if value.starts_with("//") {
            return format!("https:{value}");
        }
        if value.starts_with('/') {
            return format!("https://heavyfetish.com{value}");
        }
        if value.starts_with("http://") {
            return value.replacen("http://", "https://", 1);
        }
        value
    }

    /// Infers a quality label ("1080p", …) from a URL, falling back to
    /// "480p" when no known label is embedded in it.
    fn quality_from_url(url: &str) -> String {
        for quality in ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"] {
            if url.contains(quality) {
                return quality.to_string();
            }
        }
        "480p".to_string()
    }

    /// Numeric rank of a quality label: all ASCII digits concatenated and
    /// parsed (e.g. "1080p" -> 1080); 0 when no digits are present.
    fn quality_score(label: &str) -> u32 {
        label
            .chars()
            .filter(|value| value.is_ascii_digit())
            .collect::<String>()
            .parse::<u32>()
            .unwrap_or(0)
    }

    /// Compiles a regex, swallowing pattern errors as `None`.
    fn regex(value: &str) -> Option<Regex> {
        Regex::new(value).ok()
    }

    /// First capture group of `regex` in `block`, with JS string escapes
    /// (`\/`, `\'`) undone.
    fn extract_js_value(block: &str, regex: &Regex) -> Option<String> {
        regex
            .captures(block)
            .and_then(|captures| captures.get(1))
            .map(|value| value.as_str().replace("\\/", "/").replace("\\'", "'"))
    }

    /// Parses a CSS selector, swallowing parse errors as `None`.
    fn selector(value: &str) -> Option<Selector> {
        Selector::parse(value).ok()
    }

    /// Picks the candidate with the highest numeric quality label.
    fn best_quality(seen: &HashMap<String, String>) -> Option<String> {
        seen.iter()
            .max_by_key(|(quality, _)| Self::quality_score(quality))
            .map(|(_, url)| url.clone())
    }

    /// Scrapes the best-quality media URL from a detail page: first from the
    /// player's `flashvars` JavaScript object, then from the download popup's
    /// `/get_file/` links. Returns `None` when no candidate is found.
    fn extract_source_url(html: &str) -> Option<String> {
        let flashvars_regex = Self::regex(r#"(?s)var\s+flashvars\s*=\s*\{(.*?)\};"#)?;
        let value_regex = |key: &str| Self::regex(&format!(r#"{key}:\s*'((?:\\'|[^'])*)'"#));
        // quality label -> URL; `entry().or_insert` keeps the FIRST source
        // found for a label, so higher-priority keys (alt_url2 first) win.
        let mut seen = HashMap::<String, String>::new();
        if let Some(flashvars) = flashvars_regex
            .captures(html)
            .and_then(|value| value.get(1))
            .map(|value| value.as_str().to_string())
        {
            for key in ["video_alt_url2", "video_alt_url", "video_url"] {
                let Some(url_regex) = value_regex(key) else {
                    continue;
                };
                let Some(text_regex) = value_regex(&format!("{key}_text")) else {
                    continue;
                };
                let Some(url) = Self::extract_js_value(&flashvars, &url_regex) else {
                    continue;
                };
                let normalized = Self::normalize_url(&url);
                if normalized.is_empty() {
                    continue;
                }
                // Prefer the page's own quality label; fall back to sniffing
                // the label out of the URL itself.
                let quality = Self::extract_js_value(&flashvars, &text_regex)
                    .filter(|value| !value.is_empty())
                    .unwrap_or_else(|| Self::quality_from_url(&normalized));
                seen.entry(quality).or_insert(normalized);
            }
        }
        let document = Html::parse_document(html);
        let Some(download_selector) = Self::selector("#download_popup a[href*='/get_file/']")
        else {
            // Static selector should always parse; degrade gracefully anyway.
            return Self::best_quality(&seen);
        };
        for element in document.select(&download_selector) {
            let href = element.value().attr("href").unwrap_or_default();
            let normalized = Self::normalize_url(href);
            if normalized.is_empty() {
                continue;
            }
            let quality = Self::quality_from_url(&normalized);
            seen.entry(quality).or_insert(normalized);
        }
        Self::best_quality(&seen)
    }
}
impl crate::proxies::Proxy for HeavyfetishProxy {
    /// Resolves a heavyfetish.com detail-page URL to a direct media URL.
    /// Returns an empty string when the URL is rejected, the fetch fails,
    /// or no media source can be scraped from the page.
    async fn get_video_url(&self, url: String, requester: web::types::State<Requester>) -> String {
        let detail_url = match Self::normalize_detail_url(&url) {
            Some(value) => value,
            None => return String::new(),
        };
        let mut client = requester.get_ref().clone();
        match client.get(&detail_url, None).await.unwrap_or_default() {
            body if body.is_empty() => String::new(),
            body => Self::extract_source_url(&body).unwrap_or_default(),
        }
    }
}