From e6eb85cd5aa162e9ac98d1e0eafd2224d0a0f72a Mon Sep 17 00:00:00 2001
From: Simon <simon.huenecke@gmail.com>
Date: Fri, 17 Apr 2026 21:03:01 +0000
Subject: [PATCH] archivebate still needs work

---
 src/providers/archivebate.rs | 263 ++++++++++++++++++++++++++++++++++-
 1 file changed, 259 insertions(+), 4 deletions(-)
diff --git a/src/providers/archivebate.rs b/src/providers/archivebate.rs
index 6525fea..6996232 100644
--- a/src/providers/archivebate.rs
+++ b/src/providers/archivebate.rs
@@ -7,10 +7,11 @@ use crate::status::*;
 use crate::util::cache::VideoCache;
 use crate::util::parse_abbreviated_number;
 use crate::util::time::parse_time_to_seconds;
-use crate::videos::{ServerOptions, VideoItem};
+use crate::videos::{ServerOptions, VideoFormat, VideoItem};
 use async_trait::async_trait;
 use chrono::{Duration as ChronoDuration, Utc};
 use error_chain::error_chain;
+use futures::stream::{self, StreamExt};
 use htmlentity::entity::{ICodedDataTrait, decode};
 use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
 use regex::Regex;
@@ -20,6 +21,8 @@ use serde_json::Value;
 use std::collections::HashSet;
 use std::sync::{Arc, RwLock};
 use std::thread;
+use std::time::Duration as StdDuration;
+use tokio::time::timeout;
 
 pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
     crate::providers::ProviderChannelMetadata {
@@ -281,6 +284,9 @@ impl ArchivebateProvider {
         if value.starts_with("http://") || value.starts_with("https://") {
             return value.to_string();
         }
+        if value.starts_with("//") {
+            return format!("https:{value}");
+        }
         format!(
             "{}/{}",
             self.url.trim_end_matches('/'),
@@ -711,6 +717,213 @@ impl ArchivebateProvider {
         Ok(items)
     }
 
+    /// Unpacks a packer-style `eval(function(p,a,c,k,e,d){...})` script found in `html`.
+    /// Returns `None` when no packed script matches or the radix is outside `2..=36`.
+    fn parse_mixin_packed_eval(html: &str) -> Option<String> {
+        let eval_regex = Regex::new(
+            r#"(?s)eval\(function\(p,a,c,k,e,d\)\{.*?\}\('(?P<payload>.*?)',\s*(?P<radix>[0-9]+),\s*(?P<count>[0-9]+),\s*'(?P<tokens>.*?)'\.split\('\|'\)"#,
+        )
+        .ok()?;
+        let captures = eval_regex.captures(html)?;
+        let radix = captures.name("radix")?.as_str().parse::<u32>().ok()?;
+        let count = captures.name("count")?.as_str().parse::<usize>().ok()?;
+        if !(2..=36).contains(&radix) {
+            return None;
+        }
+        let payload = Self::unescape_js_single_quoted(captures.name("payload")?.as_str());
+        // `count` is page-controlled: `take(count)` caps the work at the real token list
+        // instead of spinning through an arbitrarily large index range.
+        let dictionary: std::collections::HashMap<String, &str> = captures
+            .name("tokens")?
+            .as_str()
+            .split('|')
+            .take(count)
+            .enumerate()
+            .filter(|(_, token)| !token.is_empty())
+            .map(|(index, token)| (Self::to_radix(index, radix), token))
+            .collect();
+        // Single pass over every word, as the packer's own fast path does. Closure output
+        // is inserted literally, so tokens are never rescanned nor `$`-expanded.
+        let word_regex = Regex::new(r"\w+").ok()?;
+        let unpacked = word_regex.replace_all(&payload, |word: &regex::Captures| {
+            dictionary.get(&word[0]).copied().unwrap_or(&word[0]).to_string()
+        });
+        Some(unpacked.into_owned())
+    }
+
+    /// Decodes backslash escapes inside the body of a single-quoted JS string: `\n`, `\r`
+    /// and `\t` become control characters, any other escaped character is kept verbatim,
+    /// and a lone trailing backslash is dropped.
+    fn unescape_js_single_quoted(value: &str) -> String {
+        let mut decoded = String::with_capacity(value.len());
+        let mut escaped = false;
+        for current in value.chars() {
+            if escaped {
+                escaped = false;
+                decoded.push(match current {
+                    'n' => '\n',
+                    'r' => '\r',
+                    't' => '\t',
+                    other => other,
+                });
+            } else if current == '\\' {
+                escaped = true;
+            } else {
+                decoded.push(current);
+            }
+        }
+        decoded
+    }
+
+    /// Renders `value` in `radix` with digits `0-9a-z`; callers must keep `radix` in `2..=36`.
+    fn to_radix(mut value: usize, radix: u32) -> String {
+        const DIGITS: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";
+        if value == 0 {
+            return String::from("0");
+        }
+        let mut reversed = Vec::new();
+        while value != 0 {
+            reversed.push(char::from(DIGITS[value % radix as usize]));
+            value /= radix as usize;
+        }
+        reversed.into_iter().rev().collect()
+    }
+
+    /// Locates the `MDCore.wurl = "..."` assignment of a MixDrop embed page.
+    ///
+    /// The raw `html` is searched first; only when that fails is the packed `eval(...)`
+    /// script unpacked and searched. A protocol-relative result is upgraded to `https:`.
+    fn extract_mixdrop_media_url(html: &str) -> Option<String> {
+        let wurl_regex = Regex::new(r#"MDCore\.wurl\s*=\s*"([^"]+)""#).ok()?;
+        let find_wurl = |text: &str| -> Option<String> {
+            let assigned = wurl_regex.captures(text)?.get(1)?.as_str();
+            Some(Self::normalize_possible_protocol_relative(assigned))
+        };
+
+        if let Some(url) = find_wurl(html) {
+            return Some(url);
+        }
+        find_wurl(&Self::parse_mixin_packed_eval(html)?)
+    }
+
+    /// Trims `value` and prepends `https:` when the trimmed text starts with `//`.
+    fn normalize_possible_protocol_relative(value: &str) -> String {
+        let trimmed = value.trim();
+        if !trimmed.starts_with("//") {
+            return trimmed.to_owned();
+        }
+        format!("https:{trimmed}")
+    }
+
+    /// Returns the lowercased host of `url`, or `None` if it fails to parse or has no host.
+    fn host_from_url(url: &str) -> Option<String> {
+        Some(url::Url::parse(url).ok()?.host_str()?.to_ascii_lowercase())
+    }
+
+    /// Reports whether the host of `url` contains `mixdrop` or `m1xdrop`; a URL without a
+    /// parseable host never matches.
+    fn is_mixdrop_host(url: &str) -> bool {
+        Self::host_from_url(url)
+            .map_or(false, |host| host.contains("mixdrop") || host.contains("m1xdrop"))
+    }
+
+    /// Picks the media URL embedded directly in a page.
+    ///
+    /// Returns the `src` of the first `video source[src]` match in `html`, falling back to
+    /// the first `video[src]` match; `None` when neither selector finds an element.
+    fn first_video_source_from_html(html: &str) -> Option<String> {
+        let nested_source = Selector::parse("video source[src]").ok()?;
+        let video_itself = Selector::parse("video[src]").ok()?;
+        let document = Html::parse_document(html);
+
+        // Selectors are tried in priority order; the first one yielding a `src` wins.
+        for selector in [&nested_source, &video_itself] {
+            let first_match = document.select(selector).next();
+            if let Some(src) = first_match.and_then(|node| node.value().attr("src")) {
+                return Some(src.to_owned());
+            }
+        }
+        None
+    }
+
+    /// Returns the `src` attribute of the first `iframe[src]` element in `html`.
+    ///
+    /// Yields `None` when the document contains no such iframe.
+    fn first_iframe_source_from_html(html: &str) -> Option<String> {
+        let iframe_selector = Selector::parse("iframe[src]").ok()?;
+        let document = Html::parse_document(html);
+        let first_iframe = document.select(&iframe_selector).next()?;
+        first_iframe.value().attr("src").map(str::to_owned)
+    }
+
+    /// Downloads the page at `iframe_url` (sending headers built from `referer`) and
+    /// extracts its MixDrop media URL; `None` when the request or the extraction fails.
+    async fn resolve_mixdrop_media_from_iframe(
+        &self,
+        iframe_url: &str,
+        referer: &str,
+        options: &ServerOptions,
+    ) -> Option<String> {
+        let mut requester = requester_or_default(options, CHANNEL_ID, "resolve_mixdrop_media");
+        let headers = self.html_headers(referer);
+        let fetched = requester
+            .get_with_headers(iframe_url, headers, Some(wreq::Version::HTTP_11))
+            .await;
+        // A failed fetch is treated the same as a page without a media URL.
+        let iframe_html = fetched.ok()?;
+        Self::extract_mixdrop_media_url(&iframe_html)
+    }
+
+    /// Fetches the detail page of `item` and attaches a direct media format when one is found.
+    ///
+    /// A `<video>` source on the page is preferred; otherwise the first iframe is followed
+    /// when it points at a MixDrop host. On any failure `item` is returned unchanged.
+    async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
+        let page_url = item.url.clone();
+        let mut requester = requester_or_default(options, CHANNEL_ID, "archivebate.enrich_video");
+        let referer = format!("{}/", self.url);
+        let fetched = requester
+            .get_with_headers(&page_url, self.html_headers(&referer), Some(wreq::Version::HTTP_11))
+            .await;
+        let detail_html = match fetched {
+            Ok(body) => body,
+            Err(error) => {
+                report_provider_error_background(
+                    CHANNEL_ID,
+                    "enrich_video.fetch_detail",
+                    &format!("url={page_url}; error={error}"),
+                );
+                return item;
+            }
+        };
+
+        // Prefer media embedded directly in the page.
+        let media_url = match Self::first_video_source_from_html(&detail_html) {
+            Some(source) => Some(self.absolute_url(&source)),
+            // Otherwise follow the first iframe, but only when it is a MixDrop host.
+            None => {
+                let iframe_url = Self::first_iframe_source_from_html(&detail_html)
+                    .map(|value| self.absolute_url(&value));
+                match iframe_url {
+                    Some(iframe_url) if Self::is_mixdrop_host(&iframe_url) => {
+                        self.resolve_mixdrop_media_from_iframe(&iframe_url, &page_url, options)
+                            .await
+                    }
+                    _ => None,
+                }
+            }
+        };
+        let Some(media_url) = media_url else {
+            return item;
+        };
+
+        // The resolved URL replaces whatever formats the item carried before.
+        let format = VideoFormat::new(media_url, "source".to_string(), "mp4".to_string());
+        let mut enriched = item;
+        enriched.formats = Some(vec![format]);
+        enriched
+    }
+
     fn extract_csrf_token(html: &str) -> Option<String> {
         let regex = Regex::new(r#"<meta name="csrf-token" content="([^"]+)""#).ok()?;
         regex
@@ -1047,13 +1260,35 @@ impl Provider for ArchivebateProvider {
 
         let result = match query {
             Some(query) if !query.trim().is_empty() => {
-                self.query(cache, page, per_page, &query, options).await
+                self.query(cache, page, per_page, &query, options.clone()).await
             }
-            _ => self.get_default(cache, page, per_page, options).await,
+            _ => self.get_default(cache, page, per_page, options.clone()).await,
         };
 
         match result {
-            Ok(videos) => videos,
+            Ok(videos) => {
+                if videos.is_empty() {
+                    return videos;
+                }
+                stream::iter(videos.into_iter().map(|video| {
+                    let provider = self.clone();
+                    let options = options.clone();
+                    async move {
+                        let timeout_result = timeout(
+                            StdDuration::from_secs(8),
+                            provider.enrich_video(video.clone(), &options),
+                        )
+                        .await;
+                        match timeout_result {
+                            Ok(enriched) => enriched,
+                            Err(_) => video,
+                        }
+                    }
+                }))
+                .buffer_unordered(4)
+                .collect::<Vec<_>>()
+                .await
+            }
             Err(error) => {
                 report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
                 vec![]
@@ -1065,3 +1300,23 @@ impl Provider for ArchivebateProvider {
         Some(self.build_channel(clientversion))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::ArchivebateProvider;
+
+    // The packed fixture must decode to the absolute `https:` media URL.
+    #[test]
+    fn extracts_mixdrop_wurl_from_packed_eval() {
+        let packed_page = r#"
+<script>
+eval(function(p,a,c,k,e,d){e=function(c){return c};if(!''.replace(/^/,String)){while(c--){d[c]=k[c]||c}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('1.2="//3.4.5/6/7.8?9=a&b=c";',13,13,'|MDCore|wurl|o230m5y6z|mxcontent|net|v2|r6pkwozjber741|mp4|s|TvNTJe3_z_6nKveumEHk8Q|e|1776460168'.split('|'),0,{}))
+</script>
+"#;
+        let expected =
+            "https://o230m5y6z.mxcontent.net/v2/r6pkwozjber741.mp4?s=TvNTJe3_z_6nKveumEHk8Q&e=1776460168";
+        let resolved = ArchivebateProvider::extract_mixdrop_media_url(packed_page)
+            .expect("expected mixdrop media url");
+        assert_eq!(resolved, expected);
+    }
+}