From 1b32df0c35848e36f3b41335afd8967ba9985363 Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 20 Mar 2026 22:08:02 +0000 Subject: [PATCH] pimpbunny fix --- src/proxies/pimpbunny.rs | 107 +++++++++++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 10 deletions(-) diff --git a/src/proxies/pimpbunny.rs b/src/proxies/pimpbunny.rs index b24b12d..dd0d453 100644 --- a/src/proxies/pimpbunny.rs +++ b/src/proxies/pimpbunny.rs @@ -1,4 +1,5 @@ use ntex::web; +use regex::Regex; use serde_json::Value; use url::Url; use wreq::Version; @@ -79,12 +80,65 @@ impl PimpbunnyProxy { } fn extract_json_ld_video(text: &str) -> Option { - let json_str = text - .split("application/ld+json\">") - .nth(1) - .and_then(|value| value.split("").next())?; + let script_regex = Regex::new(r#"(?s)]+application/ld\+json[^>]*>(.*?)"#).ok()?; - serde_json::from_str(json_str).ok() + for captures in script_regex.captures_iter(text) { + let raw = captures.get(1).map(|value| value.as_str().trim())?; + let parsed: Value = serde_json::from_str(raw).ok()?; + + if let Some(video) = Self::find_video_object(&parsed) { + return Some(video); + } + } + + None + } + + fn find_video_object(parsed: &Value) -> Option { + if parsed + .get("@type") + .and_then(Value::as_str) + .is_some_and(|value| value == "VideoObject") + { + return Some(parsed.clone()); + } + + if parsed + .get("contentUrl") + .and_then(Value::as_str) + .is_some_and(|value| !value.trim().is_empty()) + { + return Some(parsed.clone()); + } + + if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) { + for item in graph { + if item + .get("@type") + .and_then(Value::as_str) + .is_some_and(|value| value == "VideoObject") + { + return Some(item.clone()); + } + if item + .get("contentUrl") + .and_then(Value::as_str) + .is_some_and(|value| !value.trim().is_empty()) + { + return Some(item.clone()); + } + } + } + + if let Some(array) = parsed.as_array() { + for item in array { + if let Some(video) = Self::find_video_object(item) { + return Some(video); + } + } + } + + None } fn extract_stream_url(json_ld: &Value) -> Option { @@ -95,6 +149,15 @@ impl PimpbunnyProxy { .filter(|value| !value.is_empty()) .map(str::to_string) } + + fn extract_stream_url_from_html(text: &str) -> Option { + Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#) + .ok()? + .captures(text) + .and_then(|captures| captures.get(1)) + .map(|value| value.as_str().trim().to_string()) + .filter(|value| !value.is_empty()) + } } impl crate::proxies::Proxy for PimpbunnyProxy { @@ -114,11 +177,10 @@ impl crate::proxies::Proxy for PimpbunnyProxy { Err(_) => return String::new(), }; - let Some(json_ld) = Self::extract_json_ld_video(&text) else { - return String::new(); - }; - - Self::extract_stream_url(&json_ld).unwrap_or_default() + Self::extract_json_ld_video(&text) + .and_then(|json_ld| Self::extract_stream_url(&json_ld)) + .or_else(|| Self::extract_stream_url_from_html(&text)) + .unwrap_or_default() } } @@ -157,4 +219,29 @@ mod tests { Some("https://cdn.example/video.mp4") ); } + + #[test] + fn extracts_video_object_from_graph_script() { + let html = r#" + + "#; + + let json_ld = PimpbunnyProxy::extract_json_ld_video(html).expect("video object should parse"); + assert_eq!( + PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(), + Some("https://cdn.example/graph.mp4") + ); + } + + #[test] + fn falls_back_to_raw_content_url_match() { + let html = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#; + + assert_eq!( + PimpbunnyProxy::extract_stream_url_from_html(html).as_deref(), + Some("https://cdn.example/fallback.mp4") + ); + } }