pimpbunny fix
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use ntex::web;
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
use url::Url;
|
||||
use wreq::Version;
|
||||
@@ -79,12 +80,65 @@ impl PimpbunnyProxy {
|
||||
}
|
||||
|
||||
fn extract_json_ld_video(text: &str) -> Option<Value> {
|
||||
let json_str = text
|
||||
.split("application/ld+json\">")
|
||||
.nth(1)
|
||||
.and_then(|value| value.split("</script>").next())?;
|
||||
let script_regex = Regex::new(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#).ok()?;
|
||||
|
||||
serde_json::from_str(json_str).ok()
|
||||
for captures in script_regex.captures_iter(text) {
|
||||
let raw = captures.get(1).map(|value| value.as_str().trim())?;
|
||||
let parsed: Value = serde_json::from_str(raw).ok()?;
|
||||
|
||||
if let Some(video) = Self::find_video_object(&parsed) {
|
||||
return Some(video);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn find_video_object(parsed: &Value) -> Option<Value> {
|
||||
if parsed
|
||||
.get("@type")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| value == "VideoObject")
|
||||
{
|
||||
return Some(parsed.clone());
|
||||
}
|
||||
|
||||
if parsed
|
||||
.get("contentUrl")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| !value.trim().is_empty())
|
||||
{
|
||||
return Some(parsed.clone());
|
||||
}
|
||||
|
||||
if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
|
||||
for item in graph {
|
||||
if item
|
||||
.get("@type")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| value == "VideoObject")
|
||||
{
|
||||
return Some(item.clone());
|
||||
}
|
||||
if item
|
||||
.get("contentUrl")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| !value.trim().is_empty())
|
||||
{
|
||||
return Some(item.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(array) = parsed.as_array() {
|
||||
for item in array {
|
||||
if let Some(video) = Self::find_video_object(item) {
|
||||
return Some(video);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn extract_stream_url(json_ld: &Value) -> Option<String> {
|
||||
@@ -95,6 +149,15 @@ impl PimpbunnyProxy {
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(str::to_string)
|
||||
}
|
||||
|
||||
fn extract_stream_url_from_html(text: &str) -> Option<String> {
|
||||
Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#)
|
||||
.ok()?
|
||||
.captures(text)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| value.as_str().trim().to_string())
|
||||
.filter(|value| !value.is_empty())
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::proxies::Proxy for PimpbunnyProxy {
|
||||
@@ -114,11 +177,10 @@ impl crate::proxies::Proxy for PimpbunnyProxy {
|
||||
Err(_) => return String::new(),
|
||||
};
|
||||
|
||||
let Some(json_ld) = Self::extract_json_ld_video(&text) else {
|
||||
return String::new();
|
||||
};
|
||||
|
||||
Self::extract_stream_url(&json_ld).unwrap_or_default()
|
||||
Self::extract_json_ld_video(&text)
|
||||
.and_then(|json_ld| Self::extract_stream_url(&json_ld))
|
||||
.or_else(|| Self::extract_stream_url_from_html(&text))
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,4 +219,29 @@ mod tests {
|
||||
Some("https://cdn.example/video.mp4")
|
||||
);
|
||||
}
|
||||
|
||||
// A `VideoObject` nested inside `@graph` must be found even when other node
// types precede it, and its `contentUrl` must come back as the stream URL.
#[test]
fn extracts_video_object_from_graph_script() {
    // The fixture holds only the script element: stray separator lines inside
    // the raw string would make the JSON-LD payload unparsable.
    let html = r#"
<script type="application/ld+json">
{"@graph":[{"@type":"BreadcrumbList"},{"@type":"VideoObject","contentUrl":"https://cdn.example/graph.mp4"}]}
</script>
"#;

    let json_ld =
        PimpbunnyProxy::extract_json_ld_video(html).expect("video object should parse");

    assert_eq!(
        PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(),
        Some("https://cdn.example/graph.mp4")
    );
}
|
||||
|
||||
// The raw-HTML fallback must pull `contentUrl` out of text that is not
// wrapped in a JSON-LD script element.
#[test]
fn falls_back_to_raw_content_url_match() {
    let page = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#;
    let extracted = PimpbunnyProxy::extract_stream_url_from_html(page);

    assert_eq!(
        extracted.as_deref(),
        Some("https://cdn.example/fallback.mp4")
    );
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user