From dc70e2c9a641e0cf10d49096e8444339daa1f5d7 Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 16 May 2026 10:06:23 +0000 Subject: [PATCH] pimpbunny partial fixes --- src/providers/pimpbunny.rs | 6 +- src/proxies/pimpbunny.rs | 365 +++++++++++++++++++++++++------------ src/util/discord.rs | 12 +- 3 files changed, 264 insertions(+), 119 deletions(-) diff --git a/src/providers/pimpbunny.rs b/src/providers/pimpbunny.rs index 3a13f74..68397cd 100644 --- a/src/providers/pimpbunny.rs +++ b/src/providers/pimpbunny.rs @@ -800,11 +800,13 @@ impl PimpbunnyProvider { VideoFormat::new(proxy_url.clone(), "auto".into(), "video/mp4".into()) .format_id("auto".into()) .format_note("proxied".into()) - .http_header("Referer".to_string(), video_url.clone()), + .http_header("Referer".to_string(), video_url.clone()) + .http_header("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()) + , ]; Ok( - VideoItem::new(id, title, proxy_url, "pimpbunny".into(), thumb, duration) + VideoItem::new(id, title, video_url, "pimpbunny".into(), thumb, duration) .formats(formats) .preview(preview) .views(views), diff --git a/src/proxies/pimpbunny.rs b/src/proxies/pimpbunny.rs index 4610d25..cc9228e 100644 --- a/src/proxies/pimpbunny.rs +++ b/src/proxies/pimpbunny.rs @@ -1,11 +1,10 @@ +use crate::util::requester::Requester; use ntex::web; use regex::Regex; -use serde_json::Value; +use std::{collections::HashMap, time::{SystemTime, UNIX_EPOCH}}; use url::Url; use wreq::Version; -use crate::util::requester::Requester; - #[derive(Debug, Clone)] pub struct PimpbunnyProxy {} @@ -15,17 +14,26 @@ impl PimpbunnyProxy { const HTML_ACCEPT: &'static str = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; + /// Site-wide fallback key – only used when the page omits a license_code. + const LICENSE_CODE: &'static str = "$576262819011919"; + pub fn new() -> Self { PimpbunnyProxy {} } + fn js_now_millis() -> u128 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis()) + .unwrap_or(0) + } + fn normalize_detail_url(url: &str) -> Option { let normalized = if url.starts_with("http://") || url.starts_with("https://") { url.to_string() } else { format!("https://{}", url.trim_start_matches('/')) }; - Self::is_allowed_detail_url(&normalized).then_some(normalized) } @@ -39,7 +47,6 @@ impl PimpbunnyProxy { let Some(host) = url.host_str() else { return false; }; - matches!(host, "pimpbunny.com" | "www.pimpbunny.com") && !url.path().starts_with("/contents/videos_screenshots/") } @@ -51,10 +58,7 @@ impl PimpbunnyProxy { fn html_headers_with_referer(referer: &str) -> Vec<(String, String)> { vec![ ("Referer".to_string(), referer.to_string()), - ( - "User-Agent".to_string(), - Self::FIREFOX_USER_AGENT.to_string(), - ), + ("User-Agent".to_string(), Self::FIREFOX_USER_AGENT.to_string()), ("Accept".to_string(), Self::HTML_ACCEPT.to_string()), ("Accept-Language".to_string(), "en-US,en;q=0.9".to_string()), ] @@ -82,85 +86,193 @@ impl PimpbunnyProxy { .await; } - fn extract_json_ld_video(text: &str) -> Option { - let script_regex = - Regex::new(r#"(?s)]+application/ld\+json[^>]*>(.*?)"#).ok()?; + // ── Key generation (kt_player.js IIFE "function c", lines 541-586) ──────── + // + // Derives the 32-char shuffle key from the page's license_code value. + // + // Steps: + // a) Build digit-string f from license_code[1:]; replace every 0 with 1. + // b) Split f at midpoint j = len/2; compute f_str = str(4 * |k − l|). + // c) For g in 0..=j and h in 1..=4: + // n = digit(license_code[g+h]) + digit(f_str[g]); + // if n >= 10 { n -= 10 } + // push n onto key. + fn generate_key(license_code: &str) -> String { + let d: Vec = license_code.chars().collect(); - for captures in script_regex.captures_iter(text) { - let raw = captures.get(1).map(|value| value.as_str().trim())?; - let parsed: Value = serde_json::from_str(raw).ok()?; - - if let Some(video) = Self::find_video_object(&parsed) { - return Some(video); + // (a) Build digit string, replacing 0 → 1 + let mut f = String::new(); + for ch in d.iter().skip(1) { + match ch.to_digit(10) { + Some(0) => f.push('1'), + Some(v) => f.push(char::from_digit(v, 10).unwrap()), + None => f.push('1'), } } - None - } + // (b) Split and compute intermediate + let j = f.len() / 2; + let k: i64 = f[..=j].parse().unwrap_or(0); + let l: i64 = f[j..].parse().unwrap_or(0); + let f_str = ((k - l).unsigned_abs() as i64 * 4).to_string(); - fn find_video_object(parsed: &Value) -> Option { - if parsed - .get("@type") - .and_then(Value::as_str) - .is_some_and(|value| value == "VideoObject") - { - return Some(parsed.clone()); - } - - if parsed - .get("contentUrl") - .and_then(Value::as_str) - .is_some_and(|value| !value.trim().is_empty()) - { - return Some(parsed.clone()); - } - - if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) { - for item in graph { - if item - .get("@type") - .and_then(Value::as_str) - .is_some_and(|value| value == "VideoObject") - { - return Some(item.clone()); - } - if item - .get("contentUrl") - .and_then(Value::as_str) - .is_some_and(|value| !value.trim().is_empty()) - { - return Some(item.clone()); - } + // (c) Build 32-char key + let mut key = String::new(); + for g in 0..=j { + for h in 1..=4usize { + let nd = d.get(g + h).and_then(|c| c.to_digit(10)).unwrap_or(0) as i64; + let nf = f_str.chars().nth(g).and_then(|c| c.to_digit(10)).unwrap_or(0) as i64; + let mut n = nd + nf; + if n >= 10 { n -= 10; } + key.push(char::from_digit(n as u32, 10).unwrap()); } } + key + } - if let Some(array) = parsed.as_array() { - for item in array { - if let Some(video) = Self::find_video_object(item) { - return Some(video); - } + // ── Hash permutation (kt_player.js IIFE "function b", lines 521-539) ────── + // + // Reverse Fisher-Yates over the first 32 chars of the hash, driven by + // the cumulative digit-sum suffix of `key`. + // + // for k from 31 downto 0: + // l = k + Σ key_digit[m] for m in k..len(key) + // l %= 32 + // swap hash[k] and hash[l] + fn transform_hash(hash_str: &str, key: &str) -> String { + let mut chars: Vec = hash_str.chars().collect(); + let tail_start = chars.len().min(32); + let (head, _) = chars.split_at(tail_start); + let mut h: Vec = head.to_vec(); + let tail: String = chars.drain(tail_start..).collect(); + + let key_digits: Vec = key + .chars() + .filter_map(|c| c.to_digit(10)) + .map(|d| d as usize) + .collect(); + + let len = h.len(); + for k in (0..len).rev() { + let mut l = k; + for m in k..key_digits.len() { + l += key_digits[m]; } + l %= len; + h.swap(k, l); } - None + let mut result: String = h.into_iter().collect(); + result.push_str(&tail); + result } - fn extract_stream_url(json_ld: &Value) -> Option { - json_ld - .get("contentUrl") - .and_then(Value::as_str) - .map(str::trim) - .filter(|value| !value.is_empty()) - .map(str::to_string) + // ── URL decoder ──────────────────────────────────────────────────────────── + // + // Strips the `function/0/` prefix, un-shuffles the hash at path segment + // index 5, then returns the bare URL (without ?rnd – append separately). + fn decode_function0_url(input: &str, license_code: &str) -> Option { + if !input.starts_with("function/") { + return Some(input.to_string()); + } + + // Strip "function//" + let raw = input.splitn(3, '/').nth(2)?; + + let mut parts: Vec<&str> = raw.split('/').collect(); + + // Expected segments after splitting on '/': + // 0:"https:" 1:"" 2:"host" 3:"get_file" 4:N 5:HASH … + if parts.len() < 6 { + return None; + } + + let key = Self::generate_key(license_code); + let unscrambled = Self::transform_hash(parts[5], &key); + + // Rebuild – we need owned strings only for the one replaced segment + let mut owned: Vec = parts.iter().map(|s| s.to_string()).collect(); + owned[5] = unscrambled; + + Some(owned.join("/")) } + fn append_rnd(url: &str) -> String { + let rnd = Self::js_now_millis(); + if url.contains("?rnd=") { + url.to_string() + } else if url.contains('?') { + format!("{url}&rnd={rnd}") + } else { + format!("{url}?rnd={rnd}") + } + } + + // ── Page parser ──────────────────────────────────────────────────────────── + // + // 1. Extracts license_code: any config key whose name contains "code" + // (not at position 0) and whose value is exactly 16 chars – matching + // the IIFE's own detection logic. + // 2. Finds all video_url / video_alt_url keys, decodes them, picks the + // highest quality. + fn extract_video_from_player_js(text: &str) -> Option { + // Extract license_code from the player config. + // The IIFE looks for a property name containing "code" at pos > 0 + // with a value exactly 16 chars long. + let license_code = Regex::new( + r#"[a-z_$][a-z0-9_$]*code[a-z0-9_$]*\s*:\s*'([^']{16})'"#, + ) + .ok() + .and_then(|re| re.captures(text)) + .and_then(|cap| cap.get(1)) + .map(|m| m.as_str().to_string()) + .unwrap_or_else(|| Self::LICENSE_CODE.to_string()); + + let pair_regex = Regex::new( + r#"(?Pvideo_url\d*|video_alt_url\d*(?:_text)?):\s*'(?P[^']+)'"#, + ) + .ok()?; + + let mut data: HashMap = HashMap::new(); + for cap in pair_regex.captures_iter(text) { + data.insert(cap["key"].to_string(), cap["value"].to_string()); + } + + let mut sources: Vec<(u32, String)> = Vec::new(); + + for (key, value) in &data { + if !(key.starts_with("video_url") || key.starts_with("video_alt_url")) { + continue; + } + if key.ends_with("_text") { + continue; + } + + let quality_key = format!("{key}_text"); + let quality = data + .get(&quality_key) + .and_then(|v| v.replace('p', "").parse::().ok()) + .unwrap_or(0); + + let decoded = match Self::decode_function0_url(value, &license_code) { + Some(v) => v, + None => continue, + }; + + sources.push((quality, Self::append_rnd(&decoded))); + } + + sources.sort_by(|a, b| b.0.cmp(&a.0)); + sources.first().map(|(_, u)| u.clone()) + } + + // Fallback for standard JSON-LD if they ever use it again fn extract_stream_url_from_html(text: &str) -> Option { Regex::new(r#""contentUrl"\s*:\s*"([^"]+)""#) .ok()? .captures(text) .and_then(|captures| captures.get(1)) .map(|value| value.as_str().trim().to_string()) - .filter(|value| !value.is_empty()) } } @@ -171,8 +283,11 @@ impl crate::proxies::Proxy for PimpbunnyProxy { }; let mut requester = requester.get_ref().clone(); + Self::warm_root_session(&mut requester).await; + let headers = Self::headers_with_cookies(&requester, &detail_url, &detail_url); + let text = match requester .get_with_headers(&detail_url, headers, Some(Version::HTTP_2)) .await @@ -181,8 +296,7 @@ impl crate::proxies::Proxy for PimpbunnyProxy { Err(_) => return String::new(), }; - Self::extract_json_ld_video(&text) - .and_then(|json_ld| Self::extract_stream_url(&json_ld)) + Self::extract_video_from_player_js(&text) .or_else(|| Self::extract_stream_url_from_html(&text)) .unwrap_or_default() } @@ -190,63 +304,90 @@ impl crate::proxies::Proxy for PimpbunnyProxy { #[cfg(test)] mod tests { - use super::PimpbunnyProxy; + use super::*; #[test] - fn allows_only_pimpbunny_detail_urls() { - assert!(PimpbunnyProxy::is_allowed_detail_url( - "https://pimpbunny.com/videos/example-video/" - )); - assert!(PimpbunnyProxy::is_allowed_detail_url( - "https://www.pimpbunny.com/video/example/" - )); - assert!(!PimpbunnyProxy::is_allowed_detail_url( - "http://pimpbunny.com/videos/example-video/" - )); - assert!(!PimpbunnyProxy::is_allowed_detail_url( - "https://pimpbunny.com/contents/videos_screenshots/1/2/3.jpg" - )); - assert!(!PimpbunnyProxy::is_allowed_detail_url( - "https://example.com/videos/example-video/" - )); + fn test_generate_key() { + // Deterministic: same license_code always produces the same 32-char key + let key = PimpbunnyProxy::generate_key(PimpbunnyProxy::LICENSE_CODE); + assert_eq!(key.len(), 32, "key must be 32 digits"); + assert!(key.chars().all(|c| c.is_ascii_digit()), "key must be all digits"); } #[test] - fn extracts_content_url_from_json_ld() { - let html = r#" - - "#; + fn test_decode_function0_url() { + // Example taken from live player config, verified against browser output + let input = "function/0/https://pimpbunny.com/get_file/34/a4b50a90de7b3a7fc401e91b4b152b15580b7d689f/530000/530112/530112_pb_720p.mp4/"; + let expected = "https://pimpbunny.com/get_file/34/b54c19bdbf5b7a3b4492111aa70e5e00580b7d689f/530000/530112/530112_pb_720p.mp4/"; - let json_ld = PimpbunnyProxy::extract_json_ld_video(html).expect("json-ld should parse"); - assert_eq!( - PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(), - Some("https://cdn.example/video.mp4") - ); + let decoded = PimpbunnyProxy::decode_function0_url(input, PimpbunnyProxy::LICENSE_CODE) + .expect("decode failed"); + + assert_eq!(decoded, expected); } #[test] - fn extracts_video_object_from_graph_script() { + fn test_decode_canonical_example() { + // The original known-good example used to verify the algorithm + let input = "function/0/https://pimpbunny.com/get_file/40/7e4df9f504c7ec5b02b0101ed28edfa4c687186454/564000/564161/564161_720p.mp4/"; + let expected = "https://pimpbunny.com/get_file/40/fde01fe04b42c7e72d0d0a8c95b1e45fc687186454/564000/564161/564161_720p.mp4/"; + + let decoded = PimpbunnyProxy::decode_function0_url(input, PimpbunnyProxy::LICENSE_CODE) + .expect("decode failed"); + + assert_eq!(decoded, expected); + } + + #[test] + fn test_append_rnd() { + let url = "https://example.com/video.mp4"; + let result = PimpbunnyProxy::append_rnd(url); + assert!(result.starts_with(url)); + assert!(result.contains("?rnd=")); + } + + #[test] + fn test_append_rnd_existing_query() { + let url = "https://example.com/video.mp4?foo=bar"; + let result = PimpbunnyProxy::append_rnd(url); + assert!(result.contains("&rnd=")); + } + + #[test] + fn test_passthrough_non_function_url() { + let url = "https://cdn.example.com/video.mp4"; + let decoded = PimpbunnyProxy::decode_function0_url(url, PimpbunnyProxy::LICENSE_CODE) + .expect("passthrough failed"); + assert_eq!(decoded, url); + } + + #[test] + fn test_extract_highest_quality() { let html = r#" - "#; - let json_ld = - PimpbunnyProxy::extract_json_ld_video(html).expect("video object should parse"); - assert_eq!( - PimpbunnyProxy::extract_stream_url(&json_ld).as_deref(), - Some("https://cdn.example/graph.mp4") - ); - } + let result = PimpbunnyProxy::extract_video_from_player_js(html) + .expect("no url extracted"); - #[test] - fn falls_back_to_raw_content_url_match() { - let html = r#"{"contentUrl":"https://cdn.example/fallback.mp4"}"#; - - assert_eq!( - PimpbunnyProxy::extract_stream_url_from_html(html).as_deref(), - Some("https://cdn.example/fallback.mp4") + assert!(result.contains("530112_pb_720p.mp4"), "did not choose highest quality"); + assert!( + result.contains("b54c19bdbf5b7a3b4492111aa70e5e00580b7d689f"), + "did not decode scrambled token" ); + assert!(result.contains("?rnd="), "missing rnd parameter"); } } diff --git a/src/util/discord.rs b/src/util/discord.rs index 15a5c1b..7be361d 100644 --- a/src/util/discord.rs +++ b/src/util/discord.rs @@ -52,11 +52,6 @@ pub async fn send_discord_error_report( ERROR_CACHE.insert(error_signature, now); // --------------------------- - let webhook_url = match std::env::var("DISCORD_WEBHOOK") { - Ok(url) => url, - Err(_) => return, - }; - const MAX_FIELD: usize = 1024; let truncate = |s: &str| { if s.len() > MAX_FIELD { @@ -103,6 +98,13 @@ pub async fn send_discord_error_report( }] }); + let webhook_url = match std::env::var("DISCORD_WEBHOOK") { + Ok(url) => url, + Err(_) => { + // println!("{:#?}", &payload); + return; + } + }; let mut requester = requester::Requester::new(); let _ = requester.post_json(&webhook_url, &payload, vec![]).await; }