From 3a2e77436e43358ae6a5b0648bff92b486462c59 Mon Sep 17 00:00:00 2001 From: Simon Date: Tue, 17 Mar 2026 08:41:48 +0000 Subject: [PATCH] swap to curl-cffi --- src/util/requester.rs | 460 ++++++++++++++++++++++++++++-------------- 1 file changed, 307 insertions(+), 153 deletions(-) diff --git a/src/util/requester.rs b/src/util/requester.rs index 1e6916a..870ec42 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -1,28 +1,137 @@ +use base64::Engine; +use futures::TryStreamExt; +use ntex::http::header::{CONTENT_TYPE, COOKIE, HeaderMap, HeaderName, HeaderValue, USER_AGENT}; +use ntex::http::StatusCode; use serde::Serialize; use std::env; use std::fmt; +use std::io::Write; +use std::process::{Command, Stdio}; use std::sync::Arc; -use wreq::Client; -use wreq::Proxy; -use wreq::Response; use wreq::Version; -use wreq::cookie::Jar; -use wreq::header::{HeaderMap, HeaderValue, USER_AGENT}; +use wreq::cookie::{CookieStore, Cookies, Jar}; use wreq::multipart::Form; -use wreq::redirect::Policy; -use wreq_util::Emulation; +use wreq::Uri; use crate::util::flaresolverr::FlareSolverrRequest; use crate::util::flaresolverr::Flaresolverr; use crate::util::proxy; -// A Send + Sync error type for all async paths type AnyErr = Box; +const CURL_CFFI_SCRIPT: &str = r#" +import base64 +import json +import sys +from curl_cffi import requests + +def main(): + payload = json.load(sys.stdin) + headers = {k: v for k, v in payload.get("headers", [])} + body_b64 = payload.get("body_base64") + data = base64.b64decode(body_b64) if body_b64 else None + + kwargs = { + "method": payload["method"], + "url": payload["url"], + "headers": headers or None, + "timeout": payload.get("timeout_secs", 60), + "allow_redirects": payload.get("follow_redirects", True), + "verify": False, + "impersonate": payload.get("impersonate", "chrome"), + } + + proxy_url = payload.get("proxy_url") + if proxy_url: + kwargs["proxies"] = {"http": proxy_url, "https": proxy_url} + if data is not None: + kwargs["data"] = data + + response = requests.request(**kwargs) + + cookies = [] + cookie_jar = getattr(response.cookies, "jar", None) + if cookie_jar is not None: + for cookie in cookie_jar: + parts = [f"{cookie.name}={cookie.value}"] + if cookie.domain: + parts.append(f"Domain={cookie.domain}") + if cookie.path: + parts.append(f"Path={cookie.path}") + if cookie.secure: + parts.append("Secure") + cookies.append("; ".join(parts)) + + meta = { + "status": response.status_code, + "headers": list(response.headers.items()), + "cookies": cookies, + } + sys.stderr.write(json.dumps(meta)) + sys.stdout.buffer.write(response.content) + +if __name__ == "__main__": + try: + main() + except Exception as exc: + sys.stderr.write(json.dumps({"error": str(exc)})) + sys.exit(1) +"#; + +#[derive(Debug, Clone)] +pub struct Response { + status: StatusCode, + headers: HeaderMap, + body: Vec, +} + +impl Response { + pub fn status(&self) -> StatusCode { + self.status + } + + pub fn headers(&self) -> &HeaderMap { + &self.headers + } + + pub async fn text(self) -> Result { + String::from_utf8(self.body).map_err(|error| error.into()) + } + + pub async fn bytes(self) -> Result, AnyErr> { + Ok(self.body) + } + + pub async fn json(self) -> Result + where + T: serde::de::DeserializeOwned, + { + Ok(serde_json::from_slice(&self.body)?) + } +} + +#[derive(Serialize)] +struct PythonRequestPayload<'a> { + method: &'a str, + url: &'a str, + headers: Vec<(String, String)>, + body_base64: Option, + follow_redirects: bool, + timeout_secs: u64, + proxy_url: Option, + impersonate: &'a str, +} + +#[derive(serde::Deserialize)] +struct PythonResponseMeta { + status: Option, + headers: Option>, + cookies: Option>, + error: Option, +} + #[derive(serde::Serialize, serde::Deserialize, Clone)] pub struct Requester { - #[serde(skip)] - client: Client, #[serde(skip)] cookie_jar: Arc, proxy: bool, @@ -41,31 +150,9 @@ impl fmt::Debug for Requester { } impl Requester { - fn build_client(cookie_jar: Arc, user_agent: Option<&str>) -> Client { - let mut builder = Client::builder() - .cert_verification(false) - .emulation(Emulation::Firefox146) - .cookie_provider(cookie_jar) - .redirect(Policy::default()); - - if let Some(user_agent) = user_agent { - let mut headers = HeaderMap::new(); - if let Ok(value) = HeaderValue::from_str(user_agent) { - headers.insert(USER_AGENT, value); - builder = builder.default_headers(headers); - } - } - - builder.build().expect("Failed to create HTTP client") - } - pub fn new() -> Self { - let cookie_jar = Arc::new(Jar::default()); - let client = Self::build_client(cookie_jar.clone(), None); - let requester = Requester { - client, - cookie_jar, + cookie_jar: Arc::new(Jar::default()), proxy: false, flaresolverr_session: None, user_agent: None, @@ -83,41 +170,163 @@ impl Requester { self.proxy = proxy; } - pub async fn get_raw(&mut self, url: &str) -> Result { - let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); + fn cookie_headers(&self, url: &str) -> Vec<(String, String)> { + let Ok(uri) = url.parse::() else { + return vec![]; + }; - let mut request = client.get(url).version(Version::HTTP_11); + match self.cookie_jar.cookies(&uri) { + Cookies::Compressed(value) => value + .to_str() + .ok() + .map(|value| vec![(COOKIE.to_string(), value.to_string())]) + .unwrap_or_default(), + Cookies::Uncompressed(values) => values + .into_iter() + .filter_map(|value| value.to_str().ok().map(|value| (COOKIE.to_string(), value.to_string()))) + .collect(), + Cookies::Empty => vec![], + _ => vec![], + } + } - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); + fn merged_headers( + &self, + url: &str, + headers: Vec<(String, String)>, + ensure_json: bool, + ) -> Vec<(String, String)> { + let mut merged = headers; + + if ensure_json + && !merged + .iter() + .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str())) + { + merged.push((CONTENT_TYPE.to_string(), "application/json".to_string())); + } + + if let Some(user_agent) = &self.user_agent { + if !merged + .iter() + .any(|(key, _)| key.eq_ignore_ascii_case(USER_AGENT.as_str())) + { + merged.push((USER_AGENT.to_string(), user_agent.clone())); } } - request.send().await + let has_cookie = merged + .iter() + .any(|(key, _)| key.eq_ignore_ascii_case(COOKIE.as_str())); + if !has_cookie { + merged.extend(self.cookie_headers(url)); + } + + merged + } + + async fn run_python_request( + &mut self, + method: &str, + url: &str, + headers: Vec<(String, String)>, + body: Option>, + follow_redirects: bool, + ) -> Result { + let headers = self.merged_headers(url, headers, false); + let proxy_url = if self.proxy { + env::var("BURP_URL").ok() + } else { + None + }; + + let payload = PythonRequestPayload { + method, + url, + headers, + body_base64: body.map(|body| base64::engine::general_purpose::STANDARD.encode(body)), + follow_redirects, + timeout_secs: 60, + proxy_url, + impersonate: "chrome", + }; + + let payload = serde_json::to_vec(&payload)?; + + let output = tokio::task::spawn_blocking(move || -> Result { + let mut command = Command::new("python3"); + command + .arg("-c") + .arg(CURL_CFFI_SCRIPT) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let mut child = command.spawn()?; + if let Some(stdin) = child.stdin.as_mut() { + stdin.write_all(&payload)?; + } + Ok(child.wait_with_output()?) + }) + .await + .map_err(|error| -> AnyErr { format!("spawn_blocking failed: {error}").into() })??; + + let meta_text = String::from_utf8_lossy(&output.stderr).trim().to_string(); + let meta: PythonResponseMeta = serde_json::from_str(&meta_text) + .map_err(|error| format!("failed to parse curl_cffi metadata: {error}; stderr={meta_text}"))?; + + if !output.status.success() { + let error = meta + .error + .unwrap_or_else(|| format!("curl_cffi request failed for {method} {url}")); + return Err(error.into()); + } + + for cookie in meta.cookies.unwrap_or_default() { + self.cookie_jar.add_cookie_str(&cookie, url); + } + + let status = StatusCode::from_u16(meta.status.unwrap_or(500)) + .map_err(|error| format!("invalid status code from curl_cffi: {error}"))?; + + let mut response_headers = HeaderMap::new(); + for (key, value) in meta.headers.unwrap_or_default() { + let Ok(name) = HeaderName::try_from(key.as_str()) else { + continue; + }; + let Ok(value) = HeaderValue::from_str(&value) else { + continue; + }; + response_headers.append(name, value); + } + + Ok(Response { + status, + headers: response_headers, + body: output.stdout, + }) + } + + async fn multipart_to_body(form: Form) -> Result<(Vec, String), AnyErr> { + let boundary = form.boundary().to_string(); + let chunks: Vec<_> = form.into_stream().try_collect().await?; + let mut body = Vec::new(); + for chunk in chunks { + body.extend_from_slice(&chunk); + } + Ok((body, format!("multipart/form-data; boundary={boundary}"))) + } + + pub async fn get_raw(&mut self, url: &str) -> Result { + self.run_python_request("GET", url, vec![], None, false).await } pub async fn get_raw_with_headers( &mut self, url: &str, headers: Vec<(String, String)>, - ) -> Result { - let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); - - let mut request = client.get(url).version(Version::HTTP_11); - - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); - } - } - // Set custom headers - for (key, value) in headers.iter() { - request = request.header(key, value); - } - request.send().await + ) -> Result { + self.run_python_request("GET", url, headers, None, true).await } pub async fn post_json( @@ -125,25 +334,21 @@ impl Requester { url: &str, data: &S, headers: Vec<(String, String)>, - ) -> Result + ) -> Result where S: Serialize + ?Sized, { - let mut request = self.client.post(url).version(Version::HTTP_11).json(data); - - // Set custom headers - for (key, value) in headers.iter() { - request = request.header(key, value); + let mut headers = self.merged_headers(url, headers, true); + if !headers + .iter() + .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str())) + { + headers.push((CONTENT_TYPE.to_string(), "application/json".to_string())); } - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); - } - } - - request.send().await + let body = serde_json::to_vec(data)?; + self.run_python_request("POST", url, headers, Some(body), true) + .await } pub async fn post( @@ -151,26 +356,14 @@ impl Requester { url: &str, data: &str, headers: Vec<(&str, &str)>, - ) -> Result { - let mut request = self - .client - .post(url) - .version(Version::HTTP_11) - .body(data.to_string()); + ) -> Result { + let headers = headers + .into_iter() + .map(|(key, value)| (key.to_string(), value.to_string())) + .collect::>(); - // Set custom headers - for (key, value) in headers.iter() { - request = request.header(key.to_string(), value.to_string()); - } - - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); - } - } - - request.send().await + self.run_python_request("POST", url, headers, Some(data.as_bytes().to_vec()), true) + .await } pub async fn post_multipart( @@ -179,27 +372,18 @@ impl Requester { form: Form, headers: Vec<(String, String)>, _http_version: Option, - ) -> Result { - let http_version = match _http_version { - Some(v) => v, - None => Version::HTTP_11, - }; - - let mut request = self.client.post(url).multipart(form).version(http_version); - - // Set custom headers - for (key, value) in headers.iter() { - request = request.header(key, value); + ) -> Result { + let (body, content_type) = Self::multipart_to_body(form).await?; + let mut headers = headers; + if !headers + .iter() + .any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str())) + { + headers.push((CONTENT_TYPE.to_string(), content_type)); } - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); - } - } - - request.send().await + self.run_python_request("POST", url, headers, Some(body), true) + .await } pub async fn get( @@ -216,25 +400,15 @@ impl Requester { headers: Vec<(String, String)>, _http_version: Option, ) -> Result { - let http_version = match _http_version { - Some(v) => v, - None => Version::HTTP_11, - }; loop { - let mut request = self.client.get(url).version(http_version); - for (key, value) in headers.iter() { - request = request.header(key, value); - } - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); - } - } - let response = request.send().await?; + let response = self + .run_python_request("GET", url, headers.clone(), None, true) + .await?; + if response.status().is_success() || response.status().as_u16() == 404 { - return Ok(response.text().await?); + return response.text().await; } + if response.status().as_u16() == 429 { tokio::time::sleep(std::time::Duration::from_secs(1)).await; continue; @@ -248,8 +422,6 @@ impl Requester { } } - // If direct request failed, try FlareSolverr. Map its error to a Send+Sync error immediately, - // so no non-Send error value lives across later `.await`s. let flare_url = match env::var("FLARE_URL") { Ok(url) => url, Err(e) => return Err(format!("FLARE_URL not set: {e}").into()), @@ -268,39 +440,21 @@ impl Requester { .await .map_err(|e| -> AnyErr { format!("Failed to solve FlareSolverr: {e}").into() })?; - // Rebuild client and apply UA/cookies from FlareSolverr let cookie_origin = url.split('/').take(3).collect::>().join("/"); + self.user_agent = Some(res.solution.userAgent); - let useragent = res.solution.userAgent; - self.user_agent = Some(useragent); - - if url::Url::parse(&cookie_origin).is_ok() { - for cookie in res.solution.cookies { - self.cookie_jar - .add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin); - } + for cookie in res.solution.cookies { + self.cookie_jar + .add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin); } - self.client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref()); - - // Retry the original URL with the updated client & (optional) proxy - let mut request = self.client.get(url).version(Version::HTTP_11); - for (key, value) in headers.iter() { - request = request.header(key, value); - } - if self.proxy { - if let Ok(proxy_url) = env::var("BURP_URL") { - let proxy = Proxy::all(&proxy_url).unwrap(); - request = request.proxy(proxy); - } - } - - let response = request.send().await?; + let response = self + .run_python_request("GET", url, headers, None, true) + .await?; if response.status().is_success() { - return Ok(response.text().await?); + return response.text().await; } - // Fall back to FlareSolverr-provided body Ok(res.solution.response) } }