swap to curl-cffi
This commit is contained in:
@@ -1,28 +1,137 @@
|
||||
use base64::Engine;
|
||||
use futures::TryStreamExt;
|
||||
use ntex::http::header::{CONTENT_TYPE, COOKIE, HeaderMap, HeaderName, HeaderValue, USER_AGENT};
|
||||
use ntex::http::StatusCode;
|
||||
use serde::Serialize;
|
||||
use std::env;
|
||||
use std::fmt;
|
||||
use std::io::Write;
|
||||
use std::process::{Command, Stdio};
|
||||
use std::sync::Arc;
|
||||
use wreq::Client;
|
||||
use wreq::Proxy;
|
||||
use wreq::Response;
|
||||
use wreq::Version;
|
||||
use wreq::cookie::Jar;
|
||||
use wreq::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use wreq::cookie::{CookieStore, Cookies, Jar};
|
||||
use wreq::multipart::Form;
|
||||
use wreq::redirect::Policy;
|
||||
use wreq_util::Emulation;
|
||||
use wreq::Uri;
|
||||
|
||||
use crate::util::flaresolverr::FlareSolverrRequest;
|
||||
use crate::util::flaresolverr::Flaresolverr;
|
||||
use crate::util::proxy;
|
||||
|
||||
// A Send + Sync error type for all async paths
|
||||
type AnyErr = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
const CURL_CFFI_SCRIPT: &str = r#"
|
||||
import base64
|
||||
import json
|
||||
import sys
|
||||
from curl_cffi import requests
|
||||
|
||||
def main():
|
||||
payload = json.load(sys.stdin)
|
||||
headers = {k: v for k, v in payload.get("headers", [])}
|
||||
body_b64 = payload.get("body_base64")
|
||||
data = base64.b64decode(body_b64) if body_b64 else None
|
||||
|
||||
kwargs = {
|
||||
"method": payload["method"],
|
||||
"url": payload["url"],
|
||||
"headers": headers or None,
|
||||
"timeout": payload.get("timeout_secs", 60),
|
||||
"allow_redirects": payload.get("follow_redirects", True),
|
||||
"verify": False,
|
||||
"impersonate": payload.get("impersonate", "chrome"),
|
||||
}
|
||||
|
||||
proxy_url = payload.get("proxy_url")
|
||||
if proxy_url:
|
||||
kwargs["proxies"] = {"http": proxy_url, "https": proxy_url}
|
||||
if data is not None:
|
||||
kwargs["data"] = data
|
||||
|
||||
response = requests.request(**kwargs)
|
||||
|
||||
cookies = []
|
||||
cookie_jar = getattr(response.cookies, "jar", None)
|
||||
if cookie_jar is not None:
|
||||
for cookie in cookie_jar:
|
||||
parts = [f"{cookie.name}={cookie.value}"]
|
||||
if cookie.domain:
|
||||
parts.append(f"Domain={cookie.domain}")
|
||||
if cookie.path:
|
||||
parts.append(f"Path={cookie.path}")
|
||||
if cookie.secure:
|
||||
parts.append("Secure")
|
||||
cookies.append("; ".join(parts))
|
||||
|
||||
meta = {
|
||||
"status": response.status_code,
|
||||
"headers": list(response.headers.items()),
|
||||
"cookies": cookies,
|
||||
}
|
||||
sys.stderr.write(json.dumps(meta))
|
||||
sys.stdout.buffer.write(response.content)
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
main()
|
||||
except Exception as exc:
|
||||
sys.stderr.write(json.dumps({"error": str(exc)}))
|
||||
sys.exit(1)
|
||||
"#;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Response {
|
||||
status: StatusCode,
|
||||
headers: HeaderMap,
|
||||
body: Vec<u8>,
|
||||
}
|
||||
|
||||
impl Response {
|
||||
pub fn status(&self) -> StatusCode {
|
||||
self.status
|
||||
}
|
||||
|
||||
pub fn headers(&self) -> &HeaderMap {
|
||||
&self.headers
|
||||
}
|
||||
|
||||
pub async fn text(self) -> Result<String, AnyErr> {
|
||||
String::from_utf8(self.body).map_err(|error| error.into())
|
||||
}
|
||||
|
||||
pub async fn bytes(self) -> Result<Vec<u8>, AnyErr> {
|
||||
Ok(self.body)
|
||||
}
|
||||
|
||||
pub async fn json<T>(self) -> Result<T, AnyErr>
|
||||
where
|
||||
T: serde::de::DeserializeOwned,
|
||||
{
|
||||
Ok(serde_json::from_slice(&self.body)?)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct PythonRequestPayload<'a> {
|
||||
method: &'a str,
|
||||
url: &'a str,
|
||||
headers: Vec<(String, String)>,
|
||||
body_base64: Option<String>,
|
||||
follow_redirects: bool,
|
||||
timeout_secs: u64,
|
||||
proxy_url: Option<String>,
|
||||
impersonate: &'a str,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct PythonResponseMeta {
|
||||
status: Option<u16>,
|
||||
headers: Option<Vec<(String, String)>>,
|
||||
cookies: Option<Vec<String>>,
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, serde::Deserialize, Clone)]
|
||||
pub struct Requester {
|
||||
#[serde(skip)]
|
||||
client: Client,
|
||||
#[serde(skip)]
|
||||
cookie_jar: Arc<Jar>,
|
||||
proxy: bool,
|
||||
@@ -41,31 +150,9 @@ impl fmt::Debug for Requester {
|
||||
}
|
||||
|
||||
impl Requester {
|
||||
fn build_client(cookie_jar: Arc<Jar>, user_agent: Option<&str>) -> Client {
|
||||
let mut builder = Client::builder()
|
||||
.cert_verification(false)
|
||||
.emulation(Emulation::Firefox146)
|
||||
.cookie_provider(cookie_jar)
|
||||
.redirect(Policy::default());
|
||||
|
||||
if let Some(user_agent) = user_agent {
|
||||
let mut headers = HeaderMap::new();
|
||||
if let Ok(value) = HeaderValue::from_str(user_agent) {
|
||||
headers.insert(USER_AGENT, value);
|
||||
builder = builder.default_headers(headers);
|
||||
}
|
||||
}
|
||||
|
||||
builder.build().expect("Failed to create HTTP client")
|
||||
}
|
||||
|
||||
pub fn new() -> Self {
|
||||
let cookie_jar = Arc::new(Jar::default());
|
||||
let client = Self::build_client(cookie_jar.clone(), None);
|
||||
|
||||
let requester = Requester {
|
||||
client,
|
||||
cookie_jar,
|
||||
cookie_jar: Arc::new(Jar::default()),
|
||||
proxy: false,
|
||||
flaresolverr_session: None,
|
||||
user_agent: None,
|
||||
@@ -83,41 +170,163 @@ impl Requester {
|
||||
self.proxy = proxy;
|
||||
}
|
||||
|
||||
pub async fn get_raw(&mut self, url: &str) -> Result<Response, wreq::Error> {
|
||||
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
|
||||
fn cookie_headers(&self, url: &str) -> Vec<(String, String)> {
|
||||
let Ok(uri) = url.parse::<Uri>() else {
|
||||
return vec![];
|
||||
};
|
||||
|
||||
let mut request = client.get(url).version(Version::HTTP_11);
|
||||
match self.cookie_jar.cookies(&uri) {
|
||||
Cookies::Compressed(value) => value
|
||||
.to_str()
|
||||
.ok()
|
||||
.map(|value| vec![(COOKIE.to_string(), value.to_string())])
|
||||
.unwrap_or_default(),
|
||||
Cookies::Uncompressed(values) => values
|
||||
.into_iter()
|
||||
.filter_map(|value| value.to_str().ok().map(|value| (COOKIE.to_string(), value.to_string())))
|
||||
.collect(),
|
||||
Cookies::Empty => vec![],
|
||||
_ => vec![],
|
||||
}
|
||||
}
|
||||
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
fn merged_headers(
|
||||
&self,
|
||||
url: &str,
|
||||
headers: Vec<(String, String)>,
|
||||
ensure_json: bool,
|
||||
) -> Vec<(String, String)> {
|
||||
let mut merged = headers;
|
||||
|
||||
if ensure_json
|
||||
&& !merged
|
||||
.iter()
|
||||
.any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str()))
|
||||
{
|
||||
merged.push((CONTENT_TYPE.to_string(), "application/json".to_string()));
|
||||
}
|
||||
|
||||
if let Some(user_agent) = &self.user_agent {
|
||||
if !merged
|
||||
.iter()
|
||||
.any(|(key, _)| key.eq_ignore_ascii_case(USER_AGENT.as_str()))
|
||||
{
|
||||
merged.push((USER_AGENT.to_string(), user_agent.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
request.send().await
|
||||
let has_cookie = merged
|
||||
.iter()
|
||||
.any(|(key, _)| key.eq_ignore_ascii_case(COOKIE.as_str()));
|
||||
if !has_cookie {
|
||||
merged.extend(self.cookie_headers(url));
|
||||
}
|
||||
|
||||
merged
|
||||
}
|
||||
|
||||
async fn run_python_request(
|
||||
&mut self,
|
||||
method: &str,
|
||||
url: &str,
|
||||
headers: Vec<(String, String)>,
|
||||
body: Option<Vec<u8>>,
|
||||
follow_redirects: bool,
|
||||
) -> Result<Response, AnyErr> {
|
||||
let headers = self.merged_headers(url, headers, false);
|
||||
let proxy_url = if self.proxy {
|
||||
env::var("BURP_URL").ok()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let payload = PythonRequestPayload {
|
||||
method,
|
||||
url,
|
||||
headers,
|
||||
body_base64: body.map(|body| base64::engine::general_purpose::STANDARD.encode(body)),
|
||||
follow_redirects,
|
||||
timeout_secs: 60,
|
||||
proxy_url,
|
||||
impersonate: "chrome",
|
||||
};
|
||||
|
||||
let payload = serde_json::to_vec(&payload)?;
|
||||
|
||||
let output = tokio::task::spawn_blocking(move || -> Result<std::process::Output, AnyErr> {
|
||||
let mut command = Command::new("python3");
|
||||
command
|
||||
.arg("-c")
|
||||
.arg(CURL_CFFI_SCRIPT)
|
||||
.stdin(Stdio::piped())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
let mut child = command.spawn()?;
|
||||
if let Some(stdin) = child.stdin.as_mut() {
|
||||
stdin.write_all(&payload)?;
|
||||
}
|
||||
Ok(child.wait_with_output()?)
|
||||
})
|
||||
.await
|
||||
.map_err(|error| -> AnyErr { format!("spawn_blocking failed: {error}").into() })??;
|
||||
|
||||
let meta_text = String::from_utf8_lossy(&output.stderr).trim().to_string();
|
||||
let meta: PythonResponseMeta = serde_json::from_str(&meta_text)
|
||||
.map_err(|error| format!("failed to parse curl_cffi metadata: {error}; stderr={meta_text}"))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let error = meta
|
||||
.error
|
||||
.unwrap_or_else(|| format!("curl_cffi request failed for {method} {url}"));
|
||||
return Err(error.into());
|
||||
}
|
||||
|
||||
for cookie in meta.cookies.unwrap_or_default() {
|
||||
self.cookie_jar.add_cookie_str(&cookie, url);
|
||||
}
|
||||
|
||||
let status = StatusCode::from_u16(meta.status.unwrap_or(500))
|
||||
.map_err(|error| format!("invalid status code from curl_cffi: {error}"))?;
|
||||
|
||||
let mut response_headers = HeaderMap::new();
|
||||
for (key, value) in meta.headers.unwrap_or_default() {
|
||||
let Ok(name) = HeaderName::try_from(key.as_str()) else {
|
||||
continue;
|
||||
};
|
||||
let Ok(value) = HeaderValue::from_str(&value) else {
|
||||
continue;
|
||||
};
|
||||
response_headers.append(name, value);
|
||||
}
|
||||
|
||||
Ok(Response {
|
||||
status,
|
||||
headers: response_headers,
|
||||
body: output.stdout,
|
||||
})
|
||||
}
|
||||
|
||||
async fn multipart_to_body(form: Form) -> Result<(Vec<u8>, String), AnyErr> {
|
||||
let boundary = form.boundary().to_string();
|
||||
let chunks: Vec<_> = form.into_stream().try_collect().await?;
|
||||
let mut body = Vec::new();
|
||||
for chunk in chunks {
|
||||
body.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok((body, format!("multipart/form-data; boundary={boundary}")))
|
||||
}
|
||||
|
||||
pub async fn get_raw(&mut self, url: &str) -> Result<Response, AnyErr> {
|
||||
self.run_python_request("GET", url, vec![], None, false).await
|
||||
}
|
||||
|
||||
pub async fn get_raw_with_headers(
|
||||
&mut self,
|
||||
url: &str,
|
||||
headers: Vec<(String, String)>,
|
||||
) -> Result<Response, wreq::Error> {
|
||||
let client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
|
||||
|
||||
let mut request = client.get(url).version(Version::HTTP_11);
|
||||
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
}
|
||||
}
|
||||
// Set custom headers
|
||||
for (key, value) in headers.iter() {
|
||||
request = request.header(key, value);
|
||||
}
|
||||
request.send().await
|
||||
) -> Result<Response, AnyErr> {
|
||||
self.run_python_request("GET", url, headers, None, true).await
|
||||
}
|
||||
|
||||
pub async fn post_json<S>(
|
||||
@@ -125,25 +334,21 @@ impl Requester {
|
||||
url: &str,
|
||||
data: &S,
|
||||
headers: Vec<(String, String)>,
|
||||
) -> Result<Response, wreq::Error>
|
||||
) -> Result<Response, AnyErr>
|
||||
where
|
||||
S: Serialize + ?Sized,
|
||||
{
|
||||
let mut request = self.client.post(url).version(Version::HTTP_11).json(data);
|
||||
|
||||
// Set custom headers
|
||||
for (key, value) in headers.iter() {
|
||||
request = request.header(key, value);
|
||||
let mut headers = self.merged_headers(url, headers, true);
|
||||
if !headers
|
||||
.iter()
|
||||
.any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str()))
|
||||
{
|
||||
headers.push((CONTENT_TYPE.to_string(), "application/json".to_string()));
|
||||
}
|
||||
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
request.send().await
|
||||
let body = serde_json::to_vec(data)?;
|
||||
self.run_python_request("POST", url, headers, Some(body), true)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn post(
|
||||
@@ -151,26 +356,14 @@ impl Requester {
|
||||
url: &str,
|
||||
data: &str,
|
||||
headers: Vec<(&str, &str)>,
|
||||
) -> Result<Response, wreq::Error> {
|
||||
let mut request = self
|
||||
.client
|
||||
.post(url)
|
||||
.version(Version::HTTP_11)
|
||||
.body(data.to_string());
|
||||
) -> Result<Response, AnyErr> {
|
||||
let headers = headers
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.to_string(), value.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Set custom headers
|
||||
for (key, value) in headers.iter() {
|
||||
request = request.header(key.to_string(), value.to_string());
|
||||
}
|
||||
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
request.send().await
|
||||
self.run_python_request("POST", url, headers, Some(data.as_bytes().to_vec()), true)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn post_multipart(
|
||||
@@ -179,27 +372,18 @@ impl Requester {
|
||||
form: Form,
|
||||
headers: Vec<(String, String)>,
|
||||
_http_version: Option<Version>,
|
||||
) -> Result<Response, wreq::Error> {
|
||||
let http_version = match _http_version {
|
||||
Some(v) => v,
|
||||
None => Version::HTTP_11,
|
||||
};
|
||||
|
||||
let mut request = self.client.post(url).multipart(form).version(http_version);
|
||||
|
||||
// Set custom headers
|
||||
for (key, value) in headers.iter() {
|
||||
request = request.header(key, value);
|
||||
) -> Result<Response, AnyErr> {
|
||||
let (body, content_type) = Self::multipart_to_body(form).await?;
|
||||
let mut headers = headers;
|
||||
if !headers
|
||||
.iter()
|
||||
.any(|(key, _)| key.eq_ignore_ascii_case(CONTENT_TYPE.as_str()))
|
||||
{
|
||||
headers.push((CONTENT_TYPE.to_string(), content_type));
|
||||
}
|
||||
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
request.send().await
|
||||
self.run_python_request("POST", url, headers, Some(body), true)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
@@ -216,25 +400,15 @@ impl Requester {
|
||||
headers: Vec<(String, String)>,
|
||||
_http_version: Option<Version>,
|
||||
) -> Result<String, AnyErr> {
|
||||
let http_version = match _http_version {
|
||||
Some(v) => v,
|
||||
None => Version::HTTP_11,
|
||||
};
|
||||
loop {
|
||||
let mut request = self.client.get(url).version(http_version);
|
||||
for (key, value) in headers.iter() {
|
||||
request = request.header(key, value);
|
||||
}
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
}
|
||||
}
|
||||
let response = request.send().await?;
|
||||
let response = self
|
||||
.run_python_request("GET", url, headers.clone(), None, true)
|
||||
.await?;
|
||||
|
||||
if response.status().is_success() || response.status().as_u16() == 404 {
|
||||
return Ok(response.text().await?);
|
||||
return response.text().await;
|
||||
}
|
||||
|
||||
if response.status().as_u16() == 429 {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
continue;
|
||||
@@ -248,8 +422,6 @@ impl Requester {
|
||||
}
|
||||
}
|
||||
|
||||
// If direct request failed, try FlareSolverr. Map its error to a Send+Sync error immediately,
|
||||
// so no non-Send error value lives across later `.await`s.
|
||||
let flare_url = match env::var("FLARE_URL") {
|
||||
Ok(url) => url,
|
||||
Err(e) => return Err(format!("FLARE_URL not set: {e}").into()),
|
||||
@@ -268,39 +440,21 @@ impl Requester {
|
||||
.await
|
||||
.map_err(|e| -> AnyErr { format!("Failed to solve FlareSolverr: {e}").into() })?;
|
||||
|
||||
// Rebuild client and apply UA/cookies from FlareSolverr
|
||||
let cookie_origin = url.split('/').take(3).collect::<Vec<&str>>().join("/");
|
||||
self.user_agent = Some(res.solution.userAgent);
|
||||
|
||||
let useragent = res.solution.userAgent;
|
||||
self.user_agent = Some(useragent);
|
||||
|
||||
if url::Url::parse(&cookie_origin).is_ok() {
|
||||
for cookie in res.solution.cookies {
|
||||
self.cookie_jar
|
||||
.add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin);
|
||||
}
|
||||
for cookie in res.solution.cookies {
|
||||
self.cookie_jar
|
||||
.add_cookie_str(&format!("{}={}", cookie.name, cookie.value), &cookie_origin);
|
||||
}
|
||||
|
||||
self.client = Self::build_client(self.cookie_jar.clone(), self.user_agent.as_deref());
|
||||
|
||||
// Retry the original URL with the updated client & (optional) proxy
|
||||
let mut request = self.client.get(url).version(Version::HTTP_11);
|
||||
for (key, value) in headers.iter() {
|
||||
request = request.header(key, value);
|
||||
}
|
||||
if self.proxy {
|
||||
if let Ok(proxy_url) = env::var("BURP_URL") {
|
||||
let proxy = Proxy::all(&proxy_url).unwrap();
|
||||
request = request.proxy(proxy);
|
||||
}
|
||||
}
|
||||
|
||||
let response = request.send().await?;
|
||||
let response = self
|
||||
.run_python_request("GET", url, headers, None, true)
|
||||
.await?;
|
||||
if response.status().is_success() {
|
||||
return Ok(response.text().await?);
|
||||
return response.text().await;
|
||||
}
|
||||
|
||||
// Fall back to FlareSolverr-provided body
|
||||
Ok(res.solution.response)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user