upgrades
This commit is contained in:
@@ -177,15 +177,18 @@ impl HomoxxxProvider {
|
||||
let search_string = query.to_lowercase().trim().replace(" ", "-");
|
||||
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
|
||||
|
||||
if search_string.starts_with("@") {
|
||||
let url_part = search_string
|
||||
.split("@")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace(":", "/");
|
||||
video_url = format!("{}/{}/", self.url, url_part);
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, value)) = trimmed.split_once(':') {
|
||||
let kind = kind.trim().to_ascii_lowercase();
|
||||
let value = value.trim().replace(' ', "-");
|
||||
if !value.is_empty()
|
||||
&& matches!(
|
||||
kind.as_str(),
|
||||
"models" | "pornstars" | "stars" | "channels" | "categories" | "tags"
|
||||
)
|
||||
{
|
||||
video_url = format!("{}/{}/{}/", self.url, kind, value);
|
||||
}
|
||||
}
|
||||
// Check our Video Cache. If the result is younger than 1 hour, we return it.
|
||||
let old_items = match cache.get(&video_url) {
|
||||
|
||||
@@ -14,6 +14,8 @@ use diesel::r2d2;
|
||||
use error_chain::error_chain;
|
||||
use futures::future::join_all;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::vec;
|
||||
use wreq::Version;
|
||||
|
||||
@@ -41,15 +43,58 @@ error_chain! {
|
||||
/// Provider state for the `missav` channel.
///
/// Cloning is cheap with respect to the tag map: clones share the same
/// `Arc`-wrapped map.
#[derive(Debug, Clone)]
pub struct MissavProvider {
    /// Base URL that request URLs are built from.
    url: String,
    /// Shared lookup table from normalised tag keys to the URL recorded for
    /// that tag.
    tag_map: Arc<RwLock<HashMap<String, String>>>,
}
|
||||
|
||||
impl MissavProvider {
|
||||
pub fn new() -> Self {
|
||||
MissavProvider {
|
||||
url: "https://missav.ws".to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns a tag, slug or query into the canonical key used by the tag map.
///
/// The text is ASCII-lowercased, `_` and `-` are treated like whitespace,
/// and the remaining words are joined with single spaces. Input consisting
/// only of separators yields an empty string.
fn normalize_key(value: &str) -> String {
    let lowered = value.to_ascii_lowercase();
    let words: Vec<&str> = lowered
        .split(|ch: char| ch == '_' || ch == '-' || ch.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect();
    words.join(" ")
}
|
||||
|
||||
/// Converts a URL slug into a space-separated label.
///
/// Leading and trailing `/` are removed, hyphens become word breaks, and
/// runs of separators collapse into a single space. Letter case is kept.
fn humanize_slug(value: &str) -> String {
    let slug = value.trim_matches('/');
    let mut words: Vec<&str> = Vec::new();
    for word in slug.split(|ch: char| ch == '-' || ch.is_whitespace()) {
        if !word.is_empty() {
            words.push(word);
        }
    }
    words.join(" ")
}
|
||||
|
||||
fn insert_tag_mapping(&self, key: &str, path_or_url: &str) {
|
||||
let normalized = Self::normalize_key(key);
|
||||
if normalized.is_empty() || path_or_url.trim().is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Ok(mut map) = self.tag_map.write() {
|
||||
map.insert(normalized, path_or_url.trim().to_string());
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_query_url(&self, query: &str, page: u8, sort: &str) -> Option<String> {
|
||||
let normalized = Self::normalize_key(query);
|
||||
let mapped = self.tag_map.read().ok()?.get(&normalized)?.clone();
|
||||
let separator = if mapped.contains('?') { "&" } else { "?" };
|
||||
let mut url = format!("{mapped}{separator}page={page}");
|
||||
if !sort.is_empty() {
|
||||
url.push_str("&sort=");
|
||||
url.push_str(sort);
|
||||
}
|
||||
Some(url)
|
||||
}
|
||||
|
||||
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||
Channel {
|
||||
id: "missav".to_string(),
|
||||
@@ -248,10 +293,13 @@ impl MissavProvider {
|
||||
if !sort.is_empty() {
|
||||
sort = format!("&sort={}", sort);
|
||||
}
|
||||
let url_str = format!(
|
||||
let mut url_str = format!(
|
||||
"{}/{}/search/{}?page={}{}",
|
||||
self.url, language, search_string, page, sort
|
||||
);
|
||||
if let Some(mapped_url) = self.resolve_query_url(query, page, &sort.replace("&sort=", "")) {
|
||||
url_str = mapped_url;
|
||||
}
|
||||
|
||||
if let Some((time, items)) = cache.get(&url_str) {
|
||||
if time.elapsed().unwrap_or_default().as_secs() < 3600 {
|
||||
@@ -386,19 +434,54 @@ impl MissavProvider {
|
||||
|
||||
// 3. Extract Tags (Generic approach to avoid repetitive code)
|
||||
let mut tags = vec![];
|
||||
for (label, prefix) in [
|
||||
("Actress:", "@actress"),
|
||||
("Actor:", "@actor"),
|
||||
("Maker:", "@maker"),
|
||||
("Genre:", "@genre"),
|
||||
for (label, route_kind) in [
|
||||
("Actress:", "actress"),
|
||||
("Actor:", "actor"),
|
||||
("Maker:", "maker"),
|
||||
("Genre:", "genre"),
|
||||
] {
|
||||
let marker = format!("<span>{}</span>", label);
|
||||
if let Some(section) = extract(&vid, &marker, "</div>") {
|
||||
for part in section.split("class=\"text-nord13 font-medium\">").skip(1) {
|
||||
if let Some(val) = part.split('<').next() {
|
||||
let clean = val.trim();
|
||||
if !clean.is_empty() {
|
||||
tags.push(format!("{}:{}", prefix, clean));
|
||||
for anchor in section.split("<a ").skip(1) {
|
||||
let href = anchor
|
||||
.split("href=\"")
|
||||
.nth(1)
|
||||
.and_then(|value| value.split('"').next())
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let title = anchor
|
||||
.split("class=\"text-nord13 font-medium\">")
|
||||
.nth(1)
|
||||
.and_then(|value| value.split('<').next())
|
||||
.map(str::trim)
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if !title.is_empty() {
|
||||
tags.push(title.clone());
|
||||
if !href.is_empty() {
|
||||
let full_url = if href.starts_with("http://") || href.starts_with("https://") {
|
||||
href.clone()
|
||||
} else {
|
||||
format!("{}{}", self.url, href)
|
||||
};
|
||||
self.insert_tag_mapping(&title, &full_url);
|
||||
let slug = href
|
||||
.trim_matches('/')
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if !slug.is_empty() {
|
||||
self.insert_tag_mapping(&slug, &full_url);
|
||||
self.insert_tag_mapping(
|
||||
&format!("{route_kind}:{}", slug),
|
||||
&full_url,
|
||||
);
|
||||
self.insert_tag_mapping(
|
||||
&format!("{route_kind}:{}", Self::humanize_slug(&slug)),
|
||||
&full_url,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,9 @@ use crate::videos::{ServerOptions, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::vec;
|
||||
use wreq::Client;
|
||||
use wreq_util::Emulation;
|
||||
@@ -31,14 +33,66 @@ error_chain! {
|
||||
/// Provider state for the `okxxx` channel.
///
/// Clones share the same `Arc`-wrapped tag map.
#[derive(Debug, Clone)]
pub struct OkxxxProvider {
    /// Base URL that request URLs are built from.
    url: String,
    /// Shared lookup table from normalised tag keys to `kind/slug` paths.
    tag_map: Arc<RwLock<HashMap<String, String>>>,
}
|
||||
impl OkxxxProvider {
|
||||
pub fn new() -> Self {
|
||||
OkxxxProvider {
|
||||
url: "https://ok.xxx".to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns a tag, slug or query into the canonical key used by the tag map.
///
/// The text is ASCII-lowercased, `_` and `-` are treated like whitespace,
/// and the remaining words are joined with single spaces. Input consisting
/// only of separators yields an empty string.
fn normalize_key(value: &str) -> String {
    let lowered = value.to_ascii_lowercase();
    let words: Vec<&str> = lowered
        .split(|ch: char| ch == '_' || ch == '-' || ch.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect();
    words.join(" ")
}
|
||||
|
||||
/// Converts a URL slug into a space-separated label.
///
/// Leading and trailing `/` are removed, hyphens become word breaks, and
/// runs of separators collapse into a single space. Letter case is kept.
fn humanize_slug(value: &str) -> String {
    let slug = value.trim_matches('/');
    let mut words: Vec<&str> = Vec::new();
    for word in slug.split(|ch: char| ch == '-' || ch.is_whitespace()) {
        if !word.is_empty() {
            words.push(word);
        }
    }
    words.join(" ")
}
|
||||
|
||||
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
|
||||
let slug = slug.trim().trim_matches('/');
|
||||
if slug.is_empty() {
|
||||
return;
|
||||
}
|
||||
let path = format!("{kind}/{slug}");
|
||||
if let Ok(mut map) = self.tag_map.write() {
|
||||
map.insert(Self::normalize_key(slug), path.clone());
|
||||
let normalized_title = Self::normalize_key(title.unwrap_or(slug));
|
||||
if !normalized_title.is_empty() {
|
||||
map.insert(normalized_title, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_query_path(&self, query: &str) -> Option<String> {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
||||
let kind = kind.trim().to_ascii_lowercase();
|
||||
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
||||
if !value.is_empty() && matches!(kind.as_str(), "sites" | "models") {
|
||||
return Some(format!("{kind}/{value}"));
|
||||
}
|
||||
}
|
||||
let normalized = Self::normalize_key(trimmed);
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
self.tag_map.read().ok()?.get(&normalized).cloned()
|
||||
}
|
||||
|
||||
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||
Channel {
|
||||
id: "okxxx".to_string(),
|
||||
@@ -177,16 +231,8 @@ impl OkxxxProvider {
|
||||
async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result<Vec<VideoItem>> {
|
||||
let search_string = query.to_lowercase().trim().replace(" ", "-");
|
||||
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
|
||||
|
||||
if search_string.starts_with("@") {
|
||||
let url_part = search_string
|
||||
.split("@")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace(":", "/");
|
||||
video_url = format!("{}/{}/", self.url, url_part);
|
||||
if let Some(path) = self.resolve_query_path(query) {
|
||||
video_url = format!("{}/{}/{}/", self.url, path, page);
|
||||
}
|
||||
// Check our Video Cache. If the result is younger than 1 hour, we return it.
|
||||
let old_items = match cache.get(&video_url) {
|
||||
@@ -405,7 +451,8 @@ impl OkxxxProvider {
|
||||
.collect::<Vec<String>>();
|
||||
for tag in raw_tags {
|
||||
if !tag.is_empty() {
|
||||
tags.push(format!("@sites:{}", tag));
|
||||
self.insert_tag_mapping("sites", &tag, None);
|
||||
tags.push(Self::humanize_slug(&tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -425,7 +472,8 @@ impl OkxxxProvider {
|
||||
.collect::<Vec<String>>();
|
||||
for tag in raw_tags {
|
||||
if !tag.is_empty() {
|
||||
tags.push(format!("@models:{}", tag));
|
||||
self.insert_tag_mapping("models", &tag, None);
|
||||
tags.push(Self::humanize_slug(&tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -883,11 +883,35 @@ impl OmgxxxProvider {
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let mut search_type = "search";
|
||||
let mut search_string = query.to_string().to_ascii_lowercase().trim().to_string();
|
||||
let trimmed = query.trim().trim_start_matches('@').to_ascii_lowercase();
|
||||
if let Some((kind, raw)) = trimmed.split_once(':') {
|
||||
let candidate = raw.trim().replace(' ', "-");
|
||||
if !candidate.is_empty() {
|
||||
match kind.trim() {
|
||||
"models" | "model" | "stars" => {
|
||||
search_type = "models";
|
||||
search_string = candidate;
|
||||
}
|
||||
"sites" | "site" => {
|
||||
search_type = "sites";
|
||||
search_string = candidate;
|
||||
}
|
||||
"networks" | "network" => {
|
||||
search_type = "networks";
|
||||
search_string = candidate;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
match self.stars.read() {
|
||||
Ok(stars) => {
|
||||
if let Some(star) = stars
|
||||
.iter()
|
||||
.find(|s| s.title.to_ascii_lowercase() == search_string)
|
||||
.find(|s| {
|
||||
s.title.eq_ignore_ascii_case(&search_string)
|
||||
|| s.id.eq_ignore_ascii_case(&search_string)
|
||||
})
|
||||
{
|
||||
search_type = "models";
|
||||
search_string = star.id.clone();
|
||||
@@ -901,7 +925,10 @@ impl OmgxxxProvider {
|
||||
Ok(sites) => {
|
||||
if let Some(site) = sites
|
||||
.iter()
|
||||
.find(|s| s.title.to_ascii_lowercase() == search_string)
|
||||
.find(|s| {
|
||||
s.title.eq_ignore_ascii_case(&search_string)
|
||||
|| s.id.eq_ignore_ascii_case(&search_string)
|
||||
})
|
||||
{
|
||||
search_type = "sites";
|
||||
search_string = site.id.clone();
|
||||
@@ -911,6 +938,23 @@ impl OmgxxxProvider {
|
||||
report_provider_error_background("omgxxx", "query.sites_read", &e.to_string());
|
||||
}
|
||||
}
|
||||
match self.networks.read() {
|
||||
Ok(networks) => {
|
||||
if let Some(network) = networks
|
||||
.iter()
|
||||
.find(|n| {
|
||||
n.title.eq_ignore_ascii_case(&search_string)
|
||||
|| n.id.eq_ignore_ascii_case(&search_string)
|
||||
})
|
||||
{
|
||||
search_type = "networks";
|
||||
search_string = network.id.clone();
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
report_provider_error_background("omgxxx", "query.networks_read", &e.to_string());
|
||||
}
|
||||
}
|
||||
let mut video_url = format!("{}/{}/{}/{}/", self.url, search_type, search_string, page);
|
||||
video_url = video_url.replace(" ", "+");
|
||||
// Check our Video Cache. If the result is younger than 1 hour, we return it.
|
||||
|
||||
@@ -10,7 +10,9 @@ use crate::videos::{ServerOptions, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::vec;
|
||||
use wreq::Client;
|
||||
use wreq_util::Emulation;
|
||||
@@ -31,14 +33,66 @@ error_chain! {
|
||||
/// Provider state for the `perfectgirls` channel.
///
/// Clones share the same `Arc`-wrapped tag map.
#[derive(Debug, Clone)]
pub struct PerfectgirlsProvider {
    /// Base URL that request URLs are built from.
    url: String,
    /// Shared lookup table from normalised tag keys to `kind/slug` paths.
    tag_map: Arc<RwLock<HashMap<String, String>>>,
}
|
||||
impl PerfectgirlsProvider {
|
||||
pub fn new() -> Self {
|
||||
PerfectgirlsProvider {
|
||||
url: "https://www.perfectgirls.xxx".to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns a tag, slug or query into the canonical key used by the tag map.
///
/// The text is ASCII-lowercased, `_` and `-` are treated like whitespace,
/// and the remaining words are joined with single spaces. Input consisting
/// only of separators yields an empty string.
fn normalize_key(value: &str) -> String {
    let lowered = value.to_ascii_lowercase();
    let words: Vec<&str> = lowered
        .split(|ch: char| ch == '_' || ch == '-' || ch.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect();
    words.join(" ")
}
|
||||
|
||||
/// Converts a URL slug into a space-separated label.
///
/// Leading and trailing `/` are removed, hyphens become word breaks, and
/// runs of separators collapse into a single space. Letter case is kept.
fn humanize_slug(value: &str) -> String {
    let slug = value.trim_matches('/');
    let mut words: Vec<&str> = Vec::new();
    for word in slug.split(|ch: char| ch == '-' || ch.is_whitespace()) {
        if !word.is_empty() {
            words.push(word);
        }
    }
    words.join(" ")
}
|
||||
|
||||
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
|
||||
let slug = slug.trim().trim_matches('/');
|
||||
if slug.is_empty() {
|
||||
return;
|
||||
}
|
||||
let path = format!("{kind}/{slug}");
|
||||
if let Ok(mut map) = self.tag_map.write() {
|
||||
map.insert(Self::normalize_key(slug), path.clone());
|
||||
let normalized_title = Self::normalize_key(title.unwrap_or(slug));
|
||||
if !normalized_title.is_empty() {
|
||||
map.insert(normalized_title, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_query_path(&self, query: &str) -> Option<String> {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
||||
let kind = kind.trim().to_ascii_lowercase();
|
||||
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
||||
if !value.is_empty() && matches!(kind.as_str(), "channels" | "pornstars") {
|
||||
return Some(format!("{kind}/{value}"));
|
||||
}
|
||||
}
|
||||
let normalized = Self::normalize_key(trimmed);
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
self.tag_map.read().ok()?.get(&normalized).cloned()
|
||||
}
|
||||
|
||||
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||
Channel {
|
||||
id: "perfectgirls".to_string(),
|
||||
@@ -177,16 +231,8 @@ impl PerfectgirlsProvider {
|
||||
async fn query(&self, cache: VideoCache, page: u8, query: &str) -> Result<Vec<VideoItem>> {
|
||||
let search_string = query.to_lowercase().trim().replace(" ", "-");
|
||||
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
|
||||
|
||||
if search_string.starts_with("@") {
|
||||
let url_part = search_string
|
||||
.split("@")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace(":", "/");
|
||||
video_url = format!("{}/{}/", self.url, url_part);
|
||||
if let Some(path) = self.resolve_query_path(query) {
|
||||
video_url = format!("{}/{}/{}/", self.url, path, page);
|
||||
}
|
||||
// Check our Video Cache. If the result is younger than 1 hour, we return it.
|
||||
let old_items = match cache.get(&video_url) {
|
||||
@@ -407,7 +453,8 @@ impl PerfectgirlsProvider {
|
||||
.collect::<Vec<String>>();
|
||||
for tag in raw_tags {
|
||||
if !tag.is_empty() {
|
||||
tags.push(format!("@channels:{}", tag));
|
||||
self.insert_tag_mapping("channels", &tag, None);
|
||||
tags.push(Self::humanize_slug(&tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -427,7 +474,8 @@ impl PerfectgirlsProvider {
|
||||
.collect::<Vec<String>>();
|
||||
for tag in raw_tags {
|
||||
if !tag.is_empty() {
|
||||
tags.push(format!("@pornstars:{}", tag));
|
||||
self.insert_tag_mapping("pornstars", &tag, None);
|
||||
tags.push(Self::humanize_slug(&tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,8 @@ use futures::future::join_all;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use wreq::Client;
|
||||
use wreq::Version;
|
||||
use wreq_util::Emulation;
|
||||
@@ -40,11 +42,13 @@ struct PerverzijaDbEntry {
|
||||
/// Provider state for the Perverzija tube site.
///
/// Clones share the same `Arc`-wrapped tag map.
#[derive(Debug, Clone)]
pub struct PerverzijaProvider {
    /// Base URL that request URLs are built from.
    url: String,
    /// Shared lookup table from normalised tag keys to `kind/slug` paths.
    tag_map: Arc<RwLock<HashMap<String, String>>>,
}
|
||||
impl PerverzijaProvider {
|
||||
pub fn new() -> Self {
|
||||
PerverzijaProvider {
|
||||
url: "https://tube.perverzija.com/".to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,6 +147,104 @@ impl PerverzijaProvider {
|
||||
title.trim().to_string()
|
||||
}
|
||||
|
||||
/// Returns the prefix of `haystack` that ends just before the earliest
/// occurrence of any of `end_markers`.
///
/// When none of the markers occurs (or the list is empty) the whole
/// `haystack` is returned.
fn clip_at_first<'a>(haystack: &'a str, end_markers: &[&str]) -> &'a str {
    let cut = end_markers
        .iter()
        .filter_map(|marker| haystack.find(*marker))
        .min()
        .unwrap_or(haystack.len());
    // `cut` is either a match offset from `find` or the full length, so the
    // slice always ends on a character boundary.
    &haystack[..cut]
}
|
||||
|
||||
fn listing_item_scope(haystack: &str) -> &str {
|
||||
Self::clip_at_first(haystack, &["</article>", "</li>", "<article ", "video-item post"])
|
||||
}
|
||||
|
||||
fn detail_meta_section<'a>(text: &'a str, label: &str) -> &'a str {
|
||||
let section = text
|
||||
.split(label)
|
||||
.nth(1)
|
||||
.unwrap_or_default();
|
||||
Self::clip_at_first(
|
||||
section,
|
||||
&["</div>", "</p>", "<strong>", "<div class=\"related", "<section", "<aside"],
|
||||
)
|
||||
}
|
||||
|
||||
/// Appends the trimmed `value` to `tags` unless it is empty or an entry
/// equal to it (ignoring ASCII case) is already present.
fn push_unique(tags: &mut Vec<String>, value: String) {
    let candidate = value.trim();
    if candidate.is_empty() {
        return;
    }
    let is_new = tags
        .iter()
        .all(|existing| !existing.eq_ignore_ascii_case(candidate));
    if is_new {
        tags.push(candidate.to_string());
    }
}
|
||||
|
||||
/// Collects the `href` value of every `<a href="…">` occurrence in
/// `section`, in document order.
///
/// Only anchors written exactly as `<a href="` are recognised. If the
/// closing quote is missing, the rest of the fragment is taken as the value.
fn parse_href_values(section: &str) -> Vec<String> {
    let mut hrefs = Vec::new();
    for chunk in section.split("<a href=\"").skip(1) {
        let end = chunk.find('"').unwrap_or(chunk.len());
        hrefs.push(chunk[..end].to_string());
    }
    hrefs
}
|
||||
|
||||
/// Turns a tag, slug or query into the canonical key used by the tag map.
///
/// The text is ASCII-lowercased, `_` and `-` are treated like whitespace,
/// and the remaining words are joined with single spaces. Input consisting
/// only of separators yields an empty string.
fn normalize_key(value: &str) -> String {
    let lowered = value.to_ascii_lowercase();
    let words: Vec<&str> = lowered
        .split(|ch: char| ch == '_' || ch == '-' || ch.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect();
    words.join(" ")
}
|
||||
|
||||
/// Converts a URL slug into a space-separated label.
///
/// Leading and trailing `/` are removed, hyphens become word breaks, and
/// runs of separators collapse into a single space. Letter case is kept.
fn humanize_slug(value: &str) -> String {
    let slug = value.trim_matches('/');
    let mut words: Vec<&str> = Vec::new();
    for word in slug.split(|ch: char| ch == '-' || ch.is_whitespace()) {
        if !word.is_empty() {
            words.push(word);
        }
    }
    words.join(" ")
}
|
||||
|
||||
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
|
||||
let slug = slug.trim().trim_matches('/');
|
||||
if slug.is_empty() {
|
||||
return;
|
||||
}
|
||||
let path = format!("{kind}/{slug}");
|
||||
if let Ok(mut map) = self.tag_map.write() {
|
||||
map.insert(Self::normalize_key(slug), path.clone());
|
||||
let normalized_title = Self::normalize_key(title.unwrap_or(slug));
|
||||
if !normalized_title.is_empty() {
|
||||
map.insert(normalized_title, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_query_path(&self, query: &str) -> Option<String> {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
||||
let kind = kind.trim().to_ascii_lowercase();
|
||||
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
||||
if !value.is_empty() && matches!(kind.as_str(), "studio" | "stars" | "tag" | "genre")
|
||||
{
|
||||
return Some(format!("{kind}/{value}"));
|
||||
}
|
||||
}
|
||||
let normalized = Self::normalize_key(trimmed);
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
self.tag_map.read().ok()?.get(&normalized).cloned()
|
||||
}
|
||||
|
||||
async fn get(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
@@ -212,13 +314,8 @@ impl PerverzijaProvider {
|
||||
url_str = format!("{}?s={}", self.url, search_string);
|
||||
}
|
||||
|
||||
if query.starts_with("@studio:") {
|
||||
let studio_name = query.replace("@studio:", "");
|
||||
url_str = format!("{}studio/{}/page/{}/", self.url, studio_name, page);
|
||||
query_parse = false;
|
||||
} else if query.starts_with("@stars:") {
|
||||
let stars_name = query.replace("@stars:", "");
|
||||
url_str = format!("{}stars/{}/page/{}/", self.url, stars_name, page);
|
||||
if let Some(path) = self.resolve_query_path(query) {
|
||||
url_str = format!("{}/{}/page/{}/", self.url.trim_end_matches('/'), path, page);
|
||||
query_parse = false;
|
||||
}
|
||||
url_str = url_str.replace("page/1/", "");
|
||||
@@ -292,7 +389,8 @@ impl PerverzijaProvider {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
for video_segment in raw_videos {
|
||||
for raw_video_segment in raw_videos {
|
||||
let video_segment = Self::listing_item_scope(raw_video_segment);
|
||||
let title = Self::extract_title(video_segment);
|
||||
|
||||
let embed_html_raw = Self::extract_between(video_segment, "data-embed='", "'")
|
||||
@@ -370,15 +468,17 @@ impl PerverzijaProvider {
|
||||
let studios_parts = video_segment.split("a href=\"").collect::<Vec<&str>>();
|
||||
for studio in studios_parts.iter().skip(1) {
|
||||
if studio.starts_with("https://tube.perverzija.com/studio/") {
|
||||
tags.push(
|
||||
studio
|
||||
.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/studio/", "@studio:")
|
||||
.to_string(),
|
||||
let slug = studio
|
||||
.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.first()
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/studio/", "");
|
||||
self.insert_tag_mapping("studio", &slug, None);
|
||||
Self::push_unique(
|
||||
&mut tags,
|
||||
Self::humanize_slug(&slug),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -396,7 +496,8 @@ impl PerverzijaProvider {
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
if !tag_name.is_empty() {
|
||||
tags.push(format!("@stars:{}", tag_name));
|
||||
self.insert_tag_mapping("stars", &tag_name, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&tag_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -407,7 +508,7 @@ impl PerverzijaProvider {
|
||||
if token.starts_with("tag-") {
|
||||
let tag_name = token.split("tag-").nth(1).unwrap_or_default().to_string();
|
||||
if !tag_name.is_empty() {
|
||||
tags.push(tag_name.replace("-", " ").to_string());
|
||||
Self::push_unique(&mut tags, tag_name.replace("-", " ").to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -579,88 +680,58 @@ impl PerverzijaProvider {
|
||||
url_str = "!".to_string()
|
||||
}
|
||||
|
||||
let mut tags: Vec<String> = Vec::new(); // Placeholder for tags, adjust as needed
|
||||
let mut tags: Vec<String> = Vec::new();
|
||||
|
||||
let studios_parts = text
|
||||
.split("<strong>Studio: </strong>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</div>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("<a href=\"")
|
||||
.collect::<Vec<&str>>();
|
||||
for studio in studios_parts.iter().skip(1) {
|
||||
if studio.starts_with("https://tube.perverzija.com/studio/") {
|
||||
tags.push(
|
||||
studio
|
||||
.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/studio/", "@studio:")
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
if text.contains("<strong>Stars: </strong>") {
|
||||
let stars_parts: Vec<&str> = text
|
||||
.split("<strong>Stars: </strong>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</div>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("<a href=\"")
|
||||
.collect::<Vec<&str>>();
|
||||
for star in stars_parts.iter().skip(1) {
|
||||
if star.starts_with("https://tube.perverzija.com/stars/") {
|
||||
tags.push(
|
||||
star.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/stars/", "@stars:")
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
let studios_section = Self::detail_meta_section(&text, "<strong>Studio: </strong>");
|
||||
for href in Self::parse_href_values(studios_section) {
|
||||
if href.starts_with("https://tube.perverzija.com/studio/") {
|
||||
let studio_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/studio/", "");
|
||||
self.insert_tag_mapping("studio", &studio_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&studio_slug));
|
||||
}
|
||||
}
|
||||
|
||||
let tags_parts: Vec<&str> = text
|
||||
.split("<strong>Tags: </strong>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("</div>")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.split("<a href=\"")
|
||||
.collect::<Vec<&str>>();
|
||||
for star in tags_parts.iter().skip(1) {
|
||||
if star.starts_with("https://tube.perverzija.com/stars/") {
|
||||
tags.push(
|
||||
star.split("/\"")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(0)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace("https://tube.perverzija.com/stars/", "@stars:")
|
||||
.to_string(),
|
||||
);
|
||||
let stars_section = Self::detail_meta_section(&text, "<strong>Stars: </strong>");
|
||||
for href in Self::parse_href_values(stars_section) {
|
||||
if href.starts_with("https://tube.perverzija.com/stars/") {
|
||||
let star_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/stars/", "");
|
||||
self.insert_tag_mapping("stars", &star_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
|
||||
}
|
||||
}
|
||||
|
||||
let tags_section = if text.contains("<strong>Tags: </strong>") {
|
||||
Self::detail_meta_section(&text, "<strong>Tags: </strong>")
|
||||
} else {
|
||||
Self::detail_meta_section(&text, "<strong>Genres: </strong>")
|
||||
};
|
||||
for href in Self::parse_href_values(tags_section) {
|
||||
if href.starts_with("https://tube.perverzija.com/stars/") {
|
||||
let star_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/stars/", "");
|
||||
self.insert_tag_mapping("stars", &star_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&star_slug));
|
||||
continue;
|
||||
}
|
||||
if href.starts_with("https://tube.perverzija.com/tag/") {
|
||||
let tag_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/tag/", "");
|
||||
self.insert_tag_mapping("tag", &tag_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&tag_slug));
|
||||
continue;
|
||||
}
|
||||
if href.starts_with("https://tube.perverzija.com/genre/") {
|
||||
let genre_slug = href
|
||||
.trim_end_matches('/')
|
||||
.replace("https://tube.perverzija.com/genre/", "");
|
||||
self.insert_tag_mapping("genre", &genre_slug, None);
|
||||
Self::push_unique(&mut tags, Self::humanize_slug(&genre_slug));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,8 @@ use crate::videos::{ServerOptions, VideoItem};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::vec;
|
||||
|
||||
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
@@ -27,14 +29,66 @@ error_chain! {
|
||||
/// Provider state for the `pornhat` channel.
///
/// Clones share the same `Arc`-wrapped tag map.
#[derive(Debug, Clone)]
pub struct PornhatProvider {
    /// Base URL that request URLs are built from.
    url: String,
    /// Shared lookup table from normalised tag keys to `kind/slug` paths.
    tag_map: Arc<RwLock<HashMap<String, String>>>,
}
|
||||
impl PornhatProvider {
|
||||
pub fn new() -> Self {
|
||||
PornhatProvider {
|
||||
url: "https://www.pornhat.com".to_string(),
|
||||
tag_map: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns a tag, slug or query into the canonical key used by the tag map.
///
/// The text is ASCII-lowercased, `_` and `-` are treated like whitespace,
/// and the remaining words are joined with single spaces. Input consisting
/// only of separators yields an empty string.
fn normalize_key(value: &str) -> String {
    let lowered = value.to_ascii_lowercase();
    let words: Vec<&str> = lowered
        .split(|ch: char| ch == '_' || ch == '-' || ch.is_whitespace())
        .filter(|word| !word.is_empty())
        .collect();
    words.join(" ")
}
|
||||
|
||||
/// Converts a URL slug into a space-separated label.
///
/// Leading and trailing `/` are removed, hyphens become word breaks, and
/// runs of separators collapse into a single space. Letter case is kept.
fn humanize_slug(value: &str) -> String {
    let slug = value.trim_matches('/');
    let mut words: Vec<&str> = Vec::new();
    for word in slug.split(|ch: char| ch == '-' || ch.is_whitespace()) {
        if !word.is_empty() {
            words.push(word);
        }
    }
    words.join(" ")
}
|
||||
|
||||
fn insert_tag_mapping(&self, kind: &str, slug: &str, title: Option<&str>) {
|
||||
let slug = slug.trim().trim_matches('/');
|
||||
if slug.is_empty() {
|
||||
return;
|
||||
}
|
||||
let path = format!("{kind}/{slug}");
|
||||
if let Ok(mut map) = self.tag_map.write() {
|
||||
map.insert(Self::normalize_key(slug), path.clone());
|
||||
let normalized_title = Self::normalize_key(title.unwrap_or(slug));
|
||||
if !normalized_title.is_empty() {
|
||||
map.insert(normalized_title, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_query_path(&self, query: &str) -> Option<String> {
|
||||
let trimmed = query.trim().trim_start_matches('@');
|
||||
if let Some((kind, raw_value)) = trimmed.split_once(':') {
|
||||
let kind = kind.trim().to_ascii_lowercase();
|
||||
let value = raw_value.trim().trim_matches('/').replace(' ', "-");
|
||||
if !value.is_empty() && matches!(kind.as_str(), "sites" | "models") {
|
||||
return Some(format!("{kind}/{value}"));
|
||||
}
|
||||
}
|
||||
let normalized = Self::normalize_key(trimmed);
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
self.tag_map.read().ok()?.get(&normalized).cloned()
|
||||
}
|
||||
|
||||
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
||||
Channel {
|
||||
id: "pornhat".to_string(),
|
||||
@@ -127,16 +181,8 @@ impl PornhatProvider {
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
let search_string = query.to_lowercase().trim().replace(" ", "-");
|
||||
let mut video_url = format!("{}/search/{}/{}/", self.url, search_string, page);
|
||||
|
||||
if search_string.starts_with("@") {
|
||||
let url_part = search_string
|
||||
.split("@")
|
||||
.collect::<Vec<&str>>()
|
||||
.get(1)
|
||||
.copied()
|
||||
.unwrap_or_default()
|
||||
.replace(":", "/");
|
||||
video_url = format!("{}/{}/", self.url, url_part);
|
||||
if let Some(path) = self.resolve_query_path(query) {
|
||||
video_url = format!("{}/{}/{}/", self.url, path, page);
|
||||
}
|
||||
// Check our Video Cache. If the result is younger than 1 hour, we return it.
|
||||
let old_items = match cache.get(&video_url) {
|
||||
@@ -296,7 +342,8 @@ impl PornhatProvider {
|
||||
.collect::<Vec<String>>();
|
||||
for tag in raw_tags {
|
||||
if !tag.is_empty() {
|
||||
tags.push(format!("@sites:{}", tag));
|
||||
self.insert_tag_mapping("sites", &tag, None);
|
||||
tags.push(Self::humanize_slug(&tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -316,7 +363,8 @@ impl PornhatProvider {
|
||||
.collect::<Vec<String>>();
|
||||
for tag in raw_tags {
|
||||
if !tag.is_empty() {
|
||||
tags.push(format!("@models:{}", tag));
|
||||
self.insert_tag_mapping("models", &tag, None);
|
||||
tags.push(Self::humanize_slug(&tag));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,23 +1,18 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
build_proxy_url, strip_url_scheme,
|
||||
Provider, build_proxy_url, report_provider_error, requester_or_default, strip_url_scheme,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::time::parse_time_to_seconds;
|
||||
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
||||
use crate::videos::{ServerOptions, VideoItem};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use chrono::{DateTime, NaiveDate, Utc};
|
||||
use error_chain::error_chain;
|
||||
use futures::stream::{self, StreamExt};
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, RwLock};
|
||||
@@ -32,7 +27,6 @@ pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
|
||||
|
||||
const BASE_URL: &str = "https://www.pornhub.com";
|
||||
const CHANNEL_ID: &str = "pornhub";
|
||||
const DETAIL_ENRICH_LIMIT: usize = 12;
|
||||
|
||||
error_chain! {
|
||||
foreign_links {
|
||||
@@ -216,11 +210,6 @@ impl PornhubProvider {
|
||||
.map_err(|error| Error::from(ErrorKind::Parse(format!("selector parse failed for {value}: {error}"))))
|
||||
}
|
||||
|
||||
fn regex(value: &str) -> Result<Regex> {
|
||||
Regex::new(value)
|
||||
.map_err(|error| Error::from(ErrorKind::Parse(format!("regex parse failed for {value}: {error}"))))
|
||||
}
|
||||
|
||||
fn text_of(element: &ElementRef<'_>) -> String {
|
||||
element
|
||||
.text()
|
||||
@@ -376,7 +365,12 @@ impl PornhubProvider {
|
||||
let title_selector = Self::selector(".title a, .thumbnailTitle, span.title a")?;
|
||||
let image_selector = Self::selector("img")?;
|
||||
let duration_selector = Self::selector(".duration")?;
|
||||
let views_selector = Self::selector(".views var")?;
|
||||
let views_selector = Self::selector(".views var, .views")?;
|
||||
let rating_selector =
|
||||
Self::selector(".value, .rating, .ratingInfo, .percent, .ratingPercent")?;
|
||||
let tag_link_selector = Self::selector(
|
||||
"a[href*=\"/categories/\"], a[href*=\"/video/search\"], a[href*=\"/pornstar/\"], a[href*=\"/model/\"], a[href*=\"/channels/\"], a[href*=\"/users/\"]",
|
||||
)?;
|
||||
let uploader_selector = Self::selector(
|
||||
".videoUploaderBlock a[href], .usernameWrap a[href], .usernameWrapper a[href]",
|
||||
)?;
|
||||
@@ -438,6 +432,8 @@ impl PornhubProvider {
|
||||
.value()
|
||||
.attr("src")
|
||||
.or_else(|| value.value().attr("data-mediumthumb"))
|
||||
.or_else(|| value.value().attr("data-path"))
|
||||
.or_else(|| value.value().attr("data-src"))
|
||||
})
|
||||
.map(|value| self.normalize_url(value))
|
||||
.unwrap_or_default();
|
||||
@@ -449,10 +445,20 @@ impl PornhubProvider {
|
||||
.and_then(|value| parse_time_to_seconds(&value))
|
||||
.unwrap_or(0) as u32;
|
||||
|
||||
let views = card
|
||||
.select(&views_selector)
|
||||
.next()
|
||||
.and_then(|value| parse_abbreviated_number(&Self::text_of(&value)));
|
||||
let views = card.select(&views_selector).find_map(|value| {
|
||||
let text = Self::text_of(&value);
|
||||
parse_abbreviated_number(&text)
|
||||
.or_else(|| parse_abbreviated_number(text.replace("views", "").trim()))
|
||||
});
|
||||
let rating = card.select(&rating_selector).find_map(|value| {
|
||||
let text = Self::text_of(&value);
|
||||
let cleaned = text
|
||||
.trim()
|
||||
.trim_end_matches('%')
|
||||
.replace(',', "")
|
||||
.replace(' ', "");
|
||||
cleaned.parse::<f32>().ok()
|
||||
});
|
||||
|
||||
let uploader_link = card.select(&uploader_selector).next();
|
||||
let uploader = uploader_link
|
||||
@@ -486,13 +492,18 @@ impl PornhubProvider {
|
||||
item.uploaderId = uploader_url
|
||||
.as_deref()
|
||||
.and_then(Self::uploader_identity_from_url);
|
||||
item.rating = rating;
|
||||
|
||||
let mut tags = Vec::new();
|
||||
if let Some(tag) = uploader_url
|
||||
.as_deref()
|
||||
.and_then(|url| self.query_tag_from_uploader_url(url))
|
||||
{
|
||||
tags.push(tag);
|
||||
Self::push_unique(&mut tags, tag);
|
||||
}
|
||||
for tag_link in card.select(&tag_link_selector) {
|
||||
let tag = Self::decode_html(&Self::text_of(&tag_link));
|
||||
Self::push_unique(&mut tags, tag);
|
||||
}
|
||||
if !tags.is_empty() {
|
||||
item.tags = Some(tags);
|
||||
@@ -549,292 +560,6 @@ impl PornhubProvider {
|
||||
values.push(normalized.to_string());
|
||||
}
|
||||
|
||||
fn collect_named_links(&self, document: &Html, selector_text: &str) -> Result<Vec<String>> {
|
||||
let selector = Self::selector(selector_text)?;
|
||||
let mut values = Vec::new();
|
||||
for element in document.select(&selector) {
|
||||
Self::push_unique(&mut values, Self::decode_html(&Self::text_of(&element)));
|
||||
}
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
fn parse_upload_date(value: &str) -> Option<u64> {
|
||||
if let Ok(parsed) = DateTime::parse_from_rfc3339(value.trim()) {
|
||||
return Some(parsed.timestamp() as u64);
|
||||
}
|
||||
|
||||
NaiveDate::parse_from_str(value.trim(), "%Y-%m-%d")
|
||||
.ok()
|
||||
.and_then(|date| date.and_hms_opt(0, 0, 0))
|
||||
.map(|date| DateTime::<Utc>::from_naive_utc_and_offset(date, Utc).timestamp() as u64)
|
||||
}
|
||||
|
||||
fn json_string(value: Option<&Value>) -> Option<String> {
|
||||
value.and_then(|value| match value {
|
||||
Value::String(value) => Some(value.to_string()),
|
||||
Value::Number(value) => Some(value.to_string()),
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
fn json_u32(value: Option<&Value>) -> Option<u32> {
|
||||
match value {
|
||||
Some(Value::Number(value)) => value.as_u64().and_then(|value| u32::try_from(value).ok()),
|
||||
Some(Value::String(value)) => value.parse::<u32>().ok(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_flashvars(&self, html: &str) -> Result<Option<Value>> {
|
||||
let regex = Self::regex(r#"(?s)var\s+flashvars_\d+\s*=\s*(\{.*?\});"#)?;
|
||||
let Some(raw) = regex
|
||||
.captures(html)
|
||||
.and_then(|captures| captures.get(1))
|
||||
.map(|value| value.as_str())
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
Ok(Some(serde_json::from_str::<Value>(raw)?))
|
||||
}
|
||||
|
||||
fn extract_ld_video_object(&self, document: &Html) -> Result<Option<Value>> {
|
||||
let script_selector = Self::selector("script[type=\"application/ld+json\"]")?;
|
||||
for script in document.select(&script_selector) {
|
||||
let raw = script.inner_html();
|
||||
let Ok(value) = serde_json::from_str::<Value>(&raw) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if Self::is_video_object(&value) {
|
||||
return Ok(Some(value));
|
||||
}
|
||||
|
||||
if let Some(array) = value.as_array() {
|
||||
for entry in array {
|
||||
if Self::is_video_object(entry) {
|
||||
return Ok(Some(entry.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn is_video_object(value: &Value) -> bool {
|
||||
value
|
||||
.get("@type")
|
||||
.and_then(|value| value.as_str())
|
||||
.is_some_and(|value| value.eq_ignore_ascii_case("VideoObject"))
|
||||
}
|
||||
|
||||
fn build_formats_from_flashvars(&self, flashvars: &Value) -> Vec<VideoFormat> {
|
||||
let mut entries = flashvars
|
||||
.get("mediaDefinitions")
|
||||
.and_then(|value| value.as_array())
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.filter_map(|entry| {
|
||||
let format = entry
|
||||
.get("format")
|
||||
.and_then(|value| value.as_str())
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase();
|
||||
if format != "hls" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let url = entry
|
||||
.get("videoUrl")
|
||||
.and_then(|value| value.as_str())
|
||||
.map(|value| self.normalize_url(value))
|
||||
.filter(|value| !value.is_empty())?;
|
||||
|
||||
let quality = entry
|
||||
.get("quality")
|
||||
.and_then(|value| value.as_str())
|
||||
.unwrap_or("auto");
|
||||
let label = match quality {
|
||||
"auto" => "auto".to_string(),
|
||||
value if value.ends_with('p') => value.to_string(),
|
||||
value => format!("{value}p"),
|
||||
};
|
||||
|
||||
let rank = if label == "auto" {
|
||||
0
|
||||
} else {
|
||||
label
|
||||
.trim_end_matches('p')
|
||||
.parse::<u32>()
|
||||
.unwrap_or(0)
|
||||
};
|
||||
|
||||
Some((rank, label, url))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
entries.sort_by_key(|(rank, _, _)| *rank);
|
||||
entries.dedup_by(|a, b| a.2 == b.2);
|
||||
|
||||
entries
|
||||
.into_iter()
|
||||
.map(|(_, label, url)| {
|
||||
VideoFormat::new(url, label.clone(), "m3u8".to_string())
|
||||
.format_id(label.clone())
|
||||
.format_note(label)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
|
||||
let document = Html::parse_document(html);
|
||||
|
||||
if let Some(flashvars) = self.extract_flashvars(html)? {
|
||||
if let Some(title) = Self::json_string(flashvars.get("video_title")) {
|
||||
let decoded = Self::decode_html(&title);
|
||||
if !decoded.is_empty() {
|
||||
item.title = decoded;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(thumb) = Self::json_string(flashvars.get("image_url")) {
|
||||
let normalized = self.normalize_url(&thumb);
|
||||
if !normalized.is_empty() {
|
||||
item.thumb = normalized;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(duration) = Self::json_u32(flashvars.get("video_duration")) {
|
||||
item.duration = duration;
|
||||
}
|
||||
|
||||
if let Some(link_url) = Self::json_string(flashvars.get("link_url")) {
|
||||
let normalized = self.normalize_url(&link_url);
|
||||
if !normalized.is_empty() {
|
||||
item.url = normalized;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ld_video) = self.extract_ld_video_object(&document)? {
|
||||
if let Some(thumb) = ld_video
|
||||
.get("thumbnailUrl")
|
||||
.and_then(|value| match value {
|
||||
Value::String(value) => Some(value.to_string()),
|
||||
Value::Array(values) => values
|
||||
.iter()
|
||||
.find_map(|entry| entry.as_str().map(ToOwned::to_owned)),
|
||||
_ => None,
|
||||
})
|
||||
{
|
||||
let normalized = self.normalize_url(&thumb);
|
||||
if !normalized.is_empty() {
|
||||
item.thumb = normalized;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(uploaded_at) = ld_video
|
||||
.get("uploadDate")
|
||||
.and_then(|value| value.as_str())
|
||||
.and_then(Self::parse_upload_date)
|
||||
{
|
||||
item.uploadedAt = Some(uploaded_at);
|
||||
}
|
||||
|
||||
if item.views.is_none() {
|
||||
item.views = Self::json_string(ld_video.get("interactionCount"))
|
||||
.and_then(|value| value.parse::<u32>().ok());
|
||||
}
|
||||
|
||||
if item.uploader.is_none() {
|
||||
item.uploader = ld_video
|
||||
.get("author")
|
||||
.and_then(|value| match value {
|
||||
Value::String(value) => Some(value.to_string()),
|
||||
Value::Object(values) => values
|
||||
.get("name")
|
||||
.and_then(|value| value.as_str())
|
||||
.map(ToOwned::to_owned),
|
||||
_ => None,
|
||||
})
|
||||
.filter(|value| !value.trim().is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
let mut tags = item.tags.clone().unwrap_or_default();
|
||||
for value in self.collect_named_links(
|
||||
&document,
|
||||
".categoriesWrapper a.item, .categoriesWrapper a[href*=\"/categories/\"]",
|
||||
)? {
|
||||
Self::push_unique(&mut tags, value);
|
||||
}
|
||||
for value in self.collect_named_links(
|
||||
&document,
|
||||
".tagsWrapper a.item, .tagsWrapper a[href*=\"/video/search\"]",
|
||||
)? {
|
||||
Self::push_unique(&mut tags, value);
|
||||
}
|
||||
for value in self.collect_named_links(
|
||||
&document,
|
||||
".pornstarsWrapper a.item, .pornstarsWrapper a[href*=\"/pornstar/\"], a[href*=\"/pornstar/\"]",
|
||||
)? {
|
||||
Self::push_unique(&mut tags, value);
|
||||
}
|
||||
for value in self.collect_named_links(
|
||||
&document,
|
||||
".modelsWrapper a.item, .modelsWrapper a[href*=\"/model/\"], a[href*=\"/model/\"]",
|
||||
)? {
|
||||
Self::push_unique(&mut tags, value);
|
||||
}
|
||||
if !tags.is_empty() {
|
||||
item.tags = Some(tags);
|
||||
}
|
||||
|
||||
Ok(item)
|
||||
}
|
||||
|
||||
async fn enrich_listing_items(&self, items: Vec<VideoItem>, options: &ServerOptions) -> Vec<VideoItem> {
|
||||
let requester = requester_or_default(options, CHANNEL_ID, "enrich_listing_items.requester");
|
||||
let mut enriched = stream::iter(items.into_iter().enumerate().map(|(index, item)| {
|
||||
let provider = self.clone();
|
||||
let requester = requester.clone();
|
||||
async move {
|
||||
if index >= DETAIL_ENRICH_LIMIT || item.url.is_empty() {
|
||||
return (index, item);
|
||||
}
|
||||
|
||||
let fallback = item.clone();
|
||||
let enriched = match provider.fetch_detail(item, requester).await {
|
||||
Ok(value) => value,
|
||||
Err(error) => {
|
||||
report_provider_error_background(
|
||||
CHANNEL_ID,
|
||||
"enrich_listing_items.detail",
|
||||
&format!("url={}; error={error}", fallback.url),
|
||||
);
|
||||
fallback
|
||||
}
|
||||
};
|
||||
(index, enriched)
|
||||
}
|
||||
}))
|
||||
.buffer_unordered(4)
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
|
||||
enriched.sort_by_key(|(index, _)| *index);
|
||||
enriched.into_iter().map(|(_, item)| item).collect()
|
||||
}
|
||||
|
||||
async fn fetch_detail(&self, item: VideoItem, mut requester: crate::util::requester::Requester) -> Result<VideoItem> {
|
||||
let html = requester
|
||||
.get(&item.url, None)
|
||||
.await
|
||||
.map_err(|error| ErrorKind::Parse(format!("detail request failed: {error}")))?;
|
||||
self.apply_detail_video(item, &html)
|
||||
}
|
||||
|
||||
async fn fetch_listing(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
@@ -843,6 +568,10 @@ impl PornhubProvider {
|
||||
query: Option<&str>,
|
||||
options: ServerOptions,
|
||||
) -> Result<Vec<VideoItem>> {
|
||||
if query.is_some() && self.tag_map.read().unwrap().is_empty() {
|
||||
let _ = Self::load_tags(&self.url, Arc::clone(&self.tag_map)).await;
|
||||
}
|
||||
|
||||
let (video_url, scope) = self.build_listing_request(page, sort, query);
|
||||
let old_items = match cache.get(&video_url) {
|
||||
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
|
||||
@@ -883,20 +612,14 @@ impl PornhubProvider {
|
||||
return Ok(old_items);
|
||||
}
|
||||
|
||||
let mut items = self.enrich_listing_items(items, &options).await;
|
||||
let mut items = items;
|
||||
|
||||
// Rewrite thumbs and previews to use the proxy when appropriate
|
||||
// Rewrite thumbs to use the page-driven thumb proxy when appropriate
|
||||
for item in items.iter_mut() {
|
||||
let proxied = self.proxied_thumb(&options, &item.thumb);
|
||||
let proxied = self.proxied_thumb(&options, &item.url);
|
||||
if !proxied.is_empty() {
|
||||
item.thumb = proxied;
|
||||
}
|
||||
if let Some(prev) = item.preview.clone() {
|
||||
let proxied_prev = self.proxied_thumb(&options, &prev);
|
||||
if !proxied_prev.is_empty() {
|
||||
item.preview = Some(proxied_prev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cache.remove(&video_url);
|
||||
@@ -906,28 +629,33 @@ impl PornhubProvider {
|
||||
}
|
||||
|
||||
impl PornhubProvider {
|
||||
fn proxied_thumb(&self, options: &ServerOptions, thumb: &str) -> String {
|
||||
if thumb.is_empty() {
|
||||
fn proxied_thumb(&self, options: &ServerOptions, page_url: &str) -> String {
|
||||
if page_url.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
if !PornhubThumbPolicy::is_allowed_thumb_url(thumb) {
|
||||
if !PornhubThumbPolicy::is_allowed_video_page_url(page_url) {
|
||||
return String::new();
|
||||
}
|
||||
build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(thumb))
|
||||
build_proxy_url(options, "pornhub-thumb", &strip_url_scheme(page_url))
|
||||
}
|
||||
}
|
||||
|
||||
struct PornhubThumbPolicy;
|
||||
|
||||
impl PornhubThumbPolicy {
|
||||
fn is_allowed_thumb_url(url: &str) -> bool {
|
||||
fn is_allowed_video_page_url(url: &str) -> bool {
|
||||
let Some(url) = Url::parse(url).ok() else { return false; };
|
||||
if url.scheme() != "https" {
|
||||
return false;
|
||||
}
|
||||
let Some(host) = url.host_str() else { return false; };
|
||||
// Only allow the specific Pornhub CDN host used for thumbnails
|
||||
host.eq_ignore_ascii_case("pix-cdn77.phncdn.com")
|
||||
if !host.eq_ignore_ascii_case("pornhub.com")
|
||||
&& !host.eq_ignore_ascii_case("www.pornhub.com")
|
||||
&& !host.ends_with(".pornhub.com")
|
||||
{
|
||||
return false;
|
||||
}
|
||||
url.path().starts_with("/view_video.php") || url.path().starts_with("/video/")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -986,6 +714,33 @@ mod tests {
|
||||
assert!(provider.parse_query_target("teacher").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolves_query_from_tag_map_by_id_or_title() {
|
||||
let provider = PornhubProvider::new();
|
||||
{
|
||||
let mut map = provider.tag_map.write().unwrap();
|
||||
let info = TagInfo {
|
||||
kind: QueryTargetKind::Channel,
|
||||
slug: "mature-4k".to_string(),
|
||||
title: "Mature 4K".to_string(),
|
||||
};
|
||||
map.insert("mature-4k".to_string(), info.clone());
|
||||
map.insert("mature 4k".to_string(), info);
|
||||
}
|
||||
|
||||
let by_id = provider
|
||||
.parse_query_target("mature-4k")
|
||||
.expect("id lookup should resolve");
|
||||
assert!(matches!(by_id.kind, QueryTargetKind::Channel));
|
||||
assert_eq!(by_id.slug, "mature-4k");
|
||||
|
||||
let by_title = provider
|
||||
.parse_query_target("Mature 4K")
|
||||
.expect("title lookup should resolve");
|
||||
assert!(matches!(by_title.kind, QueryTargetKind::Channel));
|
||||
assert_eq!(by_title.slug, "mature-4k");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_browse_listing_cards() {
|
||||
let provider = PornhubProvider::new();
|
||||
@@ -1030,66 +785,47 @@ mod tests {
|
||||
assert!(items[0]
|
||||
.tags
|
||||
.as_ref()
|
||||
.is_some_and(|values| values.iter().any(|value| value == "@model:honeycore")));
|
||||
.is_some_and(|values| values.iter().any(|value| value.eq_ignore_ascii_case("honeycore"))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn applies_detail_video_metadata() {
|
||||
fn parses_listing_metadata_without_detail_fetch() {
|
||||
let provider = PornhubProvider::new();
|
||||
let item = VideoItem::new(
|
||||
"69cfa159b1377".to_string(),
|
||||
"placeholder".to_string(),
|
||||
"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377".to_string(),
|
||||
CHANNEL_ID.to_string(),
|
||||
"https://example.com/thumb.jpg".to_string(),
|
||||
0,
|
||||
);
|
||||
let html = r#"
|
||||
<script>
|
||||
var flashvars_482929735 = {
|
||||
"video_title":"Brazzers Detail Title",
|
||||
"image_url":"https://example.com/detail.jpg",
|
||||
"video_duration":"930",
|
||||
"link_url":"https://www.pornhub.com/view_video.php?viewkey=69cfa159b1377",
|
||||
"mediaDefinitions":[
|
||||
{"format":"hls","quality":"240","videoUrl":"https://cdn.example.com/master-240.m3u8"},
|
||||
{"format":"hls","quality":"720","videoUrl":"https://cdn.example.com/master-720.m3u8"},
|
||||
{"format":"mp4","quality":"720","videoUrl":"https://cdn.example.com/video.mp4","remote":true}
|
||||
]
|
||||
};
|
||||
</script>
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@type":"VideoObject",
|
||||
"thumbnailUrl":"https://example.com/ld-thumb.jpg",
|
||||
"uploadDate":"2026-04-03T00:00:00+00:00",
|
||||
"interactionCount":"5700",
|
||||
"author":{"name":"Brazzers"}
|
||||
}
|
||||
</script>
|
||||
<div class="categoriesWrapper">
|
||||
<a class="item" href="/categories/big-tits">Big Tits</a>
|
||||
</div>
|
||||
<div class="tagsWrapper">
|
||||
<a class="item" href="/video/search?search=maid">Maid</a>
|
||||
</div>
|
||||
<ul id="videoCategory" class="nf-videos videos search-video-thumbs">
|
||||
<li class="pcVideoListItem js-pop videoblock videoBox withKebabMenu"
|
||||
data-video-id="466705435"
|
||||
data-video-vkey="67ed937c986b1">
|
||||
<a href="/view_video.php?viewkey=67ed937c986b1" title="Black asian teen"></a>
|
||||
<img data-src="https://example.com/thumb.jpg"
|
||||
data-mediabook="https://example.com/preview.webm" />
|
||||
<div class="marker-overlays"><var class="duration">12:18</var></div>
|
||||
<div class="videoDetailsBlock">
|
||||
<span class="views"><var>199K</var> views</span>
|
||||
<span class="value">95%</span>
|
||||
</div>
|
||||
<a href="/categories/anal">Anal</a>
|
||||
<a href="/pornstar/jane-doe">Jane Doe</a>
|
||||
</li>
|
||||
</ul>
|
||||
"#;
|
||||
|
||||
let item = provider
|
||||
.apply_detail_video(item, html)
|
||||
.expect("detail page should enrich item");
|
||||
assert_eq!(item.title, "Brazzers Detail Title");
|
||||
assert_eq!(item.thumb, "https://example.com/ld-thumb.jpg");
|
||||
assert_eq!(item.duration, 930);
|
||||
assert_eq!(item.views, Some(5700));
|
||||
assert_eq!(item.uploader.as_deref(), Some("Brazzers"));
|
||||
assert!(item.uploadedAt.is_some());
|
||||
assert_eq!(item.formats.as_ref().map(|values| values.len()), Some(2));
|
||||
assert!(item.tags.as_ref().is_some_and(|values| values
|
||||
.iter()
|
||||
.any(|value| value == "Big Tits")));
|
||||
assert!(item.tags.as_ref().is_some_and(|values| values
|
||||
.iter()
|
||||
.any(|value| value == "Maid")));
|
||||
let items = provider
|
||||
.parse_listing_page(html, ListingScope::Browse)
|
||||
.expect("browse listing should parse");
|
||||
|
||||
assert_eq!(items.len(), 1);
|
||||
assert_eq!(items[0].thumb, "https://example.com/thumb.jpg");
|
||||
assert_eq!(items[0].preview.as_deref(), Some("https://example.com/preview.webm"));
|
||||
assert_eq!(items[0].views, Some(199000));
|
||||
assert_eq!(items[0].rating, Some(95.0));
|
||||
assert!(items[0]
|
||||
.tags
|
||||
.as_ref()
|
||||
.is_some_and(|values| values.iter().any(|value| value == "Anal")));
|
||||
assert!(items[0]
|
||||
.tags
|
||||
.as_ref()
|
||||
.is_some_and(|values| values.iter().any(|value| value == "Jane Doe")));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ use crate::{proxies::sxyprn::SxyprnProxy, util::requester::Requester};
|
||||
pub mod doodstream;
|
||||
pub mod hanimecdn;
|
||||
pub mod hqpornerthumb;
|
||||
pub mod pornhubthumb;
|
||||
pub mod javtiful;
|
||||
pub mod noodlemagazine;
|
||||
pub mod pimpbunny;
|
||||
@@ -18,6 +17,7 @@ pub mod pimpbunnythumb;
|
||||
pub mod porndish;
|
||||
pub mod porndishthumb;
|
||||
pub mod pornhd3x;
|
||||
pub mod pornhubthumb;
|
||||
pub mod shooshtime;
|
||||
pub mod spankbang;
|
||||
pub mod sxyprn;
|
||||
|
||||
@@ -1,51 +1,220 @@
|
||||
use ntex::http::header::{CONTENT_LENGTH, CONTENT_TYPE};
|
||||
use ntex::{
|
||||
http::Response,
|
||||
web::{self, HttpRequest, error},
|
||||
};
|
||||
use ntex::web::{self, HttpRequest};
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use url::Url;
|
||||
|
||||
use crate::util::requester::Requester;
|
||||
|
||||
const PORNHUB_ROOT: &str = "https://www.pornhub.com/";
|
||||
|
||||
fn endpoint_to_page_url(req: &HttpRequest) -> String {
|
||||
let endpoint = req.match_info().query("endpoint").trim_start_matches('/');
|
||||
let mut page_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
|
||||
endpoint.to_string()
|
||||
} else {
|
||||
format!("https://{endpoint}")
|
||||
};
|
||||
|
||||
let query = req.query_string();
|
||||
if !query.is_empty() && !page_url.contains('?') {
|
||||
page_url.push('?');
|
||||
page_url.push_str(query);
|
||||
}
|
||||
|
||||
page_url
|
||||
}
|
||||
|
||||
fn is_allowed_video_page_url(url: &str) -> bool {
|
||||
let Some(url) = Url::parse(url).ok() else {
|
||||
return false;
|
||||
};
|
||||
if url.scheme() != "https" {
|
||||
return false;
|
||||
}
|
||||
let Some(host) = url.host_str() else {
|
||||
return false;
|
||||
};
|
||||
if host != "pornhub.com" && host != "www.pornhub.com" && !host.ends_with(".pornhub.com") {
|
||||
return false;
|
||||
}
|
||||
url.path().starts_with("/view_video.php") || url.path().starts_with("/video/")
|
||||
}
|
||||
|
||||
fn normalize_candidate_url(candidate: &str, page_url: &Url) -> Option<String> {
|
||||
if candidate.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if candidate.starts_with("//") {
|
||||
return Some(format!("https:{candidate}"));
|
||||
}
|
||||
if candidate.starts_with("https://") || candidate.starts_with("http://") {
|
||||
return Some(candidate.to_string());
|
||||
}
|
||||
if candidate.starts_with('/') {
|
||||
let host = page_url.host_str()?;
|
||||
return Some(format!("{}://{}{}", page_url.scheme(), host, candidate));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn is_allowed_thumb_url(url: &str) -> bool {
|
||||
let Some(url) = Url::parse(url).ok() else {
|
||||
return false;
|
||||
};
|
||||
if url.scheme() != "https" {
|
||||
return false;
|
||||
}
|
||||
let Some(host) = url.host_str() else {
|
||||
return false;
|
||||
};
|
||||
let allowed_host = host == "pornhub.com"
|
||||
|| host == "www.pornhub.com"
|
||||
|| host.ends_with(".pornhub.com")
|
||||
|| host.ends_with(".phncdn.com");
|
||||
if !allowed_host {
|
||||
return false;
|
||||
}
|
||||
let path = url.path().to_ascii_lowercase();
|
||||
[".jpg", ".jpeg", ".png", ".webp", ".avif"]
|
||||
.iter()
|
||||
.any(|ext| path.ends_with(ext))
|
||||
}
|
||||
|
||||
/// Decodes the escapes that commonly appear in URLs embedded in inline
/// JavaScript/JSON: `\/` and `\uXXXX` (hex digits in either case).
///
/// The input is scanned once from left to right, so text produced by one
/// escape is never re-interpreted as the start of another. Everything that is
/// not a recognised escape is copied through unchanged, including:
///
/// * other backslash sequences such as `\n` or `\"`;
/// * malformed `\u` sequences (fewer than four hex digits);
/// * `\uXXXX` values that are surrogates or control characters. The decoded
///   text ends up in a URL / response header, so control characters are
///   intentionally left in their escaped form.
fn decode_js_string(value: &str) -> String {
    let mut decoded = String::with_capacity(value.len());
    let mut rest = value;

    while let Some(pos) = rest.find('\\') {
        decoded.push_str(&rest[..pos]);
        let tail = &rest[pos + 1..];

        if let Some(after) = tail.strip_prefix('/') {
            decoded.push('/');
            rest = after;
            continue;
        }

        let escaped = tail
            .strip_prefix('u')
            .and_then(|hex| hex.get(..4))
            // `from_str_radix` would also accept a leading `+`, so insist on
            // four real hex digits first.
            .filter(|hex| hex.bytes().all(|byte| byte.is_ascii_hexdigit()))
            .and_then(|hex| u32::from_str_radix(hex, 16).ok())
            .and_then(char::from_u32)
            .filter(|ch| !ch.is_control());

        match escaped {
            Some(ch) => {
                decoded.push(ch);
                // Skip `u` plus the four ASCII hex digits.
                rest = &tail[5..];
            }
            None => {
                // Not an escape we handle: keep the backslash verbatim.
                decoded.push('\\');
                rest = tail;
            }
        }
    }

    decoded.push_str(rest);
    decoded
}
|
||||
|
||||
fn find_thumb_in_html(html: &str, page_url: &Url) -> Option<String> {
|
||||
let document = Html::parse_document(html);
|
||||
let selector = Selector::parse(
|
||||
"meta[property=\"og:image\"], meta[name=\"twitter:image\"], meta[itemprop=\"thumbnailUrl\"]",
|
||||
)
|
||||
.ok()?;
|
||||
|
||||
for meta in document.select(&selector) {
|
||||
let value = meta.value().attr("content").unwrap_or_default().trim();
|
||||
if let Some(candidate) = normalize_candidate_url(value, page_url) {
|
||||
if is_allowed_thumb_url(&candidate) {
|
||||
return Some(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let image_url_re = Regex::new(r#""image_url"\s*:\s*"([^"]+)""#).ok()?;
|
||||
if let Some(captures) = image_url_re.captures(html) {
|
||||
let raw = captures
|
||||
.get(1)
|
||||
.map(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
let decoded = decode_js_string(raw);
|
||||
if let Some(candidate) = normalize_candidate_url(&decoded, page_url) {
|
||||
if is_allowed_thumb_url(&candidate) {
|
||||
return Some(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub async fn get_image(
|
||||
req: HttpRequest,
|
||||
requester: web::types::State<Requester>,
|
||||
) -> Result<impl web::Responder, web::Error> {
|
||||
let endpoint = req.match_info().query("endpoint").to_string();
|
||||
let image_url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") {
|
||||
endpoint
|
||||
} else {
|
||||
format!("https://{}", endpoint.trim_start_matches('/'))
|
||||
};
|
||||
let page_url = endpoint_to_page_url(&req);
|
||||
if !is_allowed_video_page_url(&page_url) {
|
||||
return Ok(web::HttpResponse::BadRequest().finish());
|
||||
}
|
||||
|
||||
let upstream = match requester
|
||||
.get_ref()
|
||||
.clone()
|
||||
.get_raw_with_headers(
|
||||
image_url.as_str(),
|
||||
vec![("Referer".to_string(), "https://www.pornhub.com/".to_string())],
|
||||
let mut requester = requester.get_ref().clone();
|
||||
let html = match requester
|
||||
.get_with_headers(
|
||||
page_url.as_str(),
|
||||
vec![("Referer".to_string(), PORNHUB_ROOT.to_string())],
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => response,
|
||||
Ok(value) => value,
|
||||
Err(_) => return Ok(web::HttpResponse::NotFound().finish()),
|
||||
};
|
||||
|
||||
let status = upstream.status();
|
||||
let headers = upstream.headers().clone();
|
||||
let bytes = upstream.bytes().await.map_err(error::ErrorBadGateway)?;
|
||||
let parsed_page_url = match Url::parse(&page_url) {
|
||||
Ok(value) => value,
|
||||
Err(_) => return Ok(web::HttpResponse::BadRequest().finish()),
|
||||
};
|
||||
|
||||
let mut resp = Response::build(status);
|
||||
let Some(image_url) = find_thumb_in_html(&html, &parsed_page_url) else {
|
||||
return Ok(web::HttpResponse::NotFound().finish());
|
||||
};
|
||||
|
||||
if let Some(ct) = headers.get(CONTENT_TYPE) {
|
||||
if let Ok(ct_str) = ct.to_str() {
|
||||
resp.set_header(CONTENT_TYPE, ct_str);
|
||||
}
|
||||
}
|
||||
if let Some(cl) = headers.get(CONTENT_LENGTH) {
|
||||
if let Ok(cl_str) = cl.to_str() {
|
||||
resp.set_header(CONTENT_LENGTH, cl_str);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(resp.body(bytes.to_vec()))
|
||||
Ok(web::HttpResponse::Found()
|
||||
.header("Location", image_url)
|
||||
.finish())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
decode_js_string, is_allowed_thumb_url, is_allowed_video_page_url, normalize_candidate_url,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
fn validates_allowed_video_pages() {
|
||||
assert!(is_allowed_video_page_url(
|
||||
"https://www.pornhub.com/view_video.php?viewkey=abc123"
|
||||
));
|
||||
assert!(is_allowed_video_page_url(
|
||||
"https://www.pornhub.com/video/search?search=test"
|
||||
));
|
||||
assert!(!is_allowed_video_page_url(
|
||||
"https://example.com/view_video.php?viewkey=abc123"
|
||||
));
|
||||
assert!(!is_allowed_video_page_url(
|
||||
"http://www.pornhub.com/view_video.php?viewkey=abc123"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validates_allowed_thumb_hosts_and_extensions() {
|
||||
assert!(is_allowed_thumb_url(
|
||||
"https://pix-cdn77.phncdn.com/videos/2026/04/01/1/(m=eafTGgaaaa)(mh=abc123)1.jpg"
|
||||
));
|
||||
assert!(is_allowed_thumb_url(
|
||||
"https://www.pornhub.com/webmasters/thumb.webp"
|
||||
));
|
||||
assert!(!is_allowed_thumb_url("https://example.com/thumb.jpg"));
|
||||
assert!(!is_allowed_thumb_url(
|
||||
"https://pix-cdn77.phncdn.com/videos/2026/04/01/1/manifest.m3u8"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalizes_protocol_relative_and_root_relative_urls() {
|
||||
let page_url = Url::parse("https://www.pornhub.com/view_video.php?viewkey=abc").unwrap();
|
||||
let protocol_relative =
|
||||
normalize_candidate_url("//pix-cdn77.phncdn.com/thumb.jpg", &page_url);
|
||||
assert_eq!(
|
||||
protocol_relative.as_deref(),
|
||||
Some("https://pix-cdn77.phncdn.com/thumb.jpg")
|
||||
);
|
||||
|
||||
let root_relative = normalize_candidate_url("/assets/thumb.jpg", &page_url);
|
||||
assert_eq!(
|
||||
root_relative.as_deref(),
|
||||
Some("https://www.pornhub.com/assets/thumb.jpg")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decodes_js_escaped_urls() {
|
||||
assert_eq!(
|
||||
decode_js_string(r#"https:\/\/pix-cdn77.phncdn.com\/thumb.jpg"#),
|
||||
"https://pix-cdn77.phncdn.com/thumb.jpg"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user