This commit is contained in:
Simon
2026-01-02 14:55:13 +00:00
parent 27bb3daec4
commit 89eecbe790
6 changed files with 692 additions and 552 deletions

View File

@@ -4,238 +4,254 @@ use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
use htmlentity::entity::{decode, ICodedDataTrait};
use async_trait::async_trait;
use std::vec;
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
#[derive(Debug, Clone)]
pub struct PornhubProvider {
url: String,
}
impl PornhubProvider {
pub fn new() -> Self {
PornhubProvider {
Self {
url: "https://www.pornhub.com".to_string(),
}
}
async fn get(
&self,
cache: VideoCache,
page: u8,
sort: &str,
options:ServerOptions
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let video_url = format!("{}/video?o={}&page={}", self.url, sort, page);
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone());
} else {
items.clone()
}
}
None => {
vec![]
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, None).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(),"<ul id=\"video");
if !video_items.is_empty() {
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, "<ul id=\"video");
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
Ok(video_items)
}
Ok(video_items)
}
async fn query(
&self,
cache: VideoCache,
page: u8,
query: &str,
sort: &str,
options:ServerOptions
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let mut split_string = "<ul id=\"video";
let search_string = query.to_lowercase().trim().replace(" ", "+");
let mut video_url = format!("{}/video/search?search={}&page={}", self.url, search_string, page);
if query.starts_with("@"){
let url_parts = query[1..].split(":").collect::<Vec<&str>>();
video_url = [self.url.to_string(), url_parts[0].to_string(), url_parts[1].replace(" ", "-").to_string(), "videos?page=".to_string()].join("/");
video_url += &page.to_string();
if query.contains("@model") || query.contains("@pornstar"){
let search_string = query.to_lowercase().trim().replace(' ', "+");
let mut video_url =
format!("{}/video/search?search={}&page={}", self.url, search_string, page);
if query.starts_with('@') {
let mut parts = query[1..].split(':');
let a = parts.next().unwrap_or("");
let b = parts.next().unwrap_or("");
video_url = format!("{}/{}/{}/videos?page={}", self.url, a, b.replace(' ', "-"), page);
if query.contains("@model") || query.contains("@pornstar") {
split_string = "mostRecentVideosSection";
}
if query.contains("@channels"){
if query.contains("@channels") {
split_string = "<ul class=\"videos row-5-thumbs";
}
}
if query.contains("@channels"){
video_url += match sort {
"mr" => "",
"mv" => "&o=vi",
"tr" => "&o=ra",
_ => "",
}
} else{
video_url += match sort {
"mr" => "",
"mv" => "&o=mv",
"tr" => "&o=tr",
"lg" => "&o=lg",
_ => "&o=mv",
}
}
video_url.push_str(match (query.contains("@channels"), sort) {
(true, "mv") => "&o=vi",
(true, "tr") => "&o=ra",
(false, "mv") => "&o=mv",
(false, "tr") => "&o=tr",
(false, "lg") => "&o=lg",
_ => "",
});
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone());
} else {
let _ = cache.check().await;
return Ok(items.clone());
}
}
None => {
vec![]
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, None).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(),split_string);
if !video_items.is_empty() {
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, split_string);
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
Ok(video_items)
}
Ok(video_items)
}
fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
return vec![];
}
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html.split(split_string).collect::<Vec<&str>>()[1].split("Porn in German").collect::<Vec<&str>>()[0];
let raw_videos = video_listing_content
let content = match html.split(split_string).nth(1) {
Some(c) => c,
None => return vec![],
};
let content = content.split("Porn in German").next().unwrap_or("");
let mut items = Vec::new();
for seg in content
.split("class=\"pcVideoListItem ")
.collect::<Vec<&str>>()[1..]
.to_vec();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line);
// }
if video_segment.contains("wrapVideoBlock"){
continue; // Skip if the segment is a wrapVideoBlock
}
let video_url: String;
if !video_segment.contains("<a href=\"") {
let url_part = video_segment.split("data-video-vkey=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0];
video_url = format!("{}{}", self.url, url_part);
}
else{
let url_part = video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0];
if url_part.is_empty() || url_part == "javascript:void(0)" {
continue;
}
video_url = format!("{}{}", self.url, url_part);
}
if video_url.starts_with("https://www.pornhub.comjavascript:void(0)") {
continue;
}
let mut title = video_segment.split("\" title=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
// html decode
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let id = video_segment.split("data-video-id=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let raw_duration = video_segment.split("duration").collect::<Vec<&str>>()[1].split(">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let view_part = match video_segment.split("iews\">").collect::<Vec<&str>>().len(){
2 => video_segment.split("iews\">").collect::<Vec<&str>>()[1],
3 => video_segment.split("iews\">").collect::<Vec<&str>>()[2],
_ => "<var>0<", // Skip if the format is unexpected
.skip(1)
.filter(|s| !s.contains("wrapVideoBlock"))
{
let url_part = seg
.split("<a href=\"")
.nth(1)
.or_else(|| seg.split("data-video-vkey=\"").nth(1))
.and_then(|s| s.split('"').next());
let video_url = match url_part {
Some(u) if !u.is_empty() && u != "javascript:void(0)" => format!("{}{}", self.url, u),
_ => continue,
};
let views = parse_abbreviated_number(view_part
.split("<var>").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]).unwrap_or(0);
let thumb = video_segment.split("src=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
let mut title = seg
.split("\" title=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let uploaderBlock;
let uploader_href;
let mut tag = String::new();
if video_segment.contains("videoUploaderBlock") {
title = decode(title.as_bytes()).to_string().unwrap_or(title);
uploaderBlock = video_segment.split("videoUploaderBlock").collect::<Vec<&str>>()[1]
.to_string();
uploader_href = uploaderBlock.split("href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.split("/").collect::<Vec<&str>>();
tag = format!("@{}:{}", uploader_href[1], uploader_href[2].replace("-", " "));
let id = match seg
.split("data-video-id=\"")
.nth(1)
.and_then(|s| s.split('"').next())
{
Some(id) => id.to_string(),
None => continue,
};
}
else{
uploader_href = vec![];
}
let raw_duration = seg
.split("duration")
.nth(1)
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.unwrap_or("0:00");
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
let mut video_item = VideoItem::new(
let views = seg
.split("iews\">")
.filter_map(|p| p.split("<var>").nth(1))
.next()
.and_then(|v| v.split('<').next())
.and_then(|v| parse_abbreviated_number(v))
.unwrap_or(0);
let thumb = seg
.split("src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let (tag, uploader) = if seg.contains("videoUploaderBlock") {
let href = seg
.split("videoUploaderBlock")
.nth(1)
.and_then(|s| s.split("href=\"").nth(1))
.and_then(|s| s.split('"').next())
.unwrap_or("");
let parts: Vec<&str> = href.split('/').collect();
if parts.len() >= 3 {
(
Some(format!("@{}:{}", parts[1], parts[2].replace('-', " "))),
Some(parts[2].to_string()),
)
} else {
(None, None)
}
} else {
(None, None)
};
let mut item = VideoItem::new(
id,
title,
video_url.to_string(),
"pornhub".to_string(),
video_url,
"pornhub".into(),
thumb,
duration,
)
;
);
if views > 0 {
video_item = video_item.views(views);
item = item.views(views);
}
if !tag.is_empty() {
video_item = video_item.tags(vec![tag])
.uploader(uploader_href[2].to_string());
if let Some(t) = tag {
item = item.tags(vec![t]);
}
items.push(video_item);
if let Some(u) = uploader {
item = item.uploader(u);
}
items.push(item);
}
return items;
items
}
}
#[async_trait]
@@ -250,29 +266,29 @@ impl Provider for PornhubProvider {
per_page: String,
options: ServerOptions,
) -> Vec<VideoItem> {
let _ = options;
let _ = pool;
let _ = per_page;
let _ = pool; // Ignored in this implementation
let mut sort = sort.to_lowercase();
if sort.contains("date"){
sort = "mr".to_string();
}
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
Some(q) => {
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, &sort, options)
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
.await
}
let page = page.parse::<u8>().unwrap_or(1);
let mut sort = match sort.as_str() {
"mv" => "mv",
"tr" => "tr",
"cm" => "cm",
"lg" => "lg",
_ => "mr",
};
match videos {
Ok(v) => v,
Err(e) => {
println!("Error fetching videos: {}", e);
vec![]
}
if sort.contains("date") {
sort = "mr".into();
}
let res = match query {
Some(q) => self.query(cache, page, &q, &sort, options).await,
None => self.get(cache, page, &sort, options).await,
};
res.unwrap_or_else(|e| {
eprintln!("PornhubProvider error: {e}");
vec![]
})
}
}