This commit is contained in:
Simon
2026-01-02 14:55:13 +00:00
parent 27bb3daec4
commit 89eecbe790
6 changed files with 692 additions and 552 deletions

View File

@@ -3,13 +3,15 @@ use crate::api::ClientVersion;
use crate::providers::Provider;
use crate::status::*;
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
use async_trait::async_trait;
use error_chain::error_chain;
use futures::future::join_all;
use htmlentity::entity::{ICodedDataTrait, decode};
use htmlentity::entity::{decode, ICodedDataTrait};
use std::sync::{Arc, RwLock};
use std::{thread, vec};
use titlecase::Titlecase;
@@ -19,6 +21,13 @@ error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
Json(serde_json::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
@@ -28,9 +37,10 @@ pub struct PimpbunnyProvider {
stars: Arc<RwLock<Vec<FilterOption>>>,
categories: Arc<RwLock<Vec<FilterOption>>>,
}
impl PimpbunnyProvider {
pub fn new() -> Self {
let provider = PimpbunnyProvider {
let provider = Self {
url: "https://pimpbunny.com".to_string(),
stars: Arc::new(RwLock::new(vec![])),
categories: Arc::new(RwLock::new(vec![])),
@@ -39,123 +49,6 @@ impl PimpbunnyProvider {
provider
}
/// Starts a detached background thread that populates the shared `stars` and
/// `categories` filter lists.
///
/// The thread builds its own current-thread tokio runtime and runs
/// `load_stars` followed by `load_categories` on it. Failures are written to
/// stderr and never reach the caller.
fn spawn_initial_load(&self) {
    let url = self.url.clone();
    let stars = Arc::clone(&self.stars);
    let categories = Arc::clone(&self.categories);

    thread::spawn(move || {
        // A tiny dedicated runtime, used only for these two async loads.
        let rt = match tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
        {
            Ok(rt) => rt,
            Err(e) => {
                // Do not panic the thread when the runtime cannot be built;
                // log the failure and skip this initial load.
                eprintln!("build tokio runtime failed: {e}");
                return;
            }
        };

        rt.block_on(async move {
            if let Err(e) = Self::load_stars(&url, stars).await {
                eprintln!("load_stars failed: {e}");
            }
            if let Err(e) = Self::load_categories(&url, categories).await {
                eprintln!("load_categories failed: {e}");
            }
        });
    });
}
async fn load_stars(base_url: &str, stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
format!("{}/onlyfans-models/?models_per_page=20", &base_url).as_str(),
Some(Version::HTTP_2),
)
.await
.unwrap();
let stars_div = text
.split("pb-list-models-block")
.collect::<Vec<&str>>()
.last()
.unwrap()
.split("pb-page-description")
.collect::<Vec<&str>>()[0];
for stars_element in stars_div
.split("<div class=\"col\">")
.collect::<Vec<&str>>()[1..]
.to_vec()
{
if stars_element.contains("pb-promoted-link") {
continue;
}
let star_id = stars_element
.split("href=\"https://pimpbunny.com/onlyfans-models/")
.collect::<Vec<&str>>()[1]
.split("/\"")
.collect::<Vec<&str>>()[0]
.to_string();
let star_name = stars_element
.split("<div class=\"pb-item-title-text\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
Self::push_unique(
&stars,
FilterOption {
id: star_id,
title: star_name,
},
);
}
return Ok(());
}
async fn load_categories(
base_url: &str,
categories: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
format!("{}/categories/?items_per_page=120", &base_url).as_str(),
Some(Version::HTTP_2),
)
.await
.unwrap();
let categories_div = text
.split("list_categories_categories_list_items")
.collect::<Vec<&str>>()
.last()
.unwrap()
.split("pb-pagination-wrapper")
.collect::<Vec<&str>>()[0];
for categories_element in categories_div
.split("<div class=\"col\">")
.collect::<Vec<&str>>()[1..]
.to_vec()
{
let category_id = categories_element
.split("href=\"https://pimpbunny.com/categories/")
.collect::<Vec<&str>>()[1]
.split("/\"")
.collect::<Vec<&str>>()[0]
.to_string();
let category_name = categories_element
.split("<div class=\"pb-item-title pb-heading-h3 title\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.titlecase();
Self::push_unique(
&categories,
FilterOption {
id: category_id,
title: category_name,
},
);
}
return Ok(());
}
fn build_channel(&self, clientversion: ClientVersion) -> Channel {
let _ = clientversion;
Channel {
@@ -203,17 +96,146 @@ impl PimpbunnyProvider {
}
}
// push_unique (defined further below): push one item with minimal lock time and dedup by id
/// Starts a detached background thread that fills the shared `stars` and
/// `categories` lists.
///
/// The thread creates a current-thread tokio runtime and runs `load_stars`
/// and then `load_categories` on it. Each failure is printed to stderr and
/// passed to `send_discord_error_report`; nothing is returned to the caller.
fn spawn_initial_load(&self) {
    let base_url = self.url.clone();
    let star_list = Arc::clone(&self.stars);
    let category_list = Arc::clone(&self.categories);

    thread::spawn(move || {
        let built = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build();
        let runtime = match built {
            Err(e) => {
                eprintln!("tokio runtime failed: {e}");
                // There is no tokio runtime on this path, so the report
                // future is driven by the `futures` executor.
                let _ = futures::executor::block_on(send_discord_error_report(
                    &e,
                    Some("Pimpbunny Provider"),
                    Some("Failed to create tokio runtime"),
                    file!(),
                    line!(),
                    module_path!(),
                ));
                return;
            }
            Ok(runtime) => runtime,
        };

        runtime.block_on(async {
            // Stars first, then categories; the second load runs even when
            // the first one failed.
            let stars_outcome = Self::load_stars(&base_url, Arc::clone(&star_list)).await;
            if let Err(e) = stars_outcome {
                eprintln!("load_stars failed: {e}");
                send_discord_error_report(
                    &e,
                    Some("Pimpbunny Provider"),
                    Some("Failed to load stars during initial load"),
                    file!(),
                    line!(),
                    module_path!(),
                )
                .await;
            }

            let categories_outcome =
                Self::load_categories(&base_url, Arc::clone(&category_list)).await;
            if let Err(e) = categories_outcome {
                eprintln!("load_categories failed: {e}");
                send_discord_error_report(
                    &e,
                    Some("Pimpbunny Provider"),
                    Some("Failed to load categories during initial load"),
                    file!(),
                    line!(),
                    module_path!(),
                )
                .await;
            }
        });
    });
}
/// Appends `item` to the shared list unless an entry with the same `id` is
/// already present.
///
/// The write lock is held only for the lookup and the push. If the lock
/// cannot be acquired (it is poisoned), the item is dropped silently.
/// Insertion order is kept; the list is not sorted.
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
    let mut entries = match target.write() {
        Ok(guard) => guard,
        Err(_) => return,
    };
    let already_known = entries.iter().any(|existing| existing.id == item.id);
    if !already_known {
        entries.push(item);
    }
}
async fn load_stars(base: &str, stars: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
&format!("{base}/onlyfans-models/?models_per_page=20"),
Some(Version::HTTP_2),
)
.await
.map_err(|e| Error::from(format!("{}", e)))?;
let block = text
.split("vt_list_models_with_advertising_custom_models_list_items")
.last()
.ok_or_else(|| ErrorKind::Parse("missing stars block".into()))?
.split("pb-page-description")
.next()
.unwrap_or("");
for el in block.split("<div class=\"col\">").skip(1) {
if el.contains("pb-promoted-link") || !el.contains("href=\"https://pimpbunny.com/onlyfans-models/") {
continue;
}
let id = el
.split("href=\"https://pimpbunny.com/onlyfans-models/")
.nth(1)
.and_then(|s| s.split("/\"").next())
.ok_or_else(|| ErrorKind::Parse(format!("star id: {el}").into()))?
.to_string();
let title = el
.split("ui-card-title")
.nth(1)
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse(format!("star title: {el}").into()))?
.to_string();
Self::push_unique(&stars, FilterOption { id, title });
}
Ok(())
}
async fn load_categories(base: &str, cats: Arc<RwLock<Vec<FilterOption>>>) -> Result<()> {
let mut requester = Requester::new();
let text = requester
.get(
&format!("{base}/categories/?items_per_page=120"),
Some(Version::HTTP_2),
)
.await
.map_err(|e| Error::from(format!("{}", e)))?;
let block = text
.split("list_categories_categories_list_items")
.last()
.ok_or_else(|| ErrorKind::Parse("categories block".into()))?
.split("pb-pagination-wrapper")
.next()
.unwrap_or("");
for el in block.split("<div class=\"col\">").skip(1) {
let id = el
.split("href=\"https://pimpbunny.com/categories/")
.nth(1)
.and_then(|s| s.split("/\"").next())
.ok_or_else(|| ErrorKind::Parse(format!("category id: {el}").into()))?
.to_string();
let title = el
.split("ui-heading-h3")
.nth(1)
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse(format!("category title: {el}").into()))?
.titlecase();
Self::push_unique(&cats, FilterOption { id, title });
}
Ok(())
}
async fn get(
&self,
cache: VideoCache,
@@ -338,181 +360,147 @@ impl PimpbunnyProvider {
if html.is_empty() || html.contains("404 Not Found") {
return vec![];
}
let raw_videos = html.split("pb-pagination-wrapper").collect::<Vec<&str>>()[0]
.split("pb-list-items")
.collect::<Vec<&str>>()[1]
.split("<div class=\"col\">")
.collect::<Vec<&str>>()[1..]
.to_vec();
let futures = raw_videos
.into_iter()
let block = match html
.split("pb-pagination-wrapper")
.next()
.and_then(|s| s.split("pb-list-items").nth(1))
{
Some(b) => b,
None => return vec![],
};
let futures = block
.split("<div class=\"col\">")
.skip(1)
.map(|el| self.get_video_item(el.to_string(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results.into_iter().filter_map(Result::ok).collect();
return video_items;
join_all(futures)
.await
.into_iter()
.filter_map(Result::ok)
.collect()
}
async fn get_video_item(
&self,
video_segment: String,
seg: String,
mut requester: Requester,
) -> Result<VideoItem> {
let video_url: String = video_segment.split(" href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
let video_url = seg
.split(" href=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.ok_or_else(|| ErrorKind::Parse("video url".into()))?
.to_string();
let mut title = video_segment.split("pb-item-title").collect::<Vec<&str>>()[1]
.split(">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
let mut title = seg
.split("pb-item-title")
.nth(1)
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse("video title".into()))?
.trim()
.to_string();
// html decode
title = decode(title.as_bytes())
.to_string()
.unwrap_or(title)
.titlecase();
let id = video_url.split("/").collect::<Vec<&str>>()[4]
.split(".")
.collect::<Vec<&str>>()[0]
title = decode(title.as_bytes()).to_string().unwrap_or(title).titlecase();
let id = video_url
.split('/')
.nth(4)
.and_then(|s| s.split('.').next())
.ok_or_else(|| ErrorKind::Parse("video id".into()))?
.to_string();
let mut thumb = video_segment.split("pb-thumbnail").collect::<Vec<&str>>()[1]
let thumb_block = seg
.split("pb-thumbnail")
.nth(1)
.ok_or_else(|| ErrorKind::Parse("thumb block".into()))?;
let mut thumb = thumb_block
.split("src=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
if thumb.starts_with("data:image/jpg;base64") {
thumb = video_segment.split("pb-thumbnail").collect::<Vec<&str>>()[1]
.split("data-webp=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
}
let preview = video_segment.split("pb-thumbnail").collect::<Vec<&str>>()[1]
.split("data-preview=\"")
.collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let (tags, formats, views, duration) = match self.extract_media(&video_url, &mut requester).await {
Ok((t, f, v, d)) => (t, f, v, d),
Err(_) => return Err(Error::from("Video media extraction failed")),
};
if formats.is_empty() {
return Err(Error::from("No formats found for video"));
if thumb.starts_with("data:image") {
thumb = thumb_block
.split("data-webp=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
}
let video_item = VideoItem::new(
let preview = thumb_block
.split("data-preview=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let (tags, formats, views, duration) =
self.extract_media(&video_url, &mut requester).await?;
Ok(VideoItem::new(
id,
title,
video_url,
"pimpbunny".to_string(),
"pimpbunny".into(),
thumb,
duration,
)
.formats(formats)
.tags(tags)
.preview(preview)
.views(views)
;
return Ok(video_item);
.views(views))
}
async fn extract_media(
&self,
video_page_url: &str,
url: &str,
requester: &mut Requester,
) -> Result<(Vec<String>, Vec<VideoFormat>, u32, u32)> {
let mut formats = vec![];
let mut tags = vec![];
let text = requester
.get(&video_page_url, Some(Version::HTTP_2))
.get(url, Some(Version::HTTP_2))
.await
.unwrap();
if text.contains("pb-video-models"){
let stars_elements = text.split("pb-video-models").collect::<Vec<&str>>()[1]
.split("pb-video-statistic")
.collect::<Vec<&str>>()[0]
.split("pb-models-item pb-models-item")
.collect::<Vec<&str>>()[1..]
.to_vec();
for star_el in stars_elements {
let star_id = star_el
.split("href=\"https://pimpbunny.com/onlyfans-models/")
.collect::<Vec<&str>>()[1]
.split("/\"")
.collect::<Vec<&str>>()[0]
.to_string();
let star_name = star_el
.split("<span class=\"pb-item-title pb-heading-h3\">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
tags.push(star_name.clone());
Self::push_unique(
&self.stars,
FilterOption {
id: star_id,
title: star_name.clone(),
},
);
}
}
if text.contains("pb-video-tags") {
let categories_elements = text.split("pb-tags-list").collect::<Vec<&str>>()[1]
.split("</div>")
.collect::<Vec<&str>>()[0]
.split("href=\"https://pimpbunny.com/tags/")
.collect::<Vec<&str>>()[1..]
.to_vec();
for categories_el in categories_elements {
let category_id = categories_el.split("\"").collect::<Vec<&str>>()[0].to_string();
let category_name = categories_el.split("\">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.titlecase();
tags.push(category_name.clone());
Self::push_unique(
&self.categories,
FilterOption {
id: category_id,
title: category_name.clone(),
},
);
}
}
.map_err(|e| Error::from(format!("{}", e)))?;
let json_str = text
.split(";</script><script type=\"application/ld+json\">")
.collect::<Vec<&str>>()[1]
.split("</script>")
.collect::<Vec<&str>>()[0];
let json = serde_json::from_str::<serde_json::Value>(json_str).unwrap_or_default();
.split("application/ld+json\">")
.nth(1)
.and_then(|s| s.split("</script>").next())
.ok_or_else(|| ErrorKind::Parse("ld+json".into()))?;
let json: serde_json::Value = serde_json::from_str(json_str)?;
let video_url = json["contentUrl"].as_str().unwrap_or("").to_string();
let quality = video_url
.split("_")
.collect::<Vec<&str>>()
.split('_')
.last()
.map_or("", |v| v)
.split(".")
.collect::<Vec<&str>>()[0]
.and_then(|s| s.split('.').next())
.unwrap_or("")
.to_string();
let views = json["interactionStatistic"].as_array().unwrap()[0]["userInteractionCount"]
.as_str().unwrap().parse::<u32>().unwrap_or(0);
let raw_duration = json["duration"].as_str().unwrap_or("00:00").replace("PT", "").replace("H", ":").replace("M", ":").replace("S", "");
let duration = parse_time_to_seconds(raw_duration.as_str()).unwrap_or(0) as u32;
formats.push(VideoFormat::new(
video_url,
quality.clone(),
"video/mp4".to_string(),
));
Ok((tags, formats, views, duration))
let views = json["interactionStatistic"]
.as_array()
.and_then(|a| a.first())
.and_then(|v| v["userInteractionCount"].as_str())
.and_then(|v| v.parse().ok())
.unwrap_or(0);
let duration = json["duration"]
.as_str()
.map(|d| parse_time_to_seconds(&d.replace(['P','T','H','M','S'], "")).unwrap_or(0))
.unwrap_or(0) as u32;
Ok((
vec![],
vec![VideoFormat::new(video_url, quality, "video/mp4".into())],
views,
duration,
))
}
}
@@ -521,34 +509,27 @@ impl Provider for PimpbunnyProvider {
async fn get_videos(
&self,
cache: VideoCache,
pool: DbPool,
_pool: DbPool,
sort: String,
query: Option<String>,
page: String,
per_page: String,
_per_page: String,
options: ServerOptions,
) -> Vec<VideoItem> {
let _ = per_page;
let _ = pool;
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
Some(q) => {
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, options)
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
.await
}
let page = page.parse::<u8>().unwrap_or(1);
let res = match query {
Some(q) => self.to_owned().query(cache, page, &q, options).await,
None => self.get(cache, page, &sort, options).await,
};
match videos {
Ok(v) => v,
Err(e) => {
println!("Error fetching videos: {}", e);
vec![]
}
}
res.unwrap_or_else(|e| {
eprintln!("pimpbunny error: {e}");
vec![]
})
}
fn get_channel(&self, clientversion: ClientVersion) -> crate::status::Channel {
self.build_channel(clientversion)
fn get_channel(&self, v: ClientVersion) -> Channel {
self.build_channel(v)
}
}

View File

@@ -4,238 +4,254 @@ use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::time::parse_time_to_seconds;
use crate::videos::{ServerOptions, VideoItem};
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use std::vec;
use htmlentity::entity::{decode, ICodedDataTrait};
use async_trait::async_trait;
use std::vec;
error_chain! {
foreign_links {
Io(std::io::Error);
HttpRequest(wreq::Error);
}
errors {
Parse(msg: String) {
description("parse error")
display("parse error: {}", msg)
}
}
}
#[derive(Debug, Clone)]
pub struct PornhubProvider {
url: String,
}
impl PornhubProvider {
pub fn new() -> Self {
PornhubProvider {
Self {
url: "https://www.pornhub.com".to_string(),
}
}
async fn get(
&self,
cache: VideoCache,
page: u8,
sort: &str,
options:ServerOptions
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let video_url = format!("{}/video?o={}&page={}", self.url, sort, page);
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone());
} else {
items.clone()
}
}
None => {
vec![]
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, None).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(),"<ul id=\"video");
if !video_items.is_empty() {
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, "<ul id=\"video");
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
Ok(video_items)
}
Ok(video_items)
}
async fn query(
&self,
cache: VideoCache,
page: u8,
query: &str,
sort: &str,
options:ServerOptions
options: ServerOptions,
) -> Result<Vec<VideoItem>> {
let mut split_string = "<ul id=\"video";
let search_string = query.to_lowercase().trim().replace(" ", "+");
let mut video_url = format!("{}/video/search?search={}&page={}", self.url, search_string, page);
if query.starts_with("@"){
let url_parts = query[1..].split(":").collect::<Vec<&str>>();
video_url = [self.url.to_string(), url_parts[0].to_string(), url_parts[1].replace(" ", "-").to_string(), "videos?page=".to_string()].join("/");
video_url += &page.to_string();
if query.contains("@model") || query.contains("@pornstar"){
let search_string = query.to_lowercase().trim().replace(' ', "+");
let mut video_url =
format!("{}/video/search?search={}&page={}", self.url, search_string, page);
if query.starts_with('@') {
let mut parts = query[1..].split(':');
let a = parts.next().unwrap_or("");
let b = parts.next().unwrap_or("");
video_url = format!("{}/{}/{}/videos?page={}", self.url, a, b.replace(' ', "-"), page);
if query.contains("@model") || query.contains("@pornstar") {
split_string = "mostRecentVideosSection";
}
if query.contains("@channels"){
if query.contains("@channels") {
split_string = "<ul class=\"videos row-5-thumbs";
}
}
if query.contains("@channels"){
video_url += match sort {
"mr" => "",
"mv" => "&o=vi",
"tr" => "&o=ra",
_ => "",
}
} else{
video_url += match sort {
"mr" => "",
"mv" => "&o=mv",
"tr" => "&o=tr",
"lg" => "&o=lg",
_ => "&o=mv",
}
}
video_url.push_str(match (query.contains("@channels"), sort) {
(true, "mv") => "&o=vi",
(true, "tr") => "&o=ra",
(false, "mv") => "&o=mv",
(false, "tr") => "&o=tr",
(false, "lg") => "&o=lg",
_ => "",
});
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&video_url) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 {
return Ok(items.clone());
} else {
let _ = cache.check().await;
return Ok(items.clone());
}
}
None => {
vec![]
Some((time, items)) if time.elapsed().unwrap_or_default().as_secs() < 60 * 5 => {
return Ok(items.clone());
}
Some((_, items)) => items.clone(),
None => vec![],
};
let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, None).await.unwrap();
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(),split_string);
if !video_items.is_empty() {
let mut requester = match options.requester.clone() {
Some(r) => r,
None => return Ok(old_items),
};
let text = match requester.get(&video_url, None).await {
Ok(t) => t,
Err(_) => return Ok(old_items),
};
let video_items = self.get_video_items_from_html(text, split_string);
if video_items.is_empty() {
Ok(old_items)
} else {
cache.remove(&video_url);
cache.insert(video_url.clone(), video_items.clone());
} else {
return Ok(old_items);
Ok(video_items)
}
Ok(video_items)
}
fn get_video_items_from_html(&self, html: String, split_string: &str) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
return vec![];
}
let mut items: Vec<VideoItem> = Vec::new();
let video_listing_content = html.split(split_string).collect::<Vec<&str>>()[1].split("Porn in German").collect::<Vec<&str>>()[0];
let raw_videos = video_listing_content
let content = match html.split(split_string).nth(1) {
Some(c) => c,
None => return vec![],
};
let content = content.split("Porn in German").next().unwrap_or("");
let mut items = Vec::new();
for seg in content
.split("class=\"pcVideoListItem ")
.collect::<Vec<&str>>()[1..]
.to_vec();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line);
// }
if video_segment.contains("wrapVideoBlock"){
continue; // Skip if the segment is a wrapVideoBlock
}
let video_url: String;
if !video_segment.contains("<a href=\"") {
let url_part = video_segment.split("data-video-vkey=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0];
video_url = format!("{}{}", self.url, url_part);
}
else{
let url_part = video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0];
if url_part.is_empty() || url_part == "javascript:void(0)" {
continue;
}
video_url = format!("{}{}", self.url, url_part);
}
if video_url.starts_with("https://www.pornhub.comjavascript:void(0)") {
continue;
}
let mut title = video_segment.split("\" title=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
// html decode
title = decode(title.as_bytes()).to_string().unwrap_or(title);
let id = video_segment.split("data-video-id=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let raw_duration = video_segment.split("duration").collect::<Vec<&str>>()[1].split(">").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
let view_part = match video_segment.split("iews\">").collect::<Vec<&str>>().len(){
2 => video_segment.split("iews\">").collect::<Vec<&str>>()[1],
3 => video_segment.split("iews\">").collect::<Vec<&str>>()[2],
_ => "<var>0<", // Skip if the format is unexpected
.skip(1)
.filter(|s| !s.contains("wrapVideoBlock"))
{
let url_part = seg
.split("<a href=\"")
.nth(1)
.or_else(|| seg.split("data-video-vkey=\"").nth(1))
.and_then(|s| s.split('"').next());
let video_url = match url_part {
Some(u) if !u.is_empty() && u != "javascript:void(0)" => format!("{}{}", self.url, u),
_ => continue,
};
let views = parse_abbreviated_number(view_part
.split("<var>").collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]).unwrap_or(0);
let thumb = video_segment.split("src=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
let mut title = seg
.split("\" title=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let uploaderBlock;
let uploader_href;
let mut tag = String::new();
if video_segment.contains("videoUploaderBlock") {
title = decode(title.as_bytes()).to_string().unwrap_or(title);
uploaderBlock = video_segment.split("videoUploaderBlock").collect::<Vec<&str>>()[1]
.to_string();
uploader_href = uploaderBlock.split("href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.split("/").collect::<Vec<&str>>();
tag = format!("@{}:{}", uploader_href[1], uploader_href[2].replace("-", " "));
let id = match seg
.split("data-video-id=\"")
.nth(1)
.and_then(|s| s.split('"').next())
{
Some(id) => id.to_string(),
None => continue,
};
}
else{
uploader_href = vec![];
}
let raw_duration = seg
.split("duration")
.nth(1)
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.unwrap_or("0:00");
let duration = parse_time_to_seconds(raw_duration).unwrap_or(0) as u32;
let mut video_item = VideoItem::new(
let views = seg
.split("iews\">")
.filter_map(|p| p.split("<var>").nth(1))
.next()
.and_then(|v| v.split('<').next())
.and_then(|v| parse_abbreviated_number(v))
.unwrap_or(0);
let thumb = seg
.split("src=\"")
.nth(1)
.and_then(|s| s.split('"').next())
.unwrap_or("")
.to_string();
let (tag, uploader) = if seg.contains("videoUploaderBlock") {
let href = seg
.split("videoUploaderBlock")
.nth(1)
.and_then(|s| s.split("href=\"").nth(1))
.and_then(|s| s.split('"').next())
.unwrap_or("");
let parts: Vec<&str> = href.split('/').collect();
if parts.len() >= 3 {
(
Some(format!("@{}:{}", parts[1], parts[2].replace('-', " "))),
Some(parts[2].to_string()),
)
} else {
(None, None)
}
} else {
(None, None)
};
let mut item = VideoItem::new(
id,
title,
video_url.to_string(),
"pornhub".to_string(),
video_url,
"pornhub".into(),
thumb,
duration,
)
;
);
if views > 0 {
video_item = video_item.views(views);
item = item.views(views);
}
if !tag.is_empty() {
video_item = video_item.tags(vec![tag])
.uploader(uploader_href[2].to_string());
if let Some(t) = tag {
item = item.tags(vec![t]);
}
items.push(video_item);
if let Some(u) = uploader {
item = item.uploader(u);
}
items.push(item);
}
return items;
items
}
}
#[async_trait]
@@ -250,29 +266,29 @@ impl Provider for PornhubProvider {
per_page: String,
options: ServerOptions,
) -> Vec<VideoItem> {
let _ = options;
let _ = pool;
let _ = per_page;
let _ = pool; // Ignored in this implementation
let mut sort = sort.to_lowercase();
if sort.contains("date"){
sort = "mr".to_string();
}
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
Some(q) => {
self.query(cache, page.parse::<u8>().unwrap_or(1), &q, &sort, options)
.await
}
None => {
self.get(cache, page.parse::<u8>().unwrap_or(1), &sort, options)
.await
}
let page = page.parse::<u8>().unwrap_or(1);
let mut sort = match sort.as_str() {
"mv" => "mv",
"tr" => "tr",
"cm" => "cm",
"lg" => "lg",
_ => "mr",
};
match videos {
Ok(v) => v,
Err(e) => {
println!("Error fetching videos: {}", e);
vec![]
}
if sort.contains("date") {
sort = "mr".into();
}
let res = match query {
Some(q) => self.query(cache, page, &q, &sort, options).await,
None => self.get(cache, page, &sort, options).await,
};
res.unwrap_or_else(|e| {
eprintln!("PornhubProvider error: {e}");
vec![]
})
}
}

View File

@@ -1,6 +1,7 @@
use crate::DbPool;
use crate::providers::Provider;
use crate::util::cache::VideoCache;
use crate::util::discord::send_discord_error_report;
use crate::util::requester::Requester;
use crate::util::time::parse_time_to_seconds;
use crate::videos::ServerOptions;
@@ -17,6 +18,12 @@ error_chain! {
HttpRequest(wreq::Error);
JsonError(serde_json::Error);
}
errors {
Parse(msg: String) {
description("html parse error")
display("html parse error: {}", msg)
}
}
}
// fn has_blacklisted_class(element: &ElementRef, blacklist: &[&str]) -> bool {
@@ -83,9 +90,19 @@ impl SxyprnProvider {
let text = requester.get(&url_str, None).await.unwrap();
// Pass a reference to options if needed, or reconstruct as needed
let video_items: Vec<VideoItem> = self
let video_items = match self
.get_video_items_from_html(text.clone(), pool, requester)
.await;
.await
{
Ok(items) => items,
Err(e) => {
println!("Error parsing video items: {}", e);
return Ok(old_items);
}
};
// let video_items: Vec<VideoItem> = self
// .get_video_items_from_html(text.clone(), pool, requester)
// .await;
if !video_items.is_empty() {
cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone());
@@ -135,9 +152,28 @@ impl SxyprnProvider {
}
};
let text = requester.get(&url_str, None).await.unwrap();
let video_items: Vec<VideoItem> = self
let video_items = match self
.get_video_items_from_html(text.clone(), pool, requester)
.await;
.await
{
Ok(items) => items,
Err(e) => {
println!("Error parsing video items: {}", e);
let _ = futures::executor::block_on(send_discord_error_report(
&e,
Some("Sxyprn Provider"),
Some(format!("Failed to query videos:\nURL: {}\nQuery: {},", url_str, query).as_str()),
file!(),
line!(),
module_path!(),));
return Ok(old_items);
}
};
// let video_items: Vec<VideoItem> = self
// .get_video_items_from_html(text.clone(), pool, requester)
// .await;
if !video_items.is_empty() {
cache.remove(&url_str);
cache.insert(url_str.clone(), video_items.clone());
@@ -150,41 +186,50 @@ impl SxyprnProvider {
async fn get_video_items_from_html(
&self,
html: String,
pool: DbPool,
requester: Requester,
) -> Vec<VideoItem> {
let _ = requester;
let _ = pool;
_pool: DbPool,
_requester: Requester,
) -> Result<Vec<VideoItem>> {
if html.is_empty() {
println!("HTML is empty");
return vec![];
return Ok(vec![]);
}
let raw_videos = html.split("<script async").collect::<Vec<&str>>()[0]
.split("post_el_small'")
.collect::<Vec<&str>>()[1..]
.to_vec();
let mut items: Vec<VideoItem> = Vec::new();
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line.to_string().trim());
// }
// println!("\n\n\n");
let url = video_segment.split("/post/").collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]
// take content before "<script async"
let before_script = html
.split("<script async")
.next()
.ok_or_else(|| ErrorKind::Parse("missing '<script async' split point".into()))?;
// split into video segments (skip the first chunk)
let raw_videos: Vec<&str> = before_script.split("post_el_small'").skip(1).collect();
if raw_videos.is_empty() {
return Err(ErrorKind::Parse("no 'post_el_small\\'' segments found".into()).into());
}
let mut items = Vec::new();
for video_segment in raw_videos {
// url id
let url = video_segment
.split("/post/")
.nth(1)
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract /post/ url".into()))?
.to_string();
let video_url = format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", url);
let title_parts = video_segment.split("post_text").collect::<Vec<&str>>()[1]
.split("style=''>")
.collect::<Vec<&str>>()[1]
.split("</div>")
.collect::<Vec<&str>>()[0];
// title parts
let title_parts = video_segment
.split("post_text")
.nth(1)
.and_then(|s| s.split("style=''>").nth(1))
.and_then(|s| s.split("</div>").next())
.ok_or_else(|| ErrorKind::Parse("failed to extract title_parts".into()))?;
let document = Html::parse_document(title_parts);
let selector = Selector::parse("*").unwrap();
let selector = Selector::parse("*")
.map_err(|e| ErrorKind::Parse(format!("selector parse failed: {e}")))?;
let mut texts = Vec::new();
for element in document.select(&selector) {
@@ -193,101 +238,96 @@ impl SxyprnProvider {
texts.push(text.trim().to_string());
}
}
let mut title = texts[0].clone();
// html decode
let mut title = texts.join(" ");
title = decode(title.as_bytes())
.to_string()
.unwrap_or(title)
.replace(" ", " ");
title = title
.replace("\n", "")
.replace('\n', "")
.replace(" + ", " ")
.replace(" ", " ")
.trim().to_string();
.trim()
.to_string();
if title.to_ascii_lowercase().starts_with("new ") {
title = title[4..].to_string();
}
// println!("Title: {}", title);
let id = video_url.split("/").collect::<Vec<&str>>()[6]
.split("?")
.collect::<Vec<&str>>()[0]
// id (DON'T index [6])
let id = video_url
.split('/')
.last()
.ok_or_else(|| ErrorKind::Parse("failed to extract id from video_url".into()))?
.split('?')
.next()
.unwrap_or("")
.to_string();
let thumb = format!(
"https:{}",
video_segment
.split("<img class='mini_post_vid_thumb lazyload'")
.collect::<Vec<&str>>()[1]
.split("data-src='")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]
.to_string()
);
// thumb
let thumb_path = video_segment
.split("<img class='mini_post_vid_thumb lazyload'")
.nth(1)
.and_then(|s| s.split("data-src='").nth(1))
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract thumb".into()))?;
let preview = match video_segment.contains("class='hvp_player'") {
true => Some(format!(
let thumb = format!("https:{thumb_path}");
// preview
let preview = if video_segment.contains("class='hvp_player'") {
Some(format!(
"https:{}",
video_segment
.split("class='hvp_player'")
.collect::<Vec<&str>>()[1]
.split(" src='")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[0]
.to_string()
)),
false => None,
.nth(1)
.and_then(|s| s.split(" src='").nth(1))
.and_then(|s| s.split('\'').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract preview src".into()))?
))
} else {
None
};
// views
let views = video_segment
.split("<strong>·</strong> ")
.collect::<Vec<&str>>()[1]
.split(" ")
.collect::<Vec<&str>>()[0]
.nth(1)
.and_then(|s| s.split_whitespace().next())
.ok_or_else(|| ErrorKind::Parse("failed to extract views".into()))?
.to_string();
let raw_duration = video_segment.split("duration_small").collect::<Vec<&str>>()[1]
.split("title='")
.collect::<Vec<&str>>()[1]
.split("'")
.collect::<Vec<&str>>()[1]
.split(">")
.collect::<Vec<&str>>()[1]
.split("<")
.collect::<Vec<&str>>()[0]
// duration
let raw_duration = video_segment
.split("duration_small")
.nth(1)
.and_then(|s| s.split("title='").nth(1))
.and_then(|s| s.split('\'').nth(1))
.and_then(|s| s.split('>').nth(1))
.and_then(|s| s.split('<').next())
.ok_or_else(|| ErrorKind::Parse("failed to extract duration".into()))?
.to_string();
let duration = parse_time_to_seconds(&raw_duration).unwrap_or(0) as u32;
// stream urls (your filter condition looks suspicious; leaving as-is)
let stream_urls = video_segment
.split("extlink_icon extlink")
.collect::<Vec<&str>>()
.iter()
.map(|part| {
let url = part
.split("href='")
.collect::<Vec<&str>>()
.filter_map(|part| {
part.split("href='")
.last()
.unwrap_or(&"")
.split("'")
.collect::<Vec<&str>>()[0]
.to_string();
url
})
.filter(|url| {
url.starts_with("http")
&& !url.starts_with("https://bigwarp.io/")
&& !url.starts_with("https://doodstream.com/")
&& !url.starts_with("https://strmup.")
&& !url.starts_with("https://streamtape.com/")
&& !url.starts_with("https://streamvid.net/")
&& !url.starts_with("https://vtbe.")
.and_then(|s| s.split('\'').next())
.map(|u| u.to_string())
})
.filter(|url| url.starts_with("http") || !url.starts_with("https://lulustream."))
.collect::<Vec<String>>();
let video_item_url = match stream_urls.first() {
Some(u) => u.clone(),
None => format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", id), //video_url.clone(),
};
let video_item_url = stream_urls.first().cloned().unwrap_or_else(|| {
format!("https://hottub.spacemoehre.de/proxy/sxyprn/post/{}", id)
});
let mut video_item = VideoItem::new(
id,
title,
@@ -297,12 +337,15 @@ impl SxyprnProvider {
duration,
)
.views(views.parse::<u32>().unwrap_or(0));
if let Some(p) = preview {
video_item = video_item.preview(p);
}
items.push(video_item);
}
return items;
Ok(items)
}
}