This commit is contained in:
Simon
2026-01-14 15:41:22 +00:00
parent 4f9c7835bf
commit e2f3bc2ecb
3 changed files with 142 additions and 214 deletions

View File

@@ -212,7 +212,6 @@ impl HypnotubeProvider {
.await .await
.unwrap(); .unwrap();
if text.contains("Sorry, no results were found.") { if text.contains("Sorry, no results were found.") {
eprintln!("Hypnotube query returned no results for page {}", page);
return vec![]; return vec![];
} }
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone()).await; let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone()).await;
@@ -281,7 +280,6 @@ impl HypnotubeProvider {
// println!("Hypnotube search POST response status: {}", p.text().await.unwrap_or_default()); // println!("Hypnotube search POST response status: {}", p.text().await.unwrap_or_default());
// let text = requester.get(&video_url, Some(Version::HTTP_11)).await.unwrap(); // let text = requester.get(&video_url, Some(Version::HTTP_11)).await.unwrap();
if text.contains("Sorry, no results were found.") { if text.contains("Sorry, no results were found.") {
eprint!("Hypnotube query returned no results for page {}", page);
return vec![]; return vec![];
} }
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone()).await; let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone()).await;

View File

@@ -133,7 +133,6 @@ impl JavtifulProvider {
let mut requester = options.requester.clone().unwrap(); let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, Some(Version::HTTP_2)).await.unwrap(); let text = requester.get(&video_url, Some(Version::HTTP_2)).await.unwrap();
if page > 1 && !text.contains(&format!("<li class=\"page-item active\"><span class=\"page-link\">{}</span>", page)) { if page > 1 && !text.contains(&format!("<li class=\"page-item active\"><span class=\"page-link\">{}</span>", page)) {
eprint!("Javtiful query returned no results for page {}", page);
return Ok(vec![]); return Ok(vec![]);
} }
let video_items: Vec<VideoItem> = self let video_items: Vec<VideoItem> = self
@@ -182,7 +181,6 @@ impl JavtifulProvider {
let mut requester = options.requester.clone().unwrap(); let mut requester = options.requester.clone().unwrap();
let text = requester.get(&video_url, Some(Version::HTTP_2)).await.unwrap(); let text = requester.get(&video_url, Some(Version::HTTP_2)).await.unwrap();
if page > 1 && !text.contains(&format!("<li class=\"page-item active\"><span class=\"page-link\">{}</span>", page)) { if page > 1 && !text.contains(&format!("<li class=\"page-item active\"><span class=\"page-link\">{}</span>", page)) {
eprint!("Javtiful query returned no results for page {}", page);
return Ok(vec![]); return Ok(vec![]);
} }
let video_items: Vec<VideoItem> = self let video_items: Vec<VideoItem> = self
@@ -203,7 +201,6 @@ impl JavtifulProvider {
requester: &mut Requester, requester: &mut Requester,
) -> Vec<VideoItem> { ) -> Vec<VideoItem> {
if html.is_empty() || html.contains("404 Not Found") { if html.is_empty() || html.contains("404 Not Found") {
eprint!("Javtiful returned empty or 404 html");
return vec![]; return vec![];
} }

View File

@@ -1,5 +1,6 @@
use std::vec; use std::vec;
use async_trait::async_trait; use async_trait::async_trait;
use diesel::r2d2;
use error_chain::error_chain; use error_chain::error_chain;
use htmlentity::entity::{decode, ICodedDataTrait}; use htmlentity::entity::{decode, ICodedDataTrait};
use futures::future::join_all; use futures::future::join_all;
@@ -7,17 +8,24 @@ use wreq::Version;
use crate::db; use crate::db;
use crate::providers::Provider; use crate::providers::Provider;
use crate::util::cache::VideoCache; use crate::util::cache::VideoCache;
use crate::util::discord::{format_error_chain, send_discord_error_report};
use crate::videos::ServerOptions; use crate::videos::ServerOptions;
use crate::videos::{VideoItem}; use crate::videos::{VideoItem};
use crate::DbPool; use crate::DbPool;
use crate::util::requester::Requester; use crate::util::requester::Requester;
error_chain! { error_chain! {
foreign_links { foreign_links {
Io(std::io::Error); Io(std::io::Error);
HttpRequest(wreq::Error); HttpRequest(wreq::Error);
JsonError(serde_json::Error); JsonError(serde_json::Error);
Pool(r2d2::Error); // Assuming r2d2 or similar for pool
}
errors {
ParsingError(t: String) {
description("parsing error")
display("Parsing error: '{}'", t)
}
} }
} }
@@ -25,264 +33,189 @@ error_chain! {
pub struct MissavProvider { pub struct MissavProvider {
url: String, url: String,
} }
impl MissavProvider { impl MissavProvider {
pub fn new() -> Self { pub fn new() -> Self {
MissavProvider { MissavProvider {
url: "https://missav.ws".to_string() url: "https://missav.ws".to_string()
} }
} }
async fn get(&self, cache:VideoCache, pool:DbPool, page: u8, mut sort: String, options: ServerOptions) -> Result<Vec<VideoItem>> {
// Extract needed fields from options at the start async fn get(&self, cache: VideoCache, pool: DbPool, page: u8, mut sort: String, options: ServerOptions) -> Result<Vec<VideoItem>> {
let language = options.language.clone().unwrap(); // Use ok_or to avoid unwrapping options
let filter = options.filter.clone().unwrap(); let language = options.language.as_ref().ok_or("Missing language")?;
let mut requester = options.requester.clone().unwrap(); let filter = options.filter.as_ref().ok_or("Missing filter")?;
if !sort.is_empty(){ let mut requester = options.requester.clone().ok_or("Missing requester")?;
if !sort.is_empty() {
sort = format!("&sort={}", sort); sort = format!("&sort={}", sort);
} }
let url_str = format!("{}/{}/{}?page={}{}", self.url, language, filter, page, sort); let url_str = format!("{}/{}/{}?page={}{}", self.url, language, filter, page, sort);
let old_items = match cache.get(&url_str) { if let Some((time, items)) = cache.get(&url_str) {
Some((time, items)) => { if time.elapsed().unwrap_or_default().as_secs() < 3600 {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 {
return Ok(items.clone()); return Ok(items.clone());
} }
else{
items.clone()
} }
}
None => {
vec![]
}
};
let text = requester.get(&url_str, Some(Version::HTTP_2)).await.unwrap(); let text = requester.get(&url_str, Some(Version::HTTP_2)).await.unwrap_or_else(|e| {
// Pass a reference to options if needed, or reconstruct as needed eprintln!("Error fetching Missav URL {}: {}", url_str, e);
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(), pool, requester).await; let _ = send_discord_error_report(e.to_string(), None, Some(&url_str), None, file!(), line!(), module_path!());
"".to_string()
});
let video_items = self.get_video_items_from_html(text, pool, requester).await;
if !video_items.is_empty() { if !video_items.is_empty() {
cache.remove(&url_str); cache.insert(url_str, video_items.clone());
cache.insert(url_str.clone(), video_items.clone());
} else{
return Ok(old_items);
} }
Ok(video_items) Ok(video_items)
} }
async fn query(&self, cache: VideoCache, pool:DbPool, page: u8, query: &str, mut sort: String, options: ServerOptions) -> Result<Vec<VideoItem>> { async fn query(&self, cache: VideoCache, pool: DbPool, page: u8, query: &str, mut sort: String, options: ServerOptions) -> Result<Vec<VideoItem>> {
// Extract needed fields from options at the start let language = options.language.as_ref().ok_or("Missing language")?;
let language = options.language.clone().unwrap(); let mut requester = options.requester.clone().ok_or("Missing requester")?;
let mut requester = options.requester.clone().unwrap();
let search_string = query.replace(" ", "%20"); let search_string = query.replace(" ", "%20");
if !sort.is_empty(){ if !sort.is_empty() {
sort = format!("&sort={}", sort); sort = format!("&sort={}", sort);
} }
let url_str = format!( let url_str = format!("{}/{}/search/{}?page={}{}", self.url, language, search_string, page, sort);
"{}/{}/search/{}?page={}{}",
self.url, language, search_string, page, sort if let Some((time, items)) = cache.get(&url_str) {
); if time.elapsed().unwrap_or_default().as_secs() < 3600 {
// Check our Video Cache. If the result is younger than 1 hour, we return it.
let old_items = match cache.get(&url_str) {
Some((time, items)) => {
if time.elapsed().unwrap_or_default().as_secs() < 60 * 60 {
return Ok(items.clone()); return Ok(items.clone());
} }
else{
let _ = cache.check().await;
return Ok(items.clone())
} }
}
None => { let text = requester.get(&url_str, Some(Version::HTTP_2)).await.unwrap_or_else(|e| {
vec![] eprintln!("Error fetching Missav URL {}: {}", url_str, e);
} let _ = send_discord_error_report(e.to_string(), None, Some(&url_str), None, file!(), line!(), module_path!());
}; "".to_string()
let text = requester.get(&url_str, Some(Version::HTTP_2)).await.unwrap(); });
let video_items: Vec<VideoItem> = self.get_video_items_from_html(text.clone(), pool, requester).await; let video_items = self.get_video_items_from_html(text, pool, requester).await;
if !video_items.is_empty() { if !video_items.is_empty() {
cache.remove(&url_str); cache.insert(url_str, video_items.clone());
cache.insert(url_str.clone(), video_items.clone());
} else{
return Ok(old_items);
} }
Ok(video_items) Ok(video_items)
} }
async fn get_video_items_from_html(&self, html: String, pool: DbPool, requester: Requester) -> Vec<VideoItem> { async fn get_video_items_from_html(&self, html: String, pool: DbPool, requester: Requester) -> Vec<VideoItem> {
if html.is_empty() { if html.is_empty() { return vec![]; }
println!("HTML is empty");
return vec![]; let segments: Vec<&str> = html.split("@mouseenter=\"setPreview(\'").collect();
if segments.len() < 2 { return vec![]; }
let mut urls = vec![];
for video_segment in &segments[1..] {
// Safer parsing: find start and end of href
if let Some(start) = video_segment.find("<a href=\"") {
let rest = &video_segment[start + 9..];
if let Some(end) = rest.find('\"') {
urls.push(rest[..end].to_string());
}
} }
let raw_videos = html
.split("@mouseenter=\"setPreview(\'")
.collect::<Vec<&str>>()[1..]
.to_vec();
let mut urls: Vec<String> = vec![];
for video_segment in &raw_videos {
// let vid = video_segment.split("\n").collect::<Vec<&str>>();
// for (index, line) in vid.iter().enumerate() {
// println!("Line {}: {}", index, line.to_string().trim());
// }
let url_str = video_segment.split("<a href=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
urls.push(url_str.clone());
} }
let futures = urls.into_iter().map(|el| self.get_video_item(el.clone(), pool.clone(), requester.clone()));
let results: Vec<Result<VideoItem>> = join_all(futures).await;
let video_items: Vec<VideoItem> = results
.into_iter()
.filter_map(Result::ok)
.collect();
return video_items; let futures = urls.into_iter().map(|url| self.get_video_item(url, pool.clone(), requester.clone()));
join_all(futures).await.into_iter().filter_map(Result::ok).collect()
} }
async fn get_video_item(&self, url_str: String, pool: DbPool, mut requester: Requester) -> Result<VideoItem> { async fn get_video_item(&self, url_str: String, pool: DbPool, mut requester: Requester) -> Result<VideoItem> {
let mut conn = pool.get().expect("couldn't get db connection from pool"); // 1. Database Check
let db_result = db::get_video(&mut conn,url_str.clone()); {
match db_result { let mut conn = pool.get().map_err(|e| Error::from(format!("Pool error: {}", e)))?;
Ok(Some(entry)) => { if let Ok(Some(entry)) = db::get_video(&mut conn, url_str.clone()) {
let video_item: VideoItem = serde_json::from_str(entry.as_str()).unwrap(); if let Ok(video_item) = serde_json::from_str::<VideoItem>(entry.as_str()) {
return Ok(video_item) return Ok(video_item);
}
Ok(None) => {
}
Err(e) => {
println!("Error fetching video from database: {}", e);
} }
} }
drop(conn); }
let vid = requester.get(&url_str, Some(Version::HTTP_2)).await.unwrap();
let mut title = vid.split("<meta property=\"og:title\" content=\"").collect::<Vec<&str>>()[1] // 2. Fetch Page
.split("\"") let vid = requester.get(&url_str, Some(Version::HTTP_2)).await.unwrap_or_else(|e| {
.collect::<Vec<&str>>()[0].trim() eprintln!("Error fetching Missav URL {}: {}", url_str, e);
.to_string(); let _ = send_discord_error_report(e.to_string(), None, Some(&url_str), None, file!(), line!(), module_path!());
"".to_string()
});
// Helper closure to extract content between two strings
let extract = |html: &str, start_tag: &str, end_tag: &str| -> Option<String> {
let start = html.find(start_tag)? + start_tag.len();
let rest = &html[start..];
let end = rest.find(end_tag)?;
Some(rest[..end].to_string())
};
let mut title = extract(&vid, "<meta property=\"og:title\" content=\"", "\"")
.ok_or_else(|| ErrorKind::ParsingError(format!("title\n{:?}", vid)))?;
title = decode(title.as_bytes()).to_string().unwrap_or(title); title = decode(title.as_bytes()).to_string().unwrap_or(title);
if url_str.contains("uncensored") { if url_str.contains("uncensored") {
title = format!("[Uncensored] {}", title); title = format!("[Uncensored] {}", title);
} }
let thumb = vid.split("<meta property=\"og:image\" content=\"").collect::<Vec<&str>>()[1]
.split("\"")
.collect::<Vec<&str>>()[0]
.to_string();
let raw_duration = vid.split("<meta property=\"og:video:duration\" content=\"").collect::<Vec<&str>>()[1] let thumb = extract(&vid, "<meta property=\"og:image\" content=\"", "\"")
.split("\"") .unwrap_or_default();
.collect::<Vec<&str>>()[0]
.to_string();
let duration = raw_duration.parse::<u32>().unwrap_or(0);
let id = url_str.split("/").collect::<Vec<&str>>().last().unwrap() let duration = extract(&vid, "<meta property=\"og:video:duration\" content=\"", "\"")
.to_string(); .and_then(|d| d.parse::<u32>().ok())
.unwrap_or(0);
let id = url_str.split('/').last().ok_or("No ID found")?.to_string();
// 3. Extract Tags (Generic approach to avoid repetitive code)
let mut tags = vec![]; let mut tags = vec![];
if vid.contains("<span>Actress:</span>"){ for (label, prefix) in [("Actress:", "@actress"), ("Actor:", "@actor"), ("Maker:", "@maker"), ("Genre:", "@genre")] {
for actress_snippet in vid.split("<span>Actress:</span>").collect::<Vec<&str>>()[1] let marker = format!("<span>{}</span>", label);
.split("</div>").collect::<Vec<&str>>()[0].split("class=\"text-nord13 font-medium\">"){ if let Some(section) = extract(&vid, &marker, "</div>") {
let tag = actress_snippet.split("<").collect::<Vec<&str>>()[0].trim() for part in section.split("class=\"text-nord13 font-medium\">").skip(1) {
.to_string(); if let Some(val) = part.split('<').next() {
if !tag.is_empty(){ let clean = val.trim();
tags.push(format!("@actress:{}", tag)); if !clean.is_empty() {
tags.push(format!("{}:{}", prefix, clean));
} }
} }
} }
if vid.contains("<span>Actor:</span>"){
for actor_snippet in vid.split("<span>Actor:</span>").collect::<Vec<&str>>()[1]
.split("</div>").collect::<Vec<&str>>()[0].split("class=\"text-nord13 font-medium\">"){
let tag = actor_snippet.split("<").collect::<Vec<&str>>()[0].trim()
.to_string();
if !tag.is_empty(){
tags.push(format!("@actor:{}", tag));
}
} }
} }
if vid.contains("<span>Maker:</span>"){ // 4. Extract Video URL (The m3u8 logic)
for maker_snippet in vid.split("<span>Maker:</span>").collect::<Vec<&str>>()[1] let video_url = (|| {
.split("</div>").collect::<Vec<&str>>()[0] let parts_str = vid.split("m3u8").nth(1)?.split("https").next()?;
.split("class=\"text-nord13 font-medium\">"){ let mut parts: Vec<&str> = parts_str.split('|').collect();
let tag = maker_snippet.split("<").collect::<Vec<&str>>()[0].trim() parts.reverse();
.to_string(); if parts.len() < 8 { return None; }
if !tag.is_empty(){ Some(format!("https://{}.{}/{}-{}-{}-{}-{}/playlist.m3u8",
tags.push(format!("@maker:{}", tag)); parts[1], parts[2], parts[3], parts[4], parts[5], parts[6], parts[7]))
} })().ok_or_else(|| ErrorKind::ParsingError(format!("video_url\n{:?}", vid).to_string()))?;
}
}
if vid.contains("<span>Genre:</span>"){ let video_item = VideoItem::new(id, title, video_url, "missav".to_string(), thumb, duration)
for tag_snippet in vid.split("<span>Genre:</span>").collect::<Vec<&str>>()[1]
.split("</div>").collect::<Vec<&str>>()[0].split("class=\"text-nord13 font-medium\">"){
let tag = tag_snippet.split("<").collect::<Vec<&str>>()[0].trim()
.to_string();
if !tag.is_empty(){
tags.push(format!("@genre:{}", tag));
}
}
}
let preview = format!("https://fourhoi.com/{}/preview.mp4",id.clone());
let mut video_url_parts = vid.split("m3u8").collect::<Vec<&str>>()[1]
.split("https").collect::<Vec<&str>>()[0]
.split("|").collect::<Vec<&str>>();
video_url_parts.reverse();
let video_url = format!("https://{}.{}/{}-{}-{}-{}-{}/playlist.m3u8",
video_url_parts[1],
video_url_parts[2],
video_url_parts[3],
video_url_parts[4],
video_url_parts[5],
video_url_parts[6],
video_url_parts[7]
);
let video_item = VideoItem::new(
id,
title,
video_url.clone(),
"missav".to_string(),
thumb,
duration,
)
.tags(tags) .tags(tags)
.preview(preview) .preview(format!("https://fourhoi.com/{}/preview.mp4", url_str.split('/').last().unwrap_or_default()));
;
let mut conn = pool.get().expect("couldn't get db connection from pool"); // 5. Cache to DB
let insert_result = db::insert_video(&mut conn, &url_str, &serde_json::to_string(&video_item)?); if let Ok(mut conn) = pool.get() {
match insert_result{ let _ = db::insert_video(&mut conn, &url_str, &serde_json::to_string(&video_item).unwrap_or_default());
Ok(_) => (),
Err(e) => {println!("{:?}", e); }
} }
drop(conn);
return Ok(video_item); Ok(video_item)
} }
} }
#[async_trait] #[async_trait]
impl Provider for MissavProvider { impl Provider for MissavProvider {
async fn get_videos( async fn get_videos(&self, cache: VideoCache, pool: DbPool, sort: String, query: Option<String>, page: String, _per_page: String, options: ServerOptions) -> Vec<VideoItem> {
&self, let page_num = page.parse::<u8>().unwrap_or(1);
cache: VideoCache, let result = match query {
pool: DbPool, Some(q) => self.query(cache, pool, page_num, &q, sort, options).await,
sort: String, None => self.get(cache, pool, page_num, sort, options).await,
query: Option<String>,
page: String,
per_page: String,
options: ServerOptions,
) -> Vec<VideoItem> {
let _ = per_page;
let videos: std::result::Result<Vec<VideoItem>, Error> = match query {
Some(q) => self.query(cache, pool, page.parse::<u8>().unwrap_or(1), &q, sort, options).await,
None => self.get(cache, pool, page.parse::<u8>().unwrap_or(1), sort, options).await,
}; };
match videos {
Ok(v) => v, result.unwrap_or_else(|e| {
Err(e) => { eprintln!("Error fetching videos: {}", e);
println!("Error fetching videos: {}", e); let _ = send_discord_error_report(e.to_string(), Some(format_error_chain(&e)), None, None, file!(), line!(), module_path!());
vec![] vec![]
} })
}
} }
} }