1250 lines
43 KiB
Rust
1250 lines
43 KiB
Rust
use crate::DbPool;
|
|
use crate::api::ClientVersion;
|
|
use crate::providers::{
|
|
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
|
};
|
|
use crate::status::*;
|
|
use crate::util::cache::VideoCache;
|
|
use crate::util::parse_abbreviated_number;
|
|
use crate::util::requester::Requester;
|
|
use crate::util::time::parse_time_to_seconds;
|
|
use crate::videos::{ServerOptions, VideoFormat, VideoItem};
|
|
use async_trait::async_trait;
|
|
use chrono::{DateTime, Utc};
|
|
use error_chain::error_chain;
|
|
use futures::stream::{self, StreamExt};
|
|
use htmlentity::entity::{ICodedDataTrait, decode};
|
|
use regex::Regex;
|
|
use scraper::{ElementRef, Html, Selector};
|
|
use serde_json::Value;
|
|
use std::collections::HashSet;
|
|
use std::sync::{Arc, RwLock};
|
|
use std::{thread, vec};
|
|
use wreq::Version;
|
|
|
|
/// Static metadata describing this provider's channel: the group id it is
/// filed under and the tags attached to it.
pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata =
    crate::providers::ProviderChannelMetadata {
        group_id: "studio-network",
        tags: &["vr", "studios", "premium"],
    };
|
|
|
|
// Generates this module's `Error`, `ErrorKind` and `Result` types.
error_chain! {
    // Foreign error types that convert into `Error` automatically.
    foreign_links {
        Io(std::io::Error);
        Json(serde_json::Error);
    }

    // Error kinds specific to this provider.
    errors {
        // Carries a free-form message describing what could not be parsed.
        Parse(msg: String) {
            description("parse error")
            display("parse error: {}", msg)
        }
    }
}
|
|
|
|
/// Site root (no trailing slash) that all request URLs are built from.
const BASE_URL: &str = "https://vrporn.com";

/// Identifier used for the channel, for video items, and in error reports.
const CHANNEL_ID: &str = "vrporn";

/// `User-Agent` header value sent with HTML requests and attached to formats.
const FIREFOX_UA: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:147.0) Gecko/20100101 Firefox/147.0";

/// `Accept` header value sent with HTML requests.
const HTML_ACCEPT: &str =
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct VrpornProvider {
|
|
url: String,
|
|
categories: Arc<RwLock<Vec<FilterOption>>>,
|
|
tags: Arc<RwLock<Vec<FilterOption>>>,
|
|
stars: Arc<RwLock<Vec<FilterOption>>>,
|
|
sites: Arc<RwLock<Vec<FilterOption>>>,
|
|
}
|
|
|
|
/// The listing a request resolves to; `build_url_for_target` turns it into a
/// concrete page URL.
#[derive(Debug, Clone)]
enum Target {
    /// Default listing (site root on the first page).
    Hot,
    /// The `/new/` listing.
    New,
    /// The `/all/` listing.
    Popular,
    /// Free-text search carrying the query string.
    Search(String),
    /// An archive page identified by its absolute URL.
    Archive(String),
}
|
|
|
|
impl VrpornProvider {
|
|
pub fn new() -> Self {
|
|
let provider = Self {
|
|
url: BASE_URL.to_string(),
|
|
categories: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
tags: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
stars: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
sites: Arc::new(RwLock::new(vec![FilterOption {
|
|
id: "all".to_string(),
|
|
title: "All".to_string(),
|
|
}])),
|
|
};
|
|
provider.spawn_initial_load();
|
|
provider
|
|
}
|
|
|
|
fn spawn_initial_load(&self) {
|
|
let provider = self.clone();
|
|
thread::spawn(move || {
|
|
let runtime = match tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()
|
|
{
|
|
Ok(runtime) => runtime,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"spawn_initial_load.runtime_build",
|
|
&error.to_string(),
|
|
);
|
|
return;
|
|
}
|
|
};
|
|
|
|
runtime.block_on(async move {
|
|
provider.refresh_filter_catalogs().await;
|
|
});
|
|
});
|
|
}
|
|
|
|
fn build_channel(&self, _clientversion: ClientVersion) -> Channel {
|
|
let categories = self
|
|
.categories
|
|
.read()
|
|
.map(|values| values.clone())
|
|
.unwrap_or_default();
|
|
let tags = self.tags.read().map(|values| values.clone()).unwrap_or_default();
|
|
let stars = self
|
|
.stars
|
|
.read()
|
|
.map(|values| values.clone())
|
|
.unwrap_or_default();
|
|
let sites = self
|
|
.sites
|
|
.read()
|
|
.map(|values| values.clone())
|
|
.unwrap_or_default();
|
|
|
|
Channel {
|
|
id: CHANNEL_ID.to_string(),
|
|
name: "VRPorn".to_string(),
|
|
description:
|
|
"VRPorn.com browse, search, tag, pornstar, and studio archives with direct VR formats."
|
|
.to_string(),
|
|
premium: false,
|
|
favicon: "https://www.google.com/s2/favicons?sz=64&domain=vrporn.com".to_string(),
|
|
status: "active".to_string(),
|
|
categories: categories.iter().map(|value| value.title.clone()).collect(),
|
|
options: vec![
|
|
ChannelOption {
|
|
id: "sort".to_string(),
|
|
title: "Sort".to_string(),
|
|
description: "Browse VRPorn sections.".to_string(),
|
|
systemImage: "list.number".to_string(),
|
|
colorName: "blue".to_string(),
|
|
options: vec![
|
|
FilterOption {
|
|
id: "hot".to_string(),
|
|
title: "Hot Right Now".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "new".to_string(),
|
|
title: "New".to_string(),
|
|
},
|
|
FilterOption {
|
|
id: "popular".to_string(),
|
|
title: "Popular".to_string(),
|
|
},
|
|
],
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "categories".to_string(),
|
|
title: "Categories".to_string(),
|
|
description: "Browse VRPorn category archives.".to_string(),
|
|
systemImage: "square.grid.2x2".to_string(),
|
|
colorName: "orange".to_string(),
|
|
options: categories,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "filter".to_string(),
|
|
title: "Tags".to_string(),
|
|
description: "Browse VRPorn tag archives.".to_string(),
|
|
systemImage: "tag.fill".to_string(),
|
|
colorName: "green".to_string(),
|
|
options: tags,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "stars".to_string(),
|
|
title: "Pornstars".to_string(),
|
|
description: "Browse VRPorn pornstar archives.".to_string(),
|
|
systemImage: "star.fill".to_string(),
|
|
colorName: "yellow".to_string(),
|
|
options: stars,
|
|
multiSelect: false,
|
|
},
|
|
ChannelOption {
|
|
id: "sites".to_string(),
|
|
title: "Studios".to_string(),
|
|
description: "Browse VRPorn studio archives.".to_string(),
|
|
systemImage: "building.2.fill".to_string(),
|
|
colorName: "purple".to_string(),
|
|
options: sites,
|
|
multiSelect: false,
|
|
},
|
|
],
|
|
nsfw: true,
|
|
cacheDuration: Some(1800),
|
|
}
|
|
}
|
|
|
|
fn selector(value: &str) -> Result<Selector> {
|
|
Selector::parse(value)
|
|
.map_err(|error| Error::from(format!("selector `{value}` parse failed: {error}")))
|
|
}
|
|
|
|
fn regex(value: &str) -> Result<Regex> {
|
|
Regex::new(value).map_err(|error| Error::from(format!("regex `{value}` failed: {error}")))
|
|
}
|
|
|
|
fn decode_text(text: &str) -> String {
|
|
decode(text.as_bytes())
|
|
.to_string()
|
|
.unwrap_or_else(|_| text.to_string())
|
|
.replace('\u{a0}', " ")
|
|
.trim()
|
|
.to_string()
|
|
}
|
|
|
|
/// Returns `text` with every run of whitespace replaced by a single space
/// and with leading/trailing whitespace removed.
fn collapse_whitespace(text: &str) -> String {
    let mut collapsed = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
|
|
|
|
fn text_of(element: &ElementRef<'_>) -> String {
|
|
Self::decode_text(&Self::collapse_whitespace(
|
|
&element.text().collect::<Vec<_>>().join(" "),
|
|
))
|
|
}
|
|
|
|
fn normalize_title(value: &str) -> String {
|
|
Self::decode_text(value)
|
|
.to_ascii_lowercase()
|
|
.split_whitespace()
|
|
.collect::<Vec<_>>()
|
|
.join(" ")
|
|
}
|
|
|
|
fn normalize_url(&self, value: &str) -> String {
|
|
if value.starts_with("http://") || value.starts_with("https://") {
|
|
return value.to_string();
|
|
}
|
|
if value.starts_with("//") {
|
|
return format!("https:{value}");
|
|
}
|
|
if value.starts_with('/') {
|
|
return format!("{}{}", self.url, value);
|
|
}
|
|
format!("{}/{}", self.url.trim_end_matches('/'), value)
|
|
}
|
|
|
|
fn html_headers(&self, referer: &str) -> Vec<(String, String)> {
|
|
vec![
|
|
("User-Agent".to_string(), FIREFOX_UA.to_string()),
|
|
("Accept".to_string(), HTML_ACCEPT.to_string()),
|
|
("Referer".to_string(), referer.to_string()),
|
|
]
|
|
}
|
|
|
|
async fn fetch_html(
|
|
&self,
|
|
requester: &mut Requester,
|
|
url: &str,
|
|
referer: &str,
|
|
) -> Result<String> {
|
|
requester
|
|
.get_with_headers(url, self.html_headers(referer), Some(Version::HTTP_11))
|
|
.await
|
|
.map_err(|error| Error::from(format!("request failed for {url}: {error}")))
|
|
}
|
|
|
|
fn push_unique(target: &Arc<RwLock<Vec<FilterOption>>>, item: FilterOption) {
|
|
if item.id.is_empty() || item.title.is_empty() {
|
|
return;
|
|
}
|
|
if let Ok(mut values) = target.write() {
|
|
let normalized = Self::normalize_title(&item.title);
|
|
if !values
|
|
.iter()
|
|
.any(|value| value.id == item.id || Self::normalize_title(&value.title) == normalized)
|
|
{
|
|
values.push(item);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn extract_last_page(document: &Html) -> u16 {
|
|
let selector = match Self::selector("a[href]") {
|
|
Ok(value) => value,
|
|
Err(_) => return 1,
|
|
};
|
|
let regex = match Self::regex(r"/page/([0-9]+)/") {
|
|
Ok(value) => value,
|
|
Err(_) => return 1,
|
|
};
|
|
|
|
document
|
|
.select(&selector)
|
|
.filter_map(|element| element.value().attr("href"))
|
|
.filter_map(|href| {
|
|
regex
|
|
.captures(href)
|
|
.and_then(|captures| captures.get(1))
|
|
.and_then(|value| value.as_str().parse::<u16>().ok())
|
|
})
|
|
.max()
|
|
.unwrap_or(1)
|
|
}
|
|
|
|
async fn load_tags_and_categories(&self) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let home_url = format!("{}/", self.url);
|
|
let home_html = self.fetch_html(&mut requester, &home_url, &home_url).await?;
|
|
{
|
|
let home_document = Html::parse_document(&home_html);
|
|
let tag_selector = Self::selector("a[href^=\"/tag/\"]")?;
|
|
|
|
for element in home_document.select(&tag_selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let title = Self::text_of(&element);
|
|
if href.is_empty() || title.is_empty() {
|
|
continue;
|
|
}
|
|
let url = self.normalize_url(href);
|
|
let option = FilterOption { id: url, title };
|
|
Self::push_unique(&self.tags, option.clone());
|
|
Self::push_unique(&self.categories, option);
|
|
}
|
|
}
|
|
|
|
let categories_url = format!("{}/categories/", self.url);
|
|
let categories_html = self
|
|
.fetch_html(&mut requester, &categories_url, &home_url)
|
|
.await?;
|
|
{
|
|
let categories_document = Html::parse_document(&categories_html);
|
|
let card_selector = Self::selector("article.ui-category-card a[href*=\"/tag/\"]")?;
|
|
|
|
for element in categories_document.select(&card_selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_text)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
if href.is_empty() || title.is_empty() {
|
|
continue;
|
|
}
|
|
let option = FilterOption {
|
|
id: self.normalize_url(href),
|
|
title,
|
|
};
|
|
Self::push_unique(&self.categories, option.clone());
|
|
Self::push_unique(&self.tags, option);
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_studios(&self) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let first_page_url = format!("{}/studios/", self.url);
|
|
let first_html = self
|
|
.fetch_html(&mut requester, &first_page_url, &format!("{}/", self.url))
|
|
.await?;
|
|
let last_page = {
|
|
let first_document = Html::parse_document(&first_html);
|
|
let last_page = Self::extract_last_page(&first_document).max(1).min(25);
|
|
self.collect_studios_from_document(&first_document)?;
|
|
last_page
|
|
};
|
|
|
|
for page in 2..=last_page {
|
|
let url = format!("{}/studios/page/{page}/", self.url);
|
|
let html = self.fetch_html(&mut requester, &url, &first_page_url).await?;
|
|
let document = Html::parse_document(&html);
|
|
self.collect_studios_from_document(&document)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn collect_studios_from_document(&self, document: &Html) -> Result<()> {
|
|
let selector = Self::selector("article.ui-studio-card a[href]")?;
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
if !href.contains("/studio/") {
|
|
continue;
|
|
}
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_text)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
Self::push_unique(
|
|
&self.sites,
|
|
FilterOption {
|
|
id: self.normalize_url(href),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_pornstars(&self) -> Result<()> {
|
|
let mut requester = Requester::new();
|
|
let first_page_url = format!("{}/pornstars/", self.url);
|
|
let first_html = self
|
|
.fetch_html(&mut requester, &first_page_url, &format!("{}/", self.url))
|
|
.await?;
|
|
let last_page = {
|
|
let first_document = Html::parse_document(&first_html);
|
|
let last_page = Self::extract_last_page(&first_document).max(1).min(25);
|
|
self.collect_pornstars_from_document(&first_document)?;
|
|
last_page
|
|
};
|
|
|
|
for page in 2..=last_page {
|
|
let url = format!("{}/pornstars/page/{page}/", self.url);
|
|
let html = self.fetch_html(&mut requester, &url, &first_page_url).await?;
|
|
let document = Html::parse_document(&html);
|
|
self.collect_pornstars_from_document(&document)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn collect_pornstars_from_document(&self, document: &Html) -> Result<()> {
|
|
let selector = Self::selector("article.ui-card-model a[href]")?;
|
|
for element in document.select(&selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
if !href.contains("/pornstars/") {
|
|
continue;
|
|
}
|
|
let title = element
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_text)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&element));
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
Self::push_unique(
|
|
&self.stars,
|
|
FilterOption {
|
|
id: self.normalize_url(href),
|
|
title,
|
|
},
|
|
);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn filters_need_refresh(&self) -> bool {
|
|
self.categories
|
|
.read()
|
|
.map(|values| values.len())
|
|
.unwrap_or_default()
|
|
<= 1
|
|
|| self.tags.read().map(|values| values.len()).unwrap_or_default() <= 1
|
|
|| self.stars.read().map(|values| values.len()).unwrap_or_default() <= 1
|
|
|| self.sites.read().map(|values| values.len()).unwrap_or_default() <= 1
|
|
}
|
|
|
|
async fn refresh_filter_catalogs(&self) {
|
|
if let Err(error) = self.load_tags_and_categories().await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_filter_catalogs.tags_and_categories",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = self.load_studios().await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_filter_catalogs.studios",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
if let Err(error) = self.load_pornstars().await {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"refresh_filter_catalogs.pornstars",
|
|
&error.to_string(),
|
|
);
|
|
}
|
|
}
|
|
|
|
fn match_filter(options: &[FilterOption], query: &str) -> Option<String> {
|
|
let normalized_query = Self::normalize_title(query);
|
|
options
|
|
.iter()
|
|
.find(|value| value.id != "all" && Self::normalize_title(&value.title) == normalized_query)
|
|
.map(|value| value.id.clone())
|
|
}
|
|
|
|
fn resolve_option_target(&self, options: &ServerOptions, sort: &str) -> Target {
|
|
for candidate in [
|
|
options.sites.as_deref(),
|
|
options.stars.as_deref(),
|
|
options.filter.as_deref(),
|
|
options.categories.as_deref(),
|
|
]
|
|
.into_iter()
|
|
.flatten()
|
|
{
|
|
if candidate.starts_with(&self.url) && candidate != "all" {
|
|
return Target::Archive(candidate.to_string());
|
|
}
|
|
}
|
|
|
|
match sort {
|
|
"new" | "latest" => Target::New,
|
|
"popular" | "all" | "most_viewed" => Target::Popular,
|
|
_ => Target::Hot,
|
|
}
|
|
}
|
|
|
|
async fn resolve_query_target(&self, query: &str, sort: &str) -> Target {
|
|
let query = query.trim();
|
|
if query.is_empty() {
|
|
return self.resolve_option_target(
|
|
&ServerOptions {
|
|
featured: None,
|
|
category: None,
|
|
sites: None,
|
|
filter: None,
|
|
language: None,
|
|
public_url_base: None,
|
|
requester: None,
|
|
network: None,
|
|
stars: None,
|
|
categories: None,
|
|
duration: None,
|
|
sort: None,
|
|
sexuality: None,
|
|
},
|
|
sort,
|
|
);
|
|
}
|
|
|
|
if self.filters_need_refresh() {
|
|
self.refresh_filter_catalogs().await;
|
|
}
|
|
|
|
for options in [&self.sites, &self.stars, &self.tags, &self.categories] {
|
|
if let Ok(values) = options.read() {
|
|
if let Some(url) = Self::match_filter(&values, query) {
|
|
return Target::Archive(url);
|
|
}
|
|
}
|
|
}
|
|
|
|
Target::Search(query.to_string())
|
|
}
|
|
|
|
fn build_url_for_target(&self, target: &Target, page: u16) -> String {
|
|
match target {
|
|
Target::Hot => {
|
|
if page <= 1 {
|
|
format!("{}/", self.url)
|
|
} else {
|
|
format!("{}/all/page/{page}/", self.url)
|
|
}
|
|
}
|
|
Target::New => {
|
|
if page <= 1 {
|
|
format!("{}/new/", self.url)
|
|
} else {
|
|
format!("{}/new/page/{page}/", self.url)
|
|
}
|
|
}
|
|
Target::Popular => {
|
|
if page <= 1 {
|
|
format!("{}/all/", self.url)
|
|
} else {
|
|
format!("{}/all/page/{page}/", self.url)
|
|
}
|
|
}
|
|
Target::Search(query) => {
|
|
let query = query.split_whitespace().collect::<Vec<_>>().join("+");
|
|
if page <= 1 {
|
|
format!("{}/search/videos/?query={query}", self.url)
|
|
} else {
|
|
format!("{}/search/videos/page/{page}/?query={query}", self.url)
|
|
}
|
|
}
|
|
Target::Archive(url) => {
|
|
let base = url.trim_end_matches('/');
|
|
if page <= 1 {
|
|
format!("{base}/")
|
|
} else {
|
|
format!("{base}/page/{page}/")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_list_videos(&self, html: &str) -> Result<Vec<VideoItem>> {
|
|
let document = Html::parse_document(html);
|
|
let article_selector = Self::selector("article.ui-video-card")?;
|
|
let header_link_selector = Self::selector("header a[href]")?;
|
|
let title_selector = Self::selector("a.ui-video-card__name")?;
|
|
let thumb_selector = Self::selector("img.ui-video-card__cover")?;
|
|
let duration_selector = Self::selector("div.ui-time span")?;
|
|
let studio_selector = Self::selector("a.ui-video-card__studio-link[href]")?;
|
|
let info_text_selector = Self::selector(".ui-video-card__info-mini span")?;
|
|
let footer_text_selector = Self::selector(".ui-video-card__footer .ui-video-card__text")?;
|
|
|
|
let mut items = Vec::new();
|
|
|
|
for article in document.select(&article_selector) {
|
|
let header_link = article.select(&header_link_selector).next();
|
|
let title_link = article.select(&title_selector).next();
|
|
let href = title_link
|
|
.and_then(|link| link.value().attr("href"))
|
|
.or_else(|| header_link.and_then(|link| link.value().attr("href")))
|
|
.unwrap_or_default();
|
|
if href.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let title = title_link
|
|
.map(|element| Self::text_of(&element))
|
|
.filter(|value| !value.is_empty())
|
|
.or_else(|| {
|
|
header_link.and_then(|element| {
|
|
element
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_text)
|
|
.filter(|value| !value.is_empty())
|
|
})
|
|
})
|
|
.unwrap_or_default();
|
|
if title.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let thumb = article
|
|
.select(&thumb_selector)
|
|
.next()
|
|
.and_then(|img| img.value().attr("src"))
|
|
.map(|value| self.normalize_url(value))
|
|
.unwrap_or_default();
|
|
|
|
let duration = article
|
|
.select(&duration_selector)
|
|
.next()
|
|
.map(|element| Self::text_of(&element))
|
|
.and_then(|value| parse_time_to_seconds(&value))
|
|
.unwrap_or(0)
|
|
.max(0) as u32;
|
|
|
|
let url = self.normalize_url(href);
|
|
let id = href
|
|
.trim_matches('/')
|
|
.split('/')
|
|
.next_back()
|
|
.unwrap_or_default()
|
|
.to_string();
|
|
if id.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let mut item = VideoItem::new(
|
|
id,
|
|
title,
|
|
url.clone(),
|
|
CHANNEL_ID.to_string(),
|
|
thumb,
|
|
duration,
|
|
);
|
|
|
|
if let Some(studio) = article.select(&studio_selector).next() {
|
|
let uploader = studio
|
|
.value()
|
|
.attr("title")
|
|
.map(Self::decode_text)
|
|
.filter(|value| !value.is_empty())
|
|
.unwrap_or_else(|| Self::text_of(&studio));
|
|
if !uploader.is_empty() {
|
|
item.uploader = Some(uploader);
|
|
}
|
|
if let Some(href) = studio.value().attr("href") {
|
|
item.uploaderUrl = Some(self.normalize_url(href));
|
|
}
|
|
}
|
|
|
|
let footer_values = article
|
|
.select(&footer_text_selector)
|
|
.map(|element| Self::text_of(&element))
|
|
.collect::<Vec<_>>();
|
|
if footer_values.len() >= 2 {
|
|
item.views = parse_abbreviated_number(&footer_values[1]);
|
|
}
|
|
|
|
let info_values = article
|
|
.select(&info_text_selector)
|
|
.map(|element| Self::text_of(&element))
|
|
.collect::<Vec<_>>();
|
|
if item.uploader.is_none() && !info_values.is_empty() {
|
|
item.uploader = Some(info_values[0].clone());
|
|
}
|
|
|
|
items.push(item);
|
|
}
|
|
|
|
Ok(items)
|
|
}
|
|
|
|
fn extract_json_ld_video(&self, html: &str) -> Result<Option<Value>> {
|
|
let script_regex = Self::regex(r#"(?s)<script[^>]+application/ld\+json[^>]*>(.*?)</script>"#)?;
|
|
for captures in script_regex.captures_iter(html) {
|
|
let Some(raw) = captures.get(1).map(|value| value.as_str().trim()) else {
|
|
continue;
|
|
};
|
|
let parsed: Value = match serde_json::from_str(raw) {
|
|
Ok(value) => value,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
if let Some(graph) = parsed.get("@graph").and_then(Value::as_array) {
|
|
for item in graph {
|
|
if item
|
|
.get("@type")
|
|
.and_then(Value::as_str)
|
|
.is_some_and(|value| value == "VideoObject")
|
|
{
|
|
return Ok(Some(item.clone()));
|
|
}
|
|
}
|
|
}
|
|
|
|
if parsed
|
|
.get("@type")
|
|
.and_then(Value::as_str)
|
|
.is_some_and(|value| value == "VideoObject")
|
|
{
|
|
return Ok(Some(parsed));
|
|
}
|
|
}
|
|
|
|
Ok(None)
|
|
}
|
|
|
|
fn parse_uploaded_at(value: &str) -> Option<u64> {
|
|
DateTime::parse_from_rfc3339(value)
|
|
.ok()
|
|
.map(|date| date.with_timezone(&Utc).timestamp().max(0) as u64)
|
|
}
|
|
|
|
/// Derives a quality label from a media URL (case-insensitive).
///
/// The labels `8k, 6k, 5k, 4k, hq, hd, sd` are tried in that order; a label
/// matches when the URL contains `free_{label}`, `_{label}_` or
/// `{label}.mp4`. The first match is returned upper-cased. Otherwise the
/// result is `"ORIGINAL"` if the URL contains `original`, else `"MP4"`.
fn extract_media_quality(url: &str) -> String {
    const LABELS: [&str; 7] = ["8k", "6k", "5k", "4k", "hq", "hd", "sd"];

    let haystack = url.to_ascii_lowercase();
    let detected = LABELS.into_iter().find(|label| {
        // A `/{label}.mp4` match is already covered by `{label}.mp4`.
        [
            format!("free_{label}"),
            format!("_{label}_"),
            format!("{label}.mp4"),
        ]
        .iter()
        .any(|needle| haystack.contains(needle.as_str()))
    });

    match detected {
        Some(label) => label.to_ascii_uppercase(),
        None if haystack.contains("original") => "ORIGINAL".to_string(),
        None => "MP4".to_string(),
    }
}
|
|
|
|
/// Maps a quality label (case-insensitive) to its sort rank.
///
/// Ranks ascend `sd, hq, hd, 4k, 5k, 6k, 8k, original` (0 through 7); any
/// other label ranks 8.
fn quality_rank(label: &str) -> usize {
    const ASCENDING: [&str; 8] = ["sd", "hq", "hd", "4k", "5k", "6k", "8k", "original"];

    let lowered = label.to_ascii_lowercase();
    ASCENDING
        .iter()
        .position(|known| *known == lowered)
        .unwrap_or(ASCENDING.len())
}
|
|
|
|
fn extract_formats(html: &str, referer: &str, slug: &str) -> Result<Vec<VideoFormat>> {
|
|
let regex = Self::regex(r#"https://(?:cdns|mcdnds)\.vrporn\.com/[^"'\s]+\.mp4\?[^"'\s<]+"#)?;
|
|
let mut seen = HashSet::new();
|
|
let mut raw_formats = Vec::new();
|
|
let slug = slug.trim_matches('/').to_ascii_lowercase();
|
|
|
|
for found in regex.find_iter(html).map(|value| value.as_str().to_string()) {
|
|
let lowercase = found.to_ascii_lowercase();
|
|
if lowercase.contains("shortvideo")
|
|
|| lowercase.contains("preview")
|
|
|| lowercase.contains("trailer")
|
|
|| lowercase.contains("video-short")
|
|
{
|
|
continue;
|
|
}
|
|
if !slug.is_empty() && !lowercase.contains(&format!("/videos/{slug}/")) {
|
|
continue;
|
|
}
|
|
if seen.insert(found.clone()) {
|
|
let label = Self::extract_media_quality(&found);
|
|
raw_formats.push((Self::quality_rank(&label), label, found));
|
|
}
|
|
}
|
|
|
|
raw_formats.sort_by_key(|(rank, _, _)| *rank);
|
|
|
|
let formats = raw_formats
|
|
.into_iter()
|
|
.map(|(_, label, url)| {
|
|
let mut format =
|
|
VideoFormat::new(url, label.clone(), "mp4".to_string()).format_id(label);
|
|
format.add_http_header("Referer".to_string(), referer.to_string());
|
|
format.add_http_header("User-Agent".to_string(), FIREFOX_UA.to_string());
|
|
format
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
Ok(formats)
|
|
}
|
|
|
|
fn extract_preview(html: &str, slug: &str) -> Result<Option<String>> {
|
|
let regex = Self::regex(
|
|
r#"https://(?:cdn|mcdn|mcdnds)\.vrporn\.com/[^"'\s]+(?:shortvideo[^"'\s]*|preview[^"'\s]*|trailer[^"'\s]*)\.mp4(?:\?[^"'\s<]+)?"#,
|
|
)?;
|
|
let slug = slug.trim_matches('/').to_ascii_lowercase();
|
|
Ok(regex.find_iter(html).find_map(|value| {
|
|
let found = value.as_str().to_string();
|
|
if slug.is_empty() || found.to_ascii_lowercase().contains(&slug) {
|
|
Some(found)
|
|
} else {
|
|
None
|
|
}
|
|
}))
|
|
}
|
|
|
|
fn apply_detail_video(&self, mut item: VideoItem, html: &str) -> Result<VideoItem> {
|
|
let document = Html::parse_document(html);
|
|
let title_selector = Self::selector("h1")?;
|
|
let sub_text_selector = Self::selector("span.ui-player-title__sub-text")?;
|
|
let studio_selector = Self::selector(".ui-detail-video a[href*=\"/studio/\"]")?;
|
|
let pornstar_selector = Self::selector("div.starrings a[href*=\"/pornstars/\"]")?;
|
|
let tag_selector = Self::selector("a[href*=\"/tag/\"]")?;
|
|
let description_selector = Self::selector(".ui-detail-video__content-inner")?;
|
|
let verified_selector = Self::selector(".detail-video__verfy")?;
|
|
|
|
if let Some(title) = document.select(&title_selector).next() {
|
|
let title = Self::text_of(&title);
|
|
if !title.is_empty() {
|
|
item.title = title;
|
|
}
|
|
}
|
|
|
|
let sub_texts = document
|
|
.select(&sub_text_selector)
|
|
.map(|element| Self::text_of(&element))
|
|
.collect::<Vec<_>>();
|
|
if let Some(value) = sub_texts.first() {
|
|
let cleaned = value.trim_end_matches('%').trim();
|
|
if let Ok(parsed) = cleaned.parse::<f32>() {
|
|
item.rating = Some(parsed);
|
|
}
|
|
}
|
|
if let Some(value) = sub_texts.get(1) {
|
|
if item.views.is_none() {
|
|
item.views = parse_abbreviated_number(value);
|
|
}
|
|
}
|
|
|
|
if document.select(&verified_selector).next().is_some() {
|
|
item.verified = Some(true);
|
|
}
|
|
|
|
if let Some(studio) = document.select(&studio_selector).next() {
|
|
let title = Self::text_of(&studio);
|
|
if !title.is_empty() {
|
|
item.uploader = Some(title);
|
|
}
|
|
if let Some(href) = studio.value().attr("href") {
|
|
item.uploaderUrl = Some(self.normalize_url(href));
|
|
}
|
|
}
|
|
|
|
let mut tags = item.tags.take().unwrap_or_default();
|
|
let mut discovered_formats = Vec::new();
|
|
for element in document.select(&pornstar_selector) {
|
|
let name = Self::text_of(&element);
|
|
if !name.is_empty() && !tags.iter().any(|value| value == &name) {
|
|
tags.push(name);
|
|
}
|
|
}
|
|
for element in document.select(&tag_selector) {
|
|
let href = element.value().attr("href").unwrap_or_default();
|
|
if !href.contains("/tag/") {
|
|
continue;
|
|
}
|
|
let name = Self::text_of(&element);
|
|
if !name.is_empty() && !tags.iter().any(|value| value == &name) {
|
|
tags.push(name);
|
|
}
|
|
}
|
|
|
|
if let Some(json_ld) = self.extract_json_ld_video(html)? {
|
|
if let Some(title) = json_ld.get("name").and_then(Value::as_str) {
|
|
if !title.trim().is_empty() {
|
|
item.title = Self::decode_text(title);
|
|
}
|
|
}
|
|
|
|
if item.thumb.is_empty() {
|
|
if let Some(thumb) = json_ld.get("thumbnailUrl").and_then(Value::as_str) {
|
|
item.thumb = self.normalize_url(thumb);
|
|
}
|
|
}
|
|
|
|
if item.uploadedAt.is_none() {
|
|
if let Some(uploaded_at) = json_ld.get("uploadDate").and_then(Value::as_str) {
|
|
item.uploadedAt = Self::parse_uploaded_at(uploaded_at);
|
|
}
|
|
}
|
|
|
|
if item.uploader.is_none() {
|
|
if let Some(author) = json_ld.get("author") {
|
|
if let Some(name) = author.get("name").and_then(Value::as_str) {
|
|
item.uploader = Some(Self::decode_text(name));
|
|
}
|
|
if let Some(url) = author.get("url").and_then(Value::as_str) {
|
|
item.uploaderUrl = Some(self.normalize_url(url));
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(actors) = json_ld.get("actor").and_then(Value::as_array) {
|
|
for actor in actors {
|
|
if let Some(name) = actor.get("name").and_then(Value::as_str) {
|
|
let name = Self::decode_text(name);
|
|
if !name.is_empty() && !tags.iter().any(|value| value == &name) {
|
|
tags.push(name);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(content_url) = json_ld.get("contentUrl").and_then(Value::as_str) {
|
|
let mut format = VideoFormat::new(
|
|
content_url.to_string(),
|
|
Self::extract_media_quality(content_url),
|
|
"mp4".to_string(),
|
|
);
|
|
format.add_http_header("Referer".to_string(), item.url.clone());
|
|
format.add_http_header("User-Agent".to_string(), FIREFOX_UA.to_string());
|
|
discovered_formats.push(format);
|
|
}
|
|
}
|
|
|
|
if let Some(description) = document.select(&description_selector).next() {
|
|
let description = Self::text_of(&description);
|
|
if !description.is_empty() {
|
|
for tag in description
|
|
.split(|ch: char| !ch.is_alphanumeric() && ch != '-' && ch != '\'')
|
|
.map(str::trim)
|
|
.filter(|value| value.len() > 2 && value.len() < 40)
|
|
.take(0)
|
|
{
|
|
let _ = tag;
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(preview) = Self::extract_preview(html, &item.id)? {
|
|
item.preview = Some(preview);
|
|
}
|
|
|
|
let mut formats = Self::extract_formats(html, &item.url, &item.id)?;
|
|
for format in formats.drain(..) {
|
|
if !discovered_formats.iter().any(|existing| existing.url == format.url) {
|
|
discovered_formats.push(format);
|
|
}
|
|
}
|
|
if !discovered_formats.is_empty() {
|
|
item.formats = Some(discovered_formats);
|
|
}
|
|
|
|
if !tags.is_empty() {
|
|
item.tags = Some(tags);
|
|
}
|
|
|
|
Ok(item)
|
|
}
|
|
|
|
async fn enrich_video(&self, item: VideoItem, options: &ServerOptions) -> VideoItem {
|
|
let mut requester = requester_or_default(options, CHANNEL_ID, "enrich_video");
|
|
match self
|
|
.fetch_html(&mut requester, &item.url, &format!("{}/", self.url))
|
|
.await
|
|
{
|
|
Ok(html) => match self.apply_detail_video(item.clone(), &html) {
|
|
Ok(enriched) => enriched,
|
|
Err(error) => {
|
|
report_provider_error_background(
|
|
CHANNEL_ID,
|
|
"apply_detail_video",
|
|
&error.to_string(),
|
|
);
|
|
item
|
|
}
|
|
},
|
|
Err(error) => {
|
|
report_provider_error_background(CHANNEL_ID, "fetch_detail", &error.to_string());
|
|
item
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn fetch_items_for_url(
|
|
&self,
|
|
cache: VideoCache,
|
|
url: String,
|
|
per_page_limit: usize,
|
|
enrich_details: bool,
|
|
options: &ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
if let Some((time, items)) = cache.get(&url) {
|
|
if time.elapsed().unwrap_or_default().as_secs() < 60 * 15 {
|
|
return Ok(items.clone());
|
|
}
|
|
}
|
|
|
|
let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_items_for_url");
|
|
let html = self
|
|
.fetch_html(&mut requester, &url, &format!("{}/", self.url))
|
|
.await?;
|
|
let items = self.parse_list_videos(&html)?;
|
|
if items.is_empty() {
|
|
return Ok(vec![]);
|
|
}
|
|
|
|
let limited = items
|
|
.into_iter()
|
|
.take(per_page_limit.max(1))
|
|
.collect::<Vec<_>>();
|
|
|
|
if !enrich_details {
|
|
cache.insert(url, limited.clone());
|
|
return Ok(limited);
|
|
}
|
|
|
|
let enriched = stream::iter(limited.into_iter().map(|item| {
|
|
let provider = self.clone();
|
|
let options = options.clone();
|
|
async move { provider.enrich_video(item, &options).await }
|
|
}))
|
|
.buffer_unordered(4)
|
|
.collect::<Vec<_>>()
|
|
.await;
|
|
|
|
cache.insert(url, enriched.clone());
|
|
Ok(enriched)
|
|
}
|
|
|
|
async fn get(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u16,
|
|
sort: &str,
|
|
per_page_limit: usize,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let target = self.resolve_option_target(&options, sort);
|
|
let url = self.build_url_for_target(&target, page);
|
|
self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options)
|
|
.await
|
|
}
|
|
|
|
async fn query(
|
|
&self,
|
|
cache: VideoCache,
|
|
page: u16,
|
|
sort: &str,
|
|
query: &str,
|
|
per_page_limit: usize,
|
|
options: ServerOptions,
|
|
) -> Result<Vec<VideoItem>> {
|
|
let target = self.resolve_query_target(query, sort).await;
|
|
let url = self.build_url_for_target(&target, page);
|
|
self.fetch_items_for_url(cache, url, per_page_limit, page <= 1, &options)
|
|
.await
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Provider for VrpornProvider {
|
|
async fn get_videos(
|
|
&self,
|
|
cache: VideoCache,
|
|
pool: DbPool,
|
|
sort: String,
|
|
query: Option<String>,
|
|
page: String,
|
|
per_page: String,
|
|
options: ServerOptions,
|
|
) -> Vec<VideoItem> {
|
|
let _ = pool;
|
|
let page = page.parse::<u16>().unwrap_or(1);
|
|
let per_page_limit = per_page.parse::<usize>().unwrap_or(30);
|
|
|
|
let result = match query {
|
|
Some(query) if !query.trim().is_empty() => {
|
|
self.query(cache, page, &sort, &query, per_page_limit, options)
|
|
.await
|
|
}
|
|
_ => self.get(cache, page, &sort, per_page_limit, options).await,
|
|
};
|
|
|
|
match result {
|
|
Ok(videos) => videos,
|
|
Err(error) => {
|
|
report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await;
|
|
vec![]
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
|
|
Some(self.build_channel(clientversion))
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a catalog holding a single entry.
    fn catalog(id: &str, title: &str) -> Arc<RwLock<Vec<FilterOption>>> {
        Arc::new(RwLock::new(vec![FilterOption {
            id: id.to_string(),
            title: title.to_string(),
        }]))
    }

    /// Provider fixture with one entry per catalog. It is built directly
    /// instead of through `new()`, so no background load is started.
    fn provider() -> VrpornProvider {
        VrpornProvider {
            url: BASE_URL.to_string(),
            categories: catalog("https://vrporn.com/tag/anal/", "Anal"),
            tags: catalog("https://vrporn.com/tag/passthrough/", "Passthrough"),
            stars: catalog("https://vrporn.com/pornstars/elena-vrq/", "Elena VRQ"),
            sites: catalog("https://vrporn.com/studio/vrsun/", "VRSUN"),
        }
    }

    #[test]
    fn builds_search_page_two_url() {
        let target = Target::Search("adriana chechik".to_string());
        let url = provider().build_url_for_target(&target, 2);
        assert_eq!(
            url,
            "https://vrporn.com/search/videos/page/2/?query=adriana+chechik"
        );
    }

    #[test]
    fn builds_archive_page_two_url() {
        let target = Target::Archive("https://vrporn.com/tag/anal/".to_string());
        let url = provider().build_url_for_target(&target, 2);
        assert_eq!(url, "https://vrporn.com/tag/anal/page/2/");
    }

    #[test]
    fn extracts_formats_from_detail_html() {
        // Only the two `/videos/test/` URLs on an accepted host qualify.
        let html = r#"
<meta property="og:video" content="https://cdns.vrporn.com/videos/test/free_4k.mp4?ttl=1&token=1">
<script>
"https://cdns.vrporn.com/videos/test/free_6k.mp4?ttl=1&token=2"
"https://cdns.vrporn.com/videos/other/free_8k.mp4?ttl=1&token=3"
"https://cdn.vrporn.com/videos/test/testshortvideo_video_123.mp4"
</script>
"#;
        let formats = VrpornProvider::extract_formats(html, "https://vrporn.com/test/", "test")
            .expect("formats should parse");

        assert_eq!(formats.len(), 2);
        assert!(formats[0].url.contains("free_4k.mp4"));
        assert!(formats[1].url.contains("free_6k.mp4"));
    }

    #[test]
    fn detail_parser_ignores_embed_url() {
        let item = VideoItem::new(
            "test".to_string(),
            "Original".to_string(),
            "https://vrporn.com/videos/test/".to_string(),
            CHANNEL_ID.to_string(),
            String::new(),
            0,
        );
        let html = r#"
<html>
<head>
<script type="application/ld+json">
{
"@type": "VideoObject",
"name": "Updated Title",
"embedUrl": "https://vrporn.com/embed/test/",
"contentUrl": "https://cdns.vrporn.com/videos/test/free_4k.mp4?ttl=1&token=1"
}
</script>
</head>
<body></body>
</html>
"#;
        let parsed = provider()
            .apply_detail_video(item, html)
            .expect("detail HTML should parse");

        assert_eq!(parsed.title, "Updated Title");
        assert!(parsed.embed.is_none());
    }
}
|