diff --git a/build.rs b/build.rs index 6d269eb..72d646d 100644 --- a/build.rs +++ b/build.rs @@ -209,6 +209,11 @@ const PROVIDERS: &[ProviderDef] = &[ module: "supjav", ty: "SupjavProvider", }, + ProviderDef { + id: "vjav", + module: "vjav", + ty: "VjavProvider", + }, ProviderDef { id: "hypnotube", module: "hypnotube", diff --git a/src/providers/vjav.rs b/src/providers/vjav.rs new file mode 100644 index 0000000..bd099b0 --- /dev/null +++ b/src/providers/vjav.rs @@ -0,0 +1,1740 @@ +use crate::DbPool; +use crate::api::ClientVersion; +use crate::providers::{ + Provider, report_provider_error, report_provider_error_background, requester_or_default, +}; +use crate::status::*; +use crate::uploaders::{ + UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef, +}; +use crate::util::cache::VideoCache; +use crate::util::requester::Requester; +use crate::util::time::parse_time_to_seconds; +use crate::videos::{ServerOptions, VideoFormat, VideoItem}; +use async_trait::async_trait; +use base64::{Engine as _, engine::general_purpose::STANDARD}; +use chrono::NaiveDateTime; +use error_chain::error_chain; +use futures::stream::{self, StreamExt}; +use serde::Deserialize; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, RwLock}; +use std::thread; + +pub const CHANNEL_METADATA: crate::providers::ProviderChannelMetadata = + crate::providers::ProviderChannelMetadata { + group_id: "jav", + tags: &["jav", "asian", "tube"], + }; + +const BASE_URL: &str = "https://vjav.com"; +const CHANNEL_ID: &str = "vjav"; +const DEFAULT_PER_PAGE: usize = 30; +const MAX_PER_PAGE: usize = 60; +const INITIAL_UPLOADER_PAGES: u16 = 10; +const HOT_SEARCH_LIMIT: usize = 120; +const UPLOADER_OPTION_LIMIT: usize = 200; +const ENRICH_CONCURRENCY: usize = 8; + +error_chain! 
{ + foreign_links { + Io(std::io::Error); + Json(serde_json::Error); + Url(url::ParseError); + Decode(base64::DecodeError); + Utf8(std::string::FromUtf8Error); + } + errors { + Parse(msg: String) { + description("parse error") + display("parse error: {}", msg) + } + } +} + +#[derive(Debug, Clone)] +pub struct VjavProvider { + url: String, + tags: Arc>>, + hot_searches: Arc>>, + uploaders: Arc>>, + tag_ids: Arc>>, + tag_targets: Arc>>, + hot_search_lookup: Arc>>, + uploader_lookup: Arc>>, +} + +#[derive(Debug, Clone)] +enum Target { + Videos, + Tag { slug: String }, + Uploader { user_id: String }, + Search { query: String }, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct VideosListResponse { + #[serde(default)] + videos: Vec, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct ListVideo { + #[serde(default)] + video_id: String, + #[serde(default)] + title: String, + #[serde(default)] + dir: String, + #[serde(default)] + duration: String, + #[serde(default)] + video_viewed: String, + #[serde(default)] + rating: String, + #[serde(default)] + post_date: String, + #[serde(default)] + file_dimensions: String, + #[serde(default)] + tags: String, + #[serde(default)] + categories_data: String, + #[serde(default)] + user_id: String, + #[serde(default)] + username: String, + #[serde(default)] + display_name: String, + #[serde(default)] + scr: String, + #[serde(default)] + models: String, + #[serde(default)] + categories: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct DetailResponse { + #[serde(default)] + video: DetailVideo, + #[serde(default)] + related_searches: Option>, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct DetailVideo { + #[serde(default)] + video_id: String, + #[serde(default)] + title: String, + #[serde(default)] + dir: String, + #[serde(default)] + description: String, + #[serde(default)] + post_date: String, + #[serde(default)] + duration: String, + #[serde(default)] + thumb: String, + #[serde(default)] + 
thumbsrc: String, + #[serde(default)] + statistics: DetailStatistics, + #[serde(default)] + user: DetailUser, + #[serde(default)] + channel: DetailChannel, + #[serde(default)] + categories: HashMap, + #[serde(default)] + related_searches: Option>, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct DetailStatistics { + #[serde(default)] + viewed: String, + #[serde(default)] + rating: String, + #[serde(default)] + likes: i64, + #[serde(default)] + dislikes: i64, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct DetailUser { + #[serde(default)] + id: String, + #[serde(default)] + username: String, + #[serde(default)] + avatar: String, + #[serde(default)] + subscribers_count: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct DetailChannel { + #[serde(default)] + title: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct DetailCategory { + #[serde(default)] + title: String, + #[serde(default)] + dir: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct VideofileEntry { + #[serde(default)] + format: String, + #[serde(default)] + video_url: String, + #[serde(default)] + is_default: i32, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct CategoriesResponse { + #[serde(default)] + categories: Vec, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct CategoryEntry { + #[serde(default)] + category_id: String, + #[serde(default)] + title: String, + #[serde(default)] + dir: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct SuggesterResponse { + #[serde(default)] + keywords: Vec, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct MembersResponse { + #[serde(default)] + members: Vec, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct MemberSummary { + #[serde(default)] + user_id: String, + #[serde(default)] + username: String, + #[serde(default)] + avatar: String, + #[serde(default)] + statistics: MemberStatistics, +} + +#[derive(Debug, Deserialize, Clone, 
Default)] +struct MemberStatistics { + #[serde(default)] + videos: String, + #[serde(default)] + viewed: String, + #[serde(default)] + subscribers: String, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct MemberProfileResponse { + #[serde(default)] + user: MemberProfile, + #[serde(default)] + success: i32, +} + +#[derive(Debug, Deserialize, Clone, Default)] +struct MemberProfile { + #[serde(default)] + user_id: String, + #[serde(default)] + username: String, + #[serde(default)] + country: String, + #[serde(default)] + is_trusted: String, + #[serde(default)] + about_me: String, + #[serde(default)] + interests: String, + #[serde(default)] + occupation: String, + #[serde(default)] + avatar: String, + #[serde(default)] + statistics: MemberStatistics, +} + +#[derive(Debug, Clone)] +struct ParsedCategoryTag { + id: String, + title: String, + slug: String, +} + +impl VjavProvider { + pub fn new() -> Self { + let provider = Self { + url: BASE_URL.to_string(), + tags: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + hot_searches: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + uploaders: Arc::new(RwLock::new(vec![FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }])), + tag_ids: Arc::new(RwLock::new(HashMap::new())), + tag_targets: Arc::new(RwLock::new(HashMap::new())), + hot_search_lookup: Arc::new(RwLock::new(HashMap::new())), + uploader_lookup: Arc::new(RwLock::new(HashMap::new())), + }; + provider.spawn_initial_load(); + provider + } + + fn spawn_initial_load(&self) { + let tags = Arc::clone(&self.tags); + let hot_searches = Arc::clone(&self.hot_searches); + let uploaders = Arc::clone(&self.uploaders); + let tag_ids = Arc::clone(&self.tag_ids); + let tag_targets = Arc::clone(&self.tag_targets); + let hot_search_lookup = Arc::clone(&self.hot_search_lookup); + let uploader_lookup = Arc::clone(&self.uploader_lookup); + + 
thread::spawn(move || { + let runtime = match tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + { + Ok(runtime) => runtime, + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.runtime", + &error.to_string(), + ); + return; + } + }; + + runtime.block_on(async move { + if let Err(error) = Self::load_categories( + Arc::clone(&tags), + Arc::clone(&tag_ids), + Arc::clone(&tag_targets), + ) + .await + { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.categories", + &error.to_string(), + ); + } + + if let Err(error) = Self::load_hot_searches( + Arc::clone(&hot_searches), + Arc::clone(&hot_search_lookup), + ) + .await + { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.hot_searches", + &error.to_string(), + ); + } + + if let Err(error) = Self::load_uploaders( + Arc::clone(&uploaders), + Arc::clone(&uploader_lookup), + ) + .await + { + report_provider_error_background( + CHANNEL_ID, + "spawn_initial_load.uploaders", + &error.to_string(), + ); + } + }); + }); + } + + fn build_channel(&self, _clientversion: ClientVersion) -> Channel { + let categories = self + .tags + .read() + .map(|values| values.iter().skip(1).map(|value| value.title.clone()).collect()) + .unwrap_or_default(); + let category_options = self + .tags + .read() + .map(|values| values.clone()) + .unwrap_or_default(); + let filter_options = self + .hot_searches + .read() + .map(|values| { + values + .iter() + .take(HOT_SEARCH_LIMIT + 1) + .cloned() + .collect::>() + }) + .unwrap_or_default(); + let uploader_options = self + .uploaders + .read() + .map(|values| { + values + .iter() + .take(UPLOADER_OPTION_LIMIT + 1) + .cloned() + .collect::>() + }) + .unwrap_or_default(); + + Channel { + id: CHANNEL_ID.to_string(), + name: "VJAV".to_string(), + description: + "VJAV archives with exact tag, uploader, and hot-search matching plus direct HLS playback URLs." 
/// Builds the lookup key for a title: lowercase alphanumeric words joined by
/// single spaces.
///
/// Each character is lowercased individually; every run of non-alphanumeric
/// characters acts as one word separator, and separators at either end are
/// dropped. Input without any alphanumeric character yields an empty string.
fn normalize_title(value: &str) -> String {
    // Lowercase per character (not `str::to_lowercase`) so context-sensitive
    // mappings are never applied.
    let lowered: String = value.chars().flat_map(char::to_lowercase).collect();

    lowered
        .split(|character: char| !character.is_alphanumeric())
        .filter(|word| !word.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
{error}")))?; + let response: CategoriesResponse = serde_json::from_str(&text)?; + + for category in response.categories { + if category.dir.trim().is_empty() || category.title.trim().is_empty() { + continue; + } + + Self::push_filter_option( + &tags, + FilterOption { + id: category.dir.clone(), + title: category.title.clone(), + }, + ); + Self::store_tag_mapping( + &tag_ids, + &tag_targets, + &category.title, + &category.dir, + &category.category_id, + ); + } + + Ok(()) + } + + async fn load_hot_searches( + hot_searches: Arc>>, + hot_search_lookup: Arc>>, + ) -> Result<()> { + let mut requester = Requester::new(); + let text = requester + .get("https://vjav.com/api//json/suggester/43200/1000.json", None) + .await + .map_err(|error| Error::from(format!("load suggester request failed: {error}")))?; + let response: SuggesterResponse = serde_json::from_str(&text)?; + + for keyword in response.keywords { + if keyword.trim().is_empty() { + continue; + } + + Self::push_filter_option( + &hot_searches, + FilterOption { + id: keyword.clone(), + title: keyword.clone(), + }, + ); + Self::store_lookup(&hot_search_lookup, &keyword, &keyword); + } + + Ok(()) + } + + async fn load_uploaders( + uploaders: Arc>>, + uploader_lookup: Arc>>, + ) -> Result<()> { + let mut requester = Requester::new(); + + for page in 1..=INITIAL_UPLOADER_PAGES { + let url = format!( + "{BASE_URL}/api/json/members/86400/all/filt......./count-videos/96/{page}.json" + ); + let text = requester + .get(&url, None) + .await + .map_err(|error| Error::from(format!("load uploaders page {page} failed: {error}")))?; + let response: MembersResponse = serde_json::from_str(&text)?; + if response.members.is_empty() { + break; + } + + for member in response.members { + if Self::parse_u64(&member.statistics.videos).unwrap_or_default() == 0 { + continue; + } + Self::push_filter_option( + &uploaders, + FilterOption { + id: member.user_id.clone(), + title: member.username.clone(), + }, + ); + 
/// Translates a channel sort option id into the sort segment used in the
/// site's list API URLs.
///
/// Any id that is not listed (including "new") maps to "latest-updates".
fn site_sort(sort: &str) -> &'static str {
    const KNOWN_SORTS: [(&str, &str); 5] = [
        ("popular", "most-popular"),
        ("views", "most-viewed"),
        ("top", "top-rated"),
        ("long", "longest"),
        ("commented", "most-commented"),
    ];

    for (option_id, segment) in KNOWN_SORTS {
        if option_id == sort {
            return segment;
        }
    }

    "latest-updates"
}
if let Some(query) = options.filter.as_deref() { + if query != "all" && !query.trim().is_empty() { + let key = Self::normalize_title(query); + if let Ok(values) = self.hot_search_lookup.read() { + if let Some(search) = values.get(&key) { + return Some(Target::Search { + query: search.clone(), + }); + } + } + } + } + + None + } + + fn resolve_query_target(&self, query: &str) -> Option { + let key = Self::normalize_title(query); + if key.is_empty() { + return None; + } + + if let Ok(values) = self.uploader_lookup.read() { + if let Some(user_id) = values.get(&key) { + return Some(Target::Uploader { + user_id: user_id.clone(), + }); + } + } + + if let Ok(values) = self.tag_targets.read() { + if let Some(slug) = values.get(&key) { + return Some(Target::Tag { slug: slug.clone() }); + } + } + + if let Ok(values) = self.hot_search_lookup.read() { + if let Some(search) = values.get(&key) { + return Some(Target::Search { + query: search.clone(), + }); + } + } + + None + } + + async fn search_uploader_target( + &self, + query: &str, + options: &ServerOptions, + ) -> Option { + let encoded_query: String = url::form_urlencoded::byte_serialize(query.as_bytes()).collect(); + let url = format!( + "{BASE_URL}/api/json/members/86400/all/filt......./recent/30/1.json?search={encoded_query}" + ); + let mut requester = + requester_or_default(options, CHANNEL_ID, "search_uploader_target.request"); + let text = requester.get(&url, None).await.ok()?; + let response: MembersResponse = serde_json::from_str(&text).ok()?; + + let normalized_query = Self::normalize_title(query); + let exact = response.members.into_iter().find(|member| { + Self::normalize_title(&member.username) == normalized_query + || Self::normalize_title(&member.user_id) == normalized_query + })?; + + Self::push_filter_option( + &self.uploaders, + FilterOption { + id: exact.user_id.clone(), + title: exact.username.clone(), + }, + ); + Self::store_lookup(&self.uploader_lookup, &exact.username, &exact.user_id); + + 
Some(Target::Uploader { + user_id: exact.user_id, + }) + } + + async fn pick_target( + &self, + query: Option<&str>, + options: &ServerOptions, + ) -> Target { + if let Some(query) = query { + if let Some(target) = self.resolve_query_target(query) { + return target; + } + + self.ensure_filters_loaded().await; + + if let Some(target) = self.resolve_query_target(query) { + return target; + } + + if let Some(target) = self.search_uploader_target(query, options).await { + return target; + } + + return Target::Search { + query: query.to_string(), + }; + } + + if let Some(target) = self.resolve_option_target(options) { + return target; + } + + Target::Videos + } + + fn build_list_api_url( + &self, + target: &Target, + sort: &str, + page: u16, + per_page: usize, + ) -> String { + let page = page.max(1); + let per_page = per_page.clamp(1, MAX_PER_PAGE); + let site_sort = Self::site_sort(sort); + + match target { + Target::Videos => format!( + "{BASE_URL}/api/json/videos2/86400/str/{site_sort}/{per_page}/..{page}.all...json" + ), + Target::Tag { slug } => format!( + "{BASE_URL}/api/json/videos2/86400/str/{site_sort}/{per_page}/categories.{slug}.{page}.all...json" + ), + Target::Uploader { user_id } => format!( + "{BASE_URL}/api/json/videos2/14400/str/{site_sort}/{per_page}/uploaded_videos.{user_id}.{page}.all...json" + ), + Target::Search { query } => { + let encoded_query: String = + url::form_urlencoded::byte_serialize(query.as_bytes()).collect(); + format!( + "{BASE_URL}/api/videos2.php?params=86400/str/relevance/{per_page}/search..{page}.all..&s={encoded_query}" + ) + } + } + } + + async fn fetch_list( + &self, + target: &Target, + sort: &str, + page: u16, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let url = self.build_list_api_url(target, sort, page, per_page); + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_list"); + let text = requester + .get(&url, None) + .await + .map_err(|error| Error::from(format!("list request failed 
/// Reads the ASCII digits of `value`, in order, as one decimal number.
///
/// Every non-digit character is ignored, so "1,234 views" yields 1234.
/// Returns `None` when `value` contains no digit or when the digits do not
/// fit in a `u64`.
fn parse_u64(value: &str) -> Option<u64> {
    let mut saw_digit = false;
    let mut total: u64 = 0;

    for byte in value.bytes().filter(u8::is_ascii_digit) {
        saw_digit = true;
        // Checked arithmetic: a value beyond u64::MAX gives None.
        total = total
            .checked_mul(10)?
            .checked_add(u64::from(byte - b'0'))?;
    }

    saw_digit.then_some(total)
}
Self::parse_category_tags(&seed.categories_data) { + Self::store_tag_mapping( + &self.tag_ids, + &self.tag_targets, + &entry.title, + &entry.slug, + &entry.id, + ); + + let key = Self::normalize_title(&entry.title); + if !key.is_empty() && seen.insert(key) { + combined.push(entry.title); + } + } + + for value in Self::split_csv_titles(&seed.categories) { + let key = Self::normalize_title(&value); + if !key.is_empty() && seen.insert(key) { + combined.push(value); + } + } + + for value in Self::split_csv_titles(&seed.tags) { + let key = Self::normalize_title(&value); + if !key.is_empty() && seen.insert(key) { + combined.push(value); + } + } + + for value in Self::split_csv_titles(&seed.models) { + let key = Self::normalize_title(&value); + if !key.is_empty() && seen.insert(key) { + combined.push(value); + } + } + + for category in detail.video.categories.values() { + let key = Self::normalize_title(&category.title); + if !key.is_empty() && seen.insert(key) { + combined.push(category.title.clone()); + } + } + + let related_searches = detail + .video + .related_searches + .as_ref() + .or(detail.related_searches.as_ref()) + .cloned() + .unwrap_or_default(); + for value in related_searches.into_iter().take(8) { + let key = Self::normalize_title(&value); + if !key.is_empty() && seen.insert(key) { + combined.push(value); + } + } + + combined + } + + fn video_page_url(&self, video_id: &str, slug: &str) -> String { + format!("{}/videos/{video_id}/{slug}/", self.url) + } + + fn uploader_page_url(&self, user_id: &str) -> String { + format!("{}/members/{user_id}/uploaded_videos/1/", self.url) + } + + fn detail_api_url(&self, video_id: &str) -> Result { + let numeric_id = video_id + .parse::() + .map_err(|error| Error::from(format!("invalid video id `{video_id}`: {error}")))?; + let bucket = numeric_id / 1000 * 1000; + Ok(format!( + "{}/api/json/video/86400/0/{bucket}/{video_id}.json", + self.url + )) + } + + async fn fetch_detail( + &self, + video_id: &str, + options: 
/// Replaces Cyrillic look-alike letters in `value` with their Latin
/// counterparts; every other character is passed through unchanged.
///
/// The Cyrillic side is spelled with `\u{..}` escapes so each pair stays
/// distinguishable when reading the source.
fn decode_obfuscated_base64(value: &str) -> String {
    const HOMOGLYPHS: [(char, char); 22] = [
        ('\u{0410}', 'A'), // CYRILLIC CAPITAL LETTER A
        ('\u{0412}', 'B'), // CYRILLIC CAPITAL LETTER VE
        ('\u{0415}', 'E'), // CYRILLIC CAPITAL LETTER IE
        ('\u{041A}', 'K'), // CYRILLIC CAPITAL LETTER KA
        ('\u{041C}', 'M'), // CYRILLIC CAPITAL LETTER EM
        ('\u{041D}', 'H'), // CYRILLIC CAPITAL LETTER EN
        ('\u{041E}', 'O'), // CYRILLIC CAPITAL LETTER O
        ('\u{0420}', 'P'), // CYRILLIC CAPITAL LETTER ER
        ('\u{0421}', 'C'), // CYRILLIC CAPITAL LETTER ES
        ('\u{0422}', 'T'), // CYRILLIC CAPITAL LETTER TE
        ('\u{0425}', 'X'), // CYRILLIC CAPITAL LETTER HA
        ('\u{0430}', 'a'), // CYRILLIC SMALL LETTER A
        ('\u{0435}', 'e'), // CYRILLIC SMALL LETTER IE
        ('\u{043E}', 'o'), // CYRILLIC SMALL LETTER O
        ('\u{0440}', 'p'), // CYRILLIC SMALL LETTER ER
        ('\u{0441}', 'c'), // CYRILLIC SMALL LETTER ES
        ('\u{0443}', 'y'), // CYRILLIC SMALL LETTER U
        ('\u{0445}', 'x'), // CYRILLIC SMALL LETTER HA
        ('\u{043A}', 'k'), // CYRILLIC SMALL LETTER KA
        ('\u{043C}', 'm'), // CYRILLIC SMALL LETTER EM
        ('\u{0456}', 'i'), // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
        ('\u{0406}', 'I'), // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    ];

    let mut decoded = String::with_capacity(value.len());
    for character in value.chars() {
        let mut replacement = character;
        for (cyrillic, latin) in HOMOGLYPHS {
            if cyrillic == character {
                replacement = latin;
                break;
            }
        }
        decoded.push(replacement);
    }
    decoded
}
/// Turns a raw format identifier such as "_720p.mp4" into a display label.
///
/// Surrounding whitespace, all leading underscores and every trailing
/// ".mp4" are removed; each remaining underscore becomes a space.
fn format_quality_label(format: &str) -> String {
    let unpadded = format.trim();
    let without_prefix = unpadded.trim_start_matches('_');
    let stem = without_prefix.trim_end_matches(".mp4");

    stem.split('_').collect::<Vec<_>>().join(" ")
}
entry in videofile_entries { + if entry.video_url.trim().is_empty() { + continue; + } + + let url = self.decode_video_url(&entry.video_url)?; + if default_url.is_none() || entry.is_default == 1 { + default_url = Some(url.clone()); + } + + let quality = Self::format_quality_label(&entry.format); + let format = VideoFormat::new(url, quality.clone(), "hls".to_string()) + .format_id(entry.format.clone()) + .format_note(quality); + formats.push(format); + } + + let media_url = default_url + .or_else(|| formats.first().map(|format| format.url.clone())) + .ok_or_else(|| Error::from("no playable media url".to_string()))?; + + let title = if detail.video.title.trim().is_empty() { + seed.title.clone() + } else { + detail.video.title.clone() + }; + let slug = if detail.video.dir.trim().is_empty() { + seed.dir.clone() + } else { + detail.video.dir.clone() + }; + let thumb = if detail.video.thumb.trim().is_empty() { + seed.scr.clone() + } else { + detail.video.thumb.clone() + }; + let preview = if detail.video.thumbsrc.trim().is_empty() { + None + } else { + Some(detail.video.thumbsrc.clone()) + }; + let uploader_user_id = if detail.video.user.id.trim().is_empty() { + seed.user_id.clone() + } else { + detail.video.user.id.clone() + }; + let uploader_name = [ + seed.display_name.trim(), + detail.video.user.username.trim(), + seed.username.trim(), + ] + .into_iter() + .find(|value| !value.is_empty()) + .unwrap_or("") + .to_string(); + + self.store_uploader_aliases( + &uploader_user_id, + &[ + uploader_name.clone(), + detail.video.user.username.clone(), + seed.username.clone(), + ], + ); + Self::push_filter_option( + &self.uploaders, + FilterOption { + id: uploader_user_id.clone(), + title: if uploader_name.is_empty() { + uploader_user_id.clone() + } else { + uploader_name.clone() + }, + }, + ); + + let mut item = VideoItem::new( + seed.video_id.clone(), + title, + media_url, + CHANNEL_ID.to_string(), + thumb, + parse_time_to_seconds(&detail.video.duration) + .or_else(|| 
parse_time_to_seconds(&seed.duration)) + .and_then(|value| u32::try_from(value).ok()) + .unwrap_or_default(), + ); + + item.views = Self::parse_u32(&detail.video.statistics.viewed) + .or_else(|| Self::parse_u32(&seed.video_viewed)); + item.rating = Self::parse_rating_percent(&detail.video.statistics.rating) + .or_else(|| Self::parse_rating_percent(&seed.rating)); + item.uploader = (!uploader_name.is_empty()).then_some(uploader_name); + item.uploaderUrl = (!uploader_user_id.is_empty()).then_some(self.uploader_page_url(&uploader_user_id)); + item.uploaderId = (!uploader_user_id.is_empty()) + .then_some(Self::canonical_uploader_id(&uploader_user_id)); + item.tags = { + let tags = self.combine_tags(&seed, &detail); + (!tags.is_empty()).then_some(tags) + }; + item.uploadedAt = Self::parse_uploaded_at(&detail.video.post_date) + .or_else(|| Self::parse_uploaded_at(&seed.post_date)); + item.formats = (!formats.is_empty()).then_some(formats); + item.preview = preview; + item.aspectRatio = Self::parse_aspect_ratio(&seed.file_dimensions); + item.verified = None; + item.embed = None; + + let detail_page_url = self.video_page_url(&seed.video_id, &slug); + if item.url.trim().is_empty() { + item.url = detail_page_url; + } + + Ok(item) + } + + async fn fetch_target_items( + &self, + target: Target, + sort: &str, + page: u16, + per_page: usize, + options: &ServerOptions, + ) -> Result> { + let seeds = self.fetch_list(&target, sort, page, per_page, options).await?; + let options = options.clone(); + + Ok(stream::iter(seeds.into_iter().map(|seed| { + let provider = self.clone(); + let options = options.clone(); + async move { + match provider.enrich_video(seed, &options).await { + Ok(item) => Some(item), + Err(error) => { + report_provider_error_background( + CHANNEL_ID, + "fetch_target_items.enrich_video", + &error.to_string(), + ); + None + } + } + } + })) + .buffer_unordered(ENRICH_CONCURRENCY) + .filter_map(async move |value| value) + .collect::>() + .await) + } + + async fn 
resolve_uploader_id( + &self, + uploader_id: Option<&str>, + uploader_name: Option<&str>, + options: &ServerOptions, + ) -> Option { + if let Some(uploader_id) = uploader_id { + let local_id = uploader_id + .strip_prefix(&format!("{CHANNEL_ID}:")) + .unwrap_or(uploader_id) + .trim(); + if !local_id.is_empty() { + return Some(local_id.to_string()); + } + } + + let uploader_name = uploader_name?.trim(); + if uploader_name.is_empty() { + return None; + } + + if let Some(Target::Uploader { user_id }) = self.resolve_query_target(uploader_name) { + return Some(user_id); + } + + self.ensure_filters_loaded().await; + if let Some(Target::Uploader { user_id }) = self.resolve_query_target(uploader_name) { + return Some(user_id); + } + + match self.search_uploader_target(uploader_name, options).await { + Some(Target::Uploader { user_id }) => Some(user_id), + _ => None, + } + } + + async fn fetch_member_profile( + &self, + user_id: &str, + options: &ServerOptions, + ) -> Result { + let url = format!("{}/api/member.php", self.url); + let body = format!("section=user&user_id={user_id}"); + let headers = vec![("Content-Type", "application/x-www-form-urlencoded")]; + + let mut requester = requester_or_default(options, CHANNEL_ID, "fetch_member_profile"); + let response = requester + .post(&url, &body, headers) + .await + .map_err(|error| Error::from(format!("member profile request failed: {error}")))?; + let text = response + .text() + .await + .map_err(|error| Error::from(format!("member profile body failed: {error}")))?; + Ok(serde_json::from_str(&text)?) 
+ } + + fn profile_bio(profile: &MemberProfile) -> Option { + let mut parts = Vec::new(); + if !profile.about_me.trim().is_empty() { + parts.push(profile.about_me.trim().to_string()); + } + if !profile.interests.trim().is_empty() { + parts.push(profile.interests.trim().to_string()); + } + if !profile.occupation.trim().is_empty() { + parts.push(format!("Occupation: {}", profile.occupation.trim())); + } + if !profile.country.trim().is_empty() { + parts.push(format!("Country: {}", profile.country.trim().to_uppercase())); + } + (!parts.is_empty()).then(|| parts.join("\n")) + } + + async fn build_uploader_profile( + &self, + user_id: &str, + requested_name: Option<&str>, + query: Option<&str>, + profile_content: bool, + options: &ServerOptions, + ) -> Result> { + let response = self.fetch_member_profile(user_id, options).await?; + if response.success != 1 || response.user.user_id.trim().is_empty() { + return Ok(None); + } + + let resolved_name = requested_name + .filter(|value| !value.trim().is_empty()) + .map(ToOwned::to_owned) + .unwrap_or_else(|| response.user.username.clone()); + self.store_uploader_aliases( + &response.user.user_id, + &[resolved_name.clone(), response.user.username.clone()], + ); + + let mut videos = None; + let mut layout = vec![UploaderLayoutRow::videos(Some("Uploads".to_string()))]; + + if profile_content { + let items = self + .fetch_target_items( + Target::Uploader { + user_id: response.user.user_id.clone(), + }, + "new", + 1, + 24, + options, + ) + .await?; + + let canonical_id = Self::canonical_uploader_id(&response.user.user_id); + let filtered_items = if let Some(query) = query.filter(|value| !value.trim().is_empty()) { + let normalized_query = Self::normalize_title(query); + items.into_iter() + .filter(|item| { + let haystack = format!( + "{} {}", + item.title, + item.tags.as_ref().map(|values| values.join(" ")).unwrap_or_default() + ); + Self::normalize_title(&haystack).contains(&normalized_query) + }) + .collect::>() + } else { + items 
+ }; + + let refs = filtered_items + .iter() + .map(|item| UploaderVideoRef::from_video_item(item, &resolved_name, &canonical_id)) + .collect::>(); + let featured_ids = refs + .iter() + .take(10) + .map(|item| item.id.clone()) + .collect::>(); + + if !featured_ids.is_empty() { + layout.insert( + 0, + UploaderLayoutRow::horizontal(Some("For You".to_string()), featured_ids), + ); + } + + videos = Some(refs); + } + + Ok(Some(UploaderProfile { + id: Self::canonical_uploader_id(&response.user.user_id), + name: resolved_name, + url: Some(self.uploader_page_url(&response.user.user_id)), + channel: Some(CHANNEL_ID.to_string()), + verified: response.user.is_trusted == "1", + videoCount: Self::parse_u64(&response.user.statistics.videos).unwrap_or_default(), + totalViews: Self::parse_u64(&response.user.statistics.viewed).unwrap_or_default(), + channels: Some(vec![UploaderChannelStat { + channel: CHANNEL_ID.to_string(), + videoCount: Self::parse_u64(&response.user.statistics.videos).unwrap_or_default(), + firstSeenAt: None, + lastSeenAt: None, + }]), + avatar: Self::absolute_avatar(&response.user.avatar), + description: None, + bio: Self::profile_bio(&response.user), + videos, + tapes: Some(vec![]), + playlists: Some(vec![]), + layout: Some(layout), + })) + } +} + +#[async_trait] +impl Provider for VjavProvider { + async fn get_videos( + &self, + cache: VideoCache, + pool: DbPool, + sort: String, + query: Option, + page: String, + per_page: String, + options: ServerOptions, + ) -> Vec { + let _ = cache; + let _ = pool; + + let page = page.parse::().unwrap_or(1); + let per_page = per_page + .parse::() + .unwrap_or(DEFAULT_PER_PAGE) + .clamp(1, MAX_PER_PAGE); + let normalized_query = query + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned); + let target = self.pick_target(normalized_query.as_deref(), &options).await; + + match self + .fetch_target_items(target, &sort, page, per_page, &options) + .await + { + Ok(items) => items, + 
Err(error) => { + report_provider_error(CHANNEL_ID, "get_videos", &error.to_string()).await; + vec![] + } + } + } + + fn get_channel(&self, clientversion: ClientVersion) -> Option { + Some(self.build_channel(clientversion)) + } + + async fn get_uploader( + &self, + cache: VideoCache, + pool: DbPool, + uploader_id: Option, + uploader_name: Option, + query: Option, + profile_content: bool, + options: ServerOptions, + ) -> std::result::Result, String> { + let _ = cache; + let _ = pool; + + let Some(user_id) = self + .resolve_uploader_id( + uploader_id.as_deref(), + uploader_name.as_deref(), + &options, + ) + .await + else { + return Ok(None); + }; + + self.build_uploader_profile( + &user_id, + uploader_name.as_deref(), + query.as_deref(), + profile_content, + &options, + ) + .await + .map_err(|error| error.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn provider() -> VjavProvider { + VjavProvider { + url: BASE_URL.to_string(), + tags: Arc::new(RwLock::new(vec![ + FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }, + FilterOption { + id: "japanese".to_string(), + title: "Japanese".to_string(), + }, + ])), + hot_searches: Arc::new(RwLock::new(vec![ + FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }, + FilterOption { + id: "Teacher".to_string(), + title: "Teacher".to_string(), + }, + ])), + uploaders: Arc::new(RwLock::new(vec![ + FilterOption { + id: "all".to_string(), + title: "All".to_string(), + }, + FilterOption { + id: "335627".to_string(), + title: "bata167".to_string(), + }, + ])), + tag_ids: Arc::new(RwLock::new(HashMap::from([( + "japanese".to_string(), + "5".to_string(), + )]))), + tag_targets: Arc::new(RwLock::new(HashMap::from([( + "japanese".to_string(), + "japanese".to_string(), + )]))), + hot_search_lookup: Arc::new(RwLock::new(HashMap::from([( + "teacher".to_string(), + "Teacher".to_string(), + )]))), + uploader_lookup: Arc::new(RwLock::new(HashMap::from([( + "bata167".to_string(), + 
"335627".to_string(), + )]))), + } + } + + #[test] + fn decodes_obfuscated_video_url() { + let provider = provider(); + let url = provider + .decode_video_url("L2dldF9maWxlLzМvМDc0ZjczY2Q2Zjg2МjQ2ZTRhМjkwМDFiYWUwYzljODМzZTlkN2FkY2U4LzgxNjАwМС84МTYyNTcvODЕ2МjU3X2hxLm1wNС8,ZD0yМzUzJmJyPTЕ4NSZ0aT0xNzc1МjQxODUx") + .expect("decodes"); + + assert!(url.starts_with("https://vjav.com/get_file/")); + assert!(url.contains("video.m3u8")); + assert!(url.contains("ti=")); + + let legacy = provider + .decode_video_url("aHR0cHМ6Ly92aWRlb3R4eHguY29tL2V4dС9nZXRfZmlsZS85LzZlМzkwNmМwYWNkNDRiZDМzNWVmYTЕ5ZTQzODY0МGМ1ODZlNjY5YmU4NS8zOTgwМDАvМzk4МTМxLzМ5ODЕzМV9ocS5tcDQvP2Q9NzQ1МСZicj0xМDМmdGk9МTc3NTI0МjkzМw~~") + .expect("decodes legacy"); + assert!(legacy.starts_with("https://videotxxx.com/")); + } + + #[test] + fn builds_archive_urls() { + let provider = provider(); + + assert_eq!( + provider.build_list_api_url(&Target::Videos, "new", 2, 30), + "https://vjav.com/api/json/videos2/86400/str/latest-updates/30/..2.all...json" + ); + assert_eq!( + provider.build_list_api_url( + &Target::Tag { + slug: "japanese".to_string(), + }, + "views", + 3, + 12, + ), + "https://vjav.com/api/json/videos2/86400/str/most-viewed/12/categories.japanese.3.all...json" + ); + assert_eq!( + provider.build_list_api_url( + &Target::Uploader { + user_id: "335627".to_string(), + }, + "top", + 1, + 5, + ), + "https://vjav.com/api/json/videos2/14400/str/top-rated/5/uploaded_videos.335627.1.all...json" + ); + } + + #[test] + fn resolves_exact_query_targets() { + let provider = provider(); + + match provider.resolve_query_target("bata167") { + Some(Target::Uploader { user_id }) => assert_eq!(user_id, "335627"), + _ => panic!("expected uploader target"), + } + + match provider.resolve_query_target("Japanese") { + Some(Target::Tag { slug }) => assert_eq!(slug, "japanese"), + _ => panic!("expected tag target"), + } + + match provider.resolve_query_target("Teacher") { + Some(Target::Search { query }) => assert_eq!(query, 
"Teacher"), + _ => panic!("expected search target"), + } + } +} diff --git a/src/util/requester.rs b/src/util/requester.rs index e6bc359..34a08b6 100644 --- a/src/util/requester.rs +++ b/src/util/requester.rs @@ -172,7 +172,11 @@ impl Requester { Ok(response.text().await?) } - #[cfg(any(not(hottub_single_provider), hottub_provider = "hypnotube"))] + #[cfg(any( + not(hottub_single_provider), + hottub_provider = "hypnotube", + hottub_provider = "vjav", + ))] fn debug_cookie_preview_from_borrowed_headers( &self, url: &str, @@ -379,6 +383,7 @@ impl Requester { not(hottub_single_provider), hottub_provider = "hypnotube", hottub_provider = "freeuseporn", + hottub_provider = "vjav", ))] pub async fn post( &mut self,