uploaders
This commit is contained in:
@@ -4,6 +4,9 @@ use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
};
|
||||
use crate::status::*;
|
||||
use crate::uploaders::{
|
||||
UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef,
|
||||
};
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::requester::Requester;
|
||||
@@ -14,6 +17,7 @@ use chrono::{DateTime, Duration as ChronoDuration, NaiveDate, Utc};
|
||||
use error_chain::error_chain;
|
||||
use futures::stream::{self, StreamExt};
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
|
||||
use regex::Regex;
|
||||
use scraper::{ElementRef, Html, Selector};
|
||||
use std::sync::{Arc, RwLock};
|
||||
@@ -507,6 +511,263 @@ impl HsexProvider {
|
||||
}
|
||||
}
|
||||
|
||||
fn canonical_uploader_id(author: &str) -> String {
|
||||
format!(
|
||||
"{CHANNEL_ID}:{}",
|
||||
utf8_percent_encode(author, NON_ALPHANUMERIC)
|
||||
)
|
||||
}
|
||||
|
||||
fn author_from_uploader_id(value: &str) -> Option<String> {
|
||||
let suffix = match value.split_once(':') {
|
||||
Some((channel, suffix)) if channel.eq_ignore_ascii_case(CHANNEL_ID) => suffix,
|
||||
Some(_) => return None,
|
||||
None => value,
|
||||
};
|
||||
percent_decode_str(suffix)
|
||||
.decode_utf8()
|
||||
.ok()
|
||||
.map(|value| value.into_owned())
|
||||
.and_then(|value| (!value.trim().is_empty()).then_some(value))
|
||||
}
|
||||
|
||||
fn author_from_uploader_href(&self, href: &str) -> Option<String> {
|
||||
let url = Url::parse(&self.absolute_url(href)).ok()?;
|
||||
url.query_pairs()
|
||||
.find(|(key, _)| key == "author")
|
||||
.map(|(_, value)| value.to_string())
|
||||
}
|
||||
|
||||
fn pagination_last_page(html: &str) -> Option<u16> {
|
||||
let regex = Regex::new(r#"user-(?P<page>\d+)\.htm\?author="#).ok()?;
|
||||
regex
|
||||
.captures_iter(html)
|
||||
.filter_map(|captures| captures.name("page")?.as_str().parse::<u16>().ok())
|
||||
.max()
|
||||
}
|
||||
|
||||
/// Finds the known uploader filter option matching `uploader_name`.
///
/// The name is trimmed first; a blank name yields `None`. An option matches
/// when its title equals the name exactly, equals it after lowercasing both
/// sides, or when its id equals the name ignoring ASCII case. Returns a
/// clone of the first match, or `None` if nothing matches or the uploader
/// list lock cannot be read.
fn uploader_option_by_name(&self, uploader_name: &str) -> Option<FilterOption> {
    let wanted = uploader_name.trim();
    if wanted.is_empty() {
        return None;
    }
    let wanted_lower = wanted.to_lowercase();

    let known = self.uploaders.read().ok()?;
    for candidate in known.iter() {
        let is_match = candidate.title == wanted
            || candidate.title.to_lowercase() == wanted_lower
            || candidate.id.eq_ignore_ascii_case(wanted);
        if is_match {
            return Some(candidate.clone());
        }
    }
    None
}
|
||||
|
||||
fn resolve_uploader_author(
|
||||
&self,
|
||||
uploader_id: Option<&str>,
|
||||
uploader_name: Option<&str>,
|
||||
) -> Option<String> {
|
||||
if let Some(uploader_id) = uploader_id {
|
||||
if let Some(author) = Self::author_from_uploader_id(uploader_id) {
|
||||
return Some(author);
|
||||
}
|
||||
}
|
||||
if let Some(uploader_name) = uploader_name {
|
||||
if let Some(option) = self.uploader_option_by_name(uploader_name) {
|
||||
if let Some(Target::Uploader { author }) = self.target_from_filter_id(&option.id) {
|
||||
return Some(author);
|
||||
}
|
||||
}
|
||||
let trimmed = uploader_name.trim();
|
||||
if !trimmed.is_empty() {
|
||||
return Some(trimmed.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn display_name_for_uploader(
|
||||
&self,
|
||||
author: &str,
|
||||
requested_name: Option<&str>,
|
||||
first_page_items: &[VideoItem],
|
||||
) -> String {
|
||||
if let Some(requested_name) = requested_name {
|
||||
let trimmed = requested_name.trim();
|
||||
if !trimmed.is_empty() {
|
||||
return trimmed.to_string();
|
||||
}
|
||||
}
|
||||
if let Some(name) = first_page_items
|
||||
.iter()
|
||||
.find_map(|item| item.uploader.as_deref())
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
{
|
||||
return name.to_string();
|
||||
}
|
||||
if let Some(option) = self
|
||||
.uploaders
|
||||
.read()
|
||||
.ok()
|
||||
.and_then(|values| {
|
||||
values
|
||||
.iter()
|
||||
.find(|value| value.id.contains(author) || value.title == author)
|
||||
.cloned()
|
||||
})
|
||||
{
|
||||
return option.title;
|
||||
}
|
||||
author.to_string()
|
||||
}
|
||||
|
||||
/// Returns a copy of `videos` ordered by relevance to `query`.
///
/// With no query (or a blank one) the videos are returned in their original
/// order. Otherwise each video scores 2 when its title contains the query and
/// 1 when its uploader does (both case-insensitively); videos are sorted by
/// score descending, then views descending, then id ascending.
fn rank_videos_for_query(
    videos: &[UploaderVideoRef],
    query: Option<&str>,
) -> Vec<UploaderVideoRef> {
    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
        return videos.to_vec();
    };
    let needle = query.to_lowercase();

    let relevance = |video: &UploaderVideoRef| -> u8 {
        let mut score = 0u8;
        if video.title.to_lowercase().contains(&needle) {
            score += 2;
        }
        if video.uploader.to_lowercase().contains(&needle) {
            score += 1;
        }
        score
    };

    // Score each video once up front; scoring inside the comparator would
    // lowercase two strings per video on every comparison.
    let mut scored: Vec<(u8, UploaderVideoRef)> = videos
        .iter()
        .map(|video| (relevance(video), video.clone()))
        .collect();

    scored.sort_by(|(score_a, a), (score_b, b)| {
        score_b
            .cmp(score_a)
            .then_with(|| b.views.cmp(&a.views))
            .then_with(|| a.id.cmp(&b.id))
    });

    scored.into_iter().map(|(_, video)| video).collect()
}
|
||||
|
||||
async fn build_uploader_profile(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
author: &str,
|
||||
requested_name: Option<&str>,
|
||||
query: Option<&str>,
|
||||
profile_content: bool,
|
||||
options: &ServerOptions,
|
||||
) -> Result<Option<UploaderProfile>> {
|
||||
let first_page_url = self.build_uploader_url(author, 1);
|
||||
let first_page_items = self
|
||||
.fetch_items_for_url(
|
||||
cache.clone(),
|
||||
first_page_url.clone(),
|
||||
64,
|
||||
profile_content,
|
||||
options,
|
||||
)
|
||||
.await?;
|
||||
if first_page_items.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut requester = requester_or_default(options, CHANNEL_ID, "get_uploader.profile_page");
|
||||
let first_page_html = self
|
||||
.fetch_html(&mut requester, &first_page_url, &format!("{}/", self.url))
|
||||
.await?;
|
||||
let last_page = Self::pagination_last_page(&first_page_html).unwrap_or(1);
|
||||
let first_page_size = first_page_items.len() as u64;
|
||||
|
||||
let last_page_items = if last_page > 1 {
|
||||
self.fetch_items_for_url(
|
||||
cache,
|
||||
self.build_uploader_url(author, last_page),
|
||||
64,
|
||||
false,
|
||||
options,
|
||||
)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let display_name = self.display_name_for_uploader(author, requested_name, &first_page_items);
|
||||
let canonical_id = Self::canonical_uploader_id(author);
|
||||
let mut videos = first_page_items
|
||||
.iter()
|
||||
.map(|item| UploaderVideoRef::from_video_item(item, &display_name, &canonical_id))
|
||||
.collect::<Vec<_>>();
|
||||
let ranked_videos = Self::rank_videos_for_query(&videos, query);
|
||||
let horizontal_ids = ranked_videos
|
||||
.iter()
|
||||
.take(12)
|
||||
.map(|video| video.id.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let newest_seen = first_page_items
|
||||
.iter()
|
||||
.filter_map(|item| item.uploadedAt)
|
||||
.max();
|
||||
let oldest_seen = last_page_items
|
||||
.iter()
|
||||
.filter_map(|item| item.uploadedAt)
|
||||
.min()
|
||||
.or_else(|| first_page_items.iter().filter_map(|item| item.uploadedAt).min());
|
||||
|
||||
let video_count = if last_page > 1 {
|
||||
((last_page as u64 - 1) * first_page_size) + last_page_items.len() as u64
|
||||
} else {
|
||||
first_page_size
|
||||
};
|
||||
let total_views = first_page_items
|
||||
.iter()
|
||||
.chain(last_page_items.iter())
|
||||
.filter_map(|item| item.views)
|
||||
.map(u64::from)
|
||||
.sum();
|
||||
|
||||
for item in &mut videos {
|
||||
item.uploader = display_name.clone();
|
||||
item.uploaderId = canonical_id.clone();
|
||||
}
|
||||
|
||||
let layout = if horizontal_ids.is_empty() {
|
||||
vec![UploaderLayoutRow::videos(None)]
|
||||
} else {
|
||||
vec![
|
||||
UploaderLayoutRow::horizontal(Some("For You".to_string()), horizontal_ids),
|
||||
UploaderLayoutRow::videos(None),
|
||||
]
|
||||
};
|
||||
|
||||
Ok(Some(UploaderProfile {
|
||||
id: canonical_id,
|
||||
name: display_name,
|
||||
url: Some(first_page_url),
|
||||
channel: Some(CHANNEL_ID.to_string()),
|
||||
verified: false,
|
||||
videoCount: video_count,
|
||||
totalViews: total_views,
|
||||
channels: Some(vec![UploaderChannelStat {
|
||||
channel: CHANNEL_ID.to_string(),
|
||||
videoCount: video_count,
|
||||
firstSeenAt: crate::uploaders::iso_timestamp_from_unix(oldest_seen),
|
||||
lastSeenAt: crate::uploaders::iso_timestamp_from_unix(newest_seen),
|
||||
}]),
|
||||
avatar: None,
|
||||
description: None,
|
||||
bio: None,
|
||||
videos: profile_content.then_some(videos),
|
||||
tapes: profile_content.then_some(Vec::new()),
|
||||
playlists: profile_content.then_some(Vec::new()),
|
||||
layout: Some(layout),
|
||||
}))
|
||||
}
|
||||
|
||||
fn first_video_link<'a>(&self, element: &'a ElementRef<'a>) -> Result<Option<ElementRef<'a>>> {
|
||||
let selector = Self::selector("a[href]")?;
|
||||
Ok(element.select(&selector).find(|link| {
|
||||
@@ -636,10 +897,13 @@ impl HsexProvider {
|
||||
if !uploader_name.is_empty() {
|
||||
item.uploader = Some(uploader_name);
|
||||
}
|
||||
if let Some(uploader_href) = uploader.value().attr("href") {
|
||||
item.uploaderUrl = Some(self.absolute_url(uploader_href));
|
||||
}
|
||||
if let Some(uploader_href) = uploader.value().attr("href") {
|
||||
item.uploaderUrl = Some(self.absolute_url(uploader_href));
|
||||
item.uploaderId = self
|
||||
.author_from_uploader_href(uploader_href)
|
||||
.map(|author| Self::canonical_uploader_id(&author));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(info) = element.select(&info_selector).next() {
|
||||
let info_text = Self::decode_text(&Self::collapse_whitespace(
|
||||
@@ -701,6 +965,9 @@ impl HsexProvider {
|
||||
}
|
||||
if let Some(href) = author.value().attr("href") {
|
||||
item.uploaderUrl = Some(self.absolute_url(href));
|
||||
item.uploaderId = self
|
||||
.author_from_uploader_href(href)
|
||||
.map(|author| Self::canonical_uploader_id(&author));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -934,6 +1201,34 @@ impl Provider for HsexProvider {
|
||||
/// Returns this provider's channel description for `clientversion`.
/// Always `Some`.
fn get_channel(&self, clientversion: ClientVersion) -> Option<Channel> {
    let channel = self.build_channel(clientversion);
    Some(channel)
}
|
||||
|
||||
async fn get_uploader(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
pool: DbPool,
|
||||
uploader_id: Option<String>,
|
||||
uploader_name: Option<String>,
|
||||
query: Option<String>,
|
||||
profile_content: bool,
|
||||
options: ServerOptions,
|
||||
) -> std::result::Result<Option<UploaderProfile>, String> {
|
||||
let _ = pool;
|
||||
let Some(author) =
|
||||
self.resolve_uploader_author(uploader_id.as_deref(), uploader_name.as_deref())
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
self.build_uploader_profile(
|
||||
cache,
|
||||
&author,
|
||||
uploader_name.as_deref(),
|
||||
query.as_deref(),
|
||||
profile_content,
|
||||
&options,
|
||||
)
|
||||
.await
|
||||
.map_err(|error| error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1020,6 +1315,29 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// An author encoded into a canonical id decodes back to the same author.
#[test]
fn canonical_uploader_id_round_trips() {
    let author = "xihongshiddd";

    let id = HsexProvider::canonical_uploader_id(author);
    assert_eq!(id, "hsex:xihongshiddd");

    let decoded = HsexProvider::author_from_uploader_id(&id);
    assert_eq!(decoded.as_deref(), Some(author));
}
|
||||
|
||||
/// The highest page number among the pagination links is reported.
#[test]
fn parses_last_page_from_pagination() {
    let html = r#"
        <ul class="pagination1">
            <li><a href="user-1.htm?author=xihongshiddd">1</a></li>
            <li><a href="user-2.htm?author=xihongshiddd">2</a></li>
            <li><a href="user-7.htm?author=xihongshiddd">7</a></li>
        </ul>
    "#;

    let last_page = HsexProvider::pagination_last_page(html);
    assert_eq!(last_page, Some(7));
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore]
|
||||
async fn fetches_page_two_items() {
|
||||
|
||||
@@ -12,6 +12,7 @@ use crate::{
|
||||
DbPool,
|
||||
api::ClientVersion,
|
||||
status::{Channel, ChannelGroup, ChannelView, FilterOption, Status, StatusResponse},
|
||||
uploaders::UploaderProfile,
|
||||
util::{cache::VideoCache, discord::send_discord_error_report, requester::Requester},
|
||||
videos::{FlexibleNumber, ServerOptions, VideoItem, VideosRequest},
|
||||
};
|
||||
@@ -577,6 +578,53 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run_uploader_provider_guarded<F>(
|
||||
provider_name: &str,
|
||||
context: &str,
|
||||
fut: F,
|
||||
) -> Result<Option<UploaderProfile>, String>
|
||||
where
|
||||
F: Future<Output = Result<Option<UploaderProfile>, String>>,
|
||||
{
|
||||
crate::flow_debug!(
|
||||
"provider uploader guard enter provider={} context={}",
|
||||
provider_name,
|
||||
context
|
||||
);
|
||||
match AssertUnwindSafe(fut).catch_unwind().await {
|
||||
Ok(result) => {
|
||||
crate::flow_debug!(
|
||||
"provider uploader guard exit provider={} context={} matched={}",
|
||||
provider_name,
|
||||
context,
|
||||
result.as_ref().ok().and_then(|value| value.as_ref()).is_some()
|
||||
);
|
||||
result
|
||||
}
|
||||
Err(payload) => {
|
||||
let panic_msg = panic_payload_to_string(payload);
|
||||
crate::flow_debug!(
|
||||
"provider uploader guard panic provider={} context={} panic={}",
|
||||
provider_name,
|
||||
context,
|
||||
&panic_msg
|
||||
);
|
||||
let _ = send_discord_error_report(
|
||||
format!("Provider panic: {}", provider_name),
|
||||
None,
|
||||
Some("Provider Guard"),
|
||||
Some(&format!("context={}; panic={}", context, panic_msg)),
|
||||
file!(),
|
||||
line!(),
|
||||
module_path!(),
|
||||
)
|
||||
.await;
|
||||
schedule_provider_validation(provider_name, context, &panic_msg);
|
||||
Err(panic_msg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn report_provider_error(provider_name: &str, context: &str, msg: &str) {
|
||||
let _ = send_discord_error_report(
|
||||
format!("Provider error: {}", provider_name),
|
||||
@@ -868,6 +916,19 @@ pub trait Provider: Send + Sync {
|
||||
cacheDuration: None,
|
||||
})
|
||||
}
|
||||
|
||||
async fn get_uploader(
|
||||
&self,
|
||||
_cache: VideoCache,
|
||||
_pool: DbPool,
|
||||
_uploader_id: Option<String>,
|
||||
_uploader_name: Option<String>,
|
||||
_query: Option<String>,
|
||||
_profile_content: bool,
|
||||
_options: ServerOptions,
|
||||
) -> Result<Option<UploaderProfile>, String> {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, not(hottub_single_provider)))]
|
||||
|
||||
@@ -474,6 +474,19 @@ impl NoodlemagazineProvider {
|
||||
if normalized.is_empty() || !self.is_allowed_thumb_url(&normalized) {
|
||||
return String::new();
|
||||
}
|
||||
let Some(url) = Url::parse(&normalized).ok() else {
|
||||
return String::new();
|
||||
};
|
||||
if url
|
||||
.host_str()
|
||||
.is_some_and(|host| host.eq_ignore_ascii_case("img.pvvstream.pro"))
|
||||
{
|
||||
return crate::providers::build_proxy_url(
|
||||
_options,
|
||||
"noodlemagazine-thumb",
|
||||
&crate::providers::strip_url_scheme(&normalized),
|
||||
);
|
||||
}
|
||||
normalized
|
||||
}
|
||||
|
||||
@@ -707,7 +720,7 @@ mod tests {
|
||||
assert_eq!(items.len(), 1);
|
||||
assert_eq!(
|
||||
items[0].thumb,
|
||||
"https://img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l"
|
||||
"https://example.com/proxy/noodlemagazine-thumb/img.pvvstream.pro/preview/abc/-111_222/240/iv.okcdn.ru/getVideoPreview?id=1&type=39&fn=vid_l"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user