omgxx uploader
This commit is contained in:
@@ -1,6 +1,12 @@
|
||||
use crate::DbPool;
|
||||
use crate::api::ClientVersion;
|
||||
use crate::providers::{Provider, report_provider_error, report_provider_error_background};
|
||||
use crate::providers::{
|
||||
Provider, report_provider_error, report_provider_error_background, requester_or_default,
|
||||
};
|
||||
use crate::uploaders::{
|
||||
UploaderChannelStat, UploaderLayoutRow, UploaderProfile, UploaderVideoRef,
|
||||
iso_timestamp_from_unix,
|
||||
};
|
||||
use crate::util::cache::VideoCache;
|
||||
use crate::util::parse_abbreviated_number;
|
||||
use crate::util::time::parse_time_to_seconds;
|
||||
@@ -9,6 +15,8 @@ use crate::{status::*, util};
|
||||
use async_trait::async_trait;
|
||||
use error_chain::error_chain;
|
||||
use htmlentity::entity::{ICodedDataTrait, decode};
|
||||
use percent_encoding::{NON_ALPHANUMERIC, percent_decode_str, utf8_percent_encode};
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::thread;
|
||||
@@ -34,6 +42,20 @@ pub struct OmgxxxProvider {
|
||||
networks: Arc<RwLock<Vec<FilterOption>>>,
|
||||
stars: Arc<RwLock<Vec<FilterOption>>>,
|
||||
}
|
||||
|
||||
/// Which kind of OMG.XXX listing an uploader target refers to.
///
/// The variant selects the `site`/`network` segment of the canonical uploader
/// id and the `sites`/`networks` segment of the listing URL.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum OmgUploaderTargetKind {
    /// A single site listing.
    Site,
    /// A network listing.
    Network,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind,
|
||||
id: String,
|
||||
title: String,
|
||||
}
|
||||
|
||||
impl OmgxxxProvider {
|
||||
pub fn new() -> Self {
|
||||
let provider = OmgxxxProvider {
|
||||
@@ -418,6 +440,329 @@ impl OmgxxxProvider {
|
||||
}
|
||||
}
|
||||
|
||||
fn canonical_uploader_id(kind: &OmgUploaderTargetKind, id: &str) -> String {
|
||||
let kind = match kind {
|
||||
OmgUploaderTargetKind::Site => "site",
|
||||
OmgUploaderTargetKind::Network => "network",
|
||||
};
|
||||
format!(
|
||||
"omgxxx:{kind}:{}",
|
||||
utf8_percent_encode(id, NON_ALPHANUMERIC)
|
||||
)
|
||||
}
|
||||
|
||||
fn uploader_target_from_id(&self, uploader_id: &str) -> Option<OmgUploaderTarget> {
|
||||
let parts = uploader_id.split(':').collect::<Vec<_>>();
|
||||
if parts.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (kind, raw_id) = match parts.as_slice() {
|
||||
["omgxxx", kind, raw_id] => (*kind, *raw_id),
|
||||
["omgxxx", raw_id] => ("site", *raw_id),
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let decoded_id = percent_decode_str(raw_id).decode_utf8().ok()?.to_string();
|
||||
match kind {
|
||||
"site" => self
|
||||
.sites
|
||||
.read()
|
||||
.ok()?
|
||||
.iter()
|
||||
.find(|option| option.id == decoded_id)
|
||||
.map(|option| OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind::Site,
|
||||
id: option.id.clone(),
|
||||
title: option.title.clone(),
|
||||
})
|
||||
.or_else(|| {
|
||||
Some(OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind::Site,
|
||||
id: decoded_id.clone(),
|
||||
title: decoded_id.clone(),
|
||||
})
|
||||
}),
|
||||
"network" => self
|
||||
.networks
|
||||
.read()
|
||||
.ok()?
|
||||
.iter()
|
||||
.find(|option| option.id == decoded_id)
|
||||
.map(|option| OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind::Network,
|
||||
id: option.id.clone(),
|
||||
title: option.title.clone(),
|
||||
})
|
||||
.or_else(|| {
|
||||
Some(OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind::Network,
|
||||
id: decoded_id.clone(),
|
||||
title: decoded_id.clone(),
|
||||
})
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn uploader_target_from_name(&self, uploader_name: &str) -> Option<OmgUploaderTarget> {
|
||||
let normalized = uploader_name.trim();
|
||||
if normalized.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let lowered = normalized.to_ascii_lowercase();
|
||||
|
||||
if let Ok(sites) = self.sites.read() {
|
||||
if let Some(option) = sites.iter().find(|option| {
|
||||
option.title.eq_ignore_ascii_case(normalized)
|
||||
|| option.id.eq_ignore_ascii_case(normalized)
|
||||
|| option.title.to_ascii_lowercase() == lowered
|
||||
}) {
|
||||
return Some(OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind::Site,
|
||||
id: option.id.clone(),
|
||||
title: option.title.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(networks) = self.networks.read() {
|
||||
if let Some(option) = networks.iter().find(|option| {
|
||||
option.title.eq_ignore_ascii_case(normalized)
|
||||
|| option.id.eq_ignore_ascii_case(normalized)
|
||||
|| option.title.to_ascii_lowercase() == lowered
|
||||
}) {
|
||||
return Some(OmgUploaderTarget {
|
||||
kind: OmgUploaderTargetKind::Network,
|
||||
id: option.id.clone(),
|
||||
title: option.title.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn resolve_uploader_target(
|
||||
&self,
|
||||
uploader_id: Option<&str>,
|
||||
uploader_name: Option<&str>,
|
||||
) -> Option<OmgUploaderTarget> {
|
||||
uploader_id
|
||||
.and_then(|value| self.uploader_target_from_id(value))
|
||||
.or_else(|| uploader_name.and_then(|value| self.uploader_target_from_name(value)))
|
||||
}
|
||||
|
||||
fn uploader_target_url(&self, target: &OmgUploaderTarget, page: u8) -> String {
|
||||
let base = match target.kind {
|
||||
OmgUploaderTargetKind::Site => "sites",
|
||||
OmgUploaderTargetKind::Network => "networks",
|
||||
};
|
||||
format!("{}/{}/{}/{}/", self.url, base, target.id, page.max(1))
|
||||
}
|
||||
|
||||
fn uploader_target_last_page(&self, html: &str, target: &OmgUploaderTarget) -> Option<u8> {
|
||||
let base = match target.kind {
|
||||
OmgUploaderTargetKind::Site => "sites",
|
||||
OmgUploaderTargetKind::Network => "networks",
|
||||
};
|
||||
let pattern = format!(r#"/{}/{}/(?P<page>\d+)/"#, regex::escape(base), regex::escape(&target.id));
|
||||
let regex = Regex::new(&pattern).ok()?;
|
||||
regex
|
||||
.captures_iter(html)
|
||||
.filter_map(|captures| captures.name("page")?.as_str().parse::<u8>().ok())
|
||||
.max()
|
||||
}
|
||||
|
||||
fn display_name_for_uploader_target(
|
||||
&self,
|
||||
target: &OmgUploaderTarget,
|
||||
html: &str,
|
||||
first_page_items: &[VideoItem],
|
||||
) -> String {
|
||||
if !target.title.trim().is_empty() && target.title != target.id {
|
||||
return target.title.clone();
|
||||
}
|
||||
|
||||
if let Some(title) = html
|
||||
.split("<title>")
|
||||
.nth(1)
|
||||
.and_then(|segment| segment.split("</title>").next())
|
||||
.map(|title| decode(title.as_bytes()).to_string().unwrap_or_else(|_| title.to_string()))
|
||||
.map(|title| title.replace(" Porn! 😮 - OMG.XXX", ""))
|
||||
.map(|title| title.replace(" - OMG.XXX", ""))
|
||||
.map(|title| title.trim().to_string())
|
||||
.filter(|title| !title.is_empty())
|
||||
{
|
||||
return title;
|
||||
}
|
||||
|
||||
if let Some(site_name) = first_page_items.iter().find_map(|item| {
|
||||
item.title
|
||||
.strip_prefix('[')
|
||||
.and_then(|title| title.split(']').next())
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
}) {
|
||||
return site_name;
|
||||
}
|
||||
|
||||
target
|
||||
.id
|
||||
.split('-')
|
||||
.map(|part| {
|
||||
let mut chars = part.chars();
|
||||
match chars.next() {
|
||||
Some(first) => first.to_uppercase().collect::<String>() + chars.as_str(),
|
||||
None => String::new(),
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
}
|
||||
|
||||
/// Orders videos by relevance to `query`.
///
/// With no query (or a blank one) the videos are returned in their original
/// order. Otherwise a video scores 2 when its title contains the query and 1
/// more when its uploader does (ASCII-case-insensitive). Videos are sorted by
/// score descending, then views descending, then id ascending.
fn rank_uploader_videos(
    videos: &[UploaderVideoRef],
    query: Option<&str>,
) -> Vec<UploaderVideoRef> {
    let Some(query) = query.map(str::trim).filter(|value| !value.is_empty()) else {
        return videos.to_vec();
    };
    let query = query.to_ascii_lowercase();

    let relevance = |video: &UploaderVideoRef| -> u8 {
        let mut score = 0u8;
        if video.title.to_ascii_lowercase().contains(&query) {
            score += 2;
        }
        if video.uploader.to_ascii_lowercase().contains(&query) {
            score += 1;
        }
        score
    };

    // Score each video once up front; scoring inside the comparator would
    // lowercase both strings again on every comparison.
    let mut scored = videos
        .iter()
        .map(|video| (relevance(video), video.clone()))
        .collect::<Vec<_>>();
    scored.sort_by(|(score_a, a), (score_b, b)| {
        score_b
            .cmp(score_a)
            .then_with(|| b.views.cmp(&a.views))
            .then_with(|| a.id.cmp(&b.id))
    });
    scored.into_iter().map(|(_, video)| video).collect()
}
|
||||
|
||||
async fn build_uploader_profile(
|
||||
&self,
|
||||
_cache: VideoCache,
|
||||
target: &OmgUploaderTarget,
|
||||
query: Option<&str>,
|
||||
profile_content: bool,
|
||||
options: &ServerOptions,
|
||||
) -> Result<Option<UploaderProfile>> {
|
||||
let first_page_url = self.uploader_target_url(target, 1);
|
||||
let mut requester = requester_or_default(options, module_path!(), "missing_requester");
|
||||
let first_page_html = match requester.get(&first_page_url, None).await {
|
||||
Ok(html) => html,
|
||||
Err(error) => {
|
||||
return Err(Error::from(format!(
|
||||
"uploader page request failed url={first_page_url}; error={error}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
let first_page_items = self.get_video_items_from_html(first_page_html.clone());
|
||||
if first_page_items.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let display_name =
|
||||
self.display_name_for_uploader_target(target, &first_page_html, &first_page_items);
|
||||
|
||||
let last_page = self
|
||||
.uploader_target_last_page(&first_page_html, target)
|
||||
.unwrap_or(1);
|
||||
let last_page_items = if last_page > 1 {
|
||||
let mut requester = requester_or_default(options, module_path!(), "missing_requester");
|
||||
let html = requester
|
||||
.get(&self.uploader_target_url(target, last_page), None)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
self.get_video_items_from_html(html)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let uploader_id = Self::canonical_uploader_id(&target.kind, &target.id);
|
||||
let mut video_refs = first_page_items
|
||||
.iter()
|
||||
.map(|item| {
|
||||
let mut video =
|
||||
UploaderVideoRef::from_video_item(item, &display_name, &uploader_id);
|
||||
video.uploader = display_name.clone();
|
||||
video.uploaderId = uploader_id.clone();
|
||||
video
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let ranked = Self::rank_uploader_videos(&video_refs, query);
|
||||
let featured_ids = ranked
|
||||
.iter()
|
||||
.take(12)
|
||||
.map(|video| video.id.clone())
|
||||
.collect::<Vec<_>>();
|
||||
let video_count = if last_page > 1 {
|
||||
((last_page as u64 - 1) * first_page_items.len() as u64) + last_page_items.len() as u64
|
||||
} else {
|
||||
first_page_items.len() as u64
|
||||
};
|
||||
let total_views = first_page_items
|
||||
.iter()
|
||||
.chain(last_page_items.iter())
|
||||
.filter_map(|item| item.views)
|
||||
.map(u64::from)
|
||||
.sum();
|
||||
let newest = first_page_items.iter().filter_map(|item| item.uploadedAt).max();
|
||||
let oldest = last_page_items
|
||||
.iter()
|
||||
.filter_map(|item| item.uploadedAt)
|
||||
.min()
|
||||
.or_else(|| first_page_items.iter().filter_map(|item| item.uploadedAt).min());
|
||||
|
||||
for video in &mut video_refs {
|
||||
video.uploader = display_name.clone();
|
||||
video.uploaderId = uploader_id.clone();
|
||||
}
|
||||
|
||||
let layout = if featured_ids.is_empty() {
|
||||
vec![UploaderLayoutRow::videos(None)]
|
||||
} else {
|
||||
vec![
|
||||
UploaderLayoutRow::horizontal(Some("For You".to_string()), featured_ids),
|
||||
UploaderLayoutRow::videos(None),
|
||||
]
|
||||
};
|
||||
|
||||
Ok(Some(UploaderProfile {
|
||||
id: uploader_id,
|
||||
name: display_name,
|
||||
url: Some(first_page_url),
|
||||
channel: Some("omgxxx".to_string()),
|
||||
verified: false,
|
||||
videoCount: video_count,
|
||||
totalViews: total_views,
|
||||
channels: Some(vec![UploaderChannelStat {
|
||||
channel: "omgxxx".to_string(),
|
||||
videoCount: video_count,
|
||||
firstSeenAt: iso_timestamp_from_unix(oldest),
|
||||
lastSeenAt: iso_timestamp_from_unix(newest),
|
||||
}]),
|
||||
avatar: None,
|
||||
description: None,
|
||||
bio: None,
|
||||
videos: profile_content.then_some(video_refs),
|
||||
tapes: profile_content.then_some(Vec::new()),
|
||||
playlists: profile_content.then_some(Vec::new()),
|
||||
layout: Some(layout),
|
||||
}))
|
||||
}
|
||||
|
||||
async fn get(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
@@ -464,8 +809,7 @@ impl OmgxxxProvider {
|
||||
}
|
||||
};
|
||||
|
||||
let mut requester =
|
||||
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
|
||||
let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
|
||||
let text = match requester.get(&video_url, None).await {
|
||||
Ok(text) => text,
|
||||
Err(e) => {
|
||||
@@ -542,8 +886,7 @@ impl OmgxxxProvider {
|
||||
}
|
||||
};
|
||||
|
||||
let mut requester =
|
||||
crate::providers::requester_or_default(&options, module_path!(), "missing_requester");
|
||||
let mut requester = requester_or_default(&options, module_path!(), "missing_requester");
|
||||
let text = match requester.get(&video_url, None).await {
|
||||
Ok(text) => text,
|
||||
Err(e) => {
|
||||
@@ -837,7 +1180,7 @@ impl OmgxxxProvider {
|
||||
}
|
||||
}
|
||||
|
||||
let video_item = VideoItem::new(
|
||||
let mut video_item = VideoItem::new(
|
||||
id,
|
||||
title,
|
||||
video_url.to_string(),
|
||||
@@ -846,8 +1189,10 @@ impl OmgxxxProvider {
|
||||
duration,
|
||||
)
|
||||
.views(views)
|
||||
.preview(preview)
|
||||
.tags(tags);
|
||||
.preview(preview);
|
||||
if !tags.is_empty() {
|
||||
video_item.tags = Some(tags);
|
||||
}
|
||||
items.push(video_item);
|
||||
}
|
||||
return items;
|
||||
@@ -865,11 +1210,40 @@ mod tests {
|
||||
id: "clubsweethearts".to_string(),
|
||||
title: "Club Sweethearts".to_string(),
|
||||
}])),
|
||||
networks: Arc::new(RwLock::new(vec![])),
|
||||
networks: Arc::new(RwLock::new(vec![FilterOption {
|
||||
id: "mofos".to_string(),
|
||||
title: "Club Sweethearts".to_string(),
|
||||
}])),
|
||||
stars: Arc::new(RwLock::new(vec![])),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn uploader_name_prefers_site_before_network() {
    // The fixture gives a site and a network the same title; the site must win.
    let resolved = test_provider().uploader_target_from_name("Club Sweethearts");
    let target = resolved.expect("target should resolve");

    assert_eq!(target.kind, OmgUploaderTargetKind::Site);
    assert_eq!(target.id, "clubsweethearts");
}
|
||||
|
||||
#[test]
fn uploader_id_round_trips_for_networks() {
    // Parsing a canonical network id and re-encoding it must give the same string.
    let resolved = test_provider().uploader_target_from_id("omgxxx:network:mofos");
    let target = resolved.expect("target should resolve");

    assert_eq!(target.kind, OmgUploaderTargetKind::Network);
    assert_eq!(target.id, "mofos");

    let canonical = OmgxxxProvider::canonical_uploader_id(&target.kind, &target.id);
    assert_eq!(canonical, "omgxxx:network:mofos");
}
|
||||
|
||||
#[test]
|
||||
fn parses_model_and_site_tags_without_empty_strings() {
|
||||
let provider = test_provider();
|
||||
@@ -1043,4 +1417,26 @@ impl Provider for OmgxxxProvider {
|
||||
/// Returns this provider's channel description for the given client version.
///
/// Always `Some`; the channel is produced by `build_channel`.
fn get_channel(&self, clientversion: ClientVersion) -> Option<crate::status::Channel> {
    let channel = self.build_channel(clientversion);
    Some(channel)
}
|
||||
|
||||
async fn get_uploader(
|
||||
&self,
|
||||
cache: VideoCache,
|
||||
pool: DbPool,
|
||||
uploader_id: Option<String>,
|
||||
uploader_name: Option<String>,
|
||||
query: Option<String>,
|
||||
profile_content: bool,
|
||||
options: ServerOptions,
|
||||
) -> std::result::Result<Option<UploaderProfile>, String> {
|
||||
let _ = pool;
|
||||
let Some(target) =
|
||||
self.resolve_uploader_target(uploader_id.as_deref(), uploader_name.as_deref())
|
||||
else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
self.build_uploader_profile(cache, &target, query.as_deref(), profile_content, &options)
|
||||
.await
|
||||
.map_err(|error| error.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user