This commit is contained in:
Simon
2026-03-17 01:12:52 +00:00
parent a977381b3b
commit 9172941ac6
3 changed files with 235 additions and 62 deletions

View File

@@ -9,6 +9,7 @@ use crate::{status::*, util};
use async_trait::async_trait;
use error_chain::error_chain;
use htmlentity::entity::{ICodedDataTrait, decode};
use scraper::{Html, Selector};
use std::sync::{Arc, RwLock};
use std::thread;
use std::vec;
@@ -586,6 +587,28 @@ impl OmgxxxProvider {
return None;
}
/// Collects `(href, label)` pairs for every `<a>` nested inside a
/// `div.models` element of `video_segment`.
///
/// The label is the anchor's text content with every run of whitespace
/// collapsed to a single space and the ends trimmed. Anchors that have no
/// `href` attribute, or whose label ends up empty, are left out. Entries
/// are returned in the order the selector yields them.
fn extract_tag_entries(&self, video_segment: &str) -> Vec<(String, String)> {
    let document = Html::parse_fragment(video_segment);
    let models_links = Selector::parse("div.models a").expect("valid omgxxx models selector");
    let mut entries = Vec::new();
    for link in document.select(&models_links) {
        // Without an href there is nothing to pair the label with.
        let Some(target) = link.value().attr("href") else {
            continue;
        };
        // Tokenise each text node on whitespace and re-join with single
        // spaces; nested elements (icons, spans) contribute only their text.
        let words: Vec<&str> = link
            .text()
            .flat_map(|chunk| chunk.split_whitespace())
            .collect();
        if words.is_empty() {
            continue;
        }
        entries.push((target.to_string(), words.join(" ")));
    }
    entries
}
fn get_video_items_from_html(&self, html: String) -> Vec<VideoItem> {
if html.is_empty() {
println!("HTML is empty");
@@ -751,64 +774,50 @@ impl OmgxxxProvider {
let site_id = self
.get_site_id_from_name(site_name)
.unwrap_or("".to_string());
let mut tags = match video_segment.contains("class=\"models\">") {
true => video_segment
.split("class=\"models\">")
.collect::<Vec<&str>>()
.get(1)
.copied()
let mut tags = Vec::new();
for (href, tag_title) in self.extract_tag_entries(video_segment) {
if href.contains("/models/") {
let model_id = href
.split("/models/")
.nth(1)
.unwrap_or_default()
.split("</div>")
.collect::<Vec<&str>>()
.get(0)
.copied()
.split('/')
.next()
.unwrap_or_default()
.split("href=\"")
.collect::<Vec<&str>>()[1..]
.into_iter()
.map(|s| {
.to_string();
if !model_id.is_empty() {
Self::push_unique(
&self.stars,
FilterOption {
id: s
.split("/")
.collect::<Vec<&str>>()
.get(4)
.copied()
.unwrap_or_default()
.to_string(),
title: s
.split(">")
.collect::<Vec<&str>>()
.get(1)
.copied()
.unwrap_or_default()
.split("<")
.collect::<Vec<&str>>()
.get(0)
.copied()
.unwrap_or_default()
.trim()
.to_string(),
id: model_id,
title: tag_title.clone(),
},
);
s.split(">")
.collect::<Vec<&str>>()
.get(1)
.copied()
}
}
if href.contains("/sites/") {
let site_id = href
.split("/sites/")
.nth(1)
.unwrap_or_default()
.split("<")
.collect::<Vec<&str>>()
.get(0)
.copied()
.split('/')
.next()
.unwrap_or_default()
.trim()
.to_string()
})
.collect::<Vec<String>>()
.to_vec(),
false => vec![],
};
.to_string();
if !site_id.is_empty() {
Self::push_unique(
&self.sites,
FilterOption {
id: site_id,
title: tag_title.clone(),
},
);
}
}
if !tags.iter().any(|existing| existing == &tag_title) {
tags.push(tag_title);
}
}
if !site_id.is_empty() {
Self::push_unique(
&self.sites,
@@ -817,8 +826,10 @@ impl OmgxxxProvider {
title: site_name.to_string(),
},
);
if !tags.iter().any(|existing| existing == site_name) {
tags.push(site_name.to_string());
}
}
let video_item = VideoItem::new(
id,
@@ -837,6 +848,160 @@ impl OmgxxxProvider {
}
}
// Unit tests for the tag / filter-option extraction performed by
// `get_video_items_from_html`, driven by inline HTML fixtures.
#[cfg(test)]
mod tests {
use super::*;
// Builds a provider pointed at the production base URL with one
// pre-registered site ("Club Sweethearts") and empty network/star lists.
fn test_provider() -> OmgxxxProvider {
OmgxxxProvider {
url: "https://www.omg.xxx".to_string(),
sites: Arc::new(RwLock::new(vec![FilterOption {
id: "clubsweethearts".to_string(),
title: "Club Sweethearts".to_string(),
}])),
networks: Arc::new(RwLock::new(vec![])),
stars: Arc::new(RwLock::new(vec![])),
}
}
// A single item whose `div.models` block links one site and two models:
// the tags must be the three link labels, in document order, with no
// empty entries, and both models must be registered as stars.
#[test]
fn parses_model_and_site_tags_without_empty_strings() {
let provider = test_provider();
// Fixture: the anchors wrap an <svg> icon and a <span> label, so the
// label text is surrounded by whitespace-only text nodes.
let html = r##"
<div class="list-videos">
<div class="item">
<a href="https://www.omg.xxx/videos/4290034/example-video/" title="[Club Sweethearts] Example Video"></a>
<img loading="lazy" data-src="https://cdn.example/thumb.jpg" />
<span class="duration">Duration 12:34</span>
<div class="views">1.2M</div>
<div class="thumb" data-preview="https://cdn.example/preview.mp4"></div>
<div class="models">
<a class="models__item thumb_cs" href="https://www.omg.xxx/sites/clubsweethearts/" style="order: 0;">
<svg class="icon icon-tv"><use xlink:href="#icon-tv"></use></svg>
<span>Club Sweethearts</span>
</a>
<a class="models__item thumb_model" href="https://www.omg.xxx/models/oliver-trunk/" style="order: 0;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Oliver Trunk</span>
</a>
<a class="models__item thumb_model" href="https://www.omg.xxx/models/sara-bork/" style="order: 0;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Sara Bork</span>
</a>
</div>
</div>
</div>
"##
.to_string();
let items = provider.get_video_items_from_html(html);
assert_eq!(items.len(), 1);
// The site name appears both in the title prefix and in the models
// block, yet is expected only once in the tag list.
assert_eq!(
items[0].tags,
Some(vec![
"Club Sweethearts".to_string(),
"Oliver Trunk".to_string(),
"Sara Bork".to_string()
])
);
// Explicit guard against empty tag strings (see the test name).
assert!(
items[0]
.tags
.as_ref()
.unwrap()
.iter()
.all(|tag| !tag.is_empty())
);
// Each /models/<slug>/ link must have been recorded as a star whose id
// is the slug and whose title is the link label.
let stars = provider.stars.read().unwrap().clone();
assert!(
stars
.iter()
.any(|tag| tag.id == "oliver-trunk" && tag.title == "Oliver Trunk")
);
assert!(
stars
.iter()
.any(|tag| tag.id == "sara-bork" && tag.title == "Sara Bork")
);
}
// Same checks against a fuller item layout (named "live item shape" by the
// test): the models container carries an extra class
// (`models is-truncated`), sits inside `div.item-info`, and the title has
// no bracketed site prefix.
#[test]
fn parses_live_item_shape_with_channel_and_pornstar_info() {
let provider = test_provider();
let html = r##"
<div class="list-videos">
<div class="item">
<a href="https://www.omg.xxx/videos/93763302/step-daughter-vol-2-scene-3/" target="_blank" title="Step Daughter Vol.2 Scene 3">
<div class="img thumb__img" data-preview="https://cast.omg.xxx/preview/93763302.mp4">
<img loading="lazy" class="thumb lazyloaded" src="https://img.omg.xxx/93763000/93763302/medium@2x/1.jpg" data-src="https://img.omg.xxx/93763000/93763302/medium@2x/1.jpg" alt="Step Daughter Vol.2 Scene 3" width="0" height="0">
<span class="duration"> Full Video 26:44 </span>
<span class="js-favourites thumb-favourites" data-action="add" data-type="video" data-object_id="93763302">
<svg class="icon icon-heart-plus"><use xlink:href="#icon-heart-plus"></use></svg>
<svg class="icon icon-trashcan"><use xlink:href="#icon-trashcan"></use></svg>
</span>
</div>
</a>
<div class="item-info">
<a href="https://www.omg.xxx/videos/93763302/step-daughter-vol-2-scene-3/" title="Step Daughter Vol.2 Scene 3">
<strong class="title"> Step Daughter Vol.2 Scene 3 </strong>
</a>
<div class="models is-truncated">
<a class="models__item thumb_cs" href="https://www.omg.xxx/sites/family-sinners/" style="order: 0;">
<svg class="icon icon-tv"><use xlink:href="#icon-tv"></use></svg>
<span>Family Sinners</span>
</a>
<a class="models__item" href="https://www.omg.xxx/models/vienna-rose/" style="order: 0;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Vienna Rose</span>
</a>
<a class="models__item" href="https://www.omg.xxx/models/mark-wood/" style="order: 1;">
<svg class="icon icon-star"><use xlink:href="#icon-star"></use></svg>
<span>Mark Wood</span>
</a>
</div>
<div class="wrap">
<div class="rating positive "> 100% </div>
<div class="views">4.8K</div>
</div>
</div>
</div>
</div>
"##
.to_string();
let items = provider.get_video_items_from_html(html);
assert_eq!(items.len(), 1);
// Tags follow the order of the links in the models block.
assert_eq!(
items[0].tags,
Some(vec![
"Family Sinners".to_string(),
"Vienna Rose".to_string(),
"Mark Wood".to_string()
])
);
// The /sites/<slug>/ link must have been added to the site list, which
// initially holds only "clubsweethearts".
let sites = provider.sites.read().unwrap().clone();
assert!(
sites
.iter()
.any(|tag| tag.id == "family-sinners" && tag.title == "Family Sinners")
);
// Both /models/<slug>/ links must have been added to the star list.
let stars = provider.stars.read().unwrap().clone();
assert!(
stars
.iter()
.any(|tag| tag.id == "vienna-rose" && tag.title == "Vienna Rose")
);
assert!(
stars
.iter()
.any(|tag| tag.id == "mark-wood" && tag.title == "Mark Wood")
);
}
}
#[async_trait]
impl Provider for OmgxxxProvider {
async fn get_videos(

View File

@@ -438,7 +438,8 @@ sys.stdout.buffer.write(response.content)
uploaders: Arc<RwLock<Vec<FilterOption>>>,
) -> Result<()> {
let link_selector = Self::selector("a[href]")?;
let article_selector = Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy, article.post")?;
let article_selector =
Self::selector("article.entry-tpl-grid, article.entry-tpl-list-fancy, article.post")?;
let pages = vec![
format!("{base_url}/"),
format!("{base_url}/page/2/"),
@@ -643,7 +644,8 @@ sys.stdout.buffer.write(response.content)
let source_selector = Self::selector(".entry-categories a[href]")?;
let views_selector = Self::selector(".entry-views strong")?;
let time_selector = Self::selector("time.entry-date[datetime]")?;
let author_selector = Self::selector(".entry-author a[href] strong, .entry-author a[href]")?;
let author_selector =
Self::selector(".entry-author a[href] strong, .entry-author a[href]")?;
let mut items = Vec::new();
@@ -859,8 +861,12 @@ sys.stdout.buffer.write(response.content)
.trim()
.to_string();
if stream_url.is_empty() || !(stream_url.starts_with("https://") || stream_url.starts_with("http://")) {
return Err(Error::from("vidara stream missing streaming_url".to_string()));
if stream_url.is_empty()
|| !(stream_url.starts_with("https://") || stream_url.starts_with("http://"))
{
return Err(Error::from(
"vidara stream missing streaming_url".to_string(),
));
}
Ok(stream_url)
@@ -1255,7 +1261,9 @@ impl Provider for PorndishProvider {
let page = page.parse::<u32>().unwrap_or(1);
let videos = match query {
Some(query) if !query.trim().is_empty() => self.query(cache, page, &query, options).await,
Some(query) if !query.trim().is_empty() => {
self.query(cache, page, &query, options).await
}
_ => self.get(cache, page, &sort, options).await,
};

View File

@@ -197,8 +197,7 @@ if location:
}
fn extract_iframe_fragments(html: &str) -> Vec<String> {
let Some(regex) =
Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#)
let Some(regex) = Self::regex(r#"const\s+[A-Za-z0-9_]+Content\s*=\s*"((?:\\.|[^"\\])*)";"#)
else {
return vec![];
};
@@ -230,7 +229,8 @@ if location:
return None;
}
let html = Self::fetch_with_curl_cffi(iframe_url, Some("https://www.porndish.com/")).await?;
let html =
Self::fetch_with_curl_cffi(iframe_url, Some("https://www.porndish.com/")).await?;
let pass_regex = Self::regex(r#"\$\.get\(\s*['"](/pass_md5/[^'"]+)['"]"#)?;
let path = pass_regex
.captures(&html)