viralxxxporn and xfree bugfix

This commit is contained in:
Simon
2026-03-05 19:49:30 +00:00
parent 63782f6a7c
commit 2f8951601b
3 changed files with 772 additions and 10 deletions

View File

@@ -293,15 +293,35 @@ impl XfreeProvider {
}
/// Extracts a view count from a snippet of listing markup or text.
///
/// Several regex patterns are tried in order; the first one whose capture
/// parses via `parse_abbreviated_number` and passes the plausibility check
/// below is returned. Returns `None` when no pattern yields an acceptable
/// value.
///
/// A captured number is accepted only if it carries a `k`/`m`/`b` suffix
/// (any case) or is at least 100. Presumably this keeps small bare numbers
/// that merely sit near a "views" marker (titles, durations) from being
/// reported as view counts.
fn extract_views(text: &str) -> Option<u32> {
    let patterns = [
        // Number right after an element whose class contains "views", or a
        // `data-views` attribute value.
        r#"(?is)(?:class=["'][^"']*views[^"']*["'][^>]*>|data-views=["']?)([0-9]+(?:\.[0-9]+)?\s*[kmb]?)"#,
        // Number within 20 non-digit characters after an "eye" icon marker.
        r#"(?is)(?:fa-eye|icon-eye|eye[^>]*></i>)[^0-9]{0,20}([0-9]+(?:\.[0-9]+)?\s*[kmb]?)"#,
        // Plain text such as "1.2k views"; the suffix is mandatory here, so
        // "18 View Example" does not match.
        r#"(?i)\b([0-9]+(?:\.[0-9]+)?\s*[kmb])\s*(?:views?|view)\b"#,
    ];

    patterns.iter().find_map(|pattern| {
        // Any failure (bad pattern, no match, unparsable capture) yields
        // `None`, which makes `find_map` move on to the next pattern.
        let re = Regex::new(pattern).ok()?;
        let raw = re.captures(text)?.get(1)?.as_str().to_string();
        let parsed = parse_abbreviated_number(&raw)?;
        let has_suffix = raw
            .chars()
            .any(|c| matches!(c, 'k' | 'K' | 'm' | 'M' | 'b' | 'B'));
        (has_suffix || parsed >= 100).then_some(parsed)
    })
}
fn extract_tags(text: &str) -> Vec<String> {
@@ -390,7 +410,7 @@ impl XfreeProvider {
}
let seg_start = full.start().saturating_sub(400);
let seg_end = (full.end() + 1600).min(html.len());
let seg_end = (full.end() + 700).min(html.len());
let segment = html.get(seg_start..seg_end).unwrap_or(full.as_str());
let title_from_attr = title_attr_re
@@ -748,4 +768,19 @@ mod tests {
assert!(urls.iter().any(|u| u.contains("clip_720p.mp4")));
assert!(urls.iter().any(|u| u.contains("master.m3u8")));
}
/// A title that happens to contain "18 View" must not be reported as a
/// view count for the parsed listing item.
#[test]
fn does_not_take_views_from_plain_title_text() {
    let listing_html = r#"
<a href="/video?id=777&title=18-view-example" title="18 View Example">
<img src="https://thumbs.xfree.com/x/y/test.jpg" />
<span>1:01</span>
<span>18 View Example</span>
</a>
"#;

    let parsed = XfreeProvider::new().parse_listing_items(listing_html);

    // Exactly one item is expected, and it must carry no view count.
    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].views, None);
}
}