From da54f3a302448ca0bc8d5f5896e0c00e94f784b3 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Sat, 23 May 2026 01:02:08 +0800 Subject: [PATCH] fix: render indexer jinja fields in rust --- rust/moviepilot_rust/src/indexer.rs | 78 ++++++++++++++++------------- tests/test_rust_accel.py | 50 ++++++++++++++++++ 2 files changed, 94 insertions(+), 34 deletions(-) diff --git a/rust/moviepilot_rust/src/indexer.rs b/rust/moviepilot_rust/src/indexer.rs index a95db918..4dd8a5ce 100644 --- a/rust/moviepilot_rust/src/indexer.rs +++ b/rust/moviepilot_rust/src/indexer.rs @@ -37,8 +37,9 @@ static NUMERIC_FACTOR_RE: Lazy = Lazy::new(|| Regex::new(r"(\d+\.?\d*)"). static FIELD_EXPR_RE: Lazy = Lazy::new(|| { Regex::new(r#"^fields(?:\.([A-Za-z0-9_]+)|\[\s*['"]([^'"]+)['"]\s*\])$"#).unwrap() }); -static JINJA_EXPR_RE: Lazy = - Lazy::new(|| Regex::new(r#"\{\{-?\s*(.*?)\s*-?\}\}"#).unwrap()); +static FIELD_REF_RE: Lazy = + Lazy::new(|| Regex::new(r#"fields(?:\.([A-Za-z0-9_]+)|\[\s*['"]([^'"]+)['"]\s*\])"#).unwrap()); +static JINJA_EXPR_RE: Lazy = Lazy::new(|| Regex::new(r#"\{\{-?\s*(.*?)\s*-?\}\}"#).unwrap()); static JINJA_TAG_RE: Lazy = Lazy::new(|| Regex::new(r#"\{%-?\s*(.*?)\s*-?%\}"#).unwrap()); enum RowParseResult { @@ -371,7 +372,7 @@ fn parse_indexer_row( Ok(RowParseResult::Item(output.into())) } -/// 解析标题字段,支持直接 selector 和常见的 title_default/title_optional 模板。 +/// 解析标题字段,支持直接 selector 和按模板引用字段渲染 title.text。 fn parse_title( py: Python<'_>, row: ElementRef<'_>, @@ -384,23 +385,12 @@ fn parse_title( let mut title = if selector.contains("selector")? { safe_query(row, &selector)? } else if let Some(template) = get_optional_string(&selector, "text")? { - let Some(default_selector) = get_field_dict(fields, "title_default")? else { - return Ok(false); - }; - let title_default = safe_query(row, &default_selector)?.unwrap_or_default(); - let title_optional = - if let Some(optional_selector) = get_field_dict(fields, "title_optional")? { - safe_query(row, &optional_selector)?.unwrap_or_default() - } else { - String::new() - }; - let Some(rendered) = render_known_template( - &template, - &[ - ("title_default", title_default.as_str()), - ("title_optional", title_optional.as_str()), - ], - ) else { + let values = collect_template_field_values(row, fields, &template)?; + let refs: Vec<(&str, &str)> = values + .iter() + .map(|(key, value)| (key.as_str(), value.as_str())) + .collect(); + let Some(rendered) = render_known_template(&template, &refs) else { return Ok(false); }; Some(rendered) @@ -414,7 +404,7 @@ fn parse_title( Ok(true) } -/// 解析描述字段,支持直接 selector 和常见 description 模板。 +/// 解析描述字段,支持直接 selector 和按模板引用字段渲染 description.text。 fn parse_description( py: Python<'_>, row: ElementRef<'_>, @@ -427,18 +417,7 @@ fn parse_description( let mut description = if selector.contains("selector")? || selector.contains("selectors")? { safe_query(row, &selector)? } else if let Some(template) = get_optional_string(&selector, "text")? { - let mut values = Vec::new(); - for key in [ - "tags", - "subject", - "description_free_forever", - "description_normal", - ] { - if let Some(field_selector) = get_field_dict(fields, key)? { - let value = safe_query(row, &field_selector)?.unwrap_or_default(); - values.push((key.to_string(), value)); - } - } + let values = collect_template_field_values(row, fields, &template)?; let refs: Vec<(&str, &str)> = values .iter() .map(|(key, value)| (key.as_str(), value.as_str())) @@ -457,6 +436,33 @@ fn parse_description( Ok(true) } +/// 按 Jinja 模板实际引用的 fields 字段提取当前行数据,避免把模板能力绑死在固定字段名上。 +fn collect_template_field_values( + row: ElementRef<'_>, + fields: &Bound<'_, PyDict>, + template: &str, +) -> PyResult> { + let mut keys = Vec::new(); + for captures in FIELD_REF_RE.captures_iter(template) { + let Some(key) = captures.get(1).or_else(|| captures.get(2)) else { + continue; + }; + let key = key.as_str(); + if !keys.iter().any(|item: &String| item == key) { + keys.push(key.to_string()); + } + } + + let mut values = Vec::new(); + for key in keys { + if let Some(field_selector) = get_field_dict(fields, &key)? { + let value = safe_query(row, &field_selector)?.unwrap_or_default(); + values.push((key, value)); + } + } + Ok(values) +} + /// 解析普通文本字段。 fn parse_plain_field( py: Python<'_>, @@ -1112,7 +1118,11 @@ fn eval_field_atom(expression: &str, values: &[(&str, &str)]) -> Option return Some(value); } let key = parse_field_key(expression)?; - Some(get_template_value(values, &key).unwrap_or_default().to_string()) + Some( + get_template_value(values, &key) + .unwrap_or_default() + .to_string(), + ) } /// 解析单引号或双引号字符串字面量。 diff --git a/tests/test_rust_accel.py b/tests/test_rust_accel.py index 57f35a06..87a3be78 100644 --- a/tests/test_rust_accel.py +++ b/tests/test_rust_accel.py @@ -249,6 +249,56 @@ def test_rust_indexer_page_parser_renders_common_title_template(): assert [item["title"] for item in torrents] == ["Optional Name", "Default Fallback"] +def test_rust_indexer_page_parser_renders_literal_title_template_without_default_field(): + """ + Rust 普通 indexer 页面解析应在没有 title_default 时渲染 title_optional 的纯文本兜底模板。 + """ + spider = SiteSpider( + indexer={ + "id": "demo", + "name": "Demo", + "domain": "https://example.org/", + "search": {"paths": [{"path": "torrents.php"}]}, + "torrents": { + "list": {"selector": "tr.torrent"}, + "fields": { + "title_optional": { + "selector": "a.title", + "attribute": "title", + "optional": True, + }, + "title": { + "text": ( + "{% if fields['title_optional'] %}" + "{{ fields['title_optional'] }}" + "{% else %}" + "For All Mankind S05 2019 2160p ATVP WEB-DL " + "DDP5.1 Atmos DV H 265-HHWEB [新]" + "{% endif %}" + ) + }, + "download": {"selector": "a.dl", "attribute": "href"}, + }, + }, + }, + ) + html = """ + + + + + +
IgnoredDL
+ """ + + torrents = spider.parse(html) + + assert torrents == [{ + "title": "For All Mankind S05 2019 2160p ATVP WEB-DL DDP5.1 Atmos DV H 265-HHWEB [新]", + "enclosure": "https://example.org/download/1", + }] + + def test_rust_indexer_page_parser_renders_common_description_templates(): """ Rust 普通 indexer 页面解析应兼容站点构建项目里的 description 字段模板。