fix: support jinja indexer parsing in rust

2026-06-02 07:26:46 +00:00 · 2026-05-23 08:07:43 +08:00
parent da54f3a302
commit fcbea077b7
5 changed files with 199 additions and 299 deletions
--- a/docker/update.sh
+++ b/docker/update.sh
@@ -24,6 +24,52 @@ function WARN() {
 VENV_PATH="${VENV_PATH:-/opt/venv}"
 export PATH="${VENV_PATH}/bin:$PATH"

+# Prepare the Rust build environment on demand, so the toolchain is not baked permanently into the Docker runtime image.
+function ensure_rust_build_env() {
+    export PATH="/root/.cargo/bin:$PATH"
+    if command -v cargo > /dev/null 2>&1; then
+        return 0
+    fi
+    INFO "→ 当前镜像未包含 cargo，正在按需准备 Rust 构建环境..."
+    if command -v apt-get > /dev/null 2>&1; then
+        if ! apt-get update; then
+            ERROR "更新 apt 索引失败，无法安装 Rust 构建依赖"
+            return 1
+        fi
+        if ! apt-get install -y --no-install-recommends build-essential curl ca-certificates; then
+            ERROR "安装 Rust 构建依赖失败"
+            return 1
+        fi
+        apt-get clean
+        rm -rf /var/lib/apt/lists/*
+    fi
+    if ! curl ${CURL_OPTIONS} https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal; then
+        ERROR "安装 Rust 工具链失败"
+        return 1
+    fi
+    export PATH="/root/.cargo/bin:$PATH"
+    command -v cargo > /dev/null 2>&1
+}
+
+# Rebuild the Rust acceleration extension so Docker dev/release does not keep loading a stale wheel after the source is updated.
+function install_rust_accel() {
+    local manifest="/app/rust/moviepilot_rust/Cargo.toml"
+    if [ ! -f "${manifest}" ]; then
+        WARN "未找到 Rust 扩展源码，跳过 Rust 加速扩展更新"
+        return 0
+    fi
+    if ! ensure_rust_build_env; then
+        ERROR "Rust 构建环境不可用，无法更新 Rust 加速扩展"
+        return 1
+    fi
+    INFO "→ 正在更新 Rust 加速扩展..."
+    if ! "${VENV_PATH}/bin/python" -m maturin develop --release --manifest-path "${manifest}"; then
+        ERROR "Rust 加速扩展更新失败"
+        return 1
+    fi
+    INFO "Rust 加速扩展更新成功"
+}
+
 # 下载及解压
 function download_and_unzip() {
    local retries=0
@@ -166,6 +212,9 @@ function install_backend_and_download_resources() {
        WARN "${sites_file} 下载失败，继续使用旧的资源来启动..."
    fi
    INFO "站点资源更新成功"
+    if ! install_rust_accel; then
+        return 1
+    fi
    # 清理临时目录
    rm -rf "${TMP_PATH}"
    return 0
--- a/rust/moviepilot_rust/Cargo.lock
+++ b/rust/moviepilot_rust/Cargo.lock
@@ -344,6 +344,12 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"

+[[package]]
+name = "memo-map"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38d1115007560874e373613744c6fba374c17688327a71c1476d1a5954cc857b"
+
 [[package]]
 name = "memoffset"
 version = "0.9.1"
@@ -353,10 +359,21 @@ dependencies = [
 "autocfg",
 ]

+[[package]]
+name = "minijinja"
+version = "2.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2929e494b2280e1e18959bb2e121da03347ae896896fdfaceaab43c88a02803f"
+dependencies = [
+ "memo-map",
+ "serde",
+]
+
 [[package]]
 name = "moviepilot-rust"
 version = "0.1.0"
 dependencies = [
+ "minijinja",
 "once_cell",
 "percent-encoding",
 "pyo3",
--- a/rust/moviepilot_rust/Cargo.toml
+++ b/rust/moviepilot_rust/Cargo.toml
@@ -8,6 +8,7 @@ name = "moviepilot_rust"
 crate-type = ["cdylib"]

 [dependencies]
+minijinja = "2.20"
 once_cell = "1.20"
 percent-encoding = "2.3"
 pyo3 = { version = "0.23", features = ["abi3-py311", "extension-module"] }
--- a/rust/moviepilot_rust/src/indexer.rs
+++ b/rust/moviepilot_rust/src/indexer.rs
@@ -1,4 +1,5 @@
 use crate::utils::{get_optional_i64, get_optional_string, py_i64_to_usize};
+use minijinja::{context, Environment, UndefinedBehavior};
 use once_cell::sync::Lazy;
 use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
 use pyo3::exceptions::PyValueError;
@@ -6,6 +7,7 @@ use pyo3::prelude::*;
 use pyo3::types::{PyDict, PyList};
 use regex::{Regex, RegexBuilder};
 use scraper::{ElementRef, Html, Selector};
+use std::collections::BTreeMap;
 use url::form_urlencoded;
 use url::Url;

@@ -34,13 +36,12 @@ static FILESIZE_UNIT_RE: Lazy<Regex> = Lazy::new(|| {
        .unwrap()
 });
 static NUMERIC_FACTOR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\d+\.?\d*)").unwrap());
-static FIELD_EXPR_RE: Lazy<Regex> = Lazy::new(|| {
-    Regex::new(r#"^fields(?:\.([A-Za-z0-9_]+)|\[\s*['"]([^'"]+)['"]\s*\])$"#).unwrap()
-});
 static FIELD_REF_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"fields(?:\.([A-Za-z0-9_]+)|\[\s*['"]([^'"]+)['"]\s*\])"#).unwrap());
-static JINJA_EXPR_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\{\{-?\s*(.*?)\s*-?\}\}"#).unwrap());
-static JINJA_TAG_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\{%-?\s*(.*?)\s*-?%\}"#).unwrap());
+static HAS_QUOTED_SELECTOR_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r#":has\(\s*"([^"]+)"\s*\)|:has\(\s*'([^']+)'\s*\)"#).unwrap());
+static TABLE_DIRECT_TR_RE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r#"\b(table[^>,]*?)\s*>\s*(tr(?:[^\s>,]*)?)"#).unwrap());

 enum RowParseResult {
    Unsupported,
@@ -66,7 +67,7 @@ pub(crate) fn parse_indexer_torrents_fast(
    if list_selector_text.is_empty() {
        return Ok(None);
    }
-    let Ok(list_selector) = Selector::parse(&list_selector_text) else {
+    let Some(list_selector) = parse_site_selector(&list_selector_text) else {
        return Ok(None);
    };
    let document = Html::parse_document(html_text);
@@ -386,11 +387,7 @@ fn parse_title(
        safe_query(row, &selector)?
    } else if let Some(template) = get_optional_string(&selector, "text")? {
        let values = collect_template_field_values(row, fields, &template)?;
-        let refs: Vec<(&str, &str)> = values
-            .iter()
-            .map(|(key, value)| (key.as_str(), value.as_str()))
-            .collect();
-        let Some(rendered) = render_known_template(&template, &refs) else {
+        let Some(rendered) = render_jinja_template(&template, &values) else {
            return Ok(false);
        };
        Some(rendered)
@@ -418,11 +415,7 @@ fn parse_description(
        safe_query(row, &selector)?
    } else if let Some(template) = get_optional_string(&selector, "text")? {
        let values = collect_template_field_values(row, fields, &template)?;
-        let refs: Vec<(&str, &str)> = values
-            .iter()
-            .map(|(key, value)| (key.as_str(), value.as_str()))
-            .collect();
-        let Some(rendered) = render_known_template(&template, &refs) else {
+        let Some(rendered) = render_jinja_template(&template, &values) else {
            return Ok(false);
        };
        Some(rendered)
@@ -441,7 +434,7 @@ fn collect_template_field_values(
    row: ElementRef<'_>,
    fields: &Bound<'_, PyDict>,
    template: &str,
-) -> PyResult<Vec<(String, String)>> {
+) -> PyResult<BTreeMap<String, String>> {
    let mut keys = Vec::new();
    for captures in FIELD_REF_RE.captures_iter(template) {
        let Some(key) = captures.get(1).or_else(|| captures.get(2)) else {
@@ -453,14 +446,14 @@ fn collect_template_field_values(
        }
    }

-    let mut values = Vec::new();
+    let mut values = BTreeMap::new();
    for key in keys {
        if let Some(field_selector) = get_field_dict(fields, &key)? {
            let value = safe_query(row, &field_selector)?.unwrap_or_default();
-            values.push((key, value));
+            values.insert(key, value);
        }
    }
-    Ok(values)
+    Ok(resolve_embedded_field_templates(values))
 }

 /// 解析普通文本字段。
@@ -683,6 +676,44 @@ fn get_field_dict<'py>(
    Ok(Some(value.downcast_into::<PyDict>()?))
 }

+/// Parses a site-config selector, also accepting the quoted :has("selector") form that PyQuery allows; returns None when no variant parses.
+fn parse_site_selector(selector_text: &str) -> Option<Selector> {
+    let normalized = normalize_pyquery_selector(selector_text);
+    let expanded = expand_table_direct_tr_selector(&normalized);
+    if let Ok(selector) = Selector::parse(&expanded) {
+        return Some(selector);
+    }
+    if expanded != normalized {
+        if let Ok(selector) = Selector::parse(&normalized) {
+            return Some(selector);
+        }
+    }
+    Selector::parse(selector_text).ok()
+}
+
+/// Converts PyQuery selector extensions into a CSS selector form scraper can parse by unquoting :has("...") / :has('...') arguments.
+fn normalize_pyquery_selector(selector_text: &str) -> String {
+    HAS_QUOTED_SELECTOR_RE
+        .replace_all(selector_text, |captures: &regex::Captures<'_>| {
+            let inner = captures
+                .get(1)
+                .or_else(|| captures.get(2))
+                .map(|item| item.as_str())
+                .unwrap_or_default();
+            format!(":has({inner})")
+        })
+        .into_owned()
+}
+
+/// Appends a tbody variant to `table > tr` selectors, to cope with the Rust HTML5 parser inserting tbody automatically.
+fn expand_table_direct_tr_selector(selector_text: &str) -> String {
+    let expanded = TABLE_DIRECT_TR_RE.replace_all(selector_text, "$1 > tbody > $2");
+    if expanded == selector_text {
+        return selector_text.to_string();
+    }
+    format!("{selector_text}, {expanded}")
+}
+
 /// 执行 selector 查询并返回第一个符合 index/contents 规则的文本。
 fn safe_query(
    row: ElementRef<'_>,
@@ -702,7 +733,7 @@ fn query_all_values(
    let Some(selector_text) = get_selector_text(selector_config)? else {
        return Ok(None);
    };
-    let Ok(selector) = Selector::parse(&selector_text) else {
+    let Some(selector) = parse_site_selector(&selector_text) else {
        return Ok(None);
    };
    let attribute = get_optional_string(selector_config, "attribute")?;
@@ -729,7 +760,7 @@ fn parse_remove_selectors(selector_config: &Bound<'_, PyDict>) -> PyResult<Vec<S
        if item.is_empty() {
            continue;
        }
-        let Ok(selector) = Selector::parse(item) else {
+        let Some(selector) = parse_site_selector(item) else {
            return Ok(Vec::new());
        };
        selectors.push(selector);
@@ -848,7 +879,7 @@ fn should_skip_text_node(

 /// 判断 row 内是否存在指定 selector。
 fn selector_exists(row: ElementRef<'_>, selector_text: &str) -> PyResult<bool> {
-    let Ok(selector) = Selector::parse(selector_text) else {
+    let Some(selector) = parse_site_selector(selector_text) else {
        return Ok(false);
    };
    Ok(row.select(&selector).next().is_some())
@@ -882,290 +913,32 @@ fn normalize_site_link(domain: &str, link: &str, protocol_relative: bool) -> Str
    }
 }

-/// 渲染常见的 Jinja 字段模板，不支持复杂表达式时由调用方回退 Python。
-fn render_known_template(template: &str, values: &[(&str, &str)]) -> Option<String> {
-    if template.contains("{#") {
-        return None;
-    }
-    let rendered = render_jinja_blocks(template, values)?;
-    render_field_vars(&rendered, values)
+/// Renders a site field template with MiniJinja, intended to match Python jinja2's Template.render(fields=...); returns None if rendering fails.
+fn render_jinja_template(template: &str, fields: &BTreeMap<String, String>) -> Option<String> {
+    let mut env = Environment::new();
+    env.set_undefined_behavior(UndefinedBehavior::Chainable);
+    env.render_str(template, context! { fields => fields }).ok()
 }

-/// 渲染站点解析配置里常见的 if/elif/else/endif 字段模板。
-fn render_jinja_blocks(template: &str, values: &[(&str, &str)]) -> Option<String> {
-    let mut result = String::new();
-    let mut cursor = 0;
-    while let Some(tag_match) = JINJA_TAG_RE.find_at(template, cursor) {
-        result.push_str(&template[cursor..tag_match.start()]);
-        let captures = JINJA_TAG_RE.captures(tag_match.as_str())?;
-        let tag_content = captures.get(1)?.as_str().trim();
-        let Some(condition) = tag_content.strip_prefix("if ") else {
-            return None;
-        };
-        let block_end = find_matching_endif(template, tag_match.end())?;
-        let body = &template[tag_match.end()..block_end.endif_start];
-        let rendered_branch = render_if_body(body, condition, values)?;
-        result.push_str(&rendered_branch);
-        cursor = block_end.endif_end;
-    }
-    result.push_str(&template[cursor..]);
-    Some(result)
-}
-
-/// 查找当前 if 块对应的 endif，允许内部再嵌套一层字段模板。
-fn find_matching_endif(template: &str, from: usize) -> Option<JinjaBlockEnd> {
-    let mut depth = 1;
-    for tag_match in JINJA_TAG_RE.find_iter(&template[from..]) {
-        let absolute_start = from + tag_match.start();
-        let absolute_end = from + tag_match.end();
-        let captures = JINJA_TAG_RE.captures(tag_match.as_str())?;
-        let tag_content = captures.get(1)?.as_str().trim();
-        if tag_content.starts_with("if ") {
-            depth += 1;
-        } else if tag_content == "endif" {
-            depth -= 1;
-            if depth == 0 {
-                return Some(JinjaBlockEnd {
-                    endif_start: absolute_start,
-                    endif_end: absolute_end,
-                });
-            }
-        }
-    }
-    None
-}
-
-/// 从 if 块中选出第一个满足条件的分支并继续渲染。
-fn render_if_body(body: &str, first_condition: &str, values: &[(&str, &str)]) -> Option<String> {
-    let branches = split_if_branches(body, first_condition)?;
-    for branch in branches {
-        let selected = match branch.condition {
-            Some(condition) => eval_field_condition(&condition, values)?,
-            None => true,
-        };
-        if selected {
-            return render_known_template(&branch.content, values);
-        }
-    }
-    Some(String::new())
-}
-
-/// 按同层级 elif/else 拆分 if 块，嵌套 if 内部的分支不会被误拆。
-fn split_if_branches(body: &str, first_condition: &str) -> Option<Vec<JinjaBranch>> {
-    let mut branches = Vec::new();
-    let mut depth = 0;
-    let mut current_condition = Some(first_condition.trim().to_string());
-    let mut branch_start = 0;
-    for tag_match in JINJA_TAG_RE.find_iter(body) {
-        let captures = JINJA_TAG_RE.captures(tag_match.as_str())?;
-        let tag_content = captures.get(1)?.as_str().trim();
-        if tag_content.starts_with("if ") {
-            depth += 1;
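+/// Renders Jinja template text accidentally left in field values so a site's title-attribute template text does not reach recognition. NOTE(review): this evaluates scraped page text as a template — confirm that is acceptable.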
+fn resolve_embedded_field_templates(values: BTreeMap<String, String>) -> BTreeMap<String, String> {
+    let mut resolved = values.clone();
+    for (key, value) in &values {
+        if !contains_jinja_syntax(value) {
            continue;
        }
-        if tag_content == "endif" {
-            if depth == 0 {
-                return None;
-            }
-            depth -= 1;
-            continue;
-        }
-        if depth == 0 {
-            if let Some(condition) = tag_content.strip_prefix("elif ") {
-                branches.push(JinjaBranch {
-                    condition: current_condition.take(),
-                    content: body[branch_start..tag_match.start()].to_string(),
-                });
-                current_condition = Some(condition.trim().to_string());
-                branch_start = tag_match.end();
-            } else if tag_content == "else" {
-                branches.push(JinjaBranch {
-                    condition: current_condition.take(),
-                    content: body[branch_start..tag_match.start()].to_string(),
-                });
-                current_condition = None;
-                branch_start = tag_match.end();
-            }
+        let mut context_values = resolved.clone();
+        context_values.insert(key.clone(), String::new());
+        if let Some(rendered) = render_jinja_template(value, &context_values) {
+            resolved.insert(key.clone(), rendered);
        }
    }
-    branches.push(JinjaBranch {
-        condition: current_condition,
-        content: body[branch_start..].to_string(),
-    });
-    Some(branches)
+    resolved
 }

-/// 计算字段真值条件，覆盖站点模板里的 fields.xxx、not、and、or。
-fn eval_field_condition(condition: &str, values: &[(&str, &str)]) -> Option<bool> {
-    let trimmed = condition.trim();
-    if trimmed.contains(" or ") {
-        for part in trimmed.split(" or ") {
-            if eval_field_condition(part, values)? {
-                return Some(true);
-            }
-        }
-        return Some(false);
-    }
-    if trimmed.contains(" and ") {
-        for part in trimmed.split(" and ") {
-            if !eval_field_condition(part, values)? {
-                return Some(false);
-            }
-        }
-        return Some(true);
-    }
-    eval_field_condition_atom(trimmed, values)
-}
-
-/// 计算单个字段条件，缺失字段按 Jinja Undefined 的假值处理。
-fn eval_field_condition_atom(condition: &str, values: &[(&str, &str)]) -> Option<bool> {
-    let (negated, expression) = if let Some(rest) = condition.trim().strip_prefix("not ") {
-        (true, rest.trim())
-    } else {
-        (false, condition.trim())
-    };
-    let key = parse_field_key(expression)?;
-    let value = get_template_value(values, &key).unwrap_or_default();
-    let truthy = !value.is_empty();
-    Some(if negated { !truthy } else { truthy })
-}
-
-/// 替换模板中的 fields 变量，存在未知变量语法时回退 Python。
-fn render_field_vars(template: &str, values: &[(&str, &str)]) -> Option<String> {
-    let mut rendered = String::new();
-    let mut cursor = 0;
-    for captures in JINJA_EXPR_RE.captures_iter(template) {
-        let whole = captures.get(0)?;
-        rendered.push_str(&template[cursor..whole.start()]);
-        let expression = captures.get(1)?.as_str();
-        rendered.push_str(&eval_field_output(expression, values)?);
-        cursor = whole.end();
-    }
-    rendered.push_str(&template[cursor..]);
-    if rendered.contains("{{") || rendered.contains("{%") {
-        return None;
-    }
-    Some(rendered)
-}
-
-/// 渲染输出表达式，覆盖字段变量、字段三元表达式和字符串拼接。
-fn eval_field_output(expression: &str, values: &[(&str, &str)]) -> Option<String> {
-    let expression = expression.trim();
-    if let Some((true_expr, condition, false_expr)) = split_inline_if(expression) {
-        if eval_field_condition(condition, values)? {
-            return eval_field_output(true_expr, values);
-        }
-        return eval_field_output(false_expr, values);
-    }
-    let terms = split_concat_terms(expression)?;
-    if terms.len() > 1 {
-        let mut rendered = String::new();
-        for term in terms {
-            rendered.push_str(&eval_field_atom(term, values)?);
-        }
-        return Some(rendered);
-    }
-    eval_field_atom(expression, values)
-}
-
-/// 拆分 Jinja 的简单三元表达式：a if cond else b。
-fn split_inline_if(expression: &str) -> Option<(&str, &str, &str)> {
-    let (true_expr, right) = expression.split_once(" if ")?;
-    let (condition, false_expr) = right.split_once(" else ")?;
-    Some((true_expr.trim(), condition.trim(), false_expr.trim()))
-}
-
-/// 按字符串字面量边界拆分加号拼接表达式。
-fn split_concat_terms(expression: &str) -> Option<Vec<&str>> {
-    let mut terms = Vec::new();
-    let mut start = 0;
-    let mut quote: Option<char> = None;
-    for (index, ch) in expression.char_indices() {
-        if let Some(current_quote) = quote {
-            if ch == current_quote {
-                quote = None;
-            }
-            continue;
-        }
-        if ch == '\'' || ch == '"' {
-            quote = Some(ch);
-            continue;
-        }
-        if ch == '+' {
-            let term = expression[start..index].trim();
-            if term.is_empty() {
-                return None;
-            }
-            terms.push(term);
-            start = index + ch.len_utf8();
-        }
-    }
-    if quote.is_some() {
-        return None;
-    }
-    let term = expression[start..].trim();
-    if term.is_empty() {
-        return None;
-    }
-    terms.push(term);
-    Some(terms)
-}
-
-/// 渲染字段或字符串字面量，其他表达式交给 Python 回退。
-fn eval_field_atom(expression: &str, values: &[(&str, &str)]) -> Option<String> {
-    let expression = expression.trim();
-    if let Some(value) = parse_string_literal(expression) {
-        return Some(value);
-    }
-    let key = parse_field_key(expression)?;
-    Some(
-        get_template_value(values, &key)
-            .unwrap_or_default()
-            .to_string(),
-    )
-}
-
-/// 解析单引号或双引号字符串字面量。
-fn parse_string_literal(expression: &str) -> Option<String> {
-    let mut chars = expression.chars();
-    let quote = chars.next()?;
-    if quote != '\'' && quote != '"' {
-        return None;
-    }
-    if !expression.ends_with(quote) || expression.len() < 2 {
-        return None;
-    }
-    let inner = &expression[quote.len_utf8()..expression.len() - quote.len_utf8()];
-    Some(inner.to_string())
-}
-
-/// 解析 fields 变量名，拒绝函数调用和比较表达式等完整 Jinja 能力。
-fn parse_field_key(expression: &str) -> Option<String> {
-    let captures = FIELD_EXPR_RE.captures(expression.trim())?;
-    captures
-        .get(1)
-        .or_else(|| captures.get(2))
-        .map(|item| item.as_str().to_string())
-}
-
-/// 从模板上下文中获取字段值，缺失字段按 Jinja 的空值处理。
-fn get_template_value<'a>(values: &'a [(&str, &str)], template_key: &str) -> Option<&'a str> {
-    for (field_key, value) in values {
-        if *field_key == template_key {
-            return Some(*value);
-        }
-    }
-    None
-}
-
-struct JinjaBlockEnd {
-    endif_start: usize,
-    endif_end: usize,
-}
-
-struct JinjaBranch {
-    condition: Option<String>,
-    content: String,
+/// Reports whether the text contains a Jinja delimiter ({{, {% or {#); a cheap pre-filter for templates embedded in field values.
+fn contains_jinja_syntax(value: &str) -> bool {
+    value.contains("{{") || value.contains("{%") || value.contains("{#")
 }

 /// 读取分类配置中的 ID 列表。
--- a/tests/test_rust_accel.py
+++ b/tests/test_rust_accel.py
@@ -299,6 +299,66 @@ def test_rust_indexer_page_parser_renders_literal_title_template_without_default
    }]


+def test_rust_indexer_page_parser_supports_agsvpt_selector_and_embedded_title_template():
+    """
+    Rust parsing of regular indexer pages should support AGSVPT's PyQuery-style selector and a Jinja template embedded in a field value.
+    """
+    spider = SiteSpider(
+        indexer={
+            "id": "agsvpt",
+            "name": "AGSVPT",
+            "domain": "https://www.agsvpt.com/",
+            "search": {"paths": [{"path": "torrents.php"}]},
+            "torrents": {
+                "list": {"selector": 'table.torrents > tr:has("table.torrentname")'},
+                "fields": {
+                    "title_default": {"selector": 'a[href*="details.php?id="]'},
+                    "title_optional": {
+                        "selector": 'a[title][href*="details.php?id="]',
+                        "attribute": "title",
+                        "optional": True,
+                    },
+                    "title": {
+                        "text": (
+                            "{% if fields['title_optional'] %}"
+                            "{{ fields['title_optional'] }}"
+                            "{% else %}"
+                            "{{ fields['title_default'] }}"
+                            "{% endif %}"
+                        )
+                    },
+                    "details": {
+                        "selector": 'a[href*="details.php?id="]',
+                        "attribute": "href",
+                    },
+                    "download": {
+                        "selector": 'a[href*="download.php?id="]',
+                        "attribute": "href",
+                    },
+                },
+            },
+        },
+    )
+    html = """
+    <table class="torrents">
+      <tr>
+        <td><table class="torrentname"><tr><td>
+          <a href="details.php?id=1" title="{% if fields['title_optional'] %}{% else %}Release that Witch S01 2026 1080p WEB-DL H264 AAC-HHWEB{% endif %}">Ignored</a>
+        </td></tr></table></td>
+        <td><a href="download.php?id=1">DL</a></td>
+      </tr>
+    </table>
+    """
+
+    torrents = spider.parse(html)
+
+    assert torrents == [{
+        "title": "Release that Witch S01 2026 1080p WEB-DL H264 AAC-HHWEB",
+        "page_url": "https://www.agsvpt.com/details.php?id=1",
+        "enclosure": "https://www.agsvpt.com/download.php?id=1",
+    }]
+
+
 def test_rust_indexer_page_parser_renders_common_description_templates():
    """
    Rust 普通 indexer 页面解析应兼容站点构建项目里的 description 字段模板。