feat: accelerate rss parsing with rust

This commit is contained in:
jxxghp
2026-05-23 16:14:47 +08:00
parent d1e2881347
commit ad38f51d6b
8 changed files with 700 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ from lxml import etree
from app.core.config import settings
from app.helper.browser import PlaywrightHelper
from app.log import logger
from app.utils import rust_accel
from app.utils.http import RequestUtils
from app.utils.string import StringUtils
@@ -298,6 +299,12 @@ class RssHelper:
logger.error("RSS内容不是有效的XML格式")
return False
rust_items = rust_accel.parse_rss_items(ret_xml, self.MAX_RSS_ITEMS + 1)
if rust_items is not None:
if len(rust_items) > self.MAX_RSS_ITEMS:
logger.warning(f"RSS条目过多: 超过{self.MAX_RSS_ITEMS},仅处理前{self.MAX_RSS_ITEMS}")
return rust_items[:self.MAX_RSS_ITEMS]
# 使用lxml.etree解析XML
parser = None
try:

View File

@@ -67,6 +67,20 @@ def parse_indexer_torrents(
return None
def parse_rss_items(xml_text: str, max_items: int = 1000) -> Optional[List[dict]]:
    """
    Parse RSS/Atom entries through the Rust extension.

    :param xml_text: feed document text handed to the extension
    :param max_items: maximum number of entries requested from the extension
    :return: whatever ``parse_rss_items_fast`` returns, or ``None`` when the
             extension is not loaded or the call failed, so the caller can
             fall back to the Python parser
    """
    if _moviepilot_rust:
        try:
            return _moviepilot_rust.parse_rss_items_fast(xml_text, max_items)
        except BaseException as err:
            # NOTE(review): _raise_non_rust_panic is expected to re-raise anything that
            # must not be swallowed (e.g. user interrupts); confirm against its body.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust RSS解析失败使用 Python 解析兜底:{err}")
    return None
def _raise_non_rust_panic(err: BaseException) -> None:
"""
只吞掉 Rust 扩展 panic/异常,保留用户中断和进程退出语义。

View File

@@ -487,6 +487,7 @@ dependencies = [
"minijinja",
"once_cell",
"pyo3",
"quick-xml",
"regex",
"scraper",
"url",
@@ -693,6 +694,15 @@ dependencies = [
"syn",
]
[[package]]
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
]
[[package]]
name = "quote"
version = "1.0.45"

View File

@@ -12,6 +12,7 @@ minijinja = "2.20"
chrono = "0.4"
once_cell = "1.20"
pyo3 = { version = "0.23", features = ["abi3-py311", "extension-module"] }
quick-xml = "0.38"
regex = "1.11"
scraper = "0.24"
url = "2.5"

View File

@@ -1,5 +1,6 @@
mod filter;
mod indexer;
mod rss;
mod utils;
use pyo3::prelude::*;
@@ -16,5 +17,6 @@ fn moviepilot_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(is_available, m)?)?;
m.add_function(wrap_pyfunction!(filter::parse_filter_rule_fast, m)?)?;
m.add_function(wrap_pyfunction!(indexer::parse_indexer_torrents_fast, m)?)?;
m.add_function(wrap_pyfunction!(rss::parse_rss_items_fast, m)?)?;
Ok(())
}

View File

@@ -0,0 +1,384 @@
use chrono::{DateTime, Local, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
use pyo3::prelude::*;
use pyo3::types::{PyAny, PyDict, PyList};
use quick_xml::events::{BytesRef, BytesStart, Event};
use quick_xml::name::QName;
use quick_xml::Reader;
/// One RSS `<item>` / Atom `<entry>` accumulated while streaming the document.
///
/// String fields start empty and are appended to as text events arrive; an empty
/// string means "not seen yet".
#[derive(Debug, Default, Clone, PartialEq, Eq)]
struct RssItem {
    /// Text of the entry's title element.
    title: String,
    /// Text of the `description` / `summary` element.
    description: String,
    /// Page address, from `<link>` text or a `link` element's `href` attribute.
    link: String,
    /// Download address from the `enclosure` element's `url` attribute.
    enclosure: String,
    /// Value of the `enclosure` element's `length` attribute; 0 when absent or unparsable.
    size: i64,
    /// Raw publication date text, parsed later when converting to Python.
    pubdate: String,
    /// Text of the `creator` element (e.g. `dc:creator`).
    nickname: String,
}
/// Identifies which `RssItem` string field the text currently being read belongs to.
#[derive(Clone, Copy)]
enum TextField {
    /// Text goes to `RssItem::title`.
    Title,
    /// Text goes to `RssItem::description`.
    Description,
    /// Text goes to `RssItem::link`.
    Link,
    /// Text goes to `RssItem::pubdate`.
    Pubdate,
    /// Text goes to `RssItem::nickname`.
    Nickname,
}
/// Python entry point: parses RSS/Atom text and returns a list of entry dicts.
///
/// The dict layout is produced by `item_to_py`. The return value is always `Some(list)`;
/// parse failures surface as a Python exception instead of `None`.
#[pyfunction]
#[pyo3(signature = (xml_text, max_items=1000))]
pub(crate) fn parse_rss_items_fast(
    py: Python<'_>,
    xml_text: &str,
    max_items: usize,
) -> PyResult<Option<PyObject>> {
    // Parse first so that a malformed document fails before any Python object is built.
    let entries = parse_rss_items(xml_text, max_items)?;
    let py_list = PyList::empty(py);
    entries
        .iter()
        .try_for_each(|entry| py_list.append(item_to_py(py, entry)?))?;
    Ok(Some(py_list.into()))
}
/// Streams through RSS/Atom text with quick-xml and collects at most `max_items` entries.
///
/// Every element whose local name is `item` or `entry` opens a new `RssItem`. Text, CDATA
/// and entity references found while a field is active are appended to that field.
/// Entries rejected by `finalize_item` are skipped and do not count towards the limit.
/// A `max_items` of zero yields an empty list.
///
/// # Errors
/// Returns a Python `ValueError` when quick-xml reports a read or decoding error.
fn parse_rss_items(xml_text: &str, max_items: usize) -> PyResult<Vec<RssItem>> {
    let mut results = Vec::new();
    // The limit is only checked after a push, so a zero limit has to be handled up front;
    // otherwise one entry would still be returned.
    if max_items == 0 {
        return Ok(results);
    }
    let mut reader = Reader::from_str(xml_text);
    // Entry currently being assembled, if the reader is inside an <item>/<entry>.
    let mut current_item: Option<RssItem> = None;
    // Nesting depth relative to the open entry element (the entry itself is depth 1).
    let mut item_depth = 0usize;
    // Field currently receiving text and the depth of the element that activated it.
    let mut current_field: Option<(TextField, usize)> = None;
    loop {
        match reader.read_event() {
            Ok(Event::Start(event)) => {
                let local = local_name(event.name());
                if current_item.is_none() && is_item_node(&local) {
                    current_item = Some(RssItem::default());
                    item_depth = 1;
                    current_field = None;
                    continue;
                }
                if let Some(item) = current_item.as_mut() {
                    item_depth += 1;
                    handle_start_field(&event, &local, item, item_depth, &mut current_field)?;
                }
            }
            Ok(Event::Empty(event)) => {
                // Self-closing tags carry no text and do not change the depth.
                let local = local_name(event.name());
                if let Some(item) = current_item.as_mut() {
                    handle_empty_field(&event, &local, item)?;
                }
            }
            Ok(Event::Text(event)) => {
                if let (Some(item), Some((field, _))) = (current_item.as_mut(), current_field) {
                    let text = event.decode().map_err(to_py_value_error)?.to_string();
                    append_text_field(item, field, &text);
                }
            }
            Ok(Event::CData(event)) => {
                if let (Some(item), Some((field, _))) = (current_item.as_mut(), current_field) {
                    let text = event.decode().map_err(to_py_value_error)?.to_string();
                    append_text_field(item, field, &text);
                }
            }
            Ok(Event::GeneralRef(event)) => {
                // Entity references arrive as separate events and are resolved by hand.
                if let (Some(item), Some((field, _))) = (current_item.as_mut(), current_field) {
                    let text = resolve_general_ref(&event)?;
                    append_text_field(item, field, &text);
                }
            }
            Ok(Event::End(event)) => {
                let local = local_name(event.name());
                if current_item.is_some() && item_depth == 1 && is_item_node(&local) {
                    // Closing tag of the entry itself: finalize and store it.
                    if let Some(item) = current_item.take() {
                        if let Some(item) = finalize_item(item) {
                            results.push(item);
                            if results.len() >= max_items {
                                break;
                            }
                        }
                    }
                    item_depth = 0;
                    current_field = None;
                    continue;
                }
                if current_item.is_some() && item_depth > 0 {
                    // Only the element that activated the field may deactivate it, so
                    // inline child tags do not end text collection early.
                    if current_field
                        .map(|(_, depth)| depth == item_depth)
                        .unwrap_or(false)
                    {
                        current_field = None;
                    }
                    item_depth = item_depth.saturating_sub(1);
                }
            }
            Ok(Event::Eof) => break,
            Err(err) => {
                return Err(to_py_value_error(err));
            }
            _ => {}
        }
    }
    Ok(results)
}
/// Reacts to an opening tag inside an entry: reads link/enclosure attributes and, when
/// no field is active yet, activates the text field that matches the tag.
///
/// `depth` is stored with the activated field so the caller can tell which closing tag
/// ends it.
fn handle_start_field(
    event: &BytesStart<'_>,
    local: &str,
    item: &mut RssItem,
    depth: usize,
    current_field: &mut Option<(TextField, usize)>,
) -> PyResult<()> {
    match local {
        // An enclosure only contributes attributes; it never becomes a text field.
        "enclosure" => return fill_enclosure(event, item),
        "link" => fill_link_from_href(event, item)?,
        _ => {}
    }
    if current_field.is_some() {
        // A field is already collecting text; nested tags must not replace it.
        return Ok(());
    }
    *current_field = pick_text_field(local, item).map(|field| (field, depth));
    Ok(())
}
/// Reacts to a self-closing tag inside an entry; only `enclosure` and `link` carry
/// attributes that are read, every other tag is ignored.
fn handle_empty_field(event: &BytesStart<'_>, local: &str, item: &mut RssItem) -> PyResult<()> {
    if local == "enclosure" {
        fill_enclosure(event, item)
    } else if local == "link" {
        fill_link_from_href(event, item)
    } else {
        Ok(())
    }
}
/// Maps a tag's local name to the text field it feeds, or `None` when the tag is not
/// collected or the corresponding field already holds text (first occurrence wins).
fn pick_text_field(local: &str, item: &RssItem) -> Option<TextField> {
    let (field, already_filled) = match local {
        "title" => (TextField::Title, !item.title.is_empty()),
        "description" | "summary" => (TextField::Description, !item.description.is_empty()),
        "link" => (TextField::Link, !item.link.is_empty()),
        "pubDate" | "published" | "updated" => (TextField::Pubdate, !item.pubdate.is_empty()),
        "creator" => (TextField::Nickname, !item.nickname.is_empty()),
        _ => return None,
    };
    (!already_filled).then_some(field)
}
/// Appends `text` to the string field selected by `field`.
///
/// Appending (rather than assigning) lets a field be built from several consecutive
/// text, CDATA and entity events. Empty input leaves the field untouched.
fn append_text_field(item: &mut RssItem, field: TextField, text: &str) {
    let target = match field {
        TextField::Title => &mut item.title,
        TextField::Description => &mut item.description,
        TextField::Link => &mut item.link,
        TextField::Pubdate => &mut item.pubdate,
        TextField::Nickname => &mut item.nickname,
    };
    // Pushing an empty slice is a no-op, so no separate emptiness check is needed.
    target.push_str(text);
}
/// Turns an entity reference event into its replacement text.
///
/// Character references are resolved by quick-xml; the five predefined XML entities are
/// mapped here; any other entity name is re-emitted verbatim as `&name;`.
fn resolve_general_ref(event: &BytesRef<'_>) -> PyResult<String> {
    if let Some(ch) = event.resolve_char_ref().map_err(to_py_value_error)? {
        return Ok(ch.to_string());
    }
    let entity = event.decode().map_err(to_py_value_error)?;
    let replacement = match entity.as_ref() {
        "amp" => "&",
        "lt" => "<",
        "gt" => ">",
        "apos" => "'",
        "quot" => "\"",
        // Unknown entity: keep the original reference text.
        other => return Ok(format!("&{other};")),
    };
    Ok(replacement.to_owned())
}
/// Copies the `url` and `length` attributes of an `enclosure` tag into the entry.
///
/// Does nothing once an enclosure URL has been recorded. A `length` that is not a valid
/// integer is stored as 0.
fn fill_enclosure(event: &BytesStart<'_>, item: &mut RssItem) -> PyResult<()> {
    if item.enclosure.is_empty() {
        if let Some(url) = attr_value(event, b"url")? {
            item.enclosure = url;
        }
        if let Some(length) = attr_value(event, b"length")? {
            item.size = length.trim().parse::<i64>().unwrap_or_default();
        }
    }
    Ok(())
}
/// Stores the `href` attribute of a `link` tag as the entry's page address, unless a
/// link has already been recorded.
fn fill_link_from_href(event: &BytesStart<'_>, item: &mut RssItem) -> PyResult<()> {
    if item.link.is_empty() {
        if let Some(href) = attr_value(event, b"href")? {
            item.link = href;
        }
    }
    Ok(())
}
/// Looks up the first attribute whose key equals `name` (ASCII case-insensitive) and
/// returns its unescaped, trimmed value; `Ok(None)` when no such attribute exists.
///
/// # Errors
/// Returns a Python `ValueError` if an attribute encountered before the match is
/// malformed or the matched value cannot be decoded.
fn attr_value(event: &BytesStart<'_>, name: &[u8]) -> PyResult<Option<String>> {
    for candidate in event.attributes().with_checks(false) {
        let candidate = candidate.map_err(to_py_value_error)?;
        if !candidate.key.as_ref().eq_ignore_ascii_case(name) {
            continue;
        }
        return candidate
            .decode_and_unescape_value(event.decoder())
            .map(|raw| Some(raw.trim().to_string()))
            .map_err(to_py_value_error);
    }
    Ok(None)
}
/// Normalizes a finished entry and decides whether it is kept.
///
/// All string fields are trimmed. The entry is dropped (`None`) when it has no title or
/// when it has neither an enclosure nor a link; a missing enclosure falls back to the link.
fn finalize_item(mut item: RssItem) -> Option<RssItem> {
    for field in [
        &mut item.title,
        &mut item.description,
        &mut item.link,
        &mut item.enclosure,
        &mut item.pubdate,
        &mut item.nickname,
    ] {
        *field = field.trim().to_string();
    }
    let has_target = !item.enclosure.is_empty() || !item.link.is_empty();
    if item.title.is_empty() || !has_target {
        return None;
    }
    if item.enclosure.is_empty() {
        item.enclosure.clone_from(&item.link);
    }
    Some(item)
}
/// Builds the Python dict for one entry.
///
/// Keys: `title`, `enclosure`, `size`, `description`, `link`, `pubdate`, plus `nickname`
/// only when it is non-empty. `pubdate` is a Python datetime when the date text could be
/// parsed and an empty string otherwise.
fn item_to_py(py: Python<'_>, item: &RssItem) -> PyResult<PyObject> {
    let entry = PyDict::new(py);
    entry.set_item("title", &item.title)?;
    entry.set_item("enclosure", &item.enclosure)?;
    entry.set_item("size", item.size)?;
    entry.set_item("description", &item.description)?;
    entry.set_item("link", &item.link)?;
    match parse_pubdate_timestamp(&item.pubdate) {
        Some(timestamp) => {
            entry.set_item("pubdate", py_datetime_from_timestamp(py, timestamp)?)?
        }
        None => entry.set_item("pubdate", "")?,
    }
    if !item.nickname.is_empty() {
        entry.set_item("nickname", &item.nickname)?;
    }
    Ok(entry.into())
}
/// Converts a Unix timestamp into a timezone-aware Python `datetime`.
///
/// Calls `datetime.fromtimestamp(timestamp, timezone.utc)` and then `astimezone()` with
/// no argument on the result.
fn py_datetime_from_timestamp<'py>(py: Python<'py>, timestamp: i64) -> PyResult<Bound<'py, PyAny>> {
    let module = py.import("datetime")?;
    let datetime_cls = module.getattr("datetime")?;
    let utc = module.getattr("timezone")?.getattr("utc")?;
    datetime_cls
        .call_method1("fromtimestamp", (timestamp, utc))?
        .call_method0("astimezone")
}
/// Parses a feed date string into a Unix timestamp, or `None` if no format matches.
///
/// Formats are tried in order: RFC 2822, RFC 3339, naive date-times with a trailing
/// ` UTC` / ` GMT`, and finally naive date(-time)s interpreted in the local time zone.
fn parse_pubdate_timestamp(value: &str) -> Option<i64> {
    let text = value.trim();
    if text.is_empty() {
        return None;
    }
    DateTime::parse_from_rfc2822(text)
        .or_else(|_| DateTime::parse_from_rfc3339(text))
        .ok()
        .map(|parsed| parsed.timestamp())
        .or_else(|| parse_utc_suffix_datetime(text))
        .or_else(|| parse_local_naive_datetime(text))
}
/// Parses a naive date-time followed by a literal ` UTC` or ` GMT` suffix and returns
/// its timestamp interpreted as UTC; `None` when the suffix or format does not match.
fn parse_utc_suffix_datetime(value: &str) -> Option<i64> {
    const FORMATS: [&str; 4] = [
        "%a, %d %b %Y %H:%M:%S",
        "%d %b %Y %H:%M:%S",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S",
    ];
    [" UTC", " GMT"]
        .iter()
        .filter_map(|suffix| value.strip_suffix(*suffix))
        .map(str::trim)
        .find_map(|body| {
            FORMATS
                .iter()
                .find_map(|format| NaiveDateTime::parse_from_str(body, format).ok())
        })
        .map(|naive| Utc.from_utc_datetime(&naive).timestamp())
}
/// Parses a date or date-time without zone information and interprets it in the
/// system's local time zone. Date-only values are taken as midnight.
///
/// The first format that parses decides the result, even if the local-time conversion
/// then yields `None`.
fn parse_local_naive_datetime(value: &str) -> Option<i64> {
    const DATETIME_FORMATS: [&str; 7] = [
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%d %H:%M",
        "%Y/%m/%d %H:%M:%S",
        "%Y/%m/%d %H:%M",
        "%d %b %Y %H:%M:%S",
        "%a, %d %b %Y %H:%M:%S",
    ];
    const DATE_FORMATS: [&str; 3] = ["%Y-%m-%d", "%Y/%m/%d", "%d %b %Y"];

    let as_datetime = DATETIME_FORMATS
        .iter()
        .find_map(|format| NaiveDateTime::parse_from_str(value, format).ok());
    if let Some(naive) = as_datetime {
        return local_timestamp(naive);
    }
    DATE_FORMATS
        .iter()
        .find_map(|format| NaiveDate::parse_from_str(value, format).ok())
        .and_then(|date| local_timestamp(NaiveDateTime::new(date, NaiveTime::MIN)))
}
/// Converts a naive local date-time into a Unix timestamp.
///
/// An unambiguous time maps to its single instant, an ambiguous one (e.g. a DST
/// fold) to the earlier instant, and a non-existent local time yields `None`.
fn local_timestamp(naive: NaiveDateTime) -> Option<i64> {
    // `earliest()` already returns the unique instant when there is exactly one.
    let resolved = Local.from_local_datetime(&naive).earliest()?;
    Some(resolved.timestamp())
}
/// Returns `true` when the local tag name opens a feed entry: RSS `item` or Atom `entry`.
/// The comparison is case-sensitive.
fn is_item_node(local: &str) -> bool {
    local == "item" || local == "entry"
}
/// Returns the part of a qualified XML name after the last `:` (the whole name when
/// there is no prefix), so that e.g. `dc:creator` is matched as `creator`.
/// Names that are not valid UTF-8 produce an empty string.
fn local_name(name: QName<'_>) -> String {
    let qualified = std::str::from_utf8(name.as_ref()).unwrap_or_default();
    // The first piece yielded by `rsplit` is everything after the last colon.
    qualified
        .rsplit(':')
        .next()
        .unwrap_or(qualified)
        .to_string()
}
/// Wraps any displayable error in a Python `ValueError` carrying the error's message.
fn to_py_value_error<E: std::fmt::Display>(err: E) -> PyErr {
    let message = format!("{err}");
    pyo3::exceptions::PyValueError::new_err(message)
}

View File

@@ -0,0 +1,156 @@
import argparse
import statistics
import sys
import time
from contextlib import contextmanager
from pathlib import Path
from types import SimpleNamespace
# Directory one level above the one containing this script; put first on sys.path so
# the `app` package imports below resolve when the script is run directly.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(PROJECT_ROOT))
from app.helper import rss as rss_module
from app.helper.rss import RssHelper
from app.utils import rust_accel
class FakeRequestUtils:
    """
    Benchmark stand-in for RequestUtils that always serves the in-memory RSS text
    stored on the class attribute ``xml_text``.
    """

    xml_text = ""

    def __init__(self, **_kwargs):
        """
        Accept and ignore any keyword arguments a caller may pass.
        """

    def get_res(self, _url):
        """
        Return a minimal response-like object for the given URL (which is ignored).
        """
        body = self.xml_text
        return SimpleNamespace(
            status_code=200,
            content=body.encode("utf-8"),
            text=body,
            apparent_encoding="utf-8",
            encoding="utf-8",
        )
def build_rss_xml(items: int) -> str:
    """
    Build an RSS document with ``items`` entries, each carrying a title, a CDATA
    description, a link, an enclosure, a publication date and a dc:creator.
    """
    rows = [
        f"""
<item>
<title>MoviePilot Benchmark {index}</title>
<description><![CDATA[Benchmark description {index} <b>tag</b>]]></description>
<link>https://example.com/details/{index}</link>
<enclosure url="https://example.com/download/{index}.torrent" length="{1024 + index}" />
<pubDate>Tue, 19 May 2026 08:30:00 GMT</pubDate>
<dc:creator>bench-user-{index}</dc:creator>
</item>
"""
        for index in range(items)
    ]
    body = "".join(rows)
    return f"""
<rss xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
{body}
</channel>
</rss>
"""
@contextmanager
def patched_request_utils(xml_text: str):
    """
    Context manager that swaps ``rss_module.RequestUtils`` for ``FakeRequestUtils``
    serving ``xml_text`` and puts the previous class back on exit.
    """
    previous_cls = getattr(rss_module, "RequestUtils")
    FakeRequestUtils.xml_text = xml_text
    setattr(rss_module, "RequestUtils", FakeRequestUtils)
    try:
        yield
    finally:
        # Restore even when the body raised.
        setattr(rss_module, "RequestUtils", previous_cls)
def disabled_rust_parse(_xml_text: str, _max_items: int = 1000):
    """
    Replacement for ``rust_accel.parse_rss_items`` that always reports "unavailable"
    (``None``), which makes ``RssHelper.parse`` continue with its lxml code path.
    """
    return None
@contextmanager
def selected_rss_parser(use_rust: bool):
    """
    Context manager choosing the parser used by ``RssHelper.parse``: the real
    ``rust_accel.parse_rss_items`` when ``use_rust`` is true, otherwise
    ``disabled_rust_parse``. The original function is restored on exit.
    """
    saved_parser = rss_module.rust_accel.parse_rss_items
    try:
        if not use_rust:
            rss_module.rust_accel.parse_rss_items = disabled_rust_parse
        yield
    finally:
        rss_module.rust_accel.parse_rss_items = saved_parser
def parse_chain(xml_text: str, use_rust: bool):
    """
    Run ``RssHelper.parse`` once against the fake request layer and return its result.
    """
    with patched_request_utils(xml_text):
        with selected_rss_parser(use_rust):
            helper = RssHelper()
            return helper.parse("https://example.com/rss")
def measure_chain(xml_text: str, use_rust: bool, loops: int, repeats: int):
    """
    Time ``RssHelper.parse`` over several rounds.

    :param xml_text: RSS document served to the helper
    :param use_rust: whether the Rust fast path is enabled
    :param loops: number of parse calls per round
    :param repeats: number of timed rounds
    :return: ``(median milliseconds per call across rounds, item count of the last call)``;
             the count is 0 when the last call returned a falsy value
    """
    samples = []
    parsed_count = 0
    for _ in range(repeats):
        start = time.perf_counter()
        for _ in range(loops):
            parsed = parse_chain(xml_text, use_rust)
            # RssHelper.parse can return False (e.g. invalid XML) instead of a list;
            # len() on that would raise TypeError and abort the benchmark.
            parsed_count = len(parsed) if parsed else 0
        samples.append((time.perf_counter() - start) * 1000 / loops)
    return statistics.median(samples), parsed_count
def parse_args():
    """
    Parse the benchmark's command-line options: --items, --loops and --repeats.
    """
    parser = argparse.ArgumentParser(description="Benchmark RSS parsing through RssHelper.parse")
    int_options = (
        ("--items", 200, "RSS item count"),
        ("--loops", 50, "Loops per repeat"),
        ("--repeats", 5, "Repeat count"),
    )
    for flag, default, help_text in int_options:
        parser.add_argument(flag, type=int, default=default, help=help_text)
    return parser.parse_args()
def main() -> int:
    """
    Benchmark the Rust and the Python RSS parsing chains and print a summary.

    :return: process exit code, always 0
    """
    options = parse_args()
    feed = build_rss_xml(options.items)
    timing = {"loops": options.loops, "repeats": options.repeats}
    rust_ms, rust_count = measure_chain(feed, use_rust=True, **timing)
    python_ms, python_count = measure_chain(feed, use_rust=False, **timing)
    # Guard against a zero median so the ratio never divides by zero.
    speedup = python_ms / rust_ms if rust_ms else 0
    report = (
        f"rust_available={rust_accel.is_available()}",
        f"items={options.items} loops={options.loops} repeats={options.repeats}",
        f"rust_items={rust_count} python_items={python_count}",
        f"rust_chain_ms_per_loop={rust_ms:.3f}",
        f"python_chain_ms_per_loop={python_ms:.3f}",
        f"speedup={speedup:.2f}x",
    )
    for line in report:
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -1,5 +1,10 @@
from datetime import datetime, timezone
from types import SimpleNamespace
import pytest
from app.helper import rss as rss_module
from app.helper.rss import RssHelper
from app.modules.indexer.spider import SiteSpider
from app.schemas.types import MediaType
from app.utils import rust_accel
@@ -29,6 +34,127 @@ def test_rust_filter_rule_parser_handles_parentheses_and_or():
assert result == [[["CNSUB", "and", ["4K", "or", "1080P"]], "and", ["not", "BLU"]]]
def test_rust_rss_parser_extracts_rss_and_atom_items():
    """
    The Rust parser should handle an RSS item and an Atom entry in one document,
    including namespaced elements, entities, CDATA and date fields.
    """
    xml = """
<root xmlns:dc="http://purl.org/dc/elements/1.1/">
<rss>
<channel>
<item>
<title>Movie &amp; Show</title>
<description><![CDATA[Desc <b>bold</b>]]></description>
<link>https://example.com/details/1</link>
<enclosure url="https://example.com/download/1.torrent" length="123456" />
<pubDate>Tue, 19 May 2026 08:30:00 GMT</pubDate>
<dc:creator>豆瓣用户</dc:creator>
</item>
</channel>
</rss>
<feed>
<entry>
<title>Atom Title</title>
<summary>Atom Summary</summary>
<link href="https://example.com/atom/2" />
<updated>2026-05-19T09:30:00Z</updated>
</entry>
</feed>
</root>
"""
    result = rust_accel.parse_rss_items(xml, max_items=100)
    assert len(result) == 2
    rss_item, atom_entry = result

    expected_rss = {
        "title": "Movie & Show",
        "description": "Desc <b>bold</b>",
        "link": "https://example.com/details/1",
        "enclosure": "https://example.com/download/1.torrent",
        "size": 123456,
        "nickname": "豆瓣用户",
    }
    for key, value in expected_rss.items():
        assert rss_item[key] == value
    rss_published = datetime(2026, 5, 19, 8, 30, tzinfo=timezone.utc)
    assert int(rss_item["pubdate"].timestamp()) == int(rss_published.timestamp())

    expected_atom = {
        "title": "Atom Title",
        "description": "Atom Summary",
        "link": "https://example.com/atom/2",
        # Without an enclosure element the link doubles as the enclosure.
        "enclosure": "https://example.com/atom/2",
    }
    for key, value in expected_atom.items():
        assert atom_entry[key] == value
    atom_updated = datetime(2026, 5, 19, 9, 30, tzinfo=timezone.utc)
    assert int(atom_entry["pubdate"].timestamp()) == int(atom_updated.timestamp())
def test_rust_rss_parser_skips_incomplete_items():
    """
    Entries without a title, or without any link/enclosure, must be dropped.
    """
    xml = """
<rss>
<channel>
<item><title></title><link>https://example.com/a</link></item>
<item><title>No Link</title></item>
<item><title>OK</title><link>https://example.com/ok</link></item>
</channel>
</rss>
"""
    # Only the third item is complete; it has no creator, so no "nickname" key.
    expected_item = {
        "title": "OK",
        "enclosure": "https://example.com/ok",
        "size": 0,
        "description": "",
        "link": "https://example.com/ok",
        "pubdate": "",
    }
    parsed = rust_accel.parse_rss_items(xml, max_items=100)
    assert parsed == [expected_item]
def test_rss_helper_parse_uses_rust_parser(monkeypatch):
    """
    RssHelper.parse should return the entries parsed from the fetched document
    when the request layer is replaced by an in-memory fake.
    """
    xml = """
<rss>
<channel>
<item>
<title>Helper Title</title>
<description>Helper Description</description>
<link>https://example.com/details/3</link>
<pubDate>2026-05-19T10:30:00Z</pubDate>
</item>
</channel>
</rss>
"""

    class FakeRequestUtils:
        """
        Test double for RequestUtils that avoids real network traffic.
        """

        def __init__(self, **_kwargs):
            """
            Accept and ignore whatever keyword arguments the caller passes.
            """

        def get_res(self, _url):
            """
            Return a minimal response object exposing content, text and status_code.
            """
            fields = {
                "status_code": 200,
                "content": xml.encode("utf-8"),
                "text": xml,
                "apparent_encoding": "utf-8",
                "encoding": "utf-8",
            }
            return SimpleNamespace(**fields)

    monkeypatch.setattr(rss_module, "RequestUtils", FakeRequestUtils)
    result = RssHelper().parse("https://example.com/rss")

    assert len(result) == 1
    entry = result[0]
    assert entry["title"] == "Helper Title"
    assert entry["enclosure"] == "https://example.com/details/3"
    expected_published = datetime(2026, 5, 19, 10, 30, tzinfo=timezone.utc)
    assert int(entry["pubdate"].timestamp()) == int(expected_published.timestamp())
def test_rust_indexer_parser_handles_jinja_pyquery_filters_and_links():
"""
Rust indexer 解析应覆盖普通站点配置的 Jinja、PyQuery selector 和过滤器。