mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-07 07:26:50 +00:00
feat: accelerate metainfo parsing with rust
This commit is contained in:
@@ -12,6 +12,7 @@ from app.core.meta.infopath import (
|
||||
from app.core.meta.words import WordsMatcher
|
||||
from app.log import logger
|
||||
from app.schemas.types import MediaType
|
||||
from app.utils import rust_accel
|
||||
|
||||
|
||||
_ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE)
|
||||
@@ -122,6 +123,87 @@ def _build_meta_info(
|
||||
return meta
|
||||
|
||||
|
||||
def _rust_parse_options(custom_words: List[str] = None) -> dict:
    """
    Assemble the runtime configuration handed to the Rust meta parser.

    Everything is resolved on the Python side so that the Rust layer does not
    have to read the database or ``settings`` itself.

    :param custom_words: custom identifier words; when None, the stored
        ``SystemConfigKey.CustomIdentifiers`` value is used instead
    :return: dict with the keys ``custom_words``, ``media_exts``,
        ``release_groups`` and ``customization``
    """
    from app.core.meta.customization import CustomizationMatcher
    from app.core.meta.releasegroup import ReleaseGroupsMatcher
    from app.db.systemconfig_oper import SystemConfigOper
    from app.schemas.types import SystemConfigKey

    config = SystemConfigOper()

    # User-defined release groups; empty entries are dropped when a list is stored.
    extra_groups = config.get(SystemConfigKey.CustomReleaseGroups)
    if isinstance(extra_groups, list):
        extra_groups = [group for group in extra_groups if group]

    # Reads the matcher's private (name-mangled) ``__release_groups`` attribute
    # and appends the user-defined groups as further "|" alternatives.
    group_pattern = ReleaseGroupsMatcher()._ReleaseGroupsMatcher__release_groups
    if extra_groups:
        group_pattern = f"{group_pattern}|{'|'.join(extra_groups)}"

    normalized_customization = CustomizationMatcher._normalize_customization(
        config.get(SystemConfigKey.Customization)
    )

    # An explicit argument (even an empty list) wins over the stored identifiers.
    if custom_words is None:
        identifier_words = config.get(SystemConfigKey.CustomIdentifiers) or []
    else:
        identifier_words = custom_words

    return {
        "custom_words": identifier_words or [],
        "media_exts": settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT,
        "release_groups": group_pattern,
        "customization": normalized_customization,
    }
|
||||
|
||||
|
||||
def _meta_from_rust(parsed: dict) -> Optional[MetaBase]:
    """
    Copy a Rust parse result onto a regular MetaVideo/MetaAnime object so that
    downstream code keeps working with the usual attributes and methods.

    :param parsed: result dict from the Rust parser; a falsy value means
        no result is available
    :return: a populated MetaAnime (when ``kind`` is ``"anime"``) or MetaVideo,
        or None when ``parsed`` is falsy
    """
    if not parsed:
        return None

    meta = MetaAnime("") if parsed.get("kind") == "anime" else MetaVideo("")

    # The Rust side reports the media type by its enum value; map it back to the member.
    media_types = {
        member.value: member
        for member in (MediaType.MOVIE, MediaType.TV, MediaType.COLLECTION, MediaType.UNKNOWN)
    }

    # Attributes are assigned in exactly this order.
    field_order = (
        "isfile",
        "title",
        "org_string",
        "subtitle",
        "type",
        "cn_name",
        "en_name",
        "original_name",
        "year",
        "total_season",
        "begin_season",
        "end_season",
        "total_episode",
        "begin_episode",
        "end_episode",
        "part",
        "resource_type",
        "resource_effect",
        "resource_pix",
        "resource_team",
        "customization",
        "web_source",
        "video_encode",
        "video_bit",
        "audio_encode",
        "apply_words",
        "tmdbid",
        "doubanid",
        "fps",
    )
    attrs = {key: parsed.get(key) for key in field_order}

    # Unknown or missing type values fall back to MediaType.UNKNOWN.
    attrs["type"] = media_types.get(attrs["type"], MediaType.UNKNOWN)

    # Fields whose missing/falsy value is replaced by a concrete default.
    for key, fallback in (
        ("isfile", False),
        ("title", ""),
        ("total_season", 0),
        ("total_episode", 0),
        ("apply_words", []),
    ):
        attrs[key] = attrs[key] or fallback

    for attr_name, attr_value in attrs.items():
        setattr(meta, attr_name, attr_value)
    return meta
|
||||
|
||||
|
||||
def MetaInfo(title: str, subtitle: Optional[str] = None, custom_words: List[str] = None) -> MetaBase:
|
||||
"""
|
||||
根据标题和副标题识别元数据
|
||||
@@ -130,6 +212,11 @@ def MetaInfo(title: str, subtitle: Optional[str] = None, custom_words: List[str]
|
||||
:param custom_words: 自定义识别词列表
|
||||
:return: MetaAnime、MetaVideo
|
||||
"""
|
||||
rust_meta = _meta_from_rust(
|
||||
rust_accel.parse_metainfo(title, subtitle, _rust_parse_options(custom_words))
|
||||
)
|
||||
if rust_meta:
|
||||
return rust_meta
|
||||
meta = _build_meta_info(title=title, subtitle=subtitle, custom_words=custom_words)
|
||||
if meta.apply_words:
|
||||
original_meta = _build_meta_info(title=title, subtitle=subtitle)
|
||||
@@ -145,6 +232,11 @@ def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase:
|
||||
:param path: 路径
|
||||
:param custom_words: 自定义识别词列表
|
||||
"""
|
||||
rust_meta = _meta_from_rust(
|
||||
rust_accel.parse_metainfo_path(str(path), _rust_parse_options(custom_words))
|
||||
)
|
||||
if rust_meta:
|
||||
return rust_meta
|
||||
# 文件元数据,不包含后缀
|
||||
file_meta = MetaInfo(title=path.name, custom_words=custom_words)
|
||||
if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta):
|
||||
@@ -185,6 +277,9 @@ def find_metainfo(title: str) -> Tuple[str, dict]:
|
||||
"""
|
||||
从标题中提取媒体信息
|
||||
"""
|
||||
rust_result = rust_accel.find_metainfo(title)
|
||||
if rust_result:
|
||||
return rust_result["title"], rust_result["metainfo"]
|
||||
metainfo = _empty_metainfo()
|
||||
if not title:
|
||||
return title, metainfo
|
||||
|
||||
@@ -81,6 +81,48 @@ def parse_rss_items(xml_text: str, max_items: int = 1000) -> Optional[List[dict]
|
||||
return None
|
||||
|
||||
|
||||
def parse_metainfo(title: str, subtitle: Optional[str] = None, options: Optional[dict] = None) -> Optional[dict]:
    """
    Parse MetaInfo from a title through the Rust extension.

    :param title: title to parse
    :param subtitle: optional subtitle
    :param options: parser options; a falsy value is sent as an empty dict
    :return: the Rust result, or None when the extension is not loaded or the
        call failed with an error that ``_raise_non_rust_panic`` did not re-raise
    """
    if _moviepilot_rust:
        try:
            return _moviepilot_rust.parse_metainfo_fast(title, subtitle, options if options else {})
        except BaseException as err:
            # BaseException is caught on purpose; the helper decides what must propagate.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust MetaInfo解析失败,使用 Python 解析兜底:{err}")
    return None
|
||||
|
||||
|
||||
def parse_metainfo_path(path: str, options: Optional[dict] = None) -> Optional[dict]:
    """
    Parse MetaInfoPath from a path string through the Rust extension.

    :param path: path to parse, as a string
    :param options: parser options; a falsy value is sent as an empty dict
    :return: the Rust result, or None when the extension is not loaded or the
        call failed with an error that ``_raise_non_rust_panic`` did not re-raise
    """
    if _moviepilot_rust:
        try:
            return _moviepilot_rust.parse_metainfo_path_fast(path, options if options else {})
        except BaseException as err:
            # BaseException is caught on purpose; the helper decides what must propagate.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust MetaInfoPath解析失败,使用 Python 解析兜底:{err}")
    return None
|
||||
|
||||
|
||||
def find_metainfo(title: str) -> Optional[dict]:
    """
    Extract the explicit media tags embedded in a title through the Rust extension.

    :param title: title to scan
    :return: the Rust result, or None when the extension is not loaded or the
        call failed with an error that ``_raise_non_rust_panic`` did not re-raise
    """
    if _moviepilot_rust:
        try:
            return _moviepilot_rust.find_metainfo_fast(title)
        except BaseException as err:
            # BaseException is caught on purpose; the helper decides what must propagate.
            _raise_non_rust_panic(err)
            logger.debug(f"Rust 显式媒体标签解析失败,使用 Python 解析兜底:{err}")
    return None
|
||||
|
||||
|
||||
def _raise_non_rust_panic(err: BaseException) -> None:
|
||||
"""
|
||||
只吞掉 Rust 扩展 panic/异常,保留用户中断和进程退出语义。
|
||||
|
||||
24
rust/moviepilot_rust/Cargo.lock
generated
24
rust/moviepilot_rust/Cargo.lock
generated
@@ -20,6 +20,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anitomy-pure"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33f525032668d2aff5dff115958157db7aecf1dc2fd5f5df93cf1be1452dfd4a"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.5.0"
|
||||
@@ -170,6 +176,12 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fst"
|
||||
version = "0.4.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a"
|
||||
|
||||
[[package]]
|
||||
name = "futf"
|
||||
version = "0.1.5"
|
||||
@@ -375,6 +387,16 @@ dependencies = [
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inputx-pinyin"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fa1bf26d5923bb73e34c63e7ef21fc976c37e016ea13872045bf72335f0a43c"
|
||||
dependencies = [
|
||||
"fst",
|
||||
"phf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.18"
|
||||
@@ -483,7 +505,9 @@ dependencies = [
|
||||
name = "moviepilot-rust"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anitomy-pure",
|
||||
"chrono",
|
||||
"inputx-pinyin",
|
||||
"minijinja",
|
||||
"once_cell",
|
||||
"pyo3",
|
||||
|
||||
@@ -8,6 +8,8 @@ name = "moviepilot_rust"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
anitomy-pure = "0.1"
|
||||
inputx-pinyin = "1.0.2"
|
||||
minijinja = "2.20"
|
||||
chrono = "0.4"
|
||||
once_cell = "1.20"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
mod filter;
|
||||
mod indexer;
|
||||
mod metainfo;
|
||||
mod rss;
|
||||
mod utils;
|
||||
|
||||
@@ -17,6 +18,9 @@ fn moviepilot_rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_function(wrap_pyfunction!(is_available, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(filter::parse_filter_rule_fast, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(indexer::parse_indexer_torrents_fast, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(metainfo::parse_metainfo_fast, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(metainfo::parse_metainfo_path_fast, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(metainfo::find_metainfo_fast, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(rss::parse_rss_items_fast, m)?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
3244
rust/moviepilot_rust/src/metainfo.rs
Normal file
3244
rust/moviepilot_rust/src/metainfo.rs
Normal file
File diff suppressed because it is too large
Load Diff
116
scripts/benchmark_metainfo_rust.py
Normal file
116
scripts/benchmark_metainfo_rust.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import argparse
|
||||
import statistics
|
||||
import sys
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
from app.core import metainfo as metainfo_module
|
||||
from app.core.metainfo import MetaInfo, MetaInfoPath
|
||||
from tests.cases.meta import meta_cases
|
||||
|
||||
|
||||
def build_inputs(repeat: int):
    """
    Build the benchmark inputs covering both MetaInfo and MetaInfoPath.

    :param repeat: how many times the whole ``meta_cases`` list is repeated
    :return: list of ``(kind, value, subtitle)`` tuples, where ``kind`` is
        ``"path"`` for cases with a truthy ``path`` entry and ``"title"`` otherwise
    """
    return [
        (("path", case["path"]) if case.get("path") else ("title", case["title"]))
        + (case.get("subtitle"),)
        for _ in range(repeat)
        for case in meta_cases
    ]
|
||||
|
||||
|
||||
def disabled_rust_parse(*_args, **_kwargs):
    """
    Stand-in for the Rust entry points that accepts any arguments and returns None.

    Installed in place of the Rust fast path so the benchmark can time the
    legacy Python chain.
    """
    return
|
||||
|
||||
|
||||
@contextmanager
def selected_meta_parser(use_rust: bool):
    """
    Context manager switching between the Rust entry points and the legacy
    Python implementation.

    :param use_rust: when False, the three ``rust_accel`` entry points used by
        ``metainfo_module`` are replaced with ``disabled_rust_parse`` for the
        duration of the block; when True nothing is replaced

    The original functions are always put back on exit, even if the block raises.
    """
    accel = metainfo_module.rust_accel
    entry_points = ("parse_metainfo", "parse_metainfo_path", "find_metainfo")
    saved = {name: getattr(accel, name) for name in entry_points}

    if not use_rust:
        for name in entry_points:
            setattr(accel, name, disabled_rust_parse)
    try:
        yield
    finally:
        for name, original in saved.items():
            setattr(accel, name, original)
|
||||
|
||||
|
||||
def parse_all(inputs, use_rust: bool):
    """
    Run one full pass over the inputs through the public MetaInfo/MetaInfoPath entries.

    :param inputs: iterable of ``(kind, value, subtitle)`` tuples
    :param use_rust: whether the Rust fast path stays enabled during the pass
    :return: list with one parsed meta object per input, in input order
    """
    with selected_meta_parser(use_rust):
        # "path" items go through MetaInfoPath; everything else through MetaInfo
        # with a fixed custom_words list.
        return [
            MetaInfoPath(Path(value))
            if kind == "path"
            else MetaInfo(title=value, subtitle=subtitle, custom_words=["#"])
            for kind, value, subtitle in inputs
        ]
|
||||
|
||||
|
||||
def measure(inputs, use_rust: bool, loops: int, repeats: int):
    """
    Time the MetaInfo entry parsing over several rounds.

    :param inputs: benchmark inputs passed to ``parse_all``
    :param use_rust: whether the Rust fast path is enabled
    :param loops: number of ``parse_all`` passes timed together in one round
    :param repeats: number of rounds
    :return: tuple ``(median milliseconds per loop, item count of the last pass)``
    """
    per_loop_ms = []
    last_count = 0
    for _round in range(repeats):
        started = time.perf_counter()
        for _pass in range(loops):
            last_count = len(parse_all(inputs, use_rust))
        elapsed = time.perf_counter() - started
        # Average cost of one pass in this round, in milliseconds.
        per_loop_ms.append(elapsed * 1000 / loops)
    return statistics.median(per_loop_ms), last_count
|
||||
|
||||
|
||||
def _positive_int(raw: str) -> int:
    """
    argparse ``type`` callable that accepts only integers greater than zero.

    :param raw: the raw command-line token
    :return: the parsed integer
    :raises argparse.ArgumentTypeError: if ``raw`` is not an integer or is below 1
    """
    try:
        value = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"expected an integer, got {raw!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def parse_args():
    """
    Parse the command-line arguments of the benchmark.

    ``--loops`` and ``--repeats`` must be positive: the measurement divides by
    the loop count and takes the median over one sample per repeat, so zero
    would otherwise end in a ZeroDivisionError or an empty-median error
    instead of a usage message.

    :return: namespace with ``repeat_inputs``, ``loops`` and ``repeats``
    """
    parser = argparse.ArgumentParser(description="Benchmark MetaInfo parsing through public entries")
    parser.add_argument("--repeat-inputs", type=int, default=20, help="Repeat meta cases per loop")
    parser.add_argument("--loops", type=_positive_int, default=10, help="Loops per repeat")
    parser.add_argument("--repeats", type=_positive_int, default=5, help="Repeat count")
    return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """
    Run the benchmark comparing the Rust and Python MetaInfo entry chains and
    print the results.

    :return: 0, used as the process exit status
    """
    args = parse_args()
    inputs = build_inputs(args.repeat_inputs)

    # The Rust-enabled chain is measured first, then the Python-only chain.
    rust_ms, rust_count = measure(inputs, use_rust=True, loops=args.loops, repeats=args.repeats)
    python_ms, python_count = measure(inputs, use_rust=False, loops=args.loops, repeats=args.repeats)

    # Avoid dividing by zero when the Rust timing comes out as 0.
    if rust_ms:
        speedup = python_ms / rust_ms
    else:
        speedup = 0

    report = (
        f"items_per_loop={len(inputs)} loops={args.loops} repeats={args.repeats}",
        f"rust_items={rust_count} python_items={python_count}",
        f"rust_chain_ms_per_loop={rust_ms:.3f}",
        f"python_chain_ms_per_loop={python_ms:.3f}",
        f"speedup={speedup:.2f}x",
    )
    for line in report:
        print(line)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
@@ -5,7 +5,13 @@ import pytest
|
||||
|
||||
from app.helper import rss as rss_module
|
||||
from app.helper.rss import RssHelper
|
||||
from app.core import metainfo as metainfo_module
|
||||
from app.core.config import settings
|
||||
from app.core.meta.customization import CustomizationMatcher
|
||||
from app.core.meta.releasegroup import ReleaseGroupsMatcher
|
||||
from app.db.systemconfig_oper import SystemConfigOper
|
||||
from app.modules.indexer.spider import SiteSpider
|
||||
from app.schemas.types import SystemConfigKey
|
||||
from app.schemas.types import MediaType
|
||||
from app.utils import rust_accel
|
||||
|
||||
@@ -155,6 +161,107 @@ def test_rss_helper_parse_uses_rust_parser(monkeypatch):
|
||||
assert int(result[0]["pubdate"].timestamp()) == int(datetime(2026, 5, 19, 10, 30, tzinfo=timezone.utc).timestamp())
|
||||
|
||||
|
||||
def _metainfo_options(custom_words=None):
    """
    Build the options dict the Rust MetaInfo tests pass to the parser, following
    the same recipe as the production entry.

    :param custom_words: custom identifier words; a falsy value becomes an empty list
    :return: dict with the keys ``custom_words``, ``media_exts``,
        ``release_groups`` and ``customization``
    """
    config = SystemConfigOper()

    # User-defined release groups; empty entries are dropped when a list is stored.
    extra_groups = config.get(SystemConfigKey.CustomReleaseGroups)
    if isinstance(extra_groups, list):
        extra_groups = [group for group in extra_groups if group]

    # Reads the matcher's private (name-mangled) ``__release_groups`` attribute.
    group_pattern = ReleaseGroupsMatcher()._ReleaseGroupsMatcher__release_groups
    if extra_groups:
        group_pattern = f"{group_pattern}|{'|'.join(extra_groups)}"

    normalized_customization = CustomizationMatcher._normalize_customization(
        config.get(SystemConfigKey.Customization)
    )

    return {
        "custom_words": custom_words or [],
        "media_exts": settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT,
        "release_groups": group_pattern,
        "customization": normalized_customization,
    }
|
||||
|
||||
|
||||
def test_rust_metainfo_parser_handles_video_from_entry():
    """
    The Rust MetaInfo entry should fully recognise an ordinary movie/TV title.
    """
    result = rust_accel.parse_metainfo(
        "The Long Season 2017 2160p WEB-DL H265 120FPS AAC-XXX",
        options=_metainfo_options(),
    )

    # Checked field by field, in this order.
    expected = {
        "kind": "video",
        "type": "未知",
        "en_name": "The Long Season",
        "year": "2017",
        "resource_type": "WEB-DL",
        "resource_pix": "2160p",
        "video_encode": "H265",
        "audio_encode": "AAC",
        "fps": 120,
    }
    for field, wanted in expected.items():
        assert result[field] == wanted
|
||||
|
||||
|
||||
def test_rust_metainfo_parser_handles_anime_from_entry():
    """
    The Rust MetaInfo entry should fully recognise an anime title.
    """
    result = rust_accel.parse_metainfo(
        "[ANi] OVERLORD 第四季 - 04 [1080P][Baha][WEB-DL][AAC AVC][CHT].mp4",
        options=_metainfo_options(),
    )

    # Checked field by field, in this order.
    expected = {
        "kind": "anime",
        "type": "电视剧",
        "en_name": "Overlord",
        "begin_season": 4,
        "begin_episode": 4,
        "resource_pix": "1080p",
        "video_encode": "AVC",
        "audio_encode": "AAC",
    }
    for field, wanted in expected.items():
        assert result[field] == wanted
|
||||
|
||||
|
||||
def test_rust_metainfo_path_parser_merges_parent_title():
    """
    The Rust MetaInfoPath entry should merge the parent directory title inside Rust.
    """
    result = rust_accel.parse_metainfo_path(
        "/Marty Supreme 2025 2160p DoVi HDR Atmos TrueHD 7.1 x265-PbK/简英双语特效.mp4",
        options=_metainfo_options(),
    )

    # The file name carries no title, so these values come from the parent directory name.
    expected = {
        "kind": "video",
        "en_name": "Marty Supreme",
        "year": "2025",
        "original_name": "Marty Supreme",
        "resource_pix": "2160p",
    }
    for field, wanted in expected.items():
        assert result[field] == wanted
|
||||
|
||||
|
||||
def test_metainfo_public_entry_uses_rust(monkeypatch):
    """
    The public MetaInfo entry should call the Rust parser instead of going
    straight into the legacy Python parsing logic.
    """
    seen_titles = []
    rust_parse = metainfo_module.rust_accel.parse_metainfo

    def recording_parse(*args, **kwargs):
        """
        Record the title passed to the Rust entry and forward the call unchanged.
        """
        seen_titles.append(args[0])
        return rust_parse(*args, **kwargs)

    monkeypatch.setattr(metainfo_module.rust_accel, "parse_metainfo", recording_parse)

    rule = "旧名 => 新名 && 第 <> 集 >> EP+1"
    meta = metainfo_module.MetaInfo("旧名 第03集", custom_words=[rule])

    # The Rust entry is called exactly once, with the original title.
    assert seen_titles == ["旧名 第03集"]
    assert meta.name == "新名"
    assert meta.episode == "E04"
    assert meta.apply_words == [rule]
|
||||
|
||||
|
||||
def test_rust_indexer_parser_handles_jinja_pyquery_filters_and_links():
|
||||
"""
|
||||
Rust indexer 解析应覆盖普通站点配置的 Jinja、PyQuery selector 和过滤器。
|
||||
|
||||
Reference in New Issue
Block a user