From ac3432c54fc408e64fa42945a9800a212326441d Mon Sep 17 00:00:00 2001 From: jxxghp Date: Sun, 24 May 2026 23:32:27 +0800 Subject: [PATCH] feat: support TMDB episode group (g=) in explicit media tags and custom identifiers - Add episode_group (g=) parameter parsing to explicit media tags in both Python and Rust metainfo parsers - Propagate episode_group through MetaInfo, MetaBase, MediaInfo, and context models - Update SKILL.md and update_custom_identifiers.py docs to describe episode group usage - Add tests for episode_group recognition in metainfo and chain recognition logic --- .../tools/impl/update_custom_identifiers.py | 3 +- app/chain/__init__.py | 4 + app/core/meta/metabase.py | 4 + app/core/metainfo.py | 134 ++++++++++-------- app/schemas/context.py | 2 + rust/moviepilot_rust/src/metainfo.rs | 19 ++- skills/generate-identifiers/SKILL.md | 19 ++- tests/test_episode_group_recognition.py | 37 +++++ tests/test_metainfo.py | 28 ++++ tests/test_rust_accel.py | 16 +++ 10 files changed, 203 insertions(+), 63 deletions(-) create mode 100644 tests/test_episode_group_recognition.py diff --git a/app/agent/tools/impl/update_custom_identifiers.py b/app/agent/tools/impl/update_custom_identifiers.py index 886b1bcc..cf4c0892 100644 --- a/app/agent/tools/impl/update_custom_identifiers.py +++ b/app/agent/tools/impl/update_custom_identifiers.py @@ -50,7 +50,8 @@ class UpdateCustomIdentifiersTool(MoviePilotTool): "3) Episode offset: '前定位词 <> 后定位词 >> EP±N'; " "4) Combined: '被替换词 => 替换词 && 前定位词 <> 后定位词 >> EP±N'; " "Lines starting with '#' are comments. " - "The replacement target supports: {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]} for direct TMDB ID matching." + "The replacement target supports: {[tmdbid=xxx;type=movie/tv;g=xxx;s=xxx;e=xxx]} " + "for direct TMDB ID matching; g is an optional TMDB episode group ID for TV recognition." ) require_admin: bool = True args_schema: Type[BaseModel] = UpdateCustomIdentifiersInput diff --git a/app/chain/__init__.py b/app/chain/__init__.py index fb643abd..b774f787 100644 --- a/app/chain/__init__.py +++ b/app/chain/__init__.py @@ -582,6 +582,8 @@ class ChainBase(metaclass=ABCMeta): tmdbid = meta.tmdbid if not doubanid and hasattr(meta, "doubanid"): doubanid = meta.doubanid + if not episode_group and hasattr(meta, "episode_group"): + episode_group = meta.episode_group # 有tmdbid时,不使用meta推断的类型(由消歧逻辑决定),也不使用其它ID if tmdbid: doubanid = None @@ -665,6 +667,8 @@ class ChainBase(metaclass=ABCMeta): tmdbid = meta.tmdbid if not doubanid and hasattr(meta, "doubanid"): doubanid = meta.doubanid + if not episode_group and hasattr(meta, "episode_group"): + episode_group = meta.episode_group # 有tmdbid时,不使用meta推断的类型(由消歧逻辑决定),也不使用其它ID if tmdbid: doubanid = None diff --git a/app/core/meta/metabase.py b/app/core/meta/metabase.py index 71937eee..12b1160d 100644 --- a/app/core/meta/metabase.py +++ b/app/core/meta/metabase.py @@ -90,6 +90,7 @@ class MetaBase(object): # 附加信息 tmdbid: int = None doubanid: str = None + episode_group: Optional[str] = None # 帧率信息(纯数值) fps: Optional[int] = None @@ -645,6 +646,9 @@ class MetaBase(object): # doubanid if not self.doubanid and meta.doubanid: self.doubanid = meta.doubanid + # 剧集组 + if not self.episode_group and meta.episode_group: + self.episode_group = meta.episode_group def to_dict(self): """ diff --git a/app/core/metainfo.py b/app/core/metainfo.py index d7ce9998..0eeebd8c 100644 --- a/app/core/metainfo.py +++ b/app/core/metainfo.py @@ -29,6 +29,7 @@ _BRACED_METAINFO_RE = re.compile(r'(?<={\[)[\W\w]+(?=]})') _BRACED_TMDBID_RE = re.compile(r'(?<=tmdbid=)\d+') _BRACED_DOUBANID_RE = re.compile(r'(?<=doubanid=)\d+') _BRACED_TYPE_RE = re.compile(r'(?<=type=)\w+') +_BRACED_EPISODE_GROUP_RE = re.compile(r'(?:^|;)g=([0-9a-fA-F]+)(?=;|$)') _BRACED_BEGIN_SEASON_RE = re.compile(r'(?<=s=)\d+') _BRACED_END_SEASON_RE = re.compile(r'(?<=s=\d+-)\d+') _BRACED_BEGIN_EPISODE_RE = re.compile(r'(?<=e=)\d+') @@ -49,6 +50,7 @@ def _empty_metainfo() -> dict: 'tmdbid': None, 'doubanid': None, 'type': None, + 'episode_group': None, 'begin_season': None, 'end_season': None, 'total_season': None, @@ -70,6 +72,75 @@ def _apply_range_total(metainfo: dict, begin_key: str, end_key: str, total_key: metainfo[total_key] = 1 +def _find_metainfo_python(title: str) -> Tuple[str, dict]: + """ + 使用 Python 解析标题中的显式媒体标签,作为 Rust 入口不可用时的兜底。 + """ + metainfo = _empty_metainfo() + if not title: + return title, metainfo + # 从标题中提取媒体信息 格式为{[tmdbid=xxx;type=xxx;g=xxx;s=xxx;e=xxx]} + results = _BRACED_METAINFO_RE.findall(title) + if results: + for result in results: + # 查找tmdbid信息 + tmdbid = _BRACED_TMDBID_RE.search(result) + if tmdbid and tmdbid.group(0).isdigit(): + metainfo['tmdbid'] = tmdbid.group(0) + # 查找豆瓣id信息 + doubanid = _BRACED_DOUBANID_RE.search(result) + if doubanid and doubanid.group(0).isdigit(): + metainfo['doubanid'] = doubanid.group(0) + # 查找媒体类型 + mtype = _BRACED_TYPE_RE.search(result) + if mtype: + media_type = mtype.group(0) + if media_type in ["movie", "movies"]: + metainfo['type'] = MediaType.MOVIE + elif media_type == "tv": + metainfo['type'] = MediaType.TV + # 查找剧集组 + episode_group = _BRACED_EPISODE_GROUP_RE.search(result) + if episode_group: + metainfo['episode_group'] = episode_group.group(1) + # 查找季信息 + begin_season = _BRACED_BEGIN_SEASON_RE.search(result) + if begin_season and begin_season.group(0).isdigit(): + metainfo['begin_season'] = int(begin_season.group(0)) + end_season = _BRACED_END_SEASON_RE.search(result) + if end_season and end_season.group(0).isdigit(): + metainfo['end_season'] = int(end_season.group(0)) + # 查找集信息 + begin_episode = _BRACED_BEGIN_EPISODE_RE.search(result) + if begin_episode and begin_episode.group(0).isdigit(): + metainfo['begin_episode'] = int(begin_episode.group(0)) + end_episode = _BRACED_END_EPISODE_RE.search(result) + if end_episode and end_episode.group(0).isdigit(): + metainfo['end_episode'] = int(end_episode.group(0)) + # 去除title中该部分 + if tmdbid or mtype or episode_group or begin_season or end_season or begin_episode or end_episode: + title = title.replace(f"{{[{result}]}}", '') + + # 支持Emby格式的ID标签;第一个 [tmdbid] 历史上始终优先处理,用于覆盖前面 {[...]} 中的旧标签。 + tmdb_match = _EMBY_TMDB_RE_LIST[0].search(title) + if tmdb_match: + metainfo['tmdbid'] = tmdb_match.group(1) + title = _EMBY_TMDB_RE_LIST[0].sub('', title).strip() + elif not metainfo['tmdbid']: + # 保持原有优先级:[tmdbid] > [tmdb] > {tmdbid} > {tmdb} + for tmdb_re in _EMBY_TMDB_RE_LIST[1:]: + tmdb_match = tmdb_re.search(title) + if tmdb_match: + metainfo['tmdbid'] = tmdb_match.group(1) + title = tmdb_re.sub('', title).strip() + break + + # 计算季集总数 + _apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season') + _apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode') + return title, metainfo + + def _build_meta_info( title: str, subtitle: Optional[str] = None, @@ -109,6 +180,8 @@ def _build_meta_info( meta.doubanid = metainfo['doubanid'] if metainfo.get('type'): meta.type = metainfo['type'] + if metainfo.get('episode_group'): + meta.episode_group = metainfo['episode_group'] if metainfo.get('begin_season'): meta.begin_season = metainfo['begin_season'] if metainfo.get('end_season'): @@ -225,6 +298,7 @@ def _meta_from_rust(parsed: dict) -> Optional[MetaBase]: "apply_words": parsed.get("apply_words") or [], "tmdbid": parsed.get("tmdbid"), "doubanid": parsed.get("doubanid"), + "episode_group": parsed.get("episode_group"), "fps": parsed.get("fps"), } for key, value in fields.items(): @@ -308,62 +382,4 @@ def find_metainfo(title: str) -> Tuple[str, dict]: rust_result = rust_accel.find_metainfo(title) if rust_result: return rust_result["title"], rust_result["metainfo"] - metainfo = _empty_metainfo() - if not title: - return title, metainfo - # 从标题中提取媒体信息 格式为{[tmdbid=xxx;type=xxx;s=xxx;e=xxx]} - results = _BRACED_METAINFO_RE.findall(title) - if results: - for result in results: - # 查找tmdbid信息 - tmdbid = _BRACED_TMDBID_RE.search(result) - if tmdbid and tmdbid.group(0).isdigit(): - metainfo['tmdbid'] = tmdbid.group(0) - # 查找豆瓣id信息 - doubanid = _BRACED_DOUBANID_RE.search(result) - if doubanid and doubanid.group(0).isdigit(): - metainfo['doubanid'] = doubanid.group(0) - # 查找媒体类型 - mtype = _BRACED_TYPE_RE.search(result) - if mtype: - media_type = mtype.group(0) - if media_type == "movies": - metainfo['type'] = MediaType.MOVIE - elif media_type == "tv": - metainfo['type'] = MediaType.TV - # 查找季信息 - begin_season = _BRACED_BEGIN_SEASON_RE.search(result) - if begin_season and begin_season.group(0).isdigit(): - metainfo['begin_season'] = int(begin_season.group(0)) - end_season = _BRACED_END_SEASON_RE.search(result) - if end_season and end_season.group(0).isdigit(): - metainfo['end_season'] = int(end_season.group(0)) - # 查找集信息 - begin_episode = _BRACED_BEGIN_EPISODE_RE.search(result) - if begin_episode and begin_episode.group(0).isdigit(): - metainfo['begin_episode'] = int(begin_episode.group(0)) - end_episode = _BRACED_END_EPISODE_RE.search(result) - if end_episode and end_episode.group(0).isdigit(): - metainfo['end_episode'] = int(end_episode.group(0)) - # 去除title中该部分 - if tmdbid or mtype or begin_season or end_season or begin_episode or end_episode: - title = title.replace(f"{{[{result}]}}", '') - - # 支持Emby格式的ID标签;第一个 [tmdbid] 历史上始终优先处理,用于覆盖前面 {[...]} 中的旧标签。 - tmdb_match = _EMBY_TMDB_RE_LIST[0].search(title) - if tmdb_match: - metainfo['tmdbid'] = tmdb_match.group(1) - title = _EMBY_TMDB_RE_LIST[0].sub('', title).strip() - elif not metainfo['tmdbid']: - # 保持原有优先级:[tmdbid] > [tmdb] > {tmdbid} > {tmdb} - for tmdb_re in _EMBY_TMDB_RE_LIST[1:]: - tmdb_match = tmdb_re.search(title) - if tmdb_match: - metainfo['tmdbid'] = tmdb_match.group(1) - title = tmdb_re.sub('', title).strip() - break - - # 计算季集总数 - _apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season') - _apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode') - return title, metainfo + return _find_metainfo_python(title) diff --git a/app/schemas/context.py b/app/schemas/context.py index e4092d0d..27cb1362 100644 --- a/app/schemas/context.py +++ b/app/schemas/context.py @@ -61,6 +61,8 @@ class MetaInfo(BaseModel): web_source: Optional[str] = None # 应用的识别词信息 apply_words: Optional[List[str]] = None + # 剧集组 + episode_group: Optional[str] = None class MediaInfo(BaseModel): diff --git a/rust/moviepilot_rust/src/metainfo.rs b/rust/moviepilot_rust/src/metainfo.rs index 02558a26..a197c7fc 100644 --- a/rust/moviepilot_rust/src/metainfo.rs +++ b/rust/moviepilot_rust/src/metainfo.rs @@ -40,6 +40,8 @@ static BRACED_METAINFO_RE: Lazy = Lazy::new(|| Regex::new(r"\{\[([^\]]+)] static BRACED_TMDBID_RE: Lazy = Lazy::new(|| Regex::new(r"tmdbid=(\d+)").unwrap()); static BRACED_DOUBANID_RE: Lazy = Lazy::new(|| Regex::new(r"doubanid=(\d+)").unwrap()); static BRACED_TYPE_RE: Lazy = Lazy::new(|| Regex::new(r"type=(\w+)").unwrap()); +static BRACED_EPISODE_GROUP_RE: Lazy = + Lazy::new(|| Regex::new(r"(?:^|;)g=([0-9a-fA-F]+)(?:;|$)").unwrap()); static BRACED_BEGIN_SEASON_RE: Lazy = Lazy::new(|| Regex::new(r"s=(\d+)").unwrap()); static BRACED_END_SEASON_RE: Lazy = Lazy::new(|| Regex::new(r"s=\d+-(\d+)").unwrap()); static BRACED_BEGIN_EPISODE_RE: Lazy = Lazy::new(|| Regex::new(r"e=(\d+)").unwrap()); @@ -356,6 +358,7 @@ struct MetaResult { apply_words: Vec, tmdbid: Option, doubanid: Option, + episode_group: Option, fps: Option, subtitle_flag: bool, } @@ -365,6 +368,7 @@ struct ExplicitMetaInfo { tmdbid: Option, doubanid: Option, media_type: Option, + episode_group: Option, begin_season: Option, end_season: Option, total_season: Option, @@ -448,6 +452,7 @@ pub(crate) fn find_metainfo_fast(py: Python<'_>, title: &str) -> PyResult ExplicitMetaInfo { tmdbid: None, doubanid: None, media_type: None, + episode_group: None, begin_season: None, end_season: None, total_season: None, @@ -661,6 +667,9 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo { .captures(&result) .and_then(|cap| cap.get(1)); let mtype = BRACED_TYPE_RE.captures(&result).and_then(|cap| cap.get(1)); + let episode_group = BRACED_EPISODE_GROUP_RE + .captures(&result) + .and_then(|cap| cap.get(1)); let begin_season = BRACED_BEGIN_SEASON_RE .captures(&result) .and_then(|cap| cap.get(1)); @@ -681,11 +690,14 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo { } if let Some(value) = mtype { match value.as_str() { - "movies" => info.media_type = Some(MEDIA_TYPE_MOVIE.to_string()), + "movie" | "movies" => info.media_type = Some(MEDIA_TYPE_MOVIE.to_string()), "tv" => info.media_type = Some(MEDIA_TYPE_TV.to_string()), _ => {} } } + if let Some(value) = episode_group { + info.episode_group = Some(value.as_str().to_string()); + } if let Some(value) = begin_season { info.begin_season = value.as_str().parse::().ok(); } @@ -700,6 +712,7 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo { } if tmdbid.is_some() || mtype.is_some() + || episode_group.is_some() || begin_season.is_some() || end_season.is_some() || begin_episode.is_some() @@ -769,6 +782,9 @@ fn apply_explicit_metainfo(meta: &mut MetaResult, explicit: &ExplicitMetaInfo) { if let Some(value) = explicit.doubanid.as_ref() { meta.doubanid = Some(value.clone()); } + if let Some(value) = explicit.episode_group.as_ref() { + meta.episode_group = Some(value.clone()); + } if let Some(value) = explicit.media_type.as_ref() { meta.media_type = value.clone(); } @@ -3125,6 +3141,7 @@ fn meta_to_py(py: Python<'_>, meta: &MetaResult) -> PyResult { dict.set_item("apply_words", &meta.apply_words)?; dict.set_item("tmdbid", meta.tmdbid)?; dict.set_item("doubanid", &meta.doubanid)?; + dict.set_item("episode_group", &meta.episode_group)?; dict.set_item("fps", meta.fps)?; Ok(dict.into()) } diff --git a/skills/generate-identifiers/SKILL.md b/skills/generate-identifiers/SKILL.md index 89a8d646..434e68f6 100644 --- a/skills/generate-identifiers/SKILL.md +++ b/skills/generate-identifiers/SKILL.md @@ -12,7 +12,8 @@ description: >- 1) A torrent or file name is incorrectly recognized (wrong title, season, episode, etc.); 2) The user wants to block unwanted keywords from torrent names; 3) The user needs episode offset rules for series with non-standard numbering; - 4) The user wants to force recognition of a specific media by TMDB/Douban ID. + 4) The user wants to force recognition of a specific media by TMDB/Douban ID; + 5) The user wants TV recognition to use a specific TMDB episode group. allowed-tools: query_custom_identifiers update_custom_identifiers recognize_media --- @@ -54,7 +55,11 @@ Regex substitution. The left side is a regex pattern, the right side is the repl 被替换词 => {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]} 被替换词 => {[doubanid=xxx;type=movie/tv;s=xxx;e=xxx]} ``` -Where `s` (season) and `e` (episode) are optional. +Where `s` (season) and `e` (episode) are optional. For TMDB TV recognition, add `g=xxx` to specify an episode group: + +``` +被替换词 => {[tmdbid=xxx;type=tv;g=xxx;s=xxx;e=xxx]} +``` ### 3. Episode Offset (集偏移) @@ -225,6 +230,16 @@ Tell the user: Some\.Weird\.Name(?:\.S01E\d+)?(?:\.1080p)? => {[tmdbid=12345;type=tv;s=1]} ``` +### Force TMDB Episode Group Recognition + +**User**: "种子名 `Some.Weird.Name.S01E01.1080p.mkv`,这是按 TMDB 剧集组 `5ad0ec240e0a26303f00d84d` 排序的电视剧" + +**Solution**: Direct TMDB ID specification with `g=...`: +``` +# 仅在 Some.Weird.Name 命名模式下绑定 TMDB ID 12345 并指定剧集组 +Some\.Weird\.Name(?:\.S01E\d+)?(?:\.1080p)? => {[tmdbid=12345;type=tv;g=5ad0ec240e0a26303f00d84d;s=1]} +``` + ### Combined Fix **User**: "种子名 `[Baha][OldTitle][13][1080P]`,标题应该是NewTitle,而且13应该是第二季第1集" diff --git a/tests/test_episode_group_recognition.py b/tests/test_episode_group_recognition.py new file mode 100644 index 00000000..27321d72 --- /dev/null +++ b/tests/test_episode_group_recognition.py @@ -0,0 +1,37 @@ +import sys +from types import ModuleType +from unittest.mock import patch + +sys.modules.setdefault("qbittorrentapi", ModuleType("qbittorrentapi")) +setattr(sys.modules["qbittorrentapi"], "TorrentFilesList", list) +sys.modules.setdefault("transmission_rpc", ModuleType("transmission_rpc")) +setattr(sys.modules["transmission_rpc"], "File", object) +sys.modules.setdefault("psutil", ModuleType("psutil")) + +from app.chain import ChainBase +from app.core.context import MediaInfo +from app.core.meta import MetaBase +from app.schemas.types import MediaType + + +def test_recognize_media_uses_meta_episode_group(): + """ + 识别链未显式传 episode_group 时,应沿用元数据中识别出的剧集组。 + """ + group_id = "5ad0ec240e0a26303f00d84d" + chain = ChainBase() + meta = MetaBase("测试剧集") + meta.name = "测试剧集" + meta.type = MediaType.TV + meta.episode_group = group_id + mediainfo = MediaInfo(title="测试剧集", year="2024", tmdb_id=100, type=MediaType.TV) + + with patch.object(chain, "run_module", return_value=mediainfo) as run_module, patch( + "app.chain.MediaRecognizeShareHelper.report", + return_value=True, + ), patch("app.chain.MediaRecognizeShareHelper.query") as query_mock: + result = chain.recognize_media(meta=meta, cache=False) + + assert result is mediainfo + assert run_module.call_args.kwargs["episode_group"] == group_id + query_mock.assert_not_called() diff --git a/tests/test_metainfo.py b/tests/test_metainfo.py index ed985f80..65510565 100644 --- a/tests/test_metainfo.py +++ b/tests/test_metainfo.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from pathlib import Path from unittest import TestCase +from unittest.mock import patch from app.core.metainfo import MetaInfo, MetaInfoPath, find_metainfo from tests.cases.meta import meta_cases @@ -132,6 +133,33 @@ class MetaInfoTest(TestCase): self.assertEqual(meta.episode, "E04") self.assertEqual(meta.apply_words, custom_words) + def test_custom_words_support_episode_group_parameter(self): + """测试自定义识别词替换结果中的 g 参数会写入剧集组""" + group_id = "5ad0ec240e0a26303f00d84d" + custom_words = [ + f"Bakemonogatari => 物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}}" + ] + meta = MetaInfo(title="Bakemonogatari 01", custom_words=custom_words) + self.assertEqual(meta.tmdbid, 46195) + self.assertEqual(meta.type.value, "电视剧") + self.assertEqual(meta.begin_season, 1) + self.assertEqual(meta.episode_group, group_id) + self.assertEqual(meta.apply_words, custom_words) + + def test_find_metainfo_supports_episode_group_parameter(self): + """测试显式媒体标签支持 g 剧集组参数""" + group_id = "5ad0ec240e0a26303f00d84d" + title, metainfo = find_metainfo(f"物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}}") + self.assertEqual(metainfo["episode_group"], group_id) + self.assertNotIn("g=", title) + + def test_find_metainfo_does_not_support_episode_group_alias(self): + """测试 e_group 不会被当作剧集组参数识别""" + group_id = "5ad0ec240e0a26303f00d84d" + with patch("app.core.metainfo.rust_accel.find_metainfo", return_value=None): + _, metainfo = find_metainfo(f"物语系列 {{[tmdbid=46195;type=tv;e_group={group_id};s=1]}}") + self.assertIsNone(metainfo["episode_group"]) + def test_video_bit_extracted_for_video_title(self): """测试普通影视标题中的视频位深可单独识别""" meta = MetaInfo(title="The 355 2022 BluRay 1080p DTS-HD MA5.1 X265.10bit-BeiTai") diff --git a/tests/test_rust_accel.py b/tests/test_rust_accel.py index f869f92e..8c5d19fc 100644 --- a/tests/test_rust_accel.py +++ b/tests/test_rust_accel.py @@ -223,6 +223,22 @@ def test_rust_metainfo_parser_handles_anime_from_entry(): assert result["audio_encode"] == "AAC" +def test_rust_metainfo_parser_handles_episode_group(): + """ + Rust MetaInfo 入口应识别显式媒体标签中的 g 剧集组参数。 + """ + group_id = "5ad0ec240e0a26303f00d84d" + result = rust_accel.parse_metainfo( + f"物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}} 01", + options=_metainfo_options(), + ) + + assert result["tmdbid"] == 46195 + assert result["type"] == MediaType.TV.value + assert result["episode_group"] == group_id + assert result["begin_season"] == 1 + + def test_rust_metainfo_path_parser_merges_parent_title(): """ Rust MetaInfoPath 入口应在 Rust 内完成父目录标题合并。