feat: support TMDB episode group (g=) in explicit media tags and custom identifiers

- Add episode_group (g=) parameter parsing to explicit media tags in both Python and Rust metainfo parsers
- Propagate episode_group through MetaInfo, MetaBase, MediaInfo, and context models
- Update SKILL.md and update_custom_identifiers.py docs to describe episode group usage
- Add tests for episode_group recognition in metainfo and chain recognition logic
This commit is contained in:
jxxghp
2026-05-24 23:32:27 +08:00
parent ea52537423
commit ac3432c54f
10 changed files with 203 additions and 63 deletions

View File

@@ -50,7 +50,8 @@ class UpdateCustomIdentifiersTool(MoviePilotTool):
"3) Episode offset: '前定位词 <> 后定位词 >> EP±N'; "
"4) Combined: '被替换词 => 替换词 && 前定位词 <> 后定位词 >> EP±N'; "
"Lines starting with '#' are comments. "
"The replacement target supports: {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]} for direct TMDB ID matching."
"The replacement target supports: {[tmdbid=xxx;type=movie/tv;g=xxx;s=xxx;e=xxx]} "
"for direct TMDB ID matching; g is an optional TMDB episode group ID for TV recognition."
)
require_admin: bool = True
args_schema: Type[BaseModel] = UpdateCustomIdentifiersInput

View File

@@ -582,6 +582,8 @@ class ChainBase(metaclass=ABCMeta):
tmdbid = meta.tmdbid
if not doubanid and hasattr(meta, "doubanid"):
doubanid = meta.doubanid
if not episode_group and hasattr(meta, "episode_group"):
episode_group = meta.episode_group
# 有tmdbid时不使用meta推断的类型由消歧逻辑决定也不使用其它ID
if tmdbid:
doubanid = None
@@ -665,6 +667,8 @@ class ChainBase(metaclass=ABCMeta):
tmdbid = meta.tmdbid
if not doubanid and hasattr(meta, "doubanid"):
doubanid = meta.doubanid
if not episode_group and hasattr(meta, "episode_group"):
episode_group = meta.episode_group
# 有tmdbid时不使用meta推断的类型由消歧逻辑决定也不使用其它ID
if tmdbid:
doubanid = None

View File

@@ -90,6 +90,7 @@ class MetaBase(object):
# 附加信息
tmdbid: int = None
doubanid: str = None
episode_group: Optional[str] = None
# 帧率信息(纯数值)
fps: Optional[int] = None
@@ -645,6 +646,9 @@ class MetaBase(object):
# doubanid
if not self.doubanid and meta.doubanid:
self.doubanid = meta.doubanid
# 剧集组
if not self.episode_group and meta.episode_group:
self.episode_group = meta.episode_group
def to_dict(self):
"""

View File

@@ -29,6 +29,7 @@ _BRACED_METAINFO_RE = re.compile(r'(?<={\[)[\W\w]+(?=]})')
_BRACED_TMDBID_RE = re.compile(r'(?<=tmdbid=)\d+')
_BRACED_DOUBANID_RE = re.compile(r'(?<=doubanid=)\d+')
_BRACED_TYPE_RE = re.compile(r'(?<=type=)\w+')
_BRACED_EPISODE_GROUP_RE = re.compile(r'(?:^|;)g=([0-9a-fA-F]+)(?=;|$)')
_BRACED_BEGIN_SEASON_RE = re.compile(r'(?<=s=)\d+')
_BRACED_END_SEASON_RE = re.compile(r'(?<=s=\d+-)\d+')
_BRACED_BEGIN_EPISODE_RE = re.compile(r'(?<=e=)\d+')
@@ -49,6 +50,7 @@ def _empty_metainfo() -> dict:
'tmdbid': None,
'doubanid': None,
'type': None,
'episode_group': None,
'begin_season': None,
'end_season': None,
'total_season': None,
@@ -70,6 +72,75 @@ def _apply_range_total(metainfo: dict, begin_key: str, end_key: str, total_key:
metainfo[total_key] = 1
def _find_metainfo_python(title: str) -> Tuple[str, dict]:
    """
    Parse explicit media tags out of a title using pure Python.

    This is the fallback used when the Rust entry point is unavailable.

    :param title: raw title that may contain a ``{[...]}`` tag and/or an
                  Emby-style TMDB id tag
    :return: ``(title, metainfo)`` where ``title`` has the recognised tags
             removed and ``metainfo`` is the dict produced by
             ``_empty_metainfo()`` with the parsed values filled in
    """
    metainfo = _empty_metainfo()
    if not title:
        return title, metainfo

    # Explicit tag format: {[tmdbid=xxx;type=xxx;g=xxx;s=xxx;e=xxx]}
    for tag_body in _BRACED_METAINFO_RE.findall(title):
        # Run every field pattern against the tag body first; the searches
        # are independent of each other.
        tmdb_hit = _BRACED_TMDBID_RE.search(tag_body)
        douban_hit = _BRACED_DOUBANID_RE.search(tag_body)
        type_hit = _BRACED_TYPE_RE.search(tag_body)
        group_hit = _BRACED_EPISODE_GROUP_RE.search(tag_body)
        season_from = _BRACED_BEGIN_SEASON_RE.search(tag_body)
        season_to = _BRACED_END_SEASON_RE.search(tag_body)
        episode_from = _BRACED_BEGIN_EPISODE_RE.search(tag_body)
        episode_to = _BRACED_END_EPISODE_RE.search(tag_body)

        # TMDB id and Douban id are stored as the matched digit strings
        if tmdb_hit and tmdb_hit.group(0).isdigit():
            metainfo['tmdbid'] = tmdb_hit.group(0)
        if douban_hit and douban_hit.group(0).isdigit():
            metainfo['doubanid'] = douban_hit.group(0)

        # Media type: only "movie"/"movies" and "tv" are recognised
        if type_hit:
            type_name = type_hit.group(0)
            if type_name in ("movie", "movies"):
                metainfo['type'] = MediaType.MOVIE
            elif type_name == "tv":
                metainfo['type'] = MediaType.TV

        # Episode group id (captured by group 1 of the pattern)
        if group_hit:
            metainfo['episode_group'] = group_hit.group(1)

        # Season / episode range bounds are stored as integers
        numeric_fields = (
            ('begin_season', season_from),
            ('end_season', season_to),
            ('begin_episode', episode_from),
            ('end_episode', episode_to),
        )
        for field_name, hit in numeric_fields:
            if hit and hit.group(0).isdigit():
                metainfo[field_name] = int(hit.group(0))

        # Strip the tag from the title once any of these fields matched.
        # A Douban id match alone does not trigger removal.
        strip_triggers = (
            tmdb_hit, type_hit, group_hit,
            season_from, season_to, episode_from, episode_to,
        )
        if any(strip_triggers):
            title = title.replace(f"{{[{tag_body}]}}", '')

    # Emby-style id tags: the first pattern ([tmdbid]) is always applied and
    # overrides any tmdbid taken from a {[...]} tag above.
    primary_re = _EMBY_TMDB_RE_LIST[0]
    primary_hit = primary_re.search(title)
    if primary_hit:
        metainfo['tmdbid'] = primary_hit.group(1)
        title = primary_re.sub('', title).strip()
    elif not metainfo['tmdbid']:
        # Remaining patterns keep the existing priority:
        # [tmdbid] > [tmdb] > {tmdbid} > {tmdb}
        for fallback_re in _EMBY_TMDB_RE_LIST[1:]:
            fallback_hit = fallback_re.search(title)
            if fallback_hit:
                metainfo['tmdbid'] = fallback_hit.group(1)
                title = fallback_re.sub('', title).strip()
                break

    # Derive the season / episode totals from the parsed ranges
    _apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season')
    _apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode')
    return title, metainfo
def _build_meta_info(
title: str,
subtitle: Optional[str] = None,
@@ -109,6 +180,8 @@ def _build_meta_info(
meta.doubanid = metainfo['doubanid']
if metainfo.get('type'):
meta.type = metainfo['type']
if metainfo.get('episode_group'):
meta.episode_group = metainfo['episode_group']
if metainfo.get('begin_season'):
meta.begin_season = metainfo['begin_season']
if metainfo.get('end_season'):
@@ -225,6 +298,7 @@ def _meta_from_rust(parsed: dict) -> Optional[MetaBase]:
"apply_words": parsed.get("apply_words") or [],
"tmdbid": parsed.get("tmdbid"),
"doubanid": parsed.get("doubanid"),
"episode_group": parsed.get("episode_group"),
"fps": parsed.get("fps"),
}
for key, value in fields.items():
@@ -308,62 +382,4 @@ def find_metainfo(title: str) -> Tuple[str, dict]:
rust_result = rust_accel.find_metainfo(title)
if rust_result:
return rust_result["title"], rust_result["metainfo"]
metainfo = _empty_metainfo()
if not title:
return title, metainfo
# 从标题中提取媒体信息 格式为{[tmdbid=xxx;type=xxx;s=xxx;e=xxx]}
results = _BRACED_METAINFO_RE.findall(title)
if results:
for result in results:
# 查找tmdbid信息
tmdbid = _BRACED_TMDBID_RE.search(result)
if tmdbid and tmdbid.group(0).isdigit():
metainfo['tmdbid'] = tmdbid.group(0)
# 查找豆瓣id信息
doubanid = _BRACED_DOUBANID_RE.search(result)
if doubanid and doubanid.group(0).isdigit():
metainfo['doubanid'] = doubanid.group(0)
# 查找媒体类型
mtype = _BRACED_TYPE_RE.search(result)
if mtype:
media_type = mtype.group(0)
if media_type == "movies":
metainfo['type'] = MediaType.MOVIE
elif media_type == "tv":
metainfo['type'] = MediaType.TV
# 查找季信息
begin_season = _BRACED_BEGIN_SEASON_RE.search(result)
if begin_season and begin_season.group(0).isdigit():
metainfo['begin_season'] = int(begin_season.group(0))
end_season = _BRACED_END_SEASON_RE.search(result)
if end_season and end_season.group(0).isdigit():
metainfo['end_season'] = int(end_season.group(0))
# 查找集信息
begin_episode = _BRACED_BEGIN_EPISODE_RE.search(result)
if begin_episode and begin_episode.group(0).isdigit():
metainfo['begin_episode'] = int(begin_episode.group(0))
end_episode = _BRACED_END_EPISODE_RE.search(result)
if end_episode and end_episode.group(0).isdigit():
metainfo['end_episode'] = int(end_episode.group(0))
# 去除title中该部分
if tmdbid or mtype or begin_season or end_season or begin_episode or end_episode:
title = title.replace(f"{{[{result}]}}", '')
# 支持Emby格式的ID标签第一个 [tmdbid] 历史上始终优先处理,用于覆盖前面 {[...]} 中的旧标签。
tmdb_match = _EMBY_TMDB_RE_LIST[0].search(title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = _EMBY_TMDB_RE_LIST[0].sub('', title).strip()
elif not metainfo['tmdbid']:
# 保持原有优先级:[tmdbid] > [tmdb] > {tmdbid} > {tmdb}
for tmdb_re in _EMBY_TMDB_RE_LIST[1:]:
tmdb_match = tmdb_re.search(title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = tmdb_re.sub('', title).strip()
break
# 计算季集总数
_apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season')
_apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode')
return title, metainfo
return _find_metainfo_python(title)

View File

@@ -61,6 +61,8 @@ class MetaInfo(BaseModel):
web_source: Optional[str] = None
# 应用的识别词信息
apply_words: Optional[List[str]] = None
# 剧集组
episode_group: Optional[str] = None
class MediaInfo(BaseModel):

View File

@@ -40,6 +40,8 @@ static BRACED_METAINFO_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\{\[([^\]]+)]
static BRACED_TMDBID_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"tmdbid=(\d+)").unwrap());
static BRACED_DOUBANID_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"doubanid=(\d+)").unwrap());
static BRACED_TYPE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"type=(\w+)").unwrap());
static BRACED_EPISODE_GROUP_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?:^|;)g=([0-9a-fA-F]+)(?:;|$)").unwrap());
static BRACED_BEGIN_SEASON_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"s=(\d+)").unwrap());
static BRACED_END_SEASON_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"s=\d+-(\d+)").unwrap());
static BRACED_BEGIN_EPISODE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"e=(\d+)").unwrap());
@@ -356,6 +358,7 @@ struct MetaResult {
apply_words: Vec<String>,
tmdbid: Option<i64>,
doubanid: Option<String>,
episode_group: Option<String>,
fps: Option<i64>,
subtitle_flag: bool,
}
@@ -365,6 +368,7 @@ struct ExplicitMetaInfo {
tmdbid: Option<String>,
doubanid: Option<String>,
media_type: Option<String>,
episode_group: Option<String>,
begin_season: Option<i64>,
end_season: Option<i64>,
total_season: Option<i64>,
@@ -448,6 +452,7 @@ pub(crate) fn find_metainfo_fast(py: Python<'_>, title: &str) -> PyResult<PyObje
meta.set_item("tmdbid", parsed.tmdbid)?;
meta.set_item("doubanid", parsed.doubanid)?;
meta.set_item("type", parsed.media_type)?;
meta.set_item("episode_group", parsed.episode_group)?;
meta.set_item("begin_season", parsed.begin_season)?;
meta.set_item("end_season", parsed.end_season)?;
meta.set_item("total_season", parsed.total_season)?;
@@ -641,6 +646,7 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo {
tmdbid: None,
doubanid: None,
media_type: None,
episode_group: None,
begin_season: None,
end_season: None,
total_season: None,
@@ -661,6 +667,9 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo {
.captures(&result)
.and_then(|cap| cap.get(1));
let mtype = BRACED_TYPE_RE.captures(&result).and_then(|cap| cap.get(1));
let episode_group = BRACED_EPISODE_GROUP_RE
.captures(&result)
.and_then(|cap| cap.get(1));
let begin_season = BRACED_BEGIN_SEASON_RE
.captures(&result)
.and_then(|cap| cap.get(1));
@@ -681,11 +690,14 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo {
}
if let Some(value) = mtype {
match value.as_str() {
"movies" => info.media_type = Some(MEDIA_TYPE_MOVIE.to_string()),
"movie" | "movies" => info.media_type = Some(MEDIA_TYPE_MOVIE.to_string()),
"tv" => info.media_type = Some(MEDIA_TYPE_TV.to_string()),
_ => {}
}
}
if let Some(value) = episode_group {
info.episode_group = Some(value.as_str().to_string());
}
if let Some(value) = begin_season {
info.begin_season = value.as_str().parse::<i64>().ok();
}
@@ -700,6 +712,7 @@ fn find_explicit_metainfo(title: &str) -> ExplicitMetaInfo {
}
if tmdbid.is_some()
|| mtype.is_some()
|| episode_group.is_some()
|| begin_season.is_some()
|| end_season.is_some()
|| begin_episode.is_some()
@@ -769,6 +782,9 @@ fn apply_explicit_metainfo(meta: &mut MetaResult, explicit: &ExplicitMetaInfo) {
if let Some(value) = explicit.doubanid.as_ref() {
meta.doubanid = Some(value.clone());
}
if let Some(value) = explicit.episode_group.as_ref() {
meta.episode_group = Some(value.clone());
}
if let Some(value) = explicit.media_type.as_ref() {
meta.media_type = value.clone();
}
@@ -3125,6 +3141,7 @@ fn meta_to_py(py: Python<'_>, meta: &MetaResult) -> PyResult<PyObject> {
dict.set_item("apply_words", &meta.apply_words)?;
dict.set_item("tmdbid", meta.tmdbid)?;
dict.set_item("doubanid", &meta.doubanid)?;
dict.set_item("episode_group", &meta.episode_group)?;
dict.set_item("fps", meta.fps)?;
Ok(dict.into())
}

View File

@@ -12,7 +12,8 @@ description: >-
1) A torrent or file name is incorrectly recognized (wrong title, season, episode, etc.);
2) The user wants to block unwanted keywords from torrent names;
3) The user needs episode offset rules for series with non-standard numbering;
4) The user wants to force recognition of a specific media by TMDB/Douban ID.
4) The user wants to force recognition of a specific media by TMDB/Douban ID;
5) The user wants TV recognition to use a specific TMDB episode group.
allowed-tools: query_custom_identifiers update_custom_identifiers recognize_media
---
@@ -54,7 +55,11 @@ Regex substitution. The left side is a regex pattern, the right side is the repl
被替换词 => {[tmdbid=xxx;type=movie/tv;s=xxx;e=xxx]}
被替换词 => {[doubanid=xxx;type=movie/tv;s=xxx;e=xxx]}
```
Where `s` (season) and `e` (episode) are optional.
Where `s` (season) and `e` (episode) are optional. For TMDB TV recognition, add `g=xxx` to specify an episode group:
```
被替换词 => {[tmdbid=xxx;type=tv;g=xxx;s=xxx;e=xxx]}
```
### 3. Episode Offset (集偏移)
@@ -225,6 +230,16 @@ Tell the user:
Some\.Weird\.Name(?:\.S01E\d+)?(?:\.1080p)? => {[tmdbid=12345;type=tv;s=1]}
```
### Force TMDB Episode Group Recognition
**User**: "种子名 `Some.Weird.Name.S01E01.1080p.mkv`,这是按 TMDB 剧集组 `5ad0ec240e0a26303f00d84d` 排序的电视剧"
**Solution**: Direct TMDB ID specification with `g=...`:
```
# 仅在 Some.Weird.Name 命名模式下绑定 TMDB ID 12345 并指定剧集组
Some\.Weird\.Name(?:\.S01E\d+)?(?:\.1080p)? => {[tmdbid=12345;type=tv;g=5ad0ec240e0a26303f00d84d;s=1]}
```
### Combined Fix
**User**: "种子名 `[Baha][OldTitle][13][1080P]`,标题应该是NewTitle,而且13应该是第二季第1集"

View File

@@ -0,0 +1,37 @@
import sys
from types import ModuleType
from unittest.mock import patch
# Register placeholder modules for qbittorrentapi, transmission_rpc and psutil
# before the app imports below run.
# Presumably this lets app.chain be imported without those packages being
# installed — TODO confirm against the import chain.
# NOTE(review): setdefault keeps an already-imported real module, and the
# setattr calls then overwrite the named attribute on whichever module object
# is registered.
sys.modules.setdefault("qbittorrentapi", ModuleType("qbittorrentapi"))
setattr(sys.modules["qbittorrentapi"], "TorrentFilesList", list)
sys.modules.setdefault("transmission_rpc", ModuleType("transmission_rpc"))
setattr(sys.modules["transmission_rpc"], "File", object)
sys.modules.setdefault("psutil", ModuleType("psutil"))
from app.chain import ChainBase
from app.core.context import MediaInfo
from app.core.meta import MetaBase
from app.schemas.types import MediaType
def test_recognize_media_uses_meta_episode_group():
    """
    When recognize_media is called without an explicit episode_group, the
    episode group carried by the metadata object should be passed on to
    run_module.
    """
    expected_group = "5ad0ec240e0a26303f00d84d"
    chain = ChainBase()

    source_meta = MetaBase("测试剧集")
    source_meta.name = "测试剧集"
    source_meta.type = MediaType.TV
    source_meta.episode_group = expected_group

    recognized = MediaInfo(title="测试剧集", year="2024", tmdb_id=100, type=MediaType.TV)

    # Build the three patches up front; they are entered in the same order
    # as before (run_module, report, query).
    run_module_patch = patch.object(chain, "run_module", return_value=recognized)
    report_patch = patch(
        "app.chain.MediaRecognizeShareHelper.report",
        return_value=True,
    )
    query_patch = patch("app.chain.MediaRecognizeShareHelper.query")

    with run_module_patch as run_module, report_patch, query_patch as query_mock:
        outcome = chain.recognize_media(meta=source_meta, cache=False)

        assert outcome is recognized
        assert run_module.call_args.kwargs["episode_group"] == expected_group
        query_mock.assert_not_called()

View File

@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
from pathlib import Path
from unittest import TestCase
from unittest.mock import patch
from app.core.metainfo import MetaInfo, MetaInfoPath, find_metainfo
from tests.cases.meta import meta_cases
@@ -132,6 +133,33 @@ class MetaInfoTest(TestCase):
self.assertEqual(meta.episode, "E04")
self.assertEqual(meta.apply_words, custom_words)
def test_custom_words_support_episode_group_parameter(self):
    """The g parameter in a custom-word replacement target is stored as the episode group."""
    group_id = "5ad0ec240e0a26303f00d84d"
    replacement_rule = (
        f"Bakemonogatari => 物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}}"
    )
    custom_words = [replacement_rule]

    parsed = MetaInfo(title="Bakemonogatari 01", custom_words=custom_words)

    self.assertEqual(parsed.tmdbid, 46195)
    self.assertEqual(parsed.type.value, "电视剧")
    self.assertEqual(parsed.begin_season, 1)
    self.assertEqual(parsed.episode_group, group_id)
    self.assertEqual(parsed.apply_words, custom_words)
def test_find_metainfo_supports_episode_group_parameter(self):
    """An explicit media tag may carry the g (episode group) parameter."""
    group_id = "5ad0ec240e0a26303f00d84d"
    tagged_title = f"物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}}"

    cleaned_title, parsed = find_metainfo(tagged_title)

    self.assertEqual(parsed["episode_group"], group_id)
    # The tag must have been stripped from the returned title
    self.assertNotIn("g=", cleaned_title)
def test_find_metainfo_does_not_support_episode_group_alias(self):
    """The e_group key must not be treated as an episode group parameter."""
    group_id = "5ad0ec240e0a26303f00d84d"
    tagged_title = f"物语系列 {{[tmdbid=46195;type=tv;e_group={group_id};s=1]}}"

    # Make the Rust lookup return None for the duration of the call
    rust_lookup_disabled = patch(
        "app.core.metainfo.rust_accel.find_metainfo", return_value=None
    )
    with rust_lookup_disabled:
        parsed = find_metainfo(tagged_title)[1]

    self.assertIsNone(parsed["episode_group"])
def test_video_bit_extracted_for_video_title(self):
"""测试普通影视标题中的视频位深可单独识别"""
meta = MetaInfo(title="The 355 2022 BluRay 1080p DTS-HD MA5.1 X265.10bit-BeiTai")

View File

@@ -223,6 +223,22 @@ def test_rust_metainfo_parser_handles_anime_from_entry():
assert result["audio_encode"] == "AAC"
def test_rust_metainfo_parser_handles_episode_group():
    """
    The Rust MetaInfo entry point should recognise the g (episode group)
    parameter inside an explicit media tag.
    """
    group_id = "5ad0ec240e0a26303f00d84d"
    tagged_title = f"物语系列 {{[tmdbid=46195;type=tv;g={group_id};s=1]}} 01"

    parsed = rust_accel.parse_metainfo(tagged_title, options=_metainfo_options())

    assert parsed["tmdbid"] == 46195
    assert parsed["type"] == MediaType.TV.value
    assert parsed["episode_group"] == group_id
    assert parsed["begin_season"] == 1
def test_rust_metainfo_path_parser_merges_parent_title():
"""
Rust MetaInfoPath 入口应在 Rust 内完成父目录标题合并。