Files
archived-MoviePilot/app/core/metainfo.py
jxxghp ac3432c54f feat: support TMDB episode group (g=) in explicit media tags and custom identifiers
- Add episode_group (g=) parameter parsing to explicit media tags in both Python and Rust metainfo parsers
- Propagate episode_group through MetaInfo, MetaBase, MediaInfo, and context models
- Update SKILL.md and update_custom_identifiers.py docs to describe episode group usage
- Add tests for episode_group recognition in metainfo and chain recognition logic
2026-05-24 23:32:27 +08:00

386 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from pathlib import Path
from functools import lru_cache
from typing import Tuple, List, Optional
import regex as re
from app.core.config import settings
from app.core.meta import MetaAnime, MetaVideo, MetaBase
from app.core.meta.infopath import (
clear_parsed_title_for_parent_merge,
should_use_parent_title_for_file_stem,
)
from app.core.meta.words import WordsMatcher
from app.log import logger
from app.schemas.types import MediaType
from app.utils import rust_accel
# Precompiled patterns shared by the helpers in this module.
# NOTE: `re` here is the third-party `regex` package (see the imports); some
# patterns below use variable-length lookbehinds such as `(?<=s=\d+-)`.

# Anime heuristic: a full-width bracket group built only from the characters
# "+", digits, "X", "V", "P", "I" and "-" that is followed (after optional
# whitespace) by another opening "【".
_ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE)
# Anime heuristic: a whitespace-delimited " - NN " marker whose body is 1-4
# characters, each a digit or the letter "v".
_ANIME_DASH_EPISODE_RE = re.compile(r'\s+-\s+[\dv]{1,4}\s+', re.IGNORECASE)
# Season/episode markers such as "S01", "S01-S02", " S1", "E01", "EP01-EP12"
# or " EP1"; is_anime() treats a hit here as "not anime".
_VIDEO_SEASON_EPISODE_RE = re.compile(
    r"S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|"
    r"EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}",
    re.IGNORECASE,
)
# Same shape as _ANIME_BRACKET_RE, but with ASCII square brackets.
_ANIME_SQUARE_BRACKET_RE = re.compile(r'\[[+0-9XVPI-]+]\s*\[', re.IGNORECASE)
# Body of an explicit media tag written as {[...]}: only the text between
# "{[" and "]}" is matched. The quantifier is greedy, so the match runs to the
# last "]}" in the string.
_BRACED_METAINFO_RE = re.compile(r'(?<={\[)[\W\w]+(?=]})')
# Field extractors applied to the tag body. Lookbehinds are used so that
# group(0) is the bare value without its "key=" prefix.
_BRACED_TMDBID_RE = re.compile(r'(?<=tmdbid=)\d+')
_BRACED_DOUBANID_RE = re.compile(r'(?<=doubanid=)\d+')
_BRACED_TYPE_RE = re.compile(r'(?<=type=)\w+')
# Episode group: "g=" must start the tag body or follow ";", and the hex value
# (captured in group 1) must run up to the next ";" or the end of the body.
_BRACED_EPISODE_GROUP_RE = re.compile(r'(?:^|;)g=([0-9a-fA-F]+)(?=;|$)')
# Season / episode values: "s=1" or "s=1-3", "e=1" or "e=1-12". The END
# patterns match the number after the dash.
_BRACED_BEGIN_SEASON_RE = re.compile(r'(?<=s=)\d+')
_BRACED_END_SEASON_RE = re.compile(r'(?<=s=\d+-)\d+')
_BRACED_BEGIN_EPISODE_RE = re.compile(r'(?<=e=)\d+')
_BRACED_END_EPISODE_RE = re.compile(r'(?<=e=\d+-)\d+')
# Emby-style TMDB id tags, in priority order:
# [tmdbid=N] > [tmdb=N] > {tmdbid=N} > {tmdb=N}. Either "=" or "-" is accepted
# as the separator; these patterns are case-sensitive.
_EMBY_TMDB_RE_LIST = (
    re.compile(r'\[tmdbid[=\-](\d+)\]'),
    re.compile(r'\[tmdb[=\-](\d+)\]'),
    re.compile(r'\{tmdbid[=\-](\d+)\}'),
    re.compile(r'\{tmdb[=\-](\d+)\}'),
)
def _empty_metainfo() -> dict:
"""
返回媒体标签的默认结构,避免不同识别请求之间共享可变状态。
"""
return {
'tmdbid': None,
'doubanid': None,
'type': None,
'episode_group': None,
'begin_season': None,
'end_season': None,
'total_season': None,
'begin_episode': None,
'end_episode': None,
'total_episode': None,
}
def _apply_range_total(metainfo: dict, begin_key: str, end_key: str, total_key: str) -> None:
"""
计算季/集范围总数;保留原有倒序输入自动交换的兼容行为。
"""
if metainfo.get(begin_key) and metainfo.get(end_key):
if metainfo[begin_key] > metainfo[end_key]:
metainfo[begin_key], metainfo[end_key] = metainfo[end_key], metainfo[begin_key]
metainfo[total_key] = metainfo[end_key] - metainfo[begin_key] + 1
elif metainfo.get(begin_key) and not metainfo.get(end_key):
metainfo[total_key] = 1
def _find_metainfo_python(title: str) -> Tuple[str, dict]:
    """
    Parse explicit media tags out of a title in pure Python.

    This is the fallback used by find_metainfo() when the Rust entry point
    returns nothing.

    Two tag styles are handled:

    * ``{[tmdbid=xxx;doubanid=xxx;type=xxx;g=xxx;s=xxx;e=xxx]}`` blocks, which
      are removed from the title once at least one field has been recognised;
    * Emby-style id tags ``[tmdbid=N]``, ``[tmdb=N]``, ``{tmdbid=N}`` and
      ``{tmdb=N}`` (``-`` is accepted instead of ``=``).

    :param title: raw title, torrent name or file name
    :return: tuple of (title with recognised tags removed, tag dict as built
             by _empty_metainfo())
    """
    metainfo = _empty_metainfo()
    if not title:
        return title, metainfo
    # Extract media info written as {[tmdbid=xxx;type=xxx;g=xxx;s=xxx;e=xxx]}
    for result in _BRACED_METAINFO_RE.findall(title):
        # TMDB id
        tmdbid = _BRACED_TMDBID_RE.search(result)
        if tmdbid and tmdbid.group(0).isdigit():
            metainfo['tmdbid'] = tmdbid.group(0)
        # Douban id
        doubanid = _BRACED_DOUBANID_RE.search(result)
        if doubanid and doubanid.group(0).isdigit():
            metainfo['doubanid'] = doubanid.group(0)
        # Media type
        mtype = _BRACED_TYPE_RE.search(result)
        if mtype:
            media_type = mtype.group(0)
            if media_type in ["movie", "movies"]:
                metainfo['type'] = MediaType.MOVIE
            elif media_type == "tv":
                metainfo['type'] = MediaType.TV
        # Episode group
        episode_group = _BRACED_EPISODE_GROUP_RE.search(result)
        if episode_group:
            metainfo['episode_group'] = episode_group.group(1)
        # Season range
        begin_season = _BRACED_BEGIN_SEASON_RE.search(result)
        if begin_season and begin_season.group(0).isdigit():
            metainfo['begin_season'] = int(begin_season.group(0))
        end_season = _BRACED_END_SEASON_RE.search(result)
        if end_season and end_season.group(0).isdigit():
            metainfo['end_season'] = int(end_season.group(0))
        # Episode range
        begin_episode = _BRACED_BEGIN_EPISODE_RE.search(result)
        if begin_episode and begin_episode.group(0).isdigit():
            metainfo['begin_episode'] = int(begin_episode.group(0))
        end_episode = _BRACED_END_EPISODE_RE.search(result)
        if end_episode and end_episode.group(0).isdigit():
            metainfo['end_episode'] = int(end_episode.group(0))
        # Strip the tag from the title once any field was recognised.
        # doubanid is part of this check so that a tag carrying only a Douban
        # id is removed as well instead of being left in the title.
        recognised = (
            tmdbid, doubanid, mtype, episode_group,
            begin_season, end_season, begin_episode, end_episode,
        )
        if any(recognised):
            title = title.replace(f"{{[{result}]}}", '')
    # Emby-style id tags. The first pattern, [tmdbid=N], has historically
    # always been applied, so it overrides an id taken from a {[...]} tag.
    tmdb_match = _EMBY_TMDB_RE_LIST[0].search(title)
    if tmdb_match:
        metainfo['tmdbid'] = tmdb_match.group(1)
        title = _EMBY_TMDB_RE_LIST[0].sub('', title).strip()
    elif not metainfo['tmdbid']:
        # Keep the established priority: [tmdbid] > [tmdb] > {tmdbid} > {tmdb}
        for tmdb_re in _EMBY_TMDB_RE_LIST[1:]:
            tmdb_match = tmdb_re.search(title)
            if tmdb_match:
                metainfo['tmdbid'] = tmdb_match.group(1)
                title = tmdb_re.sub('', title).strip()
                break
    # Derive season / episode totals from the parsed ranges
    _apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season')
    _apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode')
    return title, metainfo
def _build_meta_info(
    title: str,
    subtitle: Optional[str] = None,
    custom_words: List[str] = None,
) -> MetaBase:
    """
    Build a metadata object from a title.

    The title is first rewritten by WordsMatcher, explicit media tags are then
    extracted with find_metainfo(), a media-file suffix is dropped, and the
    remainder is handed to MetaAnime or MetaVideo depending on is_anime().
    Values found in explicit tags override what the parser recognised.

    :param title: title, torrent name or file name
    :param subtitle: subtitle or description
    :param custom_words: custom identifier words passed to WordsMatcher
    :return: MetaAnime or MetaVideo instance
    """
    raw_title = title
    # Apply identifier words, then pull explicit tags out of the title
    title, used_words = WordsMatcher().prepare(title, custom_words=custom_words)
    title, tags = find_metainfo(title)
    # A known media/subtitle/audio suffix means we are looking at a file name
    known_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
    candidate = Path(title) if title else None
    isfile = bool(candidate and candidate.suffix.lower() in known_exts)
    if isfile:
        # Drop the suffix before parsing
        title = candidate.stem
    # Pick the parser and run recognition
    parser_cls = MetaAnime if is_anime(title) else MetaVideo
    meta = parser_cls(title, subtitle, isfile)
    # Keep the untouched title and the identifier words that were applied
    meta.title = raw_title
    meta.apply_words = used_words or []
    # Explicit tags win over parsed values; tmdbid is stored as an int
    tag_tmdbid = tags.get('tmdbid')
    if tag_tmdbid:
        try:
            meta.tmdbid = int(tag_tmdbid)
        except ValueError:
            logger.warn("tmdbid 必须是数字")
    override_fields = (
        'doubanid',
        'type',
        'episode_group',
        'begin_season',
        'end_season',
        'total_season',
        'begin_episode',
        'end_episode',
        'total_episode',
    )
    for field_name in override_fields:
        tag_value = tags.get(field_name)
        if tag_value:
            setattr(meta, field_name, tag_value)
    return meta
@lru_cache(maxsize=1)
def _rust_default_parse_options() -> dict:
    """
    Build and cache the default option set passed to the Rust meta parser.

    Caching avoids re-reading the system configuration on every call on the
    hot path. Because of ``lru_cache`` the same dict object is returned each
    time until the cache is cleared.

    :return: dict with the keys ``custom_words``, ``media_exts``,
             ``release_groups``, ``customization`` and ``streaming_platforms``
    """
    from app.core.meta.customization import CustomizationMatcher
    from app.core.meta.releasegroup import ReleaseGroupsMatcher
    from app.core.meta.streamingplatform import StreamingPlatforms
    from app.db.systemconfig_oper import SystemConfigOper
    from app.schemas.types import SystemConfigKey

    config = SystemConfigOper()
    # User-defined release groups, with empty entries dropped
    extra_groups = config.get(SystemConfigKey.CustomReleaseGroups)
    if isinstance(extra_groups, list):
        extra_groups = [group for group in extra_groups if group]
    # Built-in release-group pattern, read from the matcher's private attribute
    group_pattern = ReleaseGroupsMatcher()._ReleaseGroupsMatcher__release_groups
    if extra_groups:
        extra_pattern = '|'.join(extra_groups)
        group_pattern = f"{group_pattern}|{extra_pattern}"
    normalized_customization = CustomizationMatcher._normalize_customization(
        config.get(SystemConfigKey.Customization)
    )
    options = {}
    options["custom_words"] = config.get(SystemConfigKey.CustomIdentifiers) or []
    options["media_exts"] = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
    options["release_groups"] = group_pattern
    options["customization"] = normalized_customization
    options["streaming_platforms"] = StreamingPlatforms()._lookup_cache
    return options
@lru_cache(maxsize=256)
def _rust_custom_parse_options(custom_words: Tuple[str, ...]) -> dict:
    """
    Return the Rust meta options with ``custom_words`` replaced.

    The result is a shallow copy of the default options and is cached per
    distinct tuple of identifier words, so the same set of words does not
    rebuild the option dict.

    :param custom_words: identifier words as a hashable tuple
    :return: option dict whose ``custom_words`` is a list of the given words
    """
    return {**_rust_default_parse_options(), "custom_words": list(custom_words)}
def _rust_parse_options(custom_words: List[str] = None) -> dict:
    """
    Collect the runtime configuration needed by the Rust meta parser, so the
    Rust layer does not have to read the database or settings itself.

    :param custom_words: identifier words; ``None`` selects the cached default
                         options, any other value (including an empty list)
                         selects options with exactly these words
    :return: option dict for the Rust parser
    """
    if custom_words is not None:
        # The per-words cache needs a hashable key
        words_key = tuple(custom_words) if custom_words else ()
        return _rust_custom_parse_options(words_key)
    return _rust_default_parse_options()
def clear_rust_parse_options_cache() -> None:
    """
    Drop the cached Rust meta parse options (default and per-words caches),
    so they are rebuilt after the system configuration changes.
    """
    for cached_builder in (_rust_default_parse_options, _rust_custom_parse_options):
        cached_builder.cache_clear()
def _meta_from_rust(parsed: dict) -> Optional[MetaBase]:
    """
    Copy a Rust parse result onto a MetaVideo/MetaAnime object so downstream
    code keeps working with the existing attributes and methods.

    :param parsed: dict produced by the Rust parser; falsy means "no result"
    :return: MetaAnime when ``parsed["kind"]`` is ``"anime"``, otherwise
             MetaVideo; ``None`` when ``parsed`` is falsy
    """
    if not parsed:
        return None
    meta = MetaAnime("") if parsed.get("kind") == "anime" else MetaVideo("")
    # Map the serialized type value back to its MediaType member
    media_types = {
        member.value: member
        for member in (
            MediaType.MOVIE,
            MediaType.TV,
            MediaType.COLLECTION,
            MediaType.UNKNOWN,
        )
    }
    # Attributes that take a replacement value when the parsed value is falsy
    fallbacks = {
        "isfile": False,
        "title": "",
        "total_season": 0,
        "total_episode": 0,
        "apply_words": [],
    }
    # Attributes are assigned in exactly this order
    attr_order = (
        "isfile",
        "title",
        "org_string",
        "subtitle",
        "type",
        "cn_name",
        "en_name",
        "original_name",
        "year",
        "total_season",
        "begin_season",
        "end_season",
        "total_episode",
        "begin_episode",
        "end_episode",
        "part",
        "resource_type",
        "resource_effect",
        "resource_pix",
        "resource_team",
        "customization",
        "web_source",
        "video_encode",
        "video_bit",
        "audio_encode",
        "apply_words",
        "tmdbid",
        "doubanid",
        "episode_group",
        "fps",
    )
    for attr in attr_order:
        value = parsed.get(attr)
        if attr == "type":
            value = media_types.get(value, MediaType.UNKNOWN)
        elif attr in fallbacks:
            value = value or fallbacks[attr]
        setattr(meta, attr, value)
    return meta
def MetaInfo(title: str, subtitle: Optional[str] = None, custom_words: List[str] = None) -> MetaBase:
    """
    Recognise metadata from a title and subtitle.

    The Rust parser is tried first; when it yields nothing the Python parser
    is used. In the Python path ``original_name`` is the name recognised
    without custom identifier words whenever such words were applied.

    :param title: title, torrent name or file name
    :param subtitle: subtitle or description
    :param custom_words: list of custom identifier words
    :return: MetaAnime or MetaVideo
    """
    parsed = rust_accel.parse_metainfo(title, subtitle, _rust_parse_options(custom_words))
    accelerated = _meta_from_rust(parsed)
    if accelerated:
        return accelerated
    meta = _build_meta_info(title=title, subtitle=subtitle, custom_words=custom_words)
    if not meta.apply_words:
        meta.original_name = meta.name
        return meta
    # Identifier words changed the title: parse again without custom words
    untouched = _build_meta_info(title=title, subtitle=subtitle)
    meta.original_name = untouched.name or meta.name
    return meta
def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase:
    """
    Recognise metadata from a path.

    The Rust parser is tried first. Otherwise the file name is parsed and then
    merged with the metadata of the parent directory and of the grandparent
    directory, in that order.

    :param path: path to recognise
    :param custom_words: list of custom identifier words
    :return: merged metadata of the file
    """
    parsed = rust_accel.parse_metainfo_path(str(path), _rust_parse_options(custom_words))
    accelerated = _meta_from_rust(parsed)
    if accelerated:
        return accelerated
    # Metadata of the file itself
    file_meta = MetaInfo(title=path.name, custom_words=custom_words)
    if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta):
        clear_parsed_title_for_parent_merge(file_meta)
    # Parent directory first, then the grandparent directory
    for ancestor in (path.parent, path.parent.parent):
        ancestor_meta = MetaInfo(title=ancestor.name, custom_words=custom_words)
        # Skip the merge only when the file is not TV but the directory is
        if file_meta.type == MediaType.TV or ancestor_meta.type != MediaType.TV:
            file_meta.merge(ancestor_meta)
    return file_meta
def is_anime(name: str) -> bool:
    """
    Decide whether a name looks like an anime release.

    Checks run in a fixed order: the full-width bracket pattern or the
    " - NN " episode pattern means anime; otherwise a conventional
    season/episode marker means not anime; otherwise the ASCII square-bracket
    pattern decides.

    :param name: name to inspect
    :return: True when the name is treated as anime
    """
    if not name:
        return False
    if _ANIME_BRACKET_RE.search(name) or _ANIME_DASH_EPISODE_RE.search(name):
        return True
    if _VIDEO_SEASON_EPISODE_RE.search(name):
        return False
    return bool(_ANIME_SQUARE_BRACKET_RE.search(name))
def find_metainfo(title: str) -> Tuple[str, dict]:
    """
    Extract explicit media tags from a title.

    The Rust implementation is used when it returns a result; otherwise the
    Python implementation handles the title.

    :param title: title to inspect
    :return: tuple of (title, tag dict)
    """
    accelerated = rust_accel.find_metainfo(title)
    if not accelerated:
        return _find_metainfo_python(title)
    return accelerated["title"], accelerated["metainfo"]