mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-01 07:26:50 +00:00
- Add episode_group (g=) parameter parsing to explicit media tags in both Python and Rust metainfo parsers - Propagate episode_group through MetaInfo, MetaBase, MediaInfo, and context models - Update SKILL.md and update_custom_identifiers.py docs to describe episode group usage - Add tests for episode_group recognition in metainfo and chain recognition logic
386 lines
15 KiB
Python
386 lines
15 KiB
Python
from pathlib import Path
|
||
from functools import lru_cache
|
||
from typing import Tuple, List, Optional
|
||
|
||
import regex as re
|
||
|
||
from app.core.config import settings
|
||
from app.core.meta import MetaAnime, MetaVideo, MetaBase
|
||
from app.core.meta.infopath import (
|
||
clear_parsed_title_for_parent_merge,
|
||
should_use_parent_title_for_file_stem,
|
||
)
|
||
from app.core.meta.words import WordsMatcher
|
||
from app.log import logger
|
||
from app.schemas.types import MediaType
|
||
from app.utils import rust_accel
|
||
|
||
|
||
_ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE)
|
||
_ANIME_DASH_EPISODE_RE = re.compile(r'\s+-\s+[\dv]{1,4}\s+', re.IGNORECASE)
|
||
_VIDEO_SEASON_EPISODE_RE = re.compile(
|
||
r"S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|"
|
||
r"EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}",
|
||
re.IGNORECASE,
|
||
)
|
||
_ANIME_SQUARE_BRACKET_RE = re.compile(r'\[[+0-9XVPI-]+]\s*\[', re.IGNORECASE)
|
||
|
||
_BRACED_METAINFO_RE = re.compile(r'(?<={\[)[\W\w]+(?=]})')
|
||
_BRACED_TMDBID_RE = re.compile(r'(?<=tmdbid=)\d+')
|
||
_BRACED_DOUBANID_RE = re.compile(r'(?<=doubanid=)\d+')
|
||
_BRACED_TYPE_RE = re.compile(r'(?<=type=)\w+')
|
||
_BRACED_EPISODE_GROUP_RE = re.compile(r'(?:^|;)g=([0-9a-fA-F]+)(?=;|$)')
|
||
_BRACED_BEGIN_SEASON_RE = re.compile(r'(?<=s=)\d+')
|
||
_BRACED_END_SEASON_RE = re.compile(r'(?<=s=\d+-)\d+')
|
||
_BRACED_BEGIN_EPISODE_RE = re.compile(r'(?<=e=)\d+')
|
||
_BRACED_END_EPISODE_RE = re.compile(r'(?<=e=\d+-)\d+')
|
||
_EMBY_TMDB_RE_LIST = (
|
||
re.compile(r'\[tmdbid[=\-](\d+)\]'),
|
||
re.compile(r'\[tmdb[=\-](\d+)\]'),
|
||
re.compile(r'\{tmdbid[=\-](\d+)\}'),
|
||
re.compile(r'\{tmdb[=\-](\d+)\}'),
|
||
)
|
||
|
||
|
||
def _empty_metainfo() -> dict:
|
||
"""
|
||
返回媒体标签的默认结构,避免不同识别请求之间共享可变状态。
|
||
"""
|
||
return {
|
||
'tmdbid': None,
|
||
'doubanid': None,
|
||
'type': None,
|
||
'episode_group': None,
|
||
'begin_season': None,
|
||
'end_season': None,
|
||
'total_season': None,
|
||
'begin_episode': None,
|
||
'end_episode': None,
|
||
'total_episode': None,
|
||
}
|
||
|
||
|
||
def _apply_range_total(metainfo: dict, begin_key: str, end_key: str, total_key: str) -> None:
|
||
"""
|
||
计算季/集范围总数;保留原有倒序输入自动交换的兼容行为。
|
||
"""
|
||
if metainfo.get(begin_key) and metainfo.get(end_key):
|
||
if metainfo[begin_key] > metainfo[end_key]:
|
||
metainfo[begin_key], metainfo[end_key] = metainfo[end_key], metainfo[begin_key]
|
||
metainfo[total_key] = metainfo[end_key] - metainfo[begin_key] + 1
|
||
elif metainfo.get(begin_key) and not metainfo.get(end_key):
|
||
metainfo[total_key] = 1
|
||
|
||
|
||
def _find_metainfo_python(title: str) -> Tuple[str, dict]:
|
||
"""
|
||
使用 Python 解析标题中的显式媒体标签,作为 Rust 入口不可用时的兜底。
|
||
"""
|
||
metainfo = _empty_metainfo()
|
||
if not title:
|
||
return title, metainfo
|
||
# 从标题中提取媒体信息 格式为{[tmdbid=xxx;type=xxx;g=xxx;s=xxx;e=xxx]}
|
||
results = _BRACED_METAINFO_RE.findall(title)
|
||
if results:
|
||
for result in results:
|
||
# 查找tmdbid信息
|
||
tmdbid = _BRACED_TMDBID_RE.search(result)
|
||
if tmdbid and tmdbid.group(0).isdigit():
|
||
metainfo['tmdbid'] = tmdbid.group(0)
|
||
# 查找豆瓣id信息
|
||
doubanid = _BRACED_DOUBANID_RE.search(result)
|
||
if doubanid and doubanid.group(0).isdigit():
|
||
metainfo['doubanid'] = doubanid.group(0)
|
||
# 查找媒体类型
|
||
mtype = _BRACED_TYPE_RE.search(result)
|
||
if mtype:
|
||
media_type = mtype.group(0)
|
||
if media_type in ["movie", "movies"]:
|
||
metainfo['type'] = MediaType.MOVIE
|
||
elif media_type == "tv":
|
||
metainfo['type'] = MediaType.TV
|
||
# 查找剧集组
|
||
episode_group = _BRACED_EPISODE_GROUP_RE.search(result)
|
||
if episode_group:
|
||
metainfo['episode_group'] = episode_group.group(1)
|
||
# 查找季信息
|
||
begin_season = _BRACED_BEGIN_SEASON_RE.search(result)
|
||
if begin_season and begin_season.group(0).isdigit():
|
||
metainfo['begin_season'] = int(begin_season.group(0))
|
||
end_season = _BRACED_END_SEASON_RE.search(result)
|
||
if end_season and end_season.group(0).isdigit():
|
||
metainfo['end_season'] = int(end_season.group(0))
|
||
# 查找集信息
|
||
begin_episode = _BRACED_BEGIN_EPISODE_RE.search(result)
|
||
if begin_episode and begin_episode.group(0).isdigit():
|
||
metainfo['begin_episode'] = int(begin_episode.group(0))
|
||
end_episode = _BRACED_END_EPISODE_RE.search(result)
|
||
if end_episode and end_episode.group(0).isdigit():
|
||
metainfo['end_episode'] = int(end_episode.group(0))
|
||
# 去除title中该部分
|
||
if tmdbid or mtype or episode_group or begin_season or end_season or begin_episode or end_episode:
|
||
title = title.replace(f"{{[{result}]}}", '')
|
||
|
||
# 支持Emby格式的ID标签;第一个 [tmdbid] 历史上始终优先处理,用于覆盖前面 {[...]} 中的旧标签。
|
||
tmdb_match = _EMBY_TMDB_RE_LIST[0].search(title)
|
||
if tmdb_match:
|
||
metainfo['tmdbid'] = tmdb_match.group(1)
|
||
title = _EMBY_TMDB_RE_LIST[0].sub('', title).strip()
|
||
elif not metainfo['tmdbid']:
|
||
# 保持原有优先级:[tmdbid] > [tmdb] > {tmdbid} > {tmdb}
|
||
for tmdb_re in _EMBY_TMDB_RE_LIST[1:]:
|
||
tmdb_match = tmdb_re.search(title)
|
||
if tmdb_match:
|
||
metainfo['tmdbid'] = tmdb_match.group(1)
|
||
title = tmdb_re.sub('', title).strip()
|
||
break
|
||
|
||
# 计算季集总数
|
||
_apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season')
|
||
_apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode')
|
||
return title, metainfo
|
||
|
||
|
||
def _build_meta_info(
|
||
title: str,
|
||
subtitle: Optional[str] = None,
|
||
custom_words: List[str] = None,
|
||
) -> MetaBase:
|
||
"""
|
||
根据标题构造元数据
|
||
"""
|
||
# 原标题
|
||
org_title = title
|
||
# 预处理标题
|
||
title, apply_words = WordsMatcher().prepare(title, custom_words=custom_words)
|
||
# 获取标题中媒体信息
|
||
title, metainfo = find_metainfo(title)
|
||
# 判断是否处理文件
|
||
media_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
|
||
title_path = Path(title) if title else None
|
||
if title_path and title_path.suffix.lower() in media_exts:
|
||
isfile = True
|
||
# 去掉后缀
|
||
title = title_path.stem
|
||
else:
|
||
isfile = False
|
||
# 识别
|
||
meta = MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile)
|
||
# 记录原标题
|
||
meta.title = org_title
|
||
# 记录使用的识别词
|
||
meta.apply_words = apply_words or []
|
||
# 修正媒体信息
|
||
if metainfo.get('tmdbid'):
|
||
try:
|
||
meta.tmdbid = int(metainfo['tmdbid'])
|
||
except ValueError as _:
|
||
logger.warn("tmdbid 必须是数字")
|
||
if metainfo.get('doubanid'):
|
||
meta.doubanid = metainfo['doubanid']
|
||
if metainfo.get('type'):
|
||
meta.type = metainfo['type']
|
||
if metainfo.get('episode_group'):
|
||
meta.episode_group = metainfo['episode_group']
|
||
if metainfo.get('begin_season'):
|
||
meta.begin_season = metainfo['begin_season']
|
||
if metainfo.get('end_season'):
|
||
meta.end_season = metainfo['end_season']
|
||
if metainfo.get('total_season'):
|
||
meta.total_season = metainfo['total_season']
|
||
if metainfo.get('begin_episode'):
|
||
meta.begin_episode = metainfo['begin_episode']
|
||
if metainfo.get('end_episode'):
|
||
meta.end_episode = metainfo['end_episode']
|
||
if metainfo.get('total_episode'):
|
||
meta.total_episode = metainfo['total_episode']
|
||
return meta
|
||
|
||
|
||
@lru_cache(maxsize=1)
|
||
def _rust_default_parse_options() -> dict:
|
||
"""
|
||
缓存 Rust Meta 默认解析配置,避免热路径反复读取配置并复制流媒体平台大表。
|
||
"""
|
||
from app.core.meta.customization import CustomizationMatcher
|
||
from app.core.meta.releasegroup import ReleaseGroupsMatcher
|
||
from app.core.meta.streamingplatform import StreamingPlatforms
|
||
from app.db.systemconfig_oper import SystemConfigOper
|
||
from app.schemas.types import SystemConfigKey
|
||
|
||
systemconfig = SystemConfigOper()
|
||
custom_release_groups = systemconfig.get(SystemConfigKey.CustomReleaseGroups)
|
||
if isinstance(custom_release_groups, list):
|
||
custom_release_groups = list(filter(None, custom_release_groups))
|
||
release_matcher = ReleaseGroupsMatcher()
|
||
release_groups = release_matcher._ReleaseGroupsMatcher__release_groups
|
||
if custom_release_groups:
|
||
release_groups = f"{release_groups}|{'|'.join(custom_release_groups)}"
|
||
|
||
customization = CustomizationMatcher._normalize_customization(
|
||
systemconfig.get(SystemConfigKey.Customization)
|
||
)
|
||
return {
|
||
"custom_words": systemconfig.get(SystemConfigKey.CustomIdentifiers) or [],
|
||
"media_exts": settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT,
|
||
"release_groups": release_groups,
|
||
"customization": customization,
|
||
"streaming_platforms": StreamingPlatforms()._lookup_cache,
|
||
}
|
||
|
||
|
||
@lru_cache(maxsize=256)
|
||
def _rust_custom_parse_options(custom_words: Tuple[str, ...]) -> dict:
|
||
"""
|
||
缓存带自定义识别词的 Rust Meta 配置,避免同一组识别词重复构造配置对象。
|
||
"""
|
||
options = dict(_rust_default_parse_options())
|
||
options["custom_words"] = list(custom_words)
|
||
return options
|
||
|
||
|
||
def _rust_parse_options(custom_words: List[str] = None) -> dict:
|
||
"""
|
||
收集 Rust Meta 解析所需的运行时配置,避免 Rust 层直接访问数据库和 settings。
|
||
"""
|
||
if custom_words is None:
|
||
return _rust_default_parse_options()
|
||
return _rust_custom_parse_options(tuple(custom_words or []))
|
||
|
||
|
||
def clear_rust_parse_options_cache() -> None:
|
||
"""
|
||
清理 Rust Meta 默认解析配置缓存,供系统配置变更后重载使用。
|
||
"""
|
||
_rust_default_parse_options.cache_clear()
|
||
_rust_custom_parse_options.cache_clear()
|
||
|
||
|
||
def _meta_from_rust(parsed: dict) -> Optional[MetaBase]:
|
||
"""
|
||
将 Rust 解析结果灌回现有 MetaVideo/MetaAnime 对象,保留下游属性和方法兼容性。
|
||
"""
|
||
if not parsed:
|
||
return None
|
||
meta = MetaAnime("") if parsed.get("kind") == "anime" else MetaVideo("")
|
||
type_map = {
|
||
MediaType.MOVIE.value: MediaType.MOVIE,
|
||
MediaType.TV.value: MediaType.TV,
|
||
MediaType.COLLECTION.value: MediaType.COLLECTION,
|
||
MediaType.UNKNOWN.value: MediaType.UNKNOWN,
|
||
}
|
||
fields = {
|
||
"isfile": parsed.get("isfile") or False,
|
||
"title": parsed.get("title") or "",
|
||
"org_string": parsed.get("org_string"),
|
||
"subtitle": parsed.get("subtitle"),
|
||
"type": type_map.get(parsed.get("type"), MediaType.UNKNOWN),
|
||
"cn_name": parsed.get("cn_name"),
|
||
"en_name": parsed.get("en_name"),
|
||
"original_name": parsed.get("original_name"),
|
||
"year": parsed.get("year"),
|
||
"total_season": parsed.get("total_season") or 0,
|
||
"begin_season": parsed.get("begin_season"),
|
||
"end_season": parsed.get("end_season"),
|
||
"total_episode": parsed.get("total_episode") or 0,
|
||
"begin_episode": parsed.get("begin_episode"),
|
||
"end_episode": parsed.get("end_episode"),
|
||
"part": parsed.get("part"),
|
||
"resource_type": parsed.get("resource_type"),
|
||
"resource_effect": parsed.get("resource_effect"),
|
||
"resource_pix": parsed.get("resource_pix"),
|
||
"resource_team": parsed.get("resource_team"),
|
||
"customization": parsed.get("customization"),
|
||
"web_source": parsed.get("web_source"),
|
||
"video_encode": parsed.get("video_encode"),
|
||
"video_bit": parsed.get("video_bit"),
|
||
"audio_encode": parsed.get("audio_encode"),
|
||
"apply_words": parsed.get("apply_words") or [],
|
||
"tmdbid": parsed.get("tmdbid"),
|
||
"doubanid": parsed.get("doubanid"),
|
||
"episode_group": parsed.get("episode_group"),
|
||
"fps": parsed.get("fps"),
|
||
}
|
||
for key, value in fields.items():
|
||
setattr(meta, key, value)
|
||
return meta
|
||
|
||
|
||
def MetaInfo(title: str, subtitle: Optional[str] = None, custom_words: List[str] = None) -> MetaBase:
|
||
"""
|
||
根据标题和副标题识别元数据
|
||
:param title: 标题、种子名、文件名
|
||
:param subtitle: 副标题、描述
|
||
:param custom_words: 自定义识别词列表
|
||
:return: MetaAnime、MetaVideo
|
||
"""
|
||
rust_meta = _meta_from_rust(
|
||
rust_accel.parse_metainfo(title, subtitle, _rust_parse_options(custom_words))
|
||
)
|
||
if rust_meta:
|
||
return rust_meta
|
||
meta = _build_meta_info(title=title, subtitle=subtitle, custom_words=custom_words)
|
||
if meta.apply_words:
|
||
original_meta = _build_meta_info(title=title, subtitle=subtitle)
|
||
meta.original_name = original_meta.name or meta.name
|
||
else:
|
||
meta.original_name = meta.name
|
||
return meta
|
||
|
||
|
||
def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase:
|
||
"""
|
||
根据路径识别元数据
|
||
:param path: 路径
|
||
:param custom_words: 自定义识别词列表
|
||
"""
|
||
rust_meta = _meta_from_rust(
|
||
rust_accel.parse_metainfo_path(str(path), _rust_parse_options(custom_words))
|
||
)
|
||
if rust_meta:
|
||
return rust_meta
|
||
# 文件元数据,不包含后缀
|
||
file_meta = MetaInfo(title=path.name, custom_words=custom_words)
|
||
if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta):
|
||
clear_parsed_title_for_parent_merge(file_meta)
|
||
# 上级目录元数据
|
||
dir_meta = MetaInfo(title=path.parent.name, custom_words=custom_words)
|
||
if file_meta.type == MediaType.TV or dir_meta.type != MediaType.TV:
|
||
# 合并元数据
|
||
file_meta.merge(dir_meta)
|
||
# 上上级目录元数据
|
||
root_meta = MetaInfo(title=path.parent.parent.name, custom_words=custom_words)
|
||
if file_meta.type == MediaType.TV or root_meta.type != MediaType.TV:
|
||
# 合并元数据
|
||
file_meta.merge(root_meta)
|
||
return file_meta
|
||
|
||
|
||
def is_anime(name: str) -> bool:
|
||
"""
|
||
判断是否为动漫
|
||
:param name: 名称
|
||
:return: 是否动漫
|
||
"""
|
||
if not name:
|
||
return False
|
||
if _ANIME_BRACKET_RE.search(name):
|
||
return True
|
||
if _ANIME_DASH_EPISODE_RE.search(name):
|
||
return True
|
||
if _VIDEO_SEASON_EPISODE_RE.search(name):
|
||
return False
|
||
if _ANIME_SQUARE_BRACKET_RE.search(name):
|
||
return True
|
||
return False
|
||
|
||
|
||
def find_metainfo(title: str) -> Tuple[str, dict]:
|
||
"""
|
||
从标题中提取媒体信息
|
||
"""
|
||
rust_result = rust_accel.find_metainfo(title)
|
||
if rust_result:
|
||
return rust_result["title"], rust_result["metainfo"]
|
||
return _find_metainfo_python(title)
|