Files
archived-MoviePilot/app/core/metainfo.py
2026-05-15 13:37:36 +08:00

247 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from pathlib import Path
from typing import Tuple, List, Optional
import regex as re
from app.core.config import settings
from app.core.meta import MetaAnime, MetaVideo, MetaBase
from app.core.meta.infopath import (
clear_parsed_title_for_parent_merge,
should_use_parent_title_for_file_stem,
)
from app.core.meta.words import WordsMatcher
from app.log import logger
from app.schemas.types import MediaType
_ANIME_BRACKET_RE = re.compile(r'【[+0-9XVPI-]+】\s*【', re.IGNORECASE)
_ANIME_DASH_EPISODE_RE = re.compile(r'\s+-\s+[\dv]{1,4}\s+', re.IGNORECASE)
_VIDEO_SEASON_EPISODE_RE = re.compile(
r"S\d{2}\s*-\s*S\d{2}|S\d{2}|\s+S\d{1,2}|"
r"EP?\d{2,4}\s*-\s*EP?\d{2,4}|EP?\d{2,4}|\s+EP?\d{1,4}",
re.IGNORECASE,
)
_ANIME_SQUARE_BRACKET_RE = re.compile(r'\[[+0-9XVPI-]+]\s*\[', re.IGNORECASE)
_BRACED_METAINFO_RE = re.compile(r'(?<={\[)[\W\w]+(?=]})')
_BRACED_TMDBID_RE = re.compile(r'(?<=tmdbid=)\d+')
_BRACED_DOUBANID_RE = re.compile(r'(?<=doubanid=)\d+')
_BRACED_TYPE_RE = re.compile(r'(?<=type=)\w+')
_BRACED_BEGIN_SEASON_RE = re.compile(r'(?<=s=)\d+')
_BRACED_END_SEASON_RE = re.compile(r'(?<=s=\d+-)\d+')
_BRACED_BEGIN_EPISODE_RE = re.compile(r'(?<=e=)\d+')
_BRACED_END_EPISODE_RE = re.compile(r'(?<=e=\d+-)\d+')
_EMBY_TMDB_RE_LIST = (
re.compile(r'\[tmdbid[=\-](\d+)\]'),
re.compile(r'\[tmdb[=\-](\d+)\]'),
re.compile(r'\{tmdbid[=\-](\d+)\}'),
re.compile(r'\{tmdb[=\-](\d+)\}'),
)
def _empty_metainfo() -> dict:
"""
返回媒体标签的默认结构,避免不同识别请求之间共享可变状态。
"""
return {
'tmdbid': None,
'doubanid': None,
'type': None,
'begin_season': None,
'end_season': None,
'total_season': None,
'begin_episode': None,
'end_episode': None,
'total_episode': None,
}
def _apply_range_total(metainfo: dict, begin_key: str, end_key: str, total_key: str) -> None:
"""
计算季/集范围总数;保留原有倒序输入自动交换的兼容行为。
"""
if metainfo.get(begin_key) and metainfo.get(end_key):
if metainfo[begin_key] > metainfo[end_key]:
metainfo[begin_key], metainfo[end_key] = metainfo[end_key], metainfo[begin_key]
metainfo[total_key] = metainfo[end_key] - metainfo[begin_key] + 1
elif metainfo.get(begin_key) and not metainfo.get(end_key):
metainfo[total_key] = 1
def _build_meta_info(
title: str,
subtitle: Optional[str] = None,
custom_words: List[str] = None,
) -> MetaBase:
"""
根据标题构造元数据
"""
# 原标题
org_title = title
# 预处理标题
title, apply_words = WordsMatcher().prepare(title, custom_words=custom_words)
# 获取标题中媒体信息
title, metainfo = find_metainfo(title)
# 判断是否处理文件
media_exts = settings.RMT_MEDIAEXT + settings.RMT_SUBEXT + settings.RMT_AUDIOEXT
title_path = Path(title) if title else None
if title_path and title_path.suffix.lower() in media_exts:
isfile = True
# 去掉后缀
title = title_path.stem
else:
isfile = False
# 识别
meta = MetaAnime(title, subtitle, isfile) if is_anime(title) else MetaVideo(title, subtitle, isfile)
# 记录原标题
meta.title = org_title
# 记录使用的识别词
meta.apply_words = apply_words or []
# 修正媒体信息
if metainfo.get('tmdbid'):
try:
meta.tmdbid = int(metainfo['tmdbid'])
except ValueError as _:
logger.warn("tmdbid 必须是数字")
if metainfo.get('doubanid'):
meta.doubanid = metainfo['doubanid']
if metainfo.get('type'):
meta.type = metainfo['type']
if metainfo.get('begin_season'):
meta.begin_season = metainfo['begin_season']
if metainfo.get('end_season'):
meta.end_season = metainfo['end_season']
if metainfo.get('total_season'):
meta.total_season = metainfo['total_season']
if metainfo.get('begin_episode'):
meta.begin_episode = metainfo['begin_episode']
if metainfo.get('end_episode'):
meta.end_episode = metainfo['end_episode']
if metainfo.get('total_episode'):
meta.total_episode = metainfo['total_episode']
return meta
def MetaInfo(title: str, subtitle: Optional[str] = None, custom_words: List[str] = None) -> MetaBase:
"""
根据标题和副标题识别元数据
:param title: 标题、种子名、文件名
:param subtitle: 副标题、描述
:param custom_words: 自定义识别词列表
:return: MetaAnime、MetaVideo
"""
meta = _build_meta_info(title=title, subtitle=subtitle, custom_words=custom_words)
if meta.apply_words:
original_meta = _build_meta_info(title=title, subtitle=subtitle)
meta.original_name = original_meta.name or meta.name
else:
meta.original_name = meta.name
return meta
def MetaInfoPath(path: Path, custom_words: List[str] = None) -> MetaBase:
"""
根据路径识别元数据
:param path: 路径
:param custom_words: 自定义识别词列表
"""
# 文件元数据,不包含后缀
file_meta = MetaInfo(title=path.name, custom_words=custom_words)
if should_use_parent_title_for_file_stem(path.stem, path.parent.name, file_meta):
clear_parsed_title_for_parent_merge(file_meta)
# 上级目录元数据
dir_meta = MetaInfo(title=path.parent.name, custom_words=custom_words)
if file_meta.type == MediaType.TV or dir_meta.type != MediaType.TV:
# 合并元数据
file_meta.merge(dir_meta)
# 上上级目录元数据
root_meta = MetaInfo(title=path.parent.parent.name, custom_words=custom_words)
if file_meta.type == MediaType.TV or root_meta.type != MediaType.TV:
# 合并元数据
file_meta.merge(root_meta)
return file_meta
def is_anime(name: str) -> bool:
"""
判断是否为动漫
:param name: 名称
:return: 是否动漫
"""
if not name:
return False
if _ANIME_BRACKET_RE.search(name):
return True
if _ANIME_DASH_EPISODE_RE.search(name):
return True
if _VIDEO_SEASON_EPISODE_RE.search(name):
return False
if _ANIME_SQUARE_BRACKET_RE.search(name):
return True
return False
def find_metainfo(title: str) -> Tuple[str, dict]:
"""
从标题中提取媒体信息
"""
metainfo = _empty_metainfo()
if not title:
return title, metainfo
# 从标题中提取媒体信息 格式为{[tmdbid=xxx;type=xxx;s=xxx;e=xxx]}
results = _BRACED_METAINFO_RE.findall(title)
if results:
for result in results:
# 查找tmdbid信息
tmdbid = _BRACED_TMDBID_RE.search(result)
if tmdbid and tmdbid.group(0).isdigit():
metainfo['tmdbid'] = tmdbid.group(0)
# 查找豆瓣id信息
doubanid = _BRACED_DOUBANID_RE.search(result)
if doubanid and doubanid.group(0).isdigit():
metainfo['doubanid'] = doubanid.group(0)
# 查找媒体类型
mtype = _BRACED_TYPE_RE.search(result)
if mtype:
media_type = mtype.group(0)
if media_type == "movies":
metainfo['type'] = MediaType.MOVIE
elif media_type == "tv":
metainfo['type'] = MediaType.TV
# 查找季信息
begin_season = _BRACED_BEGIN_SEASON_RE.search(result)
if begin_season and begin_season.group(0).isdigit():
metainfo['begin_season'] = int(begin_season.group(0))
end_season = _BRACED_END_SEASON_RE.search(result)
if end_season and end_season.group(0).isdigit():
metainfo['end_season'] = int(end_season.group(0))
# 查找集信息
begin_episode = _BRACED_BEGIN_EPISODE_RE.search(result)
if begin_episode and begin_episode.group(0).isdigit():
metainfo['begin_episode'] = int(begin_episode.group(0))
end_episode = _BRACED_END_EPISODE_RE.search(result)
if end_episode and end_episode.group(0).isdigit():
metainfo['end_episode'] = int(end_episode.group(0))
# 去除title中该部分
if tmdbid or mtype or begin_season or end_season or begin_episode or end_episode:
title = title.replace(f"{{[{result}]}}", '')
# 支持Emby格式的ID标签第一个 [tmdbid] 历史上始终优先处理,用于覆盖前面 {[...]} 中的旧标签。
tmdb_match = _EMBY_TMDB_RE_LIST[0].search(title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = _EMBY_TMDB_RE_LIST[0].sub('', title).strip()
elif not metainfo['tmdbid']:
# 保持原有优先级:[tmdbid] > [tmdb] > {tmdbid} > {tmdb}
for tmdb_re in _EMBY_TMDB_RE_LIST[1:]:
tmdb_match = tmdb_re.search(title)
if tmdb_match:
metainfo['tmdbid'] = tmdb_match.group(1)
title = tmdb_re.sub('', title).strip()
break
# 计算季集总数
_apply_range_total(metainfo, 'begin_season', 'end_season', 'total_season')
_apply_range_total(metainfo, 'begin_episode', 'end_episode', 'total_episode')
return title, metainfo