feat: optimize scraping for multi-server compatibility

- Add studio, country, runtime tags to NFO generation
- Fix Fanart naming: showbackground→fanart (recognized by Jellyfin/Emby)
- Add image alias system: backdrop↔fanart, thumb↔landscape
- Merge image sources from all modules instead of first-wins
- Add CLEARART and LANDSCAPE scraping metadata types
- Extend season scraping with backdrop and landscape support

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jxxghp
2026-05-19 12:36:47 +08:00
parent 195e34563d
commit 34ff80e26c
5 changed files with 185 additions and 21 deletions

View File

@@ -136,10 +136,10 @@ class ScrapingConfig:
for mt, mds in [
(
"movie",
["nfo", "poster", "backdrop", "logo", "disc", "banner", "thumb"],
["nfo", "poster", "backdrop", "logo", "disc", "banner", "thumb", "clearart", "landscape"],
),
("tv", ["nfo", "poster", "backdrop", "logo", "banner", "thumb"]),
("season", ["nfo", "poster", "banner", "thumb"]),
("tv", ["nfo", "poster", "backdrop", "logo", "banner", "thumb", "clearart", "landscape"]),
("season", ["nfo", "poster", "backdrop", "banner", "thumb", "landscape"]),
("episode", ["nfo", "thumb"]),
]
for md in mds
@@ -164,6 +164,15 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton):
"cdart": ScrapingMetadata.DISC,
"banner": ScrapingMetadata.BANNER,
"thumb": ScrapingMetadata.THUMB,
"landscape": ScrapingMetadata.LANDSCAPE,
"clearart": ScrapingMetadata.CLEARART,
}
# Alternative file names for the same artwork, keyed by lower-case file stem.
# _expand_with_aliases looks a target's stem up here and adds one extra save
# target per listed alias (backdrop <-> fanart, thumb <-> landscape), keeping
# the original file extension.
IMAGE_ALIASES = {
"backdrop": ["fanart"],
"fanart": ["backdrop"],
"thumb": ["landscape"],
"landscape": ["thumb"],
}
def __init__(self):
@@ -359,6 +368,8 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton):
ScrapingMetadata.POSTER: "poster",
ScrapingMetadata.BANNER: "banner",
ScrapingMetadata.THUMB: "thumb",
ScrapingMetadata.BACKDROP: "backdrop",
ScrapingMetadata.LANDSCAPE: "landscape",
}
if season_image_name := season_image_name_map.get(metadata_type):
hint_ext = Path(filename_hint).suffix if filename_hint else ".jpg"
@@ -428,6 +439,8 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton):
ScrapingMetadata.POSTER,
ScrapingMetadata.BANNER,
ScrapingMetadata.THUMB,
ScrapingMetadata.BACKDROP,
ScrapingMetadata.LANDSCAPE,
}
):
return targets
@@ -445,6 +458,39 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton):
targets.insert(0, root_target)
return targets
def _expand_with_aliases(
    self,
    targets: List[Tuple[schemas.FileItem, Path]],
    item_type: ScrapingTarget,
) -> List[Tuple[schemas.FileItem, Path]]:
    """
    Extend the image save targets with alias file names so that several
    media servers can pick the artwork up.

    For example ``backdrop.jpg`` is additionally saved as ``fanart.jpg`` and
    ``thumb.jpg`` is additionally saved as ``landscape.jpg``.

    :param targets: ``(file item, image path)`` pairs that will be written
    :param item_type: scraping target type used to look up the alias policy
    :return: a new list holding the original targets followed by any alias
             targets that are not already present
    """
    result = list(targets)
    for owner, path in targets:
        # Targets without a path cannot be renamed into an alias.
        if not path:
            continue
        key = path.stem.lower()
        # Season-prefixed files (e.g. season01-poster.jpg) never get aliases.
        if key.startswith("season"):
            continue
        for alias_name in self.IMAGE_ALIASES.get(key) or ():
            meta_type = self.IMAGE_METADATA_MAP.get(alias_name)
            # An alias whose own metadata type is configured as "skip" is
            # left out; aliases without a mapped type are always added.
            if meta_type and self.scraping_policies.option(item_type, meta_type).is_skip:
                continue
            candidate = (owner, path.with_name(f"{alias_name}{path.suffix}"))
            if candidate not in result:
                result.append(candidate)
    return result
def metadata_nfo(
self,
meta: MetaBase,
@@ -468,6 +514,32 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton):
episode=episode,
)
def metadata_img(
    self,
    mediainfo: MediaInfo,
    season: Optional[int] = None,
    episode: Optional[int] = None,
) -> Optional[dict]:
    """
    Collect image names and URLs, merging the results of every running
    ``metadata_img`` module.

    Modules are queried in ascending ``get_priority()`` order. The first
    non-empty URL seen for an image name wins; later modules only fill in
    image names that are still missing. Entries with an empty URL are
    ignored so that they cannot block a usable URL from a later module.

    :param mediainfo: media information passed through to each module
    :param season: optional season number passed through to each module
    :param episode: optional episode number passed through to each module
    :return: merged ``{image name: url}`` dict, or ``None`` when no module
             supplied any usable image
    """
    merged = {}
    modules = sorted(
        self.modulemanager.get_running_modules("metadata_img"),
        key=lambda m: m.get_priority(),
    )
    for module in modules:
        # Only the module call is guarded: one failing module must not
        # prevent the remaining modules from contributing images.
        try:
            result = module.metadata_img(
                mediainfo=mediainfo, season=season, episode=episode
            )
        except Exception as err:
            logger.error(f"获取 {module.get_name()} 图片失败:{str(err)}")
            continue
        if not isinstance(result, dict):
            continue
        for name, url in result.items():
            # Skip empty URLs so a later module can still supply this image.
            if url and name not in merged:
                merged[name] = url
    return merged or None
@staticmethod
def select_recognize_source(
log_name: str, log_context: str, native_fn, plugin_fn
@@ -1049,6 +1121,9 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton):
parent_fileitem=parent_fileitem,
)
# 扩展别名目标(如 backdrop→fanart, thumb→landscape)
image_targets = self._expand_with_aliases(image_targets, item_type)
for base_item, image_path in image_targets:
if not image_path:
continue

View File

@@ -1,5 +1,4 @@
import asyncio
import re
from pathlib import Path
from typing import Optional, Tuple, Union
@@ -549,15 +548,34 @@ class FanartModule(_ModuleBase):
_images.sort(key=lambda x: int(x.get("likes", 0)), reverse=True)
return _images[0]
@staticmethod
def __name(fanart_name: str) -> str:
_FANART_NAME_MAP = {
"showbackground": "fanart",
"moviebackground": "fanart",
"hdtvlogo": "logo",
"hdmovielogo": "logo",
"movielogo": "logo",
"tvposter": "poster",
"movieposter": "poster",
"tvthumb": "thumb",
"moviethumb": "thumb",
"tvbanner": "banner",
"moviebanner": "banner",
"hdclearart": "clearart",
"movieart": "clearart",
"hdmovieclearart": "clearart",
"cdart": "cdart",
"moviedisc": "disc",
"seasonposter": "seasonposter",
"seasonthumb": "seasonthumb",
"seasonbanner": "seasonbanner",
}
@classmethod
def __name(cls, fanart_name: str) -> str:
"""
转换Fanart图片的名字
转换Fanart图片的名字为媒体服务器兼容名称
"""
words_to_remove = r"tv|movie|hdmovie|hdtv|show|hd"
pattern = re.compile(words_to_remove, re.IGNORECASE)
result = re.sub(pattern, "", fanart_name)
return result
return cls._FANART_NAME_MAP.get(fanart_name.lower(), fanart_name)
@classmethod
@cached(maxsize=settings.CONF.fanart, ttl=settings.CONF.meta, shared_key="get")

View File

@@ -301,6 +301,17 @@ class TmdbScraper:
# 内容分级
if content_rating := mediainfo.content_rating:
DomUtils.add_node(doc, root, "mpaa", content_rating)
# 制作公司
for company in mediainfo.production_companies or []:
if company.get("name"):
DomUtils.add_node(doc, root, "studio", company.get("name"))
# 制作国家
for country in mediainfo.production_countries or []:
if country.get("name"):
DomUtils.add_node(doc, root, "country", country.get("name"))
# 时长
if mediainfo.runtime:
DomUtils.add_node(doc, root, "runtime", str(mediainfo.runtime))
return doc

View File

@@ -446,3 +446,5 @@ class ScrapingMetadata(NameValueEnum):
BANNER = "横幅图"
THUMB = "缩略图"
DISC = "光盘图"
CLEARART = "透明艺术图"
LANDSCAPE = "横版缩略图"

View File

@@ -218,13 +218,16 @@ class TestMediaScrapingImages(unittest.TestCase):
self.media_chain._scrape_images_generic(fileitem, mediainfo, ScrapingTarget.MOVIE)
# Check download called for mapped metadata
# Check download called for mapped metadata + aliases (fanart→backdrop)
calls = self.media_chain._download_and_save_image.call_args_list
self.assertEqual(len(calls), 3)
urls = [call.kwargs["url"] for call in calls]
paths = [call.kwargs["path"] for call in calls]
self.assertIn("http://poster", urls)
self.assertIn("http://fanart", urls)
self.assertIn("http://logo", urls)
# fanart.jpg should also generate backdrop.jpg alias
self.assertIn(Path("/movies/Avatar/fanart.jpg"), paths)
self.assertIn(Path("/movies/Avatar/backdrop.jpg"), paths)
def test_scrape_images_season_filter(self):
fileitem = schemas.FileItem(path="/tv/Show/Season 1", name="Season 1", type="dir", storage="local")
@@ -250,7 +253,7 @@ class TestMediaScrapingImages(unittest.TestCase):
)
def test_scrape_movie_file_images_when_initialized_directly(self):
"""直接初始化刮削电影文件时,应生成同级 poster/backdrop。"""
"""直接初始化刮削电影文件时,应生成同级 poster/backdrop 及别名"""
fileitem = schemas.FileItem(path="/movies/Avatar/Avatar.mkv", name="Avatar.mkv", type="file", storage="local")
parent_item = schemas.FileItem(path="/movies/Avatar", name="Avatar", type="dir", storage="local")
mediainfo = MediaInfo()
@@ -269,13 +272,10 @@ class TestMediaScrapingImages(unittest.TestCase):
)
paths = [call.kwargs["path"] for call in self.media_chain._download_and_save_image.call_args_list]
self.assertEqual(
paths,
[
Path("/movies/Avatar/poster.jpg"),
Path("/movies/Avatar/backdrop.jpg"),
],
)
# poster has no alias, backdrop generates fanart alias
self.assertIn(Path("/movies/Avatar/poster.jpg"), paths)
self.assertIn(Path("/movies/Avatar/backdrop.jpg"), paths)
self.assertIn(Path("/movies/Avatar/fanart.jpg"), paths)
def test_scrape_episode_thumb_image_path(self):
fileitem = schemas.FileItem(path="/tv/Show/Season 1/S01E01.mp4", name="S01E01.mp4", type="file", storage="local")
@@ -333,6 +333,64 @@ class TestMediaScrapingImages(unittest.TestCase):
url="http://episode-thumb"
)
def test_expand_with_aliases_backdrop(self):
    """A backdrop target must be expanded with a fanart alias next to it."""
    movie_dir = schemas.FileItem(path="/movies/Avatar", name="Avatar", type="dir", storage="local")
    # Every policy lookup answers OVERWRITE, so the alias is never skipped.
    self.media_chain.scraping_policies.option.return_value = ScrapingOption(
        "movie", "backdrop", ScrapingPolicy.OVERWRITE
    )

    result = self.media_chain._expand_with_aliases(
        [(movie_dir, Path("/movies/Avatar/backdrop.jpg"))],
        ScrapingTarget.MOVIE,
    )

    result_paths = [entry[1] for entry in result]
    for expected in (
        Path("/movies/Avatar/backdrop.jpg"),
        Path("/movies/Avatar/fanart.jpg"),
    ):
        self.assertIn(expected, result_paths)
def test_expand_with_aliases_thumb(self):
    """A thumb target must be expanded with a landscape alias next to it."""
    show_dir = schemas.FileItem(path="/tv/Show", name="Show", type="dir", storage="local")
    # Every policy lookup answers OVERWRITE, so the alias is never skipped.
    self.media_chain.scraping_policies.option.return_value = ScrapingOption(
        "tv", "thumb", ScrapingPolicy.OVERWRITE
    )

    result = self.media_chain._expand_with_aliases(
        [(show_dir, Path("/tv/Show/thumb.jpg"))],
        ScrapingTarget.TV,
    )

    result_paths = [entry[1] for entry in result]
    for expected in (
        Path("/tv/Show/thumb.jpg"),
        Path("/tv/Show/landscape.jpg"),
    ):
        self.assertIn(expected, result_paths)
def test_expand_with_aliases_skips_season_prefix(self):
    """Files whose name starts with ``season`` must not receive aliases."""
    show_dir = schemas.FileItem(path="/tv/Show", name="Show", type="dir", storage="local")
    self.media_chain.scraping_policies.option.return_value = ScrapingOption(
        "season", "thumb", ScrapingPolicy.OVERWRITE
    )

    result = self.media_chain._expand_with_aliases(
        [(show_dir, Path("/tv/Show/season01-thumb.jpg"))],
        ScrapingTarget.SEASON,
    )

    # Only the original target is returned; nothing was appended.
    self.assertEqual(len(result), 1)
def test_expand_with_aliases_respects_skip_policy(self):
    """No alias is produced when the alias' metadata type is set to SKIP."""
    movie_dir = schemas.FileItem(path="/movies/Avatar", name="Avatar", type="dir", storage="local")

    def option_side_effect(item_type, metadata_type):
        # BACKDROP (the type the fanart alias is checked against) is SKIP;
        # every other metadata type is OVERWRITE.
        policy = (
            ScrapingPolicy.SKIP
            if metadata_type == ScrapingMetadata.BACKDROP
            else ScrapingPolicy.OVERWRITE
        )
        return ScrapingOption("movie", "backdrop", policy)

    self.media_chain.scraping_policies.option.side_effect = option_side_effect

    result = self.media_chain._expand_with_aliases(
        [(movie_dir, Path("/movies/Avatar/backdrop.jpg"))],
        ScrapingTarget.MOVIE,
    )

    # The fanart alias resolves to BACKDROP, which is skipped, so only the
    # original target remains.
    self.assertEqual(len(result), 1)
def test_season_backdrop_path(self):
    """A season backdrop must be stored inside the season directory."""
    season_dir = schemas.FileItem(path="/tv/Show/Season 1", name="Season 1", type="dir", storage="local")
    lookup = dict(
        current_fileitem=season_dir,
        item_type=ScrapingTarget.SEASON,
        metadata_type=ScrapingMetadata.BACKDROP,
        filename_hint="season01-backdrop.jpg",
    )

    resolved_item, resolved_path = self.media_chain._get_target_fileitem_and_path(**lookup)

    # The target stays on the season directory item and the file name drops
    # the "season01-" prefix carried by the hint.
    self.assertEqual(resolved_item, season_dir)
    self.assertEqual(resolved_path, Path("/tv/Show/Season 1/backdrop.jpg"))
@patch("app.chain.media.RequestUtils")
@patch("app.chain.media.NamedTemporaryFile")
@patch("app.chain.media.Path.chmod")