From 34ff80e26c99e1eb753e2c288ae79773bf976a67 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Tue, 19 May 2026 12:36:47 +0800 Subject: [PATCH] feat: optimize scraping for multi-server compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add studio, country, runtime tags to NFO generation - Fix Fanart naming: showbackground→fanart (recognized by Jellyfin/Emby) - Add image alias system: backdrop↔fanart, thumb↔landscape - Merge image sources from all modules instead of first-wins - Add CLEARART and LANDSCAPE scraping metadata types - Extend season scraping with backdrop and landscape support Co-Authored-By: Claude Opus 4.7 (1M context) --- app/chain/media.py | 81 +++++++++++++++++++++++++++++-- app/modules/fanart/__init__.py | 34 ++++++++++--- app/modules/themoviedb/scraper.py | 11 +++++ app/schemas/types.py | 2 + tests/test_mediascrape.py | 78 +++++++++++++++++++++++++---- 5 files changed, 185 insertions(+), 21 deletions(-) diff --git a/app/chain/media.py b/app/chain/media.py index bb868e1a..5906b736 100644 --- a/app/chain/media.py +++ b/app/chain/media.py @@ -136,10 +136,10 @@ class ScrapingConfig: for mt, mds in [ ( "movie", - ["nfo", "poster", "backdrop", "logo", "disc", "banner", "thumb"], + ["nfo", "poster", "backdrop", "logo", "disc", "banner", "thumb", "clearart", "landscape"], ), - ("tv", ["nfo", "poster", "backdrop", "logo", "banner", "thumb"]), - ("season", ["nfo", "poster", "banner", "thumb"]), + ("tv", ["nfo", "poster", "backdrop", "logo", "banner", "thumb", "clearart", "landscape"]), + ("season", ["nfo", "poster", "backdrop", "banner", "thumb", "landscape"]), ("episode", ["nfo", "thumb"]), ] for md in mds @@ -164,6 +164,15 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton): "cdart": ScrapingMetadata.DISC, "banner": ScrapingMetadata.BANNER, "thumb": ScrapingMetadata.THUMB, + "landscape": ScrapingMetadata.LANDSCAPE, + "clearart": ScrapingMetadata.CLEARART, + } + + IMAGE_ALIASES = { + "backdrop": ["fanart"], + "fanart": ["backdrop"], + "thumb": ["landscape"], + "landscape": ["thumb"], } def __init__(self): @@ -359,6 +368,8 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton): ScrapingMetadata.POSTER: "poster", ScrapingMetadata.BANNER: "banner", ScrapingMetadata.THUMB: "thumb", + ScrapingMetadata.BACKDROP: "backdrop", + ScrapingMetadata.LANDSCAPE: "landscape", } if season_image_name := season_image_name_map.get(metadata_type): hint_ext = Path(filename_hint).suffix if filename_hint else ".jpg" @@ -428,6 +439,8 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton): ScrapingMetadata.POSTER, ScrapingMetadata.BANNER, ScrapingMetadata.THUMB, + ScrapingMetadata.BACKDROP, + ScrapingMetadata.LANDSCAPE, } ): return targets @@ -445,6 +458,39 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton): targets.insert(0, root_target) return targets + def _expand_with_aliases( + self, + targets: List[Tuple[schemas.FileItem, Path]], + item_type: ScrapingTarget, + ) -> List[Tuple[schemas.FileItem, Path]]: + """ + 为兼容多媒体服务器,扩展图片保存目标列表,添加别名文件。 + 例如 backdrop.jpg 同时保存为 fanart.jpg,thumb.jpg 同时保存为 landscape.jpg。 + """ + expanded = list(targets) + for base_item, image_path in list(targets): + if not image_path: + continue + stem = image_path.stem.lower() + ext = image_path.suffix + # 跳过 season 前缀文件(如 season01-poster.jpg) + if stem.startswith("season"): + continue + aliases = self.IMAGE_ALIASES.get(stem) + if not aliases: + continue + for alias in aliases: + alias_meta_type = self.IMAGE_METADATA_MAP.get(alias) + if alias_meta_type: + alias_option = self.scraping_policies.option(item_type, alias_meta_type) + if alias_option.is_skip: + continue + alias_path = image_path.with_name(f"{alias}{ext}") + alias_target = (base_item, alias_path) + if alias_target not in expanded: + expanded.append(alias_target) + return expanded + def metadata_nfo( self, meta: MetaBase, @@ -468,6 +514,32 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton): episode=episode, ) + def metadata_img( + self, + mediainfo: MediaInfo, + season: Optional[int] = None, + episode: Optional[int] = None, + ) -> Optional[dict]: + """ + 获取图片名称和url,合并所有模块的结果。 + 优先使用高优先级模块的图片,低优先级模块补充缺失的图片类型。 + """ + merged = {} + for module in sorted( + self.modulemanager.get_running_modules("metadata_img"), + key=lambda x: x.get_priority(), + ): + try: + result = module.metadata_img( + mediainfo=mediainfo, season=season, episode=episode + ) + if result and isinstance(result, dict): + for name, url in result.items(): + merged.setdefault(name, url) + except Exception as err: + logger.error(f"获取 {module.get_name()} 图片失败:{str(err)}") + return merged or None + @staticmethod def select_recognize_source( log_name: str, log_context: str, native_fn, plugin_fn @@ -1049,6 +1121,9 @@ class MediaChain(ChainBase, ConfigReloadMixin, metaclass=Singleton): parent_fileitem=parent_fileitem, ) + # 扩展别名目标(如 backdrop→fanart, thumb→landscape) + image_targets = self._expand_with_aliases(image_targets, item_type) + for base_item, image_path in image_targets: if not image_path: continue diff --git a/app/modules/fanart/__init__.py b/app/modules/fanart/__init__.py index 6a12df1e..41c887e4 100644 --- a/app/modules/fanart/__init__.py +++ b/app/modules/fanart/__init__.py @@ -1,5 +1,4 @@ import asyncio -import re from pathlib import Path from typing import Optional, Tuple, Union @@ -549,15 +548,34 @@ class FanartModule(_ModuleBase): _images.sort(key=lambda x: int(x.get("likes", 0)), reverse=True) return _images[0] - @staticmethod - def __name(fanart_name: str) -> str: + _FANART_NAME_MAP = { + "showbackground": "fanart", + "moviebackground": "fanart", + "hdtvlogo": "logo", + "hdmovielogo": "logo", + "movielogo": "logo", + "tvposter": "poster", + "movieposter": "poster", + "tvthumb": "thumb", + "moviethumb": "thumb", + "tvbanner": "banner", + "moviebanner": "banner", + "hdclearart": "clearart", + "movieart": "clearart", + "hdmovieclearart": "clearart", + "cdart": "cdart", + "moviedisc": "disc", + "seasonposter": "seasonposter", + "seasonthumb": "seasonthumb", + "seasonbanner": "seasonbanner", + } + + @classmethod + def __name(cls, fanart_name: str) -> str: """ - 转换Fanart图片的名字 + 转换Fanart图片的名字为媒体服务器兼容名称 """ - words_to_remove = r"tv|movie|hdmovie|hdtv|show|hd" - pattern = re.compile(words_to_remove, re.IGNORECASE) - result = re.sub(pattern, "", fanart_name) - return result + return cls._FANART_NAME_MAP.get(fanart_name.lower(), fanart_name) @classmethod @cached(maxsize=settings.CONF.fanart, ttl=settings.CONF.meta, shared_key="get") diff --git a/app/modules/themoviedb/scraper.py b/app/modules/themoviedb/scraper.py index a83c2c27..5a182660 100644 --- a/app/modules/themoviedb/scraper.py +++ b/app/modules/themoviedb/scraper.py @@ -301,6 +301,17 @@ class TmdbScraper: # 内容分级 if content_rating := mediainfo.content_rating: DomUtils.add_node(doc, root, "mpaa", content_rating) + # 制作公司 + for company in mediainfo.production_companies or []: + if company.get("name"): + DomUtils.add_node(doc, root, "studio", company.get("name")) + # 制作国家 + for country in mediainfo.production_countries or []: + if country.get("name"): + DomUtils.add_node(doc, root, "country", country.get("name")) + # 时长 + if mediainfo.runtime: + DomUtils.add_node(doc, root, "runtime", str(mediainfo.runtime)) return doc diff --git a/app/schemas/types.py b/app/schemas/types.py index ecc855a2..38ade1c6 100644 --- a/app/schemas/types.py +++ b/app/schemas/types.py @@ -446,3 +446,5 @@ class ScrapingMetadata(NameValueEnum): BANNER = "横幅图" THUMB = "缩略图" DISC = "光盘图" + CLEARART = "透明艺术图" + LANDSCAPE = "横版缩略图" diff --git a/tests/test_mediascrape.py b/tests/test_mediascrape.py index 5ef64736..877d13bb 100644 --- a/tests/test_mediascrape.py +++ b/tests/test_mediascrape.py @@ -218,13 +218,16 @@ class TestMediaScrapingImages(unittest.TestCase): self.media_chain._scrape_images_generic(fileitem, mediainfo, ScrapingTarget.MOVIE) - # Check download called for mapped metadata + # Check download called for mapped metadata + aliases (fanart→backdrop) calls = self.media_chain._download_and_save_image.call_args_list - self.assertEqual(len(calls), 3) urls = [call.kwargs["url"] for call in calls] + paths = [call.kwargs["path"] for call in calls] self.assertIn("http://poster", urls) self.assertIn("http://fanart", urls) self.assertIn("http://logo", urls) + # fanart.jpg should also generate backdrop.jpg alias + self.assertIn(Path("/movies/Avatar/fanart.jpg"), paths) + self.assertIn(Path("/movies/Avatar/backdrop.jpg"), paths) def test_scrape_images_season_filter(self): fileitem = schemas.FileItem(path="/tv/Show/Season 1", name="Season 1", type="dir", storage="local") @@ -250,7 +253,7 @@ class TestMediaScrapingImages(unittest.TestCase): ) def test_scrape_movie_file_images_when_initialized_directly(self): - """直接初始化刮削电影文件时,应生成同级 poster/backdrop。""" + """直接初始化刮削电影文件时,应生成同级 poster/backdrop 及别名。""" fileitem = schemas.FileItem(path="/movies/Avatar/Avatar.mkv", name="Avatar.mkv", type="file", storage="local") parent_item = schemas.FileItem(path="/movies/Avatar", name="Avatar", type="dir", storage="local") mediainfo = MediaInfo() @@ -269,13 +272,10 @@ class TestMediaScrapingImages(unittest.TestCase): ) paths = [call.kwargs["path"] for call in self.media_chain._download_and_save_image.call_args_list] - self.assertEqual( - paths, - [ - Path("/movies/Avatar/poster.jpg"), - Path("/movies/Avatar/backdrop.jpg"), - ], - ) + # poster has no alias, backdrop generates fanart alias + self.assertIn(Path("/movies/Avatar/poster.jpg"), paths) + self.assertIn(Path("/movies/Avatar/backdrop.jpg"), paths) + self.assertIn(Path("/movies/Avatar/fanart.jpg"), paths) def test_scrape_episode_thumb_image_path(self): fileitem = schemas.FileItem(path="/tv/Show/Season 1/S01E01.mp4", name="S01E01.mp4", type="file", storage="local") @@ -333,6 +333,64 @@ class TestMediaScrapingImages(unittest.TestCase): url="http://episode-thumb" ) + def test_expand_with_aliases_backdrop(self): + """backdrop should also generate fanart alias.""" + parent_item = schemas.FileItem(path="/movies/Avatar", name="Avatar", type="dir", storage="local") + targets = [(parent_item, Path("/movies/Avatar/backdrop.jpg"))] + self.media_chain.scraping_policies.option.return_value = ScrapingOption("movie", "backdrop", ScrapingPolicy.OVERWRITE) + + expanded = self.media_chain._expand_with_aliases(targets, ScrapingTarget.MOVIE) + paths = [t[1] for t in expanded] + self.assertIn(Path("/movies/Avatar/backdrop.jpg"), paths) + self.assertIn(Path("/movies/Avatar/fanart.jpg"), paths) + + def test_expand_with_aliases_thumb(self): + """thumb should also generate landscape alias.""" + parent_item = schemas.FileItem(path="/tv/Show", name="Show", type="dir", storage="local") + targets = [(parent_item, Path("/tv/Show/thumb.jpg"))] + self.media_chain.scraping_policies.option.return_value = ScrapingOption("tv", "thumb", ScrapingPolicy.OVERWRITE) + + expanded = self.media_chain._expand_with_aliases(targets, ScrapingTarget.TV) + paths = [t[1] for t in expanded] + self.assertIn(Path("/tv/Show/thumb.jpg"), paths) + self.assertIn(Path("/tv/Show/landscape.jpg"), paths) + + def test_expand_with_aliases_skips_season_prefix(self): + """season-prefixed files should not get aliases.""" + parent_item = schemas.FileItem(path="/tv/Show", name="Show", type="dir", storage="local") + targets = [(parent_item, Path("/tv/Show/season01-thumb.jpg"))] + self.media_chain.scraping_policies.option.return_value = ScrapingOption("season", "thumb", ScrapingPolicy.OVERWRITE) + + expanded = self.media_chain._expand_with_aliases(targets, ScrapingTarget.SEASON) + self.assertEqual(len(expanded), 1) + + def test_expand_with_aliases_respects_skip_policy(self): + """Alias should not be generated if its metadata type is set to SKIP.""" + parent_item = schemas.FileItem(path="/movies/Avatar", name="Avatar", type="dir", storage="local") + targets = [(parent_item, Path("/movies/Avatar/backdrop.jpg"))] + # backdrop is OVERWRITE but fanart (also BACKDROP type) is SKIP + def option_side_effect(item_type, metadata_type): + if metadata_type == ScrapingMetadata.BACKDROP: + return ScrapingOption("movie", "backdrop", ScrapingPolicy.SKIP) + return ScrapingOption("movie", "backdrop", ScrapingPolicy.OVERWRITE) + self.media_chain.scraping_policies.option.side_effect = option_side_effect + + expanded = self.media_chain._expand_with_aliases(targets, ScrapingTarget.MOVIE) + # fanart maps to BACKDROP which is SKIP, so no alias + self.assertEqual(len(expanded), 1) + + def test_season_backdrop_path(self): + """Season backdrop should be saved in season directory.""" + fileitem = schemas.FileItem(path="/tv/Show/Season 1", name="Season 1", type="dir", storage="local") + target_item, target_path = self.media_chain._get_target_fileitem_and_path( + current_fileitem=fileitem, + item_type=ScrapingTarget.SEASON, + metadata_type=ScrapingMetadata.BACKDROP, + filename_hint="season01-backdrop.jpg" + ) + self.assertEqual(target_item, fileitem) + self.assertEqual(target_path, Path("/tv/Show/Season 1/backdrop.jpg")) + @patch("app.chain.media.RequestUtils") @patch("app.chain.media.NamedTemporaryFile") @patch("app.chain.media.Path.chmod")