diff --git a/app/chain/__init__.py b/app/chain/__init__.py
index 5253f55d..16c18536 100644
--- a/app/chain/__init__.py
+++ b/app/chain/__init__.py
@@ -415,6 +415,48 @@ class ChainBase(metaclass=ABCMeta):
             and not any([tmdbid, doubanid, bangumiid])
         )
 
+    @staticmethod
+    def _snapshot_recognize_cache_meta(meta: Optional[MetaBase]) -> Optional[MetaBase]:
+        """
+        Deep-copy meta before shared recognition so the copy can later backfill the positive cache over the negative one.
+        """
+        if not meta:
+            return None
+        return copy.deepcopy(meta)
+
+    def _update_local_recognize_cache(
+        self,
+        meta: Optional[MetaBase],
+        mediainfo: Optional[MediaInfo],
+    ) -> None:
+        """
+        Backfill the local recognition cache after shared recognition succeeds, so a name-level negative
+        cache entry stops forcing repeated shared lookups. Instance method: it dispatches via self.run_module.
+        """
+        if not meta or not mediainfo:
+            return
+        self.run_module(
+            "update_recognize_cache",
+            meta=meta,
+            mediainfo=mediainfo,
+        )
+
+    async def _async_update_local_recognize_cache(
+        self,
+        meta: Optional[MetaBase],
+        mediainfo: Optional[MediaInfo],
+    ) -> None:
+        """
+        Async variant: backfill the local recognition cache.
+        """
+        if not meta or not mediainfo:
+            return
+        await self.async_run_module(
+            "async_update_recognize_cache",
+            meta=meta,
+            mediainfo=mediainfo,
+        )
+
     def recognize_media(
         self,
         meta: MetaBase = None,
@@ -460,10 +502,12 @@ class ChainBase(metaclass=ABCMeta):
             cache=cache,
         )
         if mediainfo:
-            share_helper.report(meta=meta, mediainfo=mediainfo)
+            if not mediainfo.recognize_cache_hit:
+                share_helper.report(meta=meta, mediainfo=mediainfo)
             return mediainfo
 
         if self._can_use_media_recognize_share(meta, tmdbid, doubanid, bangumiid):
+            shared_cache_meta = self._snapshot_recognize_cache_meta(meta)
             shared_item = share_helper.query(meta=meta, mtype=mtype)
             shared_params = share_helper.to_recognize_params(shared_item)
             if shared_params:
@@ -479,7 +523,9 @@ class ChainBase(metaclass=ABCMeta):
                     cache=cache,
                 )
                 if mediainfo:
-                    share_helper.report(meta=meta, mediainfo=mediainfo)
+                    self._update_local_recognize_cache(shared_cache_meta, mediainfo)
+                    if not mediainfo.recognize_cache_hit:
+                        share_helper.report(meta=meta, mediainfo=mediainfo)
                     return mediainfo
 
         return None
@@ -528,10 +574,12 @@ class ChainBase(metaclass=ABCMeta):
             cache=cache,
         )
         if mediainfo:
-            await share_helper.async_report(meta=meta, mediainfo=mediainfo)
+            if not mediainfo.recognize_cache_hit:
+                await share_helper.async_report(meta=meta, mediainfo=mediainfo)
             return mediainfo
 
         if self._can_use_media_recognize_share(meta, tmdbid, doubanid, bangumiid):
+            shared_cache_meta = self._snapshot_recognize_cache_meta(meta)
             shared_item = await share_helper.async_query(meta=meta, mtype=mtype)
             shared_params = share_helper.to_recognize_params(shared_item)
             if shared_params:
@@ -547,7 +595,9 @@ class ChainBase(metaclass=ABCMeta):
                     cache=cache,
                 )
                 if mediainfo:
-                    await share_helper.async_report(meta=meta, mediainfo=mediainfo)
+                    await self._async_update_local_recognize_cache(shared_cache_meta, mediainfo)
+                    if not mediainfo.recognize_cache_hit:
+                        await share_helper.async_report(meta=meta, mediainfo=mediainfo)
                     return mediainfo
 
         return None
diff --git a/app/core/context.py b/app/core/context.py
index f77b2349..d8a913dc 100644
--- a/app/core/context.py
+++ b/app/core/context.py
@@ -152,6 +152,8 @@ class TorrentInfo:
 
 @dataclass
 class MediaInfo:
+    # Internal flag: local recognition cache was hit. Left unannotated on purpose so @dataclass does not make it a (serialized) field.
+    recognize_cache_hit = False
     # 来源:themoviedb、douban、bangumi
     source: str = None
     # 类型 电影、电视剧
diff --git a/app/helper/recognize.py b/app/helper/recognize.py
index 410c71f0..31198a8c 100644
--- a/app/helper/recognize.py
+++ b/app/helper/recognize.py
@@ -39,7 +39,7 @@ class MediaRecognizeShareHelper(metaclass=WeakSingleton):
         """
         if not meta:
             return None
-        keyword = getattr(meta, "original_name", None) or getattr(meta, "name", None)
+        keyword = meta.original_name or meta.name
         if keyword:
             keyword = str(keyword).strip()
         return keyword or None
@@ -77,7 +77,7 @@ class MediaRecognizeShareHelper(metaclass=WeakSingleton):
         """
         if media_type != "tv":
             return None
-        season = getattr(meta, "begin_season", None)
+        season = meta.begin_season if meta else None
         if season is None and mediainfo:
             season = mediainfo.season
         try:
@@ -93,7 +93,7 @@ class MediaRecognizeShareHelper(metaclass=WeakSingleton):
         """
         提取年份
         """
-        year = getattr(meta, "year", None) or (mediainfo.year if mediainfo else None)
+        year = (meta.year if meta else None) or (mediainfo.year if mediainfo else None)
         if year is None:
             return None
         year_text = str(year).strip()
diff --git a/app/modules/douban/__init__.py b/app/modules/douban/__init__.py
index 92388971..b67eca27 100644
--- a/app/modules/douban/__init__.py
+++ b/app/modules/douban/__init__.py
@@ -108,6 +108,7 @@ class DoubanModule(_ModuleBase):
             if doubanid:
                 meta.doubanid = doubanid
             cache_info = self.cache.get(meta)
+        cache_hit = False
 
         # 识别豆瓣信息
         if not cache_info or not cache:
@@ -148,6 +149,7 @@ class DoubanModule(_ModuleBase):
                 self.cache.update(meta, info)
         else:
             # 使用缓存信息
+            cache_hit = True
             if cache_info.get("title"):
                 logger.info(f"{meta.name} 使用豆瓣识别缓存:{cache_info.get('title')}")
                 info = douban_info_func(mtype=cache_info.get("type"),
@@ -159,6 +161,7 @@ class DoubanModule(_ModuleBase):
         if info:
             # 赋值TMDB信息并返回
             mediainfo = MediaInfo(douban_info=info)
+            mediainfo.recognize_cache_hit = cache_hit
             if meta:
                 logger.info(f"{meta.name} 豆瓣识别结果:{mediainfo.type.value} "
                             f"{mediainfo.title_year} "
@@ -209,6 +212,7 @@ class DoubanModule(_ModuleBase):
             if doubanid:
                 meta.doubanid = doubanid
             cache_info = self.cache.get(meta)
+        cache_hit = False
 
         # 识别豆瓣信息
         if not cache_info or not cache:
@@ -249,6 +253,7 @@ class DoubanModule(_ModuleBase):
                 self.cache.update(meta, info)
         else:
             # 使用缓存信息
+            cache_hit = True
             if cache_info.get("title"):
                 logger.info(f"{meta.name} 使用豆瓣识别缓存:{cache_info.get('title')}")
                 info = await async_douban_info_func(mtype=cache_info.get("type"),
@@ -260,6 +265,7 @@ class DoubanModule(_ModuleBase):
         if info:
             # 赋值TMDB信息并返回
             mediainfo = MediaInfo(douban_info=info)
+            mediainfo.recognize_cache_hit = cache_hit
             if meta:
                 logger.info(f"{meta.name} 豆瓣识别结果:{mediainfo.type.value} "
                             f"{mediainfo.title_year} "
@@ -319,6 +325,31 @@ class DoubanModule(_ModuleBase):
             **kwargs
         )
 
+    def update_recognize_cache(
+        self,
+        meta: MetaBase,
+        mediainfo: MediaInfo,
+    ) -> Optional[bool]:
+        """
+        Backfill the Douban local recognition cache, overriding a name-level negative entry so shared recognition is not re-queried.
+        """
+        if not meta or not mediainfo:
+            return None
+        if mediainfo.source != "douban" or not mediainfo.douban_info:
+            return None
+        self.cache.update(meta, mediainfo.douban_info)
+        return True
+
+    async def async_update_recognize_cache(
+        self,
+        meta: MetaBase,
+        mediainfo: MediaInfo,
+    ) -> Optional[bool]:
+        """
+        Async wrapper: backfill the Douban local recognition cache via the synchronous implementation.
+        """
+        return self.update_recognize_cache(meta=meta, mediainfo=mediainfo)
+
     @rate_limit_exponential(source="douban_info")
     def douban_info(self, doubanid: str, mtype: MediaType = None, raise_exception: bool = True) -> Optional[dict]:
         """
diff --git a/app/modules/themoviedb/__init__.py b/app/modules/themoviedb/__init__.py
index 2b855544..f93d91b3 100644
--- a/app/modules/themoviedb/__init__.py
+++ b/app/modules/themoviedb/__init__.py
@@ -490,6 +490,7 @@ class TheMovieDbModule(_ModuleBase):
         group_seasons = []
         if episode_group:
             group_seasons = self.tmdb.get_tv_group_seasons(episode_group)
+        cache_hit = False
 
         # 识别匹配
         if not cache_info or not cache:
@@ -525,6 +526,7 @@ class TheMovieDbModule(_ModuleBase):
                 self.cache.update(meta, info)
         else:
             # 使用缓存信息
+            cache_hit = True
             if cache_info.get("title"):
                 logger.info(f"{meta.name} 使用TMDB识别缓存:{cache_info.get('title')}")
                 info = self.tmdb.get_info(mtype=cache_info.get("type"),
@@ -534,7 +536,10 @@ class TheMovieDbModule(_ModuleBase):
                 info = None
 
         if info:
-            return self._build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
+            mediainfo = self._build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
+            if mediainfo:
+                mediainfo.recognize_cache_hit = cache_hit
+            return mediainfo
         else:
             logger.info(f"{meta.name if meta else tmdbid} 未匹配到TMDB媒体信息")
 
@@ -574,6 +579,7 @@ class TheMovieDbModule(_ModuleBase):
         group_seasons = []
         if episode_group:
             group_seasons = await self.tmdb.async_get_tv_group_seasons(episode_group)
+        cache_hit = False
 
         # 识别匹配
         if not cache_info or not cache:
@@ -609,6 +615,7 @@ class TheMovieDbModule(_ModuleBase):
                 self.cache.update(meta, info)
         else:
             # 使用缓存信息
+            cache_hit = True
             if cache_info.get("title"):
                 logger.info(f"{meta.name} 使用TMDB识别缓存:{cache_info.get('title')}")
                 info = await self.tmdb.async_get_info(mtype=cache_info.get("type"),
@@ -618,7 +625,10 @@ class TheMovieDbModule(_ModuleBase):
                 info = None
 
         if info:
-            return await self._async_build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
+            mediainfo = await self._async_build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
+            if mediainfo:
+                mediainfo.recognize_cache_hit = cache_hit
+            return mediainfo
         else:
             logger.info(f"{meta.name if meta else tmdbid} 未匹配到TMDB媒体信息")
 
@@ -692,6 +702,31 @@ class TheMovieDbModule(_ModuleBase):
         else:
             return await self.tmdb.async_get_tv_season_detail(tmdbid=tmdbid, season=season)
 
+    def update_recognize_cache(
+        self,
+        meta: MetaBase,
+        mediainfo: MediaInfo,
+    ) -> Optional[bool]:
+        """
+        Backfill the TMDB local recognition cache, overriding a name-level negative entry so shared recognition is not re-queried.
+        """
+        if not meta or not mediainfo:
+            return None
+        if mediainfo.source != "themoviedb" or not mediainfo.tmdb_info:
+            return None
+        self.cache.update(meta, mediainfo.tmdb_info)
+        return True
+
+    async def async_update_recognize_cache(
+        self,
+        meta: MetaBase,
+        mediainfo: MediaInfo,
+    ) -> Optional[bool]:
+        """
+        Async wrapper: backfill the TMDB local recognition cache via the synchronous implementation.
+        """
+        return self.update_recognize_cache(meta=meta, mediainfo=mediainfo)
+
     def media_category(self) -> Optional[Dict[str, list]]:
         """
         获取媒体分类
diff --git a/tests/test_media_recognize_share.py b/tests/test_media_recognize_share.py
index 890f431a..08d75ee5 100644
--- a/tests/test_media_recognize_share.py
+++ b/tests/test_media_recognize_share.py
@@ -78,6 +78,9 @@ class TestMediaRecognizeShare(unittest.TestCase):
         ), patch(
             "app.chain.MediaRecognizeShareHelper.report",
             return_value=False,
+        ), patch.object(
+            self.chain,
+            "_update_local_recognize_cache",
         ):
             result = self.chain.recognize_media(meta=meta, cache=False)
 
@@ -117,17 +120,69 @@ class TestMediaRecognizeShare(unittest.TestCase):
             ), patch(
                 "app.chain.MediaRecognizeShareHelper.async_report",
                 AsyncMock(return_value=False),
-            ):
+            ), patch.object(
+                self.chain,
+                "_async_update_local_recognize_cache",
+                AsyncMock(),
+            ) as backfill_mock:
                 result = await self.chain.async_recognize_media(meta=meta, cache=False)
-            return result, query_mock
+            return result, query_mock, backfill_mock
 
-        result, query_mock = asyncio.run(runner())
+        result, query_mock, backfill_mock = asyncio.run(runner())
 
         self.assertIs(result, shared_media)
         self.assertEqual(async_run_module.await_count, 2)
         query_mock.assert_awaited_once_with(meta=meta, mtype=None)
+        backfill_mock.assert_awaited_once()
         self.assertIsNone(meta.begin_season)
 
+    def test_backfill_local_cache_after_shared_recognize_success(self):
+        """
+        When the second local recognition succeeds after a shared lookup, the cache for the original name should be backfilled.
+        """
+        meta = self._build_meta("测试缓存回填", MediaType.MOVIE)
+        shared_media = MediaInfo(
+            title="测试缓存回填",
+            year="2024",
+            tmdb_id=700,
+            type=MediaType.MOVIE,
+            source="themoviedb",
+            tmdb_info={"id": 700, "media_type": MediaType.MOVIE, "title": "测试缓存回填"},
+        )
+
+        with patch.object(
+            self.chain,
+            "run_module",
+            side_effect=[None, shared_media],
+        ), patch(
+            "app.chain.MediaRecognizeShareHelper.query",
+            return_value={"type": "movie", "tmdbid": 700},
+        ), patch(
+            "app.chain.MediaRecognizeShareHelper.to_recognize_params",
+            return_value={
+                "mtype": MediaType.MOVIE,
+                "tmdbid": 700,
+                "doubanid": None,
+                "bangumiid": None,
+                "season": None,
+            },
+        ), patch(
+            "app.chain.MediaRecognizeShareHelper.report",
+            return_value=False,
+        ), patch.object(
+            self.chain,
+            "_update_local_recognize_cache",
+        ) as backfill_mock:
+            result = self.chain.recognize_media(meta=meta, cache=False)
+
+        self.assertIs(result, shared_media)
+        backfill_mock.assert_called_once()
+        backfill_meta, backfill_media = backfill_mock.call_args.args
+        self.assertIsNot(backfill_meta, meta)
+        self.assertEqual(backfill_meta.name, meta.name)
+        self.assertEqual(backfill_meta.type, meta.type)
+        self.assertIs(backfill_media, shared_media)
+
     def test_query_and_report_prefer_original_name_keyword(self):
         """
         查询和上报共享识别时应优先使用未应用识别词的识别名称
@@ -151,6 +206,57 @@ class TestMediaRecognizeShare(unittest.TestCase):
         self.assertEqual(query_params["keyword"], "未应用识别词的名称")
         self.assertEqual(report_payload["keyword"], "未应用识别词的名称")
 
+    def test_skip_report_when_local_recognize_hits_cache(self):
+        """
+        Shared recognition must not be reported to when local recognition hits the cache.
+        """
+        meta = self._build_meta("缓存电影", MediaType.MOVIE)
+        mediainfo = MediaInfo(title="缓存电影", year="2024", tmdb_id=500, type=MediaType.MOVIE)
+        mediainfo.recognize_cache_hit = True
+
+        with patch.object(self.chain, "run_module", return_value=mediainfo) as run_module, patch(
+            "app.chain.MediaRecognizeShareHelper.report",
+            return_value=True,
+        ) as report_mock, patch(
+            "app.chain.MediaRecognizeShareHelper.query"
+        ) as query_mock:
+            result = self.chain.recognize_media(meta=meta)
+
+        self.assertIs(result, mediainfo)
+        run_module.assert_called_once()
+        report_mock.assert_not_called()
+        query_mock.assert_not_called()
+
+    def test_async_skip_report_when_local_recognize_hits_cache(self):
+        """
+        Async path: shared recognition must not be reported to when local recognition hits the cache.
+        """
+        meta = self._build_meta("缓存剧集", MediaType.TV)
+        mediainfo = MediaInfo(title="缓存剧集", year="2025", tmdb_id=600, type=MediaType.TV)
+        mediainfo.recognize_cache_hit = True
+
+        async def runner():
+            with patch.object(
+                self.chain,
+                "async_run_module",
+                AsyncMock(return_value=mediainfo),
+            ) as async_run_module, patch(
+                "app.chain.MediaRecognizeShareHelper.async_report",
+                AsyncMock(return_value=True),
+            ) as report_mock, patch(
+                "app.chain.MediaRecognizeShareHelper.async_query",
+                AsyncMock(),
+            ) as query_mock:
+                result = await self.chain.async_recognize_media(meta=meta)
+            return result, async_run_module, report_mock, query_mock
+
+        result, async_run_module, report_mock, query_mock = asyncio.run(runner())
+
+        self.assertIs(result, mediainfo)
+        async_run_module.assert_awaited_once()
+        report_mock.assert_not_awaited()
+        query_mock.assert_not_awaited()
+
 
 if __name__ == "__main__":
     unittest.main()