fix: update shared recognize cache flow

This commit is contained in:
jxxghp
2026-05-08 21:21:01 +08:00
parent 64b4de3900
commit 94d7e4385e
6 changed files with 236 additions and 12 deletions

View File

@@ -415,6 +415,48 @@ class ChainBase(metaclass=ABCMeta):
and not any([tmdbid, doubanid, bangumiid])
)
@staticmethod
def _snapshot_recognize_cache_meta(meta: Optional[MetaBase]) -> Optional[MetaBase]:
    """
    Snapshot the metadata that keys the local recognize cache before the
    shared-recognition flow runs.

    The snapshot is used after a successful shared recognition to backfill a
    positive cache entry over the negative one.

    :param meta: metadata to snapshot; a falsy value yields ``None``
    :return: an independent deep copy of ``meta``, or ``None``
    """
    # Deep copy so the caller keeps a stable object that is distinct from the
    # ``meta`` instance handed to the later recognition steps.
    return copy.deepcopy(meta) if meta else None
def _update_local_recognize_cache(
    self,
    meta: Optional[MetaBase],
    mediainfo: Optional[MediaInfo],
) -> None:
    """
    Backfill the local recognize cache after a successful shared recognition.

    Dispatches ``update_recognize_cache`` through ``self.run_module`` so that a
    name-based negative cache entry does not cause later lookups to query the
    shared service again.

    NOTE: this has to be a plain instance method. It reads ``self.run_module``
    and is invoked as ``self._update_local_recognize_cache(meta, mediainfo)``;
    declared as a ``@staticmethod`` the first positional argument would be
    bound to ``self`` and the call would fail with ``TypeError``.

    :param meta: metadata snapshot used as the cache key; no-op when falsy
    :param mediainfo: recognized media information; no-op when falsy
    :return: None
    """
    if not meta or not mediainfo:
        return
    self.run_module(
        "update_recognize_cache",
        meta=meta,
        mediainfo=mediainfo,
    )
async def _async_update_local_recognize_cache(
    self,
    meta: Optional[MetaBase],
    mediainfo: Optional[MediaInfo],
) -> None:
    """
    Asynchronously backfill the local recognize cache.

    Awaits ``self.async_run_module`` with the ``async_update_recognize_cache``
    method name; does nothing unless both arguments are truthy.

    :param meta: metadata snapshot used as the cache key
    :param mediainfo: recognized media information to store
    :return: None
    """
    if meta and mediainfo:
        await self.async_run_module(
            "async_update_recognize_cache",
            meta=meta,
            mediainfo=mediainfo,
        )
def recognize_media(
self,
meta: MetaBase = None,
@@ -460,10 +502,12 @@ class ChainBase(metaclass=ABCMeta):
cache=cache,
)
if mediainfo:
share_helper.report(meta=meta, mediainfo=mediainfo)
if not mediainfo.recognize_cache_hit:
share_helper.report(meta=meta, mediainfo=mediainfo)
return mediainfo
if self._can_use_media_recognize_share(meta, tmdbid, doubanid, bangumiid):
shared_cache_meta = self._snapshot_recognize_cache_meta(meta)
shared_item = share_helper.query(meta=meta, mtype=mtype)
shared_params = share_helper.to_recognize_params(shared_item)
if shared_params:
@@ -479,7 +523,9 @@ class ChainBase(metaclass=ABCMeta):
cache=cache,
)
if mediainfo:
share_helper.report(meta=meta, mediainfo=mediainfo)
self._update_local_recognize_cache(shared_cache_meta, mediainfo)
if not mediainfo.recognize_cache_hit:
share_helper.report(meta=meta, mediainfo=mediainfo)
return mediainfo
return None
@@ -528,10 +574,12 @@ class ChainBase(metaclass=ABCMeta):
cache=cache,
)
if mediainfo:
await share_helper.async_report(meta=meta, mediainfo=mediainfo)
if not mediainfo.recognize_cache_hit:
await share_helper.async_report(meta=meta, mediainfo=mediainfo)
return mediainfo
if self._can_use_media_recognize_share(meta, tmdbid, doubanid, bangumiid):
shared_cache_meta = self._snapshot_recognize_cache_meta(meta)
shared_item = await share_helper.async_query(meta=meta, mtype=mtype)
shared_params = share_helper.to_recognize_params(shared_item)
if shared_params:
@@ -547,7 +595,9 @@ class ChainBase(metaclass=ABCMeta):
cache=cache,
)
if mediainfo:
await share_helper.async_report(meta=meta, mediainfo=mediainfo)
await self._async_update_local_recognize_cache(shared_cache_meta, mediainfo)
if not mediainfo.recognize_cache_hit:
await share_helper.async_report(meta=meta, mediainfo=mediainfo)
return mediainfo
return None

View File

@@ -152,6 +152,8 @@ class TorrentInfo:
@dataclass
class MediaInfo:
# 内部标记:是否命中本地识别缓存,不参与序列化
recognize_cache_hit = False
# 来源themoviedb、douban、bangumi
source: str = None
# 类型 电影、电视剧

View File

@@ -39,7 +39,7 @@ class MediaRecognizeShareHelper(metaclass=WeakSingleton):
"""
if not meta:
return None
keyword = getattr(meta, "original_name", None) or getattr(meta, "name", None)
keyword = meta.original_name or meta.name
if keyword:
keyword = str(keyword).strip()
return keyword or None
@@ -77,7 +77,7 @@ class MediaRecognizeShareHelper(metaclass=WeakSingleton):
"""
if media_type != "tv":
return None
season = getattr(meta, "begin_season", None)
season = meta.begin_season if meta else None
if season is None and mediainfo:
season = mediainfo.season
try:
@@ -93,7 +93,7 @@ class MediaRecognizeShareHelper(metaclass=WeakSingleton):
"""
提取年份
"""
year = getattr(meta, "year", None) or (mediainfo.year if mediainfo else None)
year = (meta.year if meta else None) or (mediainfo.year if mediainfo else None)
if year is None:
return None
year_text = str(year).strip()

View File

@@ -108,6 +108,7 @@ class DoubanModule(_ModuleBase):
if doubanid:
meta.doubanid = doubanid
cache_info = self.cache.get(meta)
cache_hit = False
# 识别豆瓣信息
if not cache_info or not cache:
@@ -148,6 +149,7 @@ class DoubanModule(_ModuleBase):
self.cache.update(meta, info)
else:
# 使用缓存信息
cache_hit = True
if cache_info.get("title"):
logger.info(f"{meta.name} 使用豆瓣识别缓存:{cache_info.get('title')}")
info = douban_info_func(mtype=cache_info.get("type"),
@@ -159,6 +161,7 @@ class DoubanModule(_ModuleBase):
if info:
# 赋值TMDB信息并返回
mediainfo = MediaInfo(douban_info=info)
mediainfo.recognize_cache_hit = cache_hit
if meta:
logger.info(f"{meta.name} 豆瓣识别结果:{mediainfo.type.value} "
f"{mediainfo.title_year} "
@@ -209,6 +212,7 @@ class DoubanModule(_ModuleBase):
if doubanid:
meta.doubanid = doubanid
cache_info = self.cache.get(meta)
cache_hit = False
# 识别豆瓣信息
if not cache_info or not cache:
@@ -249,6 +253,7 @@ class DoubanModule(_ModuleBase):
self.cache.update(meta, info)
else:
# 使用缓存信息
cache_hit = True
if cache_info.get("title"):
logger.info(f"{meta.name} 使用豆瓣识别缓存:{cache_info.get('title')}")
info = await async_douban_info_func(mtype=cache_info.get("type"),
@@ -260,6 +265,7 @@ class DoubanModule(_ModuleBase):
if info:
# 赋值TMDB信息并返回
mediainfo = MediaInfo(douban_info=info)
mediainfo.recognize_cache_hit = cache_hit
if meta:
logger.info(f"{meta.name} 豆瓣识别结果:{mediainfo.type.value} "
f"{mediainfo.title_year} "
@@ -319,6 +325,31 @@ class DoubanModule(_ModuleBase):
**kwargs
)
def update_recognize_cache(
    self,
    meta: MetaBase,
    mediainfo: MediaInfo,
) -> Optional[bool]:
    """
    Backfill the local Douban recognize cache so that a name-based negative
    entry is overwritten and shared recognition is not queried again.

    :param meta: metadata used as the cache key
    :param mediainfo: recognized media; only handled when its ``source`` is
        ``"douban"`` and ``douban_info`` is non-empty
    :return: ``True`` when the cache was updated, otherwise ``None``
    """
    if not (meta and mediainfo):
        return None
    # Results from other sources are ignored by this module.
    if mediainfo.source == "douban" and mediainfo.douban_info:
        self.cache.update(meta, mediainfo.douban_info)
        return True
    return None
async def async_update_recognize_cache(
    self,
    meta: MetaBase,
    mediainfo: MediaInfo,
) -> Optional[bool]:
    """
    Async entry point for backfilling the local Douban recognize cache.

    Delegates to the synchronous ``update_recognize_cache`` and returns its
    result unchanged.

    :param meta: metadata used as the cache key
    :param mediainfo: recognized media information
    :return: whatever ``update_recognize_cache`` returns
    """
    outcome = self.update_recognize_cache(meta=meta, mediainfo=mediainfo)
    return outcome
@rate_limit_exponential(source="douban_info")
def douban_info(self, doubanid: str, mtype: MediaType = None, raise_exception: bool = True) -> Optional[dict]:
"""

View File

@@ -490,6 +490,7 @@ class TheMovieDbModule(_ModuleBase):
group_seasons = []
if episode_group:
group_seasons = self.tmdb.get_tv_group_seasons(episode_group)
cache_hit = False
# 识别匹配
if not cache_info or not cache:
@@ -525,6 +526,7 @@ class TheMovieDbModule(_ModuleBase):
self.cache.update(meta, info)
else:
# 使用缓存信息
cache_hit = True
if cache_info.get("title"):
logger.info(f"{meta.name} 使用TMDB识别缓存{cache_info.get('title')}")
info = self.tmdb.get_info(mtype=cache_info.get("type"),
@@ -534,7 +536,10 @@ class TheMovieDbModule(_ModuleBase):
info = None
if info:
return self._build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
mediainfo = self._build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
if mediainfo:
mediainfo.recognize_cache_hit = cache_hit
return mediainfo
else:
logger.info(f"{meta.name if meta else tmdbid} 未匹配到TMDB媒体信息")
@@ -574,6 +579,7 @@ class TheMovieDbModule(_ModuleBase):
group_seasons = []
if episode_group:
group_seasons = await self.tmdb.async_get_tv_group_seasons(episode_group)
cache_hit = False
# 识别匹配
if not cache_info or not cache:
@@ -609,6 +615,7 @@ class TheMovieDbModule(_ModuleBase):
self.cache.update(meta, info)
else:
# 使用缓存信息
cache_hit = True
if cache_info.get("title"):
logger.info(f"{meta.name} 使用TMDB识别缓存{cache_info.get('title')}")
info = await self.tmdb.async_get_info(mtype=cache_info.get("type"),
@@ -618,7 +625,10 @@ class TheMovieDbModule(_ModuleBase):
info = None
if info:
return await self._async_build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
mediainfo = await self._async_build_media_info_result(info, meta, tmdbid, episode_group, group_seasons)
if mediainfo:
mediainfo.recognize_cache_hit = cache_hit
return mediainfo
else:
logger.info(f"{meta.name if meta else tmdbid} 未匹配到TMDB媒体信息")
@@ -692,6 +702,31 @@ class TheMovieDbModule(_ModuleBase):
else:
return await self.tmdb.async_get_tv_season_detail(tmdbid=tmdbid, season=season)
def update_recognize_cache(
    self,
    meta: MetaBase,
    mediainfo: MediaInfo,
) -> Optional[bool]:
    """
    Backfill the local TMDB recognize cache so that a name-based negative
    entry is overwritten and shared recognition is not queried again.

    :param meta: metadata used as the cache key
    :param mediainfo: recognized media; only handled when its ``source`` is
        ``"themoviedb"`` and ``tmdb_info`` is non-empty
    :return: ``True`` when the cache was updated, otherwise ``None``
    """
    if not (meta and mediainfo):
        return None
    # Results from other sources are ignored by this module.
    if mediainfo.source == "themoviedb" and mediainfo.tmdb_info:
        self.cache.update(meta, mediainfo.tmdb_info)
        return True
    return None
async def async_update_recognize_cache(
    self,
    meta: MetaBase,
    mediainfo: MediaInfo,
) -> Optional[bool]:
    """
    Async entry point for backfilling the local TMDB recognize cache.

    Delegates to the synchronous ``update_recognize_cache`` and returns its
    result unchanged.

    :param meta: metadata used as the cache key
    :param mediainfo: recognized media information
    :return: whatever ``update_recognize_cache`` returns
    """
    outcome = self.update_recognize_cache(meta=meta, mediainfo=mediainfo)
    return outcome
def media_category(self) -> Optional[Dict[str, list]]:
"""
获取媒体分类

View File

@@ -78,6 +78,9 @@ class TestMediaRecognizeShare(unittest.TestCase):
), patch(
"app.chain.MediaRecognizeShareHelper.report",
return_value=False,
), patch.object(
self.chain,
"_update_local_recognize_cache",
):
result = self.chain.recognize_media(meta=meta, cache=False)
@@ -117,17 +120,69 @@ class TestMediaRecognizeShare(unittest.TestCase):
), patch(
"app.chain.MediaRecognizeShareHelper.async_report",
AsyncMock(return_value=False),
):
), patch.object(
self.chain,
"_async_update_local_recognize_cache",
AsyncMock(),
) as backfill_mock:
result = await self.chain.async_recognize_media(meta=meta, cache=False)
return result, query_mock
return result, query_mock, backfill_mock
result, query_mock = asyncio.run(runner())
result, query_mock, backfill_mock = asyncio.run(runner())
self.assertIs(result, shared_media)
self.assertEqual(async_run_module.await_count, 2)
query_mock.assert_awaited_once_with(meta=meta, mtype=None)
backfill_mock.assert_awaited_once()
self.assertIsNone(meta.begin_season)
def test_backfill_local_cache_after_shared_recognize_success(self):
    """
    When the second local recognition (driven by the shared result) succeeds,
    the local recognize cache must be backfilled for the original name.
    """
    meta = self._build_meta("测试缓存回填", MediaType.MOVIE)
    tmdb_payload = {"id": 700, "media_type": MediaType.MOVIE, "title": "测试缓存回填"}
    shared_media = MediaInfo(
        title="测试缓存回填",
        year="2024",
        tmdb_id=700,
        type=MediaType.MOVIE,
        source="themoviedb",
        tmdb_info=tmdb_payload,
    )
    shared_item = {"type": "movie", "tmdbid": 700}
    recognize_params = {
        "mtype": MediaType.MOVIE,
        "tmdbid": 700,
        "doubanid": None,
        "bangumiid": None,
        "season": None,
    }

    # First local attempt misses (None); the retry with shared ids succeeds.
    with patch.object(self.chain, "run_module", side_effect=[None, shared_media]):
        with patch("app.chain.MediaRecognizeShareHelper.query", return_value=shared_item):
            with patch(
                "app.chain.MediaRecognizeShareHelper.to_recognize_params",
                return_value=recognize_params,
            ):
                with patch("app.chain.MediaRecognizeShareHelper.report", return_value=False):
                    with patch.object(
                        self.chain,
                        "_update_local_recognize_cache",
                    ) as backfill_mock:
                        result = self.chain.recognize_media(meta=meta, cache=False)

    self.assertIs(result, shared_media)
    backfill_mock.assert_called_once()
    cached_meta, cached_media = backfill_mock.call_args.args
    # The backfill must receive a snapshot, not the live meta object.
    self.assertIsNot(cached_meta, meta)
    self.assertEqual(cached_meta.name, meta.name)
    self.assertEqual(cached_meta.type, meta.type)
    self.assertIs(cached_media, shared_media)
def test_query_and_report_prefer_original_name_keyword(self):
"""
查询和上报共享识别时应优先使用未应用识别词的识别名称
@@ -151,6 +206,57 @@ class TestMediaRecognizeShare(unittest.TestCase):
self.assertEqual(query_params["keyword"], "未应用识别词的名称")
self.assertEqual(report_payload["keyword"], "未应用识别词的名称")
def test_skip_report_when_local_recognize_hits_cache(self):
    """
    A local recognition served from cache must not be reported to the shared
    recognition service.
    """
    meta = self._build_meta("缓存电影", MediaType.MOVIE)
    cached_media = MediaInfo(title="缓存电影", year="2024", tmdb_id=500, type=MediaType.MOVIE)
    cached_media.recognize_cache_hit = True

    with patch.object(self.chain, "run_module", return_value=cached_media) as run_module:
        with patch(
            "app.chain.MediaRecognizeShareHelper.report",
            return_value=True,
        ) as report_mock:
            with patch("app.chain.MediaRecognizeShareHelper.query") as query_mock:
                result = self.chain.recognize_media(meta=meta)

    self.assertIs(result, cached_media)
    run_module.assert_called_once()
    report_mock.assert_not_called()
    query_mock.assert_not_called()
def test_async_skip_report_when_local_recognize_hits_cache(self):
    """
    An async local recognition served from cache must not be reported to the
    shared recognition service.
    """
    meta = self._build_meta("缓存剧集", MediaType.TV)
    cached_media = MediaInfo(title="缓存剧集", year="2025", tmdb_id=600, type=MediaType.TV)
    cached_media.recognize_cache_hit = True

    async def runner():
        with patch.object(
            self.chain,
            "async_run_module",
            AsyncMock(return_value=cached_media),
        ) as module_mock:
            with patch(
                "app.chain.MediaRecognizeShareHelper.async_report",
                AsyncMock(return_value=True),
            ) as reporter:
                with patch(
                    "app.chain.MediaRecognizeShareHelper.async_query",
                    AsyncMock(),
                ) as querier:
                    outcome = await self.chain.async_recognize_media(meta=meta)
        return outcome, module_mock, reporter, querier

    result, async_run_module, report_mock, query_mock = asyncio.run(runner())

    self.assertIs(result, cached_media)
    async_run_module.assert_awaited_once()
    report_mock.assert_not_awaited()
    query_mock.assert_not_awaited()
# Run the test cases in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()