diff --git a/app/api/endpoints/download.py b/app/api/endpoints/download.py index 683db414..f342fcff 100644 --- a/app/api/endpoints/download.py +++ b/app/api/endpoints/download.py @@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, Body from app import schemas from app.chain.download import DownloadChain from app.chain.media import MediaChain -from app.core.context import MediaInfo, Context, TorrentInfo +from app.core.context import MediaInfo, Context, SubtitleInfo, TorrentInfo from app.core.metainfo import MetaInfo from app.core.security import verify_token from app.db.models.user import User @@ -114,6 +114,33 @@ def add( return schemas.Response(success=True, data={"download_id": did}) +@router.post("/subtitle", summary="下载字幕", response_model=schemas.Response) +def download_subtitle( + subtitle_in: schemas.SubtitleInfo, + tmdbid: Annotated[int | None, Body()] = None, + doubanid: Annotated[str | None, Body()] = None, + save_path: Annotated[str | None, Body()] = None, + current_user: User = Depends(get_current_active_user), +) -> Any: + """ + 下载字幕资源。 + """ + subtitle_info = SubtitleInfo() + subtitle_info.from_dict(subtitle_in.model_dump()) + success, message, saved_files = DownloadChain().download_subtitle( + subtitle=subtitle_info, + tmdbid=tmdbid, + doubanid=doubanid, + save_path=save_path, + username=current_user.name, + ) + return schemas.Response( + success=success, + message=message, + data={"files": saved_files} if saved_files else None, + ) + + @router.get("/start/{hashString}", summary="开始任务", response_model=schemas.Response) def start( hashString: str, diff --git a/app/api/endpoints/search.py b/app/api/endpoints/search.py index d89329c1..72830ba2 100644 --- a/app/api/endpoints/search.py +++ b/app/api/endpoints/search.py @@ -150,13 +150,16 @@ async def search_latest_context(_: schemas.TokenPayload = Depends(verify_token)) 查询上次搜索结果及其对应的搜索参数。 """ search_chain = SearchChain() - torrents = await search_chain.async_last_search_results() or [] params = await 
search_chain.async_last_search_params() or {} + if params.get("result_type") == "subtitle": + results = await search_chain.async_last_subtitle_search_results() or [] + else: + results = await search_chain.async_last_search_results() or [] return schemas.Response( success=True, data={ "params": params, - "results": [torrent.to_dict() for torrent in torrents], + "results": [result.to_dict() for result in results], }, ) @@ -600,6 +603,46 @@ async def search_by_title( ) +@router.get("/subtitle/title/stream", summary="渐进式模糊搜索字幕") +async def search_subtitle_by_title_stream( + request: Request, + keyword: Optional[str] = None, + page: Optional[int] = 0, + sites: Optional[str] = None, + _: schemas.TokenPayload = Depends(verify_resource_token), +) -> Any: + """ + 根据名称渐进式模糊搜索站点字幕资源,返回格式为SSE。 + """ + + event_source = SearchChain().async_search_subtitles_by_title_stream( + title=keyword, page=page, sites=_parse_site_list(sites), cache_local=True + ) + return StreamingResponse( + _stream_search_events(request, event_source), media_type="text/event-stream" + ) + + +@router.get("/subtitle/title", summary="模糊搜索字幕", response_model=schemas.Response) +async def search_subtitle_by_title( + keyword: Optional[str] = None, + page: Optional[int] = 0, + sites: Optional[str] = None, + _: schemas.TokenPayload = Depends(verify_token), +) -> Any: + """ + 根据名称模糊搜索站点字幕资源,支持分页。 + """ + subtitles = await SearchChain().async_search_subtitles_by_title( + title=keyword, page=page, sites=_parse_site_list(sites), cache_local=True + ) + if not subtitles: + return schemas.Response(success=False, message="未搜索到任何字幕") + return schemas.Response( + success=True, data=[subtitle.to_dict() for subtitle in subtitles] + ) + + @router.post("/recommend", summary="AI推荐资源", response_model=schemas.Response) async def recommend_search_results( filtered_indices: Optional[List[int]] = Body( diff --git a/app/chain/__init__.py b/app/chain/__init__.py index 47b24d46..22066000 100644 --- a/app/chain/__init__.py +++ 
b/app/chain/__init__.py @@ -14,7 +14,7 @@ from transmission_rpc import File from app.core.cache import FileCache, AsyncFileCache, fresh, async_fresh from app.core.config import settings -from app.core.context import Context, MediaInfo, TorrentInfo +from app.core.context import Context, MediaInfo, SubtitleInfo, TorrentInfo from app.core.event import EventManager from app.core.meta import MetaBase from app.core.module import ModuleManager @@ -1053,6 +1053,23 @@ class ChainBase(metaclass=ABCMeta): "search_torrents", site=site, keyword=keyword, mtype=mtype, page=page ) + def search_subtitles( + self, + site: dict, + keyword: str, + page: Optional[int] = 0, + ) -> List[SubtitleInfo]: + """ + 搜索一个站点的字幕资源。 + :param site: 站点 + :param keyword: 搜索关键词 + :param page: 页码 + :return: 字幕列表 + """ + return self.run_module( + "search_subtitles", site=site, keyword=keyword, page=page + ) + async def async_search_torrents( self, site: dict, @@ -1072,6 +1089,23 @@ class ChainBase(metaclass=ABCMeta): "async_search_torrents", site=site, keyword=keyword, mtype=mtype, page=page ) + async def async_search_subtitles( + self, + site: dict, + keyword: str, + page: Optional[int] = 0, + ) -> List[SubtitleInfo]: + """ + 异步搜索一个站点的字幕资源。 + :param site: 站点 + :param keyword: 搜索关键词 + :param page: 页码 + :return: 字幕列表 + """ + return await self.async_run_module( + "async_search_subtitles", site=site, keyword=keyword, page=page + ) + def refresh_torrents( self, site: dict, diff --git a/app/chain/download.py b/app/chain/download.py index 1e68ef32..9b897c43 100644 --- a/app/chain/download.py +++ b/app/chain/download.py @@ -2,15 +2,17 @@ import base64 import copy import json import re +import shutil import time from pathlib import Path from typing import List, Optional, Tuple, Set, Dict, Union from app import schemas from app.chain import ChainBase +from app.chain.storage import StorageChain from app.core.cache import FileCache from app.core.config import settings, global_vars -from app.core.context import 
MediaInfo, TorrentInfo, Context +from app.core.context import MediaInfo, SubtitleInfo, TorrentInfo, Context from app.core.event import eventmanager, Event from app.core.meta import MetaBase from app.core.metainfo import MetaInfo @@ -26,6 +28,7 @@ from app.schemas.types import MediaType, TorrentStatus, EventType, MessageChanne ChainEventType from app.utils.http import RequestUtils from app.utils.string import StringUtils +from app.utils.system import SystemUtils class DownloadChain(ChainBase): @@ -33,6 +36,211 @@ class DownloadChain(ChainBase): 下载处理链 """ + @staticmethod + def _safe_subtitle_file_name(file_name: str, fallback_name: str) -> str: + """ + 生成安全的字幕文件名。 + """ + file_name = Path(file_name or fallback_name).name + if not Path(file_name).suffix and Path(fallback_name).suffix: + file_name = f"{file_name}{Path(fallback_name).suffix}" + return file_name + + @staticmethod + def _is_subtitle_archive(file_name: str) -> bool: + """ + 判断是否为字幕压缩包。 + """ + return Path(file_name).suffix.lower() == ".zip" + + @staticmethod + def _is_subtitle_file(file_name: str) -> bool: + """ + 判断是否为支持的字幕文件。 + """ + return Path(file_name).suffix.lower() in settings.RMT_SUBEXT + + @staticmethod + def _detect_subtitle_fallback_name(subtitle: SubtitleInfo, content: bytes) -> str: + """ + 根据响应内容生成兜底字幕文件名。 + """ + suffix = ".zip" if content.startswith(b"PK") else ".srt" + return f"{subtitle.title or subtitle.subtitle_id or 'subtitle'}{suffix}" + + @staticmethod + def _resolve_media_download_dir( + media_info: MediaInfo, + save_path: Optional[str] = None, + ) -> Optional[Path]: + """ + 根据媒体信息解析下载目录。 + """ + storage = 'local' + if save_path: + return Path(save_path) + + dir_info = DirectoryHelper().get_dir(media_info, include_unsorted=True) + storage = dir_info.storage if dir_info else storage + if not dir_info: + logger.error(f"未找到下载目录:{media_info.type.value} {media_info.title_year}") + return None + + if not dir_info.media_type and dir_info.download_type_folder: + download_dir = 
Path(dir_info.download_path) / media_info.type.value + else: + download_dir = Path(dir_info.download_path) + + if not dir_info.media_category and dir_info.download_category_folder and media_info.category: + download_dir = download_dir / media_info.category + + file_uri = FileURI(storage=storage, path=download_dir.as_posix()) + return Path(file_uri.uri) + + @staticmethod + def _upload_subtitle_file( + storage_chain: StorageChain, + storage: str, + working_dir_item: schemas.FileItem, + subtitle_file: Path, + ) -> Optional[str]: + """ + 上传单个字幕文件到目标目录。 + """ + target_sub_file = Path(working_dir_item.path) / subtitle_file.name + if storage_chain.get_file_item(storage, target_sub_file): + logger.info(f"字幕文件已存在:{target_sub_file}") + return target_sub_file.as_posix() + logger.info(f"转移字幕 {subtitle_file} 到 {target_sub_file} ...") + uploaded = storage_chain.upload_file(working_dir_item, subtitle_file) + if uploaded: + return uploaded.path + return None + + def _save_subtitle_response( + self, + subtitle: SubtitleInfo, + response, + target_dir: Path, + ) -> List[str]: + """ + 保存字幕下载响应到目标目录。 + """ + fallback_name = self._detect_subtitle_fallback_name(subtitle, response.content) + file_name = subtitle.file_name or TorrentHelper.get_url_filename(response, subtitle.enclosure) + if not Path(file_name).suffix: + file_name = fallback_name + file_name = self._safe_subtitle_file_name( + file_name=file_name, + fallback_name=fallback_name, + ) + if not self._is_subtitle_archive(file_name) and not self._is_subtitle_file(file_name): + logger.warn(f"下载链接不是支持的字幕文件:{subtitle.enclosure} - {file_name}") + return [] + + file_uri = FileURI.from_uri(target_dir.as_posix()) + storage = file_uri.storage + target_path = Path(file_uri.path) + storage_chain = StorageChain() + working_dir_item = storage_chain.get_folder(storage, target_path) + if not working_dir_item: + logger.error(f"下载目录不存在,无法保存字幕:{target_path}") + return [] + + saved_files = [] + temp_file = settings.TEMP_PATH / file_name + 
temp_extract_dir = temp_file.with_name(temp_file.stem) + try: + temp_file.write_bytes(response.content) + if self._is_subtitle_archive(file_name): + shutil.unpack_archive(temp_file, temp_extract_dir, format='zip') + for sub_file in SystemUtils.list_files(temp_extract_dir, settings.RMT_SUBEXT): + uploaded_path = self._upload_subtitle_file( + storage_chain=storage_chain, + storage=storage, + working_dir_item=working_dir_item, + subtitle_file=sub_file, + ) + if uploaded_path: + saved_files.append(uploaded_path) + else: + uploaded_path = self._upload_subtitle_file( + storage_chain=storage_chain, + storage=storage, + working_dir_item=working_dir_item, + subtitle_file=temp_file, + ) + if uploaded_path: + saved_files.append(uploaded_path) + return saved_files + finally: + try: + if temp_extract_dir.exists(): + shutil.rmtree(temp_extract_dir) + if temp_file.exists(): + temp_file.unlink() + except Exception as err: + logger.error(f"删除临时字幕文件失败:{str(err)}") + + def download_subtitle( + self, + subtitle: SubtitleInfo, + tmdbid: Optional[int] = None, + doubanid: Optional[str] = None, + save_path: Optional[str] = None, + username: Optional[str] = None, + ) -> Tuple[bool, str, List[str]]: + """ + 下载字幕文件并保存到媒体对应的下载目录。 + + :param subtitle: 字幕搜索结果 + :param tmdbid: TMDB ID + :param doubanid: 豆瓣 ID + :param save_path: 保存路径 + :param username: 调用下载的用户名 + :return: 成功状态、提示消息、保存文件列表 + """ + if not subtitle or not subtitle.enclosure: + return False, "字幕下载链接为空", [] + + metainfo = MetaInfo(title=subtitle.title, subtitle=subtitle.description) + mediainfo = self.recognize_media( + meta=metainfo, + tmdbid=tmdbid, + doubanid=doubanid, + ) + if not mediainfo: + return False, "无法识别媒体信息", [] + + target_dir = self._resolve_media_download_dir( + media_info=mediainfo, + save_path=save_path, + ) + if not target_dir: + return False, "未找到下载目录", [] + + request = RequestUtils( + cookies=subtitle.site_cookie, + ua=subtitle.site_ua or settings.USER_AGENT, + proxies=settings.PROXY if subtitle.site_proxy else 
None, + ) + response = request.get_res(subtitle.enclosure) + if not response or response.status_code != 200: + return False, "下载字幕文件失败", [] + + saved_files = self._save_subtitle_response( + subtitle=subtitle, + response=response, + target_dir=target_dir, + ) + if not saved_files: + return False, "未保存任何字幕文件", [] + + logger.info( + f"{mediainfo.title_year} 字幕下载完成:{subtitle.site_name} - {subtitle.title},用户:{username}" + ) + return True, "字幕下载成功", saved_files + def _submit_download_added_task( self, context: Context, diff --git a/app/chain/search.py b/app/chain/search.py index 294a0469..7d04780b 100644 --- a/app/chain/search.py +++ b/app/chain/search.py @@ -14,7 +14,7 @@ from fastapi.concurrency import run_in_threadpool from app.chain import ChainBase from app.core.config import global_vars, settings from app.core.context import Context -from app.core.context import MediaInfo, TorrentInfo +from app.core.context import MediaInfo, SubtitleInfo, TorrentInfo from app.core.event import eventmanager, Event from app.core.metainfo import MetaInfo from app.db.systemconfig_oper import SystemConfigOper @@ -33,6 +33,7 @@ class SearchChain(ChainBase): """ __result_temp_file = "__search_result__" + __subtitle_result_temp_file = "__subtitle_search_result__" __search_params_temp_file = "__search_params__" __ai_indices_cache_file = "__ai_recommend_indices__" @@ -76,6 +77,18 @@ class SearchChain(ChainBase): page_size = self.get_search_page_size(site=site, keyword=keyword) return page_size is not None and len(page_results or []) >= page_size + @staticmethod + def _should_continue_subtitle_search_pages(site: dict, page_results: Optional[List[Any]]) -> bool: + """ + 判断字幕搜索是否继续抓取下一页。 + """ + subtitle_conf = (site or {}).get("subtitles") or {} + try: + page_size = int(subtitle_conf.get("result_num") or site.get("result_num") or 100) + except (TypeError, ValueError): + page_size = 100 + return page_size > 0 and len(page_results or []) >= page_size + @property def is_ai_recommend_enabled(self) 
-> bool: """ @@ -192,6 +205,7 @@ class SearchChain(ChainBase): "year": str(params.get("year") or ""), "season": str(params.get("season") or ""), "sites": str(params.get("sites") or ""), + "result_type": str(params.get("result_type") or "torrent"), } return normalized if normalized["keyword"] else None @@ -205,6 +219,7 @@ class SearchChain(ChainBase): year: Optional[str] = None, season: Optional[int] = None, sites: Optional[List[int]] = None, + result_type: Optional[str] = "torrent", ) -> None: """ 保存最后一次资源搜索参数。 @@ -218,6 +233,7 @@ class SearchChain(ChainBase): "year": year, "season": season, "sites": self._stringify_sites(sites), + "result_type": result_type or "torrent", } ) if params: @@ -233,6 +249,7 @@ class SearchChain(ChainBase): year: Optional[str] = None, season: Optional[int] = None, sites: Optional[List[int]] = None, + result_type: Optional[str] = "torrent", ) -> None: """ 异步保存最后一次资源搜索参数。 @@ -246,6 +263,7 @@ class SearchChain(ChainBase): "year": year, "season": season, "sites": self._stringify_sites(sites), + "result_type": result_type or "torrent", } ) if params: @@ -555,6 +573,83 @@ class SearchChain(ChainBase): """ return await self.async_load_cache(self.__result_temp_file) + async def async_last_subtitle_search_results(self) -> Optional[List[SubtitleInfo]]: + """ + 异步获取上次字幕搜索结果。 + """ + return await self.async_load_cache(self.__subtitle_result_temp_file) + + async def async_search_subtitles_by_title(self, title: str, page: Optional[int] = 0, + sites: List[int] = None, + cache_local: Optional[bool] = False) -> List[SubtitleInfo]: + """ + 根据标题异步搜索字幕,不识别不过滤,直接返回站点字幕内容。 + :param title: 标题关键词 + :param page: 页码 + :param sites: 站点ID列表 + :param cache_local: 是否缓存到本地 + """ + if cache_local: + self.cancel_ai_recommend() + await self.async_save_last_search_params( + keyword=title, + area="title", + sites=sites, + result_type="subtitle", + ) + logger.info(f'开始搜索字幕,关键词:{title} ...') + subtitles = await self.__async_search_subtitles_all_sites( + keyword=title, 
sites=sites, page=page + ) or [] + if not subtitles: + logger.warn(f'{title} 未搜索到字幕') + return [] + if cache_local: + await self.async_save_cache(subtitles, self.__subtitle_result_temp_file) + return subtitles + + async def async_search_subtitles_by_title_stream(self, title: str, page: Optional[int] = 0, + sites: List[int] = None, + cache_local: Optional[bool] = False) -> AsyncIterator[dict]: + """ + 根据标题渐进式搜索字幕,不识别不过滤,按站点完成顺序返回结果。 + """ + if cache_local: + self.cancel_ai_recommend() + await self.async_save_last_search_params( + keyword=title, + area="title", + sites=sites, + result_type="subtitle", + ) + logger.info(f'开始渐进式搜索字幕,关键词:{title} ...') + + subtitles: List[SubtitleInfo] = [] + async for event in self.__async_search_subtitles_all_sites_stream( + keyword=title, sites=sites, page=page): + result = event.pop("items", []) or [] + if result: + subtitles.extend(result) + yield { + **event, + "type": "append", + "items": [subtitle.to_dict() for subtitle in result], + "total_items": len(subtitles) + } + + if cache_local: + await self.async_save_cache(subtitles, self.__subtitle_result_temp_file) + + if not subtitles: + logger.warn(f'{title} 未搜索到字幕') + yield { + "type": "done", + "stage": "done", + "text": f"搜索完成,共 {len(subtitles)} 个字幕", + "items": [subtitle.to_dict() for subtitle in subtitles], + "total_items": len(subtitles) + } + async def async_search_by_id(self, tmdbid: Optional[int] = None, doubanid: Optional[str] = None, mtype: MediaType = None, area: Optional[str] = "title", season: Optional[int] = None, sites: List[int] = None, cache_local: bool = False) -> List[Context]: @@ -1622,6 +1717,231 @@ class SearchChain(ChainBase): logger.info(f"站点搜索完成,有效资源数:{results_count},总耗时 {(end_time - start_time).seconds} 秒") progress.end() + async def __async_search_subtitles_all_sites(self, keyword: str, + sites: List[int] = None, + page: Optional[int] = 0) -> Optional[List[SubtitleInfo]]: + """ + 异步搜索多个站点的字幕资源。 + :param keyword: 搜索关键词 + :param sites: 
指定站点ID列表,如有则只搜索指定站点,否则搜索所有站点 + :param page: 搜索页码 + :return: 字幕资源列表 + """ + indexer_sites = [] + + if not sites: + sites = SystemConfigOper().get(SystemConfigKey.IndexerSites) or [] + + for indexer in await SitesHelper().async_get_indexers(): + if not indexer.get("subtitles"): + continue + if not sites or indexer.get("id") in sites: + indexer_sites.append(indexer) + if not indexer_sites: + logger.warn('未开启任何支持字幕搜索的有效站点,无法搜索字幕') + return [] + + progress = ProgressHelper(ProgressKey.Search) + progress.start() + start_time = datetime.now() + search_pages = self._build_search_pages(page) + total_num = len(indexer_sites) * len(search_pages) + finish_count = 0 + progress.update(value=0, + text=f"开始搜索字幕,共 {len(indexer_sites)} 个站点,{len(search_pages)} 页 ...") + results = [] + semaphore = asyncio.Semaphore(settings.CONF.threadpool or total_num) + + async def search_site_page(site: dict, search_page: int) -> List[SubtitleInfo]: + """ + 控制单次字幕站点页请求的并发量,并返回该页的字幕列表。 + """ + async with semaphore: + return await self.async_search_subtitles( + site=site, keyword=keyword, page=search_page + ) + + pending_tasks = {} + + def submit_site_page(site: dict, page_index: int): + """ + 提交异步字幕站点页搜索任务,并记录站点和页码位置。 + """ + search_page = search_pages[page_index] + task = asyncio.create_task(search_site_page(site=site, search_page=search_page)) + pending_tasks[task] = (site, page_index, search_page) + + for site in indexer_sites: + submit_site_page(site=site, page_index=0) + + try: + while pending_tasks: + if global_vars.is_system_stopped: + break + done_tasks, _ = await asyncio.wait( + pending_tasks.keys(), + return_when=asyncio.FIRST_COMPLETED, + ) + for future in done_tasks: + site, page_index, search_page = pending_tasks.pop(future) + finish_count += 1 + result = await future + if result: + results.extend(result) + if ( + self._should_continue_subtitle_search_pages(site=site, page_results=result) + and page_index + 1 < len(search_pages) + ): + submit_site_page(site=site, page_index=page_index +
1) + else: + logger.debug( + f"{site.get('name')} 字幕第 {search_page} 页返回 {len(result or [])} 条,停止继续翻页" + ) + logger.info(f"站点字幕搜索进度:{finish_count} / {total_num}") + progress.update(value=finish_count / total_num * 100, + text=f"正在搜索字幕{keyword or ''},已完成 {finish_count} / {total_num} 个请求 ...") + finally: + for task in pending_tasks: + if not task.done(): + task.cancel() + if pending_tasks: + await asyncio.gather(*pending_tasks.keys(), return_exceptions=True) + + end_time = datetime.now() + progress.update(value=100, + text=f"站点字幕搜索完成,有效字幕数:{len(results)},总耗时 {(end_time - start_time).seconds} 秒") + logger.info(f"站点字幕搜索完成,有效字幕数:{len(results)},总耗时 {(end_time - start_time).seconds} 秒") + progress.end() + return results + + async def __async_search_subtitles_all_sites_stream(self, keyword: str, + sites: List[int] = None, + page: Optional[int] = 0) -> AsyncIterator[Dict[str, Any]]: + """ + 异步搜索多个站点的字幕资源,按站点完成顺序渐进式返回结果。 + :param keyword: 搜索关键词 + :param sites: 指定站点ID列表,如有则只搜索指定站点,否则搜索所有站点 + :param page: 搜索页码 + """ + indexer_sites = [] + + if not sites: + sites = SystemConfigOper().get(SystemConfigKey.IndexerSites) or [] + + for indexer in await SitesHelper().async_get_indexers(): + if not indexer.get("subtitles"): + continue + if not sites or indexer.get("id") in sites: + indexer_sites.append(indexer) + if not indexer_sites: + logger.warn('未开启任何支持字幕搜索的有效站点,无法搜索字幕') + yield { + "type": "done", + "stage": "searching", + "value": 100, + "text": "未开启任何支持字幕搜索的有效站点,无法搜索字幕", + "items": [], + "finished": 0, + "total": 0 + } + return + + progress = ProgressHelper(ProgressKey.Search) + progress.start() + start_time = datetime.now() + search_pages = self._build_search_pages(page) + total_num = len(indexer_sites) * len(search_pages) + finish_count = 0 + progress.update(value=0, + text=f"开始搜索字幕,共 {len(indexer_sites)} 个站点,{len(search_pages)} 页 ...") + yield { + "type": "progress", + "stage": "searching", + "value": 0, + "text": f"开始搜索字幕,共 {len(indexer_sites)} 个站点,{len(search_pages)} 页 
...", + "items": [], + "finished": 0, + "total": total_num + } + + semaphore = asyncio.Semaphore(settings.CONF.threadpool or total_num) + + async def search_site(site: dict, search_page: int) -> List[SubtitleInfo]: + """ + 搜索单个站点字幕页,用于渐进式返回入口。 + """ + async with semaphore: + site_result = await self.async_search_subtitles( + site=site, keyword=keyword, page=search_page + ) + return site_result or [] + + tasks = {} + + def submit_site_page(site: dict, page_index: int): + """ + 提交渐进式字幕站点页搜索任务,并保留站点和页码上下文。 + """ + search_page = search_pages[page_index] + task = asyncio.create_task(search_site(site=site, search_page=search_page)) + tasks[task] = (site, page_index, search_page) + + for site in indexer_sites: + submit_site_page(site=site, page_index=0) + + results_count = 0 + try: + while tasks: + if global_vars.is_system_stopped: + break + done_tasks, _ = await asyncio.wait( + tasks.keys(), + return_when=asyncio.FIRST_COMPLETED, + ) + for future in done_tasks: + site, page_index, search_page = tasks.pop(future) + finish_count += 1 + result = await future + results_count += len(result) + if ( + self._should_continue_subtitle_search_pages(site=site, page_results=result) + and page_index + 1 < len(search_pages) + ): + submit_site_page(site=site, page_index=page_index + 1) + else: + logger.debug( + f"{site.get('name')} 字幕第 {search_page} 页返回 {len(result)} 条,停止继续翻页" + ) + logger.info(f"站点字幕搜索进度:{finish_count} / {total_num}") + progress_value = finish_count / total_num * 100 + progress_text = f"正在搜索字幕{keyword or ''},已完成 {finish_count} / {total_num} 个请求 ..." 
+ progress.update(value=progress_value, text=progress_text) + yield { + "type": "append", + "stage": "searching", + "value": progress_value, + "text": progress_text, + "items": result, + "site": site.get("name"), + "site_id": site.get("id"), + "page": search_page, + "finished": finish_count, + "total": total_num, + "total_items": results_count + } + finally: + for task in tasks: + if not task.done(): + task.cancel() + if tasks: + await asyncio.gather(*tasks.keys(), return_exceptions=True) + + end_time = datetime.now() + progress.update(value=100, + text=f"站点字幕搜索完成,有效字幕数:{results_count},总耗时 {(end_time - start_time).seconds} 秒") + logger.info(f"站点字幕搜索完成,有效字幕数:{results_count},总耗时 {(end_time - start_time).seconds} 秒") + progress.end() + @eventmanager.register(EventType.SiteDeleted) def remove_site(self, event: Event): """ diff --git a/app/core/context.py b/app/core/context.py index a6f62005..28e08674 100644 --- a/app/core/context.py +++ b/app/core/context.py @@ -150,6 +150,72 @@ class TorrentInfo: return dicts +@dataclass +class SubtitleInfo: + """ + 字幕搜索结果信息。 + """ + + # 站点ID + site: int = None + # 站点名称 + site_name: str = None + # 站点Cookie + site_cookie: str = None + # 站点UA + site_ua: str = None + # 站点是否使用代理 + site_proxy: bool = False + # 站点优先级 + site_order: int = 0 + # 字幕标题 + title: str = None + # 字幕描述 + description: str = None + # 字幕下载链接 + enclosure: str = None + # 详情页面 + page_url: str = None + # 语言 + language: str = None + # 语言图标 + language_icon: str = None + # 字幕大小 + size: float = 0.0 + # 发布时间 + pubdate: str = None + # 已过时间 + date_elapsed: str = None + # 点击/下载次数 + grabs: int = 0 + # 上传者 + uploader: str = None + # 举报页面 + report_url: str = None + # 种子ID + torrent_id: str = None + # 字幕ID + subtitle_id: str = None + # 下载文件名 + file_name: str = None + + def __setattr__(self, name: str, value: Any): + self.__dict__[name] = value + + def from_dict(self, data: dict): + """ + 从字典中初始化。 + """ + for key, value in data.items(): + setattr(self, key, value) + + def to_dict(self): 
+ """ + 返回字典。 + """ + return vars(self).copy() + + @dataclass class MediaInfo: # 内部标记:是否命中本地识别缓存,不参与序列化 diff --git a/app/modules/indexer/__init__.py b/app/modules/indexer/__init__.py index dcead0fb..121089e4 100644 --- a/app/modules/indexer/__init__.py +++ b/app/modules/indexer/__init__.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import List, Optional, Tuple, Union -from app.core.context import TorrentInfo +from app.core.context import SubtitleInfo, TorrentInfo from app.db.site_oper import SiteOper from app.helper.module import ModuleHelper from app.helper.sites import SitesHelper # noqa @@ -160,6 +160,24 @@ class IndexerModule(_ModuleBase): site_downloader=site.get("downloader"), **result) for result in result_array] + @staticmethod + def __parse_subtitle_result(site: dict, result_array: list, seconds: int) -> List[SubtitleInfo]: + """ + 解析字幕搜索结果为 SubtitleInfo 对象。 + """ + if not result_array or len(result_array) == 0: + logger.warn(f"{site.get('name')} 未搜索到字幕,耗时 {seconds} 秒") + return [] + logger.info( + f"{site.get('name')} 字幕搜索完成,耗时 {seconds} 秒,返回数据:{len(result_array)}") + return [SubtitleInfo(site=site.get("id"), + site_name=site.get("name"), + site_cookie=site.get("cookie"), + site_ua=site.get("ua"), + site_proxy=site.get("proxy"), + site_order=site.get("pri"), + **result) for result in result_array] + @staticmethod def get_search_page_size(site: dict, keyword: Optional[str] = None) -> Optional[int]: """ @@ -270,6 +288,47 @@ class IndexerModule(_ModuleBase): seconds=seconds ) + def search_subtitles(self, site: dict, + keyword: str = None, + page: Optional[int] = 0) -> List[SubtitleInfo]: + """ + 搜索一个站点的字幕资源。 + :param site: 站点 + :param keyword: 搜索关键词 + :param page: 页码 + :return: 字幕列表 + """ + + result = [] + start_time = datetime.now() + error_flag = False + + if not site.get("subtitles"): + return [] + + if not self.__search_check(site, keyword): + return [] + + search_word = self.__clear_search_text(keyword) + + try: + error_flag, result = 
self.__spider_search( + search_word=search_word, + indexer=site, + page=page, + search_type="subtitles" + ) + except Exception as err: + logger.error(f"{site.get('name')} 字幕搜索出错:{str(err)}") + + seconds = (datetime.now() - start_time).seconds + self.__indexer_statistic(site=site, error_flag=error_flag, seconds=seconds) + return self.__parse_subtitle_result( + site=site, + result_array=result, + seconds=seconds + ) + async def async_search_torrents(self, site: dict, keyword: str = None, mtype: MediaType = None, @@ -365,12 +424,54 @@ class IndexerModule(_ModuleBase): seconds=seconds ) + async def async_search_subtitles(self, site: dict, + keyword: str = None, + page: Optional[int] = 0) -> List[SubtitleInfo]: + """ + 异步搜索一个站点的字幕资源。 + :param site: 站点 + :param keyword: 搜索关键词 + :param page: 页码 + :return: 字幕列表 + """ + + result = [] + start_time = datetime.now() + error_flag = False + + if not site.get("subtitles"): + return [] + + if not self.__search_check(site, keyword): + return [] + + search_word = self.__clear_search_text(keyword) + + try: + error_flag, result = await self.__async_spider_search( + search_word=search_word, + indexer=site, + page=page, + search_type="subtitles" + ) + except Exception as err: + logger.error(f"{site.get('name')} 字幕搜索出错:{str(err)}") + + seconds = (datetime.now() - start_time).seconds + await self.__async_indexer_statistic(site=site, error_flag=error_flag, seconds=seconds) + return self.__parse_subtitle_result( + site=site, + result_array=result, + seconds=seconds + ) + @staticmethod def __spider_search(indexer: dict, search_word: Optional[str] = None, mtype: MediaType = None, cat: Optional[str] = None, - page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + page: Optional[int] = 0, + search_type: Optional[str] = "torrents") -> Tuple[bool, List[dict]]: """ 根据关键字搜索单个站点 :param: indexer: 站点配置 @@ -385,7 +486,8 @@ class IndexerModule(_ModuleBase): keyword=search_word, mtype=mtype, cat=cat, - page=page) + page=page, + search_type=search_type) 
try: return _spider.is_error, _spider.get_torrents() @@ -397,7 +499,8 @@ class IndexerModule(_ModuleBase): search_word: Optional[str] = None, mtype: MediaType = None, cat: Optional[str] = None, - page: Optional[int] = 0) -> Tuple[bool, List[dict]]: + page: Optional[int] = 0, + search_type: Optional[str] = "torrents") -> Tuple[bool, List[dict]]: """ 异步根据关键字搜索单个站点 :param: indexer: 站点配置 @@ -412,7 +515,8 @@ class IndexerModule(_ModuleBase): keyword=search_word, mtype=mtype, cat=cat, - page=page) + page=page, + search_type=search_type) try: result = await _spider.async_get_torrents() diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index 6de3053e..64d9f364 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -43,7 +43,8 @@ class SiteSpider: mtype: MediaType = None, cat: Optional[str] = None, page: Optional[int] = 0, - referer: Optional[str] = None): + referer: Optional[str] = None, + search_type: Optional[str] = "torrents"): """ 设置查询参数 :param indexer: 索引器 @@ -58,20 +59,32 @@ class SiteSpider: self.keyword = keyword self.cat = cat self.mtype = mtype + self.search_type = search_type or "torrents" self.indexerid = indexer.get('id') self.indexername = indexer.get('name') - self.search = indexer.get('search') - self.batch = indexer.get('batch') - self.browse = indexer.get('browse') - self.category = indexer.get('category') - self.list = indexer.get('torrents').get('list', {}) - self.fields = indexer.get('torrents').get('fields') - if not keyword and self.browse: - self.list = self.browse.get('list') or self.list - self.fields = self.browse.get('fields') or self.fields + if self.search_type == "subtitles": + subtitle_conf = indexer.get('subtitles') or {} + self.search = subtitle_conf.get('search') + self.batch = subtitle_conf.get('batch') + self.browse = subtitle_conf.get('browse') + self.category = subtitle_conf.get('category') + self.list = subtitle_conf.get('list') or {} + 
self.fields = subtitle_conf.get('fields') or {} + result_num = subtitle_conf.get('result_num') or indexer.get('result_num') + else: + self.search = indexer.get('search') + self.batch = indexer.get('batch') + self.browse = indexer.get('browse') + self.category = indexer.get('category') + self.list = (indexer.get('torrents') or {}).get('list', {}) + self.fields = (indexer.get('torrents') or {}).get('fields') or {} + if not keyword and self.browse: + self.list = self.browse.get('list') or self.list + self.fields = self.browse.get('fields') or self.fields + result_num = indexer.get('result_num') self._field_templates = self.__build_field_templates() self.domain = indexer.get('domain') - self.result_num = int(indexer.get('result_num') or self.default_result_num()) + self.result_num = int(result_num or self.default_result_num()) self._timeout = int(indexer.get('timeout') or 15) self.page = page if self.domain and not str(self.domain).endswith("/"): @@ -399,6 +412,30 @@ class SiteSpider: else: self.torrents_info['enclosure'] = download_link + def __get_report_url(self, torrent: Any): + """ + 获取字幕举报页面链接。 + """ + if 'report' not in self.fields: + return + selector = self.fields.get('report', {}) + item = self._safe_query(torrent, selector) + report_link = self.__filter_text(item, selector.get('filters')) + if report_link: + self.torrents_info['report_url'] = self.__normalize_link(report_link) + + def __get_language_icon(self, torrent: Any): + """ + 获取字幕语言图标链接。 + """ + if 'language_icon' not in self.fields: + return + selector = self.fields.get('language_icon', {}) + item = self._safe_query(torrent, selector) + icon_link = self.__filter_text(item, selector.get('filters')) + if icon_link: + self.torrents_info['language_icon'] = self.__normalize_link(icon_link) + def __get_imdbid(self, torrent: Any): # imdbid if "imdbid" not in self.fields: @@ -600,6 +637,49 @@ class SiteSpider: else: self.torrents_info['category'] = MediaType.UNKNOWN.value + def __get_subtitle_field(self, 
torrent: Any, field_name: str): + """ + 按配置读取字幕字段。 + """ + selector = self.fields.get(field_name, {}) + if not selector: + return + item = self._safe_query(torrent, selector) + value = self.__filter_text(item, selector.get('filters')) + if value is not None: + self.torrents_info[field_name] = value + + def __fill_subtitle_ids(self): + """ + 从字幕下载链接中补充站点种子ID和字幕ID。 + """ + enclosure = self.torrents_info.get("enclosure") + if not enclosure: + return + query_params = parse_qs(urlparse(enclosure).query) + if not self.torrents_info.get("torrent_id"): + torrent_id = query_params.get("torrentid") or query_params.get("torrent_id") + if torrent_id: + self.torrents_info["torrent_id"] = torrent_id[0] + if not self.torrents_info.get("subtitle_id"): + subtitle_id = query_params.get("subid") or query_params.get("subtitle") + if subtitle_id: + self.torrents_info["subtitle_id"] = subtitle_id[0] + + def __normalize_link(self, link: Optional[str]) -> Optional[str]: + """ + 将站点相对链接转换为绝对链接。 + """ + if not link: + return None + if not link.startswith("http"): + if link.startswith("//"): + return self.domain.split(":")[0] + ":" + link + if link.startswith("/"): + return self.domain + link[1:] + return self.domain + link + return link + def _safe_query(self, torrent: Any, selector_config: Optional[dict]) -> Optional[str]: """ 安全地执行PyQuery查询并自动清理资源 @@ -672,6 +752,34 @@ class SiteSpider: finally: self.torrents_info.clear() + def get_subtitle_info(self, subtitle: Any) -> dict: + """ + 解析单条字幕数据。 + """ + self.torrents_info = {} + try: + self.__get_title(subtitle) + self.__get_description(subtitle) + self.__get_detail(subtitle) + self.__get_download(subtitle) + self.__get_size(subtitle) + self.__get_pubdate(subtitle) + self.__get_date_elapsed(subtitle) + self.__get_grabs(subtitle) + self.__get_language_icon(subtitle) + self.__get_report_url(subtitle) + for field_name in ( + "language", "uploader", "torrent_id", "subtitle_id", "file_name" + ): + self.__get_subtitle_field(subtitle, field_name) + 
self.__fill_subtitle_ids() + return self.torrents_info.copy() if self.torrents_info else {} + except Exception as err: + logger.error("%s 字幕搜索出现错误:%s" % (self.indexername, str(err))) + return {} + finally: + self.torrents_info.clear() + @staticmethod def __filter_text(text: Optional[str], filters: Optional[List[dict]]) -> str: """ @@ -758,16 +866,17 @@ class SiteSpider: self.is_error = True return [] - rust_torrents = rust_accel.parse_indexer_torrents( - html_text=html_text, - domain=self.domain, - list_config=self.list, - fields=self.fields, - category=self.category, - result_num=self.result_num - ) - if rust_torrents is not None: - return rust_torrents + if self.search_type != "subtitles": + rust_torrents = rust_accel.parse_indexer_torrents( + html_text=html_text, + domain=self.domain, + list_config=self.list, + fields=self.fields, + category=self.category, + result_num=self.result_num + ) + if rust_torrents is not None: + return rust_torrents # 清空旧结果 self.torrents_info_array = [] @@ -785,7 +894,10 @@ class SiteSpider: torrent_query = PyQuery(torn) try: # 直接获取种子信息,避免深拷贝 - torrent_info = self.get_info(torrent_query) + if self.search_type == "subtitles": + torrent_info = self.get_subtitle_info(torrent_query) + else: + torrent_info = self.get_info(torrent_query) if torrent_info: # 浅拷贝即可,减少内存使用 self.torrents_info_array.append(torrent_info) diff --git a/app/schemas/context.py b/app/schemas/context.py index 27cb1362..9dce24ed 100644 --- a/app/schemas/context.py +++ b/app/schemas/context.py @@ -242,6 +242,54 @@ class TorrentInfo(BaseModel): freedate_diff: Optional[str] = None +class SubtitleInfo(BaseModel): + """ + 搜索字幕信息 + """ + # 站点ID + site: Optional[int] = None + # 站点名称 + site_name: Optional[str] = None + # 站点Cookie + site_cookie: Optional[str] = None + # 站点UA + site_ua: Optional[str] = None + # 站点是否使用代理 + site_proxy: Optional[bool] = False + # 站点优先级 + site_order: Optional[int] = 0 + # 字幕标题 + title: Optional[str] = None + # 字幕描述 + description: Optional[str] = None + 
# 字幕下载链接 + enclosure: Optional[str] = None + # 详情页面 + page_url: Optional[str] = None + # 语言 + language: Optional[str] = None + # 语言图标 + language_icon: Optional[str] = None + # 字幕大小 + size: Optional[float] = 0.0 + # 发布时间 + pubdate: Optional[str] = None + # 已过时间 + date_elapsed: Optional[str] = None + # 点击/下载次数 + grabs: Optional[int] = 0 + # 上传者 + uploader: Optional[str] = None + # 举报页面 + report_url: Optional[str] = None + # 种子ID + torrent_id: Optional[str] = None + # 字幕ID + subtitle_id: Optional[str] = None + # 下载文件名 + file_name: Optional[str] = None + + class Context(BaseModel): """ 上下文 diff --git a/docs/mcp-api.md b/docs/mcp-api.md index c3cb996b..006429fa 100644 --- a/docs/mcp-api.md +++ b/docs/mcp-api.md @@ -74,6 +74,38 @@ MoviePilot 实现了标准的 **Model Context Protocol (MCP)**,允许 AI 智 ## 6. RESTful API 所有工具相关的API端点都在 `/api/v1/mcp` 路径下(保持向后兼容)。 +### 相关 REST 端点 + +MoviePilot 也提供普通 REST API 给前端和自动化客户端使用。所有接口同样需要 API KEY 认证,在请求头中添加 `X-API-KEY: ` 或在查询参数中添加 `apikey=`。 + +#### 搜索 / 种子 / 字幕 + +| 方法 | 路径 | 说明 | +| :--- | :--- | :--- | +| GET | `/api/v1/search/media/{mediaid}` | 按媒体 ID 搜索站点种子资源,`mediaid` 支持 `tmdb:123`、`douban:123`、`bangumi:123`,参数:`mtype`、`area`、`title`、`year`、`season`、`sites` | +| GET | `/api/v1/search/media/{mediaid}/stream` | 按媒体 ID 渐进式搜索站点种子资源,返回 SSE,参数同上 | +| GET | `/api/v1/search/title` | 按关键字模糊搜索站点种子资源,参数:`keyword`、`page`、`sites` | +| GET | `/api/v1/search/title/stream` | 按关键字渐进式搜索站点种子资源,返回 SSE,参数:`keyword`、`page`、`sites` | +| GET | `/api/v1/search/subtitle/title` | 按关键字搜索站点字幕资源,参数:`keyword`、`page`、`sites` | +| GET | `/api/v1/search/subtitle/title/stream` | 按关键字渐进式搜索站点字幕资源,返回 SSE,参数:`keyword`、`page`、`sites` | +| GET | `/api/v1/search/last` | 获取上一次种子搜索结果 | +| GET | `/api/v1/search/last/context` | 获取上一次搜索结果及可复用搜索参数,`params.result_type` 为 `torrent` 或 `subtitle` | +| POST | `/api/v1/search/recommend` | 获取 AI 推荐资源,请求体:`filtered_indices`、`check_only`、`force` | + +#### 下载 + +| 方法 | 路径 | 说明 | +| :--- | :--- | :--- | +| GET | `/api/v1/download/` | 
查询正在下载的任务,参数:`name` | +| POST | `/api/v1/download/` | 添加含媒体信息的下载任务,请求体包含媒体信息和种子信息 | +| POST | `/api/v1/download/add` | 添加不含媒体信息的下载任务,请求体包含 `torrent_in`,可选 `tmdbid`、`doubanid`、`downloader`、`save_path` | +| POST | `/api/v1/download/subtitle` | 下载字幕到识别出的媒体下载目录,请求体包含 `subtitle_in`,可选 `tmdbid`、`doubanid`、`save_path` | +| GET | `/api/v1/download/start/{hashString}` | 恢复下载任务,参数:`name` | +| GET | `/api/v1/download/stop/{hashString}` | 暂停下载任务,参数:`name` | +| GET | `/api/v1/download/clients` | 查询可用下载器 | +| GET | `/api/v1/download/paths` | 查询可用于下载接口 `save_path` 参数的下载路径 | +| DELETE | `/api/v1/download/{hashString}` | 删除下载任务,参数:`name` | + ### 插件补充接口 **GET** `/api/v1/plugin/history/{plugin_id}` diff --git a/skills/moviepilot-api/SKILL.md b/skills/moviepilot-api/SKILL.md index 9c2b5b4c..5475a1bc 100644 --- a/skills/moviepilot-api/SKILL.md +++ b/skills/moviepilot-api/SKILL.md @@ -1,7 +1,7 @@ --- name: moviepilot-api version: 1 -description: Use this skill when you need to call MoviePilot REST API endpoints directly. Covers all 238 API endpoints across 27 categories including media search, downloads, subscriptions, library management, site management, system administration, plugins, workflows, and more. Use this skill whenever the user asks to interact with MoviePilot via its HTTP API, or when the moviepilot-cli skill cannot cover a specific operation. +description: Use this skill when you need to call MoviePilot REST API endpoints directly. Covers all 244 API endpoints across 27 categories including media search, downloads, subscriptions, library management, site management, system administration, plugins, workflows, and more. Use this skill whenever the user asks to interact with MoviePilot via its HTTP API, or when the moviepilot-cli skill cannot cover a specific operation. --- # MoviePilot REST API @@ -107,22 +107,28 @@ All endpoints are under the base URL `{MP_HOST}`. 
Path parameters are shown as ` | GET | `/api/v1/bangumi/person/{person_id}` | Person detail | | GET | `/api/v1/bangumi/person/credits/{person_id}` | Person filmography. Params: `page`, `count` | -### Search / Torrents (4 endpoints) +### Search / Torrents / Subtitles (9 endpoints) | Method | Path | Description | |--------|------|-------------| | GET | `/api/v1/search/media/{mediaid}` | Search torrents by media ID (format: `tmdb:123` / `douban:123` / `bangumi:123`). Params: `mtype`, `area`, `title`, `year`, `season`, `sites` | +| GET | `/api/v1/search/media/{mediaid}/stream` | Stream torrent search by media ID with SSE. Params: `mtype`, `area`, `title`, `year`, `season`, `sites` | | GET | `/api/v1/search/title` | Fuzzy search torrents by keyword. Params: `keyword`, `page`, `sites` | +| GET | `/api/v1/search/title/stream` | Stream fuzzy torrent search with SSE. Params: `keyword`, `page`, `sites` | +| GET | `/api/v1/search/subtitle/title` | Fuzzy search site subtitles by keyword. Params: `keyword`, `page`, `sites` | +| GET | `/api/v1/search/subtitle/title/stream` | Stream fuzzy site subtitle search with SSE. Params: `keyword`, `page`, `sites` | | GET | `/api/v1/search/last` | Get latest search results | +| GET | `/api/v1/search/last/context` | Get latest search results with replayable params. `params.result_type` is `torrent` or `subtitle` | | POST | `/api/v1/search/recommend` | AI recommended resources. Body: `filtered_indices`, `check_only`, `force` | -### Download (7 endpoints) +### Download (8 endpoints) | Method | Path | Description | |--------|------|-------------| | GET | `/api/v1/download/` | List active downloads. Params: `name` (downloader name) | | POST | `/api/v1/download/` | Add download (with media info). Body: JSON | | POST | `/api/v1/download/add` | Add download (without media info). Body: JSON with `torrent_url` | +| POST | `/api/v1/download/subtitle` | Download subtitle file to the recognized media download directory. 
Body: `subtitle_in`, optional `tmdbid`, `doubanid`, `save_path` | | GET | `/api/v1/download/start/{hashString}` | Resume download task | | GET | `/api/v1/download/stop/{hashString}` | Pause download task | | GET | `/api/v1/download/clients` | List available download clients | @@ -486,6 +492,19 @@ python scripts/mp-api.py GET /api/v1/search/last python scripts/mp-api.py POST /api/v1/download/add --json '{"torrent_url":""}' ``` +### Search and download subtitles + +```bash +# 1. Search site subtitles by keyword +python scripts/mp-api.py GET /api/v1/search/subtitle/title keyword="Inception" sites="1,2" + +# 2. Restore the last subtitle search with replayable params +python scripts/mp-api.py GET /api/v1/search/last/context + +# 3. Download a subtitle result to the recognized media directory +python scripts/mp-api.py POST /api/v1/download/subtitle --json '{"subtitle_in":{"title":"Inception.2010.1080p.chs","enclosure":"https://example.com/downloadsubs.php?torrentid=1&subid=2","site_name":"Example"},"tmdbid":27205}' +``` + ### Add a subscription ```bash diff --git a/tests/test_indexer_spider_search_url.py b/tests/test_indexer_spider_search_url.py index a67daebe..1907b59b 100644 --- a/tests/test_indexer_spider_search_url.py +++ b/tests/test_indexer_spider_search_url.py @@ -211,3 +211,59 @@ def test_python_spider_remove_does_not_pollute_other_fields(): "description": "Main description", "imdbid": "tt1234567", }] + + +def test_nexus_php_subtitle_table_parse_extracts_common_fields(): + """ + NexusPHP 字幕表格应解析出下载链接、语言、标题、时间、大小、点击、上传者等字段。 + """ + indexer = _build_indexer( + subtitles={ + "search": { + "paths": [{"path": "subtitles.php?search={keyword}&lang_id=0"}], + }, + "list": {"selector": "table tr:has(td.rowfollow)"}, + "fields": { + "language": {"selector": "td:nth-child(1) img", "attribute": "title"}, + "language_icon": {"selector": "td:nth-child(1) img", "attribute": "src"}, + "title": {"selector": "td:nth-child(2) a"}, + "download": {"selector": "td:nth-child(2) a", 
"attribute": "href"}, + "date_added": {"selector": "td:nth-child(3) span", "attribute": "title"}, + "date_elapsed": {"selector": "td:nth-child(3) span"}, + "size": {"selector": "td:nth-child(4)"}, + "grabs": {"selector": "td:nth-child(5)"}, + "uploader": {"selector": "td:nth-child(6)"}, + "report": {"selector": "td:nth-child(7) a", "attribute": "href"}, + }, + }, + ) + html = """ + + + + + + + + + + +
语言标题
日本語739437-second-to-last-love-s03-2025-1080p-fod-web-dl-aac20-h264-magicstar-japanese-subtitle2月23天233.19 KB0匿名Report
+ """ + + result = SiteSpider(indexer, keyword="love", search_type="subtitles").parse(html) + + assert result == [{ + "title": "739437-second-to-last-love-s03-2025-1080p-fod-web-dl-aac20-h264-magicstar-japanese-subtitle", + "enclosure": "https://example.com/downloadsubs.php?torrentid=514068&subid=2179", + "size": 238787, + "pubdate": "2026-03-17 19:48:55", + "date_elapsed": "2月23天", + "grabs": 0, + "language_icon": "https://example.com/pic/flag/japan.gif", + "report_url": "https://example.com/report.php?subtitle=2179", + "language": "日本語", + "uploader": "匿名", + "torrent_id": "514068", + "subtitle_id": "2179", + }]