From ca0127cc87ad84bfb717ae02c81b6b7f3def6697 Mon Sep 17 00:00:00 2001 From: jxxghp Date: Sun, 17 May 2026 11:43:50 +0800 Subject: [PATCH] fix: adapt site imdb search urls --- app/modules/indexer/spider/__init__.py | 26 +++- tests/test_indexer_spider_search_url.py | 156 ++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 tests/test_indexer_spider_search_url.py diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py index ef2bba9d..bcbf9e63 100644 --- a/app/modules/indexer/spider/__init__.py +++ b/app/modules/indexer/spider/__init__.py @@ -3,7 +3,7 @@ import re import traceback from typing import Any, Optional from typing import List -from urllib.parse import quote, urlencode, urlparse, parse_qs +from urllib.parse import quote, urlparse, parse_qs from fastapi.concurrency import run_in_threadpool from jinja2 import Template @@ -14,6 +14,7 @@ from app.log import logger from app.schemas.types import MediaType from app.utils.http import RequestUtils, AsyncRequestUtils from app.utils.string import StringUtils +from app.utils.url import UrlUtils class SiteSpider: @@ -120,14 +121,15 @@ class SiteSpider: search_word = self.keyword # 查询模式与 search_mode = "0" + is_imdbid_search = isinstance(self.keyword, str) and re.fullmatch(r"tt\d+", self.keyword) + search_word = self.__format_search_word(search_word) # 搜索URL indexer_params = self.search.get("params", {}).copy() if indexer_params: search_area = indexer_params.get('search_area') # search_area非0表示支持imdbid搜索 - if (search_area and - (not self.keyword or not self.keyword.startswith('tt'))): + if search_area and not is_imdbid_search: # 支持imdbid搜索,但关键字不是imdbid时,不启用imdbid搜索 indexer_params.pop('search_area') # 变量字典 @@ -168,7 +170,7 @@ class SiteSpider: params.update({ "cat%s" % cat.get("id"): 1 }) - searchurl = self.domain + torrentspath + "?" + urlencode(params) + searchurl = UrlUtils.combine_url(self.domain, torrentspath, params) else: # 变量字典 inputs_dict = { @@ -200,6 +202,22 @@ class SiteSpider: return searchurl + def __format_search_word(self, search_word: str) -> str: + """ + 按站点配置转换搜索关键字,用于兼容站点特殊的 IMDb ID 查询格式。 + """ + if not search_word or not isinstance(search_word, str): + return search_word + if re.fullmatch(r"tt\d+", search_word): + imdbid_format = self.search.get("imdbid_format") + if imdbid_format: + return str(imdbid_format).format( + keyword=search_word, + imdbid=search_word, + imdbid_num=search_word[2:] + ) + return search_word + def get_torrents(self) -> List[dict]: """ 开始请求 diff --git a/tests/test_indexer_spider_search_url.py b/tests/test_indexer_spider_search_url.py new file mode 100644 index 00000000..da385b7b --- /dev/null +++ b/tests/test_indexer_spider_search_url.py @@ -0,0 +1,156 @@ +from urllib.parse import parse_qs, urlparse + +from app.modules.indexer.spider import SiteSpider +from app.schemas.types import MediaType + + +def _build_indexer(**kwargs): + """ + 构造 SiteSpider 生成搜索 URL 所需的最小站点配置。 + """ + indexer = { + "id": "test", + "name": "测试站点", + "domain": "https://example.com/", + "search": { + "paths": [{"path": "torrents.php"}], + "params": {"search": "{keyword}"}, + }, + "torrents": {"list": {}, "fields": {}}, + } + indexer.update(kwargs) + return indexer + + +def _get_search_url(indexer: dict, keyword: str | list[str], mtype: MediaType = None) -> str: + """ + 调用 SiteSpider 私有 URL 构造逻辑,避免真实请求站点。 + """ + spider = SiteSpider(indexer=indexer, keyword=keyword, mtype=mtype) + return spider._SiteSpider__get_search_url() + + +def test_eastgame_imdb_search_uses_imdb_area(): + """ + TLF 支持 IMDb ID 搜索时应使用站点配置的 IMDb 搜索区域。 + """ + indexer = _build_indexer( + id="eastgame", + domain="https://pt.eastgame.org/", + search={ + "paths": [{"path": "torrents.php"}], + "params": { + "search_area": 4, + "search": "{keyword}", + }, + }, + ) + + parsed_url = urlparse(_get_search_url(indexer, "tt16311594")) + query = parse_qs(parsed_url.query) + + assert parsed_url.geturl().startswith("https://pt.eastgame.org/torrents.php?") + assert query["search"] == ["tt16311594"] + assert query["search_area"] == ["4"] + + +def test_eastgame_title_search_keeps_title_area(): + """ + TLF 普通标题搜索不应误用 IMDb 搜索区域。 + """ + indexer = _build_indexer( + id="eastgame", + domain="https://pt.eastgame.org/", + search={ + "paths": [{"path": "torrents.php"}], + "params": { + "search_area": 4, + "search": "{keyword}", + }, + }, + ) + + query = parse_qs(urlparse(_get_search_url(indexer, "普通标题")).query) + + assert query["search"] == ["普通标题"] + assert query["search_area"] == ["0"] + + +def test_eastgame_batch_search_keeps_title_area(): + """ + TLF 批量搜索不是单个 IMDb ID,不能触发 IMDb 搜索区域。 + """ + indexer = _build_indexer( + id="eastgame", + domain="https://pt.eastgame.org/", + search={ + "paths": [{"path": "torrents.php"}], + "params": { + "search_area": 4, + "search": "{keyword}", + }, + }, + ) + + query = parse_qs(urlparse(_get_search_url(indexer, ["tt1234567", "tt7654321"])).query) + + assert query["search"] == ["tt1234567 tt7654321"] + assert query["search_mode"] == ["1"] + assert query["search_area"] == ["0"] + + +def test_ttg_imdb_search_formats_keyword_and_keeps_existing_query(): + """ + TTG 的 IMDb 搜索需要 tt 前缀转换,并且路径自带查询参数不能生成双问号。 + """ + indexer = _build_indexer( + id="ttg", + domain="https://totheglory.im/", + search={ + "paths": [{"path": "browse.php?c=M"}], + "params": { + "search_field": "{keyword}", + "c": "M", + }, + "imdbid_format": "imdb{imdbid_num}", + }, + category={ + "field": "search_field", + "delimiter": " 分类:", + "movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}], + }, + ) + + search_url = _get_search_url(indexer, "tt0049406", MediaType.MOVIE) + query = parse_qs(urlparse(search_url).query) + + assert search_url.count("?") == 1 + assert query["c"] == ["M"] + assert query["search_field"] == ["imdb0049406 分类:电影DVDRip"] + + +def test_ttg_title_search_does_not_format_keyword(): + """ + TTG 普通标题搜索不能被 IMDb ID 格式化规则影响。 + """ + indexer = _build_indexer( + id="ttg", + domain="https://totheglory.im/", + search={ + "paths": [{"path": "browse.php?c=M"}], + "params": { + "search_field": "{keyword}", + "c": "M", + }, + "imdbid_format": "imdb{imdbid_num}", + }, + category={ + "field": "search_field", + "delimiter": " 分类:", + "movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}], + }, + ) + + query = parse_qs(urlparse(_get_search_url(indexer, "The Movie", MediaType.MOVIE)).query) + + assert query["search_field"] == ["The Movie 分类:电影DVDRip"]