From ca0127cc87ad84bfb717ae02c81b6b7f3def6697 Mon Sep 17 00:00:00 2001
From: jxxghp <jxxghp@gmail.com>
Date: Sun, 17 May 2026 11:43:50 +0800
Subject: [PATCH] fix: adapt site imdb search urls

---
 app/modules/indexer/spider/__init__.py  |  26 +++-
 tests/test_indexer_spider_search_url.py | 156 ++++++++++++++++++++++++
 2 files changed, 178 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_indexer_spider_search_url.py

diff --git a/app/modules/indexer/spider/__init__.py b/app/modules/indexer/spider/__init__.py
index ef2bba9d..bcbf9e63 100644
--- a/app/modules/indexer/spider/__init__.py
+++ b/app/modules/indexer/spider/__init__.py
@@ -3,7 +3,7 @@ import re
 import traceback
 from typing import Any, Optional
 from typing import List
-from urllib.parse import quote, urlencode, urlparse, parse_qs
+from urllib.parse import quote, urlparse, parse_qs
 
 from fastapi.concurrency import run_in_threadpool
 from jinja2 import Template
@@ -14,6 +14,7 @@ from app.log import logger
 from app.schemas.types import MediaType
 from app.utils.http import RequestUtils, AsyncRequestUtils
 from app.utils.string import StringUtils
+from app.utils.url import UrlUtils
 
 
 class SiteSpider:
@@ -120,14 +121,15 @@ class SiteSpider:
                 search_word = self.keyword
                 # 查询模式与
                 search_mode = "0"
+            is_imdbid_search = isinstance(self.keyword, str) and re.fullmatch(r"tt\d+", self.keyword)
+            search_word = self.__format_search_word(search_word)
 
             # 搜索URL
             indexer_params = self.search.get("params", {}).copy()
             if indexer_params:
                 search_area = indexer_params.get('search_area')
                 # search_area非0表示支持imdbid搜索
-                if (search_area and
-                        (not self.keyword or not self.keyword.startswith('tt'))):
+                if search_area and not is_imdbid_search:
                     # 支持imdbid搜索，但关键字不是imdbid时，不启用imdbid搜索
                     indexer_params.pop('search_area')
                 # 变量字典
@@ -168,7 +170,7 @@ class SiteSpider:
                             params.update({
                                 "cat%s" % cat.get("id"): 1
                             })
-                searchurl = self.domain + torrentspath + "?" + urlencode(params)
+                searchurl = UrlUtils.combine_url(self.domain, torrentspath, params)
             else:
                 # 变量字典
                 inputs_dict = {
@@ -200,6 +202,22 @@ class SiteSpider:
 
         return searchurl
 
+    def __format_search_word(self, search_word: str) -> str:
+        """
+        Rewrite a ``tt<digits>`` IMDb ID via the site's ``imdbid_format`` template; return other keywords as-is.
+        """
+        if not search_word or not isinstance(search_word, str):
+            return search_word
+        if re.fullmatch(r"tt\d+", search_word):
+            imdbid_format = self.search.get("imdbid_format")
+            if imdbid_format:
+                return str(imdbid_format).format(
+                    keyword=search_word,
+                    imdbid=search_word,
+                    imdbid_num=search_word[2:]
+                )
+        return search_word
+
     def get_torrents(self) -> List[dict]:
         """
         开始请求
diff --git a/tests/test_indexer_spider_search_url.py b/tests/test_indexer_spider_search_url.py
new file mode 100644
index 00000000..da385b7b
--- /dev/null
+++ b/tests/test_indexer_spider_search_url.py
@@ -0,0 +1,156 @@
+from urllib.parse import parse_qs, urlparse
+
+from app.modules.indexer.spider import SiteSpider
+from app.schemas.types import MediaType
+
+
+def _build_indexer(**kwargs):
+    """
+    Build the minimal site config SiteSpider needs to generate a search URL; kwargs override top-level keys.
+    """
+    indexer = {
+        "id": "test",
+        "name": "测试站点",
+        "domain": "https://example.com/",
+        "search": {
+            "paths": [{"path": "torrents.php"}],
+            "params": {"search": "{keyword}"},
+        },
+        "torrents": {"list": {}, "fields": {}},
+    }
+    indexer.update(kwargs)
+    return indexer
+
+
+def _get_search_url(indexer: dict, keyword: str | list[str], mtype: MediaType = None) -> str:
+    """
+    Call SiteSpider's private URL-building method directly so that no real request is sent to the site.
+    """
+    spider = SiteSpider(indexer=indexer, keyword=keyword, mtype=mtype)
+    return spider._SiteSpider__get_search_url()
+
+
+def test_eastgame_imdb_search_uses_imdb_area():
+    """
+    TLF: an IMDb ID search should use the IMDb search area configured for the site.
+    """
+    indexer = _build_indexer(
+        id="eastgame",
+        domain="https://pt.eastgame.org/",
+        search={
+            "paths": [{"path": "torrents.php"}],
+            "params": {
+                "search_area": 4,
+                "search": "{keyword}",
+            },
+        },
+    )
+
+    parsed_url = urlparse(_get_search_url(indexer, "tt16311594"))
+    query = parse_qs(parsed_url.query)
+
+    assert parsed_url.geturl().startswith("https://pt.eastgame.org/torrents.php?")
+    assert query["search"] == ["tt16311594"]
+    assert query["search_area"] == ["4"]
+
+
+def test_eastgame_title_search_keeps_title_area():
+    """
+    TLF: a plain title search must not use the IMDb search area by mistake.
+    """
+    indexer = _build_indexer(
+        id="eastgame",
+        domain="https://pt.eastgame.org/",
+        search={
+            "paths": [{"path": "torrents.php"}],
+            "params": {
+                "search_area": 4,
+                "search": "{keyword}",
+            },
+        },
+    )
+
+    query = parse_qs(urlparse(_get_search_url(indexer, "普通标题")).query)
+
+    assert query["search"] == ["普通标题"]
+    assert query["search_area"] == ["0"]
+
+
+def test_eastgame_batch_search_keeps_title_area():
+    """
+    TLF: a batch search is not a single IMDb ID, so it must not trigger the IMDb search area.
+    """
+    indexer = _build_indexer(
+        id="eastgame",
+        domain="https://pt.eastgame.org/",
+        search={
+            "paths": [{"path": "torrents.php"}],
+            "params": {
+                "search_area": 4,
+                "search": "{keyword}",
+            },
+        },
+    )
+
+    query = parse_qs(urlparse(_get_search_url(indexer, ["tt1234567", "tt7654321"])).query)
+
+    assert query["search"] == ["tt1234567 tt7654321"]
+    assert query["search_mode"] == ["1"]
+    assert query["search_area"] == ["0"]
+
+
+def test_ttg_imdb_search_formats_keyword_and_keeps_existing_query():
+    """
+    TTG: IMDb search needs the tt prefix converted, and a path with its own query string must not yield a double '?'.
+    """
+    indexer = _build_indexer(
+        id="ttg",
+        domain="https://totheglory.im/",
+        search={
+            "paths": [{"path": "browse.php?c=M"}],
+            "params": {
+                "search_field": "{keyword}",
+                "c": "M",
+            },
+            "imdbid_format": "imdb{imdbid_num}",
+        },
+        category={
+            "field": "search_field",
+            "delimiter": " 分类:",
+            "movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
+        },
+    )
+
+    search_url = _get_search_url(indexer, "tt0049406", MediaType.MOVIE)
+    query = parse_qs(urlparse(search_url).query)
+
+    assert search_url.count("?") == 1
+    assert query["c"] == ["M"]
+    assert query["search_field"] == ["imdb0049406 分类:电影DVDRip"]
+
+
+def test_ttg_title_search_does_not_format_keyword():
+    """
+    TTG: a plain title search must not be affected by the IMDb ID formatting rule.
+    """
+    indexer = _build_indexer(
+        id="ttg",
+        domain="https://totheglory.im/",
+        search={
+            "paths": [{"path": "browse.php?c=M"}],
+            "params": {
+                "search_field": "{keyword}",
+                "c": "M",
+            },
+            "imdbid_format": "imdb{imdbid_num}",
+        },
+        category={
+            "field": "search_field",
+            "delimiter": " 分类:",
+            "movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
+        },
+    )
+
+    query = parse_qs(urlparse(_get_search_url(indexer, "The Movie", MediaType.MOVIE)).query)
+
+    assert query["search_field"] == ["The Movie 分类:电影DVDRip"]