mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-06-05 07:26:48 +00:00
fix: adapt site imdb search urls
This commit is contained in:
@@ -3,7 +3,7 @@ import re
|
||||
import traceback
|
||||
from typing import Any, Optional
|
||||
from typing import List
|
||||
from urllib.parse import quote, urlencode, urlparse, parse_qs
|
||||
from urllib.parse import quote, urlparse, parse_qs
|
||||
|
||||
from fastapi.concurrency import run_in_threadpool
|
||||
from jinja2 import Template
|
||||
@@ -14,6 +14,7 @@ from app.log import logger
|
||||
from app.schemas.types import MediaType
|
||||
from app.utils.http import RequestUtils, AsyncRequestUtils
|
||||
from app.utils.string import StringUtils
|
||||
from app.utils.url import UrlUtils
|
||||
|
||||
|
||||
class SiteSpider:
|
||||
@@ -120,14 +121,15 @@ class SiteSpider:
|
||||
search_word = self.keyword
|
||||
# 查询模式与
|
||||
search_mode = "0"
|
||||
is_imdbid_search = isinstance(self.keyword, str) and re.fullmatch(r"tt\d+", self.keyword)
|
||||
search_word = self.__format_search_word(search_word)
|
||||
|
||||
# 搜索URL
|
||||
indexer_params = self.search.get("params", {}).copy()
|
||||
if indexer_params:
|
||||
search_area = indexer_params.get('search_area')
|
||||
# search_area非0表示支持imdbid搜索
|
||||
if (search_area and
|
||||
(not self.keyword or not self.keyword.startswith('tt'))):
|
||||
if search_area and not is_imdbid_search:
|
||||
# 支持imdbid搜索,但关键字不是imdbid时,不启用imdbid搜索
|
||||
indexer_params.pop('search_area')
|
||||
# 变量字典
|
||||
@@ -168,7 +170,7 @@ class SiteSpider:
|
||||
params.update({
|
||||
"cat%s" % cat.get("id"): 1
|
||||
})
|
||||
searchurl = self.domain + torrentspath + "?" + urlencode(params)
|
||||
searchurl = UrlUtils.combine_url(self.domain, torrentspath, params)
|
||||
else:
|
||||
# 变量字典
|
||||
inputs_dict = {
|
||||
@@ -200,6 +202,22 @@ class SiteSpider:
|
||||
|
||||
return searchurl
|
||||
|
||||
def __format_search_word(self, search_word: str) -> str:
    """
    Convert the search keyword according to the site configuration, to
    support sites that expect IMDb IDs in a special query format.

    When the keyword is a bare IMDb ID (``tt`` followed by digits) and the
    site's ``search`` configuration defines ``imdbid_format``, that template
    is rendered with these placeholders:

    - ``{keyword}`` / ``{imdbid}``: the full IMDb ID, e.g. ``tt0049406``
    - ``{imdbid_num}``: the ID without the ``tt`` prefix, e.g. ``0049406``

    :param search_word: keyword to convert
    :return: the converted keyword; the input is returned unchanged when it
             is empty, not a string, not an IMDb ID, when no template is
             configured, or when the template cannot be rendered
    """
    if not search_word or not isinstance(search_word, str):
        return search_word
    if not re.fullmatch(r"tt\d+", search_word):
        return search_word

    imdbid_format = self.search.get("imdbid_format")
    if not imdbid_format:
        return search_word

    try:
        return str(imdbid_format).format(
            keyword=search_word,
            imdbid=search_word,
            imdbid_num=search_word[2:],
        )
    except (KeyError, IndexError, ValueError, AttributeError):
        # The template comes from site configuration. An unknown placeholder,
        # a positional field or an unbalanced brace must not break the whole
        # search, so fall back to the unformatted IMDb ID.
        return search_word
|
||||
|
||||
def get_torrents(self) -> List[dict]:
|
||||
"""
|
||||
开始请求
|
||||
|
||||
156
tests/test_indexer_spider_search_url.py
Normal file
156
tests/test_indexer_spider_search_url.py
Normal file
@@ -0,0 +1,156 @@
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from app.modules.indexer.spider import SiteSpider
|
||||
from app.schemas.types import MediaType
|
||||
|
||||
|
||||
def _build_indexer(**kwargs):
    """
    Build the minimal site configuration SiteSpider needs to generate a
    search URL.

    Any keyword argument replaces the default top-level entry of the same
    name or adds a new one.
    """
    defaults = {
        "id": "test",
        "name": "测试站点",
        "domain": "https://example.com/",
        "search": {
            "paths": [{"path": "torrents.php"}],
            "params": {"search": "{keyword}"},
        },
        "torrents": {"list": {}, "fields": {}},
    }
    # Overrides come last so they win over the defaults.
    return {**defaults, **kwargs}
|
||||
|
||||
|
||||
def _get_search_url(indexer: dict, keyword: str | list[str], mtype: MediaType | None = None) -> str:
    """
    Build the search URL for a keyword through SiteSpider's private URL
    construction logic, so that no real request is sent to the site.

    :param indexer: site configuration passed to SiteSpider
    :param keyword: a single keyword, or a list of keywords for batch search
    :param mtype: optional media type passed to SiteSpider
    :return: the search URL produced by the spider
    """
    spider = SiteSpider(indexer=indexer, keyword=keyword, mtype=mtype)
    # The method is declared as ``__get_search_url`` inside the class, so it
    # is only reachable from outside under its name-mangled form.
    return spider._SiteSpider__get_search_url()
|
||||
|
||||
|
||||
def _build_eastgame_indexer():
    """
    Build the TLF (eastgame) site configuration shared by the tests below:
    a ``search_area`` of 4 alongside the keyword parameter.
    """
    return _build_indexer(
        id="eastgame",
        domain="https://pt.eastgame.org/",
        search={
            "paths": [{"path": "torrents.php"}],
            "params": {
                "search_area": 4,
                "search": "{keyword}",
            },
        },
    )


def test_eastgame_imdb_search_uses_imdb_area():
    """
    When TLF is searched by IMDb ID, the site's configured IMDb search area
    should be used.
    """
    parsed_url = urlparse(_get_search_url(_build_eastgame_indexer(), "tt16311594"))
    query = parse_qs(parsed_url.query)

    assert parsed_url.geturl().startswith("https://pt.eastgame.org/torrents.php?")
    assert query["search"] == ["tt16311594"]
    assert query["search_area"] == ["4"]


def test_eastgame_title_search_keeps_title_area():
    """
    A plain title search on TLF must not use the IMDb search area by mistake.
    """
    query = parse_qs(urlparse(_get_search_url(_build_eastgame_indexer(), "普通标题")).query)

    assert query["search"] == ["普通标题"]
    assert query["search_area"] == ["0"]


def test_eastgame_batch_search_keeps_title_area():
    """
    A TLF batch search is not a single IMDb ID, so it must not trigger the
    IMDb search area.
    """
    query = parse_qs(
        urlparse(_get_search_url(_build_eastgame_indexer(), ["tt1234567", "tt7654321"])).query
    )

    assert query["search"] == ["tt1234567 tt7654321"]
    assert query["search_mode"] == ["1"]
    assert query["search_area"] == ["0"]
|
||||
|
||||
|
||||
def _build_ttg_indexer():
    """
    Build the TTG site configuration shared by the tests below: a search
    path that already carries a query string, an ``imdbid_format`` template,
    and a category appended to the search field.
    """
    return _build_indexer(
        id="ttg",
        domain="https://totheglory.im/",
        search={
            "paths": [{"path": "browse.php?c=M"}],
            "params": {
                "search_field": "{keyword}",
                "c": "M",
            },
            "imdbid_format": "imdb{imdbid_num}",
        },
        category={
            "field": "search_field",
            "delimiter": " 分类:",
            "movie": [{"id": "电影DVDRip", "cat": "Movies/SD"}],
        },
    )


def test_ttg_imdb_search_formats_keyword_and_keeps_existing_query():
    """
    TTG IMDb searches need the ``tt`` prefix converted, and a path that
    already carries query parameters must not yield a double question mark.
    """
    search_url = _get_search_url(_build_ttg_indexer(), "tt0049406", MediaType.MOVIE)
    query = parse_qs(urlparse(search_url).query)

    assert search_url.count("?") == 1
    assert query["c"] == ["M"]
    assert query["search_field"] == ["imdb0049406 分类:电影DVDRip"]


def test_ttg_title_search_does_not_format_keyword():
    """
    A plain title search on TTG must not be affected by the IMDb ID
    formatting rule.
    """
    query = parse_qs(
        urlparse(_get_search_url(_build_ttg_indexer(), "The Movie", MediaType.MOVIE)).query
    )

    assert query["search_field"] == ["The Movie 分类:电影DVDRip"]
|
||||
Reference in New Issue
Block a user