mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-30 07:26:48 +00:00
fix: simplify search web providers
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import asyncio
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Optional, Type
|
||||
@@ -12,7 +11,6 @@ from pydantic import BaseModel, Field
|
||||
from app.agent.tools.base import MoviePilotTool
|
||||
from app.core.config import settings
|
||||
from app.log import logger
|
||||
from app.utils.http import AsyncRequestUtils
|
||||
|
||||
# 搜索超时时间(秒)
|
||||
SEARCH_TIMEOUT = 20
|
||||
@@ -25,16 +23,16 @@ SEARCH_ENGINE_BACKENDS = (
|
||||
"auto",
|
||||
"duckduckgo",
|
||||
"google",
|
||||
"bing",
|
||||
"brave",
|
||||
"yahoo",
|
||||
"wikipedia",
|
||||
"yandex",
|
||||
"mojeek",
|
||||
)
|
||||
# 可显式调用的搜索 API 后端
|
||||
SEARCH_API_BACKENDS = ("exa", "tavily")
|
||||
SUPPORTED_SEARCH_ENGINES = SEARCH_API_BACKENDS + SEARCH_ENGINE_BACKENDS
|
||||
SUPPORTED_SEARCH_ENGINES = SEARCH_ENGINE_BACKENDS
|
||||
DDGS_AUTO_BACKEND = ",".join(
|
||||
backend for backend in SEARCH_ENGINE_BACKENDS if backend != DEFAULT_SEARCH_ENGINE
|
||||
)
|
||||
SITE_SEARCH_PATTERN = re.compile(r"\bsite:", re.IGNORECASE)
|
||||
|
||||
|
||||
@@ -64,8 +62,8 @@ class SearchWebInput(BaseModel):
|
||||
search_engine: Optional[str] = Field(
|
||||
DEFAULT_SEARCH_ENGINE,
|
||||
description=(
|
||||
"Search backend to use. Supported values: auto, exa, tavily, "
|
||||
"duckduckgo, google, bing, brave, yahoo, wikipedia, yandex, mojeek. "
|
||||
"Search backend to use. Supported values: auto, duckduckgo, google, "
|
||||
"brave, yahoo, wikipedia, yandex, mojeek. "
|
||||
"Use auto unless the user asks for a specific search engine."
|
||||
),
|
||||
)
|
||||
@@ -80,15 +78,16 @@ class SearchWebInput(BaseModel):
|
||||
|
||||
class SearchWebTool(MoviePilotTool):
|
||||
"""
|
||||
网络搜索工具,支持 API 搜索、搜索引擎搜索和指定站点限定搜索。
|
||||
网络搜索工具,支持 DDGS 搜索引擎和指定站点限定搜索。
|
||||
"""
|
||||
|
||||
name: str = "search_web"
|
||||
description: str = (
|
||||
"Search the web for information when you need current information, facts, "
|
||||
"or references. Supports automatic API-backed search, explicit search "
|
||||
"engine selection, and site_url-limited searches for a specified website "
|
||||
"or URL. Returns search results with titles, snippets, and URLs."
|
||||
"or references. Supports DDGS-backed search engine selection, automatic "
|
||||
"fallback, and site_url-limited searches for a specified website "
|
||||
"or URL. Uses the configured system proxy by default. Returns search "
|
||||
"results with titles, snippets, and URLs."
|
||||
)
|
||||
args_schema: Type[BaseModel] = SearchWebInput
|
||||
|
||||
@@ -175,6 +174,7 @@ class SearchWebTool(MoviePilotTool):
|
||||
"""规范化搜索源参数"""
|
||||
engine = (search_engine or DEFAULT_SEARCH_ENGINE).strip().lower()
|
||||
aliases = {
|
||||
"ddgs": DEFAULT_SEARCH_ENGINE,
|
||||
"ddg": "duckduckgo",
|
||||
"duck": "duckduckgo",
|
||||
"search": DEFAULT_SEARCH_ENGINE,
|
||||
@@ -187,14 +187,7 @@ class SearchWebTool(MoviePilotTool):
|
||||
"""根据搜索源配置生成兜底搜索顺序"""
|
||||
if search_engine != DEFAULT_SEARCH_ENGINE:
|
||||
return [search_engine]
|
||||
|
||||
search_plan: List[str] = []
|
||||
if settings.EXA_API_KEY:
|
||||
search_plan.append("exa")
|
||||
if SearchWebTool._choose_tavily_api_key():
|
||||
search_plan.append("tavily")
|
||||
search_plan.append(DEFAULT_SEARCH_ENGINE)
|
||||
return search_plan
|
||||
return [DEFAULT_SEARCH_ENGINE]
|
||||
|
||||
async def _search_with_backend(
|
||||
self,
|
||||
@@ -212,148 +205,20 @@ class SearchWebTool(MoviePilotTool):
|
||||
:param site_filter: 站点限定条件
|
||||
:return: 搜索结果列表
|
||||
"""
|
||||
if engine == "exa":
|
||||
logger.info("使用 Exa 进行搜索...")
|
||||
return await self._search_exa(query, max_results, site_filter)
|
||||
if engine == "tavily":
|
||||
logger.info("使用 Tavily 进行搜索...")
|
||||
return await self._search_tavily(query, max_results, site_filter)
|
||||
|
||||
logger.info(f"使用搜索引擎 {engine} 进行搜索...")
|
||||
return await self._search_duckduckgo(query, max_results, engine, site_filter)
|
||||
logger.info(f"使用 DDGS 搜索后端 {self._get_ddgs_backend(engine)} 进行搜索...")
|
||||
return await self._search_ddgs(query, max_results, engine, site_filter)
|
||||
|
||||
@staticmethod
|
||||
async def _search_tavily(
|
||||
query: str,
|
||||
max_results: int,
|
||||
site_filter: Optional[_SearchSiteFilter] = None,
|
||||
) -> List[Dict]:
|
||||
"""使用 Tavily API 进行搜索"""
|
||||
response = None
|
||||
try:
|
||||
# 从设置中随机选择一个 API Key(如果有多个)
|
||||
tavily_api_key = SearchWebTool._choose_tavily_api_key()
|
||||
if not tavily_api_key:
|
||||
return []
|
||||
payload = {
|
||||
"api_key": tavily_api_key,
|
||||
"query": query,
|
||||
"search_depth": "basic",
|
||||
"max_results": max_results,
|
||||
"include_answer": False,
|
||||
"include_images": False,
|
||||
"include_raw_content": False,
|
||||
}
|
||||
if site_filter:
|
||||
payload["include_domains"] = [site_filter.domain]
|
||||
def _get_ddgs_backend(search_engine: str) -> str:
|
||||
"""
|
||||
获取实际传给 DDGS 的搜索后端。
|
||||
|
||||
response = await AsyncRequestUtils(
|
||||
ua=settings.USER_AGENT,
|
||||
proxies=settings.PROXY,
|
||||
timeout=SEARCH_TIMEOUT,
|
||||
content_type="application/json",
|
||||
accept_type="application/json",
|
||||
).post_res(
|
||||
"https://api.tavily.com/search",
|
||||
json=payload,
|
||||
)
|
||||
if not response or response.status_code != 200:
|
||||
status_code = response.status_code if response else "无响应"
|
||||
logger.warning(f"Tavily 搜索失败,HTTP状态码: {status_code}")
|
||||
return []
|
||||
data = response.json()
|
||||
|
||||
results = []
|
||||
for result in data.get("results", []):
|
||||
results.append(
|
||||
{
|
||||
"title": result.get("title", ""),
|
||||
"snippet": result.get("content", ""),
|
||||
"url": result.get("url", ""),
|
||||
"source": "Tavily",
|
||||
}
|
||||
)
|
||||
return SearchWebTool._filter_results_by_site(results, site_filter)
|
||||
except Exception as e:
|
||||
logger.warning(f"Tavily 搜索失败: {e}")
|
||||
return []
|
||||
finally:
|
||||
if response is not None:
|
||||
await response.aclose()
|
||||
|
||||
@staticmethod
|
||||
def _choose_tavily_api_key() -> Optional[str]:
|
||||
"""从配置中选择一个可用的 Tavily API Key"""
|
||||
api_keys = settings.TAVILY_API_KEY
|
||||
if not api_keys:
|
||||
return None
|
||||
if isinstance(api_keys, str):
|
||||
api_keys = [api_keys]
|
||||
available_api_keys = [api_key for api_key in api_keys if api_key]
|
||||
if not available_api_keys:
|
||||
return None
|
||||
return random.choice(available_api_keys)
|
||||
|
||||
@staticmethod
|
||||
async def _search_exa(
|
||||
query: str,
|
||||
max_results: int,
|
||||
site_filter: Optional[_SearchSiteFilter] = None,
|
||||
) -> List[Dict]:
|
||||
"""使用 Exa API 进行搜索"""
|
||||
response = None
|
||||
try:
|
||||
if not settings.EXA_API_KEY:
|
||||
return []
|
||||
payload = {
|
||||
"query": query,
|
||||
"numResults": max_results,
|
||||
"type": "auto",
|
||||
"contents": {"highlights": {"maxCharacters": 2000}},
|
||||
}
|
||||
if site_filter:
|
||||
payload["includeDomains"] = [site_filter.domain]
|
||||
|
||||
response = await AsyncRequestUtils(
|
||||
headers={
|
||||
"x-api-key": settings.EXA_API_KEY,
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json",
|
||||
"User-Agent": settings.USER_AGENT,
|
||||
},
|
||||
proxies=settings.PROXY,
|
||||
timeout=SEARCH_TIMEOUT,
|
||||
).post_res(
|
||||
"https://api.exa.ai/search",
|
||||
json=payload,
|
||||
)
|
||||
if not response or response.status_code != 200:
|
||||
status_code = response.status_code if response else "无响应"
|
||||
logger.warning(f"Exa 搜索失败,HTTP状态码: {status_code}")
|
||||
return []
|
||||
data = response.json()
|
||||
|
||||
results = []
|
||||
for result in data.get("results", []):
|
||||
highlights = result.get("highlights", [])
|
||||
snippet = (
|
||||
highlights[0] if highlights else result.get("text", "")[:500]
|
||||
)
|
||||
results.append(
|
||||
{
|
||||
"title": result.get("title", ""),
|
||||
"snippet": snippet,
|
||||
"url": result.get("url", ""),
|
||||
"source": "Exa",
|
||||
}
|
||||
)
|
||||
return SearchWebTool._filter_results_by_site(results, site_filter)
|
||||
except Exception as e:
|
||||
logger.warning(f"Exa 搜索失败: {e}")
|
||||
return []
|
||||
finally:
|
||||
if response is not None:
|
||||
await response.aclose()
|
||||
:param search_engine: 用户指定的搜索源
|
||||
:return: DDGS 后端名称或逗号分隔的后端列表
|
||||
"""
|
||||
if search_engine == DEFAULT_SEARCH_ENGINE:
|
||||
return DDGS_AUTO_BACKEND
|
||||
return search_engine
|
||||
|
||||
@staticmethod
|
||||
def _normalize_site_filter(site_url: Optional[str]) -> Optional[_SearchSiteFilter]:
|
||||
@@ -475,10 +340,9 @@ class SearchWebTool(MoviePilotTool):
|
||||
:return: 展示名称
|
||||
"""
|
||||
labels = {
|
||||
"auto": "SearchEngine",
|
||||
"auto": "DDGS",
|
||||
"duckduckgo": "DuckDuckGo",
|
||||
"google": "Google",
|
||||
"bing": "Bing",
|
||||
"brave": "Brave",
|
||||
"yahoo": "Yahoo",
|
||||
"wikipedia": "Wikipedia",
|
||||
@@ -524,7 +388,7 @@ class SearchWebTool(MoviePilotTool):
|
||||
return proxy_setting.get("http") or proxy_setting.get("https")
|
||||
return proxy_setting
|
||||
|
||||
async def _search_duckduckgo(
|
||||
async def _search_ddgs(
|
||||
self,
|
||||
query: str,
|
||||
max_results: int,
|
||||
@@ -532,7 +396,7 @@ class SearchWebTool(MoviePilotTool):
|
||||
site_filter: Optional[_SearchSiteFilter] = None,
|
||||
) -> List[Dict]:
|
||||
"""
|
||||
使用搜索引擎后端进行搜索。
|
||||
使用 DDGS 搜索引擎后端进行搜索。
|
||||
|
||||
:param query: 搜索关键词
|
||||
:param max_results: 最大结果数
|
||||
@@ -555,12 +419,12 @@ class SearchWebTool(MoviePilotTool):
|
||||
ddgs_results = ddgs.text(
|
||||
query,
|
||||
max_results=max_results,
|
||||
backend=search_engine,
|
||||
backend=self._get_ddgs_backend(search_engine),
|
||||
)
|
||||
if ddgs_results:
|
||||
for result in ddgs_results:
|
||||
source = (
|
||||
result.get("provider")
|
||||
DEFAULT_SEARCH_ENGINE
|
||||
if search_engine == DEFAULT_SEARCH_ENGINE
|
||||
else search_engine
|
||||
)
|
||||
|
||||
@@ -588,14 +588,6 @@ class ConfigModel(BaseModel):
|
||||
AI_RECOMMEND_ENABLED: bool = False
|
||||
# AI推荐用户偏好
|
||||
AI_RECOMMEND_USER_PREFERENCE: str = ""
|
||||
# Tavily API密钥(用于网络搜索)
|
||||
TAVILY_API_KEY: List[str] = [
|
||||
"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",
|
||||
"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",
|
||||
"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",
|
||||
]
|
||||
# Exa API密钥(用于网络搜索)
|
||||
EXA_API_KEY: str = "161ce010-fb56-419c-9ea8-4fb459b96298"
|
||||
|
||||
# AI推荐条目数量限制
|
||||
AI_RECOMMEND_MAX_ITEMS: int = 50
|
||||
|
||||
@@ -159,7 +159,7 @@ MoviePilot 实现了标准的 **Model Context Protocol (MCP)**,允许 AI 智
|
||||
}
|
||||
```
|
||||
|
||||
`search_engine` 可选,支持 `auto`、`exa`、`tavily`、`duckduckgo`、`google`、`bing`、`brave`、`yahoo`、`wikipedia`、`yandex`、`mojeek`。`site_url` 可选,用于限定搜索到指定域名或 URL 路径范围。
|
||||
`search_engine` 可选,通过 DDGS 支持 `auto`、`duckduckgo`、`google`、`brave`、`yahoo`、`wikipedia`、`yandex`、`mojeek`。`site_url` 可选,用于限定搜索到指定域名或 URL 路径范围。搜索默认使用系统代理配置。
|
||||
|
||||
### 3. 获取工具详情
|
||||
|
||||
|
||||
@@ -40,9 +40,9 @@ dedicated tool can complete the task more directly and safely.
|
||||
- `browse_webpage` - Real browser actions: `goto`, `get_content`, `screenshot`,
|
||||
`click`, `fill`, `select`, `evaluate`, `wait`.
|
||||
- `search_web` - Find current pages or official references before opening a
|
||||
target URL. It supports `search_engine` (`auto`, `duckduckgo`, `google`,
|
||||
`bing`, `brave`, etc.) and `site_url` for limiting results to a specified
|
||||
domain or URL path.
|
||||
target URL. It supports DDGS-backed `search_engine` (`auto`, `duckduckgo`,
|
||||
`google`, `brave`, etc.) and `site_url` for limiting results to a specified
|
||||
domain or URL path. It uses the configured system proxy by default.
|
||||
- `query_sites` - Get MoviePilot site IDs before site-specific operations.
|
||||
- `update_site_cookie` - Update a configured site's Cookie and User-Agent using
|
||||
username, password, and optional two-step code.
|
||||
|
||||
@@ -3,7 +3,11 @@ import json
|
||||
import unittest
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from app.agent.tools.impl.search_web import DEFAULT_SEARCH_ENGINE, SearchWebTool
|
||||
from app.agent.tools.impl.search_web import (
|
||||
DDGS_AUTO_BACKEND,
|
||||
DEFAULT_SEARCH_ENGINE,
|
||||
SearchWebTool,
|
||||
)
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
@@ -28,7 +32,10 @@ class TestAgentSearchWebTool(unittest.TestCase):
|
||||
|
||||
self.assertEqual(
|
||||
"asyncio site:docs.python.org",
|
||||
SearchWebTool._build_search_query("asyncio site:docs.python.org", site_filter),
|
||||
SearchWebTool._build_search_query(
|
||||
"asyncio site:docs.python.org",
|
||||
site_filter,
|
||||
),
|
||||
)
|
||||
|
||||
def test_filter_results_by_site_matches_domain_and_path(self):
|
||||
@@ -50,14 +57,36 @@ class TestAgentSearchWebTool(unittest.TestCase):
|
||||
)
|
||||
|
||||
def test_auto_search_plan_falls_back_to_search_engine(self):
|
||||
"""没有 API Key 时自动模式应退回搜索引擎后端"""
|
||||
with patch.object(settings, "EXA_API_KEY", ""), patch.object(
|
||||
settings, "TAVILY_API_KEY", []
|
||||
):
|
||||
search_plan = SearchWebTool._get_search_plan(DEFAULT_SEARCH_ENGINE)
|
||||
"""自动模式应只使用 DDGS 搜索引擎后端"""
|
||||
search_plan = SearchWebTool._get_search_plan(DEFAULT_SEARCH_ENGINE)
|
||||
|
||||
self.assertEqual([DEFAULT_SEARCH_ENGINE], search_plan)
|
||||
|
||||
def test_auto_ddgs_backend_excludes_bing(self):
|
||||
"""DDGS 自动搜索后端不应包含 Bing"""
|
||||
auto_backends = SearchWebTool._get_ddgs_backend(
|
||||
DEFAULT_SEARCH_ENGINE
|
||||
).split(",")
|
||||
|
||||
self.assertNotIn("bing", auto_backends)
|
||||
self.assertIn("duckduckgo", auto_backends)
|
||||
self.assertEqual(DDGS_AUTO_BACKEND, ",".join(auto_backends))
|
||||
|
||||
def test_bing_search_engine_is_not_supported(self):
|
||||
"""Bing 不应再作为可选 DDGS 搜索后端暴露"""
|
||||
tool = SearchWebTool(session_id="session-1", user_id="10001")
|
||||
|
||||
result = asyncio.run(tool.run(query="asyncio", search_engine="bing"))
|
||||
|
||||
self.assertIn("不支持的搜索源 'bing'", result)
|
||||
|
||||
def test_ddgs_alias_uses_auto_backend(self):
|
||||
"""DDGS 别名应映射到自动 DDGS 后端"""
|
||||
self.assertEqual(
|
||||
DEFAULT_SEARCH_ENGINE,
|
||||
SearchWebTool._normalize_search_engine("ddgs"),
|
||||
)
|
||||
|
||||
def test_run_uses_specific_search_engine_and_site_filter(self):
|
||||
"""显式搜索引擎和指定网址应传入后端搜索调用"""
|
||||
|
||||
@@ -95,6 +124,36 @@ class TestAgentSearchWebTool(unittest.TestCase):
|
||||
self.assertEqual(1, payload["total_results"])
|
||||
self.assertEqual("DuckDuckGo", payload["results"][0]["source"])
|
||||
|
||||
def test_ddgs_uses_system_proxy_by_default(self):
|
||||
"""DDGS 搜索默认应使用系统代理配置"""
|
||||
|
||||
async def _run_tool():
|
||||
"""执行一次带 mock DDGS 后端的搜索工具调用"""
|
||||
tool = SearchWebTool(session_id="session-1", user_id="10001")
|
||||
with patch.object(
|
||||
settings, "PROXY_HOST", "http://proxy.example.com:7890"
|
||||
), patch("app.agent.tools.impl.search_web.DDGS") as ddgs_mock:
|
||||
ddgs = ddgs_mock.return_value.__enter__.return_value
|
||||
ddgs.text.return_value = [
|
||||
{
|
||||
"title": "asyncio",
|
||||
"body": "Python asyncio docs",
|
||||
"href": "https://docs.python.org/3/library/asyncio.html",
|
||||
}
|
||||
]
|
||||
|
||||
results = await tool._search_ddgs(
|
||||
query="asyncio",
|
||||
max_results=1,
|
||||
search_engine="duckduckgo",
|
||||
)
|
||||
return results, ddgs_mock.call_args.kwargs
|
||||
|
||||
results, ddgs_kwargs = asyncio.run(_run_tool())
|
||||
|
||||
self.assertEqual("http://proxy.example.com:7890", ddgs_kwargs["proxy"])
|
||||
self.assertEqual(1, len(results))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user