diff --git a/app/agent/tools/impl/search_web.py b/app/agent/tools/impl/search_web.py index bb89add7..d198a47c 100644 --- a/app/agent/tools/impl/search_web.py +++ b/app/agent/tools/impl/search_web.py @@ -1,6 +1,5 @@ import asyncio import json -import random import re from dataclasses import dataclass from typing import Dict, List, Optional, Type @@ -12,7 +11,6 @@ from pydantic import BaseModel, Field from app.agent.tools.base import MoviePilotTool from app.core.config import settings from app.log import logger -from app.utils.http import AsyncRequestUtils # 搜索超时时间(秒) SEARCH_TIMEOUT = 20 @@ -25,16 +23,16 @@ SEARCH_ENGINE_BACKENDS = ( "auto", "duckduckgo", "google", - "bing", "brave", "yahoo", "wikipedia", "yandex", "mojeek", ) -# 可显式调用的搜索 API 后端 -SEARCH_API_BACKENDS = ("exa", "tavily") -SUPPORTED_SEARCH_ENGINES = SEARCH_API_BACKENDS + SEARCH_ENGINE_BACKENDS +SUPPORTED_SEARCH_ENGINES = SEARCH_ENGINE_BACKENDS +DDGS_AUTO_BACKEND = ",".join( + backend for backend in SEARCH_ENGINE_BACKENDS if backend != DEFAULT_SEARCH_ENGINE +) SITE_SEARCH_PATTERN = re.compile(r"\bsite:", re.IGNORECASE) @@ -64,8 +62,8 @@ class SearchWebInput(BaseModel): search_engine: Optional[str] = Field( DEFAULT_SEARCH_ENGINE, description=( - "Search backend to use. Supported values: auto, exa, tavily, " - "duckduckgo, google, bing, brave, yahoo, wikipedia, yandex, mojeek. " + "Search backend to use. Supported values: auto, duckduckgo, google, " + "brave, yahoo, wikipedia, yandex, mojeek. " "Use auto unless the user asks for a specific search engine." ), ) @@ -80,15 +78,16 @@ class SearchWebInput(BaseModel): class SearchWebTool(MoviePilotTool): """ - 网络搜索工具,支持 API 搜索、搜索引擎搜索和指定站点限定搜索。 + 网络搜索工具,支持 DDGS 搜索引擎和指定站点限定搜索。 """ name: str = "search_web" description: str = ( "Search the web for information when you need current information, facts, " - "or references. Supports automatic API-backed search, explicit search " - "engine selection, and site_url-limited searches for a specified website " - "or URL. Returns search results with titles, snippets, and URLs." + "or references. Supports DDGS-backed search engine selection, automatic " + "fallback, and site_url-limited searches for a specified website " + "or URL. Uses the configured system proxy by default. Returns search " + "results with titles, snippets, and URLs." ) args_schema: Type[BaseModel] = SearchWebInput @@ -175,6 +174,7 @@ class SearchWebTool(MoviePilotTool): """规范化搜索源参数""" engine = (search_engine or DEFAULT_SEARCH_ENGINE).strip().lower() aliases = { + "ddgs": DEFAULT_SEARCH_ENGINE, "ddg": "duckduckgo", "duck": "duckduckgo", "search": DEFAULT_SEARCH_ENGINE, @@ -187,14 +187,7 @@ class SearchWebTool(MoviePilotTool): """根据搜索源配置生成兜底搜索顺序""" if search_engine != DEFAULT_SEARCH_ENGINE: return [search_engine] - - search_plan: List[str] = [] - if settings.EXA_API_KEY: - search_plan.append("exa") - if SearchWebTool._choose_tavily_api_key(): - search_plan.append("tavily") - search_plan.append(DEFAULT_SEARCH_ENGINE) - return search_plan + return [DEFAULT_SEARCH_ENGINE] async def _search_with_backend( self, @@ -212,148 +205,20 @@ class SearchWebTool(MoviePilotTool): :param site_filter: 站点限定条件 :return: 搜索结果列表 """ - if engine == "exa": - logger.info("使用 Exa 进行搜索...") - return await self._search_exa(query, max_results, site_filter) - if engine == "tavily": - logger.info("使用 Tavily 进行搜索...") - return await self._search_tavily(query, max_results, site_filter) - - logger.info(f"使用搜索引擎 {engine} 进行搜索...") - return await self._search_duckduckgo(query, max_results, engine, site_filter) + logger.info(f"使用 DDGS 搜索后端 {self._get_ddgs_backend(engine)} 进行搜索...") + return await self._search_ddgs(query, max_results, engine, site_filter) @staticmethod - async def _search_tavily( - query: str, - max_results: int, - site_filter: Optional[_SearchSiteFilter] = None, - ) -> List[Dict]: - """使用 Tavily API 进行搜索""" - response = None - try: - # 从设置中随机选择一个 API Key(如果有多个) - tavily_api_key = SearchWebTool._choose_tavily_api_key() - if not tavily_api_key: - return [] - payload = { - "api_key": tavily_api_key, - "query": query, - "search_depth": "basic", - "max_results": max_results, - "include_answer": False, - "include_images": False, - "include_raw_content": False, - } - if site_filter: - payload["include_domains"] = [site_filter.domain] + def _get_ddgs_backend(search_engine: str) -> str: + """ + 获取实际传给 DDGS 的搜索后端。 - response = await AsyncRequestUtils( - ua=settings.USER_AGENT, - proxies=settings.PROXY, - timeout=SEARCH_TIMEOUT, - content_type="application/json", - accept_type="application/json", - ).post_res( - "https://api.tavily.com/search", - json=payload, - ) - if not response or response.status_code != 200: - status_code = response.status_code if response else "无响应" - logger.warning(f"Tavily 搜索失败,HTTP状态码: {status_code}") - return [] - data = response.json() - - results = [] - for result in data.get("results", []): - results.append( - { - "title": result.get("title", ""), - "snippet": result.get("content", ""), - "url": result.get("url", ""), - "source": "Tavily", - } - ) - return SearchWebTool._filter_results_by_site(results, site_filter) - except Exception as e: - logger.warning(f"Tavily 搜索失败: {e}") - return [] - finally: - if response is not None: - await response.aclose() - - @staticmethod - def _choose_tavily_api_key() -> Optional[str]: - """从配置中选择一个可用的 Tavily API Key""" - api_keys = settings.TAVILY_API_KEY - if not api_keys: - return None - if isinstance(api_keys, str): - api_keys = [api_keys] - available_api_keys = [api_key for api_key in api_keys if api_key] - if not available_api_keys: - return None - return random.choice(available_api_keys) - - @staticmethod - async def _search_exa( - query: str, - max_results: int, - site_filter: Optional[_SearchSiteFilter] = None, - ) -> List[Dict]: - """使用 Exa API 进行搜索""" - response = None - try: - if not settings.EXA_API_KEY: - return [] - payload = { - "query": query, - "numResults": max_results, - "type": "auto", - "contents": {"highlights": {"maxCharacters": 2000}}, - } - if site_filter: - payload["includeDomains"] = [site_filter.domain] - - response = await AsyncRequestUtils( - headers={ - "x-api-key": settings.EXA_API_KEY, - "Content-Type": "application/json", - "Accept": "application/json", - "User-Agent": settings.USER_AGENT, - }, - proxies=settings.PROXY, - timeout=SEARCH_TIMEOUT, - ).post_res( - "https://api.exa.ai/search", - json=payload, - ) - if not response or response.status_code != 200: - status_code = response.status_code if response else "无响应" - logger.warning(f"Exa 搜索失败,HTTP状态码: {status_code}") - return [] - data = response.json() - - results = [] - for result in data.get("results", []): - highlights = result.get("highlights", []) - snippet = ( - highlights[0] if highlights else result.get("text", "")[:500] - ) - results.append( - { - "title": result.get("title", ""), - "snippet": snippet, - "url": result.get("url", ""), - "source": "Exa", - } - ) - return SearchWebTool._filter_results_by_site(results, site_filter) - except Exception as e: - logger.warning(f"Exa 搜索失败: {e}") - return [] - finally: - if response is not None: - await response.aclose() + :param search_engine: 用户指定的搜索源 + :return: DDGS 后端名称或逗号分隔的后端列表 + """ + if search_engine == DEFAULT_SEARCH_ENGINE: + return DDGS_AUTO_BACKEND + return search_engine @staticmethod def _normalize_site_filter(site_url: Optional[str]) -> Optional[_SearchSiteFilter]: @@ -475,10 +340,9 @@ class SearchWebTool(MoviePilotTool): :return: 展示名称 """ labels = { - "auto": "SearchEngine", + "auto": "DDGS", "duckduckgo": "DuckDuckGo", "google": "Google", - "bing": "Bing", "brave": "Brave", "yahoo": "Yahoo", "wikipedia": "Wikipedia", @@ -524,7 +388,7 @@ class SearchWebTool(MoviePilotTool): return proxy_setting.get("http") or proxy_setting.get("https") return proxy_setting - async def _search_duckduckgo( + async def _search_ddgs( self, query: str, max_results: int, @@ -532,7 +396,7 @@ class SearchWebTool(MoviePilotTool): site_filter: Optional[_SearchSiteFilter] = None, ) -> List[Dict]: """ - 使用搜索引擎后端进行搜索。 + 使用 DDGS 搜索引擎后端进行搜索。 :param query: 搜索关键词 :param max_results: 最大结果数 @@ -555,12 +419,12 @@ class SearchWebTool(MoviePilotTool): ddgs_results = ddgs.text( query, max_results=max_results, - backend=search_engine, + backend=self._get_ddgs_backend(search_engine), ) if ddgs_results: for result in ddgs_results: source = ( - result.get("provider") + DEFAULT_SEARCH_ENGINE if search_engine == DEFAULT_SEARCH_ENGINE else search_engine ) diff --git a/app/core/config.py b/app/core/config.py index 8f034082..d2d6faf7 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -588,14 +588,6 @@ class ConfigModel(BaseModel): AI_RECOMMEND_ENABLED: bool = False # AI推荐用户偏好 AI_RECOMMEND_USER_PREFERENCE: str = "" - # Tavily API密钥(用于网络搜索) - TAVILY_API_KEY: List[str] = [ - "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", - "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", - "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", - ] - # Exa API密钥(用于网络搜索) - EXA_API_KEY: str = "161ce010-fb56-419c-9ea8-4fb459b96298" # AI推荐条目数量限制 AI_RECOMMEND_MAX_ITEMS: int = 50 diff --git a/docs/mcp-api.md b/docs/mcp-api.md index a3ee126d..04cbaaaa 100644 --- a/docs/mcp-api.md +++ b/docs/mcp-api.md @@ -159,7 +159,7 @@ MoviePilot 实现了标准的 **Model Context Protocol (MCP)**,允许 AI 智 } ``` -`search_engine` 可选,支持 `auto`、`exa`、`tavily`、`duckduckgo`、`google`、`bing`、`brave`、`yahoo`、`wikipedia`、`yandex`、`mojeek`。`site_url` 可选,用于限定搜索到指定域名或 URL 路径范围。 +`search_engine` 可选,通过 DDGS 支持 `auto`、`duckduckgo`、`google`、`brave`、`yahoo`、`wikipedia`、`yandex`、`mojeek`。`site_url` 可选,用于限定搜索到指定域名或 URL 路径范围。搜索默认使用系统代理配置。 ### 3. 获取工具详情 diff --git a/skills/browser-use/SKILL.md b/skills/browser-use/SKILL.md index 197f7b17..90ffecd3 100644 --- a/skills/browser-use/SKILL.md +++ b/skills/browser-use/SKILL.md @@ -40,9 +40,9 @@ dedicated tool can complete the task more directly and safely. - `browse_webpage` - Real browser actions: `goto`, `get_content`, `screenshot`, `click`, `fill`, `select`, `evaluate`, `wait`. - `search_web` - Find current pages or official references before opening a - target URL. It supports `search_engine` (`auto`, `duckduckgo`, `google`, - `bing`, `brave`, etc.) and `site_url` for limiting results to a specified - domain or URL path. + target URL. It supports DDGS-backed `search_engine` (`auto`, `duckduckgo`, + `google`, `brave`, etc.) and `site_url` for limiting results to a specified + domain or URL path. It uses the configured system proxy by default. - `query_sites` - Get MoviePilot site IDs before site-specific operations. - `update_site_cookie` - Update a configured site's Cookie and User-Agent using username, password, and optional two-step code. diff --git a/tests/test_agent_search_web_tool.py b/tests/test_agent_search_web_tool.py index 90a7a5c4..d697315d 100644 --- a/tests/test_agent_search_web_tool.py +++ b/tests/test_agent_search_web_tool.py @@ -3,7 +3,11 @@ import json import unittest from unittest.mock import AsyncMock, patch -from app.agent.tools.impl.search_web import DEFAULT_SEARCH_ENGINE, SearchWebTool +from app.agent.tools.impl.search_web import ( + DDGS_AUTO_BACKEND, + DEFAULT_SEARCH_ENGINE, + SearchWebTool, +) from app.core.config import settings @@ -28,7 +32,10 @@ class TestAgentSearchWebTool(unittest.TestCase): self.assertEqual( "asyncio site:docs.python.org", - SearchWebTool._build_search_query("asyncio site:docs.python.org", site_filter), + SearchWebTool._build_search_query( + "asyncio site:docs.python.org", + site_filter, + ), ) def test_filter_results_by_site_matches_domain_and_path(self): @@ -50,14 +57,36 @@ class TestAgentSearchWebTool(unittest.TestCase): ) def test_auto_search_plan_falls_back_to_search_engine(self): - """没有 API Key 时自动模式应退回搜索引擎后端""" - with patch.object(settings, "EXA_API_KEY", ""), patch.object( - settings, "TAVILY_API_KEY", [] - ): - search_plan = SearchWebTool._get_search_plan(DEFAULT_SEARCH_ENGINE) + """自动模式应只使用 DDGS 搜索引擎后端""" + search_plan = SearchWebTool._get_search_plan(DEFAULT_SEARCH_ENGINE) self.assertEqual([DEFAULT_SEARCH_ENGINE], search_plan) + def test_auto_ddgs_backend_excludes_bing(self): + """DDGS 自动搜索后端不应包含 Bing""" + auto_backends = SearchWebTool._get_ddgs_backend( + DEFAULT_SEARCH_ENGINE + ).split(",") + + self.assertNotIn("bing", auto_backends) + self.assertIn("duckduckgo", auto_backends) + self.assertEqual(DDGS_AUTO_BACKEND, ",".join(auto_backends)) + + def test_bing_search_engine_is_not_supported(self): + """Bing 不应再作为可选 DDGS 搜索后端暴露""" + tool = SearchWebTool(session_id="session-1", user_id="10001") + + result = asyncio.run(tool.run(query="asyncio", search_engine="bing")) + + self.assertIn("不支持的搜索源 'bing'", result) + + def test_ddgs_alias_uses_auto_backend(self): + """DDGS 别名应映射到自动 DDGS 后端""" + self.assertEqual( + DEFAULT_SEARCH_ENGINE, + SearchWebTool._normalize_search_engine("ddgs"), + ) + def test_run_uses_specific_search_engine_and_site_filter(self): """显式搜索引擎和指定网址应传入后端搜索调用""" @@ -95,6 +124,36 @@ class TestAgentSearchWebTool(unittest.TestCase): self.assertEqual(1, payload["total_results"]) self.assertEqual("DuckDuckGo", payload["results"][0]["source"]) + def test_ddgs_uses_system_proxy_by_default(self): + """DDGS 搜索默认应使用系统代理配置""" + + async def _run_tool(): + """执行一次带 mock DDGS 后端的搜索工具调用""" + tool = SearchWebTool(session_id="session-1", user_id="10001") + with patch.object( + settings, "PROXY_HOST", "http://proxy.example.com:7890" + ), patch("app.agent.tools.impl.search_web.DDGS") as ddgs_mock: + ddgs = ddgs_mock.return_value.__enter__.return_value + ddgs.text.return_value = [ + { + "title": "asyncio", + "body": "Python asyncio docs", + "href": "https://docs.python.org/3/library/asyncio.html", + } + ] + + results = await tool._search_ddgs( + query="asyncio", + max_results=1, + search_engine="duckduckgo", + ) + return results, ddgs_mock.call_args.kwargs + + results, ddgs_kwargs = asyncio.run(_run_tool()) + + self.assertEqual("http://proxy.example.com:7890", ddgs_kwargs["proxy"]) + self.assertEqual(1, len(results)) + if __name__ == "__main__": unittest.main()