diff --git a/app/agent/tools/impl/browse_webpage.py b/app/agent/tools/impl/browse_webpage.py index df917246..2d80d53b 100644 --- a/app/agent/tools/impl/browse_webpage.py +++ b/app/agent/tools/impl/browse_webpage.py @@ -198,68 +198,62 @@ class BrowseWebpageTool(MoviePilotTool): cookies: Optional[str], user_agent: Optional[str], ) -> str: - """在同步上下文中执行 Playwright 浏览器操作""" - from playwright.sync_api import sync_playwright + """在同步上下文中执行 CloakBrowser 浏览器操作""" + from cloakbrowser import launch_context try: - with sync_playwright() as playwright: - browser = None - context = None - page = None - try: - # 启动浏览器 - browser_type = settings.PLAYWRIGHT_BROWSER_TYPE or "chromium" - browser = playwright[browser_type].launch(headless=True) - - # 创建上下文 - context_kwargs = {} - if user_agent: - context_kwargs["user_agent"] = user_agent - # 设置视口大小 - context_kwargs["viewport"] = { + context = None + page = None + try: + context_kwargs = { + "viewport": { "width": SCREENSHOT_MAX_WIDTH, "height": SCREENSHOT_MAX_HEIGHT, } + } + if user_agent: + context_kwargs["user_agent"] = user_agent - context = browser.new_context(**context_kwargs) - page = context.new_page() - page.set_default_timeout(timeout * 1000) + context = launch_context( + headless=True, + humanize=settings.CLOAKBROWSER_HUMANIZE, + human_preset=settings.CLOAKBROWSER_HUMAN_PRESET, + **context_kwargs, + ) + page = context.new_page() + page.set_default_timeout(timeout * 1000) - # 设置 cookies - if cookies: - page.set_extra_http_headers({"cookie": cookies}) + # 设置 cookies + if cookies: + page.set_extra_http_headers({"cookie": cookies}) - # 对于非 goto 操作,如果提供了 url 先导航 - if url and browser_action != BrowserAction.GOTO: - page.goto( - url, wait_until="domcontentloaded", timeout=timeout * 1000 - ) - page.wait_for_load_state("networkidle", timeout=timeout * 1000) + # 对于非 goto 操作,如果提供了 url 先导航 + if url and browser_action != BrowserAction.GOTO: + page.goto(url, wait_until="domcontentloaded", timeout=timeout * 1000) + page.wait_for_load_state("networkidle", timeout=timeout * 1000) - # 执行具体操作 - result = self._do_action( - page, - browser_action, - url, - selector, - value, - script, - content_type, - timeout, - ) - return result + # 执行具体操作 + result = self._do_action( + page, + browser_action, + url, + selector, + value, + script, + content_type, + timeout, + ) + return result - finally: - if page: - page.close() - if context: - context.close() - if browser: - browser.close() + finally: + if page: + page.close() + if context: + context.close() except Exception as e: - logger.error(f"Playwright 执行失败: {e}", exc_info=True) - return f"Playwright 执行失败: {str(e)}" + logger.error(f"CloakBrowser 执行失败: {e}", exc_info=True) + return f"CloakBrowser 执行失败: {str(e)}" def _do_action( self, diff --git a/app/core/config.py b/app/core/config.py index 22f0432a..371be472 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -331,8 +331,12 @@ class ConfigModel(BaseModel): NO_CACHE_SITE_KEY: str = "m-team" # OCR服务器地址,用于识别站点验证码 OCR_HOST: str = "https://movie-pilot.org" - # 仿真类型:playwright 或 flaresolverr - BROWSER_EMULATION: str = "playwright" + # 仿真类型:cloakbrowser 或 flaresolverr,其他值按 cloakbrowser 处理 + BROWSER_EMULATION: str = "cloakbrowser" + # CloakBrowser 是否启用拟人化输入 + CLOAKBROWSER_HUMANIZE: bool = True + # CloakBrowser 拟人化输入预设:default 或 careful + CLOAKBROWSER_HUMAN_PRESET: str = "default" # FlareSolverr 服务地址,例如 http://127.0.0.1:8191 FLARESOLVERR_URL: Optional[str] = None @@ -526,7 +530,7 @@ class ConfigModel(BaseModel): # ==================== Docker配置 ==================== # Docker Client API地址 DOCKER_CLIENT_API: Optional[str] = "tcp://127.0.0.1:38379" - # Playwright浏览器类型,chromium/firefox + # Playwright浏览器类型,供智能体浏览器工具和插件直接使用 Playwright 时读取 PLAYWRIGHT_BROWSER_TYPE: str = "chromium" # ==================== AI智能体配置 ==================== diff --git a/app/helper/browser.py b/app/helper/browser.py index c9420d53..fd902d42 100644 --- a/app/helper/browser.py +++ b/app/helper/browser.py @@ -1,8 +1,7 @@ import uuid from typing import Callable, Any, Optional -from cf_clearance import sync_cf_retry, sync_stealth -from playwright.sync_api import sync_playwright, Page +from playwright.sync_api import BrowserContext, Page from app.core.config import settings from app.log import logger @@ -10,17 +9,33 @@ from app.utils.http import RequestUtils, cookie_parse class PlaywrightHelper: - def __init__(self, browser_type=settings.PLAYWRIGHT_BROWSER_TYPE): - self.browser_type = browser_type + def __init__(self, browser_type: Optional[str] = None, *args, **kwargs): + """ + 兼容旧的 PlaywrightHelper(browser_type=...) 构造方式。 + """ + self.browser_type = browser_type or settings.PLAYWRIGHT_BROWSER_TYPE @staticmethod - def __pass_cloudflare(url: str, page: Page) -> bool: + def __browser_emulation() -> str: """ - 尝试跳过cloudfare验证 + 当前浏览器仿真类型。 """ - sync_stealth(page, pure=True) - page.goto(url) - return sync_cf_retry(page)[0] + return (settings.BROWSER_EMULATION or "cloakbrowser").lower() + + @staticmethod + def __launch_cloakbrowser_context(headless: bool, + user_agent: Optional[str] = None, + proxies: Optional[dict] = None) -> BrowserContext: + """ + 启动 CloakBrowser 上下文。 + """ + from cloakbrowser import launch_context + + return launch_context(headless=headless, + proxy=proxies, + user_agent=user_agent, + humanize=settings.CLOAKBROWSER_HUMANIZE, + human_preset=settings.CLOAKBROWSER_HUMAN_PRESET) @staticmethod def __fs_cookie_str(cookies: list) -> str: @@ -148,51 +163,44 @@ class PlaywrightHelper: """ result = None try: - with sync_playwright() as playwright: - browser = None - context = None - page = None - try: - # 如果配置使用 FlareSolverr,先通过其获取清除后的 cookies 与 UA - fs_cookie_header = None - fs_ua = None - if settings.BROWSER_EMULATION == "flaresolverr": - solution = self.__flaresolverr_request(url=url, cookies=cookies, - proxy_config=proxies, timeout=timeout) - if solution: - fs_cookie_header = self.__fs_cookie_str(solution.get("cookies", [])) - fs_ua = solution.get("userAgent") + context = None + page = None + try: + # 如果配置使用 FlareSolverr,先通过其获取清除后的 cookies 与 UA + fs_cookie_header = None + fs_ua = None + if self.__browser_emulation() == "flaresolverr": + solution = self.__flaresolverr_request(url=url, cookies=cookies, + proxy_config=proxies, timeout=timeout) + if solution: + fs_cookie_header = self.__fs_cookie_str(solution.get("cookies", [])) + fs_ua = solution.get("userAgent") - browser = playwright[self.browser_type].launch(headless=headless) - context = browser.new_context(user_agent=fs_ua or ua, proxy=proxies) - page = context.new_page() + context = self.__launch_cloakbrowser_context(headless=headless, + user_agent=fs_ua or ua, + proxies=proxies) + page = context.new_page() - # 优先使用 FlareSolverr 返回,其次使用入参 - merged_cookie = fs_cookie_header or cookies - if merged_cookie: - page.set_extra_http_headers({"cookie": merged_cookie}) + # 优先使用 FlareSolverr 返回,其次使用入参 + merged_cookie = fs_cookie_header or cookies + if merged_cookie: + page.set_extra_http_headers({"cookie": merged_cookie}) - if settings.BROWSER_EMULATION == "playwright": - if not self.__pass_cloudflare(url, page): - logger.warn("cloudflare challenge fail!") - else: - page.goto(url) - page.wait_for_load_state("networkidle", timeout=timeout * 1000) + page.goto(url) + page.wait_for_load_state("networkidle", timeout=timeout * 1000) - # 回调函数 - result = callback(page) + # 回调函数 + result = callback(page) - except Exception as e: - logger.error(f"网页操作失败: {str(e)}") - finally: - if page: - page.close() - if context: - context.close() - if browser: - browser.close() + except Exception as e: + logger.error(f"网页操作失败: {str(e)}") + finally: + if page: + page.close() + if context: + context.close() except Exception as e: - logger.error(f"Playwright初始化失败: {str(e)}") + logger.error(f"CloakBrowser初始化失败: {str(e)}") return result @@ -213,7 +221,7 @@ class PlaywrightHelper: """ source = None # 如果配置为 FlareSolverr,则直接调用获取页面源码 - if settings.BROWSER_EMULATION == "flaresolverr": + if self.__browser_emulation() == "flaresolverr": try: solution = self.__flaresolverr_request(url=url, cookies=cookies, proxy_config=proxies, timeout=timeout) @@ -222,36 +230,32 @@ class PlaywrightHelper: except Exception as e: logger.error(f"FlareSolverr 获取源码失败: {str(e)}") try: - with sync_playwright() as playwright: - browser = None - context = None - page = None - try: - browser = playwright[self.browser_type].launch(headless=headless) - context = browser.new_context(user_agent=ua, proxy=proxies) - page = context.new_page() + context = None + page = None + try: + context = self.__launch_cloakbrowser_context(headless=headless, + user_agent=ua, + proxies=proxies) + page = context.new_page() - if cookies: - page.set_extra_http_headers({"cookie": cookies}) + if cookies: + page.set_extra_http_headers({"cookie": cookies}) - if not self.__pass_cloudflare(url, page): - logger.warn("cloudflare challenge fail!") - page.wait_for_load_state("networkidle", timeout=timeout * 1000) + page.goto(url) + page.wait_for_load_state("networkidle", timeout=timeout * 1000) - source = page.content() + source = page.content() - except Exception as e: - logger.error(f"获取网页源码失败: {str(e)}") - source = None - finally: - # 确保资源被正确清理 - if page: - page.close() - if context: - context.close() - if browser: - browser.close() + except Exception as e: + logger.error(f"获取网页源码失败: {str(e)}") + source = None + finally: + # 确保资源被正确清理 + if page: + page.close() + if context: + context.close() except Exception as e: - logger.error(f"Playwright初始化失败: {str(e)}") + logger.error(f"CloakBrowser初始化失败: {str(e)}") return source diff --git a/docker/Dockerfile b/docker/Dockerfile index 6bb9a39a..8ef5c4d2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -109,9 +109,8 @@ COPY --from=prepare_venv --chmod=777 ${VENV_PATH} ${VENV_PATH} COPY --from=prepare_venv /usr/local/bin/uv /usr/local/bin/uv COPY --from=prepare_venv /usr/local/bin/uv-pip-compat /usr/local/bin/uv-pip-compat -# playwright 环境 +# 浏览器运行依赖 RUN playwright install-deps chromium \ - && playwright install-deps firefox \ && apt-get autoremove -y \ && apt-get clean \ && rm -rf \ diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 23e2a2b3..0822a5a5 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -46,6 +46,7 @@ function load_config_from_app_env() { ["PROXY_HOST"]="" ["GITHUB_TOKEN"]="" ["MOVIEPILOT_AUTO_UPDATE"]="release" + ["BROWSER_EMULATION"]="cloakbrowser" # database ["DB_TYPE"]="sqlite" @@ -220,7 +221,7 @@ function graceful_exit() { # 插件依赖和主程序共用同一套 venv 时,历史安装记录可能已经污染环境, # 这里优先在真正拉起后端前做一次自愈,避免容器反复起不来。 function ensure_backend_runtime_dependencies() { - local probe_code="import alembic, fastapi, pydantic, pydantic_core, pydantic_settings, sqlalchemy, starlette, uvicorn; from pydantic import BaseModel, Field" + local probe_code="import alembic, cloakbrowser, fastapi, pydantic, pydantic_core, pydantic_settings, sqlalchemy, starlette, uvicorn; from pydantic import BaseModel, Field" INFO "→ 启动前检查后端核心依赖..." if "${VENV_PATH}/bin/python3" -c "${probe_code}" >/dev/null 2>&1; then @@ -327,12 +328,28 @@ chown -R moviepilot:moviepilot \ /var/log/nginx chown moviepilot:moviepilot /etc/hosts /tmp +# 启动前优先确认主运行环境仍然健康,避免插件依赖污染导致服务直接起不来。 +ensure_backend_runtime_dependencies + # 下载浏览器内核 -if [[ "$HTTPS_PROXY" =~ ^https?:// ]] || [[ "$HTTPS_PROXY" =~ ^https?:// ]] || [[ "$PROXY_HOST" =~ ^https?:// ]]; then - HTTPS_PROXY="${HTTPS_PROXY:-${https_proxy:-$PROXY_HOST}}" gosu moviepilot:moviepilot playwright install ${PLAYWRIGHT_BROWSER_TYPE:-chromium} -else - gosu moviepilot:moviepilot playwright install ${PLAYWRIGHT_BROWSER_TYPE:-chromium} -fi +function install_browser_kernel() { + local emulation="${BROWSER_EMULATION:-cloakbrowser}" + emulation="${emulation,,}" + local proxy="${HTTPS_PROXY:-${https_proxy:-$PROXY_HOST}}" + + if [ "${emulation}" != "cloakbrowser" ] && [ "${emulation}" != "flaresolverr" ] && [ -n "${emulation}" ]; then + WARN "浏览器仿真类型 ${emulation} 已按 CloakBrowser 处理。" + fi + + INFO "下载 CloakBrowser 浏览器内核" + if [[ "$proxy" =~ ^https?:// ]]; then + HTTPS_PROXY="$proxy" gosu moviepilot:moviepilot python -m cloakbrowser install + else + gosu moviepilot:moviepilot python -m cloakbrowser install + fi +} + +install_browser_kernel # 证书管理 source /app/docker/cert.sh @@ -358,9 +375,6 @@ fi # 设置后端服务权限掩码 umask "${UMASK}" -# 启动前优先确认主运行环境仍然健康,避免插件依赖污染导致服务直接起不来。 -ensure_backend_runtime_dependencies - # 清除非系统环境导入的变量,保证转移到 dumb-init 的时候,不会带入不必要的环境变量 INFO "准备为 Python 应用清理的非系统环境导入的变量..." if [ ${#VARS_SET_BY_SCRIPT[@]} -gt 0 ]; then diff --git a/docker/update.sh b/docker/update.sh index fb9fc1bc..359163be 100644 --- a/docker/update.sh +++ b/docker/update.sh @@ -80,6 +80,10 @@ function install_backend_and_download_resources() { cp /tmp/requirements.txt.backup /app/requirements.txt return 1 fi + INFO "正在更新 CloakBrowser 浏览器内核" + if ! ${VENV_PATH}/bin/python -m cloakbrowser install; then + WARN "CloakBrowser 浏览器内核更新失败,后续首次使用时可能重新下载" + fi INFO "依赖更新成功" else INFO "依赖无变化,跳过依赖更新" diff --git a/requirements.in b/requirements.in index cbfb7b6d..9ee73038 100644 --- a/requirements.in +++ b/requirements.in @@ -38,8 +38,8 @@ pillow~=12.1.1 pillow-avif-plugin~=1.5.2 pyTelegramBotAPI~=4.27.0 telegramify-markdown~=0.5.2 +cloakbrowser~=0.3.28 playwright~=1.53.0 -cf_clearance~=0.31.0 torrentool~=1.2.0 slack-bolt~=1.23.0 slack-sdk~=3.35.0 diff --git a/scripts/local_setup.py b/scripts/local_setup.py index 02ddb428..fbfca63e 100644 --- a/scripts/local_setup.py +++ b/scripts/local_setup.py @@ -2653,9 +2653,18 @@ def install_deps(*, python_bin: str, venv_dir: Path, recreate: bool) -> Path: print_step("安装项目依赖") run([str(venv_pip), "install", "-r", str(ROOT / "requirements.txt")]) + install_browser_runtime(venv_python) return venv_python +def install_browser_runtime(venv_python: Path) -> None: + """ + 预下载 CloakBrowser 浏览器内核,避免首次仿真登录时才拉取大文件。 + """ + print_step("安装 CloakBrowser 浏览器内核") + run([str(venv_python), "-m", "cloakbrowser", "install"]) + + def _startup_platform_name() -> str: system = platform.system() if system == "Darwin": diff --git a/tests/test_browser_helper.py b/tests/test_browser_helper.py new file mode 100644 index 00000000..196aa37b --- /dev/null +++ b/tests/test_browser_helper.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import unittest +from unittest.mock import patch + +from app.helper.browser import PlaywrightHelper + + +class _FakePage: + def __init__(self) -> None: + self.headers = None + self.loaded_url = None + self.closed = False + + def set_extra_http_headers(self, headers: dict[str, str]) -> None: + self.headers = headers + + def goto(self, url: str) -> None: + self.loaded_url = url + + def wait_for_load_state(self, _state: str, timeout: int) -> None: + self.timeout = timeout + + def content(self) -> str: + return "ok" + + def close(self) -> None: + self.closed = True + + +class _FakeContext: + def __init__(self, page: _FakePage) -> None: + self.page = page + self.closed = False + + def new_page(self) -> _FakePage: + return self.page + + def close(self) -> None: + self.closed = True + + +class BrowserHelperTests(unittest.TestCase): + def _assert_get_page_source_uses_cloakbrowser(self, emulation: str) -> None: + page = _FakePage() + context = _FakeContext(page) + + with patch("app.helper.browser.settings.BROWSER_EMULATION", emulation), \ + patch.object( + PlaywrightHelper, + "_PlaywrightHelper__launch_cloakbrowser_context", + return_value=context, + ) as launch_context: + source = PlaywrightHelper().get_page_source( + url="https://example.com", + cookies="uid=1", + ua="UA", + timeout=3, + ) + + self.assertEqual(source, "ok") + launch_context.assert_called_once_with( + headless=False, + user_agent="UA", + proxies=None, + ) + self.assertEqual(page.headers, {"cookie": "uid=1"}) + self.assertEqual(page.loaded_url, "https://example.com") + self.assertTrue(page.closed) + self.assertTrue(context.closed) + + def test_default_emulation_uses_cloakbrowser_context(self): + self._assert_get_page_source_uses_cloakbrowser("cloakbrowser") + + def test_legacy_playwright_emulation_uses_cloakbrowser_context(self): + self._assert_get_page_source_uses_cloakbrowser("Playwright") + + def test_legacy_browser_type_constructor_is_accepted(self): + page = _FakePage() + context = _FakeContext(page) + + with patch.object( + PlaywrightHelper, + "_PlaywrightHelper__launch_cloakbrowser_context", + return_value=context, + ): + source = PlaywrightHelper(browser_type="firefox").get_page_source( + url="https://example.com" + ) + + self.assertEqual(source, "ok") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_local_setup_config_dir.py b/tests/test_local_setup_config_dir.py index 9a7ae258..bbdd0a2d 100644 --- a/tests/test_local_setup_config_dir.py +++ b/tests/test_local_setup_config_dir.py @@ -1,6 +1,7 @@ from __future__ import annotations import importlib.util +import tempfile import unittest import uuid from pathlib import Path @@ -58,6 +59,35 @@ class LocalSetupConfigDirTests(unittest.TestCase): self.assertIsNone(result) prompt_mock.assert_not_called() + def test_install_deps_installs_browser_runtime(self): + module = load_local_setup_module() + + with tempfile.TemporaryDirectory() as temp_dir: + venv_dir = (Path(temp_dir) / "venv").resolve() + venv_python = venv_dir / "bin" / "python" + venv_pip = venv_dir / "bin" / "pip" + + with patch.object(module, "ensure_supported_python"), \ + patch.object( + module, + "configure_venv_pip_compat", + return_value=venv_pip, + ), \ + patch.object(module, "run") as run_mock, \ + patch.object(module, "install_browser_runtime") as install_browser: + result = module.install_deps( + python_bin="python3", + venv_dir=venv_dir, + recreate=False, + ) + + self.assertEqual(result, venv_python) + run_mock.assert_any_call(["python3", "-m", "venv", str(venv_dir)]) + run_mock.assert_any_call( + [str(venv_pip), "install", "-r", str(module.ROOT / "requirements.txt")] + ) + install_browser.assert_called_once_with(venv_python) + if __name__ == "__main__": unittest.main()