feat(browser): migrate to CloakBrowser for browser emulation and streamline dependency management

- Replace Playwright-based browser emulation with CloakBrowser as default
- Update config to support CloakBrowser options and humanization presets
- Refactor browser helper to use CloakBrowser context and remove cf_clearance dependency
- Update Dockerfile, entrypoint, and update scripts to install CloakBrowser runtime
- Ensure CloakBrowser kernel is pre-installed during local setup and dependency updates
- Add tests for CloakBrowser integration and legacy compatibility
This commit is contained in:
jxxghp
2026-05-16 20:51:38 +08:00
parent 9069dccb2a
commit 0ee9fec1d2
10 changed files with 293 additions and 140 deletions

View File

@@ -198,68 +198,62 @@ class BrowseWebpageTool(MoviePilotTool):
cookies: Optional[str],
user_agent: Optional[str],
) -> str:
"""在同步上下文中执行 Playwright 浏览器操作"""
from playwright.sync_api import sync_playwright
"""在同步上下文中执行 CloakBrowser 浏览器操作"""
from cloakbrowser import launch_context
try:
with sync_playwright() as playwright:
browser = None
context = None
page = None
try:
# 启动浏览器
browser_type = settings.PLAYWRIGHT_BROWSER_TYPE or "chromium"
browser = playwright[browser_type].launch(headless=True)
# 创建上下文
context_kwargs = {}
if user_agent:
context_kwargs["user_agent"] = user_agent
# 设置视口大小
context_kwargs["viewport"] = {
context = None
page = None
try:
context_kwargs = {
"viewport": {
"width": SCREENSHOT_MAX_WIDTH,
"height": SCREENSHOT_MAX_HEIGHT,
}
}
if user_agent:
context_kwargs["user_agent"] = user_agent
context = browser.new_context(**context_kwargs)
page = context.new_page()
page.set_default_timeout(timeout * 1000)
context = launch_context(
headless=True,
humanize=settings.CLOAKBROWSER_HUMANIZE,
human_preset=settings.CLOAKBROWSER_HUMAN_PRESET,
**context_kwargs,
)
page = context.new_page()
page.set_default_timeout(timeout * 1000)
# 设置 cookies
if cookies:
page.set_extra_http_headers({"cookie": cookies})
# 设置 cookies
if cookies:
page.set_extra_http_headers({"cookie": cookies})
# 对于非 goto 操作,如果提供了 url 先导航
if url and browser_action != BrowserAction.GOTO:
page.goto(
url, wait_until="domcontentloaded", timeout=timeout * 1000
)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
# 对于非 goto 操作,如果提供了 url 先导航
if url and browser_action != BrowserAction.GOTO:
page.goto(url, wait_until="domcontentloaded", timeout=timeout * 1000)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
# 执行具体操作
result = self._do_action(
page,
browser_action,
url,
selector,
value,
script,
content_type,
timeout,
)
return result
# 执行具体操作
result = self._do_action(
page,
browser_action,
url,
selector,
value,
script,
content_type,
timeout,
)
return result
finally:
if page:
page.close()
if context:
context.close()
if browser:
browser.close()
finally:
if page:
page.close()
if context:
context.close()
except Exception as e:
logger.error(f"Playwright 执行失败: {e}", exc_info=True)
return f"Playwright 执行失败: {str(e)}"
logger.error(f"CloakBrowser 执行失败: {e}", exc_info=True)
return f"CloakBrowser 执行失败: {str(e)}"
def _do_action(
self,

View File

@@ -331,8 +331,12 @@ class ConfigModel(BaseModel):
NO_CACHE_SITE_KEY: str = "m-team"
# OCR服务器地址用于识别站点验证码
OCR_HOST: str = "https://movie-pilot.org"
# 仿真类型:playwright 或 flaresolverr
BROWSER_EMULATION: str = "playwright"
# 仿真类型:cloakbrowser 或 flaresolverr,其他值按 cloakbrowser 处理
BROWSER_EMULATION: str = "cloakbrowser"
# CloakBrowser 是否启用拟人化输入
CLOAKBROWSER_HUMANIZE: bool = True
# CloakBrowser 拟人化输入预设:default 或 careful
CLOAKBROWSER_HUMAN_PRESET: str = "default"
# FlareSolverr 服务地址,例如 http://127.0.0.1:8191
FLARESOLVERR_URL: Optional[str] = None
@@ -526,7 +530,7 @@ class ConfigModel(BaseModel):
# ==================== Docker配置 ====================
# Docker Client API地址
DOCKER_CLIENT_API: Optional[str] = "tcp://127.0.0.1:38379"
# Playwright浏览器类型chromium/firefox
# Playwright浏览器类型,供智能体浏览器工具和插件直接使用 Playwright 时读取
PLAYWRIGHT_BROWSER_TYPE: str = "chromium"
# ==================== AI智能体配置 ====================

View File

@@ -1,8 +1,7 @@
import uuid
from typing import Callable, Any, Optional
from cf_clearance import sync_cf_retry, sync_stealth
from playwright.sync_api import sync_playwright, Page
from playwright.sync_api import BrowserContext, Page
from app.core.config import settings
from app.log import logger
@@ -10,17 +9,33 @@ from app.utils.http import RequestUtils, cookie_parse
class PlaywrightHelper:
def __init__(self, browser_type=settings.PLAYWRIGHT_BROWSER_TYPE):
self.browser_type = browser_type
def __init__(self, browser_type: Optional[str] = None, *args, **kwargs):
    """
    Keep the legacy ``PlaywrightHelper(browser_type=...)`` constructor working.

    :param browser_type: browser type to remember; a falsy value falls back
        to ``settings.PLAYWRIGHT_BROWSER_TYPE``
    :param args: extra positional arguments, accepted and ignored
    :param kwargs: extra keyword arguments, accepted and ignored
    """
    if browser_type:
        self.browser_type = browser_type
    else:
        self.browser_type = settings.PLAYWRIGHT_BROWSER_TYPE
@staticmethod
def __pass_cloudflare(url: str, page: Page) -> bool:
def __browser_emulation() -> str:
"""
尝试跳过cloudfare验证
当前浏览器仿真类型。
"""
sync_stealth(page, pure=True)
page.goto(url)
return sync_cf_retry(page)[0]
return (settings.BROWSER_EMULATION or "cloakbrowser").lower()
@staticmethod
def __launch_cloakbrowser_context(headless: bool,
                                  user_agent: Optional[str] = None,
                                  proxies: Optional[dict] = None) -> BrowserContext:
    """
    Launch a CloakBrowser context.

    :param headless: forwarded to ``launch_context`` as ``headless``
    :param user_agent: forwarded to ``launch_context`` as ``user_agent``
    :param proxies: forwarded to ``launch_context`` as ``proxy``
    :return: whatever ``cloakbrowser.launch_context`` returns
    """
    # Imported lazily so the module can be loaded without touching cloakbrowser
    from cloakbrowser import launch_context

    launch_options = {
        "headless": headless,
        "proxy": proxies,
        "user_agent": user_agent,
        # Humanization switches come from the application settings
        "humanize": settings.CLOAKBROWSER_HUMANIZE,
        "human_preset": settings.CLOAKBROWSER_HUMAN_PRESET,
    }
    return launch_context(**launch_options)
@staticmethod
def __fs_cookie_str(cookies: list) -> str:
@@ -148,51 +163,44 @@ class PlaywrightHelper:
"""
result = None
try:
with sync_playwright() as playwright:
browser = None
context = None
page = None
try:
# 如果配置使用 FlareSolverr先通过其获取清除后的 cookies 与 UA
fs_cookie_header = None
fs_ua = None
if settings.BROWSER_EMULATION == "flaresolverr":
solution = self.__flaresolverr_request(url=url, cookies=cookies,
proxy_config=proxies, timeout=timeout)
if solution:
fs_cookie_header = self.__fs_cookie_str(solution.get("cookies", []))
fs_ua = solution.get("userAgent")
context = None
page = None
try:
# 如果配置使用 FlareSolverr先通过其获取清除后的 cookies 与 UA
fs_cookie_header = None
fs_ua = None
if self.__browser_emulation() == "flaresolverr":
solution = self.__flaresolverr_request(url=url, cookies=cookies,
proxy_config=proxies, timeout=timeout)
if solution:
fs_cookie_header = self.__fs_cookie_str(solution.get("cookies", []))
fs_ua = solution.get("userAgent")
browser = playwright[self.browser_type].launch(headless=headless)
context = browser.new_context(user_agent=fs_ua or ua, proxy=proxies)
page = context.new_page()
context = self.__launch_cloakbrowser_context(headless=headless,
user_agent=fs_ua or ua,
proxies=proxies)
page = context.new_page()
# 优先使用 FlareSolverr 返回,其次使用入参
merged_cookie = fs_cookie_header or cookies
if merged_cookie:
page.set_extra_http_headers({"cookie": merged_cookie})
# 优先使用 FlareSolverr 返回,其次使用入参
merged_cookie = fs_cookie_header or cookies
if merged_cookie:
page.set_extra_http_headers({"cookie": merged_cookie})
if settings.BROWSER_EMULATION == "playwright":
if not self.__pass_cloudflare(url, page):
logger.warn("cloudflare challenge fail")
else:
page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
# 回调函数
result = callback(page)
# 回调函数
result = callback(page)
except Exception as e:
logger.error(f"网页操作失败: {str(e)}")
finally:
if page:
page.close()
if context:
context.close()
if browser:
browser.close()
except Exception as e:
logger.error(f"网页操作失败: {str(e)}")
finally:
if page:
page.close()
if context:
context.close()
except Exception as e:
logger.error(f"Playwright初始化失败: {str(e)}")
logger.error(f"CloakBrowser初始化失败: {str(e)}")
return result
@@ -213,7 +221,7 @@ class PlaywrightHelper:
"""
source = None
# 如果配置为 FlareSolverr则直接调用获取页面源码
if settings.BROWSER_EMULATION == "flaresolverr":
if self.__browser_emulation() == "flaresolverr":
try:
solution = self.__flaresolverr_request(url=url, cookies=cookies,
proxy_config=proxies, timeout=timeout)
@@ -222,36 +230,32 @@ class PlaywrightHelper:
except Exception as e:
logger.error(f"FlareSolverr 获取源码失败: {str(e)}")
try:
with sync_playwright() as playwright:
browser = None
context = None
page = None
try:
browser = playwright[self.browser_type].launch(headless=headless)
context = browser.new_context(user_agent=ua, proxy=proxies)
page = context.new_page()
context = None
page = None
try:
context = self.__launch_cloakbrowser_context(headless=headless,
user_agent=ua,
proxies=proxies)
page = context.new_page()
if cookies:
page.set_extra_http_headers({"cookie": cookies})
if cookies:
page.set_extra_http_headers({"cookie": cookies})
if not self.__pass_cloudflare(url, page):
logger.warn("cloudflare challenge fail")
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
page.goto(url)
page.wait_for_load_state("networkidle", timeout=timeout * 1000)
source = page.content()
source = page.content()
except Exception as e:
logger.error(f"获取网页源码失败: {str(e)}")
source = None
finally:
# 确保资源被正确清理
if page:
page.close()
if context:
context.close()
if browser:
browser.close()
except Exception as e:
logger.error(f"获取网页源码失败: {str(e)}")
source = None
finally:
# 确保资源被正确清理
if page:
page.close()
if context:
context.close()
except Exception as e:
logger.error(f"Playwright初始化失败: {str(e)}")
logger.error(f"CloakBrowser初始化失败: {str(e)}")
return source

View File

@@ -109,9 +109,8 @@ COPY --from=prepare_venv --chmod=777 ${VENV_PATH} ${VENV_PATH}
COPY --from=prepare_venv /usr/local/bin/uv /usr/local/bin/uv
COPY --from=prepare_venv /usr/local/bin/uv-pip-compat /usr/local/bin/uv-pip-compat
# playwright 环境
# 浏览器运行依赖
RUN playwright install-deps chromium \
&& playwright install-deps firefox \
&& apt-get autoremove -y \
&& apt-get clean \
&& rm -rf \

View File

@@ -46,6 +46,7 @@ function load_config_from_app_env() {
["PROXY_HOST"]=""
["GITHUB_TOKEN"]=""
["MOVIEPILOT_AUTO_UPDATE"]="release"
["BROWSER_EMULATION"]="cloakbrowser"
# database
["DB_TYPE"]="sqlite"
@@ -220,7 +221,7 @@ function graceful_exit() {
# 插件依赖和主程序共用同一套 venv 时,历史安装记录可能已经污染环境,
# 这里优先在真正拉起后端前做一次自愈,避免容器反复起不来。
function ensure_backend_runtime_dependencies() {
local probe_code="import alembic, fastapi, pydantic, pydantic_core, pydantic_settings, sqlalchemy, starlette, uvicorn; from pydantic import BaseModel, Field"
local probe_code="import alembic, cloakbrowser, fastapi, pydantic, pydantic_core, pydantic_settings, sqlalchemy, starlette, uvicorn; from pydantic import BaseModel, Field"
INFO "→ 启动前检查后端核心依赖..."
if "${VENV_PATH}/bin/python3" -c "${probe_code}" >/dev/null 2>&1; then
@@ -327,12 +328,28 @@ chown -R moviepilot:moviepilot \
/var/log/nginx
chown moviepilot:moviepilot /etc/hosts /tmp
# 启动前优先确认主运行环境仍然健康,避免插件依赖污染导致服务直接起不来。
ensure_backend_runtime_dependencies
# 下载浏览器内核
if [[ "$HTTPS_PROXY" =~ ^https?:// ]] || [[ "$HTTPS_PROXY" =~ ^https?:// ]] || [[ "$PROXY_HOST" =~ ^https?:// ]]; then
HTTPS_PROXY="${HTTPS_PROXY:-${https_proxy:-$PROXY_HOST}}" gosu moviepilot:moviepilot playwright install ${PLAYWRIGHT_BROWSER_TYPE:-chromium}
else
gosu moviepilot:moviepilot playwright install ${PLAYWRIGHT_BROWSER_TYPE:-chromium}
fi
# Download the CloakBrowser browser kernel as the moviepilot user.
#
# Reads BROWSER_EMULATION (defaults to "cloakbrowser", compared case-insensitively)
# and warns when it is neither "cloakbrowser" nor "flaresolverr", since any other
# value is handled as CloakBrowser. The download goes through HTTPS_PROXY,
# https_proxy or PROXY_HOST (first one set) when that value is an http(s) URL.
# A failed download only produces a warning, mirroring the dependency update script.
function install_browser_kernel() {
    local emulation="${BROWSER_EMULATION:-cloakbrowser}"
    emulation="${emulation,,}"
    local proxy="${HTTPS_PROXY:-${https_proxy:-$PROXY_HOST}}"
    # Single definition of the install command shared by both proxy branches
    local -a install_cmd=(gosu moviepilot:moviepilot python -m cloakbrowser install)
    local failure_msg="CloakBrowser 浏览器内核下载失败,后续首次使用时可能重新下载"

    # emulation can never be empty here because of the default above
    case "${emulation}" in
        cloakbrowser|flaresolverr) ;;
        *) WARN "浏览器仿真类型 ${emulation} 已按 CloakBrowser 处理。" ;;
    esac

    INFO "下载 CloakBrowser 浏览器内核"
    if [[ "$proxy" =~ ^https?:// ]]; then
        HTTPS_PROXY="$proxy" "${install_cmd[@]}" || WARN "${failure_msg}"
    else
        "${install_cmd[@]}" || WARN "${failure_msg}"
    fi
}
install_browser_kernel
# 证书管理
source /app/docker/cert.sh
@@ -358,9 +375,6 @@ fi
# 设置后端服务权限掩码
umask "${UMASK}"
# 启动前优先确认主运行环境仍然健康,避免插件依赖污染导致服务直接起不来。
ensure_backend_runtime_dependencies
# 清除非系统环境导入的变量,保证转移到 dumb-init 的时候,不会带入不必要的环境变量
INFO "准备为 Python 应用清理的非系统环境导入的变量..."
if [ ${#VARS_SET_BY_SCRIPT[@]} -gt 0 ]; then

View File

@@ -80,6 +80,10 @@ function install_backend_and_download_resources() {
cp /tmp/requirements.txt.backup /app/requirements.txt
return 1
fi
INFO "正在更新 CloakBrowser 浏览器内核"
if ! ${VENV_PATH}/bin/python -m cloakbrowser install; then
WARN "CloakBrowser 浏览器内核更新失败,后续首次使用时可能重新下载"
fi
INFO "依赖更新成功"
else
INFO "依赖无变化,跳过依赖更新"

View File

@@ -38,8 +38,8 @@ pillow~=12.1.1
pillow-avif-plugin~=1.5.2
pyTelegramBotAPI~=4.27.0
telegramify-markdown~=0.5.2
cloakbrowser~=0.3.28
playwright~=1.53.0
cf_clearance~=0.31.0
torrentool~=1.2.0
slack-bolt~=1.23.0
slack-sdk~=3.35.0

View File

@@ -2653,9 +2653,18 @@ def install_deps(*, python_bin: str, venv_dir: Path, recreate: bool) -> Path:
print_step("安装项目依赖")
run([str(venv_pip), "install", "-r", str(ROOT / "requirements.txt")])
install_browser_runtime(venv_python)
return venv_python
def install_browser_runtime(venv_python: Path) -> None:
    """
    Pre-download the CloakBrowser browser kernel using the venv interpreter.

    Doing this during setup avoids pulling the large kernel file only when
    the first emulated login happens.

    :param venv_python: path to the virtual environment's Python executable
    """
    print_step("安装 CloakBrowser 浏览器内核")
    install_command = [str(venv_python), "-m", "cloakbrowser", "install"]
    run(install_command)
def _startup_platform_name() -> str:
system = platform.system()
if system == "Darwin":

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
import unittest
from unittest.mock import patch
from app.helper.browser import PlaywrightHelper
class _FakePage:
def __init__(self) -> None:
self.headers = None
self.loaded_url = None
self.closed = False
def set_extra_http_headers(self, headers: dict[str, str]) -> None:
self.headers = headers
def goto(self, url: str) -> None:
self.loaded_url = url
def wait_for_load_state(self, _state: str, timeout: int) -> None:
self.timeout = timeout
def content(self) -> str:
return "<html>ok</html>"
def close(self) -> None:
self.closed = True
class _FakeContext:
def __init__(self, page: _FakePage) -> None:
self.page = page
self.closed = False
def new_page(self) -> _FakePage:
return self.page
def close(self) -> None:
self.closed = True
class BrowserHelperTests(unittest.TestCase):
    """Checks that ``PlaywrightHelper.get_page_source`` drives a CloakBrowser context."""

    # Name-mangled attribute under which the private launcher lives on the class
    _LAUNCHER = "_PlaywrightHelper__launch_cloakbrowser_context"

    def _assert_get_page_source_uses_cloakbrowser(self, emulation: str) -> None:
        """
        Run ``get_page_source`` with ``BROWSER_EMULATION`` set to ``emulation``
        and verify the fake CloakBrowser context was used and cleaned up.
        """
        page = _FakePage()
        context = _FakeContext(page)

        with patch("app.helper.browser.settings.BROWSER_EMULATION", emulation), \
                patch.object(
                    PlaywrightHelper,
                    self._LAUNCHER,
                    return_value=context,
                ) as launch_context:
            source = PlaywrightHelper().get_page_source(
                url="https://example.com",
                cookies="uid=1",
                ua="UA",
                timeout=3,
            )

        self.assertEqual(source, "<html>ok</html>")
        launch_context.assert_called_once_with(
            headless=False,
            user_agent="UA",
            proxies=None,
        )
        self.assertEqual(page.headers, {"cookie": "uid=1"})
        self.assertEqual(page.loaded_url, "https://example.com")
        self.assertTrue(page.closed)
        self.assertTrue(context.closed)

    def test_default_emulation_uses_cloakbrowser_context(self):
        """The default ``cloakbrowser`` setting goes through the CloakBrowser launcher."""
        self._assert_get_page_source_uses_cloakbrowser("cloakbrowser")

    def test_legacy_playwright_emulation_uses_cloakbrowser_context(self):
        """A legacy ``Playwright`` setting is handled by the CloakBrowser launcher too."""
        self._assert_get_page_source_uses_cloakbrowser("Playwright")

    def test_legacy_browser_type_constructor_is_accepted(self):
        """The old ``PlaywrightHelper(browser_type=...)`` call still constructs and works."""
        page = _FakePage()
        context = _FakeContext(page)

        # Pin the emulation type so an ambient "flaresolverr" configuration
        # cannot divert this test away from the patched launcher.
        with patch("app.helper.browser.settings.BROWSER_EMULATION", "cloakbrowser"), \
                patch.object(
                    PlaywrightHelper,
                    self._LAUNCHER,
                    return_value=context,
                ):
            helper = PlaywrightHelper(browser_type="firefox")
            source = helper.get_page_source(url="https://example.com")

        self.assertEqual(helper.browser_type, "firefox")
        self.assertEqual(source, "<html>ok</html>")
        self.assertTrue(page.closed)
        self.assertTrue(context.closed)
if __name__ == "__main__":
unittest.main()

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import importlib.util
import tempfile
import unittest
import uuid
from pathlib import Path
@@ -58,6 +59,35 @@ class LocalSetupConfigDirTests(unittest.TestCase):
self.assertIsNone(result)
prompt_mock.assert_not_called()
def test_install_deps_installs_browser_runtime(self):
    """``install_deps`` creates the venv, installs requirements, then installs the browser runtime."""
    module = load_local_setup_module()

    with tempfile.TemporaryDirectory() as temp_dir:
        venv_dir = (Path(temp_dir) / "venv").resolve()
        bin_dir = venv_dir / "bin"
        venv_python = bin_dir / "python"
        venv_pip = bin_dir / "pip"

        # Commands that install_deps is expected to hand to run()
        create_venv_cmd = ["python3", "-m", "venv", str(venv_dir)]
        install_requirements_cmd = [
            str(venv_pip),
            "install",
            "-r",
            str(module.ROOT / "requirements.txt"),
        ]

        with patch.object(module, "ensure_supported_python"):
            with patch.object(
                module,
                "configure_venv_pip_compat",
                return_value=venv_pip,
            ):
                with patch.object(module, "run") as run_mock:
                    with patch.object(module, "install_browser_runtime") as install_browser:
                        result = module.install_deps(
                            python_bin="python3",
                            venv_dir=venv_dir,
                            recreate=False,
                        )

    self.assertEqual(result, venv_python)
    run_mock.assert_any_call(create_venv_cmd)
    run_mock.assert_any_call(install_requirements_cmd)
    install_browser.assert_called_once_with(venv_python)
if __name__ == "__main__":
unittest.main()