Files
archived-MoviePilot/app/helper/browser.py

326 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import uuid
from typing import Callable, Any, Optional, Protocol
from app.core.config import settings
from app.log import logger
from app.utils.http import RequestUtils, cookie_parse
class BrowserElement(Protocol):
    """
    Minimal interface of a page element.

    Declared structurally so that type annotations do not require importing
    Playwright directly.
    """

    def is_visible(self) -> bool: ...

    def fill(self, value: str) -> None: ...
class BrowserContext(Protocol):
    """
    Capabilities a context returned by CloakBrowser has to provide.

    Only the members listed here are required.
    """

    def new_page(self) -> "BrowserPage": ...

    def cookies(self) -> list[dict[str, Any]]: ...

    def close(self) -> None: ...
class BrowserPage(Protocol):
    """
    Minimal interface of a CloakBrowser page object.

    Covers the methods currently used by the helper and by the login flow.
    """

    # Context that owns this page.
    context: BrowserContext
    # Address of the page.
    url: str

    def set_extra_http_headers(self, headers: dict[str, str]) -> None: ...

    def goto(self, url: str, *args: Any, **kwargs: Any) -> Any: ...

    def wait_for_load_state(self, state: str, *args: Any, **kwargs: Any) -> Any: ...

    def wait_for_selector(self, selector: str, *args: Any, **kwargs: Any) -> Any: ...

    def fill(self, selector: str, value: str, *args: Any, **kwargs: Any) -> Any: ...

    def click(self, selector: str, *args: Any, **kwargs: Any) -> Any: ...

    def query_selector(self, selector: str) -> Optional[BrowserElement]: ...

    def content(self) -> str: ...

    def evaluate(self, expression: str, *args: Any, **kwargs: Any) -> Any: ...

    def close(self) -> None: ...
class PlaywrightHelper:
    """
    Helper that loads web pages through a CloakBrowser context.

    When ``settings.BROWSER_EMULATION`` is ``"flaresolverr"``, FlareSolverr is
    queried first and its cookies / user agent / page source are used where
    available.
    """

    def __init__(self, browser_type: Optional[str] = None, *args, **kwargs):
        """
        Keep the legacy ``PlaywrightHelper(browser_type=...)`` constructor working.

        :param browser_type: browser type; falls back to
            ``settings.PLAYWRIGHT_BROWSER_TYPE`` when falsy
        :param args: accepted for compatibility and ignored
        :param kwargs: accepted for compatibility and ignored
        """
        self.browser_type = browser_type or settings.PLAYWRIGHT_BROWSER_TYPE

    @staticmethod
    def __browser_emulation() -> str:
        """
        Return the configured browser emulation type in lower case.

        Defaults to ``"cloakbrowser"`` when the setting is empty.
        """
        return (settings.BROWSER_EMULATION or "cloakbrowser").lower()

    @staticmethod
    def __wait_timeout_ms(timeout: Optional[int]) -> int:
        """
        Convert a timeout in seconds to milliseconds.

        ``None`` is mapped to the 60 second default instead of raising a
        ``TypeError``; every other value (including 0) is passed through
        unchanged apart from the unit conversion.

        :param timeout: timeout in seconds, or None
        :return: timeout in milliseconds
        """
        return (60 if timeout is None else timeout) * 1000

    @staticmethod
    def __launch_cloakbrowser_context(headless: bool,
                                      user_agent: Optional[str] = None,
                                      proxies: Optional[dict] = None) -> BrowserContext:
        """
        Launch a CloakBrowser context.

        :param headless: whether to run headless
        :param user_agent: user agent forwarded to CloakBrowser
        :param proxies: proxy configuration forwarded as ``proxy``
        :return: the context returned by ``cloakbrowser.launch_context``
        """
        # Imported lazily so the module loads even without cloakbrowser installed.
        from cloakbrowser import launch_context

        return launch_context(headless=headless,
                              proxy=proxies,
                              user_agent=user_agent,
                              humanize=settings.CLOAKBROWSER_HUMANIZE,
                              human_preset=settings.CLOAKBROWSER_HUMAN_PRESET)

    @staticmethod
    def __fs_cookie_str(cookies: list) -> str:
        """
        Join a list of cookie dicts into a ``name=value; name=value`` header string.

        Empty entries and entries without a ``name`` are skipped. A missing
        value is rendered as an empty value rather than the text ``None``.

        :param cookies: list of dicts with ``name`` / ``value`` keys
        :return: cookie header string, empty when there is nothing to join
        """
        if not cookies:
            return ""
        pairs = []
        for cookie in cookies:
            if not cookie or cookie.get("name") is None:
                continue
            value = cookie.get("value")
            pairs.append(f"{cookie.get('name')}={'' if value is None else value}")
        return "; ".join(pairs)

    @staticmethod
    def __flaresolverr_request(url: str,
                               cookies: Optional[str] = None,
                               proxy_config: Optional[dict] = None,
                               timeout: Optional[int] = 60) -> Optional[dict]:
        """
        Call FlareSolverr to get past Cloudflare and return the ``solution`` object.

        Reference: https://github.com/FlareSolverr/FlareSolverr

        :param url: page address
        :param cookies: cookie string, sent to FlareSolverr as an array
        :param proxy_config: dict with ``server`` and optional ``username`` / ``password``
        :param timeout: timeout in seconds; a falsy value means 60
        :return: the ``solution`` dict, or None on any failure
        """
        if not settings.FLARESOLVERR_URL:
            logger.warning("未配置 FLARESOLVERR_URL无法使用 FlareSolverr")
            return None

        fs_api = settings.FLARESOLVERR_URL.rstrip("/") + "/v1"
        request_timeout = timeout or 60
        session_id = None

        try:
            # Does the proxy need credentials?
            need_proxy_auth = (proxy_config and proxy_config.get("server") and
                               (proxy_config.get("username") or proxy_config.get("password")))

            if need_proxy_auth:
                # Session mode is used so proxy credentials can be supplied.
                logger.debug("检测到flaresolverr代理需要认证使用 session 模式")

                # 1. Create the session, carrying the proxy configuration.
                session_id = str(uuid.uuid4())
                proxy_payload: dict = {"url": proxy_config["server"]}
                if proxy_config.get("username"):
                    proxy_payload["username"] = proxy_config["username"]
                if proxy_config.get("password"):
                    proxy_payload["password"] = proxy_config["password"]
                create_payload: dict = {
                    "cmd": "sessions.create",
                    "session": session_id,
                    "proxy": proxy_payload,
                }

                create_result = RequestUtils(content_type="application/json",
                                             timeout=request_timeout).post_json(url=fs_api, json=create_payload)
                if not create_result or create_result.get("status") != "ok":
                    logger.error(
                        f"创建 FlareSolverr 会话失败: {create_result.get('message') if create_result else '无响应'}")
                    return None

                # 2. Send the request through that session.
                request_payload = {
                    "cmd": "request.get",
                    "url": url,
                    "session": session_id,
                    "maxTimeout": int(request_timeout) * 1000,
                }
            else:
                # Plain mode (no proxy authentication).
                request_payload = {
                    "cmd": "request.get",
                    "url": url,
                    "maxTimeout": int(request_timeout) * 1000,
                }
                # Proxy URL only, no credentials.
                if proxy_config and proxy_config.get("server"):
                    request_payload["proxy"] = {"url": proxy_config["server"]}

            # Pass cookies to FlareSolverr as an array; a parse failure is not fatal.
            if cookies:
                try:
                    request_payload["cookies"] = cookie_parse(cookies, array=True)
                except Exception as e:
                    logger.debug(f"解析 cookies 失败,忽略: {str(e)}")

            data = RequestUtils(content_type="application/json",
                                timeout=request_timeout).post_json(url=fs_api, json=request_payload)
            if not data:
                logger.error("FlareSolverr 返回空响应")
                return None
            if data.get("status") != "ok":
                logger.error(f"FlareSolverr 调用失败: {data.get('message')}")
                return None
            return data.get("solution")
        except Exception as e:
            logger.error(f"调用 FlareSolverr 失败: {str(e)}")
            return None
        finally:
            # Best-effort cleanup of the session on every exit path.
            if session_id:
                try:
                    destroy_payload = {
                        "cmd": "sessions.destroy",
                        "session": session_id
                    }
                    RequestUtils(content_type="application/json",
                                 timeout=10).post_json(url=fs_api, json=destroy_payload)
                    logger.debug(f"已清理 FlareSolverr 会话: {session_id}")
                except Exception as e:
                    logger.warning(f"清理 FlareSolverr 会话失败: {str(e)}")

    def action(self, url: str,
               callback: Callable[[BrowserPage], Any],
               cookies: Optional[str] = None,
               ua: Optional[str] = None,
               proxies: Optional[dict] = None,
               headless: Optional[bool] = False,
               timeout: Optional[int] = 60) -> Any:
        """
        Open a page and run a callback against the page object.

        :param url: page address
        :param callback: callable that receives the page object
        :param cookies: cookies
        :param ua: user-agent
        :param proxies: proxy configuration
        :param headless: whether to run headless
        :param timeout: timeout in seconds; None means 60
        :return: the callback's return value, or None when anything failed
        """
        result = None
        try:
            context = None
            page = None
            try:
                # With FlareSolverr configured, fetch cleared cookies and UA from it first.
                fs_cookie_header = None
                fs_ua = None
                if self.__browser_emulation() == "flaresolverr":
                    solution = self.__flaresolverr_request(url=url, cookies=cookies,
                                                           proxy_config=proxies, timeout=timeout)
                    if solution:
                        fs_cookie_header = self.__fs_cookie_str(solution.get("cookies", []))
                        fs_ua = solution.get("userAgent")

                context = self.__launch_cloakbrowser_context(headless=headless,
                                                             user_agent=fs_ua or ua,
                                                             proxies=proxies)
                page = context.new_page()

                # FlareSolverr's cookies take precedence over the caller's.
                merged_cookie = fs_cookie_header or cookies
                if merged_cookie:
                    page.set_extra_http_headers({"cookie": merged_cookie})

                page.goto(url)
                page.wait_for_load_state("networkidle", timeout=self.__wait_timeout_ms(timeout))
                result = callback(page)
            except Exception as e:
                logger.error(f"网页操作失败: {str(e)}")
            finally:
                # Close the context even if closing the page raises.
                try:
                    if page:
                        page.close()
                finally:
                    if context:
                        context.close()
        except Exception as e:
            # Only errors raised during cleanup reach this handler.
            logger.error(f"CloakBrowser初始化失败: {str(e)}")

        return result

    def get_page_source(self, url: str,
                        cookies: Optional[str] = None,
                        ua: Optional[str] = None,
                        proxies: Optional[dict] = None,
                        headless: Optional[bool] = False,
                        timeout: Optional[int] = 60) -> Optional[str]:
        """
        Fetch the source of a page.

        :param url: page address
        :param cookies: cookies
        :param ua: user-agent
        :param proxies: proxy configuration
        :param headless: whether to run headless
        :param timeout: timeout in seconds; None means 60
        :return: page source, or None when it could not be fetched
        """
        source = None

        # With FlareSolverr configured, take the source straight from its response;
        # fall through to the browser when it yields no solution.
        if self.__browser_emulation() == "flaresolverr":
            try:
                solution = self.__flaresolverr_request(url=url, cookies=cookies,
                                                       proxy_config=proxies, timeout=timeout)
                if solution:
                    return solution.get("response")
            except Exception as e:
                logger.error(f"FlareSolverr 获取源码失败: {str(e)}")

        try:
            context = None
            page = None
            try:
                context = self.__launch_cloakbrowser_context(headless=headless,
                                                             user_agent=ua,
                                                             proxies=proxies)
                page = context.new_page()
                if cookies:
                    page.set_extra_http_headers({"cookie": cookies})
                page.goto(url)
                page.wait_for_load_state("networkidle", timeout=self.__wait_timeout_ms(timeout))
                source = page.content()
            except Exception as e:
                logger.error(f"获取网页源码失败: {str(e)}")
                source = None
            finally:
                # Close the context even if closing the page raises.
                try:
                    if page:
                        page.close()
                finally:
                    if context:
                        context.close()
        except Exception as e:
            # Only errors raised during cleanup reach this handler.
            logger.error(f"CloakBrowser初始化失败: {str(e)}")

        return source