diff --git a/app/agent/__init__.py b/app/agent/__init__.py
index 0db945ca..092ce8f1 100644
--- a/app/agent/__init__.py
+++ b/app/agent/__init__.py
@@ -169,6 +169,7 @@ class ReplyMode(str, Enum):
 
 
 HEARTBEAT_SESSION_PREFIX = "__agent_heartbeat_"
+UNSUPPORTED_IMAGE_INPUT_MESSAGE = "当前模型不支持图片输入,请更换支持图片输入的模型,或在系统设置中关闭图片输入支持后重试。"
 
 
 class MoviePilotAgent:
@@ -376,6 +377,92 @@ class MoviePilotAgent:
             return "".join(text_parts)
         return str(content)
 
+    @classmethod
+    def _has_image_input_content(cls, content: Any) -> bool:
+        """
+        Check whether the message content contains image blocks that are actually sent to the model.
+        An `images` field inside structured JSON text is only a note for the Agent to read and must not be treated as image input.
+        """
+        if isinstance(content, list):
+            return any(cls._has_image_input_content(item) for item in content)
+        if not isinstance(content, dict):
+            return False
+
+        block_type = str(content.get("type") or "").lower()
+        if block_type in {"image", "image_url", "input_image"}:
+            return True
+        if content.get("image_url") or content.get("image"):
+            return True
+        return any(cls._has_image_input_content(value) for value in content.values())
+
+    @classmethod
+    def _messages_have_image_input(cls, messages: List[BaseMessage]) -> bool:
+        """Check whether the message list submitted to the model in this round contains image input."""
+        return any(
+            cls._has_image_input_content(getattr(message, "content", None))
+            for message in messages or []
+        )
+
+    @staticmethod
+    def _exception_detail_text(error: Exception) -> str:
+        """
+        Extract the text of an exception object that can be used for matching.
+        Error details from OpenAI-compatible endpoints may be hidden in attributes such as body/code/status_code.
+        """
+        parts = [str(error)]
+        for attr in ("message", "code", "status_code"):
+            value = getattr(error, attr, None)
+            if value is not None:
+                parts.append(str(value))
+        body = getattr(error, "body", None)
+        if body is not None:
+            try:
+                parts.append(json.dumps(body, ensure_ascii=False))
+            except (TypeError, ValueError):
+                parts.append(str(body))
+        return " ".join(part for part in parts if part)
+
+    @classmethod
+    def _is_unsupported_image_input_error(cls, error: Exception) -> bool:
+        """
+        Determine whether the model service is rejecting image input.
+        Covers error wording commonly used by OpenAI and OpenAI-compatible services, and avoids treating an ordinary 404 as an image-capability problem.
+        """
+        detail = cls._exception_detail_text(error).lower()
+        if "no endpoints found that support image input" in detail:
+            return True
+        if "image input" not in detail and "images" not in detail:
+            return False
+        return any(
+            marker in detail
+            for marker in (
+                "does not support",
+                "do not support",
+                "not support",
+                "not supported",
+                "unsupported",
+                "no endpoint",
+                "no endpoints",
+            )
+        )
+
+    async def _dispatch_execution_notice(self, message: str) -> None:
+        """
+        Turn an expected execution-layer failure into a user-readable notice.
+        Handled according to the current reply mode so that background capture tasks do not bypass the CAPTURE_ONLY constraint.
+        """
+        if not message:
+            return
+        self._emit_output(message)
+        if self._tool_context.get("user_reply_sent"):
+            return
+
+        title = "MoviePilot助手" if self.is_background else ""
+        if self.should_dispatch_reply:
+            await self.send_agent_message(message, title=title)
+        elif self.persist_output_message:
+            await self._save_agent_message_to_db(message, title=title)
+
     def _emit_output(self, text: str):
         """
         输出当前流式文本到外部回调。
@@ -741,6 +828,12 @@ class MoviePilotAgent:
             logger.info(f"Agent执行被取消: session_id={self.session_id}")
             return "任务已取消", {}
         except Exception as e:
+            if self._messages_have_image_input(messages) and self._is_unsupported_image_input_error(e):
+                logger.warning(
+                    f"当前模型不支持图片输入,已向用户发送友好提示: {e}"
+                )
+                await self._dispatch_execution_notice(UNSUPPORTED_IMAGE_INPUT_MESSAGE)
+                return UNSUPPORTED_IMAGE_INPUT_MESSAGE, {}
             logger.error(f"Agent执行失败: {e} - {traceback.format_exc()}")
             return str(e), {}
         finally:
diff --git a/tests/test_agent_background_output.py b/tests/test_agent_background_output.py
index 29d4bdb4..7bcb4730 100644
--- a/tests/test_agent_background_output.py
+++ b/tests/test_agent_background_output.py
@@ -2,13 +2,14 @@ import unittest
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, patch
 
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
 
 from app.agent import (
     HEARTBEAT_SESSION_PREFIX,
     MoviePilotAgent,
     AgentManager,
     ReplyMode,
+    UNSUPPORTED_IMAGE_INPUT_MESSAGE,
 )
 from app.agent.memory import memory_manager
 from app.core.config import settings
@@ -31,6 +32,24 @@ class _FakeAgent:
         return _FakeGraphState(self._messages)
 
 
+class _FakeFailingAgent:
+    def __init__(self, error):
+        self._error = error
+
+    async def ainvoke(self, _payload, config=None):
+        raise self._error
+
+    def get_state(self, _config):
+        return _FakeGraphState([])
+
+
+class _FakeStreamingFailingAgent(_FakeFailingAgent):
+    async def astream(self, _messages, **_kwargs):
+        raise self._error
+        # Keep the async-generator shape so the test double does not become a plain coroutine.
+        yield None
+
+
 class AgentBackgroundOutputTest(unittest.IsolatedAsyncioTestCase):
     async def test_background_non_streaming_does_not_send_by_default(self):
         agent = MoviePilotAgent(session_id="bg-test", user_id="system")
@@ -60,6 +79,80 @@ class AgentBackgroundOutputTest(unittest.IsolatedAsyncioTestCase):
         save_messages.assert_called_once()
         self.assertEqual("后台结果", agent._streamed_output)
 
+    async def test_non_streaming_image_unsupported_error_sends_friendly_notice(self):
+        agent = MoviePilotAgent(session_id="image-test", user_id="user-1")
+        agent.channel = "Telegram"
+        agent.source = "telegram-test"
+        agent._tool_context = {"user_reply_sent": False}
+        agent._streamed_output = ""
+        agent.stream_handler = SimpleNamespace(
+            stop_streaming=AsyncMock(return_value=(False, ""))
+        )
+        agent._should_stream = lambda: False
+        agent._create_agent = AsyncMock(
+            return_value=_FakeFailingAgent(
+                RuntimeError("No endpoints found that support image input")
+            )
+        )
+        agent.send_agent_message = AsyncMock()
+        agent._save_agent_message_to_db = AsyncMock()
+
+        result, _ = await agent._execute_agent(
+            [
+                HumanMessage(
+                    content=[
+                        {"type": "text", "text": "看看这张图"},
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,xxx"}},
+                    ]
+                )
+            ]
+        )
+
+        self.assertEqual(UNSUPPORTED_IMAGE_INPUT_MESSAGE, result)
+        agent.send_agent_message.assert_awaited_once_with(
+            UNSUPPORTED_IMAGE_INPUT_MESSAGE, title=""
+        )
+        agent._save_agent_message_to_db.assert_not_awaited()
+        self.assertEqual(UNSUPPORTED_IMAGE_INPUT_MESSAGE, agent._streamed_output)
+
+    async def test_streaming_image_unsupported_error_sends_friendly_notice(self):
+        agent = MoviePilotAgent(session_id="image-test", user_id="user-1")
+        agent.channel = "Telegram"
+        agent.source = "telegram-test"
+        agent._tool_context = {"user_reply_sent": False}
+        agent._streamed_output = ""
+        agent.stream_handler = SimpleNamespace(
+            set_dispatch_policy=lambda allow_dispatch_without_context=False: None,
+            start_streaming=AsyncMock(),
+            flush_pending_tool_summary=lambda: "",
+            stop_streaming=AsyncMock(return_value=(False, "")),
+        )
+        agent._should_stream = lambda: True
+        agent._create_agent = AsyncMock(
+            return_value=_FakeStreamingFailingAgent(
+                RuntimeError("Error code: 404 - No endpoints found that support image input")
+            )
+        )
+        agent.send_agent_message = AsyncMock()
+        agent._save_agent_message_to_db = AsyncMock()
+
+        result, _ = await agent._execute_agent(
+            [
+                HumanMessage(
+                    content=[
+                        {"type": "text", "text": "看看这张图"},
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,xxx"}},
+                    ]
+                )
+            ]
+        )
+
+        self.assertEqual(UNSUPPORTED_IMAGE_INPUT_MESSAGE, result)
+        agent.send_agent_message.assert_awaited_once_with(
+            UNSUPPORTED_IMAGE_INPUT_MESSAGE, title=""
+        )
+        agent._save_agent_message_to_db.assert_not_awaited()
+
     async def test_background_non_streaming_sends_when_reply_mode_dispatch(self):
         agent = MoviePilotAgent(session_id="bg-test", user_id="system")
         agent.channel = None