feat: add user-friendly handling for unsupported image input errors in agent execution

2026-05-31 23:16:48 +00:00 · 2026-05-14 20:36:14 +08:00
parent 7b27b7fd16
commit 23784f614b
2 changed files with 187 additions and 1 deletions
--- a/app/agent/init.py
+++ b/app/agent/init.py
@@ -169,6 +169,7 @@ class ReplyMode(str, Enum):


 HEARTBEAT_SESSION_PREFIX = "__agent_heartbeat_"
+UNSUPPORTED_IMAGE_INPUT_MESSAGE = "当前模型不支持图片输入，请更换支持图片输入的模型，或在系统设置中关闭图片输入支持后重试。"  # User-facing notice: the current model does not support image input; switch models or disable image input in settings, then retry.


 class MoviePilotAgent:
@@ -376,6 +377,92 @@ class MoviePilotAgent:
            return "".join(text_parts)
        return str(content)

+    @classmethod
+    def _has_image_input_content(cls, content: Any) -> bool:
+        """
+        Check whether message content contains an image block that is really sent to the model.
+        An "images" field inside structured JSON text is only a note for the agent to read; plain strings are never parsed, so it does not count.
+        """
+        if isinstance(content, list):
+            return any(cls._has_image_input_content(item) for item in content)
+        if not isinstance(content, dict):
+            return False
+
+        block_type = str(content.get("type") or "").lower()
+        if block_type in {"image", "image_url", "input_image"}:
+            return True
+        if content.get("image_url") or content.get("image"):  # untyped block that still carries a truthy image payload
+            return True
+        return any(cls._has_image_input_content(value) for value in content.values())  # recurse into nested dict/list values
+
+    @classmethod
+    def _messages_have_image_input(cls, messages: List[BaseMessage]) -> bool:
+        """Return True if any message in the list submitted to the model this turn contains image input."""
+        return any(
+            cls._has_image_input_content(getattr(message, "content", None))
+            for message in messages or []
+        )
+
+    @staticmethod
+    def _exception_detail_text(error: Exception) -> str:
+        """
+        Collect the text on an exception object that can be used for matching.
+        Error details from OpenAI-compatible endpoints may be hidden in attributes such as body/code/status_code.
+        """
+        parts = [str(error)]
+        for attr in ("message", "code", "status_code"):
+            value = getattr(error, attr, None)
+            if value is not None:
+                parts.append(str(value))
+        body = getattr(error, "body", None)
+        if body is not None:
+            try:
+                parts.append(json.dumps(body, ensure_ascii=False))
+            except (TypeError, ValueError):  # body is not JSON-serializable; fall back to str()
+                parts.append(str(body))
+        return " ".join(part for part in parts if part)  # empty fragments are dropped before joining
+
+    @classmethod
+    def _is_unsupported_image_input_error(cls, error: Exception) -> bool:
+        """
+        Decide whether the model service is rejecting image input.
+        Matches error wording common to OpenAI and OpenAI-compatible services, so that an ordinary 404 is not mistaken for an image-capability problem.
+        """
+        detail = cls._exception_detail_text(error).lower()
+        if "no endpoints found that support image input" in detail:
+            return True
+        if "image input" not in detail and "images" not in detail:  # error text never mentions images: treat as unrelated
+            return False
+        return any(
+            marker in detail
+            for marker in (
+                "does not support",
+                "do not support",
+                "not support",
+                "not supported",
+                "unsupported",
+                "no endpoint",
+                "no endpoints",
+            )
+        )
+
+    async def _dispatch_execution_notice(self, message: str) -> None:
+        """
+        Turn an expected execution-layer failure into a user-readable notice.
+        Handled according to the current reply mode, so that background capture tasks do not bypass the CAPTURE_ONLY constraint.
+        """
+        if not message:
+            return
+        self._emit_output(message)
+        if self._tool_context.get("user_reply_sent"):  # flag already set: emit only, skip sending/persisting
+            return
+
+        title = "MoviePilot助手" if self.is_background else ""
+        if self.should_dispatch_reply:
+            await self.send_agent_message(message, title=title)
+        elif self.persist_output_message:  # not dispatching: store the notice instead when persistence is enabled
+            await self._save_agent_message_to_db(message, title=title)
+
    def _emit_output(self, text: str):
        """
        输出当前流式文本到外部回调。
@@ -741,6 +828,12 @@ class MoviePilotAgent:
            logger.info(f"Agent执行被取消: session_id={self.session_id}")
            return "任务已取消", {}
        except Exception as e:
+            if self._messages_have_image_input(messages) and self._is_unsupported_image_input_error(e):
+                logger.warning(
+                    f"当前模型不支持图片输入，已向用户发送友好提示: {e}"
+                )
+                await self._dispatch_execution_notice(UNSUPPORTED_IMAGE_INPUT_MESSAGE)
+                return UNSUPPORTED_IMAGE_INPUT_MESSAGE, {}
            logger.error(f"Agent执行失败: {e} - {traceback.format_exc()}")
            return str(e), {}
        finally:
--- a/tests/test_agent_background_output.py
+++ b/tests/test_agent_background_output.py
@@ -2,13 +2,14 @@ import unittest
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, patch

-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage

 from app.agent import (
    HEARTBEAT_SESSION_PREFIX,
    MoviePilotAgent,
    AgentManager,
    ReplyMode,
+    UNSUPPORTED_IMAGE_INPUT_MESSAGE,
 )
 from app.agent.memory import memory_manager
 from app.core.config import settings
@@ -31,6 +32,24 @@ class _FakeAgent:
        return _FakeGraphState(self._messages)


+class _FakeFailingAgent:  # test double whose ainvoke always raises the error given at construction
+    def __init__(self, error):
+        self._error = error
+
+    async def ainvoke(self, _payload, config=None):
+        raise self._error
+
+    def get_state(self, _config):
+        return _FakeGraphState([])  # graph state with an empty message list
+
+
+class _FakeStreamingFailingAgent(_FakeFailingAgent):  # streaming variant: astream raises on first iteration
+    async def astream(self, _messages, **_kwargs):
+        raise self._error
+        # Keep this an async generator so the test double does not become a plain coroutine.
+        yield None
+
+
 class AgentBackgroundOutputTest(unittest.IsolatedAsyncioTestCase):
    async def test_background_non_streaming_does_not_send_by_default(self):
        agent = MoviePilotAgent(session_id="bg-test", user_id="system")
@@ -60,6 +79,80 @@ class AgentBackgroundOutputTest(unittest.IsolatedAsyncioTestCase):
        save_messages.assert_called_once()
        self.assertEqual("后台结果", agent._streamed_output)

+    async def test_non_streaming_image_unsupported_error_sends_friendly_notice(self):
+        agent = MoviePilotAgent(session_id="image-test", user_id="user-1")
+        agent.channel = "Telegram"
+        agent.source = "telegram-test"
+        agent._tool_context = {"user_reply_sent": False}
+        agent._streamed_output = ""
+        agent.stream_handler = SimpleNamespace(
+            stop_streaming=AsyncMock(return_value=(False, ""))
+        )
+        agent._should_stream = lambda: False  # make _should_stream() report False (non-streaming case under test)
+        agent._create_agent = AsyncMock(
+            return_value=_FakeFailingAgent(
+                RuntimeError("No endpoints found that support image input")
+            )
+        )
+        agent.send_agent_message = AsyncMock()
+        agent._save_agent_message_to_db = AsyncMock()
+
+        result, _ = await agent._execute_agent(
+            [
+                HumanMessage(
+                    content=[
+                        {"type": "text", "text": "看看这张图"},
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,xxx"}},
+                    ]
+                )
+            ]
+        )
+
+        self.assertEqual(UNSUPPORTED_IMAGE_INPUT_MESSAGE, result)  # friendly text is returned instead of the raw error string
+        agent.send_agent_message.assert_awaited_once_with(
+            UNSUPPORTED_IMAGE_INPUT_MESSAGE, title=""
+        )
+        agent._save_agent_message_to_db.assert_not_awaited()  # the notice was sent, not persisted
+        self.assertEqual(UNSUPPORTED_IMAGE_INPUT_MESSAGE, agent._streamed_output)  # _streamed_output holds the notice as well
+
+    async def test_streaming_image_unsupported_error_sends_friendly_notice(self):
+        agent = MoviePilotAgent(session_id="image-test", user_id="user-1")
+        agent.channel = "Telegram"
+        agent.source = "telegram-test"
+        agent._tool_context = {"user_reply_sent": False}
+        agent._streamed_output = ""
+        agent.stream_handler = SimpleNamespace(  # stub exposing only the handler members this test supplies
+            set_dispatch_policy=lambda allow_dispatch_without_context=False: None,
+            start_streaming=AsyncMock(),
+            flush_pending_tool_summary=lambda: "",
+            stop_streaming=AsyncMock(return_value=(False, "")),
+        )
+        agent._should_stream = lambda: True  # make _should_stream() report True (streaming case under test)
+        agent._create_agent = AsyncMock(
+            return_value=_FakeStreamingFailingAgent(
+                RuntimeError("Error code: 404 - No endpoints found that support image input")
+            )
+        )
+        agent.send_agent_message = AsyncMock()
+        agent._save_agent_message_to_db = AsyncMock()
+
+        result, _ = await agent._execute_agent(
+            [
+                HumanMessage(
+                    content=[
+                        {"type": "text", "text": "看看这张图"},
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,xxx"}},
+                    ]
+                )
+            ]
+        )
+
+        self.assertEqual(UNSUPPORTED_IMAGE_INPUT_MESSAGE, result)  # friendly text is returned instead of the raw 404 string
+        agent.send_agent_message.assert_awaited_once_with(
+            UNSUPPORTED_IMAGE_INPUT_MESSAGE, title=""
+        )
+        agent._save_agent_message_to_db.assert_not_awaited()  # the notice was sent, not persisted
+
    async def test_background_non_streaming_sends_when_reply_mode_dispatch(self):
        agent = MoviePilotAgent(session_id="bg-test", user_id="system")
        agent.channel = None