fix: 支持飞书语音消息识别

2026-05-13 07:26:45 +00:00 · 2026-05-13 12:45:32 +08:00
parent b24127e66f
commit f8d096f476
5 changed files with 98 additions and 20 deletions
--- a/app/chain/message.py
+++ b/app/chain/message.py
@@ -1272,6 +1272,13 @@ class MessageChain(ChainBase):
                    )
                elif audio_ref.startswith("wxbot://voice"):
                    continue
+                elif audio_ref.startswith("feishu://file/"):
+                    content = self.run_module(
+                        "download_feishu_file_bytes", file_ref=audio_ref, source=source
+                    )
+                    filename = self._guess_audio_filename(
+                        audio_ref, default="input.opus"
+                    )
                elif audio_ref.startswith("http"):
                    resp = RequestUtils(timeout=30).get_res(audio_ref)
                    content = resp.content if resp and resp.content else None
@@ -1339,11 +1346,11 @@ class MessageChain(ChainBase):
        """
        下载可直接提供给 LLM 的附件内容，并统一转换为 data URL。
        """
-        attachments = CommingMessage.MessageImage.normalize_list(attachments)
-        if not attachments:
+        normalized_attachments = CommingMessage.MessageImage.normalize_list(attachments) or []
+        if not normalized_attachments:
            return None
        data_urls = []
-        for attachment in attachments:
+        for attachment in normalized_attachments:
            attachment_ref = attachment.ref
            try:
                before_count = len(data_urls)
--- a/app/modules/feishu/init.py
+++ b/app/modules/feishu/init.py
@@ -301,11 +301,32 @@ class FeishuModule(_ModuleBase, _MessageBase[Feishu]):
        client = self.get_instance(client_config.name)
        if not client:
            return None
-        parts = file_ref.replace("feishu://file/", "", 1).split("/", 1)
-        file_key = parts[0].strip() if parts else ""
+        parts = [
+            part.strip()
+            for part in file_ref.replace("feishu://file/", "", 1).split("/")
+            if part.strip()
+        ]
+        file_key = ""
+        downloaded = None
+        if len(parts) >= 2 and parts[0].startswith("om_"):
+            message_id, file_key = parts[0], parts[1]
+            downloaded = client.download_message_resource_bytes(
+                message_id=message_id,
+                file_key=file_key,
+                resource_type="audio",
+            )
+            if not downloaded:
+                downloaded = client.download_message_resource_bytes(
+                    message_id=message_id,
+                    file_key=file_key,
+                    resource_type="file",
+                )
+        else:
+            file_key = parts[0] if parts else ""
        if not file_key:
            return None
-        downloaded = client.download_file_bytes(file_key)
+        if not downloaded:
+            downloaded = client.download_file_bytes(file_key)
        if not downloaded:
            return None
        content, _, _ = downloaded
--- a/app/modules/feishu/feishu.py
+++ b/app/modules/feishu/feishu.py
@@ -235,13 +235,16 @@ class Feishu:
        elif message_type in {"audio", "media", "file"}:
            file_key = str(content.get("file_key") or "").strip()
            file_name = str(content.get("file_name") or "").strip() or None
+            message_id = str(getattr(message, "message_id", None) or "").strip()
            if file_key:
                if message_type == "audio":
-                    audio_refs = [f"feishu://file/{file_key}/{file_name or 'audio.opus'}"]
+                    resource_path = f"{message_id}/{file_key}" if message_id else file_key
+                    audio_refs = [f"feishu://file/{resource_path}/{file_name or 'audio.opus'}"]
                else:
+                    resource_path = f"{message_id}/{file_key}" if message_id else file_key
                    files = [
                        CommingMessage.MessageAttachment(
-                            ref=f"feishu://file/{file_key}/{file_name or 'attachment'}",
+                            ref=f"feishu://file/{resource_path}/{file_name or 'attachment'}",
                            name=file_name,
                        )
                    ]
--- a/tests/test_agent_image_support.py
+++ b/tests/test_agent_image_support.py
@@ -2,6 +2,7 @@ import base64
 import json
 import tempfile
 import unittest
+from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, Mock, patch
 from urllib.parse import quote
@@ -14,6 +15,7 @@ from app.agent import MoviePilotAgent, AgentChain
 from app.chain.message import MessageChain
 from app.core.config import settings
 from app.agent.llm import LLMHelper
+from app.helper.voice import VoiceHelper
 from app.modules.discord import DiscordModule
 from app.modules.qqbot import QQBotModule
 from app.modules.slack import SlackModule
@@ -278,16 +280,24 @@ class AgentImageSupportTest(unittest.TestCase):
            "qq://file/" + quote("https://example.com/qq-voice.ogg", safe=""),
            "vocechat://file/%2Fuploads%2Fvoice.ogg",
            "synology://file/" + quote("https://example.com/synology-voice.wav", safe=""),
+            "feishu://file/om_audio/file_audio/voice.opus",
        ]

        with patch.object(VoiceHelper, "is_available", return_value=True), patch.object(
            chain,
            "run_module",
-            side_effect=[b"slack", b"discord", b"qq", b"vocechat", b"synology"],
+            side_effect=[b"slack", b"discord", b"qq", b"vocechat", b"synology", b"feishu"],
        ) as run_module, patch.object(
            VoiceHelper,
            "transcribe_bytes",
-            side_effect=["slack text", "discord text", "qq text", "vocechat text", "synology text"],
+            side_effect=[
+                "slack text",
+                "discord text",
+                "qq text",
+                "vocechat text",
+                "synology text",
+                "feishu text",
+            ],
        ) as transcribe_bytes:
            result = chain._transcribe_audio_refs(
                audio_refs=audio_refs,
@@ -297,7 +307,7 @@ class AgentImageSupportTest(unittest.TestCase):

        self.assertEqual(
            result,
-            "slack text\ndiscord text\nqq text\nvocechat text\nsynology text",
+            "slack text\ndiscord text\nqq text\nvocechat text\nsynology text\nfeishu text",
        )
        self.assertEqual(
            [call.args[0] for call in run_module.call_args_list],
@@ -307,6 +317,7 @@ class AgentImageSupportTest(unittest.TestCase):
                "download_qq_file_bytes",
                "download_vocechat_file_bytes",
                "download_synologychat_file_bytes",
+                "download_feishu_file_bytes",
            ],
        )
        self.assertEqual(
@@ -317,6 +328,7 @@ class AgentImageSupportTest(unittest.TestCase):
                "qq-voice.ogg",
                "voice.ogg",
                "synology-voice.wav",
+                "voice.opus",
            ],
        )

@@ -393,7 +405,7 @@ class AgentImageSupportTest(unittest.TestCase):
        with patch.object(settings, "AI_AGENT_ENABLE", True), patch.object(
            settings, "LLM_SUPPORT_IMAGE_INPUT", False
        ), patch.object(chain, "_get_or_create_session_id", return_value="session-1"), patch.object(
-            chain, "_download_images_to_base64"
+            chain, "_download_attachments_to_data_urls"
        ) as download_images, patch.object(
            chain,
            "_prepare_agent_files",
@@ -439,8 +451,8 @@ class AgentImageSupportTest(unittest.TestCase):
            "run_module",
            return_value="data:image/png;base64,abc123",
        ) as run_module:
-            images = chain._download_images_to_base64(
-                images=["https://files.slack.com/files-pri/T1-F1/test.png"],
+            images = chain._download_attachments_to_data_urls(
+                attachments=["https://files.slack.com/files-pri/T1-F1/test.png"],
                channel=MessageChannel.Slack,
                source="slack-test",
            )
@@ -598,8 +610,8 @@ class AgentImageSupportTest(unittest.TestCase):
            "run_module",
            return_value="data:image/png;base64,wechat123",
        ) as run_module:
-            images = chain._download_images_to_base64(
-                images=["wxwork://media_id/media-1"],
+            images = chain._download_attachments_to_data_urls(
+                attachments=["wxwork://media_id/media-1"],
                channel=MessageChannel.Wechat,
                source="wechat-test",
            )
@@ -964,7 +976,13 @@ class AgentImageSupportTest(unittest.TestCase):
            with patch.object(
                module,
                "get_configs",
-                return_value={"discord-test": SimpleNamespace(name="discord-test")},
+                return_value={
+                    "discord-test": SimpleNamespace(
+                        name="discord-test",
+                        type="discord",
+                        enabled=True,
+                    )
+                },
            ), patch.object(
                module, "check_message", return_value=True
            ), patch.object(
@@ -1145,8 +1163,9 @@ class AgentImageSupportTest(unittest.TestCase):

    def test_prepare_agent_files_saves_local_file(self):
        chain = MessageChain()
-        with tempfile.TemporaryDirectory() as tempdir, patch.object(
-            settings, "TEMP_PATH", Path(tempdir)
+        with tempfile.TemporaryDirectory() as tempdir, patch(
+            "app.chain.message.settings",
+            SimpleNamespace(TEMP_PATH=Path(tempdir)),
        ), patch.object(
            chain,
            "_download_message_file_bytes",
--- a/tests/test_feishu.py
+++ b/tests/test_feishu.py
@@ -576,6 +576,24 @@ class TestFeishu(unittest.TestCase):
        payload = forward.call_args.args[0]
        self.assertEqual(payload["images"][0]["ref"], "feishu://image/om_img_evt/img_v2_evt")

+    def test_on_message_wraps_feishu_audio_ref_with_message_id(self):  # audio refs must embed the message_id
+        client = self._build_client()
+        message = SimpleNamespace(  # stand-in for an inbound audio message object
+            message_id="om_audio_evt",
+            chat_id="oc_chat_evt",
+            chat_type="p2p",
+            message_type="audio",
+            content=json.dumps({"file_key": "file_audio_evt", "file_name": "voice.opus"}),  # content is passed as a JSON string
+        )
+        sender = SimpleNamespace(sender_id=SimpleNamespace(open_id="ou_user_evt", user_id=None))
+        event = SimpleNamespace(sender=sender, message=message)
+        # Patch the forwarder so the payload handed to it by _on_message can be inspected.
+        with patch.object(client, "_forward_to_message_chain") as forward:
+            client._on_message(SimpleNamespace(event=event))
+
+        payload = forward.call_args.args[0]  # first positional argument of the forward call
+        self.assertEqual(payload["audio_refs"], ["feishu://file/om_audio_evt/file_audio_evt/voice.opus"])  # <message_id>/<file_key>/<file_name>
+
    def test_feishu_channel_capabilities_enable_images_and_files(self):
        self.assertTrue(
            ChannelCapabilityManager.supports_capability(
@@ -784,14 +802,24 @@ class TestFeishu(unittest.TestCase):
        ):
            data_url = module.download_feishu_image_to_data_url("feishu://image/om_msg/img_v2_xxx", "feishu-main")
            file_bytes = module.download_feishu_file_bytes("feishu://file/file_xxx/note.txt", "feishu-main")
+            audio_bytes = module.download_feishu_file_bytes(
+                "feishu://file/om_audio/file_audio/voice.opus",
+                "feishu-main",
+            )

        self.assertTrue(data_url.startswith("data:image/png;base64,"))
        self.assertEqual(file_bytes, b"file")
-        client.download_message_resource_bytes.assert_called_once_with(
+        self.assertEqual(audio_bytes, b"image")
+        client.download_message_resource_bytes.assert_any_call(
            message_id="om_msg",
            file_key="img_v2_xxx",
            resource_type="image",
        )
+        client.download_message_resource_bytes.assert_any_call(
+            message_id="om_audio",
+            file_key="file_audio",
+            resource_type="audio",
+        )

    def test_module_message_reaction_helpers_delegate_to_client(self):
        module = FeishuModule()