mirror of
https://github.com/jxxghp/MoviePilot.git
synced 2026-05-30 07:26:48 +00:00
fix(agent): enable voice replies for supported channels
This commit is contained in:
@@ -734,29 +734,61 @@ class AgentCapabilityManager:
|
||||
return cls.REPLY_MODE_TEXT
|
||||
|
||||
@classmethod
|
||||
def supports_native_voice_reply(
|
||||
cls, channel: Optional[str], source: Optional[str]
|
||||
) -> bool:
|
||||
"""判断当前渠道是否支持原生语音消息发送。"""
|
||||
def _parse_message_channel(cls, channel: Optional[Any]):
|
||||
"""将渠道入参归一化为消息渠道枚举。"""
|
||||
if not channel:
|
||||
return None
|
||||
|
||||
from app.schemas.types import MessageChannel
|
||||
|
||||
if isinstance(channel, MessageChannel):
|
||||
return channel
|
||||
|
||||
channel_text = str(channel).strip()
|
||||
if not channel_text:
|
||||
return None
|
||||
lowered_channel = channel_text.lower()
|
||||
for channel_item in MessageChannel:
|
||||
aliases = {
|
||||
channel_item.value.lower(),
|
||||
channel_item.name.lower(),
|
||||
f"{MessageChannel.__name__}.{channel_item.name}".lower(),
|
||||
}
|
||||
if lowered_channel in aliases:
|
||||
return channel_item
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _is_wechat_app_mode(source: Optional[str]) -> bool:
|
||||
"""判断企业微信来源是否为自建应用模式。"""
|
||||
if not source:
|
||||
return False
|
||||
|
||||
from app.helper.service import ServiceConfigHelper
|
||||
from app.schemas.types import MessageChannel
|
||||
|
||||
try:
|
||||
channel_enum = MessageChannel(channel)
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
|
||||
if channel_enum == MessageChannel.Telegram:
|
||||
return True
|
||||
if channel_enum != MessageChannel.Wechat:
|
||||
return False
|
||||
|
||||
# 企业微信 bot 模式不支持发送语音,只有应用模式可用。
|
||||
for config in ServiceConfigHelper.get_notification_configs():
|
||||
if config.name != source:
|
||||
continue
|
||||
return (config.config or {}).get("WECHAT_MODE", "app") != "bot"
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def supports_native_voice_reply(
    cls, channel: Optional[str], source: Optional[str]
) -> bool:
    """Decide whether the given channel can send native voice messages.

    Args:
        channel: Channel identifier, normalized through
            ``cls._parse_message_channel``.
        source: Notification source name; only consulted for the WeChat
            channel, where it is passed to ``cls._is_wechat_app_mode``.

    Returns:
        ``True`` when the channel resolves to a known ``MessageChannel`` that
        declares ``ChannelCapability.AUDIO_OUTPUT`` and, for WeChat, when the
        source is in app mode; ``False`` otherwise.
    """
    from app.schemas.message import ChannelCapability, ChannelCapabilityManager
    from app.schemas.types import MessageChannel

    resolved = cls._parse_message_channel(channel)
    if not resolved:
        return False

    declares_audio = ChannelCapabilityManager.supports_capability(
        resolved, ChannelCapability.AUDIO_OUTPUT
    )
    if not declares_audio:
        return False

    # WeChat additionally depends on how the specific source is configured.
    return cls._is_wechat_app_mode(source) if resolved == MessageChannel.Wechat else True
|
||||
|
||||
@@ -15,8 +15,10 @@ from app.schemas import Notification, NotificationType
|
||||
class SendVoiceMessageInput(BaseModel):
|
||||
"""发送语音消息工具输入。"""
|
||||
|
||||
explanation: Optional[str] = Field(None,
|
||||
description="Clear explanation of why a voice reply is the best fit in the current context",)
|
||||
explanation: Optional[str] = Field(
|
||||
None,
|
||||
description="Clear explanation of why a voice reply is the best fit in the current context",
|
||||
)
|
||||
message: str = Field(
|
||||
...,
|
||||
description="The spoken content to send back to the user",
|
||||
@@ -24,6 +26,8 @@ class SendVoiceMessageInput(BaseModel):
|
||||
|
||||
|
||||
class SendVoiceMessageTool(MoviePilotTool):
|
||||
"""发送 Agent 语音回复的工具。"""
|
||||
|
||||
name: str = "send_voice_message"
|
||||
sends_message: bool = True
|
||||
description: str = (
|
||||
@@ -36,12 +40,14 @@ class SendVoiceMessageTool(MoviePilotTool):
|
||||
require_admin: bool = False
|
||||
|
||||
def get_tool_message(self, **kwargs) -> Optional[str]:
    """Build the execution hint shown for the voice-reply tool.

    Reads the ``message`` keyword argument (a missing or falsy value is
    treated as an empty string) and previews at most its first 40
    characters, appending ``...`` when the text was cut.
    """
    preview_limit = 40
    text = kwargs.get("message") or ""
    preview = text if len(text) <= preview_limit else text[:preview_limit] + "..."
    return f"发送语音回复: {preview}"
|
||||
|
||||
async def run(self, message: str, **kwargs) -> str:
|
||||
"""合成语音并发送到当前对话渠道,不支持时回退为文字。"""
|
||||
if not message:
|
||||
return "语音回复内容不能为空"
|
||||
|
||||
@@ -69,11 +75,8 @@ class SendVoiceMessageTool(MoviePilotTool):
|
||||
fallback_reason = "当前未配置可用的语音合成能力"
|
||||
|
||||
logger.info(
|
||||
"执行工具: %s, channel=%s, use_voice=%s, text_len=%s",
|
||||
self.name,
|
||||
channel,
|
||||
used_voice,
|
||||
len(message),
|
||||
f"执行工具: {self.name}, channel={channel}, "
|
||||
f"use_voice={used_voice}, text_len={len(message)}"
|
||||
)
|
||||
|
||||
await ToolChain().async_post_message(
|
||||
|
||||
@@ -273,6 +273,8 @@ class ChannelCapability(Enum):
|
||||
IMAGES = "images"
|
||||
# 支持链接
|
||||
LINKS = "links"
|
||||
# 支持原生语音输出
|
||||
AUDIO_OUTPUT = "audio_output"
|
||||
# 支持文件发送
|
||||
FILE_SENDING = "file_sending"
|
||||
# 支持可收口的消息处理状态提示,如 reaction 或 typing
|
||||
@@ -313,6 +315,7 @@ class ChannelCapabilityManager:
|
||||
ChannelCapability.RICH_TEXT,
|
||||
ChannelCapability.IMAGES,
|
||||
ChannelCapability.LINKS,
|
||||
ChannelCapability.AUDIO_OUTPUT,
|
||||
ChannelCapability.FILE_SENDING,
|
||||
ChannelCapability.PROCESSING_STATUS,
|
||||
},
|
||||
@@ -327,6 +330,7 @@ class ChannelCapabilityManager:
|
||||
capabilities={
|
||||
ChannelCapability.IMAGES,
|
||||
ChannelCapability.LINKS,
|
||||
ChannelCapability.AUDIO_OUTPUT,
|
||||
ChannelCapability.MENU_COMMANDS,
|
||||
},
|
||||
fallback_enabled=True,
|
||||
@@ -341,6 +345,7 @@ class ChannelCapabilityManager:
|
||||
ChannelCapability.RICH_TEXT,
|
||||
ChannelCapability.IMAGES,
|
||||
ChannelCapability.LINKS,
|
||||
ChannelCapability.AUDIO_OUTPUT,
|
||||
ChannelCapability.FILE_SENDING,
|
||||
ChannelCapability.PROCESSING_STATUS,
|
||||
},
|
||||
|
||||
@@ -11,6 +11,8 @@ sys.modules.setdefault("psutil", Mock())
|
||||
sys.modules.setdefault("pyquery", Mock())
|
||||
|
||||
from app.core.config import settings
|
||||
from app.schemas.message import ChannelCapability, ChannelCapabilityManager
|
||||
from app.schemas.types import MessageChannel
|
||||
|
||||
module_path = Path(__file__).resolve().parents[1] / "app" / "agent" / "llm" / "capability.py"
|
||||
spec = importlib.util.spec_from_file_location("test_agent_llm_capability_module", module_path)
|
||||
@@ -157,6 +159,73 @@ class AgentCapabilityManagerTest(unittest.TestCase):
|
||||
self.assertEqual(result, Path("/tmp/reply.opus"))
|
||||
provider.synthesize_speech.assert_called_once_with(text="你好")
|
||||
|
||||
def test_native_voice_reply_supports_channels_with_audio_output(self):
    """Voice-reply support check accepts the common spellings of a channel."""
    supports = AgentCapabilityManager.supports_native_voice_reply

    # Lower-case name, enum values and capitalized name must all be accepted.
    accepted_spellings = (
        "telegram",
        MessageChannel.Telegram.value,
        MessageChannel.Feishu.value,
        "Feishu",
    )
    for spelling in accepted_spellings:
        self.assertTrue(supports(spelling, None))

    self.assertFalse(supports("Slack", None))
|
||||
|
||||
def test_native_voice_reply_respects_wechat_mode(self):
    """WeChat Work allows agent voice replies only in self-built app mode."""
    fake_configs = [
        SimpleNamespace(name="wechat-app", config={"WECHAT_MODE": "app"}),
        SimpleNamespace(name="wechat-bot", config={"WECHAT_MODE": "bot"}),
    ]
    supports = AgentCapabilityManager.supports_native_voice_reply
    wechat = MessageChannel.Wechat.value

    with patch(
        "app.helper.service.ServiceConfigHelper.get_notification_configs",
        return_value=fake_configs,
    ):
        self.assertTrue(supports(wechat, "wechat-app"))
        self.assertFalse(supports(wechat, "wechat-bot"))
        # A source with no matching configuration is not supported.
        self.assertFalse(supports(wechat, "missing"))
|
||||
|
||||
def test_channel_capability_marks_voice_output_channels(self):
    """Channel capabilities explicitly declare native audio output support."""
    has_capability = ChannelCapabilityManager.supports_capability
    audio_output = ChannelCapability.AUDIO_OUTPUT

    voice_channels = (
        MessageChannel.Telegram,
        MessageChannel.Feishu,
        MessageChannel.Wechat,
    )
    for voice_channel in voice_channels:
        self.assertTrue(has_capability(voice_channel, audio_output))

    self.assertFalse(has_capability(MessageChannel.Slack, audio_output))
|
||||
|
||||
def test_mimo_tts_uses_chat_completions_audio_payload(self):
|
||||
provider = MiMoAudioProvider()
|
||||
fake_client = Mock()
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import langchain.agents as langchain_agents
|
||||
@@ -9,6 +10,7 @@ if not hasattr(langchain_agents, "create_agent"):
|
||||
|
||||
from app.agent.callback import StreamingHandler
|
||||
from app.agent.tools.base import MoviePilotTool
|
||||
from app.agent.tools.impl.send_voice_message import SendVoiceMessageTool
|
||||
from app.api.endpoints.openai import _OpenAIStreamingHandler
|
||||
from app.core.config import settings
|
||||
from app.schemas.message import MessageResponse
|
||||
@@ -397,6 +399,80 @@ class TestAgentToolStreaming(unittest.TestCase):
|
||||
send_tool_message.assert_awaited_once_with("前置内容\n\n⚙️ => run test tool")
|
||||
self.assertEqual(buffered_message, "")
|
||||
|
||||
def test_send_voice_message_uses_native_voice_for_supported_channels(self):
    """Channels that support audio output receive a native voice message."""

    async def _run(channel: MessageChannel):
        """Run the voice tool for ``channel`` with synthesis and posting patched."""
        voice_tool = SendVoiceMessageTool(session_id="session-1", user_id="10001")
        voice_tool.set_message_attr(
            channel=channel.value,
            source=f"{channel.name.lower()}-main",
            username="tester",
        )

        with (
            patch.object(settings, "LLM_SUPPORT_AUDIO_OUTPUT", True),
            patch.object(settings, "AUDIO_OUTPUT_INCLUDE_TEXT", True),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.is_audio_output_available",
                return_value=True,
            ),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.synthesize_speech",
                return_value=Path("/tmp/reply.opus"),
            ) as speech_mock,
            patch(
                "app.agent.tools.impl.send_voice_message.ToolChain.async_post_message",
                new_callable=AsyncMock,
            ) as post_mock,
        ):
            outcome = await voice_tool.run("你好")
        return outcome, speech_mock, post_mock

    for voice_channel in (MessageChannel.Telegram, MessageChannel.Feishu):
        outcome, speech_mock, post_mock = asyncio.run(_run(voice_channel))
        # The notification is the first positional argument of the awaited call.
        sent = post_mock.await_args.args[0]

        self.assertEqual(outcome, "语音回复已发送")
        speech_mock.assert_called_once_with("你好")
        self.assertEqual(sent.channel, voice_channel)
        self.assertEqual(sent.voice_path, "/tmp/reply.opus")
        self.assertEqual(sent.voice_caption, "你好")
|
||||
|
||||
def test_send_voice_message_falls_back_for_unsupported_channels(self):
    """Channels without audio output keep falling back to a text message."""

    async def _run():
        """Run the voice tool on Slack with synthesis and posting patched."""
        voice_tool = SendVoiceMessageTool(session_id="session-1", user_id="10001")
        voice_tool.set_message_attr(
            channel=MessageChannel.Slack.value,
            source="slack-main",
            username="tester",
        )

        with (
            patch.object(settings, "LLM_SUPPORT_AUDIO_OUTPUT", True),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.is_audio_output_available",
                return_value=True,
            ),
            patch(
                "app.agent.tools.impl.send_voice_message.AgentCapabilityManager.synthesize_speech"
            ) as speech_mock,
            patch(
                "app.agent.tools.impl.send_voice_message.ToolChain.async_post_message",
                new_callable=AsyncMock,
            ) as post_mock,
        ):
            outcome = await voice_tool.run("你好")
        return outcome, speech_mock, post_mock

    outcome, speech_mock, post_mock = asyncio.run(_run())
    # The notification is the first positional argument of the awaited call.
    sent = post_mock.await_args.args[0]

    self.assertEqual(outcome, "当前渠道不支持语音回复，已自动回退为文字回复")
    # No speech may be synthesized when the channel cannot play it.
    speech_mock.assert_not_called()
    self.assertEqual(sent.text, "你好")
    self.assertIsNone(sent.voice_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user