From 38933d58822265769a81c4433185648ca755f9dc Mon Sep 17 00:00:00 2001 From: jxxghp Date: Wed, 22 Apr 2026 10:36:36 +0800 Subject: [PATCH] feat(agent): support disabling model thinking --- app/api/endpoints/system.py | 6 +++ app/core/config.py | 2 + app/helper/llm.py | 75 +++++++++++++++++++++++++++- scripts/local_setup.py | 4 ++ tests/test_llm_helper_testcall.py | 82 +++++++++++++++++++++++++++++++ tests/test_system_llm_test.py | 3 ++ 6 files changed, 171 insertions(+), 1 deletion(-) diff --git a/app/api/endpoints/system.py b/app/api/endpoints/system.py index d4f0d254..f0cccfe5 100644 --- a/app/api/endpoints/system.py +++ b/app/api/endpoints/system.py @@ -57,6 +57,7 @@ class LlmTestRequest(BaseModel): enabled: Optional[bool] = None provider: Optional[str] = None model: Optional[str] = None + disable_thinking: Optional[bool] = None api_key: Optional[str] = None base_url: Optional[str] = None @@ -307,6 +308,7 @@ def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[s """ provider = settings.LLM_PROVIDER model = settings.LLM_MODEL + disable_thinking = bool(getattr(settings, "LLM_DISABLE_THINKING", False)) api_key = settings.LLM_API_KEY base_url = settings.LLM_BASE_URL enabled = bool(settings.AI_AGENT_ENABLE) @@ -318,6 +320,8 @@ def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[s provider = _normalize_llm_test_value(payload.provider) or "" if payload.model is not None: model = _normalize_llm_test_value(payload.model) or "" + if payload.disable_thinking is not None: + disable_thinking = bool(payload.disable_thinking) if payload.api_key is not None: api_key = _normalize_llm_test_value(payload.api_key, empty_as_none=True) if payload.base_url is not None: @@ -327,6 +331,7 @@ def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[s "enabled": enabled, "provider": provider, "model": model, + "disable_thinking": disable_thinking, "api_key": api_key, "base_url": base_url, } @@ -755,6 +760,7 @@ async 
def llm_test( result = await LLMHelper.test_current_settings( provider=snapshot["provider"], model=snapshot["model"], + disable_thinking=snapshot["disable_thinking"], api_key=snapshot["api_key"], base_url=snapshot["base_url"], ) diff --git a/app/core/config.py b/app/core/config.py index 4862cf2f..2ecf0de8 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -496,6 +496,8 @@ class ConfigModel(BaseModel): LLM_PROVIDER: str = "deepseek" # LLM模型名称 LLM_MODEL: str = "deepseek-chat" + # 是否尽量关闭模型的思考/推理能力(按各 provider/model 支持情况自动适配) + LLM_DISABLE_THINKING: bool = False # LLM是否支持图片输入,开启后消息图片会按多模态输入发送给模型 LLM_SUPPORT_IMAGE_INPUT: bool = True # LLM API密钥 diff --git a/app/helper/llm.py b/app/helper/llm.py index d2c90ecf..9e337bef 100644 --- a/app/helper/llm.py +++ b/app/helper/llm.py @@ -3,7 +3,7 @@ import asyncio import inspect import time -from typing import List +from typing import Any, List from app.core.config import settings from app.log import logger @@ -77,6 +77,68 @@ def _get_httpx_proxy_key() -> str: class LLMHelper: """LLM模型相关辅助功能""" + @staticmethod + def _should_disable_thinking(disable_thinking: bool | None = None) -> bool: + """ + 判断本次调用是否应尝试关闭模型思考能力。 + """ + if disable_thinking is not None: + return bool(disable_thinking) + return bool(getattr(settings, "LLM_DISABLE_THINKING", False)) + + @staticmethod + def _normalize_model_name(model_name: str | None) -> str: + """ + 统一清理模型名称,便于按模型族做能力映射。 + """ + return (model_name or "").strip().lower() + + @classmethod + def _build_disabled_thinking_kwargs( + cls, + provider: str, + model: str | None, + disable_thinking: bool | None = None, + ) -> dict[str, Any]: + """ + 按 provider/model 生成“禁用思考”相关参数。 + + 优先使用 LangChain/OpenAI SDK 已支持的原生字段;仅在 provider + 明确要求自定义请求体时,才回退到 extra_body。 + """ + if not cls._should_disable_thinking(disable_thinking): + return {} + + provider_name = (provider or "").strip().lower() + model_name = cls._normalize_model_name(model) + if not model_name: + return {} + + # Moonshot Kimi K2.5/K2.6 
需要在请求体显式声明 thinking.disabled。 + if model_name.startswith(("kimi-k2.5", "kimi-k2.6")): + return {"extra_body": {"thinking": {"type": "disabled"}}} + + # OpenAI 原生推理模型优先走 LangChain 内置 reasoning_effort。 + if provider_name == "openai" and model_name.startswith( + ("gpt-5", "o1", "o3", "o4") + ): + return {"reasoning_effort": "none"} + + # Gemini 使用 google-genai / langchain-google-genai 内置思考控制参数。 + if provider_name == "google": + if "gemini-2.5" in model_name: + return { + "thinking_budget": 0, + "include_thoughts": False, + } + if "gemini-3" in model_name: + return { + "thinking_level": "minimal", + "include_thoughts": False, + } + + return {} + @staticmethod def supports_image_input() -> bool: """ @@ -89,6 +151,7 @@ class LLMHelper: streaming: bool = False, provider: str | None = None, model: str | None = None, + disable_thinking: bool | None = None, api_key: str | None = None, base_url: str | None = None, ): @@ -103,6 +166,11 @@ class LLMHelper: model_name = model if model is not None else settings.LLM_MODEL api_key_value = api_key if api_key is not None else settings.LLM_API_KEY base_url_value = base_url if base_url is not None else settings.LLM_BASE_URL + thinking_kwargs = LLMHelper._build_disabled_thinking_kwargs( + provider=provider_name, + model=model_name, + disable_thinking=disable_thinking, + ) if not api_key_value: raise ValueError("未配置LLM API Key") @@ -128,6 +196,7 @@ class LLMHelper: temperature=settings.LLM_TEMPERATURE, streaming=streaming, client_args=client_args, + **thinking_kwargs, ) elif provider_name == "deepseek": from langchain_deepseek import ChatDeepSeek @@ -139,6 +208,7 @@ class LLMHelper: temperature=settings.LLM_TEMPERATURE, streaming=streaming, stream_usage=True, + **thinking_kwargs, ) else: from langchain_openai import ChatOpenAI @@ -152,6 +222,7 @@ class LLMHelper: streaming=streaming, stream_usage=True, openai_proxy=settings.PROXY_HOST, + **thinking_kwargs, ) # 检查是否有profile @@ -211,6 +282,7 @@ class LLMHelper: timeout: int = 20, provider: 
class LlmHelperTestCallTest(unittest.TestCase):
    """Tests for the kwargs ``LLMHelper.get_llm`` passes when thinking is disabled."""

    # NOTE: excerpt — only the disable-thinking tests are shown in this block;
    # the other tests of this class live outside of it.

    @staticmethod
    def _recording_chat_model(calls):
        """
        Return a fake chat-model class that appends its constructor kwargs to
        ``calls`` and exposes the ``model``/``profile`` attributes the tests'
        original fakes provided.
        """

        class _FakeChatModel:
            def __init__(self, **kwargs):
                calls.append(kwargs)
                self.model = kwargs["model"]
                self.profile = None

        return _FakeChatModel

    def _capture_get_llm_kwargs(self, module_name, class_name, **get_llm_kwargs):
        """
        Call ``LLMHelper.get_llm`` with ``module_name.class_name`` replaced by
        a recording fake and return the kwargs of the single constructor call.
        """
        calls = []
        fake_module = SimpleNamespace(
            **{class_name: self._recording_chat_model(calls)}
        )
        # Patch sys.modules so the lazy import inside get_llm resolves to the
        # fake instead of the real LangChain package.
        with patch.dict(sys.modules, {module_name: fake_module}):
            llm_module.LLMHelper.get_llm(**get_llm_kwargs)

        self.assertEqual(len(calls), 1)
        return calls[0]

    def test_get_llm_uses_kimi_extra_body_to_disable_thinking(self):
        kwargs = self._capture_get_llm_kwargs(
            "langchain_openai",
            "ChatOpenAI",
            provider="openai",
            model="kimi-k2.6",
            disable_thinking=True,
            api_key="sk-test",
            base_url="https://kimi.example.com/v1",
        )

        self.assertEqual(
            kwargs.get("extra_body"),
            {"thinking": {"type": "disabled"}},
        )

    def test_get_llm_uses_openai_reasoning_effort_none(self):
        kwargs = self._capture_get_llm_kwargs(
            "langchain_openai",
            "ChatOpenAI",
            provider="openai",
            model="gpt-5-mini",
            disable_thinking=True,
            api_key="sk-test",
            base_url="https://api.openai.com/v1",
        )

        self.assertEqual(kwargs.get("reasoning_effort"), "none")

    def test_get_llm_uses_gemini_builtin_thinking_controls(self):
        kwargs = self._capture_get_llm_kwargs(
            "langchain_google_genai",
            "ChatGoogleGenerativeAI",
            provider="google",
            model="gemini-2.5-flash",
            disable_thinking=True,
            api_key="sk-test",
            base_url=None,
        )

        self.assertEqual(kwargs.get("thinking_budget"), 0)
        # assertIs rather than assertFalse: a missing kwarg (None) must not
        # be mistaken for an explicit include_thoughts=False.
        self.assertIs(kwargs.get("include_thoughts"), False)
enabled=True, provider="openai", model="gpt-4.1-mini", + disable_thinking=True, api_key="sk-live", base_url="https://example.com/v1", ) @@ -176,6 +178,7 @@ class LlmTestEndpointTest(unittest.TestCase): llm_test_mock.assert_awaited_once_with( provider="openai", model="gpt-4.1-mini", + disable_thinking=True, api_key="sk-live", base_url="https://example.com/v1", )