From 38933d58822265769a81c4433185648ca755f9dc Mon Sep 17 00:00:00 2001
From: jxxghp <jxxghp@gmail.com>
Date: Wed, 22 Apr 2026 10:36:36 +0800
Subject: [PATCH] feat(agent): support disabling model thinking

---
 app/api/endpoints/system.py       |  6 +++
 app/core/config.py                |  2 +
 app/helper/llm.py                 | 75 +++++++++++++++++++++++++++-
 scripts/local_setup.py            |  4 ++
 tests/test_llm_helper_testcall.py | 82 +++++++++++++++++++++++++++++++
 tests/test_system_llm_test.py     |  3 ++
 6 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/app/api/endpoints/system.py b/app/api/endpoints/system.py
index d4f0d254..f0cccfe5 100644
--- a/app/api/endpoints/system.py
+++ b/app/api/endpoints/system.py
@@ -57,6 +57,7 @@ class LlmTestRequest(BaseModel):
     enabled: Optional[bool] = None
     provider: Optional[str] = None
     model: Optional[str] = None
+    disable_thinking: Optional[bool] = None
     api_key: Optional[str] = None
     base_url: Optional[str] = None
 
@@ -307,6 +308,7 @@ def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[s
     """
     provider = settings.LLM_PROVIDER
     model = settings.LLM_MODEL
+    disable_thinking = bool(getattr(settings, "LLM_DISABLE_THINKING", False))
     api_key = settings.LLM_API_KEY
     base_url = settings.LLM_BASE_URL
     enabled = bool(settings.AI_AGENT_ENABLE)
@@ -318,6 +320,8 @@ def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[s
             provider = _normalize_llm_test_value(payload.provider) or ""
         if payload.model is not None:
             model = _normalize_llm_test_value(payload.model) or ""
+        if payload.disable_thinking is not None:
+            disable_thinking = bool(payload.disable_thinking)
         if payload.api_key is not None:
             api_key = _normalize_llm_test_value(payload.api_key, empty_as_none=True)
         if payload.base_url is not None:
@@ -327,6 +331,7 @@ def _build_llm_test_snapshot(payload: Optional[LlmTestRequest] = None) -> dict[s
         "enabled": enabled,
         "provider": provider,
         "model": model,
+        "disable_thinking": disable_thinking,
         "api_key": api_key,
         "base_url": base_url,
     }
@@ -755,6 +760,7 @@ async def llm_test(
         result = await LLMHelper.test_current_settings(
             provider=snapshot["provider"],
             model=snapshot["model"],
+            disable_thinking=snapshot["disable_thinking"],
             api_key=snapshot["api_key"],
             base_url=snapshot["base_url"],
         )
diff --git a/app/core/config.py b/app/core/config.py
index 4862cf2f..2ecf0de8 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -496,6 +496,8 @@ class ConfigModel(BaseModel):
     LLM_PROVIDER: str = "deepseek"
     # LLM模型名称
     LLM_MODEL: str = "deepseek-chat"
+    # 是否尽量关闭模型的思考/推理能力（按各 provider/model 支持情况自动适配）
+    LLM_DISABLE_THINKING: bool = False
     # LLM是否支持图片输入，开启后消息图片会按多模态输入发送给模型
     LLM_SUPPORT_IMAGE_INPUT: bool = True
     # LLM API密钥
diff --git a/app/helper/llm.py b/app/helper/llm.py
index d2c90ecf..9e337bef 100644
--- a/app/helper/llm.py
+++ b/app/helper/llm.py
@@ -3,7 +3,7 @@
 import asyncio
 import inspect
 import time
-from typing import List
+from typing import Any, List
 
 from app.core.config import settings
 from app.log import logger
@@ -77,6 +77,68 @@ def _get_httpx_proxy_key() -> str:
 class LLMHelper:
     """LLM模型相关辅助功能"""
 
+    @staticmethod
+    def _should_disable_thinking(disable_thinking: bool | None = None) -> bool:
+        """
+        Decide whether this call should try to disable the model's thinking ability.
+        """
+        if disable_thinking is not None:
+            return bool(disable_thinking)  # an explicit per-call value wins
+        return bool(getattr(settings, "LLM_DISABLE_THINKING", False))  # else use settings
+
+    @staticmethod
+    def _normalize_model_name(model_name: str | None) -> str:
+        """
+        Normalize a model name (None -> "", stripped, lowercased) for model-family matching.
+        """
+        return (model_name or "").strip().lower()
+
+    @classmethod
+    def _build_disabled_thinking_kwargs(
+        cls,
+        provider: str,
+        model: str | None,
+        disable_thinking: bool | None = None,
+    ) -> dict[str, Any]:
+        """
+        Build the "disable thinking" kwargs for a provider/model; {} when none apply.
+
+        Prefers native fields already supported by the LangChain/OpenAI SDK and
+        falls back to extra_body only when the provider requires a custom body.
+        """
+        if not cls._should_disable_thinking(disable_thinking):
+            return {}
+
+        provider_name = (provider or "").strip().lower()
+        model_name = cls._normalize_model_name(model)
+        if not model_name:
+            return {}
+
+        # Moonshot Kimi K2.5/K2.6 must declare thinking.disabled explicitly in the request body.
+        if model_name.startswith(("kimi-k2.5", "kimi-k2.6")):
+            return {"extra_body": {"thinking": {"type": "disabled"}}}
+
+        # OpenAI native reasoning models: prefer LangChain's built-in reasoning_effort.
+        if provider_name == "openai" and model_name.startswith(
+            ("gpt-5", "o1", "o3", "o4")
+        ):
+            return {"reasoning_effort": "none"}  # NOTE(review): confirm every matched model accepts "none"
+
+        # Gemini uses the thinking controls built into google-genai / langchain-google-genai.
+        if provider_name == "google":
+            if "gemini-2.5" in model_name:
+                return {
+                    "thinking_budget": 0,
+                    "include_thoughts": False,
+                }
+            if "gemini-3" in model_name:
+                return {
+                    "thinking_level": "minimal",  # NOTE(review): confirm all Gemini 3 variants accept "minimal"
+                    "include_thoughts": False,
+                }
+
+        return {}  # no mapping for this provider/model: pass no extra kwargs
+
     @staticmethod
     def supports_image_input() -> bool:
         """
@@ -89,6 +151,7 @@ class LLMHelper:
         streaming: bool = False,
         provider: str | None = None,
         model: str | None = None,
+        disable_thinking: bool | None = None,
         api_key: str | None = None,
         base_url: str | None = None,
     ):
@@ -103,6 +166,11 @@ class LLMHelper:
         model_name = model if model is not None else settings.LLM_MODEL
         api_key_value = api_key if api_key is not None else settings.LLM_API_KEY
         base_url_value = base_url if base_url is not None else settings.LLM_BASE_URL
+        thinking_kwargs = LLMHelper._build_disabled_thinking_kwargs(
+            provider=provider_name,
+            model=model_name,
+            disable_thinking=disable_thinking,
+        )
 
         if not api_key_value:
             raise ValueError("未配置LLM API Key")
@@ -128,6 +196,7 @@ class LLMHelper:
                 temperature=settings.LLM_TEMPERATURE,
                 streaming=streaming,
                 client_args=client_args,
+                **thinking_kwargs,
             )
         elif provider_name == "deepseek":
             from langchain_deepseek import ChatDeepSeek
@@ -139,6 +208,7 @@ class LLMHelper:
                 temperature=settings.LLM_TEMPERATURE,
                 streaming=streaming,
                 stream_usage=True,
+                **thinking_kwargs,
             )
         else:
             from langchain_openai import ChatOpenAI
@@ -152,6 +222,7 @@ class LLMHelper:
                 streaming=streaming,
                 stream_usage=True,
                 openai_proxy=settings.PROXY_HOST,
+                **thinking_kwargs,
             )
 
         # 检查是否有profile
@@ -211,6 +282,7 @@ class LLMHelper:
         timeout: int = 20,
         provider: str | None = None,
         model: str | None = None,
+        disable_thinking: bool | None = None,
         api_key: str | None = None,
         base_url: str | None = None,
     ) -> dict:
@@ -226,6 +298,7 @@ class LLMHelper:
             streaming=False,
             provider=provider_name,
             model=model_name,
+            disable_thinking=disable_thinking,
             api_key=api_key_value,
             base_url=base_url_value,
         )
diff --git a/scripts/local_setup.py b/scripts/local_setup.py
index e2ea3a31..8690e3cb 100644
--- a/scripts/local_setup.py
+++ b/scripts/local_setup.py
@@ -1476,6 +1476,10 @@ def _collect_agent_config() -> dict[str, Any]:
             current_value=read_env_value("LLM_API_KEY"),
             required=True,
         ),
+        "LLM_DISABLE_THINKING": _prompt_yes_no(
+            "是否尽量关闭模型思考/推理",
+            default=_env_bool("LLM_DISABLE_THINKING", False),
+        ),
         "LLM_SUPPORT_IMAGE_INPUT": _prompt_yes_no(
             "是否启用图片输入支持",
             default=_env_bool("LLM_SUPPORT_IMAGE_INPUT", True),
diff --git a/tests/test_llm_helper_testcall.py b/tests/test_llm_helper_testcall.py
index 6633382b..8a64f389 100644
--- a/tests/test_llm_helper_testcall.py
+++ b/tests/test_llm_helper_testcall.py
@@ -38,6 +38,7 @@ _stub_module(
         LLM_MODEL="global-model",
         LLM_API_KEY="global-key",
         LLM_BASE_URL="https://global.example.com",
+        LLM_DISABLE_THINKING=False,
         LLM_TEMPERATURE=0.1,
         LLM_MAX_CONTEXT_TOKENS=64,
         PROXY_HOST=None,
@@ -82,6 +83,7 @@ class LlmHelperTestCallTest(unittest.TestCase):
             streaming=False,
             provider="deepseek",
             model="deepseek-chat",
+            disable_thinking=None,
             api_key="sk-test",
             base_url="https://api.deepseek.com",
         )
@@ -109,6 +111,86 @@ class LlmHelperTestCallTest(unittest.TestCase):
 
         self.assertNotIn("reply_preview", result)
 
+    def test_get_llm_uses_kimi_extra_body_to_disable_thinking(self):
+        calls = []  # kwargs captured from each fake ChatOpenAI construction
+
+        class _FakeChatOpenAI:
+            def __init__(self, **kwargs):
+                calls.append(kwargs)
+                self.model = kwargs["model"]
+                self.profile = None
+
+        with patch.dict(  # stub langchain_openai so get_llm's in-function import gets the fake
+            sys.modules,
+            {"langchain_openai": SimpleNamespace(ChatOpenAI=_FakeChatOpenAI)},
+        ):
+            llm_module.LLMHelper.get_llm(
+                provider="openai",
+                model="kimi-k2.6",  # Kimi model name combined with provider "openai"
+                disable_thinking=True,
+                api_key="sk-test",
+                base_url="https://kimi.example.com/v1",
+            )
+
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(
+            calls[0].get("extra_body"),
+            {"thinking": {"type": "disabled"}},
+        )
+
+    def test_get_llm_uses_openai_reasoning_effort_none(self):
+        calls = []  # kwargs captured from each fake ChatOpenAI construction
+
+        class _FakeChatOpenAI:
+            def __init__(self, **kwargs):
+                calls.append(kwargs)
+                self.model = kwargs["model"]
+                self.profile = None
+
+        with patch.dict(  # stub langchain_openai so get_llm's in-function import gets the fake
+            sys.modules,
+            {"langchain_openai": SimpleNamespace(ChatOpenAI=_FakeChatOpenAI)},
+        ):
+            llm_module.LLMHelper.get_llm(
+                provider="openai",
+                model="gpt-5-mini",  # matches the "gpt-5" prefix in the kwargs builder
+                disable_thinking=True,
+                api_key="sk-test",
+                base_url="https://api.openai.com/v1",
+            )
+
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(calls[0].get("reasoning_effort"), "none")
+
+    def test_get_llm_uses_gemini_builtin_thinking_controls(self):
+        calls = []  # kwargs captured from each fake ChatGoogleGenerativeAI construction
+
+        class _FakeChatGoogleGenerativeAI:
+            def __init__(self, **kwargs):
+                calls.append(kwargs)
+                self.model = kwargs["model"]
+                self.profile = None
+
+        with patch.dict(  # stub langchain_google_genai in sys.modules for the duration of the call
+            sys.modules,
+            {
+                "langchain_google_genai": SimpleNamespace(
+                    ChatGoogleGenerativeAI=_FakeChatGoogleGenerativeAI
+                )
+            },
+        ):
+            llm_module.LLMHelper.get_llm(
+                provider="google",
+                model="gemini-2.5-flash",  # contains "gemini-2.5", so the thinking_budget branch applies
+                disable_thinking=True,
+                api_key="sk-test",
+                base_url=None,
+            )
+
+        self.assertEqual(len(calls), 1)
+        self.assertEqual(calls[0].get("thinking_budget"), 0)
+        self.assertFalse(calls[0].get("include_thoughts"))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_system_llm_test.py b/tests/test_system_llm_test.py
index e185cc34..66c7a7fe 100644
--- a/tests/test_system_llm_test.py
+++ b/tests/test_system_llm_test.py
@@ -133,6 +133,7 @@ class LlmTestEndpointTest(unittest.TestCase):
         llm_test_mock.assert_awaited_once_with(
             provider="deepseek",
             model="deepseek-chat",
+            disable_thinking=False,
             api_key="sk-test",
             base_url="https://api.deepseek.com",
         )
@@ -155,6 +156,7 @@ class LlmTestEndpointTest(unittest.TestCase):
             enabled=True,
             provider="openai",
             model="gpt-4.1-mini",
+            disable_thinking=True,
             api_key="sk-live",
             base_url="https://example.com/v1",
         )
@@ -176,6 +178,7 @@ class LlmTestEndpointTest(unittest.TestCase):
         llm_test_mock.assert_awaited_once_with(
             provider="openai",
             model="gpt-4.1-mini",
+            disable_thinking=True,
             api_key="sk-live",
             base_url="https://example.com/v1",
         )