fix: REPL display, /compact panic, identity leak, DeepSeek reasoning, thinking blocks

Five interrelated fixes from parallel Hephaestus sessions: 1. fix(repl): display assistant text after spinner (#2981, #2982, #2937) - Added final_assistant_text() call after run_turn spinner completes - REPL now shows response text like run_prompt_json does 2. fix(compact): handle Thinking content blocks (#2985) - Added ContentBlock::Thinking variant throughout compact summarizer - Prevents panic when /compact encounters thinking blocks 3. fix(prompt): provider-aware model identity (#2822) - New ModelFamilyIdentity enum (Claude vs Generic) - Non-Anthropic models no longer say 'I am Claude' - model_family_identity_for() detects provider and sets identity 4. fix(openai): preserve DeepSeek reasoning_content (#2821) - Stream parser now captures reasoning_content from OpenAI-compat - Emits ThinkingDelta/SignatureDelta events for reasoning models - Thinking blocks included in conversation history for re-send 5. feat(runtime): Thinking block support across codebase - AssistantEvent::Thinking variant in conversation.rs - ContentBlock::Thinking in session serialization - Thinking-aware compact summarization - Tests for thinking block ordering and content Closes #2981, #2982, #2937, #2985, #2822, #2821
2026-05-15 18:36:45 +00:00 · 2026-05-06 15:32:34 +09:00
parent 553d25ee50
commit 75c08bc982
15 changed files with 1099 additions and 75 deletions
--- a/rust/crates/api/src/lib.rs
+++ b/rust/crates/api/src/lib.rs
@@ -21,11 +21,12 @@ pub use prompt_cache::{
 pub use providers::anthropic::{AnthropicClient, AnthropicClient as ApiClient, AuthSource};
 pub use providers::openai_compat::{
    build_chat_completion_request, flatten_tool_result_content, is_reasoning_model,
-    model_rejects_is_error_field, translate_message, OpenAiCompatClient, OpenAiCompatConfig,
+    model_rejects_is_error_field, model_requires_reasoning_content_in_history, translate_message,
+    OpenAiCompatClient, OpenAiCompatConfig,
 };
 pub use providers::{
    detect_provider_kind, max_tokens_for_model, max_tokens_for_model_with_override,
-    resolve_model_alias, ProviderKind,
+    model_family_identity_for, model_family_identity_for_kind, resolve_model_alias, ProviderKind,
 };
 pub use sse::{parse_frame, SseParser};
 pub use types::{
--- a/rust/crates/api/src/providers/mod.rs
+++ b/rust/crates/api/src/providers/mod.rs
@@ -250,6 +250,19 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
    ProviderKind::Anthropic
 }

+#[must_use]
+pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity {
+    match kind {
+        ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude,
+        ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic,
+    }
+}
+
+#[must_use]
+pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity {
+    model_family_identity_for_kind(detect_provider_kind(model))
+}
+
 #[must_use]
 pub fn max_tokens_for_model(model: &str) -> u32 {
    let canonical = resolve_model_alias(model);
@@ -484,8 +497,8 @@ mod tests {
    use super::{
        anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind,
        load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override,
-        model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias,
-        ProviderKind,
+        model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv,
+        preflight_message_request, resolve_model_alias, ProviderKind,
    };

    /// Serializes every test in this module that mutates process-wide
@@ -544,6 +557,42 @@ mod tests {
        );
    }

+    #[test]
+    fn maps_provider_kind_to_model_family_identity() {
+        // given: each supported provider kind
+        let anthropic = ProviderKind::Anthropic;
+        let openai = ProviderKind::OpenAi;
+        let xai = ProviderKind::Xai;
+
+        // when: converting provider kinds to prompt model family identities
+        let anthropic_identity = model_family_identity_for_kind(anthropic);
+        let openai_identity = model_family_identity_for_kind(openai);
+        let xai_identity = model_family_identity_for_kind(xai);
+
+        // then: Anthropic stays Claude and OpenAI-compatible providers are generic
+        assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude);
+        assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
+        assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
+    }
+
+    #[test]
+    fn maps_model_name_to_model_family_identity() {
+        // given: Anthropic, OpenAI-compatible, and xAI model names
+        let claude_model = "claude-opus-4-6";
+        let openai_model = "openai/gpt-4.1-mini";
+        let xai_model = "grok-3";
+
+        // when: detecting prompt model family identities from model names
+        let claude_identity = model_family_identity_for(claude_model);
+        let openai_identity = model_family_identity_for(openai_model);
+        let xai_identity = model_family_identity_for(xai_model);
+
+        // then: Anthropic stays Claude and OpenAI-compatible providers are generic
+        assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude);
+        assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
+        assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
+    }
+
    #[test]
    fn openai_namespaced_model_routes_to_openai_not_anthropic() {
        // Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -443,6 +443,8 @@ struct StreamState {
    stop_reason: Option<String>,
    usage: Option<Usage>,
    tool_calls: BTreeMap<u32, ToolCallState>,
+    thinking_started: bool,
+    thinking_finished: bool,
 }

 impl StreamState {
@@ -456,6 +458,8 @@ impl StreamState {
            stop_reason: None,
            usage: None,
            tool_calls: BTreeMap::new(),
+            thinking_started: false,
+            thinking_finished: false,
        }
    }

@@ -493,35 +497,61 @@ impl StreamState {
        }

        for choice in chunk.choices {
+            if let Some(reasoning) = choice
+                .delta
+                .reasoning_content
+                .filter(|value| !value.is_empty())
+            {
+                if !self.thinking_started {
+                    self.thinking_started = true;
+                    events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent {
+                        index: 0,
+                        content_block: OutputContentBlock::Thinking {
+                            thinking: String::new(),
+                            signature: None,
+                        },
+                    }));
+                }
+                events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
+                    index: 0,
+                    delta: ContentBlockDelta::ThinkingDelta {
+                        thinking: reasoning,
+                    },
+                }));
+            }
+
            if let Some(content) = choice.delta.content.filter(|value| !value.is_empty()) {
+                self.close_thinking(&mut events);
                if !self.text_started {
                    self.text_started = true;
                    events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent {
-                        index: 0,
+                        index: self.text_block_index(),
                        content_block: OutputContentBlock::Text {
                            text: String::new(),
                        },
                    }));
                }
                events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
-                    index: 0,
+                    index: self.text_block_index(),
                    delta: ContentBlockDelta::TextDelta { text: content },
                }));
            }

            for tool_call in choice.delta.tool_calls {
+                self.close_thinking(&mut events);
+                let tool_index_offset = self.tool_index_offset();
                let state = self.tool_calls.entry(tool_call.index).or_default();
                state.apply(tool_call);
-                let block_index = state.block_index();
+                let block_index = state.block_index(tool_index_offset);
                if !state.started {
-                    if let Some(start_event) = state.start_event()? {
+                    if let Some(start_event) = state.start_event(tool_index_offset)? {
                        state.started = true;
                        events.push(StreamEvent::ContentBlockStart(start_event));
                    } else {
                        continue;
                    }
                }
-                if let Some(delta_event) = state.delta_event() {
+                if let Some(delta_event) = state.delta_event(tool_index_offset) {
                    events.push(StreamEvent::ContentBlockDelta(delta_event));
                }
                if choice.finish_reason.as_deref() == Some("tool_calls") && !state.stopped {
@@ -535,11 +565,12 @@ impl StreamState {
            if let Some(finish_reason) = choice.finish_reason {
                self.stop_reason = Some(normalize_finish_reason(&finish_reason));
                if finish_reason == "tool_calls" {
+                    let tool_index_offset = self.tool_index_offset();
                    for state in self.tool_calls.values_mut() {
                        if state.started && !state.stopped {
                            state.stopped = true;
                            events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
-                                index: state.block_index(),
+                                index: state.block_index(tool_index_offset),
                            }));
                        }
                    }
@@ -557,19 +588,21 @@ impl StreamState {
        self.finished = true;

        let mut events = Vec::new();
+        self.close_thinking(&mut events);
        if self.text_started && !self.text_finished {
            self.text_finished = true;
            events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
-                index: 0,
+                index: self.text_block_index(),
            }));
        }

+        let tool_index_offset = self.tool_index_offset();
        for state in self.tool_calls.values_mut() {
            if !state.started {
-                if let Some(start_event) = state.start_event()? {
+                if let Some(start_event) = state.start_event(tool_index_offset)? {
                    state.started = true;
                    events.push(StreamEvent::ContentBlockStart(start_event));
-                    if let Some(delta_event) = state.delta_event() {
+                    if let Some(delta_event) = state.delta_event(tool_index_offset) {
                        events.push(StreamEvent::ContentBlockDelta(delta_event));
                    }
                }
@@ -577,7 +610,7 @@ impl StreamState {
            if state.started && !state.stopped {
                state.stopped = true;
                events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
-                    index: state.block_index(),
+                    index: state.block_index(tool_index_offset),
                }));
            }
        }
@@ -603,6 +636,31 @@ impl StreamState {
        }
        Ok(events)
    }
+
+    fn close_thinking(&mut self, events: &mut Vec<StreamEvent>) {
+        if self.thinking_started && !self.thinking_finished {
+            self.thinking_finished = true;
+            events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
+                index: 0,
+            }));
+        }
+    }
+
+    const fn text_block_index(&self) -> u32 {
+        if self.thinking_started {
+            1
+        } else {
+            0
+        }
+    }
+
+    const fn tool_index_offset(&self) -> u32 {
+        if self.thinking_started {
+            2
+        } else {
+            1
+        }
+    }
 }

 #[derive(Debug, Default)]
@@ -630,12 +688,12 @@ impl ToolCallState {
        }
    }

-    const fn block_index(&self) -> u32 {
-        self.openai_index + 1
+    const fn block_index(&self, offset: u32) -> u32 {
+        self.openai_index + offset
    }

    #[allow(clippy::unnecessary_wraps)]
-    fn start_event(&self) -> Result<Option<ContentBlockStartEvent>, ApiError> {
+    fn start_event(&self, offset: u32) -> Result<Option<ContentBlockStartEvent>, ApiError> {
        let Some(name) = self.name.clone() else {
            return Ok(None);
        };
@@ -644,7 +702,7 @@ impl ToolCallState {
            .clone()
            .unwrap_or_else(|| format!("tool_call_{}", self.openai_index));
        Ok(Some(ContentBlockStartEvent {
-            index: self.block_index(),
+            index: self.block_index(offset),
            content_block: OutputContentBlock::ToolUse {
                id,
                name,
@@ -653,14 +711,14 @@ impl ToolCallState {
        }))
    }

-    fn delta_event(&mut self) -> Option<ContentBlockDeltaEvent> {
+    fn delta_event(&mut self, offset: u32) -> Option<ContentBlockDeltaEvent> {
        if self.emitted_len >= self.arguments.len() {
            return None;
        }
        let delta = self.arguments[self.emitted_len..].to_string();
        self.emitted_len = self.arguments.len();
        Some(ContentBlockDeltaEvent {
-            index: self.block_index(),
+            index: self.block_index(offset),
            delta: ContentBlockDelta::InputJsonDelta {
                partial_json: delta,
            },
@@ -690,6 +748,8 @@ struct ChatMessage {
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
+    reasoning_content: Option<String>,
+    #[serde(default)]
    tool_calls: Vec<ResponseToolCall>,
 }

@@ -735,6 +795,8 @@ struct ChunkChoice {
 struct ChunkDelta {
    #[serde(default)]
    content: Option<String>,
+    #[serde(default)]
+    reasoning_content: Option<String>,
    #[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
    tool_calls: Vec<DeltaToolCall>,
 }
@@ -793,6 +855,15 @@ pub fn is_reasoning_model(model: &str) -> bool {
        || canonical.contains("thinking")
 }

+/// Returns true for OpenAI-compatible DeepSeek V4 models that require prior
+/// assistant reasoning to be echoed back as `reasoning_content` in history.
+#[must_use]
+pub fn model_requires_reasoning_content_in_history(model: &str) -> bool {
+    let lowered = model.to_ascii_lowercase();
+    let canonical = lowered.rsplit('/').next().unwrap_or(lowered.as_str());
+    canonical.starts_with("deepseek-v4")
+}
+
 /// Strip routing prefix (e.g., "openai/gpt-4" → "gpt-4") for the wire.
 /// The prefix is used only to select transport; the backend expects the
 /// bare model id.
@@ -948,10 +1019,14 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
    match message.role.as_str() {
        "assistant" => {
            let mut text = String::new();
+            let mut reasoning = String::new();
            let mut tool_calls = Vec::new();
            for block in &message.content {
                match block {
                    InputContentBlock::Text { text: value } => text.push_str(value),
+                    InputContentBlock::Thinking {
+                        thinking: value, ..
+                    } => reasoning.push_str(value),
                    InputContentBlock::ToolUse { id, name, input } => tool_calls.push(json!({
                        "id": id,
                        "type": "function",
@@ -963,13 +1038,18 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
                    InputContentBlock::ToolResult { .. } => {}
                }
            }
-            if text.is_empty() && tool_calls.is_empty() {
+            let include_reasoning =
+                model_requires_reasoning_content_in_history(model) && !reasoning.is_empty();
+            if text.is_empty() && tool_calls.is_empty() && !include_reasoning {
                Vec::new()
            } else {
                let mut msg = serde_json::json!({
                    "role": "assistant",
                    "content": (!text.is_empty()).then_some(text),
                });
+                if include_reasoning {
+                    msg["reasoning_content"] = json!(reasoning);
+                }
                // Only include tool_calls when non-empty: some providers reject
                // assistant messages with an explicit empty tool_calls array.
                if !tool_calls.is_empty() {
@@ -1003,6 +1083,7 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
                    }
                    Some(msg)
                }
+                InputContentBlock::Thinking { .. } => None,
                InputContentBlock::ToolUse { .. } => None,
            })
            .collect(),
@@ -1182,6 +1263,16 @@ fn normalize_response(
            "chat completion response missing choices",
        ))?;
    let mut content = Vec::new();
+    if let Some(thinking) = choice
+        .message
+        .reasoning_content
+        .filter(|value| !value.is_empty())
+    {
+        content.push(OutputContentBlock::Thinking {
+            thinking,
+            signature: None,
+        });
+    }
    if let Some(text) = choice.message.content.filter(|value| !value.is_empty()) {
        content.push(OutputContentBlock::Text { text });
    }
@@ -1413,13 +1504,15 @@ impl StringExt for String {
 mod tests {
    use super::{
        build_chat_completion_request, chat_completions_endpoint, is_reasoning_model,
-        normalize_finish_reason, openai_tool_choice, parse_tool_arguments, OpenAiCompatClient,
-        OpenAiCompatConfig,
+        model_requires_reasoning_content_in_history, normalize_finish_reason, normalize_response,
+        openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, OpenAiCompatConfig,
+        StreamState,
    };
    use crate::error::ApiError;
    use crate::types::{
-        InputContentBlock, InputMessage, MessageRequest, ToolChoice, ToolDefinition,
-        ToolResultContentBlock,
+        ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent,
+        InputContentBlock, InputMessage, MessageRequest, OutputContentBlock, StreamEvent,
+        ToolChoice, ToolDefinition, ToolResultContentBlock,
    };
    use serde_json::json;
    use std::sync::{Mutex, OnceLock};
@@ -1465,6 +1558,188 @@ mod tests {
        assert_eq!(payload["tool_choice"], json!("auto"));
    }

+    #[test]
+    fn model_requires_reasoning_content_in_history_detects_deepseek_v4_models() {
+        // Given DeepSeek V4 and non-V4 model names.
+        let positive = [
+            "deepseek-v4-flash",
+            "deepseek-v4-pro",
+            "openai/deepseek-v4-pro",
+            "deepseek/deepseek-v4-flash",
+        ];
+        let negative = [
+            "deepseek-reasoner",
+            "deepseek-chat",
+            "gpt-4o",
+            "claude-sonnet-4-6",
+        ];
+
+        // When checking whether history reasoning_content is required.
+        // Then only DeepSeek V4 variants require it.
+        for model in positive {
+            assert!(model_requires_reasoning_content_in_history(model));
+        }
+        for model in negative {
+            assert!(!model_requires_reasoning_content_in_history(model));
+        }
+    }
+
+    #[test]
+    fn legacy_deepseek_reasoner_request_omits_reasoning_content_for_assistant_history() {
+        // Given an assistant history turn containing thinking.
+        let request = assistant_history_with_thinking_request("deepseek-reasoner");
+
+        // When serializing for legacy deepseek-reasoner.
+        let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
+
+        // Then reasoning_content is omitted.
+        let assistant = &payload["messages"][0];
+        assert_eq!(assistant["role"], json!("assistant"));
+        assert!(assistant.get("reasoning_content").is_none());
+    }
+
+    #[test]
+    fn deepseek_v4_pro_request_includes_reasoning_content_for_assistant_history() {
+        // Given an assistant history turn containing thinking.
+        let request = assistant_history_with_thinking_request("openai/deepseek-v4-pro");
+
+        // When serializing for DeepSeek V4 Pro.
+        let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
+
+        // Then reasoning_content is included on the assistant message.
+        let assistant = &payload["messages"][0];
+        assert_eq!(assistant["reasoning_content"], json!("prior reasoning"));
+        assert_eq!(assistant["content"], json!("answer"));
+    }
+
+    #[test]
+    fn deepseek_v4_flash_request_includes_reasoning_content_for_assistant_history() {
+        // Given an assistant history turn containing thinking.
+        let request = assistant_history_with_thinking_request("deepseek-v4-flash");
+
+        // When serializing for DeepSeek V4 Flash.
+        let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
+
+        // Then reasoning_content is included on the assistant message.
+        let assistant = &payload["messages"][0];
+        assert_eq!(assistant["reasoning_content"], json!("prior reasoning"));
+    }
+
+    #[test]
+    fn non_streaming_response_with_reasoning_content_emits_thinking_block_first() {
+        // Given a non-streaming OpenAI-compatible response with reasoning_content.
+        let response = super::ChatCompletionResponse {
+            id: "chatcmpl_reasoning".to_string(),
+            model: "deepseek-v4-pro".to_string(),
+            choices: vec![super::ChatChoice {
+                message: super::ChatMessage {
+                    role: "assistant".to_string(),
+                    content: Some("final answer".to_string()),
+                    reasoning_content: Some("hidden thought".to_string()),
+                    tool_calls: Vec::new(),
+                },
+                finish_reason: Some("stop".to_string()),
+            }],
+            usage: None,
+        };
+
+        // When normalizing the provider response.
+        let normalized = normalize_response("deepseek-v4-pro", response).expect("normalized");
+
+        // Then Thinking is the first content block, before text.
+        assert_eq!(
+            normalized.content,
+            vec![
+                OutputContentBlock::Thinking {
+                    thinking: "hidden thought".to_string(),
+                    signature: None,
+                },
+                OutputContentBlock::Text {
+                    text: "final answer".to_string(),
+                },
+            ]
+        );
+    }
+
+    #[test]
+    fn streaming_chunks_with_reasoning_content_emit_thinking_block_events_before_text() {
+        // Given streaming chunks with reasoning_content followed by text.
+        let mut state = StreamState::new("deepseek-v4-pro".to_string());
+        let mut events = state
+            .ingest_chunk(super::ChatCompletionChunk {
+                id: "chatcmpl_stream_reasoning".to_string(),
+                model: Some("deepseek-v4-pro".to_string()),
+                choices: vec![super::ChunkChoice {
+                    delta: super::ChunkDelta {
+                        content: None,
+                        reasoning_content: Some("think".to_string()),
+                        tool_calls: Vec::new(),
+                    },
+                    finish_reason: None,
+                }],
+                usage: None,
+            })
+            .expect("reasoning chunk");
+        events.extend(
+            state
+                .ingest_chunk(super::ChatCompletionChunk {
+                    id: "chatcmpl_stream_reasoning".to_string(),
+                    model: None,
+                    choices: vec![super::ChunkChoice {
+                        delta: super::ChunkDelta {
+                            content: Some(" answer".to_string()),
+                            reasoning_content: None,
+                            tool_calls: Vec::new(),
+                        },
+                        finish_reason: Some("stop".to_string()),
+                    }],
+                    usage: None,
+                })
+                .expect("text chunk"),
+        );
+        events.extend(state.finish().expect("finish"));
+
+        // When reading normalized stream events.
+        // Then Thinking starts at index 0, text is offset to index 1.
+        assert!(matches!(events[0], StreamEvent::MessageStart(_)));
+        assert!(matches!(
+            events[1],
+            StreamEvent::ContentBlockStart(ContentBlockStartEvent {
+                index: 0,
+                content_block: OutputContentBlock::Thinking { .. },
+            })
+        ));
+        assert!(matches!(
+            events[2],
+            StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
+                index: 0,
+                delta: ContentBlockDelta::ThinkingDelta { .. },
+            })
+        ));
+        assert!(matches!(
+            events[3],
+            StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 })
+        ));
+        assert!(matches!(
+            events[4],
+            StreamEvent::ContentBlockStart(ContentBlockStartEvent {
+                index: 1,
+                content_block: OutputContentBlock::Text { .. },
+            })
+        ));
+        assert!(matches!(
+            events[5],
+            StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
+                index: 1,
+                delta: ContentBlockDelta::TextDelta { .. },
+            })
+        ));
+        assert!(matches!(
+            events[6],
+            StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 1 })
+        ));
+    }
+
    #[test]
    fn tool_schema_object_gets_strict_fields_for_responses_endpoint() {
        // OpenAI /responses endpoint rejects object schemas missing
@@ -1624,6 +1899,27 @@ mod tests {
        );
    }

+    fn assistant_history_with_thinking_request(model: &str) -> MessageRequest {
+        MessageRequest {
+            model: model.to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage {
+                role: "assistant".to_string(),
+                content: vec![
+                    InputContentBlock::Thinking {
+                        thinking: "prior reasoning".to_string(),
+                        signature: None,
+                    },
+                    InputContentBlock::Text {
+                        text: "answer".to_string(),
+                    },
+                ],
+            }],
+            stream: false,
+            ..Default::default()
+        }
+    }
+
    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
--- a/rust/crates/api/src/types.rs
+++ b/rust/crates/api/src/types.rs
@@ -81,6 +81,11 @@ pub enum InputContentBlock {
    Text {
        text: String,
    },
+    Thinking {
+        thinking: String,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        signature: Option<String>,
+    },
    ToolUse {
        id: String,
        name: String,
@@ -268,8 +273,9 @@ pub enum StreamEvent {
 #[cfg(test)]
 mod tests {
    use runtime::format_usd;
+    use serde_json::json;

-    use super::{MessageResponse, Usage};
+    use super::{InputContentBlock, MessageResponse, Usage};

    #[test]
    fn usage_total_tokens_includes_cache_tokens() {
@@ -307,4 +313,33 @@ mod tests {
        assert_eq!(format_usd(cost.total_cost_usd()), "$54.6750");
        assert_eq!(response.total_tokens(), 1_800_000);
    }
+
+    #[test]
+    fn input_content_block_thinking_serializes_with_snake_case_type() {
+        // given
+        let block = InputContentBlock::Thinking {
+            thinking: "pondering".to_string(),
+            signature: Some("sig_123".to_string()),
+        };
+
+        // when
+        let serialized = serde_json::to_value(&block).unwrap();
+        let deserialized: InputContentBlock = serde_json::from_value(json!({
+            "type": "thinking",
+            "thinking": "pondering",
+            "signature": "sig_123"
+        }))
+        .unwrap();
+
+        // then
+        assert_eq!(
+            serialized,
+            json!({
+                "type": "thinking",
+                "thinking": "pondering",
+                "signature": "sig_123"
+            })
+        );
+        assert_eq!(deserialized, block);
+    }
 }