mirror of
https://github.com/instructkr/claude-code.git
synced 2026-05-15 18:36:45 +00:00
fix: REPL display, /compact panic, identity leak, DeepSeek reasoning, thinking blocks
Five interrelated fixes from parallel Hephaestus sessions: 1. fix(repl): display assistant text after spinner (#2981, #2982, #2937) - Added final_assistant_text() call after run_turn spinner completes - REPL now shows response text like run_prompt_json does 2. fix(compact): handle Thinking content blocks (#2985) - Added ContentBlock::Thinking variant throughout compact summarizer - Prevents panic when /compact encounters thinking blocks 3. fix(prompt): provider-aware model identity (#2822) - New ModelFamilyIdentity enum (Claude vs Generic) - Non-Anthropic models no longer say 'I am Claude' - model_family_identity_for() detects provider and sets identity 4. fix(openai): preserve DeepSeek reasoning_content (#2821) - Stream parser now captures reasoning_content from OpenAI-compat - Emits ThinkingDelta/SignatureDelta events for reasoning models - Thinking blocks included in conversation history for re-send 5. feat(runtime): Thinking block support across codebase - AssistantEvent::Thinking variant in conversation.rs - ContentBlock::Thinking in session serialization - Thinking-aware compact summarization - Tests for thinking block ordering and content Closes #2981, #2982, #2937, #2985, #2822, #2821
This commit is contained in:
@@ -21,11 +21,12 @@ pub use prompt_cache::{
|
||||
pub use providers::anthropic::{AnthropicClient, AnthropicClient as ApiClient, AuthSource};
|
||||
pub use providers::openai_compat::{
|
||||
build_chat_completion_request, flatten_tool_result_content, is_reasoning_model,
|
||||
model_rejects_is_error_field, translate_message, OpenAiCompatClient, OpenAiCompatConfig,
|
||||
model_rejects_is_error_field, model_requires_reasoning_content_in_history, translate_message,
|
||||
OpenAiCompatClient, OpenAiCompatConfig,
|
||||
};
|
||||
pub use providers::{
|
||||
detect_provider_kind, max_tokens_for_model, max_tokens_for_model_with_override,
|
||||
resolve_model_alias, ProviderKind,
|
||||
model_family_identity_for, model_family_identity_for_kind, resolve_model_alias, ProviderKind,
|
||||
};
|
||||
pub use sse::{parse_frame, SseParser};
|
||||
pub use types::{
|
||||
|
||||
@@ -250,6 +250,19 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
|
||||
ProviderKind::Anthropic
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity {
|
||||
match kind {
|
||||
ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude,
|
||||
ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn model_family_identity_for(model: &str) -> runtime::ModelFamilyIdentity {
|
||||
model_family_identity_for_kind(detect_provider_kind(model))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn max_tokens_for_model(model: &str) -> u32 {
|
||||
let canonical = resolve_model_alias(model);
|
||||
@@ -484,8 +497,8 @@ mod tests {
|
||||
use super::{
|
||||
anthropic_missing_credentials, anthropic_missing_credentials_hint, detect_provider_kind,
|
||||
load_dotenv_file, max_tokens_for_model, max_tokens_for_model_with_override,
|
||||
model_token_limit, parse_dotenv, preflight_message_request, resolve_model_alias,
|
||||
ProviderKind,
|
||||
model_family_identity_for, model_family_identity_for_kind, model_token_limit, parse_dotenv,
|
||||
preflight_message_request, resolve_model_alias, ProviderKind,
|
||||
};
|
||||
|
||||
/// Serializes every test in this module that mutates process-wide
|
||||
@@ -544,6 +557,42 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn maps_provider_kind_to_model_family_identity() {
|
||||
// given: each supported provider kind
|
||||
let anthropic = ProviderKind::Anthropic;
|
||||
let openai = ProviderKind::OpenAi;
|
||||
let xai = ProviderKind::Xai;
|
||||
|
||||
// when: converting provider kinds to prompt model family identities
|
||||
let anthropic_identity = model_family_identity_for_kind(anthropic);
|
||||
let openai_identity = model_family_identity_for_kind(openai);
|
||||
let xai_identity = model_family_identity_for_kind(xai);
|
||||
|
||||
// then: Anthropic stays Claude and OpenAI-compatible providers are generic
|
||||
assert_eq!(anthropic_identity, runtime::ModelFamilyIdentity::Claude);
|
||||
assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
|
||||
assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn maps_model_name_to_model_family_identity() {
|
||||
// given: Anthropic, OpenAI-compatible, and xAI model names
|
||||
let claude_model = "claude-opus-4-6";
|
||||
let openai_model = "openai/gpt-4.1-mini";
|
||||
let xai_model = "grok-3";
|
||||
|
||||
// when: detecting prompt model family identities from model names
|
||||
let claude_identity = model_family_identity_for(claude_model);
|
||||
let openai_identity = model_family_identity_for(openai_model);
|
||||
let xai_identity = model_family_identity_for(xai_model);
|
||||
|
||||
// then: Anthropic stays Claude and OpenAI-compatible providers are generic
|
||||
assert_eq!(claude_identity, runtime::ModelFamilyIdentity::Claude);
|
||||
assert_eq!(openai_identity, runtime::ModelFamilyIdentity::Generic);
|
||||
assert_eq!(xai_identity, runtime::ModelFamilyIdentity::Generic);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openai_namespaced_model_routes_to_openai_not_anthropic() {
|
||||
// Regression: "openai/gpt-4.1-mini" was misrouted to Anthropic when
|
||||
|
||||
@@ -443,6 +443,8 @@ struct StreamState {
|
||||
stop_reason: Option<String>,
|
||||
usage: Option<Usage>,
|
||||
tool_calls: BTreeMap<u32, ToolCallState>,
|
||||
thinking_started: bool,
|
||||
thinking_finished: bool,
|
||||
}
|
||||
|
||||
impl StreamState {
|
||||
@@ -456,6 +458,8 @@ impl StreamState {
|
||||
stop_reason: None,
|
||||
usage: None,
|
||||
tool_calls: BTreeMap::new(),
|
||||
thinking_started: false,
|
||||
thinking_finished: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -493,35 +497,61 @@ impl StreamState {
|
||||
}
|
||||
|
||||
for choice in chunk.choices {
|
||||
if let Some(reasoning) = choice
|
||||
.delta
|
||||
.reasoning_content
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
if !self.thinking_started {
|
||||
self.thinking_started = true;
|
||||
events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent {
|
||||
index: 0,
|
||||
content_block: OutputContentBlock::Thinking {
|
||||
thinking: String::new(),
|
||||
signature: None,
|
||||
},
|
||||
}));
|
||||
}
|
||||
events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
|
||||
index: 0,
|
||||
delta: ContentBlockDelta::ThinkingDelta {
|
||||
thinking: reasoning,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
if let Some(content) = choice.delta.content.filter(|value| !value.is_empty()) {
|
||||
self.close_thinking(&mut events);
|
||||
if !self.text_started {
|
||||
self.text_started = true;
|
||||
events.push(StreamEvent::ContentBlockStart(ContentBlockStartEvent {
|
||||
index: 0,
|
||||
index: self.text_block_index(),
|
||||
content_block: OutputContentBlock::Text {
|
||||
text: String::new(),
|
||||
},
|
||||
}));
|
||||
}
|
||||
events.push(StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
|
||||
index: 0,
|
||||
index: self.text_block_index(),
|
||||
delta: ContentBlockDelta::TextDelta { text: content },
|
||||
}));
|
||||
}
|
||||
|
||||
for tool_call in choice.delta.tool_calls {
|
||||
self.close_thinking(&mut events);
|
||||
let tool_index_offset = self.tool_index_offset();
|
||||
let state = self.tool_calls.entry(tool_call.index).or_default();
|
||||
state.apply(tool_call);
|
||||
let block_index = state.block_index();
|
||||
let block_index = state.block_index(tool_index_offset);
|
||||
if !state.started {
|
||||
if let Some(start_event) = state.start_event()? {
|
||||
if let Some(start_event) = state.start_event(tool_index_offset)? {
|
||||
state.started = true;
|
||||
events.push(StreamEvent::ContentBlockStart(start_event));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if let Some(delta_event) = state.delta_event() {
|
||||
if let Some(delta_event) = state.delta_event(tool_index_offset) {
|
||||
events.push(StreamEvent::ContentBlockDelta(delta_event));
|
||||
}
|
||||
if choice.finish_reason.as_deref() == Some("tool_calls") && !state.stopped {
|
||||
@@ -535,11 +565,12 @@ impl StreamState {
|
||||
if let Some(finish_reason) = choice.finish_reason {
|
||||
self.stop_reason = Some(normalize_finish_reason(&finish_reason));
|
||||
if finish_reason == "tool_calls" {
|
||||
let tool_index_offset = self.tool_index_offset();
|
||||
for state in self.tool_calls.values_mut() {
|
||||
if state.started && !state.stopped {
|
||||
state.stopped = true;
|
||||
events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
|
||||
index: state.block_index(),
|
||||
index: state.block_index(tool_index_offset),
|
||||
}));
|
||||
}
|
||||
}
|
||||
@@ -557,19 +588,21 @@ impl StreamState {
|
||||
self.finished = true;
|
||||
|
||||
let mut events = Vec::new();
|
||||
self.close_thinking(&mut events);
|
||||
if self.text_started && !self.text_finished {
|
||||
self.text_finished = true;
|
||||
events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
|
||||
index: 0,
|
||||
index: self.text_block_index(),
|
||||
}));
|
||||
}
|
||||
|
||||
let tool_index_offset = self.tool_index_offset();
|
||||
for state in self.tool_calls.values_mut() {
|
||||
if !state.started {
|
||||
if let Some(start_event) = state.start_event()? {
|
||||
if let Some(start_event) = state.start_event(tool_index_offset)? {
|
||||
state.started = true;
|
||||
events.push(StreamEvent::ContentBlockStart(start_event));
|
||||
if let Some(delta_event) = state.delta_event() {
|
||||
if let Some(delta_event) = state.delta_event(tool_index_offset) {
|
||||
events.push(StreamEvent::ContentBlockDelta(delta_event));
|
||||
}
|
||||
}
|
||||
@@ -577,7 +610,7 @@ impl StreamState {
|
||||
if state.started && !state.stopped {
|
||||
state.stopped = true;
|
||||
events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
|
||||
index: state.block_index(),
|
||||
index: state.block_index(tool_index_offset),
|
||||
}));
|
||||
}
|
||||
}
|
||||
@@ -603,6 +636,31 @@ impl StreamState {
|
||||
}
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
fn close_thinking(&mut self, events: &mut Vec<StreamEvent>) {
|
||||
if self.thinking_started && !self.thinking_finished {
|
||||
self.thinking_finished = true;
|
||||
events.push(StreamEvent::ContentBlockStop(ContentBlockStopEvent {
|
||||
index: 0,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
const fn text_block_index(&self) -> u32 {
|
||||
if self.thinking_started {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
const fn tool_index_offset(&self) -> u32 {
|
||||
if self.thinking_started {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
@@ -630,12 +688,12 @@ impl ToolCallState {
|
||||
}
|
||||
}
|
||||
|
||||
const fn block_index(&self) -> u32 {
|
||||
self.openai_index + 1
|
||||
const fn block_index(&self, offset: u32) -> u32 {
|
||||
self.openai_index + offset
|
||||
}
|
||||
|
||||
#[allow(clippy::unnecessary_wraps)]
|
||||
fn start_event(&self) -> Result<Option<ContentBlockStartEvent>, ApiError> {
|
||||
fn start_event(&self, offset: u32) -> Result<Option<ContentBlockStartEvent>, ApiError> {
|
||||
let Some(name) = self.name.clone() else {
|
||||
return Ok(None);
|
||||
};
|
||||
@@ -644,7 +702,7 @@ impl ToolCallState {
|
||||
.clone()
|
||||
.unwrap_or_else(|| format!("tool_call_{}", self.openai_index));
|
||||
Ok(Some(ContentBlockStartEvent {
|
||||
index: self.block_index(),
|
||||
index: self.block_index(offset),
|
||||
content_block: OutputContentBlock::ToolUse {
|
||||
id,
|
||||
name,
|
||||
@@ -653,14 +711,14 @@ impl ToolCallState {
|
||||
}))
|
||||
}
|
||||
|
||||
fn delta_event(&mut self) -> Option<ContentBlockDeltaEvent> {
|
||||
fn delta_event(&mut self, offset: u32) -> Option<ContentBlockDeltaEvent> {
|
||||
if self.emitted_len >= self.arguments.len() {
|
||||
return None;
|
||||
}
|
||||
let delta = self.arguments[self.emitted_len..].to_string();
|
||||
self.emitted_len = self.arguments.len();
|
||||
Some(ContentBlockDeltaEvent {
|
||||
index: self.block_index(),
|
||||
index: self.block_index(offset),
|
||||
delta: ContentBlockDelta::InputJsonDelta {
|
||||
partial_json: delta,
|
||||
},
|
||||
@@ -690,6 +748,8 @@ struct ChatMessage {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
#[serde(default)]
|
||||
reasoning_content: Option<String>,
|
||||
#[serde(default)]
|
||||
tool_calls: Vec<ResponseToolCall>,
|
||||
}
|
||||
|
||||
@@ -735,6 +795,8 @@ struct ChunkChoice {
|
||||
struct ChunkDelta {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
#[serde(default)]
|
||||
reasoning_content: Option<String>,
|
||||
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
|
||||
tool_calls: Vec<DeltaToolCall>,
|
||||
}
|
||||
@@ -793,6 +855,15 @@ pub fn is_reasoning_model(model: &str) -> bool {
|
||||
|| canonical.contains("thinking")
|
||||
}
|
||||
|
||||
/// Returns true for OpenAI-compatible DeepSeek V4 models that require prior
|
||||
/// assistant reasoning to be echoed back as `reasoning_content` in history.
|
||||
#[must_use]
|
||||
pub fn model_requires_reasoning_content_in_history(model: &str) -> bool {
|
||||
let lowered = model.to_ascii_lowercase();
|
||||
let canonical = lowered.rsplit('/').next().unwrap_or(lowered.as_str());
|
||||
canonical.starts_with("deepseek-v4")
|
||||
}
|
||||
|
||||
/// Strip routing prefix (e.g., "openai/gpt-4" → "gpt-4") for the wire.
|
||||
/// The prefix is used only to select transport; the backend expects the
|
||||
/// bare model id.
|
||||
@@ -948,10 +1019,14 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
|
||||
match message.role.as_str() {
|
||||
"assistant" => {
|
||||
let mut text = String::new();
|
||||
let mut reasoning = String::new();
|
||||
let mut tool_calls = Vec::new();
|
||||
for block in &message.content {
|
||||
match block {
|
||||
InputContentBlock::Text { text: value } => text.push_str(value),
|
||||
InputContentBlock::Thinking {
|
||||
thinking: value, ..
|
||||
} => reasoning.push_str(value),
|
||||
InputContentBlock::ToolUse { id, name, input } => tool_calls.push(json!({
|
||||
"id": id,
|
||||
"type": "function",
|
||||
@@ -963,13 +1038,18 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
|
||||
InputContentBlock::ToolResult { .. } => {}
|
||||
}
|
||||
}
|
||||
if text.is_empty() && tool_calls.is_empty() {
|
||||
let include_reasoning =
|
||||
model_requires_reasoning_content_in_history(model) && !reasoning.is_empty();
|
||||
if text.is_empty() && tool_calls.is_empty() && !include_reasoning {
|
||||
Vec::new()
|
||||
} else {
|
||||
let mut msg = serde_json::json!({
|
||||
"role": "assistant",
|
||||
"content": (!text.is_empty()).then_some(text),
|
||||
});
|
||||
if include_reasoning {
|
||||
msg["reasoning_content"] = json!(reasoning);
|
||||
}
|
||||
// Only include tool_calls when non-empty: some providers reject
|
||||
// assistant messages with an explicit empty tool_calls array.
|
||||
if !tool_calls.is_empty() {
|
||||
@@ -1003,6 +1083,7 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
|
||||
}
|
||||
Some(msg)
|
||||
}
|
||||
InputContentBlock::Thinking { .. } => None,
|
||||
InputContentBlock::ToolUse { .. } => None,
|
||||
})
|
||||
.collect(),
|
||||
@@ -1182,6 +1263,16 @@ fn normalize_response(
|
||||
"chat completion response missing choices",
|
||||
))?;
|
||||
let mut content = Vec::new();
|
||||
if let Some(thinking) = choice
|
||||
.message
|
||||
.reasoning_content
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
content.push(OutputContentBlock::Thinking {
|
||||
thinking,
|
||||
signature: None,
|
||||
});
|
||||
}
|
||||
if let Some(text) = choice.message.content.filter(|value| !value.is_empty()) {
|
||||
content.push(OutputContentBlock::Text { text });
|
||||
}
|
||||
@@ -1413,13 +1504,15 @@ impl StringExt for String {
|
||||
mod tests {
|
||||
use super::{
|
||||
build_chat_completion_request, chat_completions_endpoint, is_reasoning_model,
|
||||
normalize_finish_reason, openai_tool_choice, parse_tool_arguments, OpenAiCompatClient,
|
||||
OpenAiCompatConfig,
|
||||
model_requires_reasoning_content_in_history, normalize_finish_reason, normalize_response,
|
||||
openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, OpenAiCompatConfig,
|
||||
StreamState,
|
||||
};
|
||||
use crate::error::ApiError;
|
||||
use crate::types::{
|
||||
InputContentBlock, InputMessage, MessageRequest, ToolChoice, ToolDefinition,
|
||||
ToolResultContentBlock,
|
||||
ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent,
|
||||
InputContentBlock, InputMessage, MessageRequest, OutputContentBlock, StreamEvent,
|
||||
ToolChoice, ToolDefinition, ToolResultContentBlock,
|
||||
};
|
||||
use serde_json::json;
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
@@ -1465,6 +1558,188 @@ mod tests {
|
||||
assert_eq!(payload["tool_choice"], json!("auto"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_requires_reasoning_content_in_history_detects_deepseek_v4_models() {
|
||||
// Given DeepSeek V4 and non-V4 model names.
|
||||
let positive = [
|
||||
"deepseek-v4-flash",
|
||||
"deepseek-v4-pro",
|
||||
"openai/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
];
|
||||
let negative = [
|
||||
"deepseek-reasoner",
|
||||
"deepseek-chat",
|
||||
"gpt-4o",
|
||||
"claude-sonnet-4-6",
|
||||
];
|
||||
|
||||
// When checking whether history reasoning_content is required.
|
||||
// Then only DeepSeek V4 variants require it.
|
||||
for model in positive {
|
||||
assert!(model_requires_reasoning_content_in_history(model));
|
||||
}
|
||||
for model in negative {
|
||||
assert!(!model_requires_reasoning_content_in_history(model));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn legacy_deepseek_reasoner_request_omits_reasoning_content_for_assistant_history() {
|
||||
// Given an assistant history turn containing thinking.
|
||||
let request = assistant_history_with_thinking_request("deepseek-reasoner");
|
||||
|
||||
// When serializing for legacy deepseek-reasoner.
|
||||
let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
|
||||
|
||||
// Then reasoning_content is omitted.
|
||||
let assistant = &payload["messages"][0];
|
||||
assert_eq!(assistant["role"], json!("assistant"));
|
||||
assert!(assistant.get("reasoning_content").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deepseek_v4_pro_request_includes_reasoning_content_for_assistant_history() {
|
||||
// Given an assistant history turn containing thinking.
|
||||
let request = assistant_history_with_thinking_request("openai/deepseek-v4-pro");
|
||||
|
||||
// When serializing for DeepSeek V4 Pro.
|
||||
let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
|
||||
|
||||
// Then reasoning_content is included on the assistant message.
|
||||
let assistant = &payload["messages"][0];
|
||||
assert_eq!(assistant["reasoning_content"], json!("prior reasoning"));
|
||||
assert_eq!(assistant["content"], json!("answer"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deepseek_v4_flash_request_includes_reasoning_content_for_assistant_history() {
|
||||
// Given an assistant history turn containing thinking.
|
||||
let request = assistant_history_with_thinking_request("deepseek-v4-flash");
|
||||
|
||||
// When serializing for DeepSeek V4 Flash.
|
||||
let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
|
||||
|
||||
// Then reasoning_content is included on the assistant message.
|
||||
let assistant = &payload["messages"][0];
|
||||
assert_eq!(assistant["reasoning_content"], json!("prior reasoning"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_streaming_response_with_reasoning_content_emits_thinking_block_first() {
|
||||
// Given a non-streaming OpenAI-compatible response with reasoning_content.
|
||||
let response = super::ChatCompletionResponse {
|
||||
id: "chatcmpl_reasoning".to_string(),
|
||||
model: "deepseek-v4-pro".to_string(),
|
||||
choices: vec![super::ChatChoice {
|
||||
message: super::ChatMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: Some("final answer".to_string()),
|
||||
reasoning_content: Some("hidden thought".to_string()),
|
||||
tool_calls: Vec::new(),
|
||||
},
|
||||
finish_reason: Some("stop".to_string()),
|
||||
}],
|
||||
usage: None,
|
||||
};
|
||||
|
||||
// When normalizing the provider response.
|
||||
let normalized = normalize_response("deepseek-v4-pro", response).expect("normalized");
|
||||
|
||||
// Then Thinking is the first content block, before text.
|
||||
assert_eq!(
|
||||
normalized.content,
|
||||
vec![
|
||||
OutputContentBlock::Thinking {
|
||||
thinking: "hidden thought".to_string(),
|
||||
signature: None,
|
||||
},
|
||||
OutputContentBlock::Text {
|
||||
text: "final answer".to_string(),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn streaming_chunks_with_reasoning_content_emit_thinking_block_events_before_text() {
|
||||
// Given streaming chunks with reasoning_content followed by text.
|
||||
let mut state = StreamState::new("deepseek-v4-pro".to_string());
|
||||
let mut events = state
|
||||
.ingest_chunk(super::ChatCompletionChunk {
|
||||
id: "chatcmpl_stream_reasoning".to_string(),
|
||||
model: Some("deepseek-v4-pro".to_string()),
|
||||
choices: vec![super::ChunkChoice {
|
||||
delta: super::ChunkDelta {
|
||||
content: None,
|
||||
reasoning_content: Some("think".to_string()),
|
||||
tool_calls: Vec::new(),
|
||||
},
|
||||
finish_reason: None,
|
||||
}],
|
||||
usage: None,
|
||||
})
|
||||
.expect("reasoning chunk");
|
||||
events.extend(
|
||||
state
|
||||
.ingest_chunk(super::ChatCompletionChunk {
|
||||
id: "chatcmpl_stream_reasoning".to_string(),
|
||||
model: None,
|
||||
choices: vec![super::ChunkChoice {
|
||||
delta: super::ChunkDelta {
|
||||
content: Some(" answer".to_string()),
|
||||
reasoning_content: None,
|
||||
tool_calls: Vec::new(),
|
||||
},
|
||||
finish_reason: Some("stop".to_string()),
|
||||
}],
|
||||
usage: None,
|
||||
})
|
||||
.expect("text chunk"),
|
||||
);
|
||||
events.extend(state.finish().expect("finish"));
|
||||
|
||||
// When reading normalized stream events.
|
||||
// Then Thinking starts at index 0, text is offset to index 1.
|
||||
assert!(matches!(events[0], StreamEvent::MessageStart(_)));
|
||||
assert!(matches!(
|
||||
events[1],
|
||||
StreamEvent::ContentBlockStart(ContentBlockStartEvent {
|
||||
index: 0,
|
||||
content_block: OutputContentBlock::Thinking { .. },
|
||||
})
|
||||
));
|
||||
assert!(matches!(
|
||||
events[2],
|
||||
StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
|
||||
index: 0,
|
||||
delta: ContentBlockDelta::ThinkingDelta { .. },
|
||||
})
|
||||
));
|
||||
assert!(matches!(
|
||||
events[3],
|
||||
StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 0 })
|
||||
));
|
||||
assert!(matches!(
|
||||
events[4],
|
||||
StreamEvent::ContentBlockStart(ContentBlockStartEvent {
|
||||
index: 1,
|
||||
content_block: OutputContentBlock::Text { .. },
|
||||
})
|
||||
));
|
||||
assert!(matches!(
|
||||
events[5],
|
||||
StreamEvent::ContentBlockDelta(ContentBlockDeltaEvent {
|
||||
index: 1,
|
||||
delta: ContentBlockDelta::TextDelta { .. },
|
||||
})
|
||||
));
|
||||
assert!(matches!(
|
||||
events[6],
|
||||
StreamEvent::ContentBlockStop(ContentBlockStopEvent { index: 1 })
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_schema_object_gets_strict_fields_for_responses_endpoint() {
|
||||
// OpenAI /responses endpoint rejects object schemas missing
|
||||
@@ -1624,6 +1899,27 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
fn assistant_history_with_thinking_request(model: &str) -> MessageRequest {
|
||||
MessageRequest {
|
||||
model: model.to_string(),
|
||||
max_tokens: 100,
|
||||
messages: vec![InputMessage {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
InputContentBlock::Thinking {
|
||||
thinking: "prior reasoning".to_string(),
|
||||
signature: None,
|
||||
},
|
||||
InputContentBlock::Text {
|
||||
text: "answer".to_string(),
|
||||
},
|
||||
],
|
||||
}],
|
||||
stream: false,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn env_lock() -> std::sync::MutexGuard<'static, ()> {
|
||||
static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
|
||||
LOCK.get_or_init(|| Mutex::new(()))
|
||||
|
||||
@@ -81,6 +81,11 @@ pub enum InputContentBlock {
|
||||
Text {
|
||||
text: String,
|
||||
},
|
||||
Thinking {
|
||||
thinking: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
signature: Option<String>,
|
||||
},
|
||||
ToolUse {
|
||||
id: String,
|
||||
name: String,
|
||||
@@ -268,8 +273,9 @@ pub enum StreamEvent {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use runtime::format_usd;
|
||||
use serde_json::json;
|
||||
|
||||
use super::{MessageResponse, Usage};
|
||||
use super::{InputContentBlock, MessageResponse, Usage};
|
||||
|
||||
#[test]
|
||||
fn usage_total_tokens_includes_cache_tokens() {
|
||||
@@ -307,4 +313,33 @@ mod tests {
|
||||
assert_eq!(format_usd(cost.total_cost_usd()), "$54.6750");
|
||||
assert_eq!(response.total_tokens(), 1_800_000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn input_content_block_thinking_serializes_with_snake_case_type() {
|
||||
// given
|
||||
let block = InputContentBlock::Thinking {
|
||||
thinking: "pondering".to_string(),
|
||||
signature: Some("sig_123".to_string()),
|
||||
};
|
||||
|
||||
// when
|
||||
let serialized = serde_json::to_value(&block).unwrap();
|
||||
let deserialized: InputContentBlock = serde_json::from_value(json!({
|
||||
"type": "thinking",
|
||||
"thinking": "pondering",
|
||||
"signature": "sig_123"
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
// then
|
||||
assert_eq!(
|
||||
serialized,
|
||||
json!({
|
||||
"type": "thinking",
|
||||
"thinking": "pondering",
|
||||
"signature": "sig_123"
|
||||
})
|
||||
);
|
||||
assert_eq!(deserialized, block);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user