mirror of
https://github.com/instructkr/claude-code.git
synced 2026-06-06 04:06:45 +00:00
Merge remote-tracking branch 'upstream/main' into worktree-api-timeout-retry-v2
# Conflicts: # rust/crates/runtime/src/config.rs # rust/crates/runtime/src/lib.rs
This commit is contained in:
@@ -161,7 +161,7 @@ mod tests {
|
||||
|
||||
/// Checks that the short aliases resolve to their full model ids.
#[test]
fn resolves_existing_and_grok_aliases() {
    // Only one expectation per input: the earlier `claude-opus-4-6` assertion
    // for the same `opus` alias contradicted this one, so both could never
    // pass together.
    assert_eq!(resolve_model_alias("opus"), "claude-opus-4-7");
    assert_eq!(resolve_model_alias("grok"), "grok-3");
    assert_eq!(resolve_model_alias("grok-mini"), "grok-3-mini");
}
|
||||
@@ -235,4 +235,22 @@ mod tests {
|
||||
other => panic!("Expected ProviderClient::OpenAi for qwen-plus, got: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// With `OPENAI_BASE_URL` set to a loopback address and no OpenAI key, a
/// model tag such as `qwen2.5-coder:7b` must still produce an
/// OpenAI-compatible client, even though an Anthropic key is present.
#[test]
fn local_openai_base_url_routes_authless_ollama_models() {
    let _lock = env_lock();
    // The guards are kept bound until the end of the test.
    // NOTE(review): presumably each guard restores the previous value on
    // drop — confirm against `EnvVarGuard`.
    let _base_url = EnvVarGuard::set("OPENAI_BASE_URL", Some("http://127.0.0.1:11434/v1"));
    let _openai_key = EnvVarGuard::set("OPENAI_API_KEY", None);
    let _anthropic_key = EnvVarGuard::set("ANTHROPIC_API_KEY", Some("test-anthropic-key"));
    let _anthropic_token = EnvVarGuard::set("ANTHROPIC_AUTH_TOKEN", None);

    let routed = ProviderClient::from_model("qwen2.5-coder:7b");
    let client =
        routed.expect("local model should route to OpenAI-compatible client without auth");

    match client {
        ProviderClient::OpenAi(openai_client) => {
            assert_eq!(openai_client.base_url(), "http://127.0.0.1:11434/v1");
        }
        other => panic!("Expected ProviderClient::OpenAi for local model, got: {other:?}"),
    }
}
|
||||
}
|
||||
|
||||
@@ -487,8 +487,7 @@ impl AnthropicClient {
|
||||
request: &MessageRequest,
|
||||
) -> Result<reqwest::Response, ApiError> {
|
||||
let request_url = format!("{}/v1/messages", self.base_url.trim_end_matches('/'));
|
||||
let mut request_body = self.request_profile.render_json_body(request)?;
|
||||
strip_unsupported_beta_body_fields(&mut request_body);
|
||||
let request_body = render_standard_messages_body(&self.request_profile, request)?;
|
||||
let request_builder = self.build_request(&request_url).json(&request_body);
|
||||
request_builder.send().await.map_err(ApiError::from)
|
||||
}
|
||||
@@ -548,8 +547,7 @@ impl AnthropicClient {
|
||||
"{}/v1/messages/count_tokens",
|
||||
self.base_url.trim_end_matches('/')
|
||||
);
|
||||
let mut request_body = self.request_profile.render_json_body(request)?;
|
||||
strip_unsupported_beta_body_fields(&mut request_body);
|
||||
let request_body = render_standard_messages_body(&self.request_profile, request)?;
|
||||
let response = self
|
||||
.build_request(&request_url)
|
||||
.json(&request_body)
|
||||
@@ -1036,6 +1034,21 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `model` with one leading `anthropic/` routing prefix removed, or
/// `model` unchanged when it does not start with that prefix.
fn anthropic_wire_model(model: &str) -> &str {
    match model.strip_prefix("anthropic/") {
        Some(bare) => bare,
        None => model,
    }
}
|
||||
|
||||
fn render_standard_messages_body(
|
||||
request_profile: &AnthropicRequestProfile,
|
||||
request: &MessageRequest,
|
||||
) -> Result<Value, serde_json::Error> {
|
||||
let mut wire_request = request.clone();
|
||||
wire_request.model = anthropic_wire_model(&request.model).to_string();
|
||||
let mut body = request_profile.render_json_body(&wire_request)?;
|
||||
strip_unsupported_beta_body_fields(&mut body);
|
||||
Ok(body)
|
||||
}
|
||||
|
||||
/// Remove beta-only body fields that the standard `/v1/messages` and
|
||||
/// `/v1/messages/count_tokens` endpoints reject as `Extra inputs are not
|
||||
/// permitted`. The `betas` opt-in is communicated via the `anthropic-beta`
|
||||
@@ -1609,6 +1622,27 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// The rendered body must carry the bare model id (no `anthropic/` prefix)
/// and must not contain a `betas` key.
#[test]
fn standard_messages_body_strips_anthropic_routing_prefix() {
    let request = MessageRequest {
        model: String::from("anthropic/claude-opus-4-6"),
        max_tokens: 64,
        messages: Vec::new(),
        system: None,
        tools: None,
        tool_choice: None,
        stream: false,
        ..Default::default()
    };
    let client = AnthropicClient::new("test-key");

    let body = super::render_standard_messages_body(client.request_profile(), &request)
        .expect("body should render");

    assert_eq!(body["model"], serde_json::json!("claude-opus-4-6"));
    assert!(body.get("betas").is_none());
}
|
||||
|
||||
#[test]
|
||||
fn enrich_bearer_auth_error_appends_sk_ant_hint_on_401_with_pure_bearer_token() {
|
||||
// given
|
||||
|
||||
@@ -211,7 +211,7 @@ pub fn resolve_model_alias(model: &str) -> String {
|
||||
.find_map(|(alias, metadata)| {
|
||||
(*alias == lower).then_some(match metadata.provider {
|
||||
ProviderKind::Anthropic => match *alias {
|
||||
"opus" => "claude-opus-4-6",
|
||||
"opus" => "claude-opus-4-7",
|
||||
"sonnet" => "claude-sonnet-4-6",
|
||||
"haiku" => "claude-haiku-4-5-20251213",
|
||||
_ => trimmed,
|
||||
@@ -262,6 +262,14 @@ pub fn metadata_for_model(model: &str) -> Option<ProviderMetadata> {
|
||||
default_base_url: openai_compat::DEFAULT_OPENAI_BASE_URL,
|
||||
});
|
||||
}
|
||||
if canonical.starts_with("local/") {
|
||||
return Some(ProviderMetadata {
|
||||
provider: ProviderKind::OpenAi,
|
||||
auth_env: "OPENAI_API_KEY",
|
||||
base_url_env: "OPENAI_BASE_URL",
|
||||
default_base_url: openai_compat::DEFAULT_OPENAI_BASE_URL,
|
||||
});
|
||||
}
|
||||
// Alibaba DashScope compatible-mode endpoint. Routes qwen/* and bare
|
||||
// qwen-* model names (qwen-max, qwen-plus, qwen-turbo, qwen-qwq, etc.)
|
||||
// to the OpenAI-compat client pointed at DashScope's /compatible-mode/v1.
|
||||
@@ -337,17 +345,21 @@ pub fn provider_diagnostics_for_model(model: &str) -> ProviderDiagnostics {
|
||||
}
|
||||
}
|
||||
|
||||
/// Heuristic used when routing unknown model names: returns `true` when
/// `model` contains a `:` or a `.` anywhere (for example `qwen2.5-coder:7b`
/// or `llama3.2`).
fn looks_like_local_openai_model(model: &str) -> bool {
    model.chars().any(|ch| matches!(ch, ':' | '.'))
}
|
||||
|
||||
#[must_use]
|
||||
pub fn detect_provider_kind(model: &str) -> ProviderKind {
|
||||
if let Some(metadata) = metadata_for_model(model) {
|
||||
let resolved_model = resolve_model_alias(model);
|
||||
if let Some(metadata) = metadata_for_model(&resolved_model) {
|
||||
return metadata.provider;
|
||||
}
|
||||
// When OPENAI_BASE_URL is set, the user explicitly configured an
|
||||
// OpenAI-compatible endpoint. Prefer it over the Anthropic fallback
|
||||
// even when the model name has no recognized prefix — this is the
|
||||
// common case for local providers (Ollama, LM Studio, vLLM, etc.)
|
||||
// where model names like "qwen2.5-coder:7b" don't match any prefix.
|
||||
if std::env::var_os("OPENAI_BASE_URL").is_some() && openai_compat::has_api_key("OPENAI_API_KEY")
|
||||
// When OPENAI_BASE_URL is set and the unknown model name looks like a
|
||||
// local server tag (for example `llama3.2` or `qwen2.5-coder:7b`), prefer
|
||||
// the OpenAI-compatible endpoint over ambient Anthropic credentials.
|
||||
if std::env::var_os("OPENAI_BASE_URL").is_some()
|
||||
&& looks_like_local_openai_model(&resolved_model)
|
||||
{
|
||||
return ProviderKind::OpenAi;
|
||||
}
|
||||
@@ -608,7 +620,7 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
|
||||
let canonical = resolve_model_alias(model);
|
||||
let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
|
||||
match base_model {
|
||||
"claude-opus-4-6" => Some(ModelTokenLimit {
|
||||
"claude-opus-4-7" | "claude-opus-4-6" => Some(ModelTokenLimit {
|
||||
max_output_tokens: 32_000,
|
||||
context_window_tokens: 200_000,
|
||||
}),
|
||||
@@ -1042,6 +1054,18 @@ mod tests {
|
||||
assert_eq!(kind2, ProviderKind::OpenAi);
|
||||
}
|
||||
|
||||
/// A `local/`-prefixed model id must resolve to OpenAI-compatible metadata
/// (provider, key variable, base-URL variable) and be detected as OpenAI.
#[test]
fn local_prefix_routes_to_openai_not_anthropic() {
    let model = "local/Qwen/Qwen3.6-27B-FP8";

    let meta = super::metadata_for_model(model)
        .expect("local/ prefix must resolve to OpenAI-compatible metadata");
    assert_eq!(meta.provider, ProviderKind::OpenAi);
    assert_eq!(meta.auth_env, "OPENAI_API_KEY");
    assert_eq!(meta.base_url_env, "OPENAI_BASE_URL");

    assert_eq!(detect_provider_kind(model), ProviderKind::OpenAi);
}
|
||||
|
||||
#[test]
|
||||
fn qwen_prefix_routes_to_dashscope_not_anthropic() {
|
||||
// User request from Discord #clawcode-get-help: web3g wants to use
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::net::Ipv4Addr;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
@@ -131,13 +132,22 @@ impl OpenAiCompatClient {
|
||||
}
|
||||
|
||||
pub fn from_env(config: OpenAiCompatConfig) -> Result<Self, ApiError> {
|
||||
let Some(api_key) = read_env_non_empty(config.api_key_env)? else {
|
||||
return Err(ApiError::missing_credentials(
|
||||
config.provider_name,
|
||||
config.credential_env_vars(),
|
||||
));
|
||||
let base_url = read_base_url(config);
|
||||
let api_key = match read_env_non_empty(config.api_key_env)? {
|
||||
Some(api_key) => api_key,
|
||||
None if config.provider_name == "OpenAI"
|
||||
&& is_local_openai_compatible_base_url(&base_url) =>
|
||||
{
|
||||
"local-dev-token".to_string()
|
||||
}
|
||||
None => {
|
||||
return Err(ApiError::missing_credentials(
|
||||
config.provider_name,
|
||||
config.credential_env_vars(),
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok(Self::new(api_key, config))
|
||||
Ok(Self::new(api_key, config).with_base_url(base_url))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -933,14 +943,18 @@ pub fn model_requires_reasoning_content_in_history(model: &str) -> bool {
|
||||
|
||||
/// Strip routing prefix (e.g., "openai/gpt-4" → "gpt-4") for the wire.
|
||||
/// The prefix is used only to select transport; the backend expects the
|
||||
/// bare model id.
|
||||
/// bare model id. Use `local/` to force OpenAI-compatible routing while
|
||||
/// preserving any slashes that follow the prefix.
|
||||
#[allow(dead_code)]
|
||||
fn strip_routing_prefix(model: &str) -> &str {
|
||||
if let Some(pos) = model.find('/') {
|
||||
let prefix = &model[..pos];
|
||||
// Only strip if the prefix before "/" is a known routing prefix,
|
||||
// not if "/" appears in the middle of the model name for other reasons.
|
||||
if matches!(prefix, "openai" | "xai" | "grok" | "qwen" | "kimi") {
|
||||
if matches!(
|
||||
prefix,
|
||||
"openai" | "xai" | "grok" | "qwen" | "kimi" | "local"
|
||||
) {
|
||||
&model[pos + 1..]
|
||||
} else {
|
||||
model
|
||||
@@ -950,6 +964,44 @@ fn strip_routing_prefix(model: &str) -> &str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reduces a base URL to a comparable form: trailing `/` characters are
/// dropped, then a trailing `/chat/completions` segment (if present) is
/// removed along with any `/` characters left in front of it.
fn normalize_base_url_for_model_routing(url: &str) -> &str {
    let without_slash = url.trim_end_matches('/');
    match without_slash.strip_suffix("/chat/completions") {
        Some(root) => root.trim_end_matches('/'),
        None => without_slash,
    }
}
|
||||
|
||||
/// Extracts the host portion of a URL-like string without allocating.
///
/// Steps: drop everything up to and including the first `://` (if any), cut
/// at the first `/`, `?` or `#`, drop any userinfo before the last `@`, then
/// either return the text inside a leading `[...]` (bracketed IPv6) or the
/// text before the first `:` (port separator).
fn url_host(url: &str) -> &str {
    let rest = match url.split_once("://") {
        Some((_, rest)) => rest,
        None => url,
    };

    let authority_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let authority = &rest[..authority_end];

    let host_port = match authority.rsplit_once('@') {
        Some((_, host_port)) => host_port,
        None => authority,
    };

    if host_port.starts_with('[') {
        // Bracketed literal: keep what precedes the first `]`, minus the
        // opening bracket(s).
        let close = host_port.find(']').unwrap_or(host_port.len());
        return host_port[..close].trim_start_matches('[');
    }

    let port_sep = host_port.find(':').unwrap_or(host_port.len());
    &host_port[..port_sep]
}
|
||||
|
||||
fn is_local_openai_compatible_base_url(url: &str) -> bool {
|
||||
let host = url_host(url.trim());
|
||||
if host.eq_ignore_ascii_case("localhost") || host == "::1" {
|
||||
return true;
|
||||
}
|
||||
let Ok(address) = host.parse::<Ipv4Addr>() else {
|
||||
return false;
|
||||
};
|
||||
let [first, second, ..] = address.octets();
|
||||
matches!(first, 10 | 127)
|
||||
|| first == 192 && second == 168
|
||||
|| first == 172 && (16..=31).contains(&second)
|
||||
}
|
||||
|
||||
fn wire_model_for_base_url<'a>(
|
||||
model: &'a str,
|
||||
config: OpenAiCompatConfig,
|
||||
@@ -962,26 +1014,22 @@ fn wire_model_for_base_url<'a>(
|
||||
let lowered_prefix = prefix.to_ascii_lowercase();
|
||||
|
||||
if lowered_prefix == "openai" {
|
||||
let trimmed_base_url = base_url.trim_end_matches('/');
|
||||
let default_openai = DEFAULT_OPENAI_BASE_URL.trim_end_matches('/');
|
||||
if matches!(
|
||||
lowered_prefix.as_str(),
|
||||
"xai" | "grok" | "kimi" | "gemini" | "gemma"
|
||||
) {
|
||||
let normalized_base_url = normalize_base_url_for_model_routing(base_url);
|
||||
let default_base_url = normalize_base_url_for_model_routing(config.default_base_url);
|
||||
if normalized_base_url.eq_ignore_ascii_case(default_base_url)
|
||||
|| is_local_openai_compatible_base_url(base_url)
|
||||
{
|
||||
return Cow::Borrowed(&model[pos + 1..]);
|
||||
}
|
||||
if config.provider_name == "OpenAI" && trimmed_base_url != default_openai {
|
||||
// Only preserve the full slug if it's NOT a model we want to strip
|
||||
if !model.contains("gemini") && !model.contains("gemma") {
|
||||
return Cow::Borrowed(model);
|
||||
}
|
||||
}
|
||||
return Cow::Borrowed(&model[pos + 1..]);
|
||||
return Cow::Borrowed(model);
|
||||
}
|
||||
|
||||
if matches!(lowered_prefix.as_str(), "xai" | "grok" | "qwen" | "kimi") {
|
||||
return Cow::Borrowed(&model[pos + 1..]);
|
||||
}
|
||||
if lowered_prefix == "local" {
|
||||
return Cow::Borrowed(&model[pos + 1..]);
|
||||
}
|
||||
|
||||
Cow::Borrowed(model)
|
||||
}
|
||||
@@ -1133,6 +1181,13 @@ fn build_chat_completion_request_for_base_url(
|
||||
payload[key] = value.clone();
|
||||
}
|
||||
|
||||
// DeepSeek V4 Pro/Flash thinking mode requires this provider-specific opt-in
|
||||
// and also requires assistant reasoning history to be echoed as `reasoning_content`.
|
||||
// Apply it after extra_body so callers cannot accidentally override the required shape.
|
||||
if model_requires_reasoning_content_in_history(wire_model) {
|
||||
payload["thinking"] = json!({"type": "enabled"});
|
||||
}
|
||||
|
||||
payload
|
||||
}
|
||||
|
||||
@@ -1190,16 +1245,19 @@ pub fn translate_message(message: &InputMessage, model: &str) -> Vec<Value> {
|
||||
InputContentBlock::ToolResult { .. } => {}
|
||||
}
|
||||
}
|
||||
let include_reasoning =
|
||||
model_requires_reasoning_content_in_history(model) && !reasoning.is_empty();
|
||||
if text.is_empty() && tool_calls.is_empty() && !include_reasoning {
|
||||
let needs_reasoning = model_requires_reasoning_content_in_history(model);
|
||||
if text.is_empty() && tool_calls.is_empty() && reasoning.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
let mut msg = serde_json::json!({
|
||||
"role": "assistant",
|
||||
"content": (!text.is_empty()).then_some(text),
|
||||
});
|
||||
if include_reasoning {
|
||||
if !text.is_empty() {
|
||||
msg["content"] = json!(text);
|
||||
} else if !needs_reasoning {
|
||||
msg["content"] = Value::Null;
|
||||
}
|
||||
if needs_reasoning {
|
||||
msg["reasoning_content"] = json!(reasoning);
|
||||
}
|
||||
// Only include tool_calls when non-empty: some providers reject
|
||||
@@ -1752,6 +1810,7 @@ mod tests {
|
||||
ToolChoice, ToolDefinition, ToolResultContentBlock,
|
||||
};
|
||||
use serde_json::json;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
|
||||
@@ -1850,6 +1909,31 @@ mod tests {
|
||||
assert_eq!(assistant["content"], json!("answer"));
|
||||
}
|
||||
|
||||
/// For a `deepseek-v4-pro` assistant turn holding only a tool call, the
/// translated message must have no `content` key, an empty-string
/// `reasoning_content`, and exactly one entry in `tool_calls`.
#[test]
fn deepseek_v4_assistant_with_only_tool_calls_omits_content_and_includes_reasoning() {
    let tool_use = InputContentBlock::ToolUse {
        id: "call_1".to_string(),
        name: "get_weather".to_string(),
        input: json!({"city": "Paris"}),
    };
    let assistant_turn = InputMessage {
        role: "assistant".to_string(),
        content: vec![tool_use],
    };
    let request = MessageRequest {
        model: "deepseek-v4-pro".to_string(),
        max_tokens: 100,
        messages: vec![assistant_turn],
        stream: false,
        ..Default::default()
    };

    let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
    let assistant = &payload["messages"][0];

    assert!(assistant.get("content").is_none());
    assert_eq!(assistant["reasoning_content"], json!(""));
    assert_eq!(assistant["tool_calls"].as_array().map(Vec::len), Some(1));
}
|
||||
|
||||
#[test]
|
||||
fn deepseek_v4_flash_request_includes_reasoning_content_for_assistant_history() {
|
||||
// Given an assistant history turn containing thinking.
|
||||
@@ -2036,6 +2120,49 @@ mod tests {
|
||||
assert_eq!(payload["reasoning_effort"], json!("high"));
|
||||
}
|
||||
|
||||
/// Covers three cases for the `thinking` payload field: it is set for
/// `deepseek-v4-pro`, it overrides a conflicting `extra_body` entry for
/// `openai/deepseek-v4-flash`, and it is absent for `gpt-4o`.
#[test]
fn deepseek_v4_request_includes_thinking_parameter() {
    // Shared request shape; individual cases override fields as needed.
    let hello_request = |model: &str| MessageRequest {
        model: model.to_string(),
        max_tokens: 1024,
        messages: vec![InputMessage::user_text("hello")],
        ..Default::default()
    };

    let payload = build_chat_completion_request(
        &hello_request("deepseek-v4-pro"),
        OpenAiCompatConfig::openai(),
    );
    assert_eq!(payload["thinking"], json!({"type": "enabled"}));
    assert_eq!(payload["model"], json!("deepseek-v4-pro"));

    // A caller-supplied `thinking` value in `extra_body` must not win.
    let extra_body = BTreeMap::from([("thinking".to_string(), json!({"type": "disabled"}))]);
    let payload_with_override = build_chat_completion_request(
        &MessageRequest {
            extra_body,
            ..hello_request("openai/deepseek-v4-flash")
        },
        OpenAiCompatConfig::openai(),
    );
    assert_eq!(
        payload_with_override["thinking"],
        json!({"type": "enabled"})
    );

    let non_deepseek_payload = build_chat_completion_request(
        &MessageRequest {
            max_tokens: 64,
            ..hello_request("gpt-4o")
        },
        OpenAiCompatConfig::openai(),
    );
    assert!(non_deepseek_payload.get("thinking").is_none());
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_effort_omitted_when_not_set() {
|
||||
let payload = build_chat_completion_request(
|
||||
@@ -2123,6 +2250,28 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
/// With `OPENAI_BASE_URL` set to a loopback address and `OPENAI_API_KEY`
/// removed, `OpenAiCompatClient::from_env` must succeed and report that
/// base URL.
#[test]
fn local_openai_base_url_does_not_require_api_key() {
    /// Puts an environment variable back to its captured state on drop, so
    /// the process environment is restored even if an assertion panics.
    struct RestoreEnv {
        key: &'static str,
        original: Option<std::ffi::OsString>,
    }

    impl RestoreEnv {
        fn capture(key: &'static str) -> Self {
            Self {
                key,
                original: std::env::var_os(key),
            }
        }
    }

    impl Drop for RestoreEnv {
        fn drop(&mut self) {
            match self.original.take() {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    // Declared after `_lock` so the guards drop (and restore) before the
    // lock is released.
    let _lock = env_lock();
    let _restore_base_url = RestoreEnv::capture("OPENAI_BASE_URL");
    let _restore_api_key = RestoreEnv::capture("OPENAI_API_KEY");

    std::env::set_var("OPENAI_BASE_URL", "http://127.0.0.1:11434/v1");
    std::env::remove_var("OPENAI_API_KEY");

    let client = OpenAiCompatClient::from_env(OpenAiCompatConfig::openai())
        .expect("local OpenAI-compatible endpoint should not require an API key");
    assert_eq!(client.base_url(), "http://127.0.0.1:11434/v1");
}
|
||||
|
||||
#[test]
|
||||
fn endpoint_builder_accepts_base_urls_and_full_endpoints() {
|
||||
assert_eq!(
|
||||
@@ -2738,6 +2887,66 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
/// The `openai/` prefix is stripped for the default base URL and for local
/// base URLs, and kept as-is for other (custom gateway) base URLs.
#[test]
fn wire_model_strips_openai_prefix_for_default_and_local_preserves_custom_gateways() {
    // (model, base URL, expected wire model)
    let cases = [
        ("openai/gpt-4o", super::DEFAULT_OPENAI_BASE_URL, "gpt-4o"),
        (
            "openai/qwen2.5-coder:7b",
            "http://127.0.0.1:11434/v1",
            "qwen2.5-coder:7b",
        ),
        (
            "openai/llama3.2",
            "http://localhost:11434/v1/chat/completions",
            "llama3.2",
        ),
        (
            "openai/gpt-4.1-mini",
            "https://openrouter.ai/api/v1",
            "openai/gpt-4.1-mini",
        ),
        (
            "openai/gpt-4.1-mini",
            "https://not-localhost.example.com/v1",
            "openai/gpt-4.1-mini",
        ),
    ];

    for (model, base_url, expected) in cases {
        assert_eq!(
            super::wire_model_for_base_url(model, OpenAiCompatConfig::openai(), base_url),
            Cow::Borrowed(expected)
        );
    }
}
|
||||
|
||||
/// Only the leading `local/` segment is removed; the slashes that follow it
/// stay in the model id, both in `strip_routing_prefix` and in
/// `wire_model_for_base_url`.
#[test]
fn local_routing_prefix_strips_only_escape_hatch() {
    let prefixed = "local/Qwen/Qwen3.6-27B-FP8";
    let bare = "Qwen/Qwen3.6-27B-FP8";

    assert_eq!(super::strip_routing_prefix(prefixed), bare);

    let wire_model = super::wire_model_for_base_url(
        prefixed,
        OpenAiCompatConfig::openai(),
        "http://127.0.0.1:8000/v1",
    );
    assert_eq!(wire_model, Cow::Borrowed(bare));
}
|
||||
|
||||
#[test]
|
||||
fn check_request_body_size_allows_large_requests_for_openai() {
|
||||
// Create a request that exceeds DashScope's limit but is under OpenAI's 100MB limit
|
||||
|
||||
@@ -103,6 +103,58 @@ async fn send_message_posts_json_and_parses_response() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn send_message_strips_anthropic_routing_prefix_on_wire() {
|
||||
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
|
||||
let server = spawn_server(
|
||||
state.clone(),
|
||||
vec![
|
||||
http_response("200 OK", "application/json", "{\"input_tokens\":1}"),
|
||||
http_response(
|
||||
"200 OK",
|
||||
"application/json",
|
||||
concat!(
|
||||
"{",
|
||||
"\"id\":\"msg_prefixed\",",
|
||||
"\"type\":\"message\",",
|
||||
"\"role\":\"assistant\",",
|
||||
"\"content\":[{\"type\":\"text\",\"text\":\"ok\"}],",
|
||||
"\"model\":\"claude-opus-4-6\",",
|
||||
"\"stop_reason\":\"end_turn\",",
|
||||
"\"stop_sequence\":null,",
|
||||
"\"usage\":{\"input_tokens\":1,\"output_tokens\":1}",
|
||||
"}"
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
let client = AnthropicClient::new("test-key").with_base_url(server.base_url());
|
||||
client
|
||||
.send_message(&MessageRequest {
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
..sample_request(false)
|
||||
})
|
||||
.await
|
||||
.expect("request should succeed");
|
||||
|
||||
let captured = state.lock().await;
|
||||
assert_eq!(
|
||||
captured.len(),
|
||||
2,
|
||||
"count_tokens and messages requests should be captured"
|
||||
);
|
||||
let count_tokens_body: serde_json::Value =
|
||||
serde_json::from_str(&captured[0].body).expect("count_tokens body should be json");
|
||||
let messages_body: serde_json::Value =
|
||||
serde_json::from_str(&captured[1].body).expect("request body should be json");
|
||||
assert_eq!(captured[0].path, "/v1/messages/count_tokens");
|
||||
assert_eq!(captured[1].path, "/v1/messages");
|
||||
assert_eq!(count_tokens_body["model"], json!("claude-opus-4-6"));
|
||||
assert_eq!(messages_body["model"], json!("claude-opus-4-6"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn send_message_blocks_oversized_requests_before_the_http_call() {
|
||||
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
|
||||
|
||||
@@ -159,10 +159,15 @@ async fn send_message_preserves_deepseek_reasoning_content_before_text() {
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let captured = state.lock().await;
|
||||
let request = captured.first().expect("server should capture request");
|
||||
let body: serde_json::Value = serde_json::from_str(&request.body).expect("json body");
|
||||
assert_eq!(body["thinking"], json!({"type": "enabled"}));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn custom_openai_gateway_preserves_slash_model_ids_and_extra_body_params() {
|
||||
async fn local_openai_gateway_strips_routing_prefix_and_preserves_extra_body_params() {
|
||||
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
|
||||
let body = concat!(
|
||||
"{",
|
||||
@@ -206,7 +211,7 @@ async fn custom_openai_gateway_preserves_slash_model_ids_and_extra_body_params()
|
||||
let captured = state.lock().await;
|
||||
let request = captured.first().expect("captured request");
|
||||
let body: serde_json::Value = serde_json::from_str(&request.body).expect("json body");
|
||||
assert_eq!(body["model"], json!("openai/gpt-4.1-mini"));
|
||||
assert_eq!(body["model"], json!("gpt-4.1-mini"));
|
||||
assert_eq!(
|
||||
body["web_search_options"],
|
||||
json!({"search_context_size": "low"})
|
||||
|
||||
Reference in New Issue
Block a user