mirror of
https://github.com/instructkr/claude-code.git
synced 2026-05-27 07:56:46 +00:00
feat: auto-compact and retry on context window errors
Adds automatic compaction and retry when the context window is exceeded.
This commit is contained in:
@@ -3843,12 +3843,13 @@ fn run_resume_command(
|
||||
json: Some(serde_json::json!({ "kind": "help", "text": render_repl_help() })),
|
||||
}),
|
||||
SlashCommand::Compact => {
|
||||
let result = runtime::compact_session(
|
||||
let result = runtime::trident::trident_compact_session(
|
||||
session,
|
||||
CompactionConfig {
|
||||
max_estimated_tokens: 0,
|
||||
..CompactionConfig::default()
|
||||
},
|
||||
&runtime::trident::TridentConfig::default(),
|
||||
);
|
||||
let removed = result.removed_message_count;
|
||||
let kept = result.compacted_session.messages.len();
|
||||
@@ -5046,6 +5047,120 @@ impl LiveCli {
|
||||
TerminalRenderer::new().color_theme(),
|
||||
&mut stdout,
|
||||
)?;
|
||||
|
||||
// ============================================================================
|
||||
// Auto-compact retry on context window errors
|
||||
// ============================================================================
|
||||
// When the model API returns a context_window_blocked error (because the request
|
||||
// exceeds the model's context window), we automatically:
|
||||
// 1. Compact the session (remove old messages to free up space)
|
||||
// 2. Retry the original request with the compacted session
|
||||
// 3. Report results to the user
|
||||
//
|
||||
// This eliminates the need for users to manually run /compact when they
|
||||
// hit context limits - the recovery happens automatically.
|
||||
//
|
||||
// Detection: We look for "context_window", "Context window", or "no parseable body"
|
||||
// in the error message, which covers error types like:
|
||||
// - "context_window_blocked"
|
||||
// - "Context window blocked"
|
||||
// NOTE(review): "This model's maximum context length is X tokens..." contains none of these substrings, so it is NOT detected
|
||||
// ============================================================================
|
||||
|
||||
let error_str = error.to_string();
|
||||
// Detect context window overflow. Some providers (e.g. OpenAI-compat backends)
|
||||
// return 400 with "no parseable body" instead of a proper context_length_exceeded
|
||||
// error when the request is too large to even parse — treat that as context overflow too.
|
||||
let is_context_window = error_str.contains("context_window")
|
||||
|| error_str.contains("Context window")
|
||||
|| error_str.contains("no parseable body");
|
||||
|
||||
if is_context_window {
|
||||
// A single compaction pass may not free enough context space.
|
||||
// Progressive retry: each round preserves fewer recent messages (4→2→1→0),
|
||||
// trading conversation continuity for a smaller payload until it fits.
|
||||
// Max 4 rounds before giving up and surfacing the error to the user.
|
||||
let max_compact_rounds = 4;
|
||||
let preserve_schedule = [4, 2, 1, 0];
|
||||
|
||||
for round in 0..max_compact_rounds {
|
||||
let preserve = preserve_schedule[round];
|
||||
println!(
|
||||
" Auto-compacting session (round {}/{}, preserving {} recent messages)...",
|
||||
round + 1,
|
||||
max_compact_rounds,
|
||||
preserve
|
||||
);
|
||||
|
||||
// Run Trident pipeline then summary-based compaction
|
||||
let result = runtime::trident::trident_compact_session(
|
||||
runtime.session(),
|
||||
CompactionConfig {
|
||||
preserve_recent_messages: preserve,
|
||||
max_estimated_tokens: 0,
|
||||
},
|
||||
&runtime::trident::TridentConfig::default(),
|
||||
);
|
||||
let removed = result.removed_message_count;
|
||||
|
||||
if removed == 0 && round > 0 {
|
||||
// No more messages to compact — further rounds won't help
|
||||
println!(" No further compaction possible.");
|
||||
break;
|
||||
}
|
||||
|
||||
if removed > 0 {
|
||||
println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false));
|
||||
}
|
||||
|
||||
// Without this, prepare_turn_runtime() reads from self.runtime.session()
|
||||
// which still holds the ORIGINAL un-compacted session, so every retry round
|
||||
// would send the same bloated request — compaction was wasted.
|
||||
*self.runtime.session_mut() = result.compacted_session.clone();
|
||||
|
||||
// Build a new runtime with the compacted session and retry
|
||||
let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?;
|
||||
drop(hook_abort_monitor);
|
||||
|
||||
let mut rp = CliPermissionPrompter::new(self.permission_mode);
|
||||
match new_runtime.run_turn(input, Some(&mut rp)) {
|
||||
Ok(summary) => {
|
||||
self.replace_runtime(new_runtime)?;
|
||||
spinner.finish(
|
||||
if round == 0 { "✨ Done (after auto-compact)" } else { "✨ Done (after aggressive auto-compact)" },
|
||||
TerminalRenderer::new().color_theme(),
|
||||
&mut stdout,
|
||||
)?;
|
||||
println!();
|
||||
if let Some(event) = summary.auto_compaction {
|
||||
println!("{}", format_auto_compaction_notice(event.removed_message_count));
|
||||
}
|
||||
self.persist_session()?;
|
||||
return Ok(());
|
||||
}
|
||||
Err(retry_error) => {
|
||||
let retry_str = retry_error.to_string();
|
||||
let still_context_window = retry_str.contains("context_window")
|
||||
|| retry_str.contains("Context window")
|
||||
|| retry_str.contains("no parseable body");
|
||||
|
||||
if still_context_window && round + 1 < max_compact_rounds {
|
||||
// The compacted session was still too large for the model's context.
|
||||
// Shut down the old runtime, adopt the partially-compacted one,
|
||||
// and loop — the next round will compact more aggressively.
|
||||
runtime.shutdown_plugins()?;
|
||||
runtime = new_runtime;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not a context window error, or out of rounds
|
||||
return Err(Box::new(retry_error));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If not a context window error, return original error
|
||||
Err(Box::new(error))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user