mirror of
https://github.com/instructkr/claude-code.git
synced 2026-05-27 16:06:44 +00:00
feat: auto-compact and retry on context window errors
Adds automatic compaction and retry when context window is exceeded.
This commit is contained in:
@@ -108,10 +108,15 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
|
||||
.first()
|
||||
.and_then(extract_existing_compacted_summary);
|
||||
let compacted_prefix_len = usize::from(existing_summary.is_some());
|
||||
let raw_keep_from = session
|
||||
.messages
|
||||
.len()
|
||||
.saturating_sub(config.preserve_recent_messages);
|
||||
// When preserve_recent_messages is 0, the caller wants maximum compaction
|
||||
// (no recent messages preserved). Without this guard, saturating_sub(0)
|
||||
// returns messages.len(), which later indexes past the end of the array
|
||||
// at session.messages[k] because keep_from == messages.len() is out of bounds.
|
||||
let raw_keep_from = if config.preserve_recent_messages == 0 {
|
||||
session.messages.len()
|
||||
} else {
|
||||
session.messages.len().saturating_sub(config.preserve_recent_messages)
|
||||
};
|
||||
// Ensure we do not split a tool-use / tool-result pair at the compaction
|
||||
// boundary. If the first preserved message is a user message whose first
|
||||
// block is a ToolResult, the assistant message with the matching ToolUse
|
||||
|
||||
@@ -10,6 +10,7 @@ pub mod bash_validation;
|
||||
mod bootstrap;
|
||||
pub mod branch_lock;
|
||||
mod compact;
|
||||
pub mod trident;
|
||||
mod config;
|
||||
pub mod config_validate;
|
||||
mod conversation;
|
||||
|
||||
791
rust/crates/runtime/src/trident.rs
Normal file
791
rust/crates/runtime/src/trident.rs
Normal file
@@ -0,0 +1,791 @@
|
||||
use crate::compact::{compact_session, CompactionConfig, CompactionResult};
|
||||
use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
/// Configuration for the Trident compaction pipeline.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TridentConfig {
|
||||
pub supersede_enabled: bool,
|
||||
pub collapse_enabled: bool,
|
||||
pub cluster_enabled: bool,
|
||||
pub collapse_threshold: usize,
|
||||
pub cluster_min_size: usize,
|
||||
pub cluster_similarity_threshold: f64,
|
||||
pub max_file_operations: usize,
|
||||
}
|
||||
|
||||
impl Default for TridentConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
supersede_enabled: true,
|
||||
collapse_enabled: true,
|
||||
cluster_enabled: true,
|
||||
collapse_threshold: 4,
|
||||
cluster_min_size: 3,
|
||||
cluster_similarity_threshold: 0.6,
|
||||
max_file_operations: 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics from a Trident compaction run.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TridentStats {
|
||||
pub superseded_count: usize,
|
||||
pub collapsed_chains: usize,
|
||||
pub messages_collapsed: usize,
|
||||
pub clusters_found: usize,
|
||||
pub messages_clustered: usize,
|
||||
pub tokens_saved_estimate: usize,
|
||||
pub original_message_count: usize,
|
||||
pub final_message_count: usize,
|
||||
}
|
||||
|
||||
impl Default for TridentStats {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
superseded_count: 0,
|
||||
collapsed_chains: 0,
|
||||
messages_collapsed: 0,
|
||||
clusters_found: 0,
|
||||
messages_clustered: 0,
|
||||
tokens_saved_estimate: 0,
|
||||
original_message_count: 0,
|
||||
final_message_count: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TridentStats {
|
||||
pub fn format_report(&self) -> String {
|
||||
let compression = if self.final_message_count > 0 {
|
||||
self.original_message_count as f64 / self.final_message_count as f64
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
let mut lines = vec![
|
||||
"Trident Compaction Complete".to_string(),
|
||||
format!(
|
||||
" Stage 1 (Supersede): {} obsolete removed",
|
||||
self.superseded_count
|
||||
),
|
||||
format!(
|
||||
" Stage 2 (Collapse): {} -> {} summaries",
|
||||
self.messages_collapsed, self.collapsed_chains
|
||||
),
|
||||
format!(
|
||||
" Stage 3 (Cluster): {} -> {} clusters",
|
||||
self.messages_clustered, self.clusters_found
|
||||
),
|
||||
format!(" Original: {} messages", self.original_message_count),
|
||||
format!(" Final: {} messages ({:.1}x compression)", self.final_message_count, compression),
|
||||
];
|
||||
if self.tokens_saved_estimate > 0 {
|
||||
lines.push(format!(
|
||||
" Est. tokens saved: ~{}",
|
||||
self.tokens_saved_estimate
|
||||
));
|
||||
}
|
||||
lines.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of the Trident compaction pipeline.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TridentResult {
|
||||
pub compacted_session: Session,
|
||||
pub stats: TridentStats,
|
||||
}
|
||||
|
||||
/// Run the full Trident compaction pipeline on a session, then apply
|
||||
/// the standard summary-based compaction.
|
||||
pub fn trident_compact_session(
|
||||
session: &Session,
|
||||
compaction_config: CompactionConfig,
|
||||
trident_config: &TridentConfig,
|
||||
) -> CompactionResult {
|
||||
let original_count = session.messages.len();
|
||||
let original_tokens: usize = session.messages.iter().map(estimate_message_tokens).sum();
|
||||
|
||||
let mut stats = TridentStats {
|
||||
original_message_count: original_count,
|
||||
..TridentStats::default()
|
||||
};
|
||||
|
||||
let mut messages = session.messages.clone();
|
||||
|
||||
if trident_config.supersede_enabled {
|
||||
let (kept, superseded_count) = stage1_supersede(&messages);
|
||||
stats.superseded_count = superseded_count;
|
||||
messages = kept;
|
||||
}
|
||||
|
||||
if trident_config.collapse_enabled {
|
||||
let (collapsed, chains, collapsed_count) = stage2_collapse(&messages, trident_config.collapse_threshold);
|
||||
stats.collapsed_chains = chains;
|
||||
stats.messages_collapsed = collapsed_count;
|
||||
messages = collapsed;
|
||||
}
|
||||
|
||||
if trident_config.cluster_enabled {
|
||||
let (clustered, clusters_found, messages_clustered) = stage3_cluster(
|
||||
&messages,
|
||||
trident_config.cluster_min_size,
|
||||
trident_config.cluster_similarity_threshold,
|
||||
);
|
||||
stats.clusters_found = clusters_found;
|
||||
stats.messages_clustered = messages_clustered;
|
||||
messages = clustered;
|
||||
}
|
||||
|
||||
stats.final_message_count = messages.len();
|
||||
|
||||
let final_tokens: usize = messages.iter().map(estimate_message_tokens).sum();
|
||||
stats.tokens_saved_estimate = original_tokens.saturating_sub(final_tokens);
|
||||
|
||||
let mut trident_session = session.clone();
|
||||
trident_session.messages = messages;
|
||||
|
||||
let result = compact_session(&trident_session, compaction_config);
|
||||
|
||||
if stats.superseded_count > 0 || stats.collapsed_chains > 0 || stats.clusters_found > 0 {
|
||||
eprintln!("{}", stats.format_report());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STAGE 1: SUPERSEDE — Zero-cost factual pruning
|
||||
// =============================================================================
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum FileOp {
|
||||
Read,
|
||||
Write,
|
||||
Edit,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct FileOperation {
|
||||
index: usize,
|
||||
op_type: FileOp,
|
||||
}
|
||||
|
||||
fn stage1_supersede(messages: &[ConversationMessage]) -> (Vec<ConversationMessage>, usize) {
|
||||
let mut file_ops: BTreeMap<String, Vec<FileOperation>> = BTreeMap::new();
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
for block in &msg.blocks {
|
||||
if let Some((path, op_type)) = extract_file_operation(block) {
|
||||
file_ops.entry(path).or_default().push(FileOperation {
|
||||
index: i,
|
||||
op_type,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut obsolete_indices: BTreeSet<usize> = BTreeSet::new();
|
||||
|
||||
for (_path, ops) in &file_ops {
|
||||
if ops.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let last_write_idx = ops
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|op| op.op_type == FileOp::Write || op.op_type == FileOp::Edit)
|
||||
.map(|op| op.index);
|
||||
|
||||
if let Some(last_write) = last_write_idx {
|
||||
for op in ops {
|
||||
if op.op_type == FileOp::Read && op.index < last_write {
|
||||
obsolete_indices.insert(op.index);
|
||||
} else if (op.op_type == FileOp::Write || op.op_type == FileOp::Edit)
|
||||
&& op.index < last_write
|
||||
{
|
||||
obsolete_indices.insert(op.index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let superseded_count = obsolete_indices.len();
|
||||
let kept: Vec<ConversationMessage> = messages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| !obsolete_indices.contains(i))
|
||||
.map(|(_, msg)| msg.clone())
|
||||
.collect();
|
||||
|
||||
(kept, superseded_count)
|
||||
}
|
||||
|
||||
fn extract_file_operation(block: &ContentBlock) -> Option<(String, FileOp)> {
|
||||
match block {
|
||||
ContentBlock::ToolUse { name, input, .. } => {
|
||||
let path = extract_path_from_tool_input(name, input)?;
|
||||
let op_type = match name.as_str() {
|
||||
"read_file" | "Read" => FileOp::Read,
|
||||
"write_file" | "Write" => FileOp::Write,
|
||||
"edit_file" | "Edit" => FileOp::Edit,
|
||||
_ => return None,
|
||||
};
|
||||
Some((path, op_type))
|
||||
}
|
||||
ContentBlock::ToolResult { tool_name, output, .. } => {
|
||||
let path = extract_path_from_tool_output(tool_name, output)?;
|
||||
let op_type = match tool_name.as_str() {
|
||||
"read_file" | "Read" => FileOp::Read,
|
||||
"write_file" | "Write" => FileOp::Write,
|
||||
"edit_file" | "Edit" => FileOp::Edit,
|
||||
_ => return None,
|
||||
};
|
||||
Some((path, op_type))
|
||||
}
|
||||
ContentBlock::Text { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_path_from_tool_input(tool_name: &str, input: &str) -> Option<String> {
|
||||
if !matches!(tool_name, "read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
serde_json::from_str::<serde_json::Value>(input)
|
||||
.ok()
|
||||
.and_then(|v| v.get("path")?.as_str().map(String::from))
|
||||
.or_else(|| {
|
||||
serde_json::from_str::<serde_json::Value>(input)
|
||||
.ok()
|
||||
.and_then(|v| v.get("file_path")?.as_str().map(String::from))
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_path_from_tool_output(tool_name: &str, output: &str) -> Option<String> {
|
||||
if !matches!(tool_name, "read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
serde_json::from_str::<serde_json::Value>(output)
|
||||
.ok()
|
||||
.and_then(|v| v.get("path")?.as_str().map(String::from))
|
||||
.or_else(|| {
|
||||
output
|
||||
.lines()
|
||||
.next()
|
||||
.and_then(|line| line.strip_prefix("path: "))
|
||||
.map(String::from)
|
||||
})
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STAGE 2: COLLAPSE — Summarize chatty exchanges
|
||||
// =============================================================================
|
||||
|
||||
fn stage2_collapse(
|
||||
messages: &[ConversationMessage],
|
||||
threshold: usize,
|
||||
) -> (Vec<ConversationMessage>, usize, usize) {
|
||||
if messages.len() < threshold {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let mut result: Vec<ConversationMessage> = Vec::new();
|
||||
let mut buffer: Vec<ConversationMessage> = Vec::new();
|
||||
let mut total_chains = 0;
|
||||
let mut total_collapsed = 0;
|
||||
|
||||
for msg in messages {
|
||||
if is_chatty_message(msg) {
|
||||
buffer.push(msg.clone());
|
||||
} else {
|
||||
if buffer.len() >= threshold {
|
||||
let summary = generate_collapse_summary(&buffer);
|
||||
total_chains += 1;
|
||||
total_collapsed += buffer.len();
|
||||
result.push(ConversationMessage {
|
||||
role: MessageRole::System,
|
||||
blocks: vec![ContentBlock::Text {
|
||||
text: format!("[Collapsed Conversation]\n{summary}"),
|
||||
}],
|
||||
usage: None,
|
||||
});
|
||||
} else {
|
||||
result.extend(buffer.drain(..));
|
||||
}
|
||||
buffer.clear();
|
||||
result.push(msg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if buffer.len() >= threshold {
|
||||
let summary = generate_collapse_summary(&buffer);
|
||||
total_chains += 1;
|
||||
total_collapsed += buffer.len();
|
||||
result.push(ConversationMessage {
|
||||
role: MessageRole::System,
|
||||
blocks: vec![ContentBlock::Text {
|
||||
text: format!("[Collapsed Conversation]\n{summary}"),
|
||||
}],
|
||||
usage: None,
|
||||
});
|
||||
} else {
|
||||
result.extend(buffer);
|
||||
}
|
||||
|
||||
(result, total_chains, total_collapsed)
|
||||
}
|
||||
|
||||
fn is_chatty_message(msg: &ConversationMessage) -> bool {
|
||||
let total_chars: usize = msg.blocks.iter().map(|b| match b {
|
||||
ContentBlock::Text { text } => text.len(),
|
||||
ContentBlock::ToolUse { input, .. } => input.len(),
|
||||
ContentBlock::ToolResult { output, .. } => output.len(),
|
||||
}).sum();
|
||||
|
||||
let has_tool_use = msg.blocks.iter().any(|b| matches!(b, ContentBlock::ToolUse { .. }));
|
||||
let has_tool_result = msg.blocks.iter().any(|b| matches!(b, ContentBlock::ToolResult { .. }));
|
||||
|
||||
if has_tool_use || has_tool_result {
|
||||
return false;
|
||||
}
|
||||
|
||||
total_chars < 200
|
||||
}
|
||||
|
||||
fn generate_collapse_summary(messages: &[ConversationMessage]) -> String {
|
||||
let user_count = messages
|
||||
.iter()
|
||||
.filter(|m| m.role == MessageRole::User)
|
||||
.count();
|
||||
let assistant_count = messages
|
||||
.iter()
|
||||
.filter(|m| m.role == MessageRole::Assistant)
|
||||
.count();
|
||||
|
||||
let mut topics: Vec<String> = messages
|
||||
.iter()
|
||||
.filter_map(|m| {
|
||||
m.blocks.iter().find_map(|b| match b {
|
||||
ContentBlock::Text { text } if !text.trim().is_empty() => {
|
||||
Some(truncate_text(text, 80))
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.take(5)
|
||||
.collect();
|
||||
topics.dedup();
|
||||
|
||||
let mut lines = vec![format!(
|
||||
"Collapsed {} messages ({} user, {} assistant).",
|
||||
messages.len(),
|
||||
user_count,
|
||||
assistant_count
|
||||
)];
|
||||
|
||||
if !topics.is_empty() {
|
||||
lines.push("Topics:".to_string());
|
||||
for topic in &topics {
|
||||
lines.push(format!(" - {topic}"));
|
||||
}
|
||||
}
|
||||
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STAGE 3: CLUSTER — Semantic grouping and deep storage
|
||||
// =============================================================================
|
||||
|
||||
fn stage3_cluster(
|
||||
messages: &[ConversationMessage],
|
||||
min_cluster_size: usize,
|
||||
similarity_threshold: f64,
|
||||
) -> (Vec<ConversationMessage>, usize, usize) {
|
||||
if messages.len() < min_cluster_size {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let fingerprints: Vec<MessageFingerprint> = messages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, msg)| fingerprint_message(i, msg))
|
||||
.collect();
|
||||
|
||||
if fingerprints.len() < min_cluster_size {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let mut cluster_assignments: BTreeMap<usize, usize> = BTreeMap::new();
|
||||
let mut cluster_id = 0;
|
||||
|
||||
for i in 0..fingerprints.len() {
|
||||
if cluster_assignments.contains_key(&fingerprints[i].index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut cluster_members: Vec<usize> = vec![fingerprints[i].index];
|
||||
|
||||
for j in (i + 1)..fingerprints.len() {
|
||||
if cluster_assignments.contains_key(&fingerprints[j].index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let similarity = compute_similarity(&fingerprints[i], &fingerprints[j]);
|
||||
if similarity >= similarity_threshold {
|
||||
cluster_members.push(fingerprints[j].index);
|
||||
}
|
||||
}
|
||||
|
||||
if cluster_members.len() >= min_cluster_size {
|
||||
for member_idx in &cluster_members {
|
||||
cluster_assignments.insert(*member_idx, cluster_id);
|
||||
}
|
||||
cluster_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if cluster_assignments.is_empty() {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let total_clustered: usize = cluster_assignments.len();
|
||||
let clusters_found = cluster_id as usize;
|
||||
|
||||
let mut result: Vec<ConversationMessage> = Vec::new();
|
||||
let mut cluster_buffers: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
|
||||
|
||||
for (msg_idx, &cid) in &cluster_assignments {
|
||||
cluster_buffers.entry(cid).or_default().push(*msg_idx);
|
||||
}
|
||||
|
||||
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
if let Some(&cid) = cluster_assignments.get(&i) {
|
||||
if let Some(buffer) = cluster_buffers.get_mut(&cid) {
|
||||
if buffer[0] == i {
|
||||
let cluster_messages: Vec<&ConversationMessage> = buffer
|
||||
.iter()
|
||||
.filter_map(|&idx| messages.get(idx))
|
||||
.collect();
|
||||
let summary = generate_cluster_summary(&cluster_messages);
|
||||
result.push(ConversationMessage {
|
||||
role: MessageRole::System,
|
||||
blocks: vec![ContentBlock::Text {
|
||||
text: format!("[Clustered {} messages]\n{summary}", buffer.len()),
|
||||
}],
|
||||
usage: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.push(msg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
(result, clusters_found, total_clustered)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MessageFingerprint {
|
||||
index: usize,
|
||||
tool_names: BTreeSet<String>,
|
||||
file_paths: BTreeSet<String>,
|
||||
role: MessageRole,
|
||||
text_length: usize,
|
||||
}
|
||||
|
||||
fn fingerprint_message(index: usize, msg: &ConversationMessage) -> Option<MessageFingerprint> {
|
||||
if msg.role == MessageRole::System {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut tool_names: BTreeSet<String> = BTreeSet::new();
|
||||
let mut file_paths: BTreeSet<String> = BTreeSet::new();
|
||||
let mut text_length = 0;
|
||||
|
||||
for block in &msg.blocks {
|
||||
match block {
|
||||
ContentBlock::ToolUse { name, input, .. } => {
|
||||
tool_names.insert(name.clone());
|
||||
if let Some(path) = extract_path_from_tool_input(name, input) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
text_length += input.len();
|
||||
}
|
||||
ContentBlock::ToolResult { tool_name, output, .. } => {
|
||||
tool_names.insert(tool_name.clone());
|
||||
if let Some(path) = extract_path_from_tool_output(tool_name, output) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
text_length += output.len();
|
||||
}
|
||||
ContentBlock::Text { text } => {
|
||||
text_length += text.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(MessageFingerprint {
|
||||
index,
|
||||
tool_names,
|
||||
file_paths,
|
||||
role: msg.role,
|
||||
text_length,
|
||||
})
|
||||
}
|
||||
|
||||
fn compute_similarity(a: &MessageFingerprint, b: &MessageFingerprint) -> f64 {
|
||||
if a.role != b.role {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let tool_overlap = if a.tool_names.is_empty() && b.tool_names.is_empty() {
|
||||
1.0
|
||||
} else if a.tool_names.is_empty() || b.tool_names.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
let intersection: usize = a.tool_names.intersection(&b.tool_names).count();
|
||||
let union: usize = a.tool_names.union(&b.tool_names).count();
|
||||
intersection as f64 / union as f64
|
||||
};
|
||||
|
||||
let file_overlap = if a.file_paths.is_empty() && b.file_paths.is_empty() {
|
||||
1.0
|
||||
} else if a.file_paths.is_empty() || b.file_paths.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
let intersection: usize = a.file_paths.intersection(&b.file_paths).count();
|
||||
let union: usize = a.file_paths.union(&b.file_paths).count();
|
||||
intersection as f64 / union as f64
|
||||
};
|
||||
|
||||
let length_similarity = if a.text_length == 0 && b.text_length == 0 {
|
||||
1.0
|
||||
} else if a.text_length == 0 || b.text_length == 0 {
|
||||
0.0
|
||||
} else {
|
||||
let min_len = a.text_length.min(b.text_length) as f64;
|
||||
let max_len = a.text_length.max(b.text_length) as f64;
|
||||
min_len / max_len
|
||||
};
|
||||
|
||||
0.4 * tool_overlap + 0.4 * file_overlap + 0.2 * length_similarity
|
||||
}
|
||||
|
||||
fn generate_cluster_summary(messages: &[&ConversationMessage]) -> String {
|
||||
let mut tool_names: BTreeSet<String> = BTreeSet::new();
|
||||
let mut file_paths: BTreeSet<String> = BTreeSet::new();
|
||||
|
||||
for msg in messages {
|
||||
for block in &msg.blocks {
|
||||
match block {
|
||||
ContentBlock::ToolUse { name, input, .. } => {
|
||||
tool_names.insert(name.clone());
|
||||
if let Some(path) = extract_path_from_tool_input(name, input) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
}
|
||||
ContentBlock::ToolResult { tool_name, output, .. } => {
|
||||
tool_names.insert(tool_name.clone());
|
||||
if let Some(path) = extract_path_from_tool_output(tool_name, output) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
}
|
||||
ContentBlock::Text { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut lines = vec![format!("{} similar messages grouped.", messages.len())];
|
||||
|
||||
if !tool_names.is_empty() {
|
||||
lines.push(format!(
|
||||
"Tools: {}.",
|
||||
tool_names.iter().cloned().collect::<Vec<_>>().join(", ")
|
||||
));
|
||||
}
|
||||
|
||||
if !file_paths.is_empty() {
|
||||
let paths: Vec<String> = file_paths.iter().take(5).cloned().collect();
|
||||
lines.push(format!("Files: {}.", paths.join(", ")));
|
||||
}
|
||||
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Utilities
|
||||
// =============================================================================
|
||||
|
||||
fn estimate_message_tokens(message: &ConversationMessage) -> usize {
|
||||
message
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|block| match block {
|
||||
ContentBlock::Text { text } => text.len() / 4 + 1,
|
||||
ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
|
||||
ContentBlock::ToolResult {
|
||||
tool_name, output, ..
|
||||
} => (tool_name.len() + output.len()) / 4 + 1,
|
||||
})
|
||||
.sum()
|
||||
}
|
||||
|
||||
fn truncate_text(text: &str, max_chars: usize) -> String {
|
||||
if text.chars().count() <= max_chars {
|
||||
return text.to_string();
|
||||
}
|
||||
let mut truncated: String = text.chars().take(max_chars).collect();
|
||||
truncated.push('…');
|
||||
truncated
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::compact::CompactionConfig;
|
||||
use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
|
||||
|
||||
#[test]
|
||||
fn stage1_removes_obsolete_file_reads() {
|
||||
let messages = vec![
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "1".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"old"}"#, false),
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "2".to_string(),
|
||||
name: "edit_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs","old":"old","new":"new"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result("2", "edit_file", r#"{"path":"src/main.rs","ok":true}"#, false),
|
||||
];
|
||||
|
||||
let (kept, superseded) = stage1_supersede(&messages);
|
||||
assert!(superseded > 0, "should supersede the earlier read");
|
||||
assert!(kept.len() < messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stage1_keeps_standalone_reads() {
|
||||
let messages = vec![
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "1".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"data"}"#, false),
|
||||
];
|
||||
|
||||
let (kept, superseded) = stage1_supersede(&messages);
|
||||
assert_eq!(superseded, 0);
|
||||
assert_eq!(kept.len(), messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stage2_collapses_chatty_messages() {
|
||||
let mut messages = vec![];
|
||||
for i in 0..6 {
|
||||
messages.push(ConversationMessage::user_text(&format!("ok {i}")));
|
||||
messages.push(ConversationMessage::assistant(vec![ContentBlock::Text {
|
||||
text: format!("got {i}"),
|
||||
}]));
|
||||
}
|
||||
messages.push(ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "t".to_string(),
|
||||
name: "bash".to_string(),
|
||||
input: r#"{"command":"ls"}"#.to_string(),
|
||||
}]));
|
||||
|
||||
let (result, chains, collapsed) = stage2_collapse(&messages, 4);
|
||||
assert!(chains > 0, "should collapse at least one chain");
|
||||
assert!(collapsed > 0);
|
||||
assert!(result.len() < messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stage3_clusters_similar_messages() {
|
||||
let mut messages = vec![];
|
||||
for i in 0..5 {
|
||||
messages.push(ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: format!("read_{i}"),
|
||||
name: "read_file".to_string(),
|
||||
input: format!(r#"{{"path":"src/{i}.rs"}}"#),
|
||||
}]));
|
||||
messages.push(ConversationMessage::tool_result(
|
||||
&format!("read_{i}"),
|
||||
"read_file",
|
||||
&format!(r#"{{"path":"src/{i}.rs","content":"data {i}"}}"#),
|
||||
false,
|
||||
));
|
||||
}
|
||||
|
||||
let (result, clusters, clustered) =
|
||||
stage3_cluster(&messages, 3, 0.4);
|
||||
assert!(clusters > 0, "should find at least one cluster");
|
||||
assert!(clustered > 0);
|
||||
assert!(result.len() < messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trident_full_pipeline_preserves_important_content() {
|
||||
let mut session = Session::new();
|
||||
session.messages = vec![
|
||||
ConversationMessage::user_text("Read and fix main.rs"),
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "1".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result("1", "read_file", r#"{"path":"src/main.rs","content":"fn main() { buggy }"}"#, false),
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "2".to_string(),
|
||||
name: "edit_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs","old":"buggy","new":"fixed"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result("2", "edit_file", r#"{"path":"src/main.rs","ok":true}"#, false),
|
||||
ConversationMessage::assistant(vec![ContentBlock::Text {
|
||||
text: "Fixed the bug in main.rs".to_string(),
|
||||
}]),
|
||||
];
|
||||
|
||||
let trident_config = TridentConfig::default();
|
||||
let result = trident_compact_session(
|
||||
&session,
|
||||
CompactionConfig {
|
||||
preserve_recent_messages: 4,
|
||||
max_estimated_tokens: 1,
|
||||
},
|
||||
&trident_config,
|
||||
);
|
||||
|
||||
assert!(result.removed_message_count > 0 || result.compacted_session.messages.len() < session.messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trident_stats_report() {
|
||||
let stats = TridentStats {
|
||||
superseded_count: 5,
|
||||
collapsed_chains: 2,
|
||||
messages_collapsed: 8,
|
||||
clusters_found: 1,
|
||||
messages_clustered: 3,
|
||||
tokens_saved_estimate: 1200,
|
||||
original_message_count: 20,
|
||||
final_message_count: 8,
|
||||
};
|
||||
let report = stats.format_report();
|
||||
assert!(report.contains("Stage 1 (Supersede): 5"));
|
||||
assert!(report.contains("Stage 2 (Collapse): 8 -> 2"));
|
||||
assert!(report.contains("Stage 3 (Cluster): 3 -> 1"));
|
||||
assert!(report.contains("1200") || report.contains("1,200"));
|
||||
}
|
||||
}
|
||||
@@ -3843,12 +3843,13 @@ fn run_resume_command(
|
||||
json: Some(serde_json::json!({ "kind": "help", "text": render_repl_help() })),
|
||||
}),
|
||||
SlashCommand::Compact => {
|
||||
let result = runtime::compact_session(
|
||||
let result = runtime::trident::trident_compact_session(
|
||||
session,
|
||||
CompactionConfig {
|
||||
max_estimated_tokens: 0,
|
||||
..CompactionConfig::default()
|
||||
},
|
||||
&runtime::trident::TridentConfig::default(),
|
||||
);
|
||||
let removed = result.removed_message_count;
|
||||
let kept = result.compacted_session.messages.len();
|
||||
@@ -5046,6 +5047,120 @@ impl LiveCli {
|
||||
TerminalRenderer::new().color_theme(),
|
||||
&mut stdout,
|
||||
)?;
|
||||
|
||||
// ============================================================================
|
||||
// Auto-compact retry on context window errors
|
||||
// ============================================================================
|
||||
// When the model API returns a context_window_blocked error (because the request
|
||||
// exceeds the model's context window), we automatically:
|
||||
// 1. Compact the session (remove old messages to free up space)
|
||||
// 2. Retry the original request with the compacted session
|
||||
// 3. Report results to the user
|
||||
//
|
||||
// This eliminates the need for users to manually run /compact when they
|
||||
// hit context limits - the recovery happens automatically.
|
||||
//
|
||||
// Detection: We look for "context_window" or "Context window" in the error
|
||||
// message, which covers error types like:
|
||||
// - "context_window_blocked"
|
||||
// - "Context window blocked"
|
||||
// - "This model's maximum context length is X tokens..."
|
||||
// ============================================================================
|
||||
|
||||
let error_str = error.to_string();
|
||||
// Detect context window overflow. Some providers (e.g. OpenAI-compat backends)
|
||||
// return 400 with "no parseable body" instead of a proper context_length_exceeded
|
||||
// error when the request is too large to even parse — treat that as context overflow too.
|
||||
let is_context_window = error_str.contains("context_window")
|
||||
|| error_str.contains("Context window")
|
||||
|| error_str.contains("no parseable body");
|
||||
|
||||
if is_context_window {
|
||||
// A single compaction pass may not free enough context space.
|
||||
// Progressive retry: each round preserves fewer recent messages (4→2→1→0),
|
||||
// trading conversation continuity for a smaller payload until it fits.
|
||||
// Max 4 rounds before giving up and surfacing the error to the user.
|
||||
let max_compact_rounds = 4;
|
||||
let preserve_schedule = [4, 2, 1, 0];
|
||||
|
||||
for round in 0..max_compact_rounds {
|
||||
let preserve = preserve_schedule[round];
|
||||
println!(
|
||||
" Auto-compacting session (round {}/{}, preserving {} recent messages)...",
|
||||
round + 1,
|
||||
max_compact_rounds,
|
||||
preserve
|
||||
);
|
||||
|
||||
// Run Trident pipeline then summary-based compaction
|
||||
let result = runtime::trident::trident_compact_session(
|
||||
runtime.session(),
|
||||
CompactionConfig {
|
||||
preserve_recent_messages: preserve,
|
||||
max_estimated_tokens: 0,
|
||||
},
|
||||
&runtime::trident::TridentConfig::default(),
|
||||
);
|
||||
let removed = result.removed_message_count;
|
||||
|
||||
if removed == 0 && round > 0 {
|
||||
// No more messages to compact — further rounds won't help
|
||||
println!(" No further compaction possible.");
|
||||
break;
|
||||
}
|
||||
|
||||
if removed > 0 {
|
||||
println!("{}", format_compact_report(removed, result.compacted_session.messages.len(), false));
|
||||
}
|
||||
|
||||
// Without this, prepare_turn_runtime() reads from self.runtime.session()
|
||||
// which still holds the ORIGINAL un-compacted session, so every retry round
|
||||
// would send the same bloated request — compaction was wasted.
|
||||
*self.runtime.session_mut() = result.compacted_session.clone();
|
||||
|
||||
// Build a new runtime with the compacted session and retry
|
||||
let (mut new_runtime, hook_abort_monitor) = self.prepare_turn_runtime(true)?;
|
||||
drop(hook_abort_monitor);
|
||||
|
||||
let mut rp = CliPermissionPrompter::new(self.permission_mode);
|
||||
match new_runtime.run_turn(input, Some(&mut rp)) {
|
||||
Ok(summary) => {
|
||||
self.replace_runtime(new_runtime)?;
|
||||
spinner.finish(
|
||||
if round == 0 { "✨ Done (after auto-compact)" } else { "✨ Done (after aggressive auto-compact)" },
|
||||
TerminalRenderer::new().color_theme(),
|
||||
&mut stdout,
|
||||
)?;
|
||||
println!();
|
||||
if let Some(event) = summary.auto_compaction {
|
||||
println!("{}", format_auto_compaction_notice(event.removed_message_count));
|
||||
}
|
||||
self.persist_session()?;
|
||||
return Ok(());
|
||||
}
|
||||
Err(retry_error) => {
|
||||
let retry_str = retry_error.to_string();
|
||||
let still_context_window = retry_str.contains("context_window")
|
||||
|| retry_str.contains("Context window")
|
||||
|| retry_str.contains("no parseable body");
|
||||
|
||||
if still_context_window && round + 1 < max_compact_rounds {
|
||||
// The compacted session was still too large for the model's context.
|
||||
// Shut down the old runtime, adopt the partially-compacted one,
|
||||
// and loop — the next round will compact more aggressively.
|
||||
runtime.shutdown_plugins()?;
|
||||
runtime = new_runtime;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not a context window error, or out of rounds
|
||||
return Err(Box::new(retry_error));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If not a context window error, return original error
|
||||
Err(Box::new(error))
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user