omx(team): auto-checkpoint worker-1 [1]

This commit is contained in:
bellman
2026-05-14 18:22:36 +09:00
parent 2ad56860df
commit 0bca524c8c
3 changed files with 107 additions and 10 deletions

View File

@@ -4,6 +4,7 @@ use std::process::{Command, Stdio};
use std::time::Duration;
use serde::{Deserialize, Serialize};
use serde_json::json;
use tokio::process::Command as TokioCommand;
use tokio::runtime::Builder;
use tokio::time::timeout;
@@ -179,6 +180,8 @@ async fn execute_bash_async(
match timeout(Duration::from_millis(timeout_ms), command.output()).await {
Ok(result) => (result?, false),
Err(_) => {
let is_test = is_test_command(&input.command);
let return_code_interpretation = if is_test { "test.hung" } else { "timeout" };
return Ok(BashCommandOutput {
stdout: String::new(),
stderr: format!("Command exceeded timeout of {timeout_ms} ms"),
@@ -189,9 +192,13 @@ async fn execute_bash_async(
backgrounded_by_user: None,
assistant_auto_backgrounded: None,
dangerously_disable_sandbox: input.dangerously_disable_sandbox,
return_code_interpretation: Some(String::from("timeout")),
return_code_interpretation: Some(String::from(return_code_interpretation)),
no_output_expected: Some(true),
structured_content: None,
structured_content: Some(vec![test_timeout_provenance(
&input.command,
timeout_ms,
is_test,
)]),
persisted_output_path: None,
persisted_output_size: None,
sandbox_status: Some(sandbox_status),
@@ -233,6 +240,37 @@ async fn execute_bash_async(
})
}
/// Reports whether `command` mentions one of the known test-runner invocations.
///
/// The command is normalized first: runs of whitespace collapse to a single
/// space and ASCII letters are lowercased, so `CARGO   Test` still matches.
/// Matching is a plain substring search over the whole command text, which
/// means a marker appearing anywhere in it (for example after a `#`) counts.
fn is_test_command(command: &str) -> bool {
    const TEST_RUNNER_MARKERS: [&str; 6] = [
        "cargo test",
        "cargo nextest",
        "npm test",
        "pnpm test",
        "yarn test",
        "pytest",
    ];

    let words: Vec<&str> = command.split_whitespace().collect();
    let collapsed = words.join(" ").to_ascii_lowercase();

    TEST_RUNNER_MARKERS
        .iter()
        .any(|marker| collapsed.contains(*marker))
}
/// Builds the structured JSON record attached to a timed-out bash command.
///
/// * `command` - the command text, copied verbatim into `data.command`.
/// * `timeout_ms` - the timeout value, reported as `data.timeoutMs`.
/// * `classified_as_test_hang` - selects the test-hang labels
///   (`test.hung` / `test_hang`) instead of the generic timeout labels
///   (`command.timeout` / `timeout`).
///
/// `data.provenance` is always `"bash.timeout"`.
fn test_timeout_provenance(
    command: &str,
    timeout_ms: u64,
    classified_as_test_hang: bool,
) -> serde_json::Value {
    // Pick all three labels in one place so they cannot drift apart.
    let (event, failure_class, classification) = if classified_as_test_hang {
        ("test.hung", "test_hang", "test.hung")
    } else {
        ("command.timeout", "timeout", "timeout")
    };

    json!({
        "event": event,
        "failureClass": failure_class,
        "data": {
            "command": command,
            "timeoutMs": timeout_ms,
            "provenance": "bash.timeout",
            "classification": classification
        }
    })
}
fn sandbox_status_for_input(input: &BashCommandInput, cwd: &std::path::Path) -> SandboxStatus {
let config = ConfigLoader::default_for(cwd).load().map_or_else(
|_| SandboxConfig::default(),
@@ -349,6 +387,31 @@ mod tests {
assert!(!output.sandbox_status.expect("sandbox status").enabled);
}
/// A command that times out and mentions `cargo test` must be reported as a
/// hung test, with a structured provenance record attached.
#[test]
fn timed_out_test_command_is_classified_as_hung_test_with_provenance() {
    // given: a command that sleeps, with the `cargo test` marker present only
    // in a trailing shell comment, and a timeout value of 1
    let input = BashCommandInput {
        command: String::from("sleep 1 # cargo test slow_case"),
        timeout: Some(1),
        description: None,
        run_in_background: Some(false),
        dangerously_disable_sandbox: Some(false),
        namespace_restrictions: Some(false),
        isolate_network: Some(false),
        filesystem_mode: Some(FilesystemIsolationMode::WorkspaceOnly),
        allowed_mounts: None,
    };

    // when
    let result = execute_bash(input);

    // then: the timeout comes back as a structured output, not as an error
    let output = result.expect("bash command should return structured timeout");
    assert!(output.interrupted);
    assert_eq!(
        output.return_code_interpretation.as_deref(),
        Some("test.hung")
    );

    let structured = output.structured_content.expect("structured content");
    let first_record = &structured[0];
    assert_eq!(first_record["event"], "test.hung");
    assert_eq!(first_record["data"]["provenance"], "bash.timeout");
}
}
/// Maximum output bytes before truncation (16 KiB, matching upstream).

View File

@@ -376,8 +376,10 @@ pub fn attempt_recovery(scenario: &FailureScenario, ctx: &mut RecoveryContext) -
}
RecoveryResult::PartialRecovery { remaining, .. } => {
entry.state = RecoveryAttemptState::Failed;
entry.last_failure_summary =
Some(format!("{} step(s) remaining after partial recovery", remaining.len()));
entry.last_failure_summary = Some(format!(
"{} step(s) remaining after partial recovery",
remaining.len()
));
}
RecoveryResult::EscalationRequired { reason } => {
entry.state = RecoveryAttemptState::Exhausted;
@@ -630,10 +632,7 @@ mod tests {
let result = attempt_recovery(&scenario, &mut ctx);
// then
assert!(matches!(
result,
RecoveryResult::EscalationRequired { .. }
));
assert!(matches!(result, RecoveryResult::EscalationRequired { .. }));
let entry = ctx.ledger_entry(&scenario).expect("ledger entry");
assert_eq!(entry.state, RecoveryAttemptState::Exhausted);
assert_eq!(entry.attempt_count, 1);

View File

@@ -6212,6 +6212,8 @@ Command exceeded timeout of {timeout_ms} ms",
stderr.trim_end()
)
};
let is_test = is_test_command(command);
let return_code_interpretation = if is_test { "test.hung" } else { "timeout" };
return Ok(runtime::BashCommandOutput {
stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
stderr,
@@ -6222,9 +6224,11 @@ Command exceeded timeout of {timeout_ms} ms",
backgrounded_by_user: None,
assistant_auto_backgrounded: None,
dangerously_disable_sandbox: None,
return_code_interpretation: Some(String::from("timeout")),
return_code_interpretation: Some(String::from(return_code_interpretation)),
no_output_expected: Some(false),
structured_content: None,
structured_content: Some(vec![test_timeout_provenance(
command, timeout_ms, is_test,
)]),
persisted_output_path: None,
persisted_output_size: None,
sandbox_status: None,
@@ -6258,6 +6262,37 @@ Command exceeded timeout of {timeout_ms} ms",
})
}
/// Returns `true` when `command` contains a recognized test-runner invocation.
///
/// Each whitespace-separated word is ASCII-lowercased and the words are
/// re-joined with single spaces before matching, so spacing and letter case
/// in the original command do not matter. The check is a substring search
/// across the whole normalized command, not a parse of the shell syntax.
fn is_test_command(command: &str) -> bool {
    let lowered_words: Vec<String> = command
        .split_whitespace()
        .map(str::to_ascii_lowercase)
        .collect();
    let normalized = lowered_words.join(" ");

    let needles = [
        "cargo test",
        "cargo nextest",
        "npm test",
        "pnpm test",
        "yarn test",
        "pytest",
    ];
    needles.iter().any(|needle| normalized.contains(*needle))
}
/// Produces the structured JSON record describing a shell command timeout.
///
/// * `command` - the command text, stored as `data.command`.
/// * `timeout_ms` - the timeout value, stored as `data.timeoutMs`.
/// * `classified_as_test_hang` - when `true` the record uses the `test.hung`
///   event and `test_hang` failure class; otherwise it uses `command.timeout`
///   and `timeout`.
///
/// `data.provenance` is always `"shell.timeout"`.
fn test_timeout_provenance(
    command: &str,
    timeout_ms: u64,
    classified_as_test_hang: bool,
) -> serde_json::Value {
    // Resolve the label triple once instead of branching per field.
    let (event_name, failure_class, classification) = match classified_as_test_hang {
        true => ("test.hung", "test_hang", "test.hung"),
        false => ("command.timeout", "timeout", "timeout"),
    };

    json!({
        "event": event_name,
        "failureClass": failure_class,
        "data": {
            "command": command,
            "timeoutMs": timeout_ms,
            "provenance": "shell.timeout",
            "classification": classification
        }
    })
}
fn resolve_cell_index(
cells: &[serde_json::Value],
cell_id: Option<&str>,