From 0bca524c8c8056aaf4adf21a5419c0f7d49d82e5 Mon Sep 17 00:00:00 2001 From: bellman Date: Thu, 14 May 2026 18:22:36 +0900 Subject: [PATCH] omx(team): auto-checkpoint worker-1 [1] --- rust/crates/runtime/src/bash.rs | 67 ++++++++++++++++++++- rust/crates/runtime/src/recovery_recipes.rs | 11 ++-- rust/crates/tools/src/lib.rs | 39 +++++++++++- 3 files changed, 107 insertions(+), 10 deletions(-) diff --git a/rust/crates/runtime/src/bash.rs b/rust/crates/runtime/src/bash.rs index f7c3d45b..331db2cf 100644 --- a/rust/crates/runtime/src/bash.rs +++ b/rust/crates/runtime/src/bash.rs @@ -4,6 +4,7 @@ use std::process::{Command, Stdio}; use std::time::Duration; use serde::{Deserialize, Serialize}; +use serde_json::json; use tokio::process::Command as TokioCommand; use tokio::runtime::Builder; use tokio::time::timeout; @@ -179,6 +180,8 @@ async fn execute_bash_async( match timeout(Duration::from_millis(timeout_ms), command.output()).await { Ok(result) => (result?, false), Err(_) => { + let is_test = is_test_command(&input.command); + let return_code_interpretation = if is_test { "test.hung" } else { "timeout" }; return Ok(BashCommandOutput { stdout: String::new(), stderr: format!("Command exceeded timeout of {timeout_ms} ms"), @@ -189,9 +192,13 @@ async fn execute_bash_async( backgrounded_by_user: None, assistant_auto_backgrounded: None, dangerously_disable_sandbox: input.dangerously_disable_sandbox, - return_code_interpretation: Some(String::from("timeout")), + return_code_interpretation: Some(String::from(return_code_interpretation)), no_output_expected: Some(true), - structured_content: None, + structured_content: Some(vec![test_timeout_provenance( + &input.command, + timeout_ms, + is_test, + )]), persisted_output_path: None, persisted_output_size: None, sandbox_status: Some(sandbox_status), @@ -233,6 +240,37 @@ async fn execute_bash_async( }) } +fn is_test_command(command: &str) -> bool { + let normalized = command + .split_whitespace() + .collect::<Vec<_>>() + .join(" ") + 
.to_ascii_lowercase(); + normalized.contains("cargo test") + || normalized.contains("cargo nextest") + || normalized.contains("npm test") + || normalized.contains("pnpm test") + || normalized.contains("yarn test") + || normalized.contains("pytest") +} + +fn test_timeout_provenance( + command: &str, + timeout_ms: u64, + classified_as_test_hang: bool, +) -> serde_json::Value { + json!({ + "event": if classified_as_test_hang { "test.hung" } else { "command.timeout" }, + "failureClass": if classified_as_test_hang { "test_hang" } else { "timeout" }, + "data": { + "command": command, + "timeoutMs": timeout_ms, + "provenance": "bash.timeout", + "classification": if classified_as_test_hang { "test.hung" } else { "timeout" } + } + }) +} + fn sandbox_status_for_input(input: &BashCommandInput, cwd: &std::path::Path) -> SandboxStatus { let config = ConfigLoader::default_for(cwd).load().map_or_else( |_| SandboxConfig::default(), @@ -349,6 +387,31 @@ mod tests { assert!(!output.sandbox_status.expect("sandbox status").enabled); } + + #[test] + fn timed_out_test_command_is_classified_as_hung_test_with_provenance() { + let output = execute_bash(BashCommandInput { + command: String::from("sleep 1 # cargo test slow_case"), + timeout: Some(1), + description: None, + run_in_background: Some(false), + dangerously_disable_sandbox: Some(false), + namespace_restrictions: Some(false), + isolate_network: Some(false), + filesystem_mode: Some(FilesystemIsolationMode::WorkspaceOnly), + allowed_mounts: None, + }) + .expect("bash command should return structured timeout"); + + assert!(output.interrupted); + assert_eq!( + output.return_code_interpretation.as_deref(), + Some("test.hung") + ); + let structured = output.structured_content.expect("structured content"); + assert_eq!(structured[0]["event"], "test.hung"); + assert_eq!(structured[0]["data"]["provenance"], "bash.timeout"); + } } /// Maximum output bytes before truncation (16 KiB, matching upstream). 
diff --git a/rust/crates/runtime/src/recovery_recipes.rs b/rust/crates/runtime/src/recovery_recipes.rs index ab3e3dab..58f0ee0e 100644 --- a/rust/crates/runtime/src/recovery_recipes.rs +++ b/rust/crates/runtime/src/recovery_recipes.rs @@ -376,8 +376,10 @@ pub fn attempt_recovery(scenario: &FailureScenario, ctx: &mut RecoveryContext) - } RecoveryResult::PartialRecovery { remaining, .. } => { entry.state = RecoveryAttemptState::Failed; - entry.last_failure_summary = - Some(format!("{} step(s) remaining after partial recovery", remaining.len())); + entry.last_failure_summary = Some(format!( + "{} step(s) remaining after partial recovery", + remaining.len() + )); } RecoveryResult::EscalationRequired { reason } => { entry.state = RecoveryAttemptState::Exhausted; @@ -630,10 +632,7 @@ mod tests { let result = attempt_recovery(&scenario, &mut ctx); // then - assert!(matches!( - result, - RecoveryResult::EscalationRequired { .. } - )); + assert!(matches!(result, RecoveryResult::EscalationRequired { .. 
})); let entry = ctx.ledger_entry(&scenario).expect("ledger entry"); assert_eq!(entry.state, RecoveryAttemptState::Exhausted); assert_eq!(entry.attempt_count, 1); diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index 9615c631..6221b6fe 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -6212,6 +6212,8 @@ Command exceeded timeout of {timeout_ms} ms", stderr.trim_end() ) }; + let is_test = is_test_command(command); + let return_code_interpretation = if is_test { "test.hung" } else { "timeout" }; return Ok(runtime::BashCommandOutput { stdout: String::from_utf8_lossy(&output.stdout).into_owned(), stderr, @@ -6222,9 +6224,11 @@ Command exceeded timeout of {timeout_ms} ms", backgrounded_by_user: None, assistant_auto_backgrounded: None, dangerously_disable_sandbox: None, - return_code_interpretation: Some(String::from("timeout")), + return_code_interpretation: Some(String::from(return_code_interpretation)), no_output_expected: Some(false), - structured_content: None, + structured_content: Some(vec![test_timeout_provenance( + command, timeout_ms, is_test, + )]), persisted_output_path: None, persisted_output_size: None, sandbox_status: None, @@ -6258,6 +6262,37 @@ Command exceeded timeout of {timeout_ms} ms", }) } +fn is_test_command(command: &str) -> bool { + let normalized = command + .split_whitespace() + .collect::<Vec<_>>() + .join(" ") + .to_ascii_lowercase(); + normalized.contains("cargo test") + || normalized.contains("cargo nextest") + || normalized.contains("npm test") + || normalized.contains("pnpm test") + || normalized.contains("yarn test") + || normalized.contains("pytest") +} + +fn test_timeout_provenance( + command: &str, + timeout_ms: u64, + classified_as_test_hang: bool, +) -> serde_json::Value { + json!({ + "event": if classified_as_test_hang { "test.hung" } else { "command.timeout" }, + "failureClass": if classified_as_test_hang { "test_hang" } else { "timeout" }, + "data": { + "command": command, + 
"timeoutMs": timeout_ms, + "provenance": "shell.timeout", + "classification": if classified_as_test_hang { "test.hung" } else { "timeout" } + } + }) +} + fn resolve_cell_index( cells: &[serde_json::Value], cell_id: Option<&str>,