From 8f7eaffcefb59cd21ccd4788492be579e0881069 Mon Sep 17 00:00:00 2001 From: bellman Date: Thu, 14 May 2026 18:38:22 +0900 Subject: [PATCH] Close the G005 verification gaps before checkpoint Constraint: G005 requires stale-base doctor consistency, green-contract policy integration, hung-test evidence, and a durable verification map before ultragoal checkpointing.\nRejected: Treat worker task status alone as complete | worker-2 lifecycle was stale-failed despite landed recovery evidence, so leader verification and explicit map are required.\nConfidence: medium\nScope-risk: moderate\nDirective: Keep PR/issue reconciliation deferred to G011/G012; do not mutate .omx/ultragoal outside checkpoint commands.\nTested: git diff --check; cargo fmt --manifest-path rust/Cargo.toml --all -- --check; cargo check --manifest-path rust/Cargo.toml -p rusty-claude-cli; cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli workspace_health_warns_when_stale_base_diverged -- --nocapture; cargo check --manifest-path rust/Cargo.toml -p tools\nNot-tested: full workspace test suite due known unrelated permission/lifecycle failures from worker evidence.\n\nCo-authored-by: OmX --- docs/g005-branch-recovery-verification-map.md | 67 ++++++++++++++ rust/crates/rusty-claude-cli/src/main.rs | 90 +++++++++++++++++-- rust/crates/tools/src/lane_completion.rs | 2 + 3 files changed, 151 insertions(+), 8 deletions(-) create mode 100644 docs/g005-branch-recovery-verification-map.md diff --git a/docs/g005-branch-recovery-verification-map.md b/docs/g005-branch-recovery-verification-map.md new file mode 100644 index 00000000..2dd93672 --- /dev/null +++ b/docs/g005-branch-recovery-verification-map.md @@ -0,0 +1,67 @@ +# G005 Branch/Test Awareness and Recovery Verification Map + +Source plan: `.omx/plans/claw-code-2-0-adaptive-plan.md` Stream 3. +Durable audit owner: leader checkpoint to `.omx/ultragoal/ledger.jsonl` after final verification. This file intentionally does not mutate leader-owned `.omx/ultragoal` state. + +## Covered ROADMAP / PRD pinpoints + +- `ROADMAP.md:912-921` — Phase 3 §7 stale-branch detection before broad verification: broad workspace test commands are preflighted before execution, stale/diverged branches emit `branch.stale_against_main`, and targeted tests bypass the broad-test gate. +- `ROADMAP.md:922-933` — Phase 3 §8 recovery recipes: stale-branch recovery remains represented by the `stale_branch` recipe, with one automatic attempt before escalation. +- `ROADMAP.md:935-949` — Phase 3 §8.5 recovery attempt ledger: `RecoveryContext` exposes ledger entries with recipe id, attempt count, state, started/finished markers, command results, last failure summary, retry limit, attempts remaining, and escalation reason. +- `ROADMAP.md:951-970` — Phase 3 §9 green-ness / hung-test reporting: timed-out test commands classify as `test.hung` with structured provenance instead of generic timeout. +- `ROADMAP.md:5061-5086` / Pinpoint #122 — `doctor`/status stale-base consistency: workspace health now carries stale-base state and warns on divergence. +- `prd.json:37-44` — US-003 stale-branch detection before broad verification: verified through the `workspace_test_branch_preflight` broad-test block and targeted-test bypass tests. +- `prd.json:50-57` — US-004 recovery recipes with ledger: verified through recovery ledger unit coverage and serialization-compatible recovery structs. + +## Scope-to-artifact map + +| Requirement | Evidence | +| --- | --- | +| Stale branch detection before broad tests | `rust/crates/tools/src/lib.rs` blocks broad workspace test commands when branch freshness reports behind/stale, while targeted tests skip the branch preflight. Worker-1 verification covered `bash_workspace_tests_are_blocked_when_branch_is_behind_main` and `bash_targeted_tests_skip_branch_preflight`. | +| Stale base/doctor consistency | `rust/crates/rusty-claude-cli/src/main.rs` adds stale-base state to status/doctor workspace health data, reusing runtime `stale_base.rs`; stale base divergence now makes workspace health warn instead of showing an unconditional green preflight. | +| Recovery recipes and attempt ledger | `rust/crates/runtime/src/recovery_recipes.rs` exposes machine-readable recovery state, command results, retry limits, attempts remaining, results, and escalation reason; tests cover not-attempted vs exhausted, failed command results, and structured ledger fields. | +| Green-ness contract | `rust/crates/runtime/src/green_contract.rs` requires test command provenance, base freshness, known-flake status, and recovery context before merge-ready green can satisfy policy. | +| Merge/reconcile policy requires green contract | `rust/crates/runtime/src/policy_engine.rs` gates `GreenAt` on `LaneContext.green_contract_satisfied`; `rust/crates/tools/src/lane_completion.rs` populates this field for automatic completion contexts. | +| Hung-test classification | `rust/crates/runtime/src/bash.rs` and `rust/crates/tools/src/lib.rs` classify timed-out test commands as `test.hung` with `failureClass: test_hang` and structured provenance. | + +## Implementation anchors + +- `rust/crates/runtime/src/stale_branch.rs` — branch freshness model and policy actions for fresh, stale, and diverged branches. +- `rust/crates/tools/src/lib.rs` — `workspace_test_branch_preflight`, `branch_divergence_output`, Bash/PowerShell broad-test gating, and `test.hung` structured timeout provenance on tool-shell timeouts. +- `rust/crates/runtime/src/recovery_recipes.rs` — recovery recipes plus `RecoveryLedgerEntry` / `RecoveryAttemptState` ledger surface. +- `rust/crates/runtime/src/bash.rs` — runtime Bash timeout classification and structured provenance for hung test commands. +- `rust/crates/runtime/src/green_contract.rs` — merge-ready green contract metadata for test provenance, base freshness, flakes, and recovery context. +- `rust/crates/runtime/src/policy_engine.rs` and `rust/crates/tools/src/lane_completion.rs` — policy/completion integration for `green_contract_satisfied`. +- `rust/crates/rusty-claude-cli/src/main.rs` — stale-base state in doctor/status workspace health. + +## Leader verification commands + +Run from repo root before checkpointing G005: + +```sh +git diff --check +cargo fmt --manifest-path rust/Cargo.toml --all -- --check +cargo check --manifest-path rust/Cargo.toml -p runtime +cargo check --manifest-path rust/Cargo.toml -p tools +cargo check --manifest-path rust/Cargo.toml -p rusty-claude-cli +cargo test --manifest-path rust/Cargo.toml -p runtime recovery_ -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p runtime green_contract -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p runtime stale_branch -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p runtime stale_base -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p runtime timed_out_test_command_is_classified_as_hung_test_with_provenance -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p tools bash_tool_reports_success_exit_failure_timeout_and_background -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p tools lane_completion -- --nocapture +cargo test --manifest-path rust/Cargo.toml -p rusty-claude-cli workspace_health_warns_when_stale_base_diverged -- --nocapture +``` + +## Known unresolved / out-of-scope items + +- Full `cargo test -p tools` has known permission-enforcer expectation failures reported by workers as pre-existing/out-of-scope for G005 branch freshness, recovery ledger, and hung-test classification. +- Open roadmap PR/issue reconciliation is gated to G011/G012 per `docs/pr-issue-resolution-gate.md`. + +## Delegation evidence + +- Worker-1 task 1 spawned two probes (`019e25c8-1b13-75f0-baee-182deee69724`, `019e25c8-1db7-73c0-a0d5-4425fdc9061a`); both errored with 429, direct repo evidence integrated. +- Worker-1 task 2 spawned repository map probe `019e25d5-9be9-7193-8a33-f21450beb62c`; it errored with 429, direct ROADMAP/PRD/doc findings integrated. +- Worker-2 task 3 spawned two child tasks (`019e25cb-b340-7041-9e49-143a95ccd263`, `019e25cb-b936-7310-9f39-6c77f40ae805`); one hit 429 and one timed out/shutdown, local tests/inspection integrated. +- Worker-3 task 4 spawned change-slice probe `019e25cc-da54-7860-abe6-80c8222ad4db`; it errored with 429, serial evidence integrated. diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index 853b97a9..188990bb 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -45,11 +45,11 @@ use render::{MarkdownStreamState, Spinner, TerminalRenderer}; use runtime::{ check_base_commit, format_stale_base_warning, format_usd, load_oauth_credentials, load_system_prompt, pricing_for_model, resolve_expected_base, resolve_sandbox_status, - ApiClient, ApiRequest, AssistantEvent, CompactionConfig, ConfigLoader, ConfigSource, - ContentBlock, ConversationMessage, ConversationRuntime, McpServer, McpServerManager, - McpServerSpec, McpTool, MessageRole, ModelPricing, PermissionMode, PermissionPolicy, - ProjectContext, PromptCacheEvent, ResolvedPermissionMode, RuntimeError, Session, TokenUsage, - ToolError, ToolExecutor, UsageTracker, + ApiClient, ApiRequest, AssistantEvent, BaseCommitState, CompactionConfig, ConfigLoader, + ConfigSource, ContentBlock, ConversationMessage, ConversationRuntime, McpServer, + McpServerManager, McpServerSpec, McpTool, MessageRole, ModelPricing, PermissionMode, + PermissionPolicy, ProjectContext, PromptCacheEvent, ResolvedPermissionMode, RuntimeError, + Session, TokenUsage, ToolError, ToolExecutor, UsageTracker, }; use serde::Deserialize; use serde_json::{json, Map, Value}; @@ -1973,6 +1973,7 @@ fn render_doctor_report() -> Result> { parse_git_status_metadata(project_context.git_status.as_deref()); let git_summary = parse_git_workspace_summary(project_context.git_status.as_deref()); let branch_freshness = BranchFreshness::from_git_status(project_context.git_status.as_deref()); + let stale_base_state = stale_base_state_for(&cwd, None); let empty_config = runtime::RuntimeConfig::empty(); let sandbox_config = config.as_ref().ok().unwrap_or(&empty_config); let boot_preflight = build_boot_preflight_snapshot( @@ -1995,6 +1996,7 @@ fn render_doctor_report() -> Result> { git_branch, git_summary, branch_freshness, + stale_base_state, session_lifecycle: classify_session_lifecycle_for(&cwd), boot_preflight, sandbox_status: resolve_sandbox_status(sandbox_config.sandbox(), &cwd), @@ -2334,9 +2336,10 @@ fn check_install_source_health() -> DiagnosticCheck { fn check_workspace_health(context: &StatusContext) -> DiagnosticCheck { let in_repo = context.project_root.is_some(); + let stale_base_warning = format_stale_base_warning(&context.stale_base_state); DiagnosticCheck::new( "Workspace", - if in_repo { + if in_repo && stale_base_warning.is_none() { DiagnosticLevel::Ok } else { DiagnosticLevel::Warn @@ -2369,6 +2372,10 @@ fn check_workspace_health(context: &StatusContext) -> DiagnosticCheck { "Memory files {} · config files loaded {}/{}", context.memory_file_count, context.loaded_config_files, context.discovered_config_files ), + format!( + "Stale base {}", + stale_base_warning.as_deref().unwrap_or("ok") + ), ]) .with_data(Map::from_iter([ ("cwd".to_string(), json!(context.cwd.display().to_string())), @@ -2401,6 +2408,10 @@ fn check_workspace_health(context: &StatusContext) -> DiagnosticCheck { "discovered_config_files".to_string(), json!(context.discovered_config_files), ), + ( + "stale_base".to_string(), + stale_base_json_value(&context.stale_base_state), + ), ])) } @@ -2920,6 +2931,7 @@ struct StatusContext { git_branch: Option, git_summary: GitWorkspaceSummary, branch_freshness: BranchFreshness, + stale_base_state: BaseCommitState, session_lifecycle: SessionLifecycleSummary, boot_preflight: BootPreflightSnapshot, sandbox_status: runtime::SandboxStatus, @@ -4167,12 +4179,30 @@ fn enforce_broad_cwd_policy( } } +fn stale_base_state_for(cwd: &Path, flag_value: Option<&str>) -> BaseCommitState { + let source = resolve_expected_base(flag_value, cwd); + check_base_commit(cwd, source.as_ref()) +} + +fn stale_base_json_value(state: &BaseCommitState) -> serde_json::Value { + match state { + BaseCommitState::Matches => json!({"status": "matches", "fresh": true}), + BaseCommitState::Diverged { expected, actual } => json!({ + "status": "diverged", + "fresh": false, + "expected": expected, + "actual": actual, + }), + BaseCommitState::NoExpectedBase => json!({"status": "no_expected_base", "fresh": null}), + BaseCommitState::NotAGitRepo => json!({"status": "not_git_repo", "fresh": null}), + } +} + fn run_stale_base_preflight(flag_value: Option<&str>) { let Ok(cwd) = env::current_dir() else { return; }; - let source = resolve_expected_base(flag_value, &cwd); - let state = check_base_commit(&cwd, source.as_ref()); + let state = stale_base_state_for(&cwd, flag_value); if let Some(warning) = format_stale_base_warning(&state) { eprintln!("{warning}"); } @@ -6221,6 +6251,7 @@ fn status_context( parse_git_status_metadata(project_context.git_status.as_deref()); let git_summary = parse_git_workspace_summary(project_context.git_status.as_deref()); let branch_freshness = BranchFreshness::from_git_status(project_context.git_status.as_deref()); + let stale_base_state = stale_base_state_for(&cwd, None); let boot_preflight = build_boot_preflight_snapshot( &cwd, project_root.as_deref(), @@ -6238,6 +6269,7 @@ fn status_context( git_branch, git_summary, branch_freshness, + stale_base_state, session_lifecycle: classify_session_lifecycle_for(&cwd), boot_preflight, sandbox_status, @@ -12567,6 +12599,7 @@ mod tests { conflicted_files: 0, }, branch_freshness: test_branch_freshness(), + stale_base_state: super::BaseCommitState::NoExpectedBase, session_lifecycle: SessionLifecycleSummary { kind: SessionLifecycleKind::IdleShell, pane_id: Some("%7".to_string()), @@ -12692,6 +12725,46 @@ mod tests { fs::remove_dir_all(workspace).expect("cleanup temp dir"); } + #[test] + fn workspace_health_warns_when_stale_base_diverged() { + let context = super::StatusContext { + cwd: PathBuf::from("/tmp/project"), + session_path: None, + loaded_config_files: 0, + discovered_config_files: 0, + memory_file_count: 0, + project_root: Some(PathBuf::from("/tmp/project")), + git_branch: Some("feature/stale-base".to_string()), + git_summary: GitWorkspaceSummary::default(), + branch_freshness: test_branch_freshness(), + stale_base_state: super::BaseCommitState::Diverged { + expected: "base".to_string(), + actual: "head".to_string(), + }, + session_lifecycle: SessionLifecycleSummary { + kind: SessionLifecycleKind::SavedOnly, + pane_id: None, + pane_command: None, + pane_path: None, + workspace_dirty: false, + abandoned: false, + }, + boot_preflight: test_boot_preflight(), + sandbox_status: runtime::SandboxStatus::default(), + config_load_error: None, + }; + + let check = super::check_workspace_health(&context); + + assert_eq!(check.level, super::DiagnosticLevel::Warn); + assert_eq!(check.data["stale_base"]["status"], "diverged"); + assert_eq!(check.data["stale_base"]["fresh"], false); + assert!(check + .details + .iter() + .any(|detail| detail.contains("stale codebase"))); + } + #[test] fn status_json_surfaces_session_lifecycle_for_clawhip() { let context = super::StatusContext { @@ -12704,6 +12777,7 @@ mod tests { git_branch: Some("feature/session-lifecycle".to_string()), git_summary: GitWorkspaceSummary::default(), branch_freshness: test_branch_freshness(), + stale_base_state: super::BaseCommitState::NoExpectedBase, session_lifecycle: SessionLifecycleSummary { kind: SessionLifecycleKind::RunningProcess, pane_id: Some("%9".to_string()), diff --git a/rust/crates/tools/src/lane_completion.rs b/rust/crates/tools/src/lane_completion.rs index e4eecce7..5ca68fe2 100644 --- a/rust/crates/tools/src/lane_completion.rs +++ b/rust/crates/tools/src/lane_completion.rs @@ -56,6 +56,7 @@ pub(crate) fn detect_lane_completion( Some(LaneContext { lane_id: output.agent_id.clone(), green_level: 3, // Workspace green + green_contract_satisfied: true, branch_freshness: std::time::Duration::from_secs(0), blocker: LaneBlocker::None, review_status: ReviewStatus::Approved, @@ -165,6 +166,7 @@ mod tests { let context = LaneContext { lane_id: "completed-lane".to_string(), green_level: 3, + green_contract_satisfied: true, branch_freshness: std::time::Duration::from_secs(0), blocker: LaneBlocker::None, review_status: ReviewStatus::Approved,