feat: sweep

This commit is contained in:
Yeachan-Heo
2026-05-25 04:36:30 +00:00
parent 1003510a75
commit c613e8e676
9 changed files with 441 additions and 27 deletions

View File

@@ -1111,7 +1111,24 @@ enum BlockKind {
pub(crate) fn format_rag_query_json_for_model(body: &str) -> Result<String, String> {
let v: Value = serde_json::from_str(body).map_err(|e| format!("invalid JSON: {e}"))?;
let phase = v.get("phase").and_then(|x| x.as_str()).unwrap_or("unknown");
let phase = v.get("phase").and_then(|x| x.as_str()).ok_or_else(|| {
json!({
"kind": "unknown_bootstrap_phase",
"field": "phase",
"received_value": v.get("phase").cloned().unwrap_or(Value::Null),
"message": "RAG response is missing a string phase; refusing to silently render phase as unknown"
})
.to_string()
})?;
if phase.trim().is_empty() || phase == "unknown" {
return Err(json!({
"kind": "unknown_bootstrap_phase",
"field": "phase",
"received_value": phase,
"message": "RAG response phase must be a concrete phase name"
})
.to_string());
}
let hits = v
.get("hits")
.and_then(|h| h.as_array())
@@ -2557,6 +2574,20 @@ mod tests {
assert!(out.contains("score="));
}
/// A RAG body without any `phase` key must surface the typed
/// `unknown_bootstrap_phase` error instead of rendering successfully.
#[test]
fn rag_response_missing_phase_returns_typed_error() {
    let body = r#"{"hits":[]}"#;
    let rendered = format_rag_query_json_for_model(body).unwrap_err();
    let expected_fragments = [
        r#""kind":"unknown_bootstrap_phase""#,
        r#""field":"phase""#,
    ];
    for fragment in expected_fragments {
        assert!(rendered.contains(fragment));
    }
}
/// A RAG body whose `phase` is the literal string "unknown" must be rejected
/// with the typed error, and the error must echo the received value.
#[test]
fn rag_response_unknown_phase_returns_typed_error() {
    let body = r#"{"hits":[],"phase":"unknown"}"#;
    let rendered = format_rag_query_json_for_model(body).unwrap_err();
    let expected_fragments = [
        r#""kind":"unknown_bootstrap_phase""#,
        r#""received_value":"unknown""#,
    ];
    for fragment in expected_fragments {
        assert!(rendered.contains(fragment));
    }
}
#[test]
fn resolve_rag_base_url_toml_beats_env() {
let _g = mock_env_lock();

View File

@@ -438,13 +438,24 @@ fn normalize_path(path: &Path) -> PathBuf {
/// Extract repository name from a path for event context.
fn extract_repo_name(cwd: &str) -> Option<String> {
let path = Path::new(cwd);
// Try to find a .git directory to identify repo root
let mut current = Some(path);
while let Some(p) = current {
if p.join(".git").is_dir() {
return p.file_name().map(|n| n.to_string_lossy().to_string());
// Ask git from the cwd itself. Walking ancestors manually can accidentally
// classify synthetic/nonexistent paths as an unrelated parent repo (for
// example `/tmp/.git`), which makes trust events point at the wrong repo.
if path.is_dir() {
if let Ok(output) = std::process::Command::new("git")
.args(["rev-parse", "--show-toplevel"])
.current_dir(path)
.output()
{
if output.status.success() {
let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
if !root.is_empty() {
return Path::new(&root)
.file_name()
.map(|n| n.to_string_lossy().to_string());
}
}
}
current = p.parent();
}
// Fallback: use the last component of the path
path.file_name().map(|n| n.to_string_lossy().to_string())

View File

@@ -13,6 +13,7 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::{Arc, Mutex};
use std::time::{SystemTime, UNIX_EPOCH};
@@ -73,6 +74,7 @@ pub struct WorkerFailure {
#[serde(rename_all = "snake_case")]
pub enum WorkerEventKind {
Spawning,
StartupPreflightWarning,
TrustRequired,
ToolPermissionRequired,
TrustResolved,
@@ -102,6 +104,21 @@ pub enum WorkerPromptTarget {
Unknown,
}
/// Category of a non-fatal warning produced by the startup preflight checks.
///
/// Serialized in `snake_case` (e.g. `file_absent_on_branch`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkerStartupPreflightWarningKind {
/// The task prompt mentions a repo-relative path that git does not track.
FileAbsentOnBranch,
/// The git metadata location failed the write probe, so commits or pushes may fail.
GitMetadataNotWritable,
}
/// A single warning produced by the startup preflight checks.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct WorkerStartupPreflightWarning {
/// Which preflight check raised the warning.
pub kind: WorkerStartupPreflightWarningKind,
/// Human-readable description of the problem.
pub message: String,
/// Path the warning refers to, when there is one; omitted from the
/// serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
}
/// Classification of startup failure when no evidence is available.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
@@ -212,6 +229,12 @@ pub enum WorkerEventPayload {
evidence: StartupEvidenceBundle,
classification: StartupFailureClassification,
},
StartupPreflightWarning {
kind: WorkerStartupPreflightWarningKind,
message: String,
#[serde(skip_serializing_if = "Option::is_none")]
path: Option<String>,
},
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
@@ -329,6 +352,34 @@ impl WorkerRegistry {
inner.workers.get(worker_id).cloned()
}
/// Runs the startup preflight checks for `worker_id` and records every
/// resulting warning as a `StartupPreflightWarning` event on that worker.
///
/// The checks shell out to `git` and probe the filesystem, so they run with
/// the registry lock released; the lock is only held to read the worker's
/// cwd and, afterwards, to append the events.
///
/// # Errors
///
/// Returns `worker not found: <id>` when the worker is not registered,
/// either before the checks start or once they have finished.
///
/// # Panics
///
/// Panics if the registry mutex is poisoned.
pub fn observe_startup_preflight(
    &self,
    worker_id: &str,
    task_prompt: &str,
) -> Result<Worker, String> {
    // Snapshot the cwd under a short-lived lock so the slow checks below do
    // not block every other registry operation.
    let cwd = {
        let inner = self.inner.lock().expect("worker registry lock poisoned");
        let worker = inner
            .workers
            .get(worker_id)
            .ok_or_else(|| format!("worker not found: {worker_id}"))?;
        worker.cwd.clone()
    };

    let warnings = startup_preflight_warnings(Path::new(&cwd), task_prompt);

    // Re-acquire the lock; the worker may have been removed in the meantime.
    let mut inner = self.inner.lock().expect("worker registry lock poisoned");
    let worker = inner
        .workers
        .get_mut(worker_id)
        .ok_or_else(|| format!("worker not found: {worker_id}"))?;
    for warning in warnings {
        push_event(
            worker,
            WorkerEventKind::StartupPreflightWarning,
            worker.status,
            Some(warning.message.clone()),
            Some(WorkerEventPayload::StartupPreflightWarning {
                kind: warning.kind,
                message: warning.message,
                path: warning.path,
            }),
        );
    }
    Ok(worker.clone())
}
pub fn observe(&self, worker_id: &str, screen_text: &str) -> Result<Worker, String> {
let mut inner = self.inner.lock().expect("worker registry lock poisoned");
let worker = inner
@@ -1064,6 +1115,118 @@ fn extract_server_from_qualified_tool(tool: &str) -> Option<String> {
(!server.is_empty()).then(|| server.to_string())
}
/// Collects non-fatal warnings about the worker's environment before it
/// starts on `task_prompt`.
///
/// Two checks are performed, in this order:
/// 1. the git metadata location for `cwd` is probed for writability;
/// 2. every repo-relative path mentioned in `task_prompt` is checked against
///    the files git tracks.
///
/// When git metadata cannot be resolved for `cwd` (not inside a repository,
/// or `git` could not be run) no warnings are produced at all: without a
/// repository there is no branch to compare against, and reporting every
/// mentioned path as "not tracked on the current branch" would be a false
/// positive.
pub fn startup_preflight_warnings(
    cwd: &Path,
    task_prompt: &str,
) -> Vec<WorkerStartupPreflightWarning> {
    let Some(git_path) = git_metadata_path(cwd) else {
        return Vec::new();
    };

    let mut warnings = Vec::new();
    if !path_is_writable(&git_path) {
        warnings.push(WorkerStartupPreflightWarning {
            kind: WorkerStartupPreflightWarningKind::GitMetadataNotWritable,
            message: format!(
                "git metadata is not writable; commits or pushes may fail: {}",
                git_path.display()
            ),
            path: Some(git_path.display().to_string()),
        });
    }
    for path in mentioned_repo_paths(task_prompt) {
        if !git_tracks_path(cwd, &path) {
            warnings.push(WorkerStartupPreflightWarning {
                kind: WorkerStartupPreflightWarningKind::FileAbsentOnBranch,
                message: format!(
                    "task mentions {path}, but git does not track it on the current branch"
                ),
                path: Some(path),
            });
        }
    }
    warnings
}
/// Extracts the repo-relative file paths mentioned in `task_prompt`.
///
/// The prompt is split on whitespace and each token is stripped of
/// surrounding quoting/bracketing characters and of trailing sentence
/// punctuation (`.`, `!`, `?`), so that "Fix src/lib.rs." yields
/// `src/lib.rs` rather than `src/lib.rs.`. A token is kept when it:
/// - contains a `/`, is not a URL (`://`) and is not absolute;
/// - does not contain `..` after a leading `./` is removed;
/// - consists only of ASCII alphanumerics, `/`, `_`, `-` and `.`;
/// - has a `.` in its final component (i.e. looks like a file name).
///
/// Results keep first-seen order and contain no duplicates.
fn mentioned_repo_paths(task_prompt: &str) -> Vec<String> {
    /// Quoting, bracketing and list punctuation that may wrap a path.
    fn is_wrapper(ch: char) -> bool {
        matches!(
            ch,
            '`' | '"' | '\'' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';' | ':'
        )
    }

    let mut out: Vec<String> = Vec::new();
    for raw in task_prompt.split_whitespace() {
        // Sentence punctuation is only trimmed from the end: a leading `.`
        // is meaningful (`./src/x.rs`, `.github/ci.yml`).
        let token = raw
            .trim_start_matches(is_wrapper)
            .trim_end_matches(|ch: char| is_wrapper(ch) || matches!(ch, '.' | '!' | '?'));
        if !token.contains('/') || token.contains("://") || token.starts_with('/') {
            continue;
        }
        let token = token.trim_start_matches("./");
        if token.contains("..") {
            continue;
        }
        let charset_ok = token
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '/' | '_' | '-' | '.'));
        let looks_like_file = token
            .rsplit('/')
            .next()
            .is_some_and(|name| name.contains('.'));
        if charset_ok && looks_like_file && !out.iter().any(|seen| seen == token) {
            out.push(token.to_string());
        }
    }
    out
}
/// Reports whether `git ls-files --error-unmatch -- <path>`, run from `cwd`,
/// exits successfully.
///
/// Any failure to launch `git` (missing binary, unusable `cwd`) counts as
/// "not tracked" and yields `false`.
fn git_tracks_path(cwd: &Path, path: &str) -> bool {
    let outcome = Command::new("git")
        .args(["ls-files", "--error-unmatch", "--"])
        .arg(path)
        .current_dir(cwd)
        .output();
    match outcome {
        Ok(finished) => finished.status.success(),
        Err(_) => false,
    }
}
/// Resolves the git metadata location for `cwd` by running
/// `git rev-parse --git-path .` there.
///
/// Returns `None` when `git` cannot be launched, exits unsuccessfully, or
/// prints nothing. A relative result is joined onto `cwd`; an absolute one
/// is returned unchanged.
fn git_metadata_path(cwd: &Path) -> Option<PathBuf> {
    let finished = Command::new("git")
        .arg("rev-parse")
        .arg("--git-path")
        .arg(".")
        .current_dir(cwd)
        .output()
        .ok()?;
    if !finished.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&finished.stdout);
    let reported = stdout.trim();
    if reported.is_empty() {
        return None;
    }

    let reported = PathBuf::from(reported);
    if reported.is_absolute() {
        Some(reported)
    } else {
        Some(cwd.join(reported))
    }
}
/// Probes whether files can be created in `path` (or, when `path` is not a
/// directory, in its parent) by creating and then removing a throwaway file.
///
/// The probe file name combines the process id, a nanosecond timestamp and a
/// per-process counter. A name with only second granularity lets two probes
/// of the same directory in the same second collide on `create_new`, which
/// would misreport a writable directory as read-only.
///
/// Returns `true` only when the probe file was both created and removed.
fn path_is_writable(path: &Path) -> bool {
    // Distinguishes probes issued by this process within one clock tick.
    static PROBE_SEQ: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let probe_dir = if path.is_dir() {
        path
    } else {
        path.parent().unwrap_or(path)
    };
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    let seq = PROBE_SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let probe = probe_dir.join(format!(
        ".claw-write-probe-{}-{nanos}-{seq}",
        std::process::id()
    ));

    match std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(&probe)
    {
        Ok(handle) => {
            // Close the handle first: some platforms refuse to delete a file
            // that is still open.
            drop(handle);
            std::fs::remove_file(&probe).is_ok()
        }
        Err(_) => false,
    }
}
fn detect_trust_prompt(lowered: &str) -> bool {
[
"do you trust the files in this folder",
@@ -1285,6 +1448,8 @@ fn cwd_matches_observed_target(expected_cwd: &str, observed_cwd: &str) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use std::process::Command;
#[test]
fn allowlisted_trust_prompt_auto_resolves_then_reaches_ready_state() {
@@ -1431,6 +1596,66 @@ mod tests {
assert!(!readiness.ready);
}
/// With `src/lib.rs` staged in a fresh repository, a prompt mentioning both
/// that file and an untracked path must warn about the untracked path only.
#[test]
fn startup_preflight_warns_when_task_file_is_absent_on_branch() {
    let repo = tempfile::tempdir().expect("tempdir");
    let root = repo.path();

    Command::new("git")
        .arg("init")
        .current_dir(root)
        .output()
        .expect("git init should run");
    fs::create_dir_all(root.join("src")).expect("src dir");
    fs::write(root.join("src/lib.rs"), "pub fn present() {}\n").expect("write file");
    Command::new("git")
        .args(["add", "src/lib.rs"])
        .current_dir(root)
        .output()
        .expect("git add should run");

    let warnings = startup_preflight_warnings(
        root,
        "Fix src/lib.rs and rust/crates/runtime/src/trident.rs before testing.",
    );
    // True when some warning flags `candidate` as absent on the branch.
    let flagged_absent = |candidate: &str| {
        warnings.iter().any(|warning| {
            warning.kind == WorkerStartupPreflightWarningKind::FileAbsentOnBranch
                && warning.path.as_deref() == Some(candidate)
        })
    };

    assert!(flagged_absent("rust/crates/runtime/src/trident.rs"));
    assert!(!flagged_absent("src/lib.rs"));
}
/// Running the preflight through the registry must append a
/// `StartupPreflightWarning` event carrying a structured payload.
#[test]
fn startup_preflight_records_structured_warning_event() {
    let repo = tempfile::tempdir().expect("tempdir");
    Command::new("git")
        .arg("init")
        .current_dir(repo.path())
        .output()
        .expect("git init should run");

    let registry = WorkerRegistry::new();
    let cwd = repo.path().display().to_string();
    let worker = registry.create(&cwd, &[], true);

    let observed = registry
        .observe_startup_preflight(&worker.worker_id, "Open missing/file.rs")
        .expect("preflight should run");

    // The first warning event recorded is the one under test.
    let mut warning_events = observed
        .events
        .iter()
        .filter(|event| event.kind == WorkerEventKind::StartupPreflightWarning);
    let event = warning_events.next().expect("preflight warning event");

    assert!(matches!(
        event.payload,
        Some(WorkerEventPayload::StartupPreflightWarning {
            kind: WorkerStartupPreflightWarningKind::FileAbsentOnBranch,
            ..
        })
    ));
}
#[test]
fn startup_timeout_classifies_tool_permission_prompt() {
let registry = WorkerRegistry::new();