use std::cmp::Reverse;
use std::collections::HashSet;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::time::Instant;

use glob::Pattern;
use regex::RegexBuilder;
use serde::{Deserialize, Serialize};
use walkdir::{DirEntry, WalkDir};

/// Maximum file size that can be read (10 MB).
const MAX_READ_SIZE: u64 = 10 * 1024 * 1024;

/// Maximum file size that can be written (10 MB).
const MAX_WRITE_SIZE: usize = 10 * 1024 * 1024;

/// Directory names that glob searches prune instead of descending into.
const GLOB_SEARCH_IGNORED_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    ".build",
    "target",
    "dist",
    "coverage",
];

/// Check whether a file appears to contain binary content by examining
/// the first chunk for NUL bytes.
///
/// Only the bytes returned by a single `read` into an 8 KiB buffer are
/// inspected, so a NUL byte further into the file is not detected.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading the file.
fn is_binary_file(path: &Path) -> io::Result<bool> {
    use std::io::Read;

    let mut file = fs::File::open(path)?;
    let mut buffer = [0u8; 8192];
    let bytes_read = file.read(&mut buffer)?;
    Ok(buffer[..bytes_read].contains(&0))
}

/// Validate that a resolved path stays within the given workspace root.
///
/// The check is a component-wise prefix comparison, so it is only meaningful
/// when both `resolved` and `workspace_root` have already been canonicalized
/// by the caller (no `..` components, symlinks resolved).
///
/// # Errors
///
/// Returns `PermissionDenied` when `resolved` does not start with
/// `workspace_root`.
#[allow(dead_code)]
fn validate_workspace_boundary(resolved: &Path, workspace_root: &Path) -> io::Result<()> {
    if !resolved.starts_with(workspace_root) {
        return Err(io::Error::new(
            io::ErrorKind::PermissionDenied,
            format!(
                "path {} escapes workspace boundary {}",
                resolved.display(),
                workspace_root.display()
            ),
        ));
    }
    Ok(())
}

/// Text payload returned by file-reading operations.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TextFilePayload {
    /// Path of the file that was read.
    #[serde(rename = "filePath")]
    pub file_path: String,
    /// The selected lines, joined with `\n`.
    pub content: String,
    /// Number of lines contained in `content`.
    #[serde(rename = "numLines")]
    pub num_lines: usize,
    /// 1-based line number of the first selected line.
    #[serde(rename = "startLine")]
    pub start_line: usize,
    /// Total number of lines in the file.
    #[serde(rename = "totalLines")]
    pub total_lines: usize,
}

/// Output envelope for the `read_file` tool.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReadFileOutput {
    /// Payload discriminator, serialized as `type`.
    #[serde(rename = "type")]
    pub kind: String,
    /// The line-windowed file contents.
    pub file: TextFilePayload,
}

/// Structured patch hunk emitted by write and edit operations.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct StructuredPatchHunk {
    /// First line of the hunk in the original text.
    #[serde(rename = "oldStart")]
    pub old_start: usize,
    /// Number of original lines covered by the hunk.
    #[serde(rename = "oldLines")]
    pub old_lines: usize,
    /// First line of the hunk in the updated text.
    #[serde(rename = "newStart")]
    pub new_start: usize,
    /// Number of updated lines covered by the hunk.
    #[serde(rename = "newLines")]
    pub new_lines: usize,
    /// Hunk body lines.
    // NOTE(review): the element type is not spelled out in this view of the
    // file; presumably `String` — confirm against the full source.
    pub lines: Vec,
}

/// Output envelope for full-file write operations.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct WriteFileOutput {
    /// Payload discriminator, serialized as `type`.
    #[serde(rename = "type")]
    pub kind: String,
    /// Path of the file that was written.
    #[serde(rename = "filePath")]
    pub file_path: String,
    /// The content that was written.
    pub content: String,
    /// Patch hunks describing the change.
    // NOTE(review): element type not visible here; presumably
    // `StructuredPatchHunk` — confirm.
    #[serde(rename = "structuredPatch")]
    pub structured_patch: Vec,
    /// Previous file contents, when available.
    // NOTE(review): inner type not visible here — confirm.
    #[serde(rename = "originalFile")]
    pub original_file: Option,
    /// Optional git diff for the change.
    // NOTE(review): inner type not visible here and not inferable from
    // this chunk — confirm.
    #[serde(rename = "gitDiff")]
    pub git_diff: Option,
}

/// Output envelope for targeted string-replacement edits.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct EditFileOutput {
    /// Path of the file that was edited.
    #[serde(rename = "filePath")]
    pub file_path: String,
    /// The text that was searched for.
    #[serde(rename = "oldString")]
    pub old_string: String,
    /// The text that replaced `old_string`.
    #[serde(rename = "newString")]
    pub new_string: String,
    /// File contents before the edit.
    #[serde(rename = "originalFile")]
    pub original_file: String,
    /// Patch hunks describing the change.
    // NOTE(review): element type not visible here; presumably
    // `StructuredPatchHunk` — confirm.
    #[serde(rename = "structuredPatch")]
    pub structured_patch: Vec,
    #[serde(rename = "userModified")]
    pub user_modified: bool,
    /// Whether every occurrence was replaced rather than only the first.
    #[serde(rename = "replaceAll")]
    pub replace_all: bool,
    /// Optional git diff for the change.
    // NOTE(review): inner type not visible here — confirm.
    #[serde(rename = "gitDiff")]
    pub git_diff: Option,
}

/// Result of a glob-based filename search.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct GlobSearchOutput {
    /// Wall-clock duration of the search in milliseconds.
    #[serde(rename = "durationMs")]
    pub duration_ms: u128,
    /// Number of entries in `filenames`.
    #[serde(rename = "numFiles")]
    pub num_files: usize,
    /// Matching file paths.
    // NOTE(review): element type not visible here; presumably `String` — confirm.
    pub filenames: Vec,
    /// Whether additional matches were dropped from `filenames`.
    pub truncated: bool,
}

/// Parameters accepted by the grep-style search tool.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct GrepSearchInput { pub pattern: String, pub path: Option, pub glob: Option, #[serde(rename = "output_mode")] pub output_mode: Option, #[serde(rename = "-B")] pub before: Option, #[serde(rename = "-A")] pub after: Option, #[serde(rename = "-C")] pub context_short: Option, pub context: Option, #[serde(rename = "-n")] pub line_numbers: Option, #[serde(rename = "-i")] pub case_insensitive: Option, #[serde(rename = "type")] pub file_type: Option, pub head_limit: Option, pub offset: Option, pub multiline: Option, } /// Result payload returned by the grep-style search tool. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct GrepSearchOutput { pub mode: Option, #[serde(rename = "numFiles")] pub num_files: usize, pub filenames: Vec, pub content: Option, #[serde(rename = "numLines")] pub num_lines: Option, #[serde(rename = "numMatches")] pub num_matches: Option, #[serde(rename = "appliedLimit")] pub applied_limit: Option, #[serde(rename = "appliedOffset")] pub applied_offset: Option, } /// Reads a text file and returns a line-windowed payload. pub fn read_file( path: &str, offset: Option, limit: Option, ) -> io::Result { let absolute_path = normalize_path(path)?; // Check file size before reading let metadata = fs::metadata(&absolute_path)?; if metadata.len() > MAX_READ_SIZE { return Err(io::Error::new( io::ErrorKind::InvalidData, format!( "file is too large ({} bytes, max {} bytes)", metadata.len(), MAX_READ_SIZE ), )); } // Detect binary files if is_binary_file(&absolute_path)? 
{ return Err(io::Error::new( io::ErrorKind::InvalidData, "file appears to be binary", )); } let content = fs::read_to_string(&absolute_path)?; let lines: Vec<&str> = content.lines().collect(); let start_index = offset.unwrap_or(0).min(lines.len()); let end_index = limit.map_or(lines.len(), |limit| { start_index.saturating_add(limit).min(lines.len()) }); let selected = lines[start_index..end_index].join("\n"); Ok(ReadFileOutput { kind: String::from("text"), file: TextFilePayload { file_path: absolute_path.to_string_lossy().into_owned(), content: selected, num_lines: end_index.saturating_sub(start_index), start_line: start_index.saturating_add(1), total_lines: lines.len(), }, }) } /// Replaces a file's contents and returns patch metadata. pub fn write_file(path: &str, content: &str) -> io::Result { if content.len() > MAX_WRITE_SIZE { return Err(io::Error::new( io::ErrorKind::InvalidData, format!( "content is too large ({} bytes, max {} bytes)", content.len(), MAX_WRITE_SIZE ), )); } let absolute_path = normalize_path_allow_missing(path)?; let original_file = fs::read_to_string(&absolute_path).ok(); if let Some(parent) = absolute_path.parent() { fs::create_dir_all(parent)?; } fs::write(&absolute_path, content)?; Ok(WriteFileOutput { kind: if original_file.is_some() { String::from("update") } else { String::from("create") }, file_path: absolute_path.to_string_lossy().into_owned(), content: content.to_owned(), structured_patch: make_patch(original_file.as_deref().unwrap_or(""), content), original_file, git_diff: None, }) } /// Performs an in-file string replacement and returns patch metadata. 
pub fn edit_file( path: &str, old_string: &str, new_string: &str, replace_all: bool, ) -> io::Result { let absolute_path = normalize_path(path)?; let original_file = fs::read_to_string(&absolute_path)?; if old_string == new_string { return Err(io::Error::new( io::ErrorKind::InvalidInput, "old_string and new_string must differ", )); } if !original_file.contains(old_string) { return Err(io::Error::new( io::ErrorKind::NotFound, "old_string not found in file", )); } let updated = if replace_all { original_file.replace(old_string, new_string) } else { original_file.replacen(old_string, new_string, 1) }; fs::write(&absolute_path, &updated)?; Ok(EditFileOutput { file_path: absolute_path.to_string_lossy().into_owned(), old_string: old_string.to_owned(), new_string: new_string.to_owned(), original_file: original_file.clone(), structured_patch: make_patch(&original_file, &updated), user_modified: false, replace_all, git_diff: None, }) } /// Expands a glob pattern and returns matching filenames. pub fn glob_search(pattern: &str, path: Option<&str>) -> io::Result { glob_search_impl(pattern, path, None) } fn glob_search_impl( pattern: &str, path: Option<&str>, workspace_root: Option<&Path>, ) -> io::Result { let started = Instant::now(); let base_dir = path .map(normalize_path) .transpose()? .unwrap_or(std::env::current_dir()?); let canonical_root = workspace_root.map(canonicalize_workspace_root); if let Some(root) = canonical_root.as_deref() { validate_workspace_boundary(&base_dir, root)?; } let search_pattern = if Path::new(pattern).is_absolute() { pattern.to_owned() } else { base_dir.join(pattern).to_string_lossy().into_owned() }; // The `glob` crate does not support brace expansion ({a,b,c}). // Expand braces into multiple patterns so patterns like // `Assets/**/*.{cs,uxml,uss}` work correctly. 
let expanded = expand_braces(&search_pattern); let mut seen = HashSet::new(); let mut matches = Vec::new(); for pat in &expanded { let compiled = Pattern::new(pat) .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?; let walk_root = derive_glob_walk_root(pat); if let Some(root) = canonical_root.as_deref() { let canonical_walk_root = walk_root .canonicalize() .unwrap_or_else(|_| walk_root.clone()); validate_workspace_boundary(&canonical_walk_root, root)?; } let entries = WalkDir::new(&walk_root) .into_iter() .filter_entry(|entry| !should_skip_glob_dir(entry)); for entry in entries.flatten() { let candidate = entry.path(); if entry.file_type().is_file() && compiled.matches_path(candidate) && seen.insert(candidate.to_path_buf()) { if let Some(root) = canonical_root.as_deref() { let canonical_candidate = candidate.canonicalize()?; validate_workspace_boundary(&canonical_candidate, root)?; } matches.push(candidate.to_path_buf()); } } } matches.sort_by_key(|path| { fs::metadata(path) .and_then(|metadata| metadata.modified()) .ok() .map(Reverse) }); let truncated = matches.len() > 100; let filenames = matches .into_iter() .take(100) .map(|path| path.to_string_lossy().into_owned()) .collect::>(); Ok(GlobSearchOutput { duration_ms: started.elapsed().as_millis(), num_files: filenames.len(), filenames, truncated, }) } /// Runs a regex search over workspace files with optional context lines. pub fn grep_search(input: &GrepSearchInput) -> io::Result { grep_search_impl(input, None) } fn grep_search_impl( input: &GrepSearchInput, workspace_root: Option<&Path>, ) -> io::Result { let base_path = input .path .as_deref() .map(normalize_path) .transpose()? 
.unwrap_or(std::env::current_dir()?); let canonical_root = workspace_root.map(canonicalize_workspace_root); if let Some(root) = canonical_root.as_deref() { validate_workspace_boundary(&base_path, root)?; } let regex = RegexBuilder::new(&input.pattern) .case_insensitive(input.case_insensitive.unwrap_or(false)) .dot_matches_new_line(input.multiline.unwrap_or(false)) .build() .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?; let glob_filter = input .glob .as_deref() .map(Pattern::new) .transpose() .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?; let file_type = input.file_type.as_deref(); let output_mode = input .output_mode .clone() .unwrap_or_else(|| String::from("files_with_matches")); let context = input.context.or(input.context_short).unwrap_or(0); let mut filenames = Vec::new(); let mut content_lines = Vec::new(); let mut total_matches = 0usize; for file_path in collect_search_files(&base_path)? { if let Some(root) = canonical_root.as_deref() { let canonical_file = file_path.canonicalize()?; validate_workspace_boundary(&canonical_file, root)?; } if !matches_optional_filters(&file_path, glob_filter.as_ref(), file_type) { continue; } let Ok(file_contents) = fs::read_to_string(&file_path) else { continue; }; if output_mode == "count" { let count = regex.find_iter(&file_contents).count(); if count > 0 { filenames.push(file_path.to_string_lossy().into_owned()); total_matches += count; } continue; } let lines: Vec<&str> = file_contents.lines().collect(); let mut matched_lines = Vec::new(); for (index, line) in lines.iter().enumerate() { if regex.is_match(line) { total_matches += 1; matched_lines.push(index); } } if matched_lines.is_empty() { continue; } filenames.push(file_path.to_string_lossy().into_owned()); if output_mode == "content" { for index in matched_lines { let start = index.saturating_sub(input.before.unwrap_or(context)); let end = (index + input.after.unwrap_or(context) + 
1).min(lines.len()); for (current, line) in lines.iter().enumerate().take(end).skip(start) { let prefix = if input.line_numbers.unwrap_or(true) { format!("{}:{}:", file_path.to_string_lossy(), current + 1) } else { format!("{}:", file_path.to_string_lossy()) }; content_lines.push(format!("{prefix}{line}")); } } } } let (filenames, applied_limit, applied_offset) = apply_limit(filenames, input.head_limit, input.offset); if output_mode == "content" { return Ok(build_grep_content_output( output_mode, filenames, content_lines, input.head_limit, input.offset, )); } Ok(GrepSearchOutput { mode: Some(output_mode.clone()), num_files: filenames.len(), filenames, content: None, num_lines: None, num_matches: (output_mode == "count").then_some(total_matches), applied_limit, applied_offset, }) } fn build_grep_content_output( output_mode: String, filenames: Vec, content_lines: Vec, head_limit: Option, offset: Option, ) -> GrepSearchOutput { let (lines, limit, offset) = apply_limit(content_lines, head_limit, offset); GrepSearchOutput { mode: Some(output_mode), num_files: filenames.len(), filenames, num_lines: Some(lines.len()), content: Some(lines.join("\n")), num_matches: None, applied_limit: limit, applied_offset: offset, } } fn canonicalize_workspace_root(workspace_root: &Path) -> PathBuf { workspace_root .canonicalize() .unwrap_or_else(|_| workspace_root.to_path_buf()) } fn should_skip_glob_dir(entry: &DirEntry) -> bool { entry.file_type().is_dir() && entry .file_name() .to_str() .is_some_and(|name| GLOB_SEARCH_IGNORED_DIRS.contains(&name)) } fn derive_glob_walk_root(pattern: &str) -> PathBuf { let path = Path::new(pattern); let mut prefix = PathBuf::new(); let mut saw_component = false; for component in path.components() { let text = component.as_os_str().to_string_lossy(); if component_contains_glob(&text) { break; } prefix.push(component.as_os_str()); saw_component = true; } if saw_component { prefix } else { std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")) } } 
fn component_contains_glob(component: &str) -> bool { component.contains('*') || component.contains('?') || component.contains('[') } fn collect_search_files(base_path: &Path) -> io::Result> { if base_path.is_file() { return Ok(vec![base_path.to_path_buf()]); } let mut files = Vec::new(); for entry in WalkDir::new(base_path) { let entry = entry.map_err(|error| io::Error::other(error.to_string()))?; if entry.file_type().is_file() { files.push(entry.path().to_path_buf()); } } Ok(files) } fn matches_optional_filters( path: &Path, glob_filter: Option<&Pattern>, file_type: Option<&str>, ) -> bool { if let Some(glob_filter) = glob_filter { let path_string = path.to_string_lossy(); if !glob_filter.matches(&path_string) && !glob_filter.matches_path(path) { return false; } } if let Some(file_type) = file_type { let extension = path.extension().and_then(|extension| extension.to_str()); if extension != Some(file_type) { return false; } } true } fn apply_limit( items: Vec, limit: Option, offset: Option, ) -> (Vec, Option, Option) { let offset_value = offset.unwrap_or(0); let mut items = items.into_iter().skip(offset_value).collect::>(); let explicit_limit = limit.unwrap_or(250); if explicit_limit == 0 { return (items, None, (offset_value > 0).then_some(offset_value)); } let truncated = items.len() > explicit_limit; items.truncate(explicit_limit); ( items, truncated.then_some(explicit_limit), (offset_value > 0).then_some(offset_value), ) } fn make_patch(original: &str, updated: &str) -> Vec { let mut lines = Vec::new(); for line in original.lines() { lines.push(format!("-{line}")); } for line in updated.lines() { lines.push(format!("+{line}")); } vec![StructuredPatchHunk { old_start: 1, old_lines: original.lines().count(), new_start: 1, new_lines: updated.lines().count(), lines, }] } fn normalize_path(path: &str) -> io::Result { let candidate = if Path::new(path).is_absolute() { PathBuf::from(path) } else { std::env::current_dir()?.join(path) }; candidate.canonicalize() } fn 
normalize_path_allow_missing(path: &str) -> io::Result { let candidate = if Path::new(path).is_absolute() { PathBuf::from(path) } else { std::env::current_dir()?.join(path) }; if let Ok(canonical) = candidate.canonicalize() { return Ok(canonical); } if let Some(parent) = candidate.parent() { let canonical_parent = parent .canonicalize() .unwrap_or_else(|_| parent.to_path_buf()); if let Some(name) = candidate.file_name() { return Ok(canonical_parent.join(name)); } } Ok(candidate) } /// Read a file with workspace boundary enforcement. #[allow(dead_code)] pub fn read_file_in_workspace( path: &str, offset: Option, limit: Option, workspace_root: &Path, ) -> io::Result { let absolute_path = normalize_path(path)?; let canonical_root = canonicalize_workspace_root(workspace_root); validate_workspace_boundary(&absolute_path, &canonical_root)?; read_file(path, offset, limit) } /// Write a file with workspace boundary enforcement. #[allow(dead_code)] pub fn write_file_in_workspace( path: &str, content: &str, workspace_root: &Path, ) -> io::Result { let absolute_path = normalize_path_allow_missing(path)?; let canonical_root = canonicalize_workspace_root(workspace_root); validate_workspace_boundary(&absolute_path, &canonical_root)?; write_file(path, content) } /// Edit a file with workspace boundary enforcement. #[allow(dead_code)] pub fn edit_file_in_workspace( path: &str, old_string: &str, new_string: &str, replace_all: bool, workspace_root: &Path, ) -> io::Result { let absolute_path = normalize_path(path)?; let canonical_root = canonicalize_workspace_root(workspace_root); validate_workspace_boundary(&absolute_path, &canonical_root)?; edit_file(path, old_string, new_string, replace_all) } /// Expand a glob pattern with workspace boundary enforcement. 
#[allow(dead_code)] pub fn glob_search_in_workspace( pattern: &str, path: Option<&str>, workspace_root: &Path, ) -> io::Result { glob_search_impl(pattern, path, Some(workspace_root)) } /// Search file contents with workspace boundary enforcement. #[allow(dead_code)] pub fn grep_search_in_workspace( input: &GrepSearchInput, workspace_root: &Path, ) -> io::Result { grep_search_impl(input, Some(workspace_root)) } /// Check whether a path is a symlink that resolves outside the workspace. #[allow(dead_code)] pub fn is_symlink_escape(path: &Path, workspace_root: &Path) -> io::Result { let metadata = fs::symlink_metadata(path)?; if !metadata.is_symlink() { return Ok(false); } let resolved = path.canonicalize()?; let canonical_root = workspace_root .canonicalize() .unwrap_or_else(|_| workspace_root.to_path_buf()); Ok(!resolved.starts_with(&canonical_root)) } /// Expand shell-style brace groups in a glob pattern. /// /// Handles one level of braces: `foo.{a,b,c}` → `["foo.a", "foo.b", "foo.c"]`. /// Nested braces are not expanded (uncommon in practice). /// Patterns without braces pass through unchanged. fn expand_braces(pattern: &str) -> Vec { let Some(open) = pattern.find('{') else { return vec![pattern.to_owned()]; }; let Some(close) = pattern[open..].find('}').map(|i| open + i) else { // Unmatched brace — treat as literal. 
return vec![pattern.to_owned()]; }; let prefix = &pattern[..open]; let suffix = &pattern[close + 1..]; let alternatives = &pattern[open + 1..close]; alternatives .split(',') .flat_map(|alt| expand_braces(&format!("{prefix}{alt}{suffix}"))) .collect() } #[cfg(test)] mod tests { use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; use super::{ component_contains_glob, derive_glob_walk_root, edit_file, expand_braces, glob_search, grep_search, is_symlink_escape, read_file, read_file_in_workspace, write_file, write_file_in_workspace, GrepSearchInput, MAX_WRITE_SIZE, }; fn temp_path(name: &str) -> std::path::PathBuf { let unique = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("time should move forward") .as_nanos(); std::env::temp_dir().join(format!("clawd-native-{name}-{unique}")) } #[test] fn reads_and_writes_files() { let path = temp_path("read-write.txt"); let write_output = write_file(path.to_string_lossy().as_ref(), "one\ntwo\nthree") .expect("write should succeed"); assert_eq!(write_output.kind, "create"); let read_output = read_file(path.to_string_lossy().as_ref(), Some(1), Some(1)) .expect("read should succeed"); assert_eq!(read_output.file.content, "two"); } #[test] fn edits_file_contents() { let path = temp_path("edit.txt"); write_file(path.to_string_lossy().as_ref(), "alpha beta alpha") .expect("initial write should succeed"); let output = edit_file(path.to_string_lossy().as_ref(), "alpha", "omega", true) .expect("edit should succeed"); assert!(output.replace_all); } #[test] fn rejects_binary_files() { let path = temp_path("binary-test.bin"); std::fs::write(&path, b"\x00\x01\x02\x03binary content").expect("write should succeed"); let result = read_file(path.to_string_lossy().as_ref(), None, None); assert!(result.is_err()); let error = result.unwrap_err(); assert_eq!(error.kind(), std::io::ErrorKind::InvalidData); assert!(error.to_string().contains("binary")); } #[test] fn rejects_oversized_writes() { let path = 
temp_path("oversize-write.txt"); let huge = "x".repeat(MAX_WRITE_SIZE + 1); let result = write_file(path.to_string_lossy().as_ref(), &huge); assert!(result.is_err()); let error = result.unwrap_err(); assert_eq!(error.kind(), std::io::ErrorKind::InvalidData); assert!(error.to_string().contains("too large")); } #[test] fn enforces_workspace_boundary() { let workspace = temp_path("workspace-boundary"); std::fs::create_dir_all(&workspace).expect("workspace dir should be created"); let inside = workspace.join("inside.txt"); write_file(inside.to_string_lossy().as_ref(), "safe content") .expect("write inside workspace should succeed"); // Reading inside workspace should succeed let result = read_file_in_workspace(inside.to_string_lossy().as_ref(), None, None, &workspace); assert!(result.is_ok()); // Reading outside workspace should fail let outside = temp_path("outside-boundary.txt"); write_file(outside.to_string_lossy().as_ref(), "unsafe content") .expect("write outside should succeed"); let result = read_file_in_workspace(outside.to_string_lossy().as_ref(), None, None, &workspace); assert!(result.is_err()); let error = result.unwrap_err(); assert_eq!(error.kind(), std::io::ErrorKind::PermissionDenied); assert!(error.to_string().contains("escapes workspace")); } #[test] fn detects_symlink_escape() { let workspace = temp_path("symlink-workspace"); std::fs::create_dir_all(&workspace).expect("workspace dir should be created"); let outside = temp_path("symlink-target.txt"); std::fs::write(&outside, "target content").expect("target should write"); let link_path = workspace.join("escape-link.txt"); #[cfg(unix)] { std::os::unix::fs::symlink(&outside, &link_path).expect("symlink should create"); assert!(is_symlink_escape(&link_path, &workspace).expect("check should succeed")); } // Non-symlink file should not be an escape let normal = workspace.join("normal.txt"); std::fs::write(&normal, "normal content").expect("normal file should write"); assert!(!is_symlink_escape(&normal, 
&workspace).expect("check should succeed")); } #[test] #[cfg(unix)] fn workspace_read_rejects_symlink_escape_regression_3007_class() { let workspace = temp_path("workspace-read-symlink-escape"); let outside = temp_path("workspace-read-symlink-target"); std::fs::create_dir_all(&workspace).expect("workspace dir should be created"); std::fs::create_dir_all(&outside).expect("outside dir should be created"); let outside_file = outside.join("secret.txt"); std::fs::write(&outside_file, "outside secret").expect("outside file should write"); let link_path = workspace.join("linked-secret.txt"); std::os::unix::fs::symlink(&outside_file, &link_path).expect("symlink should create"); let result = read_file_in_workspace(link_path.to_string_lossy().as_ref(), None, None, &workspace); assert!(result.is_err(), "symlink escape must be rejected"); let error = result.unwrap_err(); assert_eq!(error.kind(), std::io::ErrorKind::PermissionDenied); assert!( error.to_string().contains("escapes workspace"), "error should explain workspace escape: {error}" ); let _ = std::fs::remove_dir_all(&workspace); let _ = std::fs::remove_dir_all(&outside); } #[test] #[cfg(unix)] fn workspace_write_rejects_parent_symlink_escape_regression_3007_class() { let workspace = temp_path("workspace-write-symlink-escape"); let outside = temp_path("workspace-write-symlink-target"); std::fs::create_dir_all(&workspace).expect("workspace dir should be created"); std::fs::create_dir_all(&outside).expect("outside dir should be created"); let link_dir = workspace.join("linked-outside"); std::os::unix::fs::symlink(&outside, &link_dir).expect("symlink dir should create"); let escaped_child = link_dir.join("created.txt"); let result = write_file_in_workspace( escaped_child.to_string_lossy().as_ref(), "must not escape", &workspace, ); assert!(result.is_err(), "parent symlink escape must be rejected"); let error = result.unwrap_err(); assert_eq!(error.kind(), std::io::ErrorKind::PermissionDenied); assert!( 
error.to_string().contains("escapes workspace"), "error should explain workspace escape: {error}" ); assert!( !outside.join("created.txt").exists(), "write should not create through an escaping symlink" ); let _ = std::fs::remove_dir_all(&workspace); let _ = std::fs::remove_dir_all(&outside); } #[test] fn globs_and_greps_directory() { let dir = temp_path("search-dir"); std::fs::create_dir_all(&dir).expect("directory should be created"); let file = dir.join("demo.rs"); write_file( file.to_string_lossy().as_ref(), "fn main() {\n println!(\"hello\");\n}\n", ) .expect("file write should succeed"); let globbed = glob_search("**/*.rs", Some(dir.to_string_lossy().as_ref())) .expect("glob should succeed"); assert_eq!(globbed.num_files, 1); let grep_output = grep_search(&GrepSearchInput { pattern: String::from("hello"), path: Some(dir.to_string_lossy().into_owned()), glob: Some(String::from("**/*.rs")), output_mode: Some(String::from("content")), before: None, after: None, context_short: None, context: None, line_numbers: Some(true), case_insensitive: Some(false), file_type: None, head_limit: Some(10), offset: Some(0), multiline: Some(false), }) .expect("grep should succeed"); assert!(grep_output.content.unwrap_or_default().contains("hello")); } #[test] fn expand_braces_no_braces() { assert_eq!(expand_braces("*.rs"), vec!["*.rs"]); } #[test] fn expand_braces_single_group() { let mut result = expand_braces("Assets/**/*.{cs,uxml,uss}"); result.sort(); assert_eq!( result, vec!["Assets/**/*.cs", "Assets/**/*.uss", "Assets/**/*.uxml",] ); } #[test] fn expand_braces_nested() { let mut result = expand_braces("src/{a,b}.{rs,toml}"); result.sort(); assert_eq!( result, vec!["src/a.rs", "src/a.toml", "src/b.rs", "src/b.toml"] ); } #[test] fn expand_braces_unmatched() { assert_eq!(expand_braces("foo.{bar"), vec!["foo.{bar"]); } #[test] fn glob_search_with_braces_finds_files() { let dir = temp_path("glob-braces"); std::fs::create_dir_all(&dir).unwrap(); std::fs::write(dir.join("a.rs"), 
"fn main() {}").unwrap(); std::fs::write(dir.join("b.toml"), "[package]").unwrap(); std::fs::write(dir.join("c.txt"), "hello").unwrap(); let result = glob_search("*.{rs,toml}", Some(dir.to_str().unwrap())).expect("glob should succeed"); assert_eq!( result.num_files, 2, "should match .rs and .toml but not .txt" ); let _ = std::fs::remove_dir_all(&dir); } #[test] fn glob_search_skips_common_heavy_directories() { let dir = temp_path("glob-ignored-dirs"); std::fs::create_dir_all(dir.join("src")).unwrap(); std::fs::create_dir_all(dir.join("docs")).unwrap(); std::fs::create_dir_all(dir.join("node_modules/pkg")).unwrap(); std::fs::create_dir_all(dir.join(".build/checkouts/pkg")).unwrap(); std::fs::create_dir_all(dir.join("target/debug/deps")).unwrap(); std::fs::write(dir.join("src/AGENTS.md"), "src").unwrap(); std::fs::write(dir.join("docs/AGENTS.md"), "docs").unwrap(); std::fs::write(dir.join("node_modules/pkg/AGENTS.md"), "node_modules").unwrap(); std::fs::write(dir.join(".build/checkouts/pkg/AGENTS.md"), ".build").unwrap(); std::fs::write(dir.join("target/debug/deps/AGENTS.md"), "target").unwrap(); let result = glob_search("**/AGENTS.md", Some(dir.to_str().unwrap())).expect("glob should succeed"); assert_eq!(result.num_files, 2, "ignored dirs should be pruned"); assert!(result .filenames .iter() .any(|path| path.ends_with("src/AGENTS.md"))); assert!(result .filenames .iter() .any(|path| path.ends_with("docs/AGENTS.md"))); assert!(!result .filenames .iter() .any(|path| path.contains("node_modules") || path.contains(".build") || path.contains("/target/"))); let _ = std::fs::remove_dir_all(&dir); } #[test] fn derive_glob_walk_root_stops_at_first_glob_component() { let root = derive_glob_walk_root("/tmp/demo/**/AGENTS.md"); assert_eq!(root, PathBuf::from("/tmp/demo")); assert!(component_contains_glob("**")); assert!(component_contains_glob("*.rs")); assert!(!component_contains_glob("src")); } }