Harden permission enforcement against sandbox bypasses

Close two ways the permission system could be bypassed:

- Workspace path traversal: normalize `.`/`..` lexically before the
  boundary prefix comparison so paths like `/workspace/../../etc` can no
  longer escape the sandbox. Fixed in both the runtime enforcer and the
  duplicate check in the tools PowerShell path classifier.
- read-only mode no longer trusts the leading token alone: reject shell
  metacharacters (chaining/substitution/redirect/pipe/subshell), drop
  interpreters and build drivers (python/node/ruby/cargo/rustc) from the
  allow-list, gate `git` to non-mutating subcommands, and reject `find`
  actions that execute or delete.

Adds regression tests for both holes. The pre-existing, unrelated
worker_boot git-metadata test failure is not affected by this change.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Sam Lamrabte
2026-05-31 13:26:39 +02:00
parent 4d3dc5b873
commit e8c8ef1142
2 changed files with 207 additions and 20 deletions

View File

@@ -173,33 +173,119 @@ impl PermissionEnforcer {
} }
} }
/// Simple workspace boundary check via string prefix. /// Workspace boundary check.
///
/// Resolves `.` and `..` components lexically *before* comparing against the
/// workspace root, so that traversal sequences like `/workspace/../../etc`
/// cannot escape the sandbox via a naive string prefix match. Normalization is
/// lexical (it does not touch the filesystem) because the target path may not
/// exist yet on a write, and we must not depend on CWD.
fn is_within_workspace(path: &str, workspace_root: &str) -> bool { fn is_within_workspace(path: &str, workspace_root: &str) -> bool {
let normalized = if path.starts_with('/') { let combined = if path.starts_with('/') {
path.to_owned() path.to_owned()
} else { } else {
format!("{workspace_root}/{path}") format!("{workspace_root}/{path}")
}; };
let root = if workspace_root.ends_with('/') { let normalized = lexically_normalize(&combined);
workspace_root.to_owned() let root = lexically_normalize(workspace_root);
let root_with_slash = if root.ends_with('/') {
root.clone()
} else { } else {
format!("{workspace_root}/") format!("{root}/")
}; };
normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/') normalized == root || normalized.starts_with(&root_with_slash)
}
/// Collapse `.` and `..` segments without consulting the filesystem.
/// `..` that would climb above an absolute root is clamped at `/`, so the
/// result can never be a prefix-match for a deeper workspace root.
fn lexically_normalize(path: &str) -> String {
let is_absolute = path.starts_with('/');
let mut stack: Vec<&str> = Vec::new();
for component in path.split('/') {
match component {
"" | "." => {}
".." => {
stack.pop();
}
other => stack.push(other),
}
}
let joined = stack.join("/");
if is_absolute {
format!("/{joined}")
} else {
joined
}
} }
/// Conservative heuristic: is this bash command read-only? /// Conservative heuristic: is this bash command read-only?
///
/// Hardening notes:
/// - Any shell metacharacter that could chain, substitute, pipe, or redirect
/// into a state-changing command rejects the whole line. This blocks
/// `cat x; rm -rf y`, `cat x | sh`, `$(...)`, backticks, redirects, and
/// subshells regardless of the leading token.
/// - Language interpreters (`python`, `node`, `ruby`) and build drivers
/// (`cargo`, `rustc`) are NOT read-only: they execute arbitrary code, so they
/// are excluded from the allow-list.
/// - `git` is allowed only for a known set of non-mutating subcommands.
/// - `find` is rejected when it carries an action that can execute or delete.
///
/// Residual known gaps (documented, not yet closed): `sed`'s `w`/`e` script
/// commands and `awk`'s `system()` can still mutate — these require quoting or
/// metacharacters that the checks above usually catch, but a dedicated parser
/// would be more robust. Tracked as follow-up.
fn is_read_only_command(command: &str) -> bool { fn is_read_only_command(command: &str) -> bool {
let first_token = command // Shell metacharacters that enable command chaining, substitution,
.split_whitespace() // piping, redirection, or subshells. Presence of any of these means we
// cannot reason about the command from its leading token alone.
const SHELL_METACHARS: &[char] =
&[';', '|', '&', '$', '`', '>', '<', '(', ')', '{', '}', '\n'];
if command.contains(SHELL_METACHARS) {
return false;
}
let mut tokens = command.split_whitespace();
let first_token = tokens
.next() .next()
.unwrap_or("") .unwrap_or("")
.rsplit('/') .rsplit('/')
.next() .next()
.unwrap_or(""); .unwrap_or("");
// `git` is only read-only for a curated set of subcommands.
if first_token == "git" {
let subcommand = tokens.next().unwrap_or("");
return matches!(
subcommand,
"status"
| "log"
| "diff"
| "show"
| "branch"
| "rev-parse"
| "ls-files"
| "blame"
| "describe"
| "tag"
| "remote"
);
}
// `find` can execute or delete via actions; reject those forms.
if first_token == "find"
&& (command.contains("-exec")
|| command.contains("-execdir")
|| command.contains("-delete")
|| command.contains("-ok")
|| command.contains("-fprintf"))
{
return false;
}
matches!( matches!(
first_token, first_token,
"cat" "cat"
@@ -237,8 +323,6 @@ fn is_read_only_command(command: &str) -> bool {
| "tr" | "tr"
| "cut" | "cut"
| "paste" | "paste"
| "tee"
| "xargs"
| "test" | "test"
| "true" | "true"
| "false" | "false"
@@ -257,18 +341,8 @@ fn is_read_only_command(command: &str) -> bool {
| "tree" | "tree"
| "jq" | "jq"
| "yq" | "yq"
| "python3"
| "python"
| "node"
| "ruby"
| "cargo"
| "rustc"
| "git"
| "gh"
) && !command.contains("-i ") ) && !command.contains("-i ")
&& !command.contains("--in-place") && !command.contains("--in-place")
&& !command.contains(" > ")
&& !command.contains(" >> ")
} }
#[cfg(test)] #[cfg(test)]
@@ -375,6 +449,85 @@ mod tests {
assert!(!is_read_only_command("sed -i 's/a/b/' file")); assert!(!is_read_only_command("sed -i 's/a/b/' file"));
} }
// --- Hardening regression tests (#2: read-only bypasses) ---
#[test]
fn read_only_rejects_command_chaining() {
// A leading read-only token must not launder a trailing destructive one.
assert!(!is_read_only_command("cat foo; rm -rf bar"));
assert!(!is_read_only_command("cat foo && rm -rf bar"));
assert!(!is_read_only_command("ls || rm bar"));
assert!(!is_read_only_command("cat foo | sh"));
assert!(!is_read_only_command("echo `rm bar`"));
assert!(!is_read_only_command("echo $(rm bar)"));
assert!(!is_read_only_command("echo x>file")); // redirect without spaces
}
#[test]
fn read_only_rejects_interpreters_and_build_drivers() {
// These execute arbitrary code and are no longer read-only.
assert!(!is_read_only_command(
"python3 -c \"import os; os.system('rm -rf .')\""
));
assert!(!is_read_only_command("python script.py"));
assert!(!is_read_only_command("node app.js"));
assert!(!is_read_only_command("ruby x.rb"));
assert!(!is_read_only_command("cargo run"));
assert!(!is_read_only_command("rustc evil.rs"));
}
#[test]
fn read_only_gates_git_subcommands() {
// Read-only git subcommands remain allowed...
assert!(is_read_only_command("git status"));
assert!(is_read_only_command("git diff HEAD~1"));
assert!(is_read_only_command("git show abc123"));
// ...but mutating/exfiltrating ones are rejected.
assert!(!is_read_only_command("git commit -m x"));
assert!(!is_read_only_command("git push origin main"));
assert!(!is_read_only_command("git reset --hard"));
assert!(!is_read_only_command("git clean -fd"));
assert!(!is_read_only_command("git config user.email a@b.c"));
}
#[test]
fn read_only_rejects_find_actions() {
assert!(is_read_only_command("find . -name Cargo.toml"));
assert!(!is_read_only_command("find . -delete"));
// -exec uses braces/semicolon which also trip the metachar guard,
// but the explicit action check is the primary defense.
assert!(!is_read_only_command("find . -execdir rm rf"));
}
// --- Hardening regression tests (#1: workspace path traversal) ---
#[test]
fn workspace_rejects_parent_traversal() {
assert!(!is_within_workspace("/workspace/../etc/passwd", "/workspace"));
assert!(!is_within_workspace(
"/workspace/../../etc/crontab",
"/workspace"
));
assert!(!is_within_workspace("../etc/passwd", "/workspace"));
assert!(!is_within_workspace(
"/workspace/sub/../../outside",
"/workspace"
));
// Legitimate paths still resolve inside.
assert!(is_within_workspace("/workspace/./src/main.rs", "/workspace"));
assert!(is_within_workspace(
"/workspace/src/../src/main.rs",
"/workspace"
));
}
#[test]
fn workspace_write_denies_traversal_escape() {
let enforcer = make_enforcer(PermissionMode::WorkspaceWrite);
let result = enforcer.check_file_write("/workspace/../../etc/crontab", "/workspace");
assert!(matches!(result, EnforcementResult::Denied { .. }));
}
#[test] #[test]
fn active_mode_returns_policy_mode() { fn active_mode_returns_policy_mode() {
// given // given

View File

@@ -2571,6 +2571,20 @@ fn is_within_workspace(path: &str) -> bool {
let path = PathBuf::from(trimmed); let path = PathBuf::from(trimmed);
// Reject any parent-directory traversal. Callers never need `..` to refer
// to files inside the workspace, and `..` defeats both checks below: the
// relative branch only inspects the leading component, and the absolute
// branch's `canonicalize()` silently falls back to the literal `..` path
// when the target does not exist yet (e.g. a file about to be created).
// Returning false here is the safe direction: it classifies the command as
// requiring full-access permission rather than workspace-write.
if path
.components()
.any(|component| matches!(component, std::path::Component::ParentDir))
{
return false;
}
// If path is absolute, check if it starts with CWD // If path is absolute, check if it starts with CWD
if path.is_absolute() { if path.is_absolute() {
if let Ok(cwd) = std::env::current_dir() { if let Ok(cwd) = std::env::current_dir() {
@@ -2588,6 +2602,26 @@ fn run_powershell(input: PowerShellInput) -> Result<String, String> {
to_pretty_json(execute_powershell(input).map_err(|error| error.to_string())?) to_pretty_json(execute_powershell(input).map_err(|error| error.to_string())?)
} }
#[cfg(test)]
mod workspace_traversal_guard_tests {
use super::is_within_workspace;
#[test]
fn rejects_parent_traversal_components() {
// Leading and embedded `..` must both be rejected (was previously a hole
// because only the leading component was inspected).
assert!(!is_within_workspace("../secrets"));
assert!(!is_within_workspace("src/../../etc/passwd"));
assert!(!is_within_workspace("a/b/../../../etc/crontab"));
}
#[test]
fn allows_plain_relative_paths() {
assert!(is_within_workspace("src/main.rs"));
assert!(is_within_workspace("Cargo.toml"));
}
}
fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> { fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> {
serde_json::to_string_pretty(&value).map_err(|error| error.to_string()) serde_json::to_string_pretty(&value).map_err(|error| error.to_string())
} }