Harden permission enforcement against sandbox bypasses

Close two ways the permission system could be bypassed:

- Workspace path traversal: `.`/`..` are now resolved lexically before the boundary prefix comparison, so paths like `/workspace/../../etc` can no longer escape the sandbox. The runtime enforcer normalizes the path; the duplicate check in the tools PowerShell path classifier rejects any path containing a `..` component outright.
- Read-only mode no longer trusts the leading token alone: reject shell metacharacters (chaining/substitution/redirect/pipe/subshell), drop interpreters and build drivers (python/node/ruby/cargo/rustc) from the allow-list, gate `git` to non-mutating subcommands, and reject `find` actions that execute or delete.

Adds regression tests for both holes. The pre-existing, unrelated worker_boot git-metadata test failure is not affected by this change.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 12:06:43 +00:00 · 2026-05-31 13:26:39 +02:00
parent 4d3dc5b873
commit e8c8ef1142
2 changed files with 207 additions and 20 deletions
--- a/rust/crates/runtime/src/permission_enforcer.rs
+++ b/rust/crates/runtime/src/permission_enforcer.rs
@@ -173,33 +173,119 @@ impl PermissionEnforcer {
    }
 }
-/// Simple workspace boundary check via string prefix.
+/// Workspace boundary check.
 ///
 /// Resolves `.` and `..` components lexically *before* comparing against the
 /// workspace root, so that traversal sequences like `/workspace/../../etc`
 /// cannot escape the sandbox via a naive string prefix match. Normalization is
 /// lexical (it does not touch the filesystem) because the target path may not
 /// exist yet on a write, and we must not depend on CWD.
 fn is_within_workspace(path: &str, workspace_root: &str) -> bool {
-    let normalized = if path.starts_with('/') {
+    let combined = if path.starts_with('/') {
        path.to_owned()
    } else {
        format!("{workspace_root}/{path}")
    };
-    let root = if workspace_root.ends_with('/') {
+    let normalized = lexically_normalize(&combined);
-        workspace_root.to_owned()
+    let root = lexically_normalize(workspace_root);
    let root_with_slash = if root.ends_with('/') {
        root.clone()
    } else {
-        format!("{workspace_root}/")
+        format!("{root}/")
    };
-    normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/')
+    normalized == root || normalized.starts_with(&root_with_slash)
 }
 /// Collapse `.` and `..` segments without consulting the filesystem.
 ///
 /// Rules:
 /// - Empty segments (leading, repeated, or trailing `/`) and `.` are dropped.
 /// - `..` cancels the preceding named segment.
 /// - On an absolute path, `..` that would climb above the root is clamped at
 ///   `/`, so the result can never be a prefix-match for a deeper workspace
 ///   root.
 /// - On a relative path, `..` that has nothing left to cancel is *kept* as a
 ///   leading `..`. Dropping it would rewrite `a/../../a/x` to `a/x`, making a
 ///   path that lies outside a relative root look as if it were inside it.
 ///
 /// Returns the normalized path. Absolute input always yields a string that
 /// starts with `/`; relative input may yield an empty string when every
 /// segment cancels out (e.g. `a/..`).
 fn lexically_normalize(path: &str) -> String {
    let is_absolute = path.starts_with('/');
    let mut stack: Vec<&str> = Vec::new();
    for component in path.split('/') {
        match component {
            "" | "." => {}
            ".." => match stack.last() {
                // A named segment precedes this `..`: the two cancel out.
                Some(&previous) if previous != ".." => {
                    stack.pop();
                }
                // Absolute path already at the root: clamp instead of climbing.
                _ if is_absolute => {}
                // Relative path with nothing to cancel: the `..` is meaningful
                // and must survive normalization.
                _ => stack.push(".."),
            },
            other => stack.push(other),
        }
    }
    let joined = stack.join("/");
    if is_absolute {
        format!("/{joined}")
    } else {
        joined
    }
 }
 /// Conservative heuristic: is this bash command read-only?
 ///
 /// Hardening notes:
 /// - Any shell metacharacter that could chain, substitute, pipe, or redirect
 ///   into a state-changing command rejects the whole line. This blocks
 ///   `cat x; rm -rf y`, `cat x | sh`, `$(...)`, backticks, redirects, and
 ///   subshells regardless of the leading token.
 /// - Language interpreters (`python`, `node`, `ruby`) and build drivers
 ///   (`cargo`, `rustc`) are NOT read-only: they execute arbitrary code, so they
 ///   are excluded from the allow-list.
 /// - `git` is allowed only for a known set of non-mutating subcommands.
 /// - `find` is rejected when it carries an action that can execute or delete.
 ///
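 /// Residual known gaps (documented, not yet closed):
 /// - `sed`'s `w`/`e` script commands and `awk`'s `system()` can still mutate —
 ///   these require quoting or metacharacters that the checks above usually
 ///   catch, but a dedicated parser would be more robust.
 /// - `git branch`, `git tag`, and `git remote` are accepted with any
 ///   arguments, so mutating forms such as `git branch -D topic`,
 ///   `git tag -d v1`, or `git remote add name url` are still classified as
 ///   read-only.
 /// - The `find` action check does not cover the file-writing actions
 ///   `-fprint`, `-fprint0`, or `-fls`.
 ///
 /// Tracked as follow-up.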
 fn is_read_only_command(command: &str) -> bool {
-    let first_token = command
+    // Shell metacharacters that enable command chaining, substitution,
-        .split_whitespace()
+    // piping, redirection, or subshells. Presence of any of these means we
    // cannot reason about the command from its leading token alone.
    const SHELL_METACHARS: &[char] =
        &[';', '|', '&', '$', '`', '>', '<', '(', ')', '{', '}', '\n'];
    if command.contains(SHELL_METACHARS) {
        return false;
    }
    let mut tokens = command.split_whitespace();
    let first_token = tokens
        .next()
        .unwrap_or("")
        .rsplit('/')
        .next()
        .unwrap_or("");
    // `git` is only read-only for a curated set of subcommands.
    if first_token == "git" {
        let subcommand = tokens.next().unwrap_or("");
        return matches!(
            subcommand,
            "status"
                | "log"
                | "diff"
                | "show"
                | "branch"
                | "rev-parse"
                | "ls-files"
                | "blame"
                | "describe"
                | "tag"
                | "remote"
        );
    }
    // `find` can execute or delete via actions; reject those forms.
    if first_token == "find"
        && (command.contains("-exec")
            || command.contains("-execdir")
            || command.contains("-delete")
            || command.contains("-ok")
            || command.contains("-fprintf"))
    {
        return false;
    }
    matches!(
        first_token,
        "cat"
@@ -237,8 +323,6 @@ fn is_read_only_command(command: &str) -> bool {
            | "tr"
            | "cut"
            | "paste"
            | "tee"
            | "xargs"
            | "test"
            | "true"
            | "false"
@@ -257,18 +341,8 @@ fn is_read_only_command(command: &str) -> bool {
            | "tree"
            | "jq"
            | "yq"
            | "python3"
            | "python"
            | "node"
            | "ruby"
            | "cargo"
            | "rustc"
            | "git"
            | "gh"
    ) && !command.contains("-i ")
        && !command.contains("--in-place")
        && !command.contains(" > ")
        && !command.contains(" >> ")
 }
 #[cfg(test)]
@@ -375,6 +449,85 @@ mod tests {
        assert!(!is_read_only_command("sed -i 's/a/b/' file"));
    }
    // --- Hardening regression tests (#2: read-only bypasses) ---
    #[test]
    fn read_only_rejects_command_chaining() {
        // Every entry leads with an innocuous-looking command and then smuggles
        // in a second, destructive one through a shell metacharacter. None of
        // them may be classified as read-only.
        let smuggled_commands = [
            "cat foo; rm -rf bar",
            "cat foo && rm -rf bar",
            "ls || rm bar",
            "cat foo | sh",
            "echo `rm bar`",
            "echo $(rm bar)",
            // Redirect written without surrounding spaces.
            "echo x>file",
        ];
        for command in smuggled_commands {
            assert!(!is_read_only_command(command));
        }
    }
    #[test]
    fn read_only_rejects_interpreters_and_build_drivers() {
        // Interpreters and build drivers run arbitrary code, so no invocation of
        // them may be treated as read-only.
        let code_runners = [
            "python3 -c \"import os; os.system('rm -rf .')\"",
            "python script.py",
            "node app.js",
            "ruby x.rb",
            "cargo run",
            "rustc evil.rs",
        ];
        for command in code_runners {
            assert!(!is_read_only_command(command));
        }
    }
    #[test]
    fn read_only_gates_git_subcommands() {
        // Inspection-only subcommands must keep working in read-only mode.
        let inspecting = ["git status", "git diff HEAD~1", "git show abc123"];
        for command in inspecting {
            assert!(is_read_only_command(command));
        }

        // Subcommands that mutate the repository, its configuration, or a
        // remote must be refused.
        let mutating = [
            "git commit -m x",
            "git push origin main",
            "git reset --hard",
            "git clean -fd",
            "git config user.email a@b.c",
        ];
        for command in mutating {
            assert!(!is_read_only_command(command));
        }
    }
    #[test]
    fn read_only_rejects_find_actions() {
        // A plain search carries no action and stays read-only.
        let plain_search = "find . -name Cargo.toml";
        assert!(is_read_only_command(plain_search));

        // Neither form below contains a shell metacharacter, so the
        // metacharacter guard (which runs first) lets them through and it is the
        // `find`-specific action check that has to reject them.
        let with_action = ["find . -delete", "find . -execdir rm rf"];
        for command in with_action {
            assert!(!is_read_only_command(command));
        }
    }
    // --- Hardening regression tests (#1: workspace path traversal) ---
    #[test]
    fn workspace_rejects_parent_traversal() {
        const ROOT: &str = "/workspace";

        // `..` sequences that resolve to a location outside the root, whether
        // the path is given in absolute or relative form.
        let escaping = [
            "/workspace/../etc/passwd",
            "/workspace/../../etc/crontab",
            "../etc/passwd",
            "/workspace/sub/../../outside",
        ];
        for path in escaping {
            assert!(!is_within_workspace(path, ROOT));
        }

        // `.` and `..` that resolve back inside the root remain acceptable.
        let contained = ["/workspace/./src/main.rs", "/workspace/src/../src/main.rs"];
        for path in contained {
            assert!(is_within_workspace(path, ROOT));
        }
    }
    #[test]
    fn workspace_write_denies_traversal_escape() {
        // End-to-end check through the enforcer: in workspace-write mode, a
        // write target that climbs out of the workspace via `..` is denied.
        let workspace_root = "/workspace";
        let escaping_target = "/workspace/../../etc/crontab";
        let enforcer = make_enforcer(PermissionMode::WorkspaceWrite);
        let verdict = enforcer.check_file_write(escaping_target, workspace_root);
        assert!(matches!(verdict, EnforcementResult::Denied { .. }));
    }
    #[test]
    fn active_mode_returns_policy_mode() {
        // given
--- a/rust/crates/tools/src/lib.rs
+++ b/rust/crates/tools/src/lib.rs
@@ -2571,6 +2571,20 @@ fn is_within_workspace(path: &str) -> bool {
    let path = PathBuf::from(trimmed);
    // Reject any parent-directory traversal. Callers never need `..` to refer
    // to files inside the workspace, and `..` defeats both checks below: the
    // relative branch only inspects the leading component, and the absolute
    // branch's `canonicalize()` silently falls back to the literal `..` path
    // when the target does not exist yet (e.g. a file about to be created).
    // Returning false here is the safe direction: it classifies the command as
    // requiring full-access permission rather than workspace-write.
    if path
        .components()
        .any(|component| matches!(component, std::path::Component::ParentDir))
    {
        return false;
    }
    // If path is absolute, check if it starts with CWD
    if path.is_absolute() {
        if let Ok(cwd) = std::env::current_dir() {
@@ -2588,6 +2602,26 @@ fn run_powershell(input: PowerShellInput) -> Result<String, String> {
    to_pretty_json(execute_powershell(input).map_err(|error| error.to_string())?)
 }
 #[cfg(test)]
 mod workspace_traversal_guard_tests {
    use super::is_within_workspace;

    /// Any `..` component disqualifies a path, whether it leads the path or is
    /// embedded after other components.
    #[test]
    fn rejects_parent_traversal_components() {
        let traversing = [
            "../secrets",
            "src/../../etc/passwd",
            "a/b/../../../etc/crontab",
        ];
        for candidate in traversing {
            assert!(!is_within_workspace(candidate));
        }
    }

    /// Relative paths without any `..` component are still accepted.
    #[test]
    fn allows_plain_relative_paths() {
        let plain = ["src/main.rs", "Cargo.toml"];
        for candidate in plain {
            assert!(is_within_workspace(candidate));
        }
    }
 }
 /// Serialize `value` as pretty-printed JSON, reporting any serialization
 /// failure as its display string.
 fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> {
    match serde_json::to_string_pretty(&value) {
        Ok(rendered) => Ok(rendered),
        Err(failure) => Err(failure.to_string()),
    }
 }