Harden permission enforcement against sandbox bypasses

Close two ways the permission system could be bypassed: - Workspace path traversal: normalize `.`/`..` lexically before the boundary prefix comparison so paths like `/workspace/../../etc` can no longer escape the sandbox. Fixed in both the runtime enforcer and the duplicate check in the tools PowerShell path classifier. - read-only mode no longer trusts the leading token alone: reject shell metacharacters (chaining/substitution/redirect/pipe/subshell), drop interpreters and build drivers (python/node/ruby/cargo/rustc) from the allow-list, gate `git` to non-mutating subcommands, and reject `find` actions that execute or delete. Adds regression tests for both holes. The pre-existing, unrelated worker_boot git-metadata test failure is not affected by this change. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 03:56:45 +00:00 · 2026-05-31 13:26:39 +02:00
parent 4d3dc5b873
commit e8c8ef1142
2 changed files with 207 additions and 20 deletions
--- a/rust/crates/runtime/src/permission_enforcer.rs
+++ b/rust/crates/runtime/src/permission_enforcer.rs
@@ -173,33 +173,119 @@ impl PermissionEnforcer {
    }
 }

-/// Simple workspace boundary check via string prefix.
+/// Workspace boundary check.
+///
+/// Resolves `.` and `..` components lexically *before* comparing against the
+/// workspace root, so that traversal sequences like `/workspace/../../etc`
+/// cannot escape the sandbox via a naive string prefix match. Normalization is
+/// lexical (it does not touch the filesystem) because the target path may not
+/// exist yet on a write, and we must not depend on CWD.
 fn is_within_workspace(path: &str, workspace_root: &str) -> bool {
-    let normalized = if path.starts_with('/') {
+    let combined = if path.starts_with('/') {
        path.to_owned()
    } else {
        format!("{workspace_root}/{path}")
    };

-    let root = if workspace_root.ends_with('/') {
-        workspace_root.to_owned()
+    let normalized = lexically_normalize(&combined);
+    let root = lexically_normalize(workspace_root);
+    let root_with_slash = if root.ends_with('/') {
+        root.clone()
    } else {
-        format!("{workspace_root}/")
+        format!("{root}/")
    };

-    normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/')
+    normalized == root || normalized.starts_with(&root_with_slash)
+}
+
+/// Collapse `.` and `..` segments without consulting the filesystem.
+/// `..` that would climb above an absolute root is clamped at `/`, so the
+/// result can never be a prefix-match for a deeper workspace root.
+fn lexically_normalize(path: &str) -> String {
+    let is_absolute = path.starts_with('/');
+    let mut stack: Vec<&str> = Vec::new();
+    for component in path.split('/') {
+        match component {
+            "" | "." => {}
+            ".." => {
+                stack.pop();
+            }
+            other => stack.push(other),
+        }
+    }
+    let joined = stack.join("/");
+    if is_absolute {
+        format!("/{joined}")
+    } else {
+        joined
+    }
 }

 /// Conservative heuristic: is this bash command read-only?
+///
+/// Hardening notes:
+/// - Any shell metacharacter that could chain, substitute, pipe, or redirect
+///   into a state-changing command rejects the whole line. This blocks
+///   `cat x; rm -rf y`, `cat x | sh`, `$(...)`, backticks, redirects, and
+///   subshells regardless of the leading token.
+/// - Language interpreters (`python`, `node`, `ruby`) and build drivers
+///   (`cargo`, `rustc`) are NOT read-only: they execute arbitrary code, so they
+///   are excluded from the allow-list.
+/// - `git` is allowed only for a known set of non-mutating subcommands.
+/// - `find` is rejected when it carries an action that can execute or delete.
+///
+/// Residual known gaps (documented, not yet closed): `sed`'s `w`/`e` script
+/// commands and `awk`'s `system()` can still mutate — these require quoting or
+/// metacharacters that the checks above usually catch, but a dedicated parser
+/// would be more robust. Tracked as follow-up.
 fn is_read_only_command(command: &str) -> bool {
-    let first_token = command
-        .split_whitespace()
+    // Shell metacharacters that enable command chaining, substitution,
+    // piping, redirection, or subshells. Presence of any of these means we
+    // cannot reason about the command from its leading token alone.
+    const SHELL_METACHARS: &[char] =
+        &[';', '|', '&', '$', '`', '>', '<', '(', ')', '{', '}', '\n'];
+    if command.contains(SHELL_METACHARS) {
+        return false;
+    }
+
+    let mut tokens = command.split_whitespace();
+    let first_token = tokens
        .next()
        .unwrap_or("")
        .rsplit('/')
        .next()
        .unwrap_or("");

+    // `git` is only read-only for a curated set of subcommands.
+    if first_token == "git" {
+        let subcommand = tokens.next().unwrap_or("");
+        return matches!(
+            subcommand,
+            "status"
+                | "log"
+                | "diff"
+                | "show"
+                | "branch"
+                | "rev-parse"
+                | "ls-files"
+                | "blame"
+                | "describe"
+                | "tag"
+                | "remote"
+        );
+    }
+
+    // `find` can execute or delete via actions; reject those forms.
+    if first_token == "find"
+        && (command.contains("-exec")
+            || command.contains("-execdir")
+            || command.contains("-delete")
+            || command.contains("-ok")
+            || command.contains("-fprintf"))
+    {
+        return false;
+    }
+
    matches!(
        first_token,
        "cat"
@@ -237,8 +323,6 @@ fn is_read_only_command(command: &str) -> bool {
            | "tr"
            | "cut"
            | "paste"
-            | "tee"
-            | "xargs"
            | "test"
            | "true"
            | "false"
@@ -257,18 +341,8 @@ fn is_read_only_command(command: &str) -> bool {
            | "tree"
            | "jq"
            | "yq"
-            | "python3"
-            | "python"
-            | "node"
-            | "ruby"
-            | "cargo"
-            | "rustc"
-            | "git"
-            | "gh"
    ) && !command.contains("-i ")
        && !command.contains("--in-place")
-        && !command.contains(" > ")
-        && !command.contains(" >> ")
 }

 #[cfg(test)]
@@ -375,6 +449,85 @@ mod tests {
        assert!(!is_read_only_command("sed -i 's/a/b/' file"));
    }

+    // --- Hardening regression tests (#2: read-only bypasses) ---
+
+    #[test]
+    fn read_only_rejects_command_chaining() {
+        // A leading read-only token must not launder a trailing destructive one.
+        assert!(!is_read_only_command("cat foo; rm -rf bar"));
+        assert!(!is_read_only_command("cat foo && rm -rf bar"));
+        assert!(!is_read_only_command("ls || rm bar"));
+        assert!(!is_read_only_command("cat foo | sh"));
+        assert!(!is_read_only_command("echo `rm bar`"));
+        assert!(!is_read_only_command("echo $(rm bar)"));
+        assert!(!is_read_only_command("echo x>file")); // redirect without spaces
+    }
+
+    #[test]
+    fn read_only_rejects_interpreters_and_build_drivers() {
+        // These execute arbitrary code and are no longer read-only.
+        assert!(!is_read_only_command(
+            "python3 -c \"import os; os.system('rm -rf .')\""
+        ));
+        assert!(!is_read_only_command("python script.py"));
+        assert!(!is_read_only_command("node app.js"));
+        assert!(!is_read_only_command("ruby x.rb"));
+        assert!(!is_read_only_command("cargo run"));
+        assert!(!is_read_only_command("rustc evil.rs"));
+    }
+
+    #[test]
+    fn read_only_gates_git_subcommands() {
+        // Read-only git subcommands remain allowed...
+        assert!(is_read_only_command("git status"));
+        assert!(is_read_only_command("git diff HEAD~1"));
+        assert!(is_read_only_command("git show abc123"));
+        // ...but mutating/exfiltrating ones are rejected.
+        assert!(!is_read_only_command("git commit -m x"));
+        assert!(!is_read_only_command("git push origin main"));
+        assert!(!is_read_only_command("git reset --hard"));
+        assert!(!is_read_only_command("git clean -fd"));
+        assert!(!is_read_only_command("git config user.email a@b.c"));
+    }
+
+    #[test]
+    fn read_only_rejects_find_actions() {
+        assert!(is_read_only_command("find . -name Cargo.toml"));
+        assert!(!is_read_only_command("find . -delete"));
+        // -exec uses braces/semicolon which also trip the metachar guard,
+        // but the explicit action check is the primary defense.
+        assert!(!is_read_only_command("find . -execdir rm rf"));
+    }
+
+    // --- Hardening regression tests (#1: workspace path traversal) ---
+
+    #[test]
+    fn workspace_rejects_parent_traversal() {
+        assert!(!is_within_workspace("/workspace/../etc/passwd", "/workspace"));
+        assert!(!is_within_workspace(
+            "/workspace/../../etc/crontab",
+            "/workspace"
+        ));
+        assert!(!is_within_workspace("../etc/passwd", "/workspace"));
+        assert!(!is_within_workspace(
+            "/workspace/sub/../../outside",
+            "/workspace"
+        ));
+        // Legitimate paths still resolve inside.
+        assert!(is_within_workspace("/workspace/./src/main.rs", "/workspace"));
+        assert!(is_within_workspace(
+            "/workspace/src/../src/main.rs",
+            "/workspace"
+        ));
+    }
+
+    #[test]
+    fn workspace_write_denies_traversal_escape() {
+        let enforcer = make_enforcer(PermissionMode::WorkspaceWrite);
+        let result = enforcer.check_file_write("/workspace/../../etc/crontab", "/workspace");
+        assert!(matches!(result, EnforcementResult::Denied { .. }));
+    }
+
    #[test]
    fn active_mode_returns_policy_mode() {
        // given
--- a/rust/crates/tools/src/lib.rs
+++ b/rust/crates/tools/src/lib.rs
@@ -2571,6 +2571,20 @@ fn is_within_workspace(path: &str) -> bool {

    let path = PathBuf::from(trimmed);

+    // Reject any parent-directory traversal. Callers never need `..` to refer
+    // to files inside the workspace, and `..` defeats both checks below: the
+    // relative branch only inspects the leading component, and the absolute
+    // branch's `canonicalize()` silently falls back to the literal `..` path
+    // when the target does not exist yet (e.g. a file about to be created).
+    // Returning false here is the safe direction: it classifies the command as
+    // requiring full-access permission rather than workspace-write.
+    if path
+        .components()
+        .any(|component| matches!(component, std::path::Component::ParentDir))
+    {
+        return false;
+    }
+
    // If path is absolute, check if it starts with CWD
    if path.is_absolute() {
        if let Ok(cwd) = std::env::current_dir() {
@@ -2588,6 +2602,26 @@ fn run_powershell(input: PowerShellInput) -> Result<String, String> {
    to_pretty_json(execute_powershell(input).map_err(|error| error.to_string())?)
 }

+#[cfg(test)]
+mod workspace_traversal_guard_tests {
+    use super::is_within_workspace;
+
+    #[test]
+    fn rejects_parent_traversal_components() {
+        // Leading and embedded `..` must both be rejected (was previously a hole
+        // because only the leading component was inspected).
+        assert!(!is_within_workspace("../secrets"));
+        assert!(!is_within_workspace("src/../../etc/passwd"));
+        assert!(!is_within_workspace("a/b/../../../etc/crontab"));
+    }
+
+    #[test]
+    fn allows_plain_relative_paths() {
+        assert!(is_within_workspace("src/main.rs"));
+        assert!(is_within_workspace("Cargo.toml"));
+    }
+}
+
 fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> {
    serde_json::to_string_pretty(&value).map_err(|error| error.to_string())
 }