# Reconstructed from git patch 74cc59040796b8397f7235dba7f28f06312d9737
# (author: bellman, Thu, 14 May 2026 17:18:03 +0900,
# subject: "[PATCH] omx(team): auto-checkpoint worker-1 [1]"),
# which creates src/path_scope.py.
from __future__ import annotations

import glob
import ntpath
import os
import re
import shlex
from dataclasses import dataclass
from pathlib import Path, PureWindowsPath

# Characters whose presence makes a path (component) a glob pattern.
_GLOB_META = set('*?[')
# Drive-letter absolute path, e.g. ``C:\`` or ``C:/``.
_WINDOWS_DRIVE_RE = re.compile(r'^[A-Za-z]:[\\/]')
# UNC path, e.g. ``\\server\share`` or ``//server/share``.
_WINDOWS_UNC_RE = re.compile(r'^(?:\\\\|//)[^\\/]+[\\/][^\\/]+')
# Shell-style ``NAME=value`` environment assignment.
_ENV_ASSIGNMENT_RE = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*=')


@dataclass(frozen=True)
class PathScopeDecision:
    """Outcome of validating one path (or a whole payload) against a scope.

    Attributes:
        allowed: True when the checked path(s) are inside the workspace scope.
        reason: Human-readable explanation of the decision.
        candidate: The operand as it was given to the validator, if any.
        resolved: The normalised/resolved form that was compared, if any.
    """

    allowed: bool
    reason: str
    candidate: str | None = None
    resolved: str | None = None


@dataclass(frozen=True)
class WorkspacePathScope:
    """Validate tool/shell path operands against explicit workspace roots.

    The policy is intentionally conservative for the Python port: any candidate
    path that resolves outside the configured roots is denied, including paths
    reached through symlinks or glob expansion. Windows drive/UNC paths are
    treated as out-of-scope on POSIX roots unless an allowed root is also a
    Windows-style root with the same prefix.
    """

    roots: tuple[Path, ...]

    @classmethod
    def from_root(cls, root: str | Path) -> 'WorkspacePathScope':
        """Build a scope with a single workspace root."""
        return cls.from_roots((root,))

    @classmethod
    def from_roots(cls, roots: tuple[str | Path, ...] | list[str | Path]) -> 'WorkspacePathScope':
        """Build a scope from one or more roots, resolving each to an absolute path.

        Raises:
            ValueError: if ``roots`` is empty.
        """
        # A bare string would otherwise be iterated character by character and
        # silently produce a nonsensical set of roots.
        if isinstance(roots, (str, os.PathLike)):
            roots = (roots,)
        resolved_roots = tuple(Path(root).expanduser().resolve(strict=False) for root in roots)
        if not resolved_roots:
            raise ValueError('at least one workspace root is required')
        return cls(resolved_roots)

    def validate_payload(self, payload: str, cwd: str | Path | None = None) -> PathScopeDecision:
        """Validate every path-like operand found in a shell/tool payload.

        Args:
            payload: Command line or tool payload to scan for path operands.
            cwd: Directory relative operands are resolved against; defaults to
                the first root. The cwd itself must be inside the scope.

        Returns:
            The first denying decision, or an allowing decision when the cwd
            and all extracted candidates are in scope.
        """
        cwd_path = Path(cwd).expanduser().resolve(strict=False) if cwd else self.roots[0]
        cwd_decision = self.validate_path(cwd_path)
        if not cwd_decision.allowed:
            return PathScopeDecision(
                False,
                f'cwd outside workspace scope: {cwd_path}',
                str(cwd_path),
                cwd_decision.resolved,
            )
        for candidate in extract_path_candidates(payload):
            decision = self.validate_path(candidate, cwd_path)
            if not decision.allowed:
                return decision
        return PathScopeDecision(True, 'all path candidates are inside workspace scope')

    def validate_path(self, candidate: str | Path, cwd: str | Path | None = None) -> PathScopeDecision:
        """Validate a single path operand.

        ``~`` and environment variables are expanded first. Windows-style
        absolute paths are compared lexically against Windows-style roots;
        everything else is made absolute against ``cwd`` (default: first
        root), glob-expanded, resolved, and required to sit under a root.
        """
        raw = os.path.expandvars(os.path.expanduser(str(candidate)))
        if _is_windows_absolute(raw):
            return self._validate_windows_path(raw)
        base = Path(cwd).expanduser().resolve(strict=False) if cwd else self.roots[0]
        path = Path(raw)
        if not path.is_absolute():
            path = base / path
        first_resolved: Path | None = None
        for expanded_path in self._expand_glob(path):
            resolved = expanded_path.resolve(strict=False)
            if first_resolved is None:
                first_resolved = resolved
            if not any(_is_relative_to(resolved, root) for root in self.roots):
                return PathScopeDecision(
                    False,
                    'path resolves outside workspace scope',
                    str(candidate),
                    str(resolved),
                )
        return PathScopeDecision(True, 'path is inside workspace scope', str(candidate), str(first_resolved))

    def _expand_glob(self, path: Path) -> tuple[Path, ...]:
        """Return the concrete paths that must be validated for ``path``.

        Non-glob paths are returned as-is. Matching globs return their
        matches. Unmatched globs return the stable non-glob parent prefix
        *and* the literal path: a shell hands an unmatched pattern to the
        command verbatim, so ``..`` components after the glob part could
        otherwise escape the root unnoticed.
        """
        path_text = str(path)
        if not any(char in path_text for char in _GLOB_META):
            return (path,)
        matches = tuple(Path(match) for match in glob.glob(path_text, recursive=True))
        if matches:
            return matches
        stable_parts: list[str] = []
        for part in path.parts:
            if any(char in part for char in _GLOB_META):
                break
            stable_parts.append(part)
        if stable_parts:
            return (Path(*stable_parts), path)
        return (path,)

    def _validate_windows_path(self, raw: str) -> PathScopeDecision:
        """Lexically compare a Windows absolute path against Windows-style roots.

        NOTE: this check is purely lexical; nothing here resolves symlinks or
        junctions.
        """
        # PureWindowsPath.relative_to() does not collapse '..', so normalise
        # first; otherwise 'C:\\ws\\..\\secret' would count as inside 'C:\\ws'.
        candidate = PureWindowsPath(ntpath.normpath(raw))
        for root in self.roots:
            root_text = str(root)
            if not _is_windows_absolute(root_text):
                continue
            try:
                candidate.relative_to(PureWindowsPath(ntpath.normpath(root_text)))
            except ValueError:
                continue
            return PathScopeDecision(True, 'windows path is inside workspace scope', raw, str(candidate))
        return PathScopeDecision(False, 'windows absolute path is outside workspace scope', raw, str(candidate))


def extract_path_candidates(payload: str) -> tuple[str, ...]:
    """Return conservative path-like operands from a shell/tool payload.

    Tokens are split shell-style (falling back to whitespace splitting when
    quoting is unbalanced). Environment assignments and bare options are
    skipped; for ``-o=value`` / ``--option=value`` tokens the value part is
    examined so that paths passed through options are not overlooked.
    Returned operands have ``~`` and environment variables expanded.
    """
    try:
        tokens = shlex.split(payload, posix=True)
    except ValueError:
        tokens = payload.split()
    candidates: list[str] = []
    for token in tokens:
        if not token or _ENV_ASSIGNMENT_RE.match(token):
            continue
        if token.startswith('-'):
            # Only the value of an '--option=value' token can be a path.
            _, separator, token = token.partition('=')
            if not separator or not token:
                continue
        expanded = os.path.expandvars(os.path.expanduser(token))
        if _looks_like_path(token) or _looks_like_path(expanded):
            candidates.append(expanded)
    return tuple(candidates)


def _looks_like_path(token: str) -> bool:
    """Heuristically decide whether ``token`` is a path or glob operand."""
    return (
        token in {'.', '..'}
        or token.startswith(('./', '../', '/', '~/'))
        or '..' in token.split('/')
        or '/' in token
        or '\\' in token
        or any(char in token for char in _GLOB_META)
        or _is_windows_absolute(token)
    )


def _is_windows_absolute(value: str) -> bool:
    """Return True for drive-letter (``C:\\``) or UNC (``\\\\host\\share``) paths."""
    return bool(_WINDOWS_DRIVE_RE.match(value) or _WINDOWS_UNC_RE.match(value))


def _is_relative_to(path: Path, root: Path) -> bool:
    """Return True when ``path`` equals ``root`` or lies beneath it (lexically)."""
    try:
        path.relative_to(root)
        return True
    except ValueError:
        return False