feat: 添加沙箱执行和本地仓库管理器

SandboxExec实现命令白名单和敏感信息脱敏;LocalRepoManager管理git mirror/worktree;DiffExtractor构建审查上下文

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
accelerator
2026-03-01 03:25:30 +00:00
parent 4c90bf0b9c
commit d1e1e2f33c
3 changed files with 619 additions and 0 deletions

View File

@@ -0,0 +1,306 @@
import { readFile, lstat } from 'node:fs/promises';
import path from 'node:path';
import { DiffFile, ReviewContext, ReviewRun, ChangedFile } from '../types';
import { SandboxExec } from './sandbox-exec';
import { LocalRepoManager } from './local-repo-manager';
/**
 * Normalises a raw git status field (for example `M`, `a` or `R100`) to the
 * single upper-case status letter used by `ChangedFile`.
 *
 * Only the first non-whitespace character is considered. Anything that is not
 * one of the recognised letters (including an empty field) is reported as `M`.
 */
function toStatus(status: string): ChangedFile['status'] {
  const letter = status.trim().charAt(0).toUpperCase();
  switch (letter) {
    case 'A':
    case 'M':
    case 'D':
    case 'R':
    case 'C':
    case 'T':
    case 'U':
    case 'X':
    case 'B':
      return letter as ChangedFile['status'];
    default:
      // Unknown or empty status letters are treated as a plain modification.
      return 'M';
  }
}
/**
 * Resolves `relativePath` against `basePath` and guarantees that the result
 * stays inside `basePath`.
 *
 * The containment check is done on path segments (via `path.relative`) rather
 * than on a raw string prefix, so a sibling directory that merely shares the
 * prefix (e.g. `/tmp/ws-evil` for base `/tmp/ws`) is rejected.
 *
 * @param basePath     Directory the result must stay within.
 * @param relativePath Path to resolve, normally relative to `basePath`.
 * @returns The absolute, normalised path.
 * @throws Error when the resolved path escapes `basePath`.
 */
function safePath(basePath: string, relativePath: string): string {
  const root = path.resolve(basePath);
  const resolved = path.resolve(root, relativePath);
  const offset = path.relative(root, resolved);
  // `offset` starts with a `..` segment when `resolved` is above or beside the
  // root; it is absolute when the two paths live on different roots/drives.
  const escapesRoot =
    offset === '..' || offset.startsWith(`..${path.sep}`) || path.isAbsolute(offset);
  if (escapesRoot) {
    throw new Error(`非法文件路径: ${relativePath}`);
  }
  return resolved;
}
/**
 * Builds the review context for a run: the raw unified diff, the list of
 * changed files with line counts, the parsed per-file hunks and the (truncated)
 * current contents of the changed files.
 *
 * Every git command is executed through the injected `SandboxExec` with the
 * workspace directory as its working directory.
 */
export class DiffExtractor {
  constructor(
    private readonly sandboxExec: SandboxExec,
    private readonly localRepoManager: LocalRepoManager,
    private readonly commandTimeoutMs: number,
    private readonly maxFilesPerRun: number,
    private readonly maxFileContentChars: number
  ) {}

  /** Returns the sandbox executor this extractor was constructed with. */
  getSandbox(): SandboxExec {
    return this.sandboxExec;
  }

  /**
   * Collects everything needed to review `run`.
   *
   * @param run           Review run; `headSha` (or `commitSha`) is the target, `baseSha` is optional.
   * @param mirrorPath    Path of the local mirror; passed through into the result unchanged.
   * @param workspacePath Directory in which the git commands run and files are read.
   * @throws Error when the run carries neither `headSha` nor `commitSha`.
   */
  async buildContext(run: ReviewRun, mirrorPath: string, workspacePath: string): Promise<ReviewContext> {
    const targetSha = run.headSha || run.commitSha;
    if (!targetSha) {
      throw new Error('缺少 target sha无法构建审查上下文');
    }

    // Without an explicit base, fall back to the target's first parent.
    const baseSha =
      run.baseSha ||
      (await this.localRepoManager.resolveCommitParent(workspacePath, targetSha)) ||
      undefined;

    let diff: string;
    let changedFiles: ChangedFile[];
    if (baseSha) {
      diff = await this.getDiff(workspacePath, run.eventType, baseSha, targetSha);
      changedFiles = await this.getChangedFiles(workspacePath, baseSha, targetSha);
    } else {
      // Root commit: there is no parent, so use `git show` / `diff-tree --root`.
      diff = await this.getRootCommitDiff(workspacePath, targetSha);
      changedFiles = await this.getRootCommitChangedFiles(workspacePath, targetSha);
    }

    // Restrict the parsed diff to the (already capped) changed-file list so the
    // per-run file limit applies to it as well.
    const allowedPaths = new Set(changedFiles.map((file) => file.path));
    const parsedDiff = this.parseDiff(diff, allowedPaths);
    const fileContents = await this.readChangedFileContents(workspacePath, changedFiles);

    return {
      workspacePath,
      mirrorPath,
      diff,
      changedFiles,
      parsedDiff,
      fileContents,
    };
  }

  /** Runs `git <args>` inside the workspace and returns its stdout. */
  private async runGit(workspacePath: string, args: string[]): Promise<string> {
    const { stdout } = await this.sandboxExec.run('git', args, {
      cwd: workspacePath,
      timeoutMs: this.commandTimeoutMs,
    });
    return stdout;
  }

  /** Full diff of a root commit, obtained with `git show`. */
  private async getRootCommitDiff(workspacePath: string, sha: string): Promise<string> {
    return this.runGit(workspacePath, ['show', '--format=', '--unified=3', sha]);
  }

  /**
   * Unified diff for the run: `base...target` for pull requests, otherwise the
   * diff of the target commit itself (`git show`).
   */
  private async getDiff(
    workspacePath: string,
    eventType: ReviewRun['eventType'],
    baseSha: string,
    targetSha: string
  ): Promise<string> {
    const args =
      eventType === 'pull_request'
        ? ['diff', '--unified=3', `${baseSha}...${targetSha}`]
        : ['show', '--format=', '--unified=3', targetSha];
    return this.runGit(workspacePath, args);
  }

  /**
   * Changed files of a root commit. `--root` is required, otherwise
   * `git diff-tree` prints nothing for a commit without a parent.
   */
  private async getRootCommitChangedFiles(workspacePath: string, sha: string): Promise<ChangedFile[]> {
    const nameStatus = await this.runGit(workspacePath, [
      'diff-tree', '--root', '--no-commit-id', '--name-status', '-r', sha,
    ]);
    const numstat = await this.runGit(workspacePath, [
      'diff-tree', '--root', '--no-commit-id', '--numstat', '-r', sha,
    ]);
    return this.collectChangedFiles(nameStatus, numstat, 'A');
  }

  /** Changed files between `baseSha...targetSha`. */
  private async getChangedFiles(workspacePath: string, baseSha: string, targetSha: string): Promise<ChangedFile[]> {
    const range = `${baseSha}...${targetSha}`;
    const nameStatus = await this.runGit(workspacePath, ['diff', '--name-status', range]);
    const numstat = await this.runGit(workspacePath, ['diff', '--numstat', range]);
    return this.collectChangedFiles(nameStatus, numstat, 'M');
  }

  /**
   * Merges `--name-status` and `--numstat` output into a `ChangedFile` list,
   * stopping once `maxFilesPerRun` entries have been collected.
   *
   * @param nameStatusOutput Tab-separated `<status>\t[<old>\t]<path>` lines.
   * @param numstatOutput    Tab-separated `<added>\t<deleted>\t<path>` lines.
   * @param fallbackStatus   Status used when a line has no status field.
   */
  private collectChangedFiles(
    nameStatusOutput: string,
    numstatOutput: string,
    fallbackStatus: string
  ): ChangedFile[] {
    // Non-numeric counts (git prints `-` for binary files) are recorded as 0.
    const toCount = (raw: string): number => {
      const parsed = Number.parseInt(raw, 10);
      return Number.isFinite(parsed) ? parsed : 0;
    };

    const lineStats = new Map<string, { additions: number; deletions: number }>();
    for (const line of numstatOutput.split('\n')) {
      if (!line.trim()) {
        continue;
      }
      const [addRaw = '0', delRaw = '0', filename] = line.split('\t');
      if (!filename) {
        continue;
      }
      // NOTE(review): for renames git prints the path as `old => new` here, so
      // the lookup below misses and the counts fall back to 0.
      lineStats.set(filename, { additions: toCount(addRaw), deletions: toCount(delRaw) });
    }

    const changedFiles: ChangedFile[] = [];
    for (const line of nameStatusOutput.split('\n')) {
      if (!line.trim()) {
        continue;
      }
      const [statusRaw = fallbackStatus, ...pathParts] = line.split('\t');
      // Renames/copies list `<old>\t<new>`; the last field is the current path.
      const filePath = pathParts[pathParts.length - 1];
      if (!filePath) {
        continue;
      }
      const stats = lineStats.get(filePath) || { additions: 0, deletions: 0 };
      changedFiles.push({
        path: filePath,
        status: toStatus(statusRaw),
        additions: stats.additions,
        deletions: stats.deletions,
      });
      if (changedFiles.length >= this.maxFilesPerRun) {
        break;
      }
    }
    return changedFiles;
  }

  /**
   * Reads the current content of every non-deleted changed file, truncated to
   * `maxFileContentChars`. Files that cannot be read are skipped (best effort).
   */
  private async readChangedFileContents(
    workspacePath: string,
    changedFiles: ChangedFile[]
  ): Promise<Record<string, string>> {
    const result: Record<string, string> = {};
    for (const file of changedFiles) {
      if (file.status === 'D') {
        continue;
      }
      try {
        const filePath = safePath(workspacePath, file.path);
        // Security: never follow symbolic links, they could point at host
        // files outside the workspace. Other non-regular entries (e.g.
        // directories) cannot be read as text either, so skip them as well.
        const stats = await lstat(filePath);
        if (stats.isSymbolicLink() || !stats.isFile()) {
          continue;
        }
        const content = await readFile(filePath, 'utf-8');
        result[file.path] = content.slice(0, this.maxFileContentChars);
      } catch {
        // Unreadable, missing or out-of-workspace path: deliberately skipped.
        continue;
      }
    }
    return result;
  }

  /**
   * Parses a unified diff into per-file lists of added and context lines,
   * numbered by their position in the new version of the file.
   *
   * Removed lines are not recorded and do not advance the line counter. Files
   * without a `+++ b/<path>` header (e.g. deletions) or without any recorded
   * line are omitted from the result.
   *
   * @param diffContent  Output of `git diff` / `git show`.
   * @param allowedPaths When given, files whose path is not in the set are dropped.
   */
  parseDiff(diffContent: string, allowedPaths?: Set<string>): DiffFile[] {
    const files: DiffFile[] = [];
    let currentFile: DiffFile | null = null;
    let lineNumber = 0;
    let inHunk = false;
    let skipCurrentFile = false;

    for (const line of diffContent.split('\n')) {
      if (line.startsWith('diff --git')) {
        if (currentFile && !skipCurrentFile) {
          files.push(currentFile);
        }
        currentFile = { path: '', changes: [] };
        inHunk = false;
        skipCurrentFile = false;
        continue;
      }
      if (!currentFile) {
        continue;
      }

      // The `+++ b/<path>` header only exists before the first hunk. Inside a
      // hunk the same prefix is an added line whose content starts with
      // `++ b/`, and must not be mistaken for a header.
      if (!inHunk && line.startsWith('+++ b/')) {
        currentFile.path = line.substring(6);
        if (allowedPaths && !allowedPaths.has(currentFile.path)) {
          skipCurrentFile = true;
        }
        continue;
      }

      if (line.startsWith('@@')) {
        const match = line.match(/@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/);
        if (match && match[1]) {
          // Start one before the hunk's first new-file line; each added or
          // context line increments before it is recorded.
          lineNumber = Number.parseInt(match[1], 10) - 1;
          inHunk = true;
        }
        continue;
      }

      // Ignore everything belonging to a skipped file or preceding a hunk.
      if (skipCurrentFile || !inHunk) {
        continue;
      }

      if (line.startsWith('+')) {
        lineNumber += 1;
        currentFile.changes.push({ lineNumber, content: line.slice(1), type: 'add' });
      } else if (line.startsWith(' ')) {
        lineNumber += 1;
        currentFile.changes.push({ lineNumber, content: line.slice(1), type: 'context' });
      }
    }

    if (currentFile && !skipCurrentFile) {
      files.push(currentFile);
    }
    return files.filter((file) => file.path && file.changes.length > 0);
  }
}

View File

@@ -0,0 +1,219 @@
import { access, mkdir, rm } from 'node:fs/promises';
import path from 'node:path';
import { createHash } from 'node:crypto';
import { SandboxExec } from './sandbox-exec';
import { logger } from '../../utils/logger';
/** Filesystem locations produced by LocalRepoManager.prepareWorkspace and consumed by cleanupWorkspace. */
export interface LocalRepoPaths {
// Path of the repository's local mirror (created with `git clone --mirror`).
mirrorPath: string;
// Path of the worktree directory checked out from the mirror for one run.
workspacePath: string;
}
/**
 * Derives a short, stable identifier for a repository: the first 16 hex
 * characters of the SHA-256 digest of `owner/repo`.
 */
function hashRepo(owner: string, repo: string): string {
  const hasher = createHash('sha256');
  hasher.update(`${owner}/${repo}`);
  const hexDigest = hasher.digest('hex');
  return hexDigest.substring(0, 16);
}
/**
 * Manages local git state under `workDir`: one mirror per repository
 * (`repos/<hash>.git`) and one detached worktree per review run
 * (`workspaces/<runId>`).
 *
 * Operations that modify a mirror (clone, fetch, remote changes, worktree
 * add/prune/remove) are serialised per mirror path within this instance.
 */
export class LocalRepoManager {
  /** Most recently queued lock per mirror path; later callers wait on it. */
  private readonly mirrorLocks = new Map<string, Promise<void>>();

  constructor(
    private readonly workDir: string,
    private readonly sandboxExec: SandboxExec,
    private readonly commandTimeoutMs: number,
    private readonly giteaToken?: string
  ) {}

  /**
   * Builds the per-invocation authentication arguments for git.
   *
   * The token is supplied through `-c http.extraHeader=...`, so it is not
   * written to the repository's git config. Returns an empty list when no
   * token is configured.
   */
  private getAuthArgs(): string[] {
    if (!this.giteaToken) {
      return [];
    }
    return ['-c', `http.extraHeader=Authorization: token ${this.giteaToken}`];
  }

  /** Runs `git <args>` through the sandbox, by default from `workDir`. */
  private runGit(args: string[], cwd: string = this.workDir): ReturnType<SandboxExec['run']> {
    return this.sandboxExec.run('git', args, { cwd, timeoutMs: this.commandTimeoutMs });
  }

  /**
   * Acquires the mutex for `mirrorPath`, waiting for the previous holder.
   *
   * @returns An unlock function that the caller must invoke when done.
   */
  private async acquireMirrorLock(mirrorPath: string): Promise<() => void> {
    const previousLock = this.mirrorLocks.get(mirrorPath) ?? Promise.resolve();

    // The Promise executor runs synchronously, so `release` is always replaced
    // by the real resolver before it can be called.
    let release: () => void = () => undefined;
    const ownLock = new Promise<void>((resolve) => {
      release = resolve;
    });

    // Publish our lock before waiting so that later callers queue behind us.
    this.mirrorLocks.set(mirrorPath, ownLock);
    await previousLock;

    return () => {
      release();
      // Nobody queued after us: drop the entry so the map does not grow.
      if (this.mirrorLocks.get(mirrorPath) === ownLock) {
        this.mirrorLocks.delete(mirrorPath);
      }
    };
  }

  /**
   * Ensures the mirror is present and up to date, then checks out `targetSha`
   * as a detached worktree for this run.
   *
   * @param owner        Repository owner (used for the mirror hash and logging).
   * @param repo         Repository name.
   * @param cloneUrl     Clone URL of the base repository (`origin`).
   * @param targetSha    Commit to check out.
   * @param runId        Run identifier; names the worktree directory.
   * @param headCloneUrl Clone URL of the PR head repository; when it differs
   *                     from `cloneUrl` it is fetched as remote `head`.
   * @returns The mirror and worktree paths.
   * @throws Whatever the sandboxed git commands or filesystem calls throw.
   */
  async prepareWorkspace(
    owner: string,
    repo: string,
    cloneUrl: string,
    targetSha: string,
    runId: string,
    headCloneUrl?: string
  ): Promise<LocalRepoPaths> {
    const repoHash = hashRepo(owner, repo);
    const mirrorsRoot = path.join(this.workDir, 'repos');
    const workspacesRoot = path.join(this.workDir, 'workspaces');
    const mirrorPath = path.join(mirrorsRoot, `${repoHash}.git`);
    const workspacePath = path.join(workspacesRoot, runId);

    await mkdir(mirrorsRoot, { recursive: true });
    await mkdir(workspacesRoot, { recursive: true });

    // Everything below mutates the mirror and therefore runs under its lock.
    const unlock = await this.acquireMirrorLock(mirrorPath);
    try {
      const authArgs = this.getAuthArgs();
      await this.syncMirror(owner, repo, cloneUrl, mirrorPath, authArgs);
      if (headCloneUrl && headCloneUrl !== cloneUrl) {
        await this.fetchForkHead(owner, repo, headCloneUrl, mirrorPath, authArgs);
      }
      await this.createWorktree(mirrorPath, workspacePath, targetSha);
    } finally {
      unlock();
    }

    return {
      mirrorPath,
      workspacePath,
    };
  }

  /** Clones the mirror if it does not exist yet, otherwise re-points and fetches `origin`. */
  private async syncMirror(
    owner: string,
    repo: string,
    cloneUrl: string,
    mirrorPath: string,
    authArgs: string[]
  ): Promise<void> {
    if (!(await this.pathExists(mirrorPath))) {
      logger.info('创建本地 mirror 仓库', { owner, repo, mirrorPath });
      await this.runGit([...authArgs, 'clone', '--mirror', cloneUrl, mirrorPath]);
      return;
    }
    // The stored remote URL carries no credentials; auth is passed per command.
    await this.runGit(['--git-dir', mirrorPath, 'remote', 'set-url', 'origin', cloneUrl]);
    await this.runGit([...authArgs, '--git-dir', mirrorPath, 'fetch', '--prune', 'origin', '+refs/*:refs/*']);
  }

  /** Fork PR: registers (or re-points) the `head` remote and fetches its branches. */
  private async fetchForkHead(
    owner: string,
    repo: string,
    headCloneUrl: string,
    mirrorPath: string,
    authArgs: string[]
  ): Promise<void> {
    logger.info('Fork PR检测添加head remote', { owner, repo, headCloneUrl });

    const remoteList = await this.runGit(['--git-dir', mirrorPath, 'remote']);
    // `git remote` prints one name per line. Compare whole names: a substring
    // test would also match remotes such as `ahead` or `headless`.
    const hasHeadRemote = remoteList.stdout
      .split('\n')
      .some((remoteName) => remoteName.trim() === 'head');

    const remoteAction = hasHeadRemote ? 'set-url' : 'add';
    await this.runGit(['--git-dir', mirrorPath, 'remote', remoteAction, 'head', headCloneUrl]);
    await this.runGit([...authArgs, '--git-dir', mirrorPath, 'fetch', 'head', '+refs/heads/*:refs/remotes/head/*']);
  }

  /** Replaces any previous directory at `workspacePath` with a detached worktree at `targetSha`. */
  private async createWorktree(mirrorPath: string, workspacePath: string, targetSha: string): Promise<void> {
    await rm(workspacePath, { recursive: true, force: true });
    // A crash can leave a worktree registered although its directory is gone;
    // prune drops such stale registrations before the new one is added.
    await this.runGit(['--git-dir', mirrorPath, 'worktree', 'prune']);
    await this.runGit(['--git-dir', mirrorPath, 'worktree', 'add', '--detach', workspacePath, targetSha]);
  }

  /**
   * Removes the run's worktree. If `git worktree remove` fails, the failure is
   * logged and the directory is deleted directly instead.
   */
  async cleanupWorkspace(paths: LocalRepoPaths): Promise<void> {
    // `worktree remove` modifies mirror metadata too, so take the mirror lock.
    const unlock = await this.acquireMirrorLock(paths.mirrorPath);
    try {
      await this.runGit(['--git-dir', paths.mirrorPath, 'worktree', 'remove', '--force', paths.workspacePath]);
    } catch (error) {
      logger.warn('移除 git worktree 失败,尝试直接清理目录', {
        workspacePath: paths.workspacePath,
        error: error instanceof Error ? error.message : String(error),
      });
      await rm(paths.workspacePath, { recursive: true, force: true });
    } finally {
      unlock();
    }
  }

  /**
   * Resolves the first parent of `commitSha`.
   *
   * @returns The parent SHA, or `null` when the command fails (for example for
   *          a root commit) or prints nothing.
   */
  async resolveCommitParent(workspacePath: string, commitSha: string): Promise<string | null> {
    try {
      const result = await this.runGit(['rev-parse', `${commitSha}^`], workspacePath);
      return result.stdout.trim() || null;
    } catch {
      return null;
    }
  }

  /** Returns whether `targetPath` is accessible on disk. */
  private async pathExists(targetPath: string): Promise<boolean> {
    try {
      await access(targetPath);
      return true;
    } catch {
      return false;
    }
  }
}

View File

@@ -0,0 +1,94 @@
import { execFile } from 'node:child_process';
/** Outcome of a command that SandboxExec.run completed successfully. */
export interface SandboxCommandResult {
// Captured standard output of the command.
stdout: string;
// Captured standard error of the command.
stderr: string;
// Wall-clock time in milliseconds between starting the command and its completion callback.
durationMs: number;
// Exit code; SandboxExec.run only resolves on success and always reports 0 here.
exitCode: number;
}
/** Per-invocation options for SandboxExec.run. */
export interface SandboxRunOptions {
// Working directory the command is started in.
cwd: string;
// Time limit in milliseconds, handed to execFile as its `timeout` option.
timeoutMs: number;
}
/**
 * Runs whitelisted commands via `execFile` (no shell) with a reduced
 * environment, and keeps credentials out of the error messages it produces.
 */
export class SandboxExec {
  private readonly allowedCommands: Set<string>;

  /** @param allowedCommands Exact command names that `run` may execute. */
  constructor(allowedCommands: string[]) {
    this.allowedCommands = new Set(allowedCommands);
  }

  /**
   * Masks credentials inside free-form text such as a command's stderr:
   * `Authorization: <scheme> <secret>` headers and the userinfo part of URLs
   * (`scheme://user:password@host`).
   */
  private redactSensitiveText(text: string): string {
    return text
      .replace(/(Authorization:\s*(?:token|bearer|basic)\s+)\S+/gi, '$1***')
      .replace(/\b([a-z][a-z0-9+.-]*:\/\/)[^\s/@]+@/gi, '$1***:***@');
  }

  /**
   * Returns a copy of `args` that is safe to log: the secret of a git
   * `http.extraHeader=Authorization:` argument is masked, and so are the
   * username and password of any argument that parses as a URL with userinfo.
   */
  private redactSensitiveArgs(args: string[]): string[] {
    return args.map((arg) => {
      if (arg.includes('http.extraHeader=Authorization:')) {
        return arg.replace(/(Authorization:\s*(?:token|bearer|basic)\s+)\S+/i, '$1***');
      }
      try {
        const url = new URL(arg);
        if (url.username || url.password) {
          url.username = '***';
          url.password = '***';
          return url.toString();
        }
      } catch {
        // Not a URL: keep the argument unchanged.
      }
      return arg;
    });
  }

  /**
   * Executes `command` with `args`.
   *
   * The child only receives `PATH`, `HOME`, `LANG` and `LC_ALL` from the
   * current environment, and its output is capped at 16 MiB.
   *
   * @param command Command name; must be in the whitelist.
   * @param args    Arguments passed verbatim (no shell interpretation).
   * @param options Working directory and timeout.
   * @returns stdout/stderr and the duration on success (`exitCode` is 0).
   * @throws Error when the command is not whitelisted, or when it fails. The
   *         failure message contains the redacted arguments, the exit code
   *         (-1 when no numeric code is available) and the redacted stderr.
   */
  async run(command: string, args: string[], options: SandboxRunOptions): Promise<SandboxCommandResult> {
    if (!this.allowedCommands.has(command)) {
      throw new Error(`命令未在白名单中: ${command}`);
    }
    const startedAt = Date.now();
    return new Promise<SandboxCommandResult>((resolve, reject) => {
      execFile(
        command,
        args,
        {
          cwd: options.cwd,
          timeout: options.timeoutMs,
          maxBuffer: 1024 * 1024 * 16,
          windowsHide: true,
          env: {
            PATH: process.env.PATH,
            HOME: process.env.HOME,
            LANG: process.env.LANG,
            LC_ALL: process.env.LC_ALL,
          },
        },
        (error, stdout, stderr) => {
          const durationMs = Date.now() - startedAt;
          if (error) {
            const code = typeof error.code === 'number' ? error.code : -1;
            // The message may be logged or persisted, so neither the arguments
            // nor stderr (git often echoes the remote URL there) may carry
            // credentials.
            const redactedArgs = this.redactSensitiveArgs(args);
            const redactedStderr = stderr ? this.redactSensitiveText(stderr) : '';
            reject(
              new Error(
                `命令执行失败: ${command} ${redactedArgs.join(' ')}; code=${code}; stderr=${redactedStderr || '(无 stderr原始错误已脱敏)'}`
              )
            );
            return;
          }
          resolve({
            stdout,
            stderr,
            durationMs,
            exitCode: 0,
          });
        }
      );
    });
  }
}