From 44d52cddc5e5301b9ec4ae8ca11ba926dd709cd3 Mon Sep 17 00:00:00 2001 From: jeffusion Date: Tue, 26 May 2026 22:40:35 +0800 Subject: [PATCH] feat(agent-kernel): cherry-pick high-value components from PR #15 - Zod findingResponseSchema + LLM repair for malformed model output (max 2 attempts) - Budget guards: maxSubagents, maxEmptyResponses, maxConsecutiveToolFailures - maxSubagents: refuse spawn at limit, allow model to summarize - consecutiveToolFailures: per-tool-call update, reset on success, immediate terminate - Tool permission scope system (6 scopes, allow/deny static policy, no ask) - allowListSpecified flag distinguishes subagent vs main agent resolution - SHA256 finding fingerprint with JSON tuple input (avoids colon ambiguity) - Token counting + context clipping via tokenlens (newline-boundary clipping) - diffBudget floor of 1000 tokens (prevents negative budget for small models) - tryParseFinalSubmission: full JSON first (preserves summaryMarkdown), Zod fallback - normalizeFinding: Zod-only validation, no lax fallback - E2E README: fix webhook signing (--data-binary, openssl dgst, repository.name) --- e2e/README.md | 11 +- .../loop/__tests__/main-agent-runner.test.ts | 112 ++++++++++++++++++ src/agent-kernel/loop/main-agent-runner.ts | 88 +++++++++++++- src/agent-kernel/loop/types.ts | 19 ++- src/agent-kernel/subagents/subagent-runner.ts | 1 + .../tools/__tests__/tool-permissions.test.ts | 99 +++++++++++----- src/agent-kernel/tools/tool-permissions.ts | 32 ++++- .../deterministic-publish-adapter.test.ts | 101 ++++++++++++++++ .../deterministic-publish-adapter.ts | 14 ++- src/review-agent/review-entrypoint.ts | 71 +++++++++-- .../schema/__tests__/finding-schema.test.ts | 81 +++++++++++++ src/review-agent/schema/finding-schema.ts | 57 +++++++++ src/review-agent/schema/index.ts | 10 ++ .../tools/__tests__/review-tools.test.ts | 36 +++--- src/review-agent/tools/index.ts | 6 +- src/review-agent/tools/review-tools.ts | 77 ++++++------ src/review/context/token-counter.ts | 4 +- 17 files changed, 709 insertions(+), 110 deletions(-) create mode 100644 src/review-agent/__tests__/deterministic-publish-adapter.test.ts create mode 100644 src/review-agent/schema/__tests__/finding-schema.test.ts create mode 100644 src/review-agent/schema/finding-schema.ts create mode 100644 src/review-agent/schema/index.ts diff --git a/e2e/README.md b/e2e/README.md index 491fd13..645f079 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -153,6 +153,7 @@ cat > /tmp/webhook_payload.json << EOF }, "repository": { "full_name": "e2e-admin/e2e-test-repo", + "name": "e2e-test-repo", "owner": { "login": "e2e-admin" }, "clone_url": "http://gitea:3000/e2e-admin/e2e-test-repo.git" }, @@ -160,15 +161,15 @@ cat > /tmp/webhook_payload.json << EOF } EOF -# 计算 HMAC 签名 -PAYLOAD=$(cat /tmp/webhook_payload.json) -SIG=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "e2e-test-secret" -binary | xxd -p -c 256) +# 计算 HMAC 签名(注意:必须基于文件内容计算,避免 shell 变量传递时改变内容) +SIG=$(cat /tmp/webhook_payload.json | openssl dgst -sha256 -hmac "e2e-test-secret" | awk '{print $NF}') -# 发送 webhook +# 发送 webhook(⚠️ 必须用 --data-binary 而非 -d,否则换行符被剥离导致签名不匹配) curl -s -X POST "http://localhost:3334/webhook/gitea" \ -H "Content-Type: application/json" \ + -H "X-Gitea-Event: pull_request" \ -H "X-Gitea-Signature: ${SIG}" \ - -d "$PAYLOAD" + --data-binary @/tmp/webhook_payload.json ``` ### 6. 验证审查结果 diff --git a/src/agent-kernel/loop/__tests__/main-agent-runner.test.ts b/src/agent-kernel/loop/__tests__/main-agent-runner.test.ts index 592cc3e..9ddc2d1 100644 --- a/src/agent-kernel/loop/__tests__/main-agent-runner.test.ts +++ b/src/agent-kernel/loop/__tests__/main-agent-runner.test.ts @@ -243,4 +243,116 @@ describe('MainAgentRunner', () => { JSON.stringify({ ok: false, error: { name: 'Error', message: 'lookup failed' } }) ); }); + + test('stops on maxEmptyResponses', async () => { + const modelClient = new FakeModelClient([ + response({ content: '' }), + response({ content: '' }), + response({ content: 'should not reach' }), + ]); + const runner = new MainAgentRunner({ + modelClient, + transcriptRepository: agentSessionRepository, + tools: [], + }); + + const result = await runner.run({ + model: 'mock-model', + userMessage: 'test empty responses', + maxTurns: 10, + maxToolCalls: 10, + timeoutMs: 60_000, + maxEmptyResponses: 2, + }); + + expect(result.status).toBe('max_empty_responses'); + expect(result.turns).toBe(2); + }); + + test('stops on maxConsecutiveToolFailures', async () => { + const failTool: MainAgentTool = { + definition: { + name: 'fail_tool', + description: 'Always fails.', + parameters: { type: 'object' }, + }, + execute: () => { + throw new Error('boom'); + }, + }; + const modelClient = new FakeModelClient([ + response({ + finishReason: 'tool_calls', + toolCalls: [{ id: 'c1', name: 'fail_tool', arguments: '{}' }], + }), + response({ + finishReason: 'tool_calls', + toolCalls: [{ id: 'c2', name: 'fail_tool', arguments: '{}' }], + }), + response({ + finishReason: 'tool_calls', + toolCalls: [{ id: 'c3', name: 'fail_tool', arguments: '{}' }], + }), + response({ content: 'should not reach' }), + ]); + const runner = new MainAgentRunner({ + modelClient, + transcriptRepository: agentSessionRepository, + tools: [failTool], + }); + + const result = await runner.run({ + model: 'mock-model', + userMessage: 'test tool failures', + maxTurns: 10, + maxToolCalls: 10, + timeoutMs: 60_000, + maxConsecutiveToolFailures: 3, + }); + + expect(result.status).toBe('max_consecutive_tool_failures'); + }); + + test('refuses subagent spawn beyond maxSubagents and allows summary', async () => { + const subagentTool: MainAgentTool = { + definition: { + name: 'spawn_subagent', + description: 'Spawn a subagent.', + parameters: { type: 'object' }, + }, + execute: () => ({ status: 'completed' }), + }; + const modelClient = new FakeModelClient([ + response({ + finishReason: 'tool_calls', + toolCalls: [{ id: 'c1', name: 'spawn_subagent', arguments: '{}' }], + }), + response({ + finishReason: 'tool_calls', + toolCalls: [{ id: 'c2', name: 'spawn_subagent', arguments: '{}' }], + }), + response({ + finishReason: 'tool_calls', + toolCalls: [{ id: 'c3', name: 'spawn_subagent', arguments: '{}' }], + }), + response({ content: 'review complete with 2 subagents' }), + ]); + const runner = new MainAgentRunner({ + modelClient, + transcriptRepository: agentSessionRepository, + tools: [subagentTool], + }); + + const result = await runner.run({ + model: 'mock-model', + userMessage: 'test subagent limit', + maxTurns: 10, + maxToolCalls: 10, + timeoutMs: 60_000, + maxSubagents: 2, + }); + + expect(result.status).toBe('completed'); + expect(result.finalText).toBe('review complete with 2 subagents'); + }); }); diff --git a/src/agent-kernel/loop/main-agent-runner.ts b/src/agent-kernel/loop/main-agent-runner.ts index 56e08b0..b8fcd85 100644 --- a/src/agent-kernel/loop/main-agent-runner.ts +++ b/src/agent-kernel/loop/main-agent-runner.ts @@ -84,9 +84,23 @@ export class MainAgentRunner { let turns = 0; let toolCalls = 0; + let subagentCount = 0; + let emptyResponseCount = 0; + let consecutiveToolFailures = 0; + const maxSubagents = input.maxSubagents ?? Number.POSITIVE_INFINITY; + const maxEmptyResponses = input.maxEmptyResponses ?? 3; + const maxConsecutiveToolFailures = input.maxConsecutiveToolFailures ?? 5; while (true) { - const budgetStatus = this.getBudgetStatus(input, startedAt, turns); + const budgetStatus = this.getBudgetStatus( + input, + startedAt, + turns, + emptyResponseCount, + consecutiveToolFailures, + maxEmptyResponses, + maxConsecutiveToolFailures + ); if (budgetStatus) { return this.finish(sessionId, budgetStatus, turns, toolCalls, messages); } @@ -102,6 +116,20 @@ export class MainAgentRunner { }); turns += 1; + + if (!response.content?.trim() && response.toolCalls.length === 0) { + emptyResponseCount += 1; + messages.push({ role: 'assistant', content: '' }); + this.transcriptRepository.appendMessage({ + sessionId, + role: 'assistant', + content: { text: '' }, + }); + continue; + } + + emptyResponseCount = 0; + const assistantMessage: LLMMessage = { role: 'assistant', content: response.content ?? '', @@ -139,8 +167,57 @@ export class MainAgentRunner { return this.finish(sessionId, 'max_tool_calls_reached', turns, toolCalls, messages); } + if (toolCall.name === 'spawn_subagent') { + if (subagentCount >= maxSubagents) { + const refusalMessage: LLMMessage = { + role: 'tool', + toolCallId: toolCall.id, + content: JSON.stringify({ + ok: false, + error: { + name: 'BudgetExceeded', + message: `Subagent limit reached (${maxSubagents}). Please summarize your findings instead.`, + }, + }), + }; + messages.push(refusalMessage); + this.transcriptRepository.appendMessage({ + sessionId, + role: 'tool', + content: { + toolCallId: toolCall.id, + toolName: toolCall.name, + result: { + ok: false, + error: { + name: 'BudgetExceeded', + message: `Subagent limit reached (${maxSubagents})`, + }, + }, + }, + }); + continue; + } + subagentCount += 1; + } + const result = await this.executeTool(toolCall, sessionId, input.model, turns); toolCalls += 1; + if (!result.ok) { + consecutiveToolFailures += 1; + } else { + consecutiveToolFailures = 0; + } + + if (!result.ok && consecutiveToolFailures >= maxConsecutiveToolFailures) { + return this.finish( + sessionId, + 'max_consecutive_tool_failures', + turns, + toolCalls, + messages + ); + } this.transcriptRepository.appendToolCall({ sessionId, @@ -174,10 +251,17 @@ export class MainAgentRunner { private getBudgetStatus( input: MainAgentRunInput, startedAt: number, - turns: number + turns: number, + emptyResponseCount: number, + consecutiveToolFailures: number, + maxEmptyResponses: number, + maxConsecutiveToolFailures: number ): MainAgentTerminalStatus | undefined { if (this.isTimedOut(input, startedAt)) return 'timeout_reached'; if (turns >= input.maxTurns) return 'max_turns_reached'; + if (emptyResponseCount >= maxEmptyResponses) return 'max_empty_responses'; + if (consecutiveToolFailures >= maxConsecutiveToolFailures) + return 'max_consecutive_tool_failures'; return undefined; } diff --git a/src/agent-kernel/loop/types.ts b/src/agent-kernel/loop/types.ts index 1e56bd3..0cbff5f 100644 --- a/src/agent-kernel/loop/types.ts +++ b/src/agent-kernel/loop/types.ts @@ -16,7 +16,10 @@ export type MainAgentTerminalStatus = | 'completed' | 'max_turns_reached' | 'max_tool_calls_reached' - | 'timeout_reached'; + | 'max_subagents_reached' + | 'timeout_reached' + | 'max_empty_responses' + | 'max_consecutive_tool_failures'; export interface MainAgentModelClient { chat(request: LLMChatRequest): Promise; @@ -29,8 +32,19 @@ export interface MainAgentToolContext { turn: number; } +export type ToolPermissionScope = + | 'read' + | 'write' + | 'command' + | 'network' + | 'git_write' + | 'cross_session'; + +export type ToolPermissionBehavior = 'allow' | 'deny'; + export interface MainAgentTool { definition: LLMToolDefinition; + permissionScope?: ToolPermissionScope; execute(argumentsValue: unknown, context: MainAgentToolContext): Promise | unknown; } @@ -79,7 +93,10 @@ export interface MainAgentRunInput { providerOptions?: Record; maxTurns: number; maxToolCalls: number; + maxSubagents?: number; timeoutMs: number; + maxEmptyResponses?: number; + maxConsecutiveToolFailures?: number; } export interface MainAgentRunResult { diff --git a/src/agent-kernel/subagents/subagent-runner.ts b/src/agent-kernel/subagents/subagent-runner.ts index 1a4d665..64e924b 100644 --- a/src/agent-kernel/subagents/subagent-runner.ts +++ b/src/agent-kernel/subagents/subagent-runner.ts @@ -97,6 +97,7 @@ export class SubagentRunner implements SpawnSubagentExecutor { availableTools: this.tools, allowedToolNames: input.agentDefinition.tools, disallowedToolNames: input.agentDefinition.disallowedTools, + allowListSpecified: true, }); const invocation = this.transcriptRepository.createInvocation({ diff --git a/src/agent-kernel/tools/__tests__/tool-permissions.test.ts b/src/agent-kernel/tools/__tests__/tool-permissions.test.ts index 180b8b1..2788a7c 100644 --- a/src/agent-kernel/tools/__tests__/tool-permissions.test.ts +++ b/src/agent-kernel/tools/__tests__/tool-permissions.test.ts @@ -1,64 +1,101 @@ import { describe, expect, test } from 'bun:test'; import type { MainAgentTool } from '../../loop'; -import { resolveAgentTools } from '../tool-permissions'; +import type { ToolPermissionScope } from '../../loop/types'; +import { + DEFAULT_SCOPE_POLICY, + evaluateToolPermission, + resolveAgentTools, +} from '../tool-permissions'; -function tool(name: string): MainAgentTool { +function tool(name: string, scope?: ToolPermissionScope): MainAgentTool { return { definition: { name, description: `${name} tool`, parameters: { type: 'object' }, }, + permissionScope: scope, execute: () => ({ name }), }; } -describe('resolveAgentTools', () => { - const availableTools = [tool('read_file'), tool('write_file'), tool('lookup')]; +describe('evaluateToolPermission', () => { + test('allows read scope', () => { + expect(evaluateToolPermission(tool('read_file', 'read')).behavior).toBe('allow'); + }); - test('returns the explicitly allowed subset', () => { + test('denies write scope', () => { + expect(evaluateToolPermission(tool('write_file', 'write')).behavior).toBe('deny'); + }); + + test('denies command scope', () => { + expect(evaluateToolPermission(tool('run_bash', 'command')).behavior).toBe('deny'); + }); + + test('denies network scope', () => { + expect(evaluateToolPermission(tool('http_request', 'network')).behavior).toBe('deny'); + }); + + test('defaults to read scope when unspecified', () => { + expect(evaluateToolPermission(tool('search_code')).behavior).toBe('allow'); + }); +}); + +describe('resolveAgentTools', () => { + const readTool = tool('read_file', 'read'); + const writeTool = tool('write_file', 'write'); + const searchTool = tool('search_code', 'read'); + + test('includes allowed tool names regardless of scope', () => { const resolved = resolveAgentTools({ - availableTools, - allowedToolNames: ['lookup', 'read_file'], + availableTools: [writeTool], + allowedToolNames: ['write_file'], disallowedToolNames: [], }); - - expect(resolved.tools.map((item) => item.definition.name)).toEqual(['read_file', 'lookup']); - expect(resolved.deniedToolNames).toEqual(['write_file']); - expect(resolved.unknownAllowedToolNames).toEqual([]); + expect(resolved.tools).toHaveLength(1); + expect(resolved.tools[0].definition.name).toBe('write_file'); }); - test('removes disallowed tools even when they are explicitly allowed', () => { + test('excludes disallowed tool names regardless of scope', () => { const resolved = resolveAgentTools({ - availableTools, - allowedToolNames: ['read_file', 'write_file'], - disallowedToolNames: ['write_file'], + availableTools: [readTool], + allowedToolNames: ['read_file'], + disallowedToolNames: ['read_file'], }); - - expect(resolved.tools.map((item) => item.definition.name)).toEqual(['read_file']); - expect(resolved.deniedToolNames).toEqual(['write_file', 'lookup']); + expect(resolved.tools).toHaveLength(0); + expect(resolved.deniedToolNames).toContain('read_file'); }); - test('treats an empty allow-list as no tools allowed', () => { + test('filters by scope policy when not in allowed/disallowed lists', () => { const resolved = resolveAgentTools({ - availableTools, + availableTools: [readTool, writeTool, searchTool], allowedToolNames: [], disallowedToolNames: [], }); - - expect(resolved.tools).toEqual([]); - expect(resolved.deniedToolNames).toEqual(['read_file', 'write_file', 'lookup']); + const names = resolved.tools.map((t) => t.definition.name); + expect(names).toContain('read_file'); + expect(names).toContain('search_code'); + expect(names).not.toContain('write_file'); }); - test('reports unknown allow-list names without granting tools', () => { + test('reports unknown allowed/disallowed names', () => { const resolved = resolveAgentTools({ - availableTools, - allowedToolNames: ['lookup', 'missing_tool'], - disallowedToolNames: ['unknown_deny'], + availableTools: [readTool], + allowedToolNames: ['missing_tool'], + disallowedToolNames: ['ghost_tool'], }); - - expect(resolved.tools.map((item) => item.definition.name)).toEqual(['lookup']); - expect(resolved.unknownAllowedToolNames).toEqual(['missing_tool']); - expect(resolved.unknownDisallowedToolNames).toEqual(['unknown_deny']); + expect(resolved.unknownAllowedToolNames).toContain('missing_tool'); + expect(resolved.unknownDisallowedToolNames).toContain('ghost_tool'); + }); +}); + +describe('DEFAULT_SCOPE_POLICY', () => { + test('only allows read scope', () => { + expect(DEFAULT_SCOPE_POLICY.read).toBe('allow'); + expect(DEFAULT_SCOPE_POLICY.write).toBe('deny'); + expect(DEFAULT_SCOPE_POLICY.command).toBe('deny'); + expect(DEFAULT_SCOPE_POLICY.network).toBe('deny'); + expect(DEFAULT_SCOPE_POLICY.git_write).toBe('deny'); + expect(DEFAULT_SCOPE_POLICY.cross_session).toBe('deny'); }); }); diff --git a/src/agent-kernel/tools/tool-permissions.ts b/src/agent-kernel/tools/tool-permissions.ts index d2c47ac..f584830 100644 --- a/src/agent-kernel/tools/tool-permissions.ts +++ b/src/agent-kernel/tools/tool-permissions.ts @@ -1,9 +1,11 @@ import type { MainAgentTool } from '../loop'; +import type { ToolPermissionBehavior, ToolPermissionScope } from '../loop/types'; export interface ResolveAgentToolsInput { availableTools: MainAgentTool[]; allowedToolNames: string[]; disallowedToolNames: string[]; + allowListSpecified?: boolean; } export interface ResolvedAgentTools { @@ -15,10 +17,33 @@ export interface ResolvedAgentTools { unknownDisallowedToolNames: string[]; } +export interface ToolPermissionDecision { + behavior: ToolPermissionBehavior; + reason: string; +} + +const DEFAULT_SCOPE_POLICY: Record = { + read: 'allow', + write: 'deny', + command: 'deny', + network: 'deny', + git_write: 'deny', + cross_session: 'deny', +}; + function uniqueNames(names: string[]): string[] { return [...new Set(names)]; } +export function evaluateToolPermission(tool: MainAgentTool): ToolPermissionDecision { + const scope = tool.permissionScope ?? 'read'; + const behavior = DEFAULT_SCOPE_POLICY[scope]; + return { + behavior, + reason: `Tool '${tool.definition.name}' ${behavior === 'allow' ? 'allowed' : 'denied'} for scope '${scope}'`, + }; +} + export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentTools { const availableToolNames = uniqueNames(input.availableTools.map((tool) => tool.definition.name)); const availableToolNamesSet = new Set(availableToolNames); @@ -29,7 +54,10 @@ export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentT const tools = input.availableTools.filter((tool) => { const toolName = tool.definition.name; - return allowedToolNamesSet.has(toolName) && !disallowedToolNamesSet.has(toolName); + if (disallowedToolNamesSet.has(toolName)) return false; + if (allowedToolNamesSet.size > 0) return allowedToolNamesSet.has(toolName); + if (input.allowListSpecified) return false; + return evaluateToolPermission(tool).behavior === 'allow'; }); const permittedToolNamesSet = new Set(tools.map((tool) => tool.definition.name)); @@ -46,3 +74,5 @@ export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentT ), }; } + +export { DEFAULT_SCOPE_POLICY }; diff --git a/src/review-agent/__tests__/deterministic-publish-adapter.test.ts b/src/review-agent/__tests__/deterministic-publish-adapter.test.ts new file mode 100644 index 0000000..12bd465 --- /dev/null +++ b/src/review-agent/__tests__/deterministic-publish-adapter.test.ts @@ -0,0 +1,101 @@ +import { describe, expect, it } from 'bun:test'; +import { createHash } from 'node:crypto'; +import { applyDeterministicPublishAdapter } from '../deterministic-publish-adapter'; +import type { ReviewAgentFinding } from '../tools'; + +function makeFinding(overrides: Partial = {}): ReviewAgentFinding { + return { + fingerprint: '', + category: 'security', + severity: 'high', + confidence: 0.9, + path: 'src/app.ts', + line: 42, + title: 'SQL injection', + detail: 'Unsanitized input', + evidence: 'db.query(input)', + suggestion: 'Use parameterized queries', + ...overrides, + }; +} + +function expectedFingerprint(category: string, path: string, line: number, title: string): string { + return createHash('sha256') + .update(`${category}:${path}:${line}:${title}`) + .digest('hex') + .slice(0, 24); +} + +describe('SHA256 fingerprint generation', () => { + it('produces consistent 24-char hex fingerprint', () => { + const fp = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection'); + expect(fp).toHaveLength(24); + expect(fp).toMatch(/^[0-9a-f]{24}$/); + }); + + it('produces different fingerprints for different inputs', () => { + const fp1 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection'); + const fp2 = expectedFingerprint('correctness', 'src/app.ts', 42, 'SQL injection'); + expect(fp1).not.toBe(fp2); + }); + + it('produces same fingerprint for same inputs', () => { + const fp1 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection'); + const fp2 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection'); + expect(fp1).toBe(fp2); + }); +}); + +describe('applyDeterministicPublishAdapter deduplication', () => { + it('dedupes findings with identical fingerprints keeping higher rank', async () => { + const finding1 = makeFinding({ severity: 'low', confidence: 0.5, fingerprint: 'dup-fp' }); + const finding2 = makeFinding({ severity: 'high', confidence: 0.9, fingerprint: 'dup-fp' }); + + const store = { + getRunDetails: async () => ({ findings: [], comments: [] }), + addFindings: async () => {}, + addCommentRecord: async () => {}, + } as any; + + const result = await applyDeterministicPublishAdapter({ + store, + runId: 'test-run', + submission: { summaryMarkdown: 'test', findings: [finding1, finding2] }, + }); + + expect(result.findings).toHaveLength(1); + expect(result.findings[0].severity).toBe('high'); + }); + + it('dedupes findings with same path/line/title (similarity key)', async () => { + const finding1 = makeFinding({ + path: 'a.ts', + line: 10, + title: 'Bug', + severity: 'medium', + fingerprint: 'fp1', + }); + const finding2 = makeFinding({ + path: 'a.ts', + line: 10, + title: 'Bug', + severity: 'high', + fingerprint: 'fp2', + }); + + const store = { + getRunDetails: async () => ({ findings: [], comments: [] }), + addFindings: async () => {}, + addCommentRecord: async () => {}, + } as any; + + const result = await applyDeterministicPublishAdapter({ + store, + runId: 'test-run', + submission: { summaryMarkdown: 'test', findings: [finding1, finding2] }, + }); + + expect(result.findings).toHaveLength(1); + expect(result.findings[0].severity).toBe('high'); + }); +}); diff --git a/src/review-agent/deterministic-publish-adapter.ts b/src/review-agent/deterministic-publish-adapter.ts index af29cca..194ee88 100644 --- a/src/review-agent/deterministic-publish-adapter.ts +++ b/src/review-agent/deterministic-publish-adapter.ts @@ -1,4 +1,4 @@ -import { randomUUID } from 'node:crypto'; +import { createHash, randomUUID } from 'node:crypto'; import { applyPublishPolicy } from '../review/policy/publish-policy'; import type { FileReviewStore } from '../review/store/file-review-store'; import type { Finding, ReviewCommentRecord } from '../review/types'; @@ -27,6 +27,13 @@ function rankFinding(finding: ReviewAgentFinding): number { return severityWeight(finding.severity) * 1000 + Math.round(finding.confidence * 100); } +function buildFingerprint(category: string, path: string, line: number, title: string): string { + return createHash('sha256') + .update(JSON.stringify([category, path, line, title])) + .digest('hex') + .slice(0, 24); +} + function normalizeTitleRoot(title: string): string { return title .toLowerCase() @@ -40,8 +47,11 @@ function similarityKey(finding: ReviewAgentFinding): string { } function dedupeFindings(candidates: ReviewAgentFinding[]): ReviewAgentFinding[] { + const ensured = candidates.map((f) => + f.fingerprint ? f : { ...f, fingerprint: buildFingerprint(f.category, f.path, f.line, f.title) } + ); const byFingerprint = new Map(); - for (const finding of candidates) { + for (const finding of ensured) { const existing = byFingerprint.get(finding.fingerprint); if (!existing || rankFinding(finding) > rankFinding(existing)) { byFingerprint.set(finding.fingerprint, finding); diff --git a/src/review-agent/review-entrypoint.ts b/src/review-agent/review-entrypoint.ts index 8f8b712..ab39be0 100644 --- a/src/review-agent/review-entrypoint.ts +++ b/src/review-agent/review-entrypoint.ts @@ -11,15 +11,18 @@ import config from '../config'; import { DiffExtractor } from '../review/context/diff-extractor'; import type { LocalRepoPaths } from '../review/context/local-repo-manager'; import { LocalRepoManager } from '../review/context/local-repo-manager'; +import { tokenCounter } from '../review/context/token-counter'; import { FileReviewStore } from '../review/store/file-review-store'; import type { Finding, ReviewContext, ReviewRun } from '../review/types'; import { logger } from '../utils/logger'; import { applyDeterministicPublishAdapter } from './deterministic-publish-adapter'; +import { buildRepairPrompt, parseFindingResponse } from './schema'; import { type ReviewToolState, type SubmittedReviewFindings, createReviewTaskTools, normalizeSubmission, + parseSubmissionFromText, } from './tools'; export interface ReviewAgentRepositoryContext { @@ -88,18 +91,27 @@ function buildSessionScope(run: ReviewRun): string { function tryParseFinalSubmission(text?: string): SubmittedReviewFindings | null { if (!text) return null; try { - return normalizeSubmission(JSON.parse(text)); - } catch { - return null; - } + const parsed = JSON.parse(text); + if (parsed && typeof parsed === 'object') { + const result = normalizeSubmission(parsed); + if (result.findings.length > 0) return result; + } + } catch {} + return parseSubmissionFromText(text); } -function buildReviewPrompt(context: ReviewAgentRunContext): string { +function buildReviewPrompt(context: ReviewAgentRunContext, model?: string): string { const changedFiles = context.diffSummary?.changedFiles ?? []; const fileSummary = changedFiles .map((file) => `- ${file.status} ${file.path} (+${file.additions}/-${file.deletions})`) .join('\n'); - const diff = context.diffSummary?.diff?.trim() || '(diff omitted)'; + const rawDiff = context.diffSummary?.diff?.trim() || '(diff omitted)'; + + const usableBudget = model ? tokenCounter.getUsableBudget(model) : 124_000; + const reservedForPrompt = 4000; + const diffBudget = Math.max(1000, usableBudget - reservedForPrompt); + const diff = + tokenCounter.count(rawDiff) > diffBudget ? tokenCounter.clip(rawDiff, diffBudget) : rawDiff; return [ 'You are the main code review agent for Gitea AI Assistant.', @@ -271,6 +283,33 @@ export class ReviewAgentEntrypoint { return snapshot.headSha; } + private async repairFindingsWithLLM( + rawText: string, + messages: Array<{ role: string; content: string; toolCalls?: unknown }>, + maxAttempts = 2 + ): Promise { + let current = rawText; + for (let attempt = 0; attempt < maxAttempts; attempt++) { + const outcome = parseFindingResponse(current); + if (outcome.ok) return current; + const repairPrompt = buildRepairPrompt(outcome.error); + messages.push({ role: 'assistant', content: current }); + messages.push({ role: 'user', content: repairPrompt }); + const response = await this.modelClient.chat({ + messages: messages as import('../llm/types').LLMMessage[], + model: this.model, + temperature: 0, + responseFormat: 'json', + }); + current = response.content ?? '{"findings":[]}'; + logger.info('LLM finding repair attempt', { + attempt: attempt + 1, + error: outcome.error.slice(0, 100), + }); + } + return current; + } + private async runMainAgent( run: ReviewRun, reviewContext: ReviewContext @@ -320,10 +359,13 @@ export class ReviewAgentEntrypoint { const agentResult = await runner.run({ agentType: 'review-main-agent', model: this.model, - userMessage: buildReviewPrompt(runContext), + userMessage: buildReviewPrompt(runContext, this.model), maxTurns: 8, maxToolCalls: 4, + maxSubagents: 3, timeoutMs: config.review.commandTimeoutMs, + maxEmptyResponses: 2, + maxConsecutiveToolFailures: 3, session: { agentType: 'review-main-agent', metadata: { @@ -343,7 +385,20 @@ export class ReviewAgentEntrypoint { const submitted = reviewToolState.submittedReview ?? tryParseFinalSubmission(agentResult.finalText); - const submission = submitted ?? { summaryMarkdown: agentResult.finalText ?? '', findings: [] }; + let submission: SubmittedReviewFindings; + + if (submitted) { + submission = submitted; + } else if (agentResult.finalText) { + const repaired = await this.repairFindingsWithLLM( + agentResult.finalText, + agentResult.messages + ); + const repairedSubmission = tryParseFinalSubmission(repaired); + submission = repairedSubmission ?? { summaryMarkdown: agentResult.finalText, findings: [] }; + } else { + submission = { summaryMarkdown: '', findings: [] }; + } const adapted = await applyDeterministicPublishAdapter({ store: this.store, runId: run.id, diff --git a/src/review-agent/schema/__tests__/finding-schema.test.ts b/src/review-agent/schema/__tests__/finding-schema.test.ts new file mode 100644 index 0000000..50ee29a --- /dev/null +++ b/src/review-agent/schema/__tests__/finding-schema.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, it } from 'bun:test'; +import { buildRepairPrompt, findingResponseSchema, parseFindingResponse } from '../index'; + +describe('findingResponseSchema', () => { + it('parses valid findings', () => { + const raw = JSON.stringify({ + findings: [ + { + severity: 'high', + confidence: 0.9, + path: 'src/app.ts', + line: 42, + title: 'SQL injection', + detail: 'Unsanitized input in query', + evidence: 'db.query(req.params.id)', + suggestion: 'Use parameterized queries', + category: 'security', + }, + ], + }); + const result = parseFindingResponse(raw); + expect(result.ok).toBe(true); + if (result.ok) { + expect(result.findings).toHaveLength(1); + expect(result.findings[0].severity).toBe('high'); + expect(result.findings[0].category).toBe('security'); + } + }); + + it('applies defaults for optional fields', () => { + const raw = JSON.stringify({ + findings: [ + { + severity: 'low', + path: 'src/util.ts', + line: 10, + title: 'Missing error handling', + detail: 'No try-catch around async call', + }, + ], + }); + const result = parseFindingResponse(raw); + expect(result.ok).toBe(true); + if (result.ok) { + const finding = result.findings[0]; + expect(finding.confidence).toBe(0.8); + expect(finding.evidence).toBe(''); + expect(finding.suggestion).toBe(''); + expect(finding.category).toBeUndefined(); + } + }); + + it('rejects invalid severity', () => { + const raw = JSON.stringify({ + findings: [{ severity: 'critical', path: 'a.ts', line: 1, title: 'x', detail: 'y' }], + }); + const result = parseFindingResponse(raw); + expect(result.ok).toBe(false); + }); + + it('rejects invalid JSON', () => { + const result = parseFindingResponse('not json'); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.raw).toBe('not json'); + } + }); + + it('defaults to empty findings array', () => { + const result = findingResponseSchema.parse({}); + expect(result.findings).toEqual([]); + }); +}); + +describe('buildRepairPrompt', () => { + it('includes the parse error', () => { + const prompt = buildRepairPrompt('missing field "path"'); + expect(prompt).toContain('missing field "path"'); + expect(prompt).toContain('JSON'); + }); +}); diff --git a/src/review-agent/schema/finding-schema.ts b/src/review-agent/schema/finding-schema.ts new file mode 100644 index 0000000..f7ed219 --- /dev/null +++ b/src/review-agent/schema/finding-schema.ts @@ -0,0 +1,57 @@ +import { z } from 'zod'; + +const findingItemSchema = z.object({ + category: z.enum(['correctness', 'security', 'reliability', 'maintainability']).optional(), + severity: z.enum(['high', 'medium', 'low']), + confidence: z.number().min(0).max(1).optional().default(0.8), + path: z.string().min(1), + line: z.number().int().positive(), + title: z.string().min(1), + detail: z.string().min(1), + evidence: z.string().optional().default(''), + suggestion: z.string().optional().default(''), + fingerprint: z.string().min(1).optional(), +}); + +export const findingResponseSchema = z.object({ + findings: z.array(findingItemSchema).default([]), +}); + +export type FindingResponse = z.infer; +export type FindingItem = z.infer; + +export interface FindingParseResult { + ok: true; + findings: FindingItem[]; +} + +export interface FindingParseError { + ok: false; + error: string; + raw: string; +} + +export type FindingParseOutcome = FindingParseResult | FindingParseError; + +/** + * Parse raw LLM output text into validated findings. + * Returns a discriminated union: { ok: true, findings } or { ok: false, error, raw }. + */ +export function parseFindingResponse(raw: string): FindingParseOutcome { + try { + const parsed = JSON.parse(raw); + const result = findingResponseSchema.parse(parsed); + return { ok: true, findings: result.findings }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { ok: false, error: message, raw }; + } +} + +/** + * Build a repair prompt to ask the LLM to fix its malformed JSON output. + * Returns null if no more repair attempts should be made. + */ +export function buildRepairPrompt(parseError: string): string { + return `上一次最终结果无法通过 findingResponseSchema 校验。校验错误: ${parseError}\n\n请输出严格 JSON,格式如下:\n{"findings":[{"severity":"high|medium|low","confidence":0.8,"path":"...","line":1,"title":"...","detail":"...","evidence":"...","suggestion":"...","category":"correctness|security|reliability|maintainability"}]}\n\n不要输出额外文字,只输出 JSON。`; +} diff --git a/src/review-agent/schema/index.ts b/src/review-agent/schema/index.ts new file mode 100644 index 0000000..ee03b6a --- /dev/null +++ b/src/review-agent/schema/index.ts @@ -0,0 +1,10 @@ +export { + findingResponseSchema, + parseFindingResponse, + buildRepairPrompt, + type FindingResponse, + type FindingItem, + type FindingParseOutcome, + type FindingParseResult, + type FindingParseError, +} from './finding-schema'; diff --git a/src/review-agent/tools/__tests__/review-tools.test.ts b/src/review-agent/tools/__tests__/review-tools.test.ts index 0e22a3b..96c1bd5 100644 --- a/src/review-agent/tools/__tests__/review-tools.test.ts +++ b/src/review-agent/tools/__tests__/review-tools.test.ts @@ -111,21 +111,7 @@ describe('review task tools', () => { }); test('normalizeSubmission fills defaults and generated fingerprint', () => { - expect( - normalizeSubmission({ - summaryMarkdown: 'Summary', - findings: [ - { - category: 'maintainability', - severity: 'medium', - path: 'src/app.ts', - line: 2, - title: 'Extract helper', - detail: 'The logic is duplicated.', - }, - ], - }) - ).toEqual({ + const result = normalizeSubmission({ summaryMarkdown: 'Summary', findings: [ { @@ -135,13 +121,23 @@ describe('review task tools', () => { line: 2, title: 'Extract helper', detail: 'The logic is duplicated.', - confidence: 0.8, - evidence: '', - suggestion: '', - fingerprint: 'maintainability:src/app.ts:2:Extract helper', }, ], }); + expect(result.summaryMarkdown).toBe('Summary'); + expect(result.findings).toHaveLength(1); + expect(result.findings[0]).toMatchObject({ + category: 'maintainability', + severity: 'medium', + path: 'src/app.ts', + line: 2, + title: 'Extract helper', + detail: 'The logic is duplicated.', + confidence: 0.8, + evidence: '', + suggestion: '', + fingerprint: 'd90c88d5929c2859b8591d69', + }); }); test('submit_review_findings stores valid findings', async () => { @@ -171,7 +167,7 @@ describe('review task tools', () => { confidence: 0.8, evidence: '', suggestion: '', - fingerprint: 'reliability:src/app.ts:2:Token may be undefined', + fingerprint: 'f226057a37399d83b53cbeed', }); }); diff --git a/src/review-agent/tools/index.ts b/src/review-agent/tools/index.ts index f3dcec2..e91b8e6 100644 --- a/src/review-agent/tools/index.ts +++ b/src/review-agent/tools/index.ts @@ -1,4 +1,8 @@ -export { createReviewTaskTools, normalizeSubmission } from './review-tools'; +export { + createReviewTaskTools, + normalizeSubmission, + parseSubmissionFromText, +} from './review-tools'; export type { CreateReviewTaskToolsOptions, ReviewToolState, diff --git a/src/review-agent/tools/review-tools.ts b/src/review-agent/tools/review-tools.ts index ffe2327..6124f15 100644 --- a/src/review-agent/tools/review-tools.ts +++ b/src/review-agent/tools/review-tools.ts @@ -1,8 +1,8 @@ +import { createHash } from 'node:crypto'; import type { MainAgentTool } from '../../agent-kernel/loop/types'; import type { Finding, ReviewContext } from '../../review/types'; - -const VALID_CATEGORIES = ['correctness', 'security', 'reliability', 'maintainability'] as const; -const VALID_SEVERITIES = ['high', 'medium', 'low'] as const; +import type { FindingItem } from '../schema'; +import { parseFindingResponse } from '../schema'; export type ReviewAgentFinding = Omit; @@ -51,45 +51,46 @@ function requireObject(value: unknown, message: string): Record return value as Record; } -function requireString(input: Record, key: string): string { - const value = readString(input, key); - if (!value) throw new Error(`Finding ${key} must be a non-empty string`); - return value; +function buildFingerprint(category: string, path: string, line: number, title: string): string { + return createHash('sha256') + .update(JSON.stringify([category, path, line, title])) + .digest('hex') + .slice(0, 24); +} + +function findingItemToReviewAgent(item: FindingItem): ReviewAgentFinding { + const category = item.category ?? 'correctness'; + return { + fingerprint: item.fingerprint || buildFingerprint(category, item.path, item.line, item.title), + category, + severity: item.severity, + confidence: item.confidence ?? 0.8, + path: item.path, + line: item.line, + title: item.title, + detail: item.detail, + evidence: item.evidence ?? '', + suggestion: item.suggestion ?? '', + }; +} + +/** Parse raw JSON text into validated SubmittedReviewFindings using Zod schema. */ +export function parseSubmissionFromText(raw: string): SubmittedReviewFindings | null { + const outcome = parseFindingResponse(raw); + if (!outcome.ok) return null; + return { + summaryMarkdown: '', + findings: outcome.findings.map(findingItemToReviewAgent), + }; } function normalizeFinding(value: unknown): ReviewAgentFinding { - const input = requireObject(value, 'Each finding must be an object'); - - const category = readString(input, 'category'); - if (!category || !VALID_CATEGORIES.includes(category as (typeof VALID_CATEGORIES)[number])) { - throw new Error( - 'Finding category must be correctness, security, reliability, or maintainability' - ); + const outcome = parseFindingResponse(JSON.stringify({ findings: [value] })); + if (outcome.ok && outcome.findings.length > 0) { + return findingItemToReviewAgent(outcome.findings[0]); } - - const severity = readString(input, 'severity'); - if (!severity || !VALID_SEVERITIES.includes(severity as (typeof VALID_SEVERITIES)[number])) { - throw new Error('Finding severity must be high, medium, or low'); - } - - const path = requireString(input, 'path'); - const line = readNumber(input, 'line'); - if (line === undefined) throw new Error('Finding line must be a number'); - const title = requireString(input, 'title'); - const detail = requireString(input, 'detail'); - - return { - fingerprint: readString(input, 'fingerprint') || `${category}:${path}:${line}:${title}`, - category, - severity, - confidence: readNumber(input, 'confidence') ?? 0.8, - path, - line, - title, - detail, - evidence: readString(input, 'evidence') ?? '', - suggestion: readString(input, 'suggestion') ?? '', - } as ReviewAgentFinding; + const detail = !outcome.ok ? outcome.error : 'no valid findings in response'; + throw new Error(`Finding validation failed: ${detail}`); } export function normalizeSubmission(value: unknown): SubmittedReviewFindings { diff --git a/src/review/context/token-counter.ts b/src/review/context/token-counter.ts index 6fca58c..5efc572 100644 --- a/src/review/context/token-counter.ts +++ b/src/review/context/token-counter.ts @@ -76,7 +76,9 @@ export class TokenCounter { if (text.length <= maxChars) { return text; } - return `${text.slice(0, maxChars)}\n... [truncated, exceeded ${maxTokens} token budget]`; + const lastNewline = text.lastIndexOf('\n', maxChars); + const cutPoint = lastNewline > maxChars * 0.5 ? lastNewline : maxChars; + return `${text.slice(0, cutPoint)}\n... [truncated, exceeded ${maxTokens} token budget]`; } // -------------------------------------------------------------------------