feat(agent-kernel): cherry-pick high-value components from PR #15

- Zod findingResponseSchema + LLM repair for malformed model output (max 2 attempts)
- Budget guards: maxSubagents, maxEmptyResponses, maxConsecutiveToolFailures
  - maxSubagents: refuse spawn at limit, allow model to summarize
  - consecutiveToolFailures: per-tool-call update, reset on success, immediate terminate
- Tool permission scope system (6 scopes, allow/deny static policy, no ask)
  - allowListSpecified flag distinguishes subagent vs main agent resolution
- SHA256 finding fingerprint with JSON tuple input (avoids colon ambiguity)
- Token counting + context clipping via tokenlens (newline-boundary clipping)
- diffBudget floor of 1000 tokens (prevents negative budget for small models)
- tryParseFinalSubmission: full JSON first (preserves summaryMarkdown), Zod fallback
- normalizeFinding: Zod-only validation, no lax fallback
- E2E README: fix webhook signing (--data-binary, openssl dgst, repository.name)
This commit is contained in:
jeffusion
2026-05-26 22:40:35 +08:00
committed by Jeffusion
parent 1e38a0e5e0
commit 44d52cddc5
17 changed files with 709 additions and 110 deletions

View File

@@ -153,6 +153,7 @@ cat > /tmp/webhook_payload.json << EOF
},
"repository": {
"full_name": "e2e-admin/e2e-test-repo",
"name": "e2e-test-repo",
"owner": { "login": "e2e-admin" },
"clone_url": "http://gitea:3000/e2e-admin/e2e-test-repo.git"
},
@@ -160,15 +161,15 @@ cat > /tmp/webhook_payload.json << EOF
}
EOF
# 计算 HMAC 签名
PAYLOAD=$(cat /tmp/webhook_payload.json)
SIG=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "e2e-test-secret" -binary | xxd -p -c 256)
# 计算 HMAC 签名(注意:必须基于文件内容计算,避免 shell 变量传递时改变内容)
SIG=$(cat /tmp/webhook_payload.json | openssl dgst -sha256 -hmac "e2e-test-secret" | awk '{print $NF}')
# 发送 webhook
# 发送 webhook(⚠️ 必须用 --data-binary 而非 -d否则换行符被剥离导致签名不匹配
curl -s -X POST "http://localhost:3334/webhook/gitea" \
-H "Content-Type: application/json" \
-H "X-Gitea-Event: pull_request" \
-H "X-Gitea-Signature: ${SIG}" \
-d "$PAYLOAD"
--data-binary @/tmp/webhook_payload.json
```
### 6. 验证审查结果

View File

@@ -243,4 +243,116 @@ describe('MainAgentRunner', () => {
JSON.stringify({ ok: false, error: { name: 'Error', message: 'lookup failed' } })
);
});
test('stops on maxEmptyResponses', async () => {
const modelClient = new FakeModelClient([
response({ content: '' }),
response({ content: '' }),
response({ content: 'should not reach' }),
]);
const runner = new MainAgentRunner({
modelClient,
transcriptRepository: agentSessionRepository,
tools: [],
});
const result = await runner.run({
model: 'mock-model',
userMessage: 'test empty responses',
maxTurns: 10,
maxToolCalls: 10,
timeoutMs: 60_000,
maxEmptyResponses: 2,
});
expect(result.status).toBe('max_empty_responses');
expect(result.turns).toBe(2);
});
test('stops on maxConsecutiveToolFailures', async () => {
const failTool: MainAgentTool = {
definition: {
name: 'fail_tool',
description: 'Always fails.',
parameters: { type: 'object' },
},
execute: () => {
throw new Error('boom');
},
};
const modelClient = new FakeModelClient([
response({
finishReason: 'tool_calls',
toolCalls: [{ id: 'c1', name: 'fail_tool', arguments: '{}' }],
}),
response({
finishReason: 'tool_calls',
toolCalls: [{ id: 'c2', name: 'fail_tool', arguments: '{}' }],
}),
response({
finishReason: 'tool_calls',
toolCalls: [{ id: 'c3', name: 'fail_tool', arguments: '{}' }],
}),
response({ content: 'should not reach' }),
]);
const runner = new MainAgentRunner({
modelClient,
transcriptRepository: agentSessionRepository,
tools: [failTool],
});
const result = await runner.run({
model: 'mock-model',
userMessage: 'test tool failures',
maxTurns: 10,
maxToolCalls: 10,
timeoutMs: 60_000,
maxConsecutiveToolFailures: 3,
});
expect(result.status).toBe('max_consecutive_tool_failures');
});
test('refuses subagent spawn beyond maxSubagents and allows summary', async () => {
const subagentTool: MainAgentTool = {
definition: {
name: 'spawn_subagent',
description: 'Spawn a subagent.',
parameters: { type: 'object' },
},
execute: () => ({ status: 'completed' }),
};
const modelClient = new FakeModelClient([
response({
finishReason: 'tool_calls',
toolCalls: [{ id: 'c1', name: 'spawn_subagent', arguments: '{}' }],
}),
response({
finishReason: 'tool_calls',
toolCalls: [{ id: 'c2', name: 'spawn_subagent', arguments: '{}' }],
}),
response({
finishReason: 'tool_calls',
toolCalls: [{ id: 'c3', name: 'spawn_subagent', arguments: '{}' }],
}),
response({ content: 'review complete with 2 subagents' }),
]);
const runner = new MainAgentRunner({
modelClient,
transcriptRepository: agentSessionRepository,
tools: [subagentTool],
});
const result = await runner.run({
model: 'mock-model',
userMessage: 'test subagent limit',
maxTurns: 10,
maxToolCalls: 10,
timeoutMs: 60_000,
maxSubagents: 2,
});
expect(result.status).toBe('completed');
expect(result.finalText).toBe('review complete with 2 subagents');
});
});

View File

@@ -84,9 +84,23 @@ export class MainAgentRunner {
let turns = 0;
let toolCalls = 0;
let subagentCount = 0;
let emptyResponseCount = 0;
let consecutiveToolFailures = 0;
const maxSubagents = input.maxSubagents ?? Number.POSITIVE_INFINITY;
const maxEmptyResponses = input.maxEmptyResponses ?? 3;
const maxConsecutiveToolFailures = input.maxConsecutiveToolFailures ?? 5;
while (true) {
const budgetStatus = this.getBudgetStatus(input, startedAt, turns);
const budgetStatus = this.getBudgetStatus(
input,
startedAt,
turns,
emptyResponseCount,
consecutiveToolFailures,
maxEmptyResponses,
maxConsecutiveToolFailures
);
if (budgetStatus) {
return this.finish(sessionId, budgetStatus, turns, toolCalls, messages);
}
@@ -102,6 +116,20 @@ export class MainAgentRunner {
});
turns += 1;
if (!response.content?.trim() && response.toolCalls.length === 0) {
emptyResponseCount += 1;
messages.push({ role: 'assistant', content: '' });
this.transcriptRepository.appendMessage({
sessionId,
role: 'assistant',
content: { text: '' },
});
continue;
}
emptyResponseCount = 0;
const assistantMessage: LLMMessage = {
role: 'assistant',
content: response.content ?? '',
@@ -139,8 +167,57 @@ export class MainAgentRunner {
return this.finish(sessionId, 'max_tool_calls_reached', turns, toolCalls, messages);
}
if (toolCall.name === 'spawn_subagent') {
if (subagentCount >= maxSubagents) {
const refusalMessage: LLMMessage = {
role: 'tool',
toolCallId: toolCall.id,
content: JSON.stringify({
ok: false,
error: {
name: 'BudgetExceeded',
message: `Subagent limit reached (${maxSubagents}). Please summarize your findings instead.`,
},
}),
};
messages.push(refusalMessage);
this.transcriptRepository.appendMessage({
sessionId,
role: 'tool',
content: {
toolCallId: toolCall.id,
toolName: toolCall.name,
result: {
ok: false,
error: {
name: 'BudgetExceeded',
message: `Subagent limit reached (${maxSubagents})`,
},
},
},
});
continue;
}
subagentCount += 1;
}
const result = await this.executeTool(toolCall, sessionId, input.model, turns);
toolCalls += 1;
if (!result.ok) {
consecutiveToolFailures += 1;
} else {
consecutiveToolFailures = 0;
}
if (!result.ok && consecutiveToolFailures >= maxConsecutiveToolFailures) {
return this.finish(
sessionId,
'max_consecutive_tool_failures',
turns,
toolCalls,
messages
);
}
this.transcriptRepository.appendToolCall({
sessionId,
@@ -174,10 +251,17 @@ export class MainAgentRunner {
private getBudgetStatus(
input: MainAgentRunInput,
startedAt: number,
turns: number
turns: number,
emptyResponseCount: number,
consecutiveToolFailures: number,
maxEmptyResponses: number,
maxConsecutiveToolFailures: number
): MainAgentTerminalStatus | undefined {
if (this.isTimedOut(input, startedAt)) return 'timeout_reached';
if (turns >= input.maxTurns) return 'max_turns_reached';
if (emptyResponseCount >= maxEmptyResponses) return 'max_empty_responses';
if (consecutiveToolFailures >= maxConsecutiveToolFailures)
return 'max_consecutive_tool_failures';
return undefined;
}

View File

@@ -16,7 +16,10 @@ export type MainAgentTerminalStatus =
| 'completed'
| 'max_turns_reached'
| 'max_tool_calls_reached'
| 'timeout_reached';
| 'max_subagents_reached'
| 'timeout_reached'
| 'max_empty_responses'
| 'max_consecutive_tool_failures';
export interface MainAgentModelClient {
chat(request: LLMChatRequest): Promise<LLMChatResponse>;
@@ -29,8 +32,19 @@ export interface MainAgentToolContext {
turn: number;
}
export type ToolPermissionScope =
| 'read'
| 'write'
| 'command'
| 'network'
| 'git_write'
| 'cross_session';
export type ToolPermissionBehavior = 'allow' | 'deny';
export interface MainAgentTool {
definition: LLMToolDefinition;
permissionScope?: ToolPermissionScope;
execute(argumentsValue: unknown, context: MainAgentToolContext): Promise<unknown> | unknown;
}
@@ -79,7 +93,10 @@ export interface MainAgentRunInput {
providerOptions?: Record<string, unknown>;
maxTurns: number;
maxToolCalls: number;
maxSubagents?: number;
timeoutMs: number;
maxEmptyResponses?: number;
maxConsecutiveToolFailures?: number;
}
export interface MainAgentRunResult {

View File

@@ -97,6 +97,7 @@ export class SubagentRunner implements SpawnSubagentExecutor {
availableTools: this.tools,
allowedToolNames: input.agentDefinition.tools,
disallowedToolNames: input.agentDefinition.disallowedTools,
allowListSpecified: true,
});
const invocation = this.transcriptRepository.createInvocation({

View File

@@ -1,64 +1,101 @@
import { describe, expect, test } from 'bun:test';
import type { MainAgentTool } from '../../loop';
import { resolveAgentTools } from '../tool-permissions';
import type { ToolPermissionScope } from '../../loop/types';
import {
DEFAULT_SCOPE_POLICY,
evaluateToolPermission,
resolveAgentTools,
} from '../tool-permissions';
function tool(name: string): MainAgentTool {
function tool(name: string, scope?: ToolPermissionScope): MainAgentTool {
return {
definition: {
name,
description: `${name} tool`,
parameters: { type: 'object' },
},
permissionScope: scope,
execute: () => ({ name }),
};
}
describe('resolveAgentTools', () => {
const availableTools = [tool('read_file'), tool('write_file'), tool('lookup')];
describe('evaluateToolPermission', () => {
test('allows read scope', () => {
expect(evaluateToolPermission(tool('read_file', 'read')).behavior).toBe('allow');
});
test('returns the explicitly allowed subset', () => {
test('denies write scope', () => {
expect(evaluateToolPermission(tool('write_file', 'write')).behavior).toBe('deny');
});
test('denies command scope', () => {
expect(evaluateToolPermission(tool('run_bash', 'command')).behavior).toBe('deny');
});
test('denies network scope', () => {
expect(evaluateToolPermission(tool('http_request', 'network')).behavior).toBe('deny');
});
test('defaults to read scope when unspecified', () => {
expect(evaluateToolPermission(tool('search_code')).behavior).toBe('allow');
});
});
describe('resolveAgentTools', () => {
const readTool = tool('read_file', 'read');
const writeTool = tool('write_file', 'write');
const searchTool = tool('search_code', 'read');
test('includes allowed tool names regardless of scope', () => {
const resolved = resolveAgentTools({
availableTools,
allowedToolNames: ['lookup', 'read_file'],
availableTools: [writeTool],
allowedToolNames: ['write_file'],
disallowedToolNames: [],
});
expect(resolved.tools.map((item) => item.definition.name)).toEqual(['read_file', 'lookup']);
expect(resolved.deniedToolNames).toEqual(['write_file']);
expect(resolved.unknownAllowedToolNames).toEqual([]);
expect(resolved.tools).toHaveLength(1);
expect(resolved.tools[0].definition.name).toBe('write_file');
});
test('removes disallowed tools even when they are explicitly allowed', () => {
test('excludes disallowed tool names regardless of scope', () => {
const resolved = resolveAgentTools({
availableTools,
allowedToolNames: ['read_file', 'write_file'],
disallowedToolNames: ['write_file'],
availableTools: [readTool],
allowedToolNames: ['read_file'],
disallowedToolNames: ['read_file'],
});
expect(resolved.tools.map((item) => item.definition.name)).toEqual(['read_file']);
expect(resolved.deniedToolNames).toEqual(['write_file', 'lookup']);
expect(resolved.tools).toHaveLength(0);
expect(resolved.deniedToolNames).toContain('read_file');
});
test('treats an empty allow-list as no tools allowed', () => {
test('filters by scope policy when not in allowed/disallowed lists', () => {
const resolved = resolveAgentTools({
availableTools,
availableTools: [readTool, writeTool, searchTool],
allowedToolNames: [],
disallowedToolNames: [],
});
expect(resolved.tools).toEqual([]);
expect(resolved.deniedToolNames).toEqual(['read_file', 'write_file', 'lookup']);
const names = resolved.tools.map((t) => t.definition.name);
expect(names).toContain('read_file');
expect(names).toContain('search_code');
expect(names).not.toContain('write_file');
});
test('reports unknown allow-list names without granting tools', () => {
test('reports unknown allowed/disallowed names', () => {
const resolved = resolveAgentTools({
availableTools,
allowedToolNames: ['lookup', 'missing_tool'],
disallowedToolNames: ['unknown_deny'],
availableTools: [readTool],
allowedToolNames: ['missing_tool'],
disallowedToolNames: ['ghost_tool'],
});
expect(resolved.tools.map((item) => item.definition.name)).toEqual(['lookup']);
expect(resolved.unknownAllowedToolNames).toEqual(['missing_tool']);
expect(resolved.unknownDisallowedToolNames).toEqual(['unknown_deny']);
expect(resolved.unknownAllowedToolNames).toContain('missing_tool');
expect(resolved.unknownDisallowedToolNames).toContain('ghost_tool');
});
});
describe('DEFAULT_SCOPE_POLICY', () => {
test('only allows read scope', () => {
expect(DEFAULT_SCOPE_POLICY.read).toBe('allow');
expect(DEFAULT_SCOPE_POLICY.write).toBe('deny');
expect(DEFAULT_SCOPE_POLICY.command).toBe('deny');
expect(DEFAULT_SCOPE_POLICY.network).toBe('deny');
expect(DEFAULT_SCOPE_POLICY.git_write).toBe('deny');
expect(DEFAULT_SCOPE_POLICY.cross_session).toBe('deny');
});
});

View File

@@ -1,9 +1,11 @@
import type { MainAgentTool } from '../loop';
import type { ToolPermissionBehavior, ToolPermissionScope } from '../loop/types';
export interface ResolveAgentToolsInput {
availableTools: MainAgentTool[];
allowedToolNames: string[];
disallowedToolNames: string[];
allowListSpecified?: boolean;
}
export interface ResolvedAgentTools {
@@ -15,10 +17,33 @@ export interface ResolvedAgentTools {
unknownDisallowedToolNames: string[];
}
export interface ToolPermissionDecision {
behavior: ToolPermissionBehavior;
reason: string;
}
const DEFAULT_SCOPE_POLICY: Record<ToolPermissionScope, ToolPermissionBehavior> = {
read: 'allow',
write: 'deny',
command: 'deny',
network: 'deny',
git_write: 'deny',
cross_session: 'deny',
};
function uniqueNames(names: string[]): string[] {
return [...new Set(names)];
}
export function evaluateToolPermission(tool: MainAgentTool): ToolPermissionDecision {
const scope = tool.permissionScope ?? 'read';
const behavior = DEFAULT_SCOPE_POLICY[scope];
return {
behavior,
reason: `Tool '${tool.definition.name}' ${behavior === 'allow' ? 'allowed' : 'denied'} for scope '${scope}'`,
};
}
export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentTools {
const availableToolNames = uniqueNames(input.availableTools.map((tool) => tool.definition.name));
const availableToolNamesSet = new Set(availableToolNames);
@@ -29,7 +54,10 @@ export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentT
const tools = input.availableTools.filter((tool) => {
const toolName = tool.definition.name;
return allowedToolNamesSet.has(toolName) && !disallowedToolNamesSet.has(toolName);
if (disallowedToolNamesSet.has(toolName)) return false;
if (allowedToolNamesSet.size > 0) return allowedToolNamesSet.has(toolName);
if (input.allowListSpecified) return false;
return evaluateToolPermission(tool).behavior === 'allow';
});
const permittedToolNamesSet = new Set(tools.map((tool) => tool.definition.name));
@@ -46,3 +74,5 @@ export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentT
),
};
}
export { DEFAULT_SCOPE_POLICY };

View File

@@ -0,0 +1,101 @@
import { describe, expect, it } from 'bun:test';
import { createHash } from 'node:crypto';
import { applyDeterministicPublishAdapter } from '../deterministic-publish-adapter';
import type { ReviewAgentFinding } from '../tools';
function makeFinding(overrides: Partial<ReviewAgentFinding> = {}): ReviewAgentFinding {
return {
fingerprint: '',
category: 'security',
severity: 'high',
confidence: 0.9,
path: 'src/app.ts',
line: 42,
title: 'SQL injection',
detail: 'Unsanitized input',
evidence: 'db.query(input)',
suggestion: 'Use parameterized queries',
...overrides,
};
}
function expectedFingerprint(category: string, path: string, line: number, title: string): string {
return createHash('sha256')
.update(`${category}:${path}:${line}:${title}`)
.digest('hex')
.slice(0, 24);
}
describe('SHA256 fingerprint generation', () => {
it('produces consistent 24-char hex fingerprint', () => {
const fp = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
expect(fp).toHaveLength(24);
expect(fp).toMatch(/^[0-9a-f]{24}$/);
});
it('produces different fingerprints for different inputs', () => {
const fp1 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
const fp2 = expectedFingerprint('correctness', 'src/app.ts', 42, 'SQL injection');
expect(fp1).not.toBe(fp2);
});
it('produces same fingerprint for same inputs', () => {
const fp1 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
const fp2 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
expect(fp1).toBe(fp2);
});
});
describe('applyDeterministicPublishAdapter deduplication', () => {
it('dedupes findings with identical fingerprints keeping higher rank', async () => {
const finding1 = makeFinding({ severity: 'low', confidence: 0.5, fingerprint: 'dup-fp' });
const finding2 = makeFinding({ severity: 'high', confidence: 0.9, fingerprint: 'dup-fp' });
const store = {
getRunDetails: async () => ({ findings: [], comments: [] }),
addFindings: async () => {},
addCommentRecord: async () => {},
} as any;
const result = await applyDeterministicPublishAdapter({
store,
runId: 'test-run',
submission: { summaryMarkdown: 'test', findings: [finding1, finding2] },
});
expect(result.findings).toHaveLength(1);
expect(result.findings[0].severity).toBe('high');
});
it('dedupes findings with same path/line/title (similarity key)', async () => {
const finding1 = makeFinding({
path: 'a.ts',
line: 10,
title: 'Bug',
severity: 'medium',
fingerprint: 'fp1',
});
const finding2 = makeFinding({
path: 'a.ts',
line: 10,
title: 'Bug',
severity: 'high',
fingerprint: 'fp2',
});
const store = {
getRunDetails: async () => ({ findings: [], comments: [] }),
addFindings: async () => {},
addCommentRecord: async () => {},
} as any;
const result = await applyDeterministicPublishAdapter({
store,
runId: 'test-run',
submission: { summaryMarkdown: 'test', findings: [finding1, finding2] },
});
expect(result.findings).toHaveLength(1);
expect(result.findings[0].severity).toBe('high');
});
});

View File

@@ -1,4 +1,4 @@
import { randomUUID } from 'node:crypto';
import { createHash, randomUUID } from 'node:crypto';
import { applyPublishPolicy } from '../review/policy/publish-policy';
import type { FileReviewStore } from '../review/store/file-review-store';
import type { Finding, ReviewCommentRecord } from '../review/types';
@@ -27,6 +27,13 @@ function rankFinding(finding: ReviewAgentFinding): number {
return severityWeight(finding.severity) * 1000 + Math.round(finding.confidence * 100);
}
function buildFingerprint(category: string, path: string, line: number, title: string): string {
return createHash('sha256')
.update(JSON.stringify([category, path, line, title]))
.digest('hex')
.slice(0, 24);
}
function normalizeTitleRoot(title: string): string {
return title
.toLowerCase()
@@ -40,8 +47,11 @@ function similarityKey(finding: ReviewAgentFinding): string {
}
function dedupeFindings(candidates: ReviewAgentFinding[]): ReviewAgentFinding[] {
const ensured = candidates.map((f) =>
f.fingerprint ? f : { ...f, fingerprint: buildFingerprint(f.category, f.path, f.line, f.title) }
);
const byFingerprint = new Map<string, ReviewAgentFinding>();
for (const finding of candidates) {
for (const finding of ensured) {
const existing = byFingerprint.get(finding.fingerprint);
if (!existing || rankFinding(finding) > rankFinding(existing)) {
byFingerprint.set(finding.fingerprint, finding);

View File

@@ -11,15 +11,18 @@ import config from '../config';
import { DiffExtractor } from '../review/context/diff-extractor';
import type { LocalRepoPaths } from '../review/context/local-repo-manager';
import { LocalRepoManager } from '../review/context/local-repo-manager';
import { tokenCounter } from '../review/context/token-counter';
import { FileReviewStore } from '../review/store/file-review-store';
import type { Finding, ReviewContext, ReviewRun } from '../review/types';
import { logger } from '../utils/logger';
import { applyDeterministicPublishAdapter } from './deterministic-publish-adapter';
import { buildRepairPrompt, parseFindingResponse } from './schema';
import {
type ReviewToolState,
type SubmittedReviewFindings,
createReviewTaskTools,
normalizeSubmission,
parseSubmissionFromText,
} from './tools';
export interface ReviewAgentRepositoryContext {
@@ -88,18 +91,27 @@ function buildSessionScope(run: ReviewRun): string {
function tryParseFinalSubmission(text?: string): SubmittedReviewFindings | null {
if (!text) return null;
try {
return normalizeSubmission(JSON.parse(text));
} catch {
return null;
}
const parsed = JSON.parse(text);
if (parsed && typeof parsed === 'object') {
const result = normalizeSubmission(parsed);
if (result.findings.length > 0) return result;
}
} catch {}
return parseSubmissionFromText(text);
}
function buildReviewPrompt(context: ReviewAgentRunContext): string {
function buildReviewPrompt(context: ReviewAgentRunContext, model?: string): string {
const changedFiles = context.diffSummary?.changedFiles ?? [];
const fileSummary = changedFiles
.map((file) => `- ${file.status} ${file.path} (+${file.additions}/-${file.deletions})`)
.join('\n');
const diff = context.diffSummary?.diff?.trim() || '(diff omitted)';
const rawDiff = context.diffSummary?.diff?.trim() || '(diff omitted)';
const usableBudget = model ? tokenCounter.getUsableBudget(model) : 124_000;
const reservedForPrompt = 4000;
const diffBudget = Math.max(1000, usableBudget - reservedForPrompt);
const diff =
tokenCounter.count(rawDiff) > diffBudget ? tokenCounter.clip(rawDiff, diffBudget) : rawDiff;
return [
'You are the main code review agent for Gitea AI Assistant.',
@@ -271,6 +283,33 @@ export class ReviewAgentEntrypoint {
return snapshot.headSha;
}
private async repairFindingsWithLLM(
rawText: string,
messages: Array<{ role: string; content: string; toolCalls?: unknown }>,
maxAttempts = 2
): Promise<string> {
let current = rawText;
for (let attempt = 0; attempt < maxAttempts; attempt++) {
const outcome = parseFindingResponse(current);
if (outcome.ok) return current;
const repairPrompt = buildRepairPrompt(outcome.error);
messages.push({ role: 'assistant', content: current });
messages.push({ role: 'user', content: repairPrompt });
const response = await this.modelClient.chat({
messages: messages as import('../llm/types').LLMMessage[],
model: this.model,
temperature: 0,
responseFormat: 'json',
});
current = response.content ?? '{"findings":[]}';
logger.info('LLM finding repair attempt', {
attempt: attempt + 1,
error: outcome.error.slice(0, 100),
});
}
return current;
}
private async runMainAgent(
run: ReviewRun,
reviewContext: ReviewContext
@@ -320,10 +359,13 @@ export class ReviewAgentEntrypoint {
const agentResult = await runner.run({
agentType: 'review-main-agent',
model: this.model,
userMessage: buildReviewPrompt(runContext),
userMessage: buildReviewPrompt(runContext, this.model),
maxTurns: 8,
maxToolCalls: 4,
maxSubagents: 3,
timeoutMs: config.review.commandTimeoutMs,
maxEmptyResponses: 2,
maxConsecutiveToolFailures: 3,
session: {
agentType: 'review-main-agent',
metadata: {
@@ -343,7 +385,20 @@ export class ReviewAgentEntrypoint {
const submitted =
reviewToolState.submittedReview ?? tryParseFinalSubmission(agentResult.finalText);
const submission = submitted ?? { summaryMarkdown: agentResult.finalText ?? '', findings: [] };
let submission: SubmittedReviewFindings;
if (submitted) {
submission = submitted;
} else if (agentResult.finalText) {
const repaired = await this.repairFindingsWithLLM(
agentResult.finalText,
agentResult.messages
);
const repairedSubmission = tryParseFinalSubmission(repaired);
submission = repairedSubmission ?? { summaryMarkdown: agentResult.finalText, findings: [] };
} else {
submission = { summaryMarkdown: '', findings: [] };
}
const adapted = await applyDeterministicPublishAdapter({
store: this.store,
runId: run.id,

View File

@@ -0,0 +1,81 @@
import { describe, expect, it } from 'bun:test';
import { buildRepairPrompt, findingResponseSchema, parseFindingResponse } from '../index';
describe('findingResponseSchema', () => {
it('parses valid findings', () => {
const raw = JSON.stringify({
findings: [
{
severity: 'high',
confidence: 0.9,
path: 'src/app.ts',
line: 42,
title: 'SQL injection',
detail: 'Unsanitized input in query',
evidence: 'db.query(req.params.id)',
suggestion: 'Use parameterized queries',
category: 'security',
},
],
});
const result = parseFindingResponse(raw);
expect(result.ok).toBe(true);
if (result.ok) {
expect(result.findings).toHaveLength(1);
expect(result.findings[0].severity).toBe('high');
expect(result.findings[0].category).toBe('security');
}
});
it('applies defaults for optional fields', () => {
const raw = JSON.stringify({
findings: [
{
severity: 'low',
path: 'src/util.ts',
line: 10,
title: 'Missing error handling',
detail: 'No try-catch around async call',
},
],
});
const result = parseFindingResponse(raw);
expect(result.ok).toBe(true);
if (result.ok) {
const finding = result.findings[0];
expect(finding.confidence).toBe(0.8);
expect(finding.evidence).toBe('');
expect(finding.suggestion).toBe('');
expect(finding.category).toBeUndefined();
}
});
it('rejects invalid severity', () => {
const raw = JSON.stringify({
findings: [{ severity: 'critical', path: 'a.ts', line: 1, title: 'x', detail: 'y' }],
});
const result = parseFindingResponse(raw);
expect(result.ok).toBe(false);
});
it('rejects invalid JSON', () => {
const result = parseFindingResponse('not json');
expect(result.ok).toBe(false);
if (!result.ok) {
expect(result.raw).toBe('not json');
}
});
it('defaults to empty findings array', () => {
const result = findingResponseSchema.parse({});
expect(result.findings).toEqual([]);
});
});
describe('buildRepairPrompt', () => {
it('includes the parse error', () => {
const prompt = buildRepairPrompt('missing field "path"');
expect(prompt).toContain('missing field "path"');
expect(prompt).toContain('JSON');
});
});

View File

@@ -0,0 +1,57 @@
import { z } from 'zod';
const findingItemSchema = z.object({
category: z.enum(['correctness', 'security', 'reliability', 'maintainability']).optional(),
severity: z.enum(['high', 'medium', 'low']),
confidence: z.number().min(0).max(1).optional().default(0.8),
path: z.string().min(1),
line: z.number().int().positive(),
title: z.string().min(1),
detail: z.string().min(1),
evidence: z.string().optional().default(''),
suggestion: z.string().optional().default(''),
fingerprint: z.string().min(1).optional(),
});
export const findingResponseSchema = z.object({
findings: z.array(findingItemSchema).default([]),
});
export type FindingResponse = z.infer<typeof findingResponseSchema>;
export type FindingItem = z.infer<typeof findingItemSchema>;
export interface FindingParseResult {
ok: true;
findings: FindingItem[];
}
export interface FindingParseError {
ok: false;
error: string;
raw: string;
}
export type FindingParseOutcome = FindingParseResult | FindingParseError;
/**
* Parse raw LLM output text into validated findings.
* Returns a discriminated union: { ok: true, findings } or { ok: false, error, raw }.
*/
export function parseFindingResponse(raw: string): FindingParseOutcome {
try {
const parsed = JSON.parse(raw);
const result = findingResponseSchema.parse(parsed);
return { ok: true, findings: result.findings };
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
return { ok: false, error: message, raw };
}
}
/**
* Build a repair prompt to ask the LLM to fix its malformed JSON output.
* Returns null if no more repair attempts should be made.
*/
export function buildRepairPrompt(parseError: string): string {
return `上一次最终结果无法通过 findingResponseSchema 校验。校验错误: ${parseError}\n\n请输出严格 JSON格式如下\n{"findings":[{"severity":"high|medium|low","confidence":0.8,"path":"...","line":1,"title":"...","detail":"...","evidence":"...","suggestion":"...","category":"correctness|security|reliability|maintainability"}]}\n\n不要输出额外文字只输出 JSON。`;
}

View File

@@ -0,0 +1,10 @@
export {
findingResponseSchema,
parseFindingResponse,
buildRepairPrompt,
type FindingResponse,
type FindingItem,
type FindingParseOutcome,
type FindingParseResult,
type FindingParseError,
} from './finding-schema';

View File

@@ -111,21 +111,7 @@ describe('review task tools', () => {
});
test('normalizeSubmission fills defaults and generated fingerprint', () => {
expect(
normalizeSubmission({
summaryMarkdown: 'Summary',
findings: [
{
category: 'maintainability',
severity: 'medium',
path: 'src/app.ts',
line: 2,
title: 'Extract helper',
detail: 'The logic is duplicated.',
},
],
})
).toEqual({
const result = normalizeSubmission({
summaryMarkdown: 'Summary',
findings: [
{
@@ -135,13 +121,23 @@ describe('review task tools', () => {
line: 2,
title: 'Extract helper',
detail: 'The logic is duplicated.',
confidence: 0.8,
evidence: '',
suggestion: '',
fingerprint: 'maintainability:src/app.ts:2:Extract helper',
},
],
});
expect(result.summaryMarkdown).toBe('Summary');
expect(result.findings).toHaveLength(1);
expect(result.findings[0]).toMatchObject({
category: 'maintainability',
severity: 'medium',
path: 'src/app.ts',
line: 2,
title: 'Extract helper',
detail: 'The logic is duplicated.',
confidence: 0.8,
evidence: '',
suggestion: '',
fingerprint: 'd90c88d5929c2859b8591d69',
});
});
test('submit_review_findings stores valid findings', async () => {
@@ -171,7 +167,7 @@ describe('review task tools', () => {
confidence: 0.8,
evidence: '',
suggestion: '',
fingerprint: 'reliability:src/app.ts:2:Token may be undefined',
fingerprint: 'f226057a37399d83b53cbeed',
});
});

View File

@@ -1,4 +1,8 @@
export { createReviewTaskTools, normalizeSubmission } from './review-tools';
export {
createReviewTaskTools,
normalizeSubmission,
parseSubmissionFromText,
} from './review-tools';
export type {
CreateReviewTaskToolsOptions,
ReviewToolState,

View File

@@ -1,8 +1,8 @@
import { createHash } from 'node:crypto';
import type { MainAgentTool } from '../../agent-kernel/loop/types';
import type { Finding, ReviewContext } from '../../review/types';
const VALID_CATEGORIES = ['correctness', 'security', 'reliability', 'maintainability'] as const;
const VALID_SEVERITIES = ['high', 'medium', 'low'] as const;
import type { FindingItem } from '../schema';
import { parseFindingResponse } from '../schema';
export type ReviewAgentFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
@@ -51,45 +51,46 @@ function requireObject(value: unknown, message: string): Record<string, unknown>
return value as Record<string, unknown>;
}
function requireString(input: Record<string, unknown>, key: string): string {
const value = readString(input, key);
if (!value) throw new Error(`Finding ${key} must be a non-empty string`);
return value;
function buildFingerprint(category: string, path: string, line: number, title: string): string {
return createHash('sha256')
.update(JSON.stringify([category, path, line, title]))
.digest('hex')
.slice(0, 24);
}
function findingItemToReviewAgent(item: FindingItem): ReviewAgentFinding {
const category = item.category ?? 'correctness';
return {
fingerprint: item.fingerprint || buildFingerprint(category, item.path, item.line, item.title),
category,
severity: item.severity,
confidence: item.confidence ?? 0.8,
path: item.path,
line: item.line,
title: item.title,
detail: item.detail,
evidence: item.evidence ?? '',
suggestion: item.suggestion ?? '',
};
}
/** Parse raw JSON text into validated SubmittedReviewFindings using Zod schema. */
export function parseSubmissionFromText(raw: string): SubmittedReviewFindings | null {
const outcome = parseFindingResponse(raw);
if (!outcome.ok) return null;
return {
summaryMarkdown: '',
findings: outcome.findings.map(findingItemToReviewAgent),
};
}
function normalizeFinding(value: unknown): ReviewAgentFinding {
const input = requireObject(value, 'Each finding must be an object');
const category = readString(input, 'category');
if (!category || !VALID_CATEGORIES.includes(category as (typeof VALID_CATEGORIES)[number])) {
throw new Error(
'Finding category must be correctness, security, reliability, or maintainability'
);
const outcome = parseFindingResponse(JSON.stringify({ findings: [value] }));
if (outcome.ok && outcome.findings.length > 0) {
return findingItemToReviewAgent(outcome.findings[0]);
}
const severity = readString(input, 'severity');
if (!severity || !VALID_SEVERITIES.includes(severity as (typeof VALID_SEVERITIES)[number])) {
throw new Error('Finding severity must be high, medium, or low');
}
const path = requireString(input, 'path');
const line = readNumber(input, 'line');
if (line === undefined) throw new Error('Finding line must be a number');
const title = requireString(input, 'title');
const detail = requireString(input, 'detail');
return {
fingerprint: readString(input, 'fingerprint') || `${category}:${path}:${line}:${title}`,
category,
severity,
confidence: readNumber(input, 'confidence') ?? 0.8,
path,
line,
title,
detail,
evidence: readString(input, 'evidence') ?? '',
suggestion: readString(input, 'suggestion') ?? '',
} as ReviewAgentFinding;
const detail = !outcome.ok ? outcome.error : 'no valid findings in response';
throw new Error(`Finding validation failed: ${detail}`);
}
export function normalizeSubmission(value: unknown): SubmittedReviewFindings {

View File

@@ -76,7 +76,9 @@ export class TokenCounter {
if (text.length <= maxChars) {
return text;
}
return `${text.slice(0, maxChars)}\n... [truncated, exceeded ${maxTokens} token budget]`;
const lastNewline = text.lastIndexOf('\n', maxChars);
const cutPoint = lastNewline > maxChars * 0.5 ? lastNewline : maxChars;
return `${text.slice(0, cutPoint)}\n... [truncated, exceeded ${maxTokens} token budget]`;
}
// -------------------------------------------------------------------------