mirror of
https://github.com/jeffusion/gitea-ai-assistant.git
synced 2026-06-08 07:26:52 +00:00
feat(agent-kernel): cherry-pick high-value components from PR #15
- Zod findingResponseSchema + LLM repair for malformed model output (max 2 attempts) - Budget guards: maxSubagents, maxEmptyResponses, maxConsecutiveToolFailures - maxSubagents: refuse spawn at limit, allow model to summarize - consecutiveToolFailures: per-tool-call update, reset on success, immediate terminate - Tool permission scope system (6 scopes, allow/deny static policy, no ask) - allowListSpecified flag distinguishes subagent vs main agent resolution - SHA256 finding fingerprint with JSON tuple input (avoids colon ambiguity) - Token counting + context clipping via tokenlens (newline-boundary clipping) - diffBudget floor of 1000 tokens (prevents negative budget for small models) - tryParseFinalSubmission: full JSON first (preserves summaryMarkdown), Zod fallback - normalizeFinding: Zod-only validation, no lax fallback - E2E README: fix webhook signing (--data-binary, openssl dgst, repository.name)
This commit is contained in:
@@ -153,6 +153,7 @@ cat > /tmp/webhook_payload.json << EOF
|
|||||||
},
|
},
|
||||||
"repository": {
|
"repository": {
|
||||||
"full_name": "e2e-admin/e2e-test-repo",
|
"full_name": "e2e-admin/e2e-test-repo",
|
||||||
|
"name": "e2e-test-repo",
|
||||||
"owner": { "login": "e2e-admin" },
|
"owner": { "login": "e2e-admin" },
|
||||||
"clone_url": "http://gitea:3000/e2e-admin/e2e-test-repo.git"
|
"clone_url": "http://gitea:3000/e2e-admin/e2e-test-repo.git"
|
||||||
},
|
},
|
||||||
@@ -160,15 +161,15 @@ cat > /tmp/webhook_payload.json << EOF
|
|||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# 计算 HMAC 签名
|
# 计算 HMAC 签名(注意:必须基于文件内容计算,避免 shell 变量传递时改变内容)
|
||||||
PAYLOAD=$(cat /tmp/webhook_payload.json)
|
SIG=$(cat /tmp/webhook_payload.json | openssl dgst -sha256 -hmac "e2e-test-secret" | awk '{print $NF}')
|
||||||
SIG=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "e2e-test-secret" -binary | xxd -p -c 256)
|
|
||||||
|
|
||||||
# 发送 webhook
|
# 发送 webhook(⚠️ 必须用 --data-binary 而非 -d,否则换行符被剥离导致签名不匹配)
|
||||||
curl -s -X POST "http://localhost:3334/webhook/gitea" \
|
curl -s -X POST "http://localhost:3334/webhook/gitea" \
|
||||||
-H "Content-Type: application/json" \
|
-H "Content-Type: application/json" \
|
||||||
|
-H "X-Gitea-Event: pull_request" \
|
||||||
-H "X-Gitea-Signature: ${SIG}" \
|
-H "X-Gitea-Signature: ${SIG}" \
|
||||||
-d "$PAYLOAD"
|
--data-binary @/tmp/webhook_payload.json
|
||||||
```
|
```
|
||||||
|
|
||||||
### 6. 验证审查结果
|
### 6. 验证审查结果
|
||||||
|
|||||||
@@ -243,4 +243,116 @@ describe('MainAgentRunner', () => {
|
|||||||
JSON.stringify({ ok: false, error: { name: 'Error', message: 'lookup failed' } })
|
JSON.stringify({ ok: false, error: { name: 'Error', message: 'lookup failed' } })
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('stops on maxEmptyResponses', async () => {
|
||||||
|
const modelClient = new FakeModelClient([
|
||||||
|
response({ content: '' }),
|
||||||
|
response({ content: '' }),
|
||||||
|
response({ content: 'should not reach' }),
|
||||||
|
]);
|
||||||
|
const runner = new MainAgentRunner({
|
||||||
|
modelClient,
|
||||||
|
transcriptRepository: agentSessionRepository,
|
||||||
|
tools: [],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await runner.run({
|
||||||
|
model: 'mock-model',
|
||||||
|
userMessage: 'test empty responses',
|
||||||
|
maxTurns: 10,
|
||||||
|
maxToolCalls: 10,
|
||||||
|
timeoutMs: 60_000,
|
||||||
|
maxEmptyResponses: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.status).toBe('max_empty_responses');
|
||||||
|
expect(result.turns).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('stops on maxConsecutiveToolFailures', async () => {
|
||||||
|
const failTool: MainAgentTool = {
|
||||||
|
definition: {
|
||||||
|
name: 'fail_tool',
|
||||||
|
description: 'Always fails.',
|
||||||
|
parameters: { type: 'object' },
|
||||||
|
},
|
||||||
|
execute: () => {
|
||||||
|
throw new Error('boom');
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const modelClient = new FakeModelClient([
|
||||||
|
response({
|
||||||
|
finishReason: 'tool_calls',
|
||||||
|
toolCalls: [{ id: 'c1', name: 'fail_tool', arguments: '{}' }],
|
||||||
|
}),
|
||||||
|
response({
|
||||||
|
finishReason: 'tool_calls',
|
||||||
|
toolCalls: [{ id: 'c2', name: 'fail_tool', arguments: '{}' }],
|
||||||
|
}),
|
||||||
|
response({
|
||||||
|
finishReason: 'tool_calls',
|
||||||
|
toolCalls: [{ id: 'c3', name: 'fail_tool', arguments: '{}' }],
|
||||||
|
}),
|
||||||
|
response({ content: 'should not reach' }),
|
||||||
|
]);
|
||||||
|
const runner = new MainAgentRunner({
|
||||||
|
modelClient,
|
||||||
|
transcriptRepository: agentSessionRepository,
|
||||||
|
tools: [failTool],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await runner.run({
|
||||||
|
model: 'mock-model',
|
||||||
|
userMessage: 'test tool failures',
|
||||||
|
maxTurns: 10,
|
||||||
|
maxToolCalls: 10,
|
||||||
|
timeoutMs: 60_000,
|
||||||
|
maxConsecutiveToolFailures: 3,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.status).toBe('max_consecutive_tool_failures');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('refuses subagent spawn beyond maxSubagents and allows summary', async () => {
|
||||||
|
const subagentTool: MainAgentTool = {
|
||||||
|
definition: {
|
||||||
|
name: 'spawn_subagent',
|
||||||
|
description: 'Spawn a subagent.',
|
||||||
|
parameters: { type: 'object' },
|
||||||
|
},
|
||||||
|
execute: () => ({ status: 'completed' }),
|
||||||
|
};
|
||||||
|
const modelClient = new FakeModelClient([
|
||||||
|
response({
|
||||||
|
finishReason: 'tool_calls',
|
||||||
|
toolCalls: [{ id: 'c1', name: 'spawn_subagent', arguments: '{}' }],
|
||||||
|
}),
|
||||||
|
response({
|
||||||
|
finishReason: 'tool_calls',
|
||||||
|
toolCalls: [{ id: 'c2', name: 'spawn_subagent', arguments: '{}' }],
|
||||||
|
}),
|
||||||
|
response({
|
||||||
|
finishReason: 'tool_calls',
|
||||||
|
toolCalls: [{ id: 'c3', name: 'spawn_subagent', arguments: '{}' }],
|
||||||
|
}),
|
||||||
|
response({ content: 'review complete with 2 subagents' }),
|
||||||
|
]);
|
||||||
|
const runner = new MainAgentRunner({
|
||||||
|
modelClient,
|
||||||
|
transcriptRepository: agentSessionRepository,
|
||||||
|
tools: [subagentTool],
|
||||||
|
});
|
||||||
|
|
||||||
|
const result = await runner.run({
|
||||||
|
model: 'mock-model',
|
||||||
|
userMessage: 'test subagent limit',
|
||||||
|
maxTurns: 10,
|
||||||
|
maxToolCalls: 10,
|
||||||
|
timeoutMs: 60_000,
|
||||||
|
maxSubagents: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.status).toBe('completed');
|
||||||
|
expect(result.finalText).toBe('review complete with 2 subagents');
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -84,9 +84,23 @@ export class MainAgentRunner {
|
|||||||
|
|
||||||
let turns = 0;
|
let turns = 0;
|
||||||
let toolCalls = 0;
|
let toolCalls = 0;
|
||||||
|
let subagentCount = 0;
|
||||||
|
let emptyResponseCount = 0;
|
||||||
|
let consecutiveToolFailures = 0;
|
||||||
|
const maxSubagents = input.maxSubagents ?? Number.POSITIVE_INFINITY;
|
||||||
|
const maxEmptyResponses = input.maxEmptyResponses ?? 3;
|
||||||
|
const maxConsecutiveToolFailures = input.maxConsecutiveToolFailures ?? 5;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
const budgetStatus = this.getBudgetStatus(input, startedAt, turns);
|
const budgetStatus = this.getBudgetStatus(
|
||||||
|
input,
|
||||||
|
startedAt,
|
||||||
|
turns,
|
||||||
|
emptyResponseCount,
|
||||||
|
consecutiveToolFailures,
|
||||||
|
maxEmptyResponses,
|
||||||
|
maxConsecutiveToolFailures
|
||||||
|
);
|
||||||
if (budgetStatus) {
|
if (budgetStatus) {
|
||||||
return this.finish(sessionId, budgetStatus, turns, toolCalls, messages);
|
return this.finish(sessionId, budgetStatus, turns, toolCalls, messages);
|
||||||
}
|
}
|
||||||
@@ -102,6 +116,20 @@ export class MainAgentRunner {
|
|||||||
});
|
});
|
||||||
|
|
||||||
turns += 1;
|
turns += 1;
|
||||||
|
|
||||||
|
if (!response.content?.trim() && response.toolCalls.length === 0) {
|
||||||
|
emptyResponseCount += 1;
|
||||||
|
messages.push({ role: 'assistant', content: '' });
|
||||||
|
this.transcriptRepository.appendMessage({
|
||||||
|
sessionId,
|
||||||
|
role: 'assistant',
|
||||||
|
content: { text: '' },
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
emptyResponseCount = 0;
|
||||||
|
|
||||||
const assistantMessage: LLMMessage = {
|
const assistantMessage: LLMMessage = {
|
||||||
role: 'assistant',
|
role: 'assistant',
|
||||||
content: response.content ?? '',
|
content: response.content ?? '',
|
||||||
@@ -139,8 +167,57 @@ export class MainAgentRunner {
|
|||||||
return this.finish(sessionId, 'max_tool_calls_reached', turns, toolCalls, messages);
|
return this.finish(sessionId, 'max_tool_calls_reached', turns, toolCalls, messages);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (toolCall.name === 'spawn_subagent') {
|
||||||
|
if (subagentCount >= maxSubagents) {
|
||||||
|
const refusalMessage: LLMMessage = {
|
||||||
|
role: 'tool',
|
||||||
|
toolCallId: toolCall.id,
|
||||||
|
content: JSON.stringify({
|
||||||
|
ok: false,
|
||||||
|
error: {
|
||||||
|
name: 'BudgetExceeded',
|
||||||
|
message: `Subagent limit reached (${maxSubagents}). Please summarize your findings instead.`,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
messages.push(refusalMessage);
|
||||||
|
this.transcriptRepository.appendMessage({
|
||||||
|
sessionId,
|
||||||
|
role: 'tool',
|
||||||
|
content: {
|
||||||
|
toolCallId: toolCall.id,
|
||||||
|
toolName: toolCall.name,
|
||||||
|
result: {
|
||||||
|
ok: false,
|
||||||
|
error: {
|
||||||
|
name: 'BudgetExceeded',
|
||||||
|
message: `Subagent limit reached (${maxSubagents})`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
subagentCount += 1;
|
||||||
|
}
|
||||||
|
|
||||||
const result = await this.executeTool(toolCall, sessionId, input.model, turns);
|
const result = await this.executeTool(toolCall, sessionId, input.model, turns);
|
||||||
toolCalls += 1;
|
toolCalls += 1;
|
||||||
|
if (!result.ok) {
|
||||||
|
consecutiveToolFailures += 1;
|
||||||
|
} else {
|
||||||
|
consecutiveToolFailures = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!result.ok && consecutiveToolFailures >= maxConsecutiveToolFailures) {
|
||||||
|
return this.finish(
|
||||||
|
sessionId,
|
||||||
|
'max_consecutive_tool_failures',
|
||||||
|
turns,
|
||||||
|
toolCalls,
|
||||||
|
messages
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
this.transcriptRepository.appendToolCall({
|
this.transcriptRepository.appendToolCall({
|
||||||
sessionId,
|
sessionId,
|
||||||
@@ -174,10 +251,17 @@ export class MainAgentRunner {
|
|||||||
private getBudgetStatus(
|
private getBudgetStatus(
|
||||||
input: MainAgentRunInput,
|
input: MainAgentRunInput,
|
||||||
startedAt: number,
|
startedAt: number,
|
||||||
turns: number
|
turns: number,
|
||||||
|
emptyResponseCount: number,
|
||||||
|
consecutiveToolFailures: number,
|
||||||
|
maxEmptyResponses: number,
|
||||||
|
maxConsecutiveToolFailures: number
|
||||||
): MainAgentTerminalStatus | undefined {
|
): MainAgentTerminalStatus | undefined {
|
||||||
if (this.isTimedOut(input, startedAt)) return 'timeout_reached';
|
if (this.isTimedOut(input, startedAt)) return 'timeout_reached';
|
||||||
if (turns >= input.maxTurns) return 'max_turns_reached';
|
if (turns >= input.maxTurns) return 'max_turns_reached';
|
||||||
|
if (emptyResponseCount >= maxEmptyResponses) return 'max_empty_responses';
|
||||||
|
if (consecutiveToolFailures >= maxConsecutiveToolFailures)
|
||||||
|
return 'max_consecutive_tool_failures';
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,10 @@ export type MainAgentTerminalStatus =
|
|||||||
| 'completed'
|
| 'completed'
|
||||||
| 'max_turns_reached'
|
| 'max_turns_reached'
|
||||||
| 'max_tool_calls_reached'
|
| 'max_tool_calls_reached'
|
||||||
| 'timeout_reached';
|
| 'max_subagents_reached'
|
||||||
|
| 'timeout_reached'
|
||||||
|
| 'max_empty_responses'
|
||||||
|
| 'max_consecutive_tool_failures';
|
||||||
|
|
||||||
export interface MainAgentModelClient {
|
export interface MainAgentModelClient {
|
||||||
chat(request: LLMChatRequest): Promise<LLMChatResponse>;
|
chat(request: LLMChatRequest): Promise<LLMChatResponse>;
|
||||||
@@ -29,8 +32,19 @@ export interface MainAgentToolContext {
|
|||||||
turn: number;
|
turn: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type ToolPermissionScope =
|
||||||
|
| 'read'
|
||||||
|
| 'write'
|
||||||
|
| 'command'
|
||||||
|
| 'network'
|
||||||
|
| 'git_write'
|
||||||
|
| 'cross_session';
|
||||||
|
|
||||||
|
export type ToolPermissionBehavior = 'allow' | 'deny';
|
||||||
|
|
||||||
export interface MainAgentTool {
|
export interface MainAgentTool {
|
||||||
definition: LLMToolDefinition;
|
definition: LLMToolDefinition;
|
||||||
|
permissionScope?: ToolPermissionScope;
|
||||||
execute(argumentsValue: unknown, context: MainAgentToolContext): Promise<unknown> | unknown;
|
execute(argumentsValue: unknown, context: MainAgentToolContext): Promise<unknown> | unknown;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,7 +93,10 @@ export interface MainAgentRunInput {
|
|||||||
providerOptions?: Record<string, unknown>;
|
providerOptions?: Record<string, unknown>;
|
||||||
maxTurns: number;
|
maxTurns: number;
|
||||||
maxToolCalls: number;
|
maxToolCalls: number;
|
||||||
|
maxSubagents?: number;
|
||||||
timeoutMs: number;
|
timeoutMs: number;
|
||||||
|
maxEmptyResponses?: number;
|
||||||
|
maxConsecutiveToolFailures?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface MainAgentRunResult {
|
export interface MainAgentRunResult {
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ export class SubagentRunner implements SpawnSubagentExecutor {
|
|||||||
availableTools: this.tools,
|
availableTools: this.tools,
|
||||||
allowedToolNames: input.agentDefinition.tools,
|
allowedToolNames: input.agentDefinition.tools,
|
||||||
disallowedToolNames: input.agentDefinition.disallowedTools,
|
disallowedToolNames: input.agentDefinition.disallowedTools,
|
||||||
|
allowListSpecified: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
const invocation = this.transcriptRepository.createInvocation({
|
const invocation = this.transcriptRepository.createInvocation({
|
||||||
|
|||||||
@@ -1,64 +1,101 @@
|
|||||||
import { describe, expect, test } from 'bun:test';
|
import { describe, expect, test } from 'bun:test';
|
||||||
import type { MainAgentTool } from '../../loop';
|
import type { MainAgentTool } from '../../loop';
|
||||||
import { resolveAgentTools } from '../tool-permissions';
|
import type { ToolPermissionScope } from '../../loop/types';
|
||||||
|
import {
|
||||||
|
DEFAULT_SCOPE_POLICY,
|
||||||
|
evaluateToolPermission,
|
||||||
|
resolveAgentTools,
|
||||||
|
} from '../tool-permissions';
|
||||||
|
|
||||||
function tool(name: string): MainAgentTool {
|
function tool(name: string, scope?: ToolPermissionScope): MainAgentTool {
|
||||||
return {
|
return {
|
||||||
definition: {
|
definition: {
|
||||||
name,
|
name,
|
||||||
description: `${name} tool`,
|
description: `${name} tool`,
|
||||||
parameters: { type: 'object' },
|
parameters: { type: 'object' },
|
||||||
},
|
},
|
||||||
|
permissionScope: scope,
|
||||||
execute: () => ({ name }),
|
execute: () => ({ name }),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
describe('resolveAgentTools', () => {
|
describe('evaluateToolPermission', () => {
|
||||||
const availableTools = [tool('read_file'), tool('write_file'), tool('lookup')];
|
test('allows read scope', () => {
|
||||||
|
expect(evaluateToolPermission(tool('read_file', 'read')).behavior).toBe('allow');
|
||||||
|
});
|
||||||
|
|
||||||
test('returns the explicitly allowed subset', () => {
|
test('denies write scope', () => {
|
||||||
|
expect(evaluateToolPermission(tool('write_file', 'write')).behavior).toBe('deny');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('denies command scope', () => {
|
||||||
|
expect(evaluateToolPermission(tool('run_bash', 'command')).behavior).toBe('deny');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('denies network scope', () => {
|
||||||
|
expect(evaluateToolPermission(tool('http_request', 'network')).behavior).toBe('deny');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('defaults to read scope when unspecified', () => {
|
||||||
|
expect(evaluateToolPermission(tool('search_code')).behavior).toBe('allow');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('resolveAgentTools', () => {
|
||||||
|
const readTool = tool('read_file', 'read');
|
||||||
|
const writeTool = tool('write_file', 'write');
|
||||||
|
const searchTool = tool('search_code', 'read');
|
||||||
|
|
||||||
|
test('includes allowed tool names regardless of scope', () => {
|
||||||
const resolved = resolveAgentTools({
|
const resolved = resolveAgentTools({
|
||||||
availableTools,
|
availableTools: [writeTool],
|
||||||
allowedToolNames: ['lookup', 'read_file'],
|
allowedToolNames: ['write_file'],
|
||||||
disallowedToolNames: [],
|
disallowedToolNames: [],
|
||||||
});
|
});
|
||||||
|
expect(resolved.tools).toHaveLength(1);
|
||||||
expect(resolved.tools.map((item) => item.definition.name)).toEqual(['read_file', 'lookup']);
|
expect(resolved.tools[0].definition.name).toBe('write_file');
|
||||||
expect(resolved.deniedToolNames).toEqual(['write_file']);
|
|
||||||
expect(resolved.unknownAllowedToolNames).toEqual([]);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test('removes disallowed tools even when they are explicitly allowed', () => {
|
test('excludes disallowed tool names regardless of scope', () => {
|
||||||
const resolved = resolveAgentTools({
|
const resolved = resolveAgentTools({
|
||||||
availableTools,
|
availableTools: [readTool],
|
||||||
allowedToolNames: ['read_file', 'write_file'],
|
allowedToolNames: ['read_file'],
|
||||||
disallowedToolNames: ['write_file'],
|
disallowedToolNames: ['read_file'],
|
||||||
});
|
});
|
||||||
|
expect(resolved.tools).toHaveLength(0);
|
||||||
expect(resolved.tools.map((item) => item.definition.name)).toEqual(['read_file']);
|
expect(resolved.deniedToolNames).toContain('read_file');
|
||||||
expect(resolved.deniedToolNames).toEqual(['write_file', 'lookup']);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test('treats an empty allow-list as no tools allowed', () => {
|
test('filters by scope policy when not in allowed/disallowed lists', () => {
|
||||||
const resolved = resolveAgentTools({
|
const resolved = resolveAgentTools({
|
||||||
availableTools,
|
availableTools: [readTool, writeTool, searchTool],
|
||||||
allowedToolNames: [],
|
allowedToolNames: [],
|
||||||
disallowedToolNames: [],
|
disallowedToolNames: [],
|
||||||
});
|
});
|
||||||
|
const names = resolved.tools.map((t) => t.definition.name);
|
||||||
expect(resolved.tools).toEqual([]);
|
expect(names).toContain('read_file');
|
||||||
expect(resolved.deniedToolNames).toEqual(['read_file', 'write_file', 'lookup']);
|
expect(names).toContain('search_code');
|
||||||
|
expect(names).not.toContain('write_file');
|
||||||
});
|
});
|
||||||
|
|
||||||
test('reports unknown allow-list names without granting tools', () => {
|
test('reports unknown allowed/disallowed names', () => {
|
||||||
const resolved = resolveAgentTools({
|
const resolved = resolveAgentTools({
|
||||||
availableTools,
|
availableTools: [readTool],
|
||||||
allowedToolNames: ['lookup', 'missing_tool'],
|
allowedToolNames: ['missing_tool'],
|
||||||
disallowedToolNames: ['unknown_deny'],
|
disallowedToolNames: ['ghost_tool'],
|
||||||
});
|
});
|
||||||
|
expect(resolved.unknownAllowedToolNames).toContain('missing_tool');
|
||||||
expect(resolved.tools.map((item) => item.definition.name)).toEqual(['lookup']);
|
expect(resolved.unknownDisallowedToolNames).toContain('ghost_tool');
|
||||||
expect(resolved.unknownAllowedToolNames).toEqual(['missing_tool']);
|
});
|
||||||
expect(resolved.unknownDisallowedToolNames).toEqual(['unknown_deny']);
|
});
|
||||||
|
|
||||||
|
describe('DEFAULT_SCOPE_POLICY', () => {
|
||||||
|
test('only allows read scope', () => {
|
||||||
|
expect(DEFAULT_SCOPE_POLICY.read).toBe('allow');
|
||||||
|
expect(DEFAULT_SCOPE_POLICY.write).toBe('deny');
|
||||||
|
expect(DEFAULT_SCOPE_POLICY.command).toBe('deny');
|
||||||
|
expect(DEFAULT_SCOPE_POLICY.network).toBe('deny');
|
||||||
|
expect(DEFAULT_SCOPE_POLICY.git_write).toBe('deny');
|
||||||
|
expect(DEFAULT_SCOPE_POLICY.cross_session).toBe('deny');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
import type { MainAgentTool } from '../loop';
|
import type { MainAgentTool } from '../loop';
|
||||||
|
import type { ToolPermissionBehavior, ToolPermissionScope } from '../loop/types';
|
||||||
|
|
||||||
export interface ResolveAgentToolsInput {
|
export interface ResolveAgentToolsInput {
|
||||||
availableTools: MainAgentTool[];
|
availableTools: MainAgentTool[];
|
||||||
allowedToolNames: string[];
|
allowedToolNames: string[];
|
||||||
disallowedToolNames: string[];
|
disallowedToolNames: string[];
|
||||||
|
allowListSpecified?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ResolvedAgentTools {
|
export interface ResolvedAgentTools {
|
||||||
@@ -15,10 +17,33 @@ export interface ResolvedAgentTools {
|
|||||||
unknownDisallowedToolNames: string[];
|
unknownDisallowedToolNames: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface ToolPermissionDecision {
|
||||||
|
behavior: ToolPermissionBehavior;
|
||||||
|
reason: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const DEFAULT_SCOPE_POLICY: Record<ToolPermissionScope, ToolPermissionBehavior> = {
|
||||||
|
read: 'allow',
|
||||||
|
write: 'deny',
|
||||||
|
command: 'deny',
|
||||||
|
network: 'deny',
|
||||||
|
git_write: 'deny',
|
||||||
|
cross_session: 'deny',
|
||||||
|
};
|
||||||
|
|
||||||
function uniqueNames(names: string[]): string[] {
|
function uniqueNames(names: string[]): string[] {
|
||||||
return [...new Set(names)];
|
return [...new Set(names)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function evaluateToolPermission(tool: MainAgentTool): ToolPermissionDecision {
|
||||||
|
const scope = tool.permissionScope ?? 'read';
|
||||||
|
const behavior = DEFAULT_SCOPE_POLICY[scope];
|
||||||
|
return {
|
||||||
|
behavior,
|
||||||
|
reason: `Tool '${tool.definition.name}' ${behavior === 'allow' ? 'allowed' : 'denied'} for scope '${scope}'`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentTools {
|
export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentTools {
|
||||||
const availableToolNames = uniqueNames(input.availableTools.map((tool) => tool.definition.name));
|
const availableToolNames = uniqueNames(input.availableTools.map((tool) => tool.definition.name));
|
||||||
const availableToolNamesSet = new Set(availableToolNames);
|
const availableToolNamesSet = new Set(availableToolNames);
|
||||||
@@ -29,7 +54,10 @@ export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentT
|
|||||||
|
|
||||||
const tools = input.availableTools.filter((tool) => {
|
const tools = input.availableTools.filter((tool) => {
|
||||||
const toolName = tool.definition.name;
|
const toolName = tool.definition.name;
|
||||||
return allowedToolNamesSet.has(toolName) && !disallowedToolNamesSet.has(toolName);
|
if (disallowedToolNamesSet.has(toolName)) return false;
|
||||||
|
if (allowedToolNamesSet.size > 0) return allowedToolNamesSet.has(toolName);
|
||||||
|
if (input.allowListSpecified) return false;
|
||||||
|
return evaluateToolPermission(tool).behavior === 'allow';
|
||||||
});
|
});
|
||||||
const permittedToolNamesSet = new Set(tools.map((tool) => tool.definition.name));
|
const permittedToolNamesSet = new Set(tools.map((tool) => tool.definition.name));
|
||||||
|
|
||||||
@@ -46,3 +74,5 @@ export function resolveAgentTools(input: ResolveAgentToolsInput): ResolvedAgentT
|
|||||||
),
|
),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export { DEFAULT_SCOPE_POLICY };
|
||||||
|
|||||||
101
src/review-agent/__tests__/deterministic-publish-adapter.test.ts
Normal file
101
src/review-agent/__tests__/deterministic-publish-adapter.test.ts
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
import { describe, expect, it } from 'bun:test';
|
||||||
|
import { createHash } from 'node:crypto';
|
||||||
|
import { applyDeterministicPublishAdapter } from '../deterministic-publish-adapter';
|
||||||
|
import type { ReviewAgentFinding } from '../tools';
|
||||||
|
|
||||||
|
function makeFinding(overrides: Partial<ReviewAgentFinding> = {}): ReviewAgentFinding {
|
||||||
|
return {
|
||||||
|
fingerprint: '',
|
||||||
|
category: 'security',
|
||||||
|
severity: 'high',
|
||||||
|
confidence: 0.9,
|
||||||
|
path: 'src/app.ts',
|
||||||
|
line: 42,
|
||||||
|
title: 'SQL injection',
|
||||||
|
detail: 'Unsanitized input',
|
||||||
|
evidence: 'db.query(input)',
|
||||||
|
suggestion: 'Use parameterized queries',
|
||||||
|
...overrides,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function expectedFingerprint(category: string, path: string, line: number, title: string): string {
|
||||||
|
return createHash('sha256')
|
||||||
|
.update(`${category}:${path}:${line}:${title}`)
|
||||||
|
.digest('hex')
|
||||||
|
.slice(0, 24);
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('SHA256 fingerprint generation', () => {
|
||||||
|
it('produces consistent 24-char hex fingerprint', () => {
|
||||||
|
const fp = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
|
||||||
|
expect(fp).toHaveLength(24);
|
||||||
|
expect(fp).toMatch(/^[0-9a-f]{24}$/);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('produces different fingerprints for different inputs', () => {
|
||||||
|
const fp1 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
|
||||||
|
const fp2 = expectedFingerprint('correctness', 'src/app.ts', 42, 'SQL injection');
|
||||||
|
expect(fp1).not.toBe(fp2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('produces same fingerprint for same inputs', () => {
|
||||||
|
const fp1 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
|
||||||
|
const fp2 = expectedFingerprint('security', 'src/app.ts', 42, 'SQL injection');
|
||||||
|
expect(fp1).toBe(fp2);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('applyDeterministicPublishAdapter deduplication', () => {
|
||||||
|
it('dedupes findings with identical fingerprints keeping higher rank', async () => {
|
||||||
|
const finding1 = makeFinding({ severity: 'low', confidence: 0.5, fingerprint: 'dup-fp' });
|
||||||
|
const finding2 = makeFinding({ severity: 'high', confidence: 0.9, fingerprint: 'dup-fp' });
|
||||||
|
|
||||||
|
const store = {
|
||||||
|
getRunDetails: async () => ({ findings: [], comments: [] }),
|
||||||
|
addFindings: async () => {},
|
||||||
|
addCommentRecord: async () => {},
|
||||||
|
} as any;
|
||||||
|
|
||||||
|
const result = await applyDeterministicPublishAdapter({
|
||||||
|
store,
|
||||||
|
runId: 'test-run',
|
||||||
|
submission: { summaryMarkdown: 'test', findings: [finding1, finding2] },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.findings).toHaveLength(1);
|
||||||
|
expect(result.findings[0].severity).toBe('high');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('dedupes findings with same path/line/title (similarity key)', async () => {
|
||||||
|
const finding1 = makeFinding({
|
||||||
|
path: 'a.ts',
|
||||||
|
line: 10,
|
||||||
|
title: 'Bug',
|
||||||
|
severity: 'medium',
|
||||||
|
fingerprint: 'fp1',
|
||||||
|
});
|
||||||
|
const finding2 = makeFinding({
|
||||||
|
path: 'a.ts',
|
||||||
|
line: 10,
|
||||||
|
title: 'Bug',
|
||||||
|
severity: 'high',
|
||||||
|
fingerprint: 'fp2',
|
||||||
|
});
|
||||||
|
|
||||||
|
const store = {
|
||||||
|
getRunDetails: async () => ({ findings: [], comments: [] }),
|
||||||
|
addFindings: async () => {},
|
||||||
|
addCommentRecord: async () => {},
|
||||||
|
} as any;
|
||||||
|
|
||||||
|
const result = await applyDeterministicPublishAdapter({
|
||||||
|
store,
|
||||||
|
runId: 'test-run',
|
||||||
|
submission: { summaryMarkdown: 'test', findings: [finding1, finding2] },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.findings).toHaveLength(1);
|
||||||
|
expect(result.findings[0].severity).toBe('high');
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import { randomUUID } from 'node:crypto';
|
import { createHash, randomUUID } from 'node:crypto';
|
||||||
import { applyPublishPolicy } from '../review/policy/publish-policy';
|
import { applyPublishPolicy } from '../review/policy/publish-policy';
|
||||||
import type { FileReviewStore } from '../review/store/file-review-store';
|
import type { FileReviewStore } from '../review/store/file-review-store';
|
||||||
import type { Finding, ReviewCommentRecord } from '../review/types';
|
import type { Finding, ReviewCommentRecord } from '../review/types';
|
||||||
@@ -27,6 +27,13 @@ function rankFinding(finding: ReviewAgentFinding): number {
|
|||||||
return severityWeight(finding.severity) * 1000 + Math.round(finding.confidence * 100);
|
return severityWeight(finding.severity) * 1000 + Math.round(finding.confidence * 100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function buildFingerprint(category: string, path: string, line: number, title: string): string {
|
||||||
|
return createHash('sha256')
|
||||||
|
.update(JSON.stringify([category, path, line, title]))
|
||||||
|
.digest('hex')
|
||||||
|
.slice(0, 24);
|
||||||
|
}
|
||||||
|
|
||||||
function normalizeTitleRoot(title: string): string {
|
function normalizeTitleRoot(title: string): string {
|
||||||
return title
|
return title
|
||||||
.toLowerCase()
|
.toLowerCase()
|
||||||
@@ -40,8 +47,11 @@ function similarityKey(finding: ReviewAgentFinding): string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function dedupeFindings(candidates: ReviewAgentFinding[]): ReviewAgentFinding[] {
|
function dedupeFindings(candidates: ReviewAgentFinding[]): ReviewAgentFinding[] {
|
||||||
|
const ensured = candidates.map((f) =>
|
||||||
|
f.fingerprint ? f : { ...f, fingerprint: buildFingerprint(f.category, f.path, f.line, f.title) }
|
||||||
|
);
|
||||||
const byFingerprint = new Map<string, ReviewAgentFinding>();
|
const byFingerprint = new Map<string, ReviewAgentFinding>();
|
||||||
for (const finding of candidates) {
|
for (const finding of ensured) {
|
||||||
const existing = byFingerprint.get(finding.fingerprint);
|
const existing = byFingerprint.get(finding.fingerprint);
|
||||||
if (!existing || rankFinding(finding) > rankFinding(existing)) {
|
if (!existing || rankFinding(finding) > rankFinding(existing)) {
|
||||||
byFingerprint.set(finding.fingerprint, finding);
|
byFingerprint.set(finding.fingerprint, finding);
|
||||||
|
|||||||
@@ -11,15 +11,18 @@ import config from '../config';
|
|||||||
import { DiffExtractor } from '../review/context/diff-extractor';
|
import { DiffExtractor } from '../review/context/diff-extractor';
|
||||||
import type { LocalRepoPaths } from '../review/context/local-repo-manager';
|
import type { LocalRepoPaths } from '../review/context/local-repo-manager';
|
||||||
import { LocalRepoManager } from '../review/context/local-repo-manager';
|
import { LocalRepoManager } from '../review/context/local-repo-manager';
|
||||||
|
import { tokenCounter } from '../review/context/token-counter';
|
||||||
import { FileReviewStore } from '../review/store/file-review-store';
|
import { FileReviewStore } from '../review/store/file-review-store';
|
||||||
import type { Finding, ReviewContext, ReviewRun } from '../review/types';
|
import type { Finding, ReviewContext, ReviewRun } from '../review/types';
|
||||||
import { logger } from '../utils/logger';
|
import { logger } from '../utils/logger';
|
||||||
import { applyDeterministicPublishAdapter } from './deterministic-publish-adapter';
|
import { applyDeterministicPublishAdapter } from './deterministic-publish-adapter';
|
||||||
|
import { buildRepairPrompt, parseFindingResponse } from './schema';
|
||||||
import {
|
import {
|
||||||
type ReviewToolState,
|
type ReviewToolState,
|
||||||
type SubmittedReviewFindings,
|
type SubmittedReviewFindings,
|
||||||
createReviewTaskTools,
|
createReviewTaskTools,
|
||||||
normalizeSubmission,
|
normalizeSubmission,
|
||||||
|
parseSubmissionFromText,
|
||||||
} from './tools';
|
} from './tools';
|
||||||
|
|
||||||
export interface ReviewAgentRepositoryContext {
|
export interface ReviewAgentRepositoryContext {
|
||||||
@@ -88,18 +91,27 @@ function buildSessionScope(run: ReviewRun): string {
|
|||||||
function tryParseFinalSubmission(text?: string): SubmittedReviewFindings | null {
|
function tryParseFinalSubmission(text?: string): SubmittedReviewFindings | null {
|
||||||
if (!text) return null;
|
if (!text) return null;
|
||||||
try {
|
try {
|
||||||
return normalizeSubmission(JSON.parse(text));
|
const parsed = JSON.parse(text);
|
||||||
} catch {
|
if (parsed && typeof parsed === 'object') {
|
||||||
return null;
|
const result = normalizeSubmission(parsed);
|
||||||
}
|
if (result.findings.length > 0) return result;
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
return parseSubmissionFromText(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildReviewPrompt(context: ReviewAgentRunContext): string {
|
function buildReviewPrompt(context: ReviewAgentRunContext, model?: string): string {
|
||||||
const changedFiles = context.diffSummary?.changedFiles ?? [];
|
const changedFiles = context.diffSummary?.changedFiles ?? [];
|
||||||
const fileSummary = changedFiles
|
const fileSummary = changedFiles
|
||||||
.map((file) => `- ${file.status} ${file.path} (+${file.additions}/-${file.deletions})`)
|
.map((file) => `- ${file.status} ${file.path} (+${file.additions}/-${file.deletions})`)
|
||||||
.join('\n');
|
.join('\n');
|
||||||
const diff = context.diffSummary?.diff?.trim() || '(diff omitted)';
|
const rawDiff = context.diffSummary?.diff?.trim() || '(diff omitted)';
|
||||||
|
|
||||||
|
const usableBudget = model ? tokenCounter.getUsableBudget(model) : 124_000;
|
||||||
|
const reservedForPrompt = 4000;
|
||||||
|
const diffBudget = Math.max(1000, usableBudget - reservedForPrompt);
|
||||||
|
const diff =
|
||||||
|
tokenCounter.count(rawDiff) > diffBudget ? tokenCounter.clip(rawDiff, diffBudget) : rawDiff;
|
||||||
|
|
||||||
return [
|
return [
|
||||||
'You are the main code review agent for Gitea AI Assistant.',
|
'You are the main code review agent for Gitea AI Assistant.',
|
||||||
@@ -271,6 +283,33 @@ export class ReviewAgentEntrypoint {
|
|||||||
return snapshot.headSha;
|
return snapshot.headSha;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private async repairFindingsWithLLM(
|
||||||
|
rawText: string,
|
||||||
|
messages: Array<{ role: string; content: string; toolCalls?: unknown }>,
|
||||||
|
maxAttempts = 2
|
||||||
|
): Promise<string> {
|
||||||
|
let current = rawText;
|
||||||
|
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||||
|
const outcome = parseFindingResponse(current);
|
||||||
|
if (outcome.ok) return current;
|
||||||
|
const repairPrompt = buildRepairPrompt(outcome.error);
|
||||||
|
messages.push({ role: 'assistant', content: current });
|
||||||
|
messages.push({ role: 'user', content: repairPrompt });
|
||||||
|
const response = await this.modelClient.chat({
|
||||||
|
messages: messages as import('../llm/types').LLMMessage[],
|
||||||
|
model: this.model,
|
||||||
|
temperature: 0,
|
||||||
|
responseFormat: 'json',
|
||||||
|
});
|
||||||
|
current = response.content ?? '{"findings":[]}';
|
||||||
|
logger.info('LLM finding repair attempt', {
|
||||||
|
attempt: attempt + 1,
|
||||||
|
error: outcome.error.slice(0, 100),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
private async runMainAgent(
|
private async runMainAgent(
|
||||||
run: ReviewRun,
|
run: ReviewRun,
|
||||||
reviewContext: ReviewContext
|
reviewContext: ReviewContext
|
||||||
@@ -320,10 +359,13 @@ export class ReviewAgentEntrypoint {
|
|||||||
const agentResult = await runner.run({
|
const agentResult = await runner.run({
|
||||||
agentType: 'review-main-agent',
|
agentType: 'review-main-agent',
|
||||||
model: this.model,
|
model: this.model,
|
||||||
userMessage: buildReviewPrompt(runContext),
|
userMessage: buildReviewPrompt(runContext, this.model),
|
||||||
maxTurns: 8,
|
maxTurns: 8,
|
||||||
maxToolCalls: 4,
|
maxToolCalls: 4,
|
||||||
|
maxSubagents: 3,
|
||||||
timeoutMs: config.review.commandTimeoutMs,
|
timeoutMs: config.review.commandTimeoutMs,
|
||||||
|
maxEmptyResponses: 2,
|
||||||
|
maxConsecutiveToolFailures: 3,
|
||||||
session: {
|
session: {
|
||||||
agentType: 'review-main-agent',
|
agentType: 'review-main-agent',
|
||||||
metadata: {
|
metadata: {
|
||||||
@@ -343,7 +385,20 @@ export class ReviewAgentEntrypoint {
|
|||||||
|
|
||||||
const submitted =
|
const submitted =
|
||||||
reviewToolState.submittedReview ?? tryParseFinalSubmission(agentResult.finalText);
|
reviewToolState.submittedReview ?? tryParseFinalSubmission(agentResult.finalText);
|
||||||
const submission = submitted ?? { summaryMarkdown: agentResult.finalText ?? '', findings: [] };
|
let submission: SubmittedReviewFindings;
|
||||||
|
|
||||||
|
if (submitted) {
|
||||||
|
submission = submitted;
|
||||||
|
} else if (agentResult.finalText) {
|
||||||
|
const repaired = await this.repairFindingsWithLLM(
|
||||||
|
agentResult.finalText,
|
||||||
|
agentResult.messages
|
||||||
|
);
|
||||||
|
const repairedSubmission = tryParseFinalSubmission(repaired);
|
||||||
|
submission = repairedSubmission ?? { summaryMarkdown: agentResult.finalText, findings: [] };
|
||||||
|
} else {
|
||||||
|
submission = { summaryMarkdown: '', findings: [] };
|
||||||
|
}
|
||||||
const adapted = await applyDeterministicPublishAdapter({
|
const adapted = await applyDeterministicPublishAdapter({
|
||||||
store: this.store,
|
store: this.store,
|
||||||
runId: run.id,
|
runId: run.id,
|
||||||
|
|||||||
81
src/review-agent/schema/__tests__/finding-schema.test.ts
Normal file
81
src/review-agent/schema/__tests__/finding-schema.test.ts
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
import { describe, expect, it } from 'bun:test';
|
||||||
|
import { buildRepairPrompt, findingResponseSchema, parseFindingResponse } from '../index';
|
||||||
|
|
||||||
|
describe('findingResponseSchema', () => {
|
||||||
|
it('parses valid findings', () => {
|
||||||
|
const raw = JSON.stringify({
|
||||||
|
findings: [
|
||||||
|
{
|
||||||
|
severity: 'high',
|
||||||
|
confidence: 0.9,
|
||||||
|
path: 'src/app.ts',
|
||||||
|
line: 42,
|
||||||
|
title: 'SQL injection',
|
||||||
|
detail: 'Unsanitized input in query',
|
||||||
|
evidence: 'db.query(req.params.id)',
|
||||||
|
suggestion: 'Use parameterized queries',
|
||||||
|
category: 'security',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
const result = parseFindingResponse(raw);
|
||||||
|
expect(result.ok).toBe(true);
|
||||||
|
if (result.ok) {
|
||||||
|
expect(result.findings).toHaveLength(1);
|
||||||
|
expect(result.findings[0].severity).toBe('high');
|
||||||
|
expect(result.findings[0].category).toBe('security');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies defaults for optional fields', () => {
|
||||||
|
const raw = JSON.stringify({
|
||||||
|
findings: [
|
||||||
|
{
|
||||||
|
severity: 'low',
|
||||||
|
path: 'src/util.ts',
|
||||||
|
line: 10,
|
||||||
|
title: 'Missing error handling',
|
||||||
|
detail: 'No try-catch around async call',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
const result = parseFindingResponse(raw);
|
||||||
|
expect(result.ok).toBe(true);
|
||||||
|
if (result.ok) {
|
||||||
|
const finding = result.findings[0];
|
||||||
|
expect(finding.confidence).toBe(0.8);
|
||||||
|
expect(finding.evidence).toBe('');
|
||||||
|
expect(finding.suggestion).toBe('');
|
||||||
|
expect(finding.category).toBeUndefined();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid severity', () => {
|
||||||
|
const raw = JSON.stringify({
|
||||||
|
findings: [{ severity: 'critical', path: 'a.ts', line: 1, title: 'x', detail: 'y' }],
|
||||||
|
});
|
||||||
|
const result = parseFindingResponse(raw);
|
||||||
|
expect(result.ok).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects invalid JSON', () => {
|
||||||
|
const result = parseFindingResponse('not json');
|
||||||
|
expect(result.ok).toBe(false);
|
||||||
|
if (!result.ok) {
|
||||||
|
expect(result.raw).toBe('not json');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('defaults to empty findings array', () => {
|
||||||
|
const result = findingResponseSchema.parse({});
|
||||||
|
expect(result.findings).toEqual([]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('buildRepairPrompt', () => {
|
||||||
|
it('includes the parse error', () => {
|
||||||
|
const prompt = buildRepairPrompt('missing field "path"');
|
||||||
|
expect(prompt).toContain('missing field "path"');
|
||||||
|
expect(prompt).toContain('JSON');
|
||||||
|
});
|
||||||
|
});
|
||||||
57
src/review-agent/schema/finding-schema.ts
Normal file
57
src/review-agent/schema/finding-schema.ts
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
const findingItemSchema = z.object({
|
||||||
|
category: z.enum(['correctness', 'security', 'reliability', 'maintainability']).optional(),
|
||||||
|
severity: z.enum(['high', 'medium', 'low']),
|
||||||
|
confidence: z.number().min(0).max(1).optional().default(0.8),
|
||||||
|
path: z.string().min(1),
|
||||||
|
line: z.number().int().positive(),
|
||||||
|
title: z.string().min(1),
|
||||||
|
detail: z.string().min(1),
|
||||||
|
evidence: z.string().optional().default(''),
|
||||||
|
suggestion: z.string().optional().default(''),
|
||||||
|
fingerprint: z.string().min(1).optional(),
|
||||||
|
});
|
||||||
|
|
||||||
|
export const findingResponseSchema = z.object({
|
||||||
|
findings: z.array(findingItemSchema).default([]),
|
||||||
|
});
|
||||||
|
|
||||||
|
export type FindingResponse = z.infer<typeof findingResponseSchema>;
|
||||||
|
export type FindingItem = z.infer<typeof findingItemSchema>;
|
||||||
|
|
||||||
|
export interface FindingParseResult {
|
||||||
|
ok: true;
|
||||||
|
findings: FindingItem[];
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface FindingParseError {
|
||||||
|
ok: false;
|
||||||
|
error: string;
|
||||||
|
raw: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type FindingParseOutcome = FindingParseResult | FindingParseError;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse raw LLM output text into validated findings.
|
||||||
|
* Returns a discriminated union: { ok: true, findings } or { ok: false, error, raw }.
|
||||||
|
*/
|
||||||
|
export function parseFindingResponse(raw: string): FindingParseOutcome {
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(raw);
|
||||||
|
const result = findingResponseSchema.parse(parsed);
|
||||||
|
return { ok: true, findings: result.findings };
|
||||||
|
} catch (error) {
|
||||||
|
const message = error instanceof Error ? error.message : String(error);
|
||||||
|
return { ok: false, error: message, raw };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a repair prompt to ask the LLM to fix its malformed JSON output.
|
||||||
|
* Returns null if no more repair attempts should be made.
|
||||||
|
*/
|
||||||
|
export function buildRepairPrompt(parseError: string): string {
|
||||||
|
return `上一次最终结果无法通过 findingResponseSchema 校验。校验错误: ${parseError}\n\n请输出严格 JSON,格式如下:\n{"findings":[{"severity":"high|medium|low","confidence":0.8,"path":"...","line":1,"title":"...","detail":"...","evidence":"...","suggestion":"...","category":"correctness|security|reliability|maintainability"}]}\n\n不要输出额外文字,只输出 JSON。`;
|
||||||
|
}
|
||||||
10
src/review-agent/schema/index.ts
Normal file
10
src/review-agent/schema/index.ts
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
export {
|
||||||
|
findingResponseSchema,
|
||||||
|
parseFindingResponse,
|
||||||
|
buildRepairPrompt,
|
||||||
|
type FindingResponse,
|
||||||
|
type FindingItem,
|
||||||
|
type FindingParseOutcome,
|
||||||
|
type FindingParseResult,
|
||||||
|
type FindingParseError,
|
||||||
|
} from './finding-schema';
|
||||||
@@ -111,21 +111,7 @@ describe('review task tools', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
test('normalizeSubmission fills defaults and generated fingerprint', () => {
|
test('normalizeSubmission fills defaults and generated fingerprint', () => {
|
||||||
expect(
|
const result = normalizeSubmission({
|
||||||
normalizeSubmission({
|
|
||||||
summaryMarkdown: 'Summary',
|
|
||||||
findings: [
|
|
||||||
{
|
|
||||||
category: 'maintainability',
|
|
||||||
severity: 'medium',
|
|
||||||
path: 'src/app.ts',
|
|
||||||
line: 2,
|
|
||||||
title: 'Extract helper',
|
|
||||||
detail: 'The logic is duplicated.',
|
|
||||||
},
|
|
||||||
],
|
|
||||||
})
|
|
||||||
).toEqual({
|
|
||||||
summaryMarkdown: 'Summary',
|
summaryMarkdown: 'Summary',
|
||||||
findings: [
|
findings: [
|
||||||
{
|
{
|
||||||
@@ -135,13 +121,23 @@ describe('review task tools', () => {
|
|||||||
line: 2,
|
line: 2,
|
||||||
title: 'Extract helper',
|
title: 'Extract helper',
|
||||||
detail: 'The logic is duplicated.',
|
detail: 'The logic is duplicated.',
|
||||||
confidence: 0.8,
|
|
||||||
evidence: '',
|
|
||||||
suggestion: '',
|
|
||||||
fingerprint: 'maintainability:src/app.ts:2:Extract helper',
|
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
expect(result.summaryMarkdown).toBe('Summary');
|
||||||
|
expect(result.findings).toHaveLength(1);
|
||||||
|
expect(result.findings[0]).toMatchObject({
|
||||||
|
category: 'maintainability',
|
||||||
|
severity: 'medium',
|
||||||
|
path: 'src/app.ts',
|
||||||
|
line: 2,
|
||||||
|
title: 'Extract helper',
|
||||||
|
detail: 'The logic is duplicated.',
|
||||||
|
confidence: 0.8,
|
||||||
|
evidence: '',
|
||||||
|
suggestion: '',
|
||||||
|
fingerprint: 'd90c88d5929c2859b8591d69',
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
test('submit_review_findings stores valid findings', async () => {
|
test('submit_review_findings stores valid findings', async () => {
|
||||||
@@ -171,7 +167,7 @@ describe('review task tools', () => {
|
|||||||
confidence: 0.8,
|
confidence: 0.8,
|
||||||
evidence: '',
|
evidence: '',
|
||||||
suggestion: '',
|
suggestion: '',
|
||||||
fingerprint: 'reliability:src/app.ts:2:Token may be undefined',
|
fingerprint: 'f226057a37399d83b53cbeed',
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,8 @@
|
|||||||
export { createReviewTaskTools, normalizeSubmission } from './review-tools';
|
export {
|
||||||
|
createReviewTaskTools,
|
||||||
|
normalizeSubmission,
|
||||||
|
parseSubmissionFromText,
|
||||||
|
} from './review-tools';
|
||||||
export type {
|
export type {
|
||||||
CreateReviewTaskToolsOptions,
|
CreateReviewTaskToolsOptions,
|
||||||
ReviewToolState,
|
ReviewToolState,
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
|
import { createHash } from 'node:crypto';
|
||||||
import type { MainAgentTool } from '../../agent-kernel/loop/types';
|
import type { MainAgentTool } from '../../agent-kernel/loop/types';
|
||||||
import type { Finding, ReviewContext } from '../../review/types';
|
import type { Finding, ReviewContext } from '../../review/types';
|
||||||
|
import type { FindingItem } from '../schema';
|
||||||
const VALID_CATEGORIES = ['correctness', 'security', 'reliability', 'maintainability'] as const;
|
import { parseFindingResponse } from '../schema';
|
||||||
const VALID_SEVERITIES = ['high', 'medium', 'low'] as const;
|
|
||||||
|
|
||||||
export type ReviewAgentFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
|
export type ReviewAgentFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
|
||||||
|
|
||||||
@@ -51,45 +51,46 @@ function requireObject(value: unknown, message: string): Record<string, unknown>
|
|||||||
return value as Record<string, unknown>;
|
return value as Record<string, unknown>;
|
||||||
}
|
}
|
||||||
|
|
||||||
function requireString(input: Record<string, unknown>, key: string): string {
|
function buildFingerprint(category: string, path: string, line: number, title: string): string {
|
||||||
const value = readString(input, key);
|
return createHash('sha256')
|
||||||
if (!value) throw new Error(`Finding ${key} must be a non-empty string`);
|
.update(JSON.stringify([category, path, line, title]))
|
||||||
return value;
|
.digest('hex')
|
||||||
|
.slice(0, 24);
|
||||||
|
}
|
||||||
|
|
||||||
|
function findingItemToReviewAgent(item: FindingItem): ReviewAgentFinding {
|
||||||
|
const category = item.category ?? 'correctness';
|
||||||
|
return {
|
||||||
|
fingerprint: item.fingerprint || buildFingerprint(category, item.path, item.line, item.title),
|
||||||
|
category,
|
||||||
|
severity: item.severity,
|
||||||
|
confidence: item.confidence ?? 0.8,
|
||||||
|
path: item.path,
|
||||||
|
line: item.line,
|
||||||
|
title: item.title,
|
||||||
|
detail: item.detail,
|
||||||
|
evidence: item.evidence ?? '',
|
||||||
|
suggestion: item.suggestion ?? '',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Parse raw JSON text into validated SubmittedReviewFindings using Zod schema. */
|
||||||
|
export function parseSubmissionFromText(raw: string): SubmittedReviewFindings | null {
|
||||||
|
const outcome = parseFindingResponse(raw);
|
||||||
|
if (!outcome.ok) return null;
|
||||||
|
return {
|
||||||
|
summaryMarkdown: '',
|
||||||
|
findings: outcome.findings.map(findingItemToReviewAgent),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeFinding(value: unknown): ReviewAgentFinding {
|
function normalizeFinding(value: unknown): ReviewAgentFinding {
|
||||||
const input = requireObject(value, 'Each finding must be an object');
|
const outcome = parseFindingResponse(JSON.stringify({ findings: [value] }));
|
||||||
|
if (outcome.ok && outcome.findings.length > 0) {
|
||||||
const category = readString(input, 'category');
|
return findingItemToReviewAgent(outcome.findings[0]);
|
||||||
if (!category || !VALID_CATEGORIES.includes(category as (typeof VALID_CATEGORIES)[number])) {
|
|
||||||
throw new Error(
|
|
||||||
'Finding category must be correctness, security, reliability, or maintainability'
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
const detail = !outcome.ok ? outcome.error : 'no valid findings in response';
|
||||||
const severity = readString(input, 'severity');
|
throw new Error(`Finding validation failed: ${detail}`);
|
||||||
if (!severity || !VALID_SEVERITIES.includes(severity as (typeof VALID_SEVERITIES)[number])) {
|
|
||||||
throw new Error('Finding severity must be high, medium, or low');
|
|
||||||
}
|
|
||||||
|
|
||||||
const path = requireString(input, 'path');
|
|
||||||
const line = readNumber(input, 'line');
|
|
||||||
if (line === undefined) throw new Error('Finding line must be a number');
|
|
||||||
const title = requireString(input, 'title');
|
|
||||||
const detail = requireString(input, 'detail');
|
|
||||||
|
|
||||||
return {
|
|
||||||
fingerprint: readString(input, 'fingerprint') || `${category}:${path}:${line}:${title}`,
|
|
||||||
category,
|
|
||||||
severity,
|
|
||||||
confidence: readNumber(input, 'confidence') ?? 0.8,
|
|
||||||
path,
|
|
||||||
line,
|
|
||||||
title,
|
|
||||||
detail,
|
|
||||||
evidence: readString(input, 'evidence') ?? '',
|
|
||||||
suggestion: readString(input, 'suggestion') ?? '',
|
|
||||||
} as ReviewAgentFinding;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function normalizeSubmission(value: unknown): SubmittedReviewFindings {
|
export function normalizeSubmission(value: unknown): SubmittedReviewFindings {
|
||||||
|
|||||||
@@ -76,7 +76,9 @@ export class TokenCounter {
|
|||||||
if (text.length <= maxChars) {
|
if (text.length <= maxChars) {
|
||||||
return text;
|
return text;
|
||||||
}
|
}
|
||||||
return `${text.slice(0, maxChars)}\n... [truncated, exceeded ${maxTokens} token budget]`;
|
const lastNewline = text.lastIndexOf('\n', maxChars);
|
||||||
|
const cutPoint = lastNewline > maxChars * 0.5 ? lastNewline : maxChars;
|
||||||
|
return `${text.slice(0, cutPoint)}\n... [truncated, exceeded ${maxTokens} token budget]`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user